From 381e9a90748bb659f56081123052e3e95501a4b4 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Fri, 22 Jun 2018 11:32:12 +0200 Subject: MAP: Move MAP-E/T to a plugin. Only remaining traces of MAP in the src/vnet is now in buffer.h. Awaiting a new buffer opaque API (hint, hint). Change-Id: Ie165561484731f1d7ed6e0f604b43624e06db3f0 Signed-off-by: Ole Troan --- src/configure.ac | 1 + src/plugins/Makefile.am | 4 + src/plugins/examples/gen-rules.py | 186 +++ src/plugins/examples/health_check.c | 109 ++ src/plugins/examples/test_map.py | 141 ++ src/plugins/map.am | 39 + src/plugins/map/gen-rules.py | 107 ++ src/plugins/map/ip4_map.c | 754 +++++++++++ src/plugins/map/ip4_map_t.c | 898 +++++++++++++ src/plugins/map/ip6_map.c | 1265 ++++++++++++++++++ src/plugins/map/ip6_map_t.c | 1186 +++++++++++++++++ src/plugins/map/map.api | 163 +++ src/plugins/map/map.c | 2373 ++++++++++++++++++++++++++++++++++ src/plugins/map/map.h | 597 +++++++++ src/plugins/map/map_all_api_h.h | 19 + src/plugins/map/map_api.c | 311 +++++ src/plugins/map/map_doc.md | 69 + src/plugins/map/map_dpo.c | 130 ++ src/plugins/map/map_dpo.h | 43 + src/plugins/map/map_msg_enum.h | 31 + src/plugins/map/map_test.c | 504 ++++++++ src/plugins/map/test.c | 206 +++ src/vat/api_format.c | 373 ------ src/vnet.am | 31 - src/vnet/buffer.h | 8 - src/vnet/map/examples/gen-rules.py | 186 --- src/vnet/map/examples/health_check.c | 109 -- src/vnet/map/examples/test_map.py | 141 -- src/vnet/map/gen-rules.py | 107 -- src/vnet/map/ip4_map.c | 754 ----------- src/vnet/map/ip4_map_t.c | 898 ------------- src/vnet/map/ip6_map.c | 1265 ------------------ src/vnet/map/ip6_map_t.c | 1186 ----------------- src/vnet/map/map.api | 163 --- src/vnet/map/map.c | 2363 --------------------------------- src/vnet/map/map.h | 592 --------- src/vnet/map/map_api.c | 305 ----- src/vnet/map/map_doc.md | 69 - src/vnet/map/map_dpo.c | 130 -- src/vnet/map/map_dpo.h | 43 - src/vnet/map/test.c | 206 --- src/vnet/vnet_all_api_h.h | 1 - 42 files changed, 
9136 insertions(+), 8930 deletions(-) create mode 100755 src/plugins/examples/gen-rules.py create mode 100644 src/plugins/examples/health_check.c create mode 100755 src/plugins/examples/test_map.py create mode 100644 src/plugins/map.am create mode 100755 src/plugins/map/gen-rules.py create mode 100644 src/plugins/map/ip4_map.c create mode 100644 src/plugins/map/ip4_map_t.c create mode 100644 src/plugins/map/ip6_map.c create mode 100644 src/plugins/map/ip6_map_t.c create mode 100644 src/plugins/map/map.api create mode 100644 src/plugins/map/map.c create mode 100644 src/plugins/map/map.h create mode 100644 src/plugins/map/map_all_api_h.h create mode 100644 src/plugins/map/map_api.c create mode 100644 src/plugins/map/map_doc.md create mode 100644 src/plugins/map/map_dpo.c create mode 100644 src/plugins/map/map_dpo.h create mode 100644 src/plugins/map/map_msg_enum.h create mode 100644 src/plugins/map/map_test.c create mode 100644 src/plugins/map/test.c delete mode 100755 src/vnet/map/examples/gen-rules.py delete mode 100644 src/vnet/map/examples/health_check.c delete mode 100755 src/vnet/map/examples/test_map.py delete mode 100755 src/vnet/map/gen-rules.py delete mode 100644 src/vnet/map/ip4_map.c delete mode 100644 src/vnet/map/ip4_map_t.c delete mode 100644 src/vnet/map/ip6_map.c delete mode 100644 src/vnet/map/ip6_map_t.c delete mode 100644 src/vnet/map/map.api delete mode 100644 src/vnet/map/map.c delete mode 100644 src/vnet/map/map.h delete mode 100644 src/vnet/map/map_api.c delete mode 100644 src/vnet/map/map_doc.md delete mode 100644 src/vnet/map/map_dpo.c delete mode 100644 src/vnet/map/map_dpo.h delete mode 100644 src/vnet/map/test.c (limited to 'src') diff --git a/src/configure.ac b/src/configure.ac index 2b6db9503a8..170a9083590 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -315,6 +315,7 @@ PLUGIN_ENABLED(marvell) PLUGIN_ENABLED(memif) PLUGIN_ENABLED(pppoe) PLUGIN_ENABLED(nat) +PLUGIN_ENABLED(map) PLUGIN_ENABLED(srv6ad) PLUGIN_ENABLED(srv6am) 
PLUGIN_ENABLED(srv6as) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 9c891c54b3c..5d9ca8b305a 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -115,6 +115,10 @@ if ENABLE_NAT_PLUGIN include nat.am endif +if ENABLE_MAP_PLUGIN +include map.am +endif + if ENABLE_STN_PLUGIN include stn.am endif diff --git a/src/plugins/examples/gen-rules.py b/src/plugins/examples/gen-rules.py new file mode 100755 index 00000000000..7964aa9a359 --- /dev/null +++ b/src/plugins/examples/gen-rules.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx ip6-pfx ::/0 ip6-src ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid ip6-dst + +def_ip4_pfx = '192.0.2.0/24' +def_ip6_pfx = '2001:db8::/32' +def_ip6_src = '2001:db8::1' +def_psid_offset = 6 +def_psid_len = 6 +def_ea_bits_len = 0 + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +parser.add_argument('-f', action="store", dest="format", default="vpp") +parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx) +parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx) +parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src) +parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len) +parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset) +parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len) +args = parser.parse_args() + +# +# Print domain +# +def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len): + if format == 'vpp': + print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src + + " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) + if format == 'confd': + print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src + + " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx + + " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) + if format == 'xml': + print("") + print("", i, ""); + print(" " + ip6_src + "") + print(" " + ip6_pfx + "") + print(" " + ip4_pfx + "") + print(" ", eabits_len, "") + print(" ", psid_len, "") + print(" ", psid_offset, "") + +def domain_print_end(): + if format == 'xml': + print("") + +def rule_print(i, psid, dst): + if 
format == 'vpp': + print("map add rule index", i, "psid", psid, "ip6-dst", dst) + if format == 'confd': + print("binding", psid, "ipv6-addr", dst) + if format == 'xml': + print(" ") + print(" ", psid, "") + print(" ", dst, "") + print(" ") + +# +# Algorithmic mapping Shared IPv4 address +# +def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): + domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len) + domain_print_end() + +# +# 1:1 Full IPv4 address +# +def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): + ip4_pfx = ipaddress.ip_network(ip4_pfx_str) + ip6_src = ipaddress.ip_address(ip6_src_str) + ip6_dst = ipaddress.ip_network(ip6_pfx_str) + psid_len = 0 + mod = ip4_pfx.num_addresses / 1024 + + for i in range(ip4_pfx.num_addresses): + domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0) + domain_print_end() + if ip6_src_ecmp and not i % mod: + ip6_src = ip6_src + 1 + +# +# 1:1 Shared IPv4 address, shared BR (16) VPP CLI +# +def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): + ip4_pfx = ipaddress.ip_network(ip4_pfx_str) + ip6_src = ipaddress.ip_address(ip6_src_str) + ip6_dst = ipaddress.ip_network(ip6_pfx_str) + mod = ip4_pfx.num_addresses / 1024 + + for i in range(ip4_pfx.num_addresses): + domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) + for psid in range(0x1 << int(psid_len)): + rule_print(i, psid, str(ip6_dst[(i * (0x1< + + + urn:ietf:params:netconf:base:1.0 + + +]]>]]> + + + + + + + + + + + + + + ''') + +def xml_footer_print(): + print(''' + + + + + + + +]]>]]> + + + + + + +]]>]]> + ''') + + +format = args.format +if format == 'xml': + xml_header_print() +globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len) +if format == 'xml': 
+ xml_footer_print() diff --git a/src/plugins/examples/health_check.c b/src/plugins/examples/health_check.c new file mode 100644 index 00000000000..5f0d85fec08 --- /dev/null +++ b/src/plugins/examples/health_check.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void +usage (void) { + fprintf(stderr, + "Usage: health_check" + " -d debug" + " -I interface" + "\n"); + exit(2); +} + +int +main (int argc, char **argv) +{ + int sd, ch; + uint8_t *opt, *pkt; + struct ifreq ifr; + char *interface = NULL; + bool debug = false; + + while ((ch = getopt(argc, argv, "h?" "I:" "d")) != EOF) { + switch(ch) { + case 'I': + interface = optarg; + break; + case 'd': + debug = true; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (!interface) + usage(); + + /* Request a socket descriptor sd. */ + if ((sd = socket (AF_INET6, SOCK_RAW, IPPROTO_IPIP)) < 0) { + perror ("Failed to get socket descriptor "); + exit (EXIT_FAILURE); + } + + memset(&ifr, 0, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", interface); + + /* Bind socket to interface of this node. 
*/ + if (setsockopt (sd, SOL_SOCKET, SO_BINDTODEVICE, (void *) &ifr, sizeof (ifr)) < 0) { + perror ("SO_BINDTODEVICE failed"); + exit (EXIT_FAILURE); + } + if (debug) printf("Binding to interface %s\n", interface); + + while (1) { + struct sockaddr_in6 src_addr; + socklen_t addrlen = sizeof(src_addr); + char source[INET6_ADDRSTRLEN+1]; + int len; + uint8_t inpack[IP_MAXPACKET]; + + if ((len = recvfrom(sd, inpack, sizeof(inpack), 0, (struct sockaddr *)&src_addr, &addrlen)) < 0) { + perror("recvfrom failed "); + } + if (inet_ntop(AF_INET6, &src_addr.sin6_addr, source, INET6_ADDRSTRLEN) == NULL) { + perror("inet_ntop() failed."); + exit(EXIT_FAILURE); + } + + /* Reply */ + struct iphdr *ip = (struct iphdr *)inpack; + uint32_t saddr; + struct icmphdr *icmp; + + saddr = ip->saddr; + ip->saddr = ip->daddr; + ip->daddr = saddr; + + switch (ip->protocol) { + case 1: + if (debug) printf ("ICMP Echo request from %s\n", source); + icmp = (struct icmphdr *)&ip[1]; + icmp->type = ICMP_ECHOREPLY; + break; + default: + fprintf(stderr, "Unsupported protocol %d", ip->protocol); + } + if (len = sendto(sd, inpack, len, 0, (struct sockaddr *)&src_addr, addrlen) < 0) { + perror("sendto failed "); + } + } + + close (sd); + + return (EXIT_SUCCESS); +} diff --git a/src/plugins/examples/test_map.py b/src/plugins/examples/test_map.py new file mode 100755 index 00000000000..21388d49526 --- /dev/null +++ b/src/plugins/examples/test_map.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +import time,argparse,sys,cmd, unittest +from ipaddress import * + +parser = argparse.ArgumentParser(description='VPP MAP test') +parser.add_argument('-i', nargs='*', action="store", dest="inputdir") +args = parser.parse_args() + +for dir in args.inputdir: + sys.path.append(dir) +from vpp_papi import * + +# +# 1:1 Shared IPv4 address, shared BR (16) VPP CLI +# +def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): + ip4_pfx = ip_network(ip4_pfx_str) + 
ip6_src = ip_address(ip6_src_str) + ip6_dst = ip_network(ip6_pfx_str) + ip6_nul = IPv6Address(u'0::0') + mod = ip4_pfx.num_addresses / 1024 + + for i in range(ip4_pfx.num_addresses): + a = time.clock() + t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0) + #print "Return from map_add_domain", t + if t == None: + print "map_add_domain failed" + continue + if t.retval != 0: + print "map_add_domain failed", t + continue + for psid in range(0x1 << int(psid_len)): + r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<H', msg[0:2]) + size = unpack('>H', msg[2:4]) + print "Received", id, "of size", size + i += 1 + #del msg + continue + + #time.sleep(0.001) + return + +# Create RX thread +rxthread = RXThread() +rxthread.setDaemon(True) + +print "Connect", connect_to_vpe("client124") +import timeit +rxthread.start() +print "After thread started" + +#pneum_kill_thread() +print "After thread killed" + +#t = show_version(0) +#print "Result from show version", t + +print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version") +time.sleep(10) +#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping") + + +disconnect_from_vpe() +sys.exit() + + +print t.program, t.version,t.builddate,t.builddirectory + +''' + +t = map_domain_dump(0) +if not t: + print('show map domain failed') + +for d in t: + print("IP6 prefix:",str(IPv6Address(d.ip6prefix))) + print( "IP4 prefix:",str(IPv4Address(d.ip4prefix))) +''' + +suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP) +unittest.TextTestRunner(verbosity=2).run(suite) + +disconnect_from_vpe() + + diff --git a/src/plugins/map.am b/src/plugins/map.am new file mode 100644 index 00000000000..555d7006f49 --- /dev/null +++ b/src/plugins/map.am @@ -0,0 +1,39 @@ + +# Copyright (c) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += map_test_plugin.la +vppplugins_LTLIBRARIES += map_plugin.la + +map_plugin_la_SOURCES = \ + map/ip4_map.c \ + map/ip4_map_t.c \ + map/ip6_map.c \ + map/ip6_map_t.c \ + map/map_api.c \ + map/map.c \ + map/map_dpo.c + +API_FILES += map/map.api + +nobase_apiinclude_HEADERS += \ + map/map_all_api_h.h \ + map/map_msg_enum.h \ + map/map.api.h \ + map/map_dpo.h \ + map/map.h + +map_test_plugin_la_SOURCES = \ + map/map_test.c + +# vi:syntax=automake diff --git a/src/plugins/map/gen-rules.py b/src/plugins/map/gen-rules.py new file mode 100755 index 00000000000..533a8e237f7 --- /dev/null +++ b/src/plugins/map/gen-rules.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx ip6-pfx ::/0 ip6-src ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid ip6-dst + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +args = parser.parse_args() + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1< +#include + +vlib_node_registration_t ip4_map_reass_node; + +enum ip4_map_next_e +{ + IP4_MAP_NEXT_IP6_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP4_MAP_NEXT_IP6_REWRITE, +#endif + IP4_MAP_NEXT_IP4_FRAGMENT, + IP4_MAP_NEXT_IP6_FRAGMENT, + IP4_MAP_NEXT_REASS, + IP4_MAP_NEXT_ICMP_ERROR, + IP4_MAP_NEXT_DROP, + IP4_MAP_N_NEXT, +}; + +enum ip4_map_reass_next_t +{ + IP4_MAP_REASS_NEXT_IP6_LOOKUP, + IP4_MAP_REASS_NEXT_IP4_FRAGMENT, + IP4_MAP_REASS_NEXT_DROP, + IP4_MAP_REASS_N_NEXT, +}; + +typedef struct +{ + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip4_map_reass_trace_t; + +u8 * +format_ip4_map_reass_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *); + return format (s, "MAP domain index: %d L4 port: %u Status: %s", + t->map_domain_index, t->port, + t->cached ? 
"cached" : "forwarded"); +} + +static_always_inline u16 +ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip, + u32 * next, u8 * error) +{ + u16 port = 0; + + if (d->psid_length > 0) + { + if (ip4_get_fragment_offset (ip) == 0) + { + if (PREDICT_FALSE + ((ip->ip_version_and_header_length != 0x45) + || clib_host_to_net_u16 (ip->length) < 28)) + { + return 0; + } + port = ip4_get_port (ip, 0); + if (port) + { + /* Verify that port is not among the well-known ports */ + if ((d->psid_offset > 0) + && (clib_net_to_host_u16 (port) < + (0x1 << (16 - d->psid_offset)))) + { + *error = MAP_ERROR_ENCAP_SEC_CHECK; + } + else + { + if (ip4_get_fragment_more (ip)) + *next = IP4_MAP_NEXT_REASS; + return (port); + } + } + else + { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } + else + { + *next = IP4_MAP_NEXT_REASS; + } + } + return (0); +} + +/* + * ip4_map_vtcfl + */ +static_always_inline u32 +ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p) +{ + map_main_t *mm = &map_main; + u8 tc = mm->tc_copy ? ip4->tos : mm->tc; + u32 vtcfl = 0x6 << 28; + vtcfl |= tc << 20; + vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff; + + return (clib_host_to_net_u32 (vtcfl)); +} + +static_always_inline bool +ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei) + { + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index; + return (true); + } +#endif + return (false); +} + +/* + * ip4_map_ttl + */ +static inline void +ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error) +{ + i32 ttl = ip->ttl; + + /* Input node should have reject packets with ttl 0. */ + ASSERT (ip->ttl > 0); + + u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100); + checksum += checksum >= 0xffff; + ip->checksum = checksum; + ttl -= 1; + ip->ttl = ttl; + *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error; + + /* Verify checksum. 
*/ + ASSERT (ip->checksum == ip4_header_checksum (ip)); +} + +static u32 +ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) +{ + map_main_t *mm = &map_main; + + if (mm->frag_inner) + { + ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu, + IP4_FRAG_NEXT_IP6_LOOKUP, + IP_FRAG_FLAG_IP6_HEADER); + return (IP4_MAP_NEXT_IP4_FRAGMENT); + } + else + { + if (df && !mm->frag_ignore_df) + { + icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + mtu); + vlib_buffer_advance (b, sizeof (ip6_header_t)); + *error = MAP_ERROR_DF_SET; + return (IP4_MAP_NEXT_ICMP_ERROR); + } + ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, + IP_FRAG_FLAG_IP6_HEADER); + return (IP4_MAP_NEXT_IP6_FRAGMENT); + } +} + +/* + * ip4_map + */ +static uword +ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + map_domain_t *d0, *d1; + u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; + ip4_header_t *ip40, *ip41; + u16 port0 = 0, port1 = 0; + ip6_header_t *ip6h0, *ip6h1; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = + IP4_MAP_NEXT_IP6_LOOKUP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + /* IPv4 + 8 = 28. possibly plus -40 */ + CLIB_PREFETCH (p2->data - 40, 68, STORE); + CLIB_PREFETCH (p3->data - 40, 68, STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + ip40 = vlib_buffer_get_current (p0); + ip41 = vlib_buffer_get_current (p1); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (map_domain_index0); + map_domain_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + d1 = ip4_map_get_domain (map_domain_index1); + ASSERT (d0); + ASSERT (d1); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); + port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1); + + /* Decrement IPv4 TTL */ + ip4_map_decrement_ttl (ip40, &error0); + ip4_map_decrement_ttl (ip41, &error1); + bool df0 = + ip40->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + bool df1 = + ip41->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); + u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16 (port0); + u16 dp41 = clib_net_to_host_u16 (port1); + u64 dal60 = map_get_pfx (d0, da40, dp40); + u64 dal61 = map_get_pfx (d1, da41, dp41); + u64 dar60 = map_get_sfx (d0, da40, dp40); + u64 dar61 = map_get_sfx (d1, da41, dp41); + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE + && next0 != IP4_MAP_NEXT_REASS) + error0 = MAP_ERROR_NO_BINDING; + if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE + && next1 != IP4_MAP_NEXT_REASS) + error1 = 
MAP_ERROR_NO_BINDING; + + /* construct ipv6 header */ + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + vlib_buffer_advance (p1, -sizeof (ip6_header_t)); + ip6h0 = vlib_buffer_get_current (p0); + ip6h1 = vlib_buffer_get_current (p1); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + ip6h0->ip_version_traffic_class_and_flow_label = + ip4_map_vtcfl (ip40, p0); + ip6h1->ip_version_traffic_class_and_flow_label = + ip4_map_vtcfl (ip41, p1); + ip6h0->payload_length = ip40->length; + ip6h1->payload_length = ip41->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h1->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h1->src_address = d1->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); + ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61); + ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) + { + if (PREDICT_FALSE + (d0->mtu + && (clib_net_to_host_u16 (ip6h0->payload_length) + + sizeof (*ip6h0) > d0->mtu))) + { + next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); + } + else + { + next0 = + ip4_map_ip6_lookup_bypass (p0, + ip40) ? + IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip6h0->payload_length) + + 40); + } + } + else + { + next0 = IP4_MAP_NEXT_DROP; + } + + /* + * Determine next node. 
Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE (error1 == MAP_ERROR_NONE)) + { + if (PREDICT_FALSE + (d1->mtu + && (clib_net_to_host_u16 (ip6h1->payload_length) + + sizeof (*ip6h1) > d1->mtu))) + { + next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1); + } + else + { + next1 = + ip4_map_ip6_lookup_bypass (p1, + ip41) ? + IP4_MAP_NEXT_IP6_REWRITE : next1; + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + map_domain_index1, 1, + clib_net_to_host_u16 + (ip6h1->payload_length) + + 40); + } + } + else + { + next1 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + u16 port0 = 0; + ip6_header_t *ip6h0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; + u32 map_domain_index0 = ~0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip40 = vlib_buffer_get_current (p0); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (map_domain_index0); + ASSERT (d0); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); + + /* Decrement IPv4 TTL */ + ip4_map_decrement_ttl (ip40, &error0); + bool df0 = + 
ip40->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16 (port0); + u64 dal60 = map_get_pfx (d0, da40, dp40); + u64 dar60 = map_get_sfx (d0, da40, dp40); + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE + && next0 != IP4_MAP_NEXT_REASS) + error0 = MAP_ERROR_NO_BINDING; + + /* construct ipv6 header */ + vlib_buffer_advance (p0, -(sizeof (ip6_header_t))); + ip6h0 = vlib_buffer_get_current (p0); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + ip6h0->ip_version_traffic_class_and_flow_label = + ip4_map_vtcfl (ip40, p0); + ip6h0->payload_length = ip40->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) + { + if (PREDICT_FALSE + (d0->mtu + && (clib_net_to_host_u16 (ip6h0->payload_length) + + sizeof (*ip6h0) > d0->mtu))) + { + next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); + } + else + { + next0 = + ip4_map_ip6_lookup_bypass (p0, + ip40) ? 
+ IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip6h0->payload_length) + + 40); + } + } + else + { + next0 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* + * ip4_map_reass + */ +static uword +ip4_map_reass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_reass_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 thread_index = vlib_get_thread_index (); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + i32 port0 = 0; + ip6_header_t *ip60; + u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; + u32 map_domain_index0; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + ip40 = (ip4_header_t *) (ip60 + 1); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain 
(map_domain_index0); + + map_ip4_reass_lock (); + map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, + ip40->dst_address.as_u32, + ip40->fragment_id, + ip40->protocol, + &fragments_to_drop); + if (PREDICT_FALSE (!r)) + { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) + { + if (r->port >= 0) + { + // We know the port already + port0 = r->port; + } + else if (map_ip4_reass_add_fragment (r, pi0)) + { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free (r, &fragments_to_drop); + } + else + { + cached = 1; + } + } + else if ((port0 = ip4_get_port (ip40, 0)) == 0) + { + // Could not find port. We'll free the reassembly. + error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free (r, &fragments_to_drop); + } + else + { + r->port = port0; + map_ip4_reass_get_fragments (r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) + { + r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; + if (!ip4_get_fragment_more (ip40)) + r->expected_total = + ip4_get_fragment_offset (ip40) * 8 + + clib_host_to_net_u16 (ip40->length) - 20; + if (r->forwarded >= r->expected_total) + map_ip4_reass_free (r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock (); + + // NOTE: Most operations have already been performed by ip4_map + // All we need is the right destination address + ip60->dst_address.as_u64[0] = + map_get_pfx_net (d0, ip40->dst_address.as_u32, port0); + ip60->dst_address.as_u64[1] = + map_get_sfx_net (d0, ip40->dst_address.as_u32, port0); + + if (PREDICT_FALSE + (d0->mtu + && (clib_net_to_host_u16 (ip60->payload_length) + + sizeof (*ip60) > d0->mtu))) + { + vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60); + vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer (p0)->ip_frag.mtu = d0->mtu; + vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + 
next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_ip4_map_reass_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if (cached) + { + //Cached fragment: give back the to_next slot taken above (dequeue the packet) + n_left_to_next++; + to_next--; + } + else + { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip60->payload_length) + 40); + next0 = + (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the input vector: + //refill the frame with at most VLIB_FRAME_SIZE pending fragments, taken from the tail of fragments_to_loopback + if (n_left_from == 0 && vec_len (fragments_to_loopback)) + { + from = vlib_frame_vector_args (frame); + u32 len = vec_len (fragments_to_loopback); + if (len <= VLIB_FRAME_SIZE) + { + clib_memcpy (from, fragments_to_loopback, + sizeof (u32) * len); + n_left_from = len; + vec_reset_length (fragments_to_loopback); + } + else + { + clib_memcpy (from, + fragments_to_loopback + (len - + VLIB_FRAME_SIZE), + sizeof (u32) * VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node (vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP4_MAP_REASS_NEXT_DROP); + + vec_free (fragments_to_drop); + vec_free (fragments_to_loopback); + return frame->n_vectors; +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_node) = { + .function = ip4_map, + .name = "ip4-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + +
.n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP4_MAP_N_NEXT, + .next_nodes = { + [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance", +#endif + [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", + [IP4_MAP_NEXT_REASS] = "ip4-map-reass", + [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP4_MAP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_reass_node) = { + .function = ip4_map_reass, + .name = "ip4-map-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip4_map_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP4_MAP_REASS_N_NEXT, + .next_nodes = { + [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP4_MAP_REASS_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c new file mode 100644 index 00000000000..ab4b46c916f --- /dev/null +++ b/src/plugins/map/ip4_map_t.c @@ -0,0 +1,898 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "map.h" + +#include +#include + +#define IP4_MAP_T_DUAL_LOOP 1 + +typedef enum +{ + IP4_MAPT_NEXT_MAPT_TCP_UDP, + IP4_MAPT_NEXT_MAPT_ICMP, + IP4_MAPT_NEXT_MAPT_FRAGMENTED, + IP4_MAPT_NEXT_DROP, + IP4_MAPT_N_NEXT +} ip4_mapt_next_t; + +typedef enum +{ + IP4_MAPT_ICMP_NEXT_IP6_LOOKUP, + IP4_MAPT_ICMP_NEXT_IP6_FRAG, + IP4_MAPT_ICMP_NEXT_DROP, + IP4_MAPT_ICMP_N_NEXT +} ip4_mapt_icmp_next_t; + +typedef enum +{ + IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP, + IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG, + IP4_MAPT_TCP_UDP_NEXT_DROP, + IP4_MAPT_TCP_UDP_N_NEXT +} ip4_mapt_tcp_udp_next_t; + +typedef enum +{ + IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP, + IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG, + IP4_MAPT_FRAGMENTED_NEXT_DROP, + IP4_MAPT_FRAGMENTED_N_NEXT +} ip4_mapt_fragmented_next_t; + +//This is used to pass information within the buffer data. +//Buffer structure being too small to contain big structures like this. +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + ip6_address_t daddr; + ip6_address_t saddr; + //IPv6 header + Fragmentation header will be here + //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) + u8 unused[28]; +}) ip4_mapt_pseudo_header_t; +/* *INDENT-ON* */ + + +static_always_inline int +ip4_map_fragment_cache (ip4_header_t * ip4, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock (); + map_ip4_reass_t *r = + map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == + IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock (); + return !r; +} + +static_always_inline i32 +ip4_map_fragment_get_port (ip4_header_t * ip4) +{ + u32 *ignore = NULL; + map_ip4_reass_lock (); + map_ip4_reass_t *r = + map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == + IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + i32 ret = r ? 
r->port : -1; + map_ip4_reass_unlock (); + return ret; +} + +typedef struct +{ + map_domain_t *d; + u16 id; +} icmp_to_icmp6_ctx_t; + +static int +ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) +{ + icmp_to_icmp6_ctx_t *ctx = arg; + map_main_t *mm = &map_main; + + if (mm->is_ce) + { + ip6->src_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); + ip6->src_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); + ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, + &ip4->dst_address); + } + else + { + ip4_map_t_embedded_address (ctx->d, &ip6->src_address, + &ip4->src_address); + ip6->dst_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); + ip6->dst_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); + } + + return 0; +} + +static int +ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, + void *arg) +{ + icmp_to_icmp6_ctx_t *ctx = arg; + map_main_t *mm = &map_main; + + if (mm->is_ce) + { + //Note that the destination address is within the domain + //while the source address is the one outside the domain + ip4_map_t_embedded_address (ctx->d, &ip6->src_address, + &ip4->src_address); + ip6->dst_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); + ip6->dst_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); + } + else + { + //Note that the source address is within the domain + //while the destination address is the one outside the domain + ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, + &ip4->dst_address); + ip6->src_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); + ip6->src_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); + } + + return 0; +} + +static uword +ip4_map_t_icmp (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, 
next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip4_mapt_icmp_next_t next0; + u8 error0; + map_domain_t *d0; + u16 len0; + icmp_to_icmp6_ctx_t ctx0; + ip4_header_t *ip40; + icmp46_header_t *icmp0; + + next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP; + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t)); //The pseudo-header is not used + len0 = + clib_net_to_host_u16 (((ip4_header_t *) + vlib_buffer_get_current (p0))->length); + d0 = + pool_elt_at_index (map_main.domains, + vnet_buffer (p0)->map_t.map_domain_index); + + ip40 = vlib_buffer_get_current (p0); + icmp0 = (icmp46_header_t *) (ip40 + 1); + + ctx0.id = ip4_get_port (ip40, icmp0->type == ICMP6_echo_request); + ctx0.d = d0; + if (ctx0.id == 0) + { + // In case of 1:1 mapping, we don't care about the port + if (!(d0->ea_bits_len == 0 && d0->rules)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + } + + if (icmp_to_icmp6 + (p0, ip4_to_ip6_set_icmp_cb, &ctx0, + ip4_to_ip6_set_inner_icmp_cb, &ctx0)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; + } + err0: + if (PREDICT_TRUE (error0 == 
MAP_ERROR_NONE)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + len0); + } + else + { + next0 = IP4_MAPT_ICMP_NEXT_DROP; + } + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static int +ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx) +{ + ip4_mapt_pseudo_header_t *pheader = ctx; + + ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; + ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; + ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0]; + ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1]; + + return 0; +} + +static uword +ip4_map_t_fragmented (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip4_mapt_pseudo_header_t *pheader0; + ip4_mapt_fragmented_next_t next0; + + next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP; + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current (p0); + vlib_buffer_advance (p0, sizeof (*pheader0)); + + if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0)) + { + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer 
(p0)->map_t.mtu < p0->current_length) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip4_map_t_tcp_udp (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index); + + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + ip4_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current (p0); + pheader1 = vlib_buffer_get_current (p1); + vlib_buffer_advance (p0, sizeof (*pheader0)); + vlib_buffer_advance (p1, sizeof (*pheader1)); + + if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) + { + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer 
(p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + } + + if (ip4_to_ip6_tcp_udp (p1, ip4_to_ip6_set_cb, pheader1)) + { + p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next1 = IP4_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip4_mapt_pseudo_header_t *pheader0; + ip4_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer (vm, pi0); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current (p0); + vlib_buffer_advance (p0, sizeof (*pheader0)); + + if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) + { + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return 
frame->n_vectors; +} + +static_always_inline void +ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, + ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0, + u8 * error0, ip4_mapt_next_t * next0) +{ + map_main_t *mm = &map_main; + u32 port_offset; + + if (mm->is_ce) + port_offset = 0; + else + port_offset = 2; + + if (PREDICT_FALSE (ip4_get_fragment_offset (ip40))) + { + *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED; + if (d0->ea_bits_len == 0 && d0->rules) + { + *dst_port0 = 0; + } + else + { + *dst_port0 = ip4_map_fragment_get_port (ip40); + *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0; + } + } + else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP)) + { + vnet_buffer (p0)->map_t.checksum_offset = 36; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; + *dst_port0 = + (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + port_offset)); + } + else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP)) + { + vnet_buffer (p0)->map_t.checksum_offset = 26; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 28 ? 
MAP_ERROR_MALFORMED : *error0; + *dst_port0 = + (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + port_offset)); + } + else if (ip40->protocol == IP_PROTOCOL_ICMP) + { + *next0 = IP4_MAPT_NEXT_MAPT_ICMP; + if (d0->ea_bits_len == 0 && d0->rules) + *dst_port0 = 0; + /* Echo request/reply are ICMP *types* (RFC 792), so test the type field, not the code field. + * Other ICMP messages leave *dst_port0 untouched (the callers preset it to -1). + * NOTE(review): offset 6 in the ICMP header is the echo sequence number per RFC 792; + * the identifier sits at offset 4 - confirm which field is intended here. */ + else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->type + == ICMP4_echo_reply + || ((icmp46_header_t *) + u8_ptr_add (ip40, + sizeof (*ip40)))->type == ICMP4_echo_request) + *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6)); + } + else + { + *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip40, *ip41; + map_domain_t *d0, *d1; + ip4_mapt_next_t next0 = 0, next1 = 0; + u16 ip4_len0, ip4_len1; + u8 error0, error1; + i32 map_port0, map_port1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + ip40 = vlib_buffer_get_current (p0); + ip41 = vlib_buffer_get_current (p1); + ip4_len0 = clib_host_to_net_u16 (ip40->length); + ip4_len1 = clib_host_to_net_u16 (ip41->length); + + if (PREDICT_FALSE
(p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) + { + error0 = MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE (p1->current_length < ip4_len1 || + ip41->ip_version_and_header_length != 0x45)) + { + error1 = MAP_ERROR_UNKNOWN; + next1 = IP4_MAPT_NEXT_DROP; + } + + vnet_buffer (p0)->map_t.map_domain_index = + vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); + vnet_buffer (p1)->map_t.map_domain_index = + vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + d1 = ip4_map_get_domain (vnet_buffer (p1)->map_t.map_domain_index); + + vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; + + map_port0 = -1; + map_port1 = -1; + + ip4_map_t_classify (p0, d0, ip40, ip4_len0, &map_port0, &error0, + &next0); + ip4_map_t_classify (p1, d1, ip41, ip4_len1, &map_port1, &error1, + &next1); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance (p0, -sizeof (*pheader0)); + vlib_buffer_advance (p1, -sizeof (*pheader1)); + pheader0 = vlib_buffer_get_current (p0); + pheader1 = vlib_buffer_get_current (p1); + + //Save addresses within the packet + if (mm->is_ce) + { + ip4_map_t_embedded_address (d0, &pheader0->daddr, + &ip40->dst_address); + ip4_map_t_embedded_address (d1, &pheader1->daddr, + &ip41->dst_address); + pheader0->saddr.as_u64[0] = + map_get_pfx_net (d0, ip40->src_address.as_u32, + (u16) map_port0); + pheader0->saddr.as_u64[1] = + map_get_sfx_net (d0, ip40->src_address.as_u32, + (u16) map_port0); + pheader1->saddr.as_u64[0] = + map_get_pfx_net (d1, ip41->src_address.as_u32, + (u16) map_port1); + pheader1->saddr.as_u64[1] = + map_get_sfx_net (d1, ip41->src_address.as_u32, + (u16) map_port1); + } + else + { + ip4_map_t_embedded_address (d0, &pheader0->saddr, + &ip40->src_address); + ip4_map_t_embedded_address (d1, &pheader1->saddr, + &ip41->src_address); + pheader0->daddr.as_u64[0] = + 
map_get_pfx_net (d0, ip40->dst_address.as_u32, + (u16) map_port0); + pheader0->daddr.as_u64[1] = + map_get_sfx_net (d0, ip40->dst_address.as_u32, + (u16) map_port0); + pheader1->daddr.as_u64[0] = + map_get_pfx_net (d1, ip41->dst_address.as_u32, + (u16) map_port1); + pheader1->daddr.as_u64[1] = + map_get_sfx_net (d1, ip41->dst_address.as_u32, + (u16) map_port1); + } + + if (PREDICT_FALSE + (ip4_is_first_fragment (ip40) && (map_port0 != -1) + && (d0->ea_bits_len != 0 || !d0->rules) + && ip4_map_fragment_cache (ip40, map_port0))) + { + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_FALSE + (ip4_is_first_fragment (ip41) && (map_port1 != -1) + && (d1->ea_bits_len != 0 || !d1->rules) + && ip4_map_fragment_cache (ip41, map_port1))) + { + error1 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_TRUE + (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip40->length)); + } + + if (PREDICT_TRUE + (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + vnet_buffer (p1)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip41->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? 
IP4_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + map_domain_t *d0; + ip4_mapt_next_t next0; + u16 ip4_len0; + u8 error0; + i32 map_port0; + ip4_mapt_pseudo_header_t *pheader0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + ip40 = vlib_buffer_get_current (p0); + ip4_len0 = clib_host_to_net_u16 (ip40->length); + if (PREDICT_FALSE (p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) + { + error0 = MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + vnet_buffer (p0)->map_t.map_domain_index = + vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); + + vnet_buffer (p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; + + map_port0 = -1; + ip4_map_t_classify (p0, d0, ip40, ip4_len0, &map_port0, &error0, + &next0); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance (p0, -sizeof (*pheader0)); + pheader0 = vlib_buffer_get_current (p0); + + //Save addresses within the packet + if (mm->is_ce) + { + ip4_map_t_embedded_address (d0, &pheader0->daddr, + &ip40->dst_address); + pheader0->saddr.as_u64[0] = + map_get_pfx_net (d0, ip40->src_address.as_u32, + (u16) map_port0); + pheader0->saddr.as_u64[1] = + map_get_sfx_net (d0, ip40->src_address.as_u32, + (u16) map_port0); + } + else + { + ip4_map_t_embedded_address (d0, &pheader0->saddr, + &ip40->src_address); + pheader0->daddr.as_u64[0] = + map_get_pfx_net (d0, ip40->dst_address.as_u32, + (u16) map_port0); + pheader0->daddr.as_u64[1] = + map_get_sfx_net (d0, ip40->dst_address.as_u32, + (u16) map_port0); + } + + //It is important to cache at this stage because the result might be necessary + //for packets within the same vector. + //Actually, this approach even provides some limited out-of-order fragments support + if (PREDICT_FALSE + (ip4_is_first_fragment (ip40) && (map_port0 != -1) + && (d0->ea_bits_len != 0 || !d0->rules) + && ip4_map_fragment_cache (ip40, map_port0))) + { + error0 = MAP_ERROR_UNKNOWN; + } + + if (PREDICT_TRUE + (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip40->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? 
IP4_MAPT_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static char *map_t_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { + .function = ip4_map_t_fragmented, + .name = "ip4-map-t-fragmented", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT, + .next_nodes = { + [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { + .function = ip4_map_t_icmp, + .name = "ip4-map-t-icmp", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_ICMP_N_NEXT, + .next_nodes = { + [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { + .function = ip4_map_t_tcp_udp, + .name = "ip4-map-t-tcp-udp", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT, + .next_nodes = { + [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, + [IP4_MAPT_TCP_UDP_NEXT_DROP] = 
"error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip4_map_t_node) = { + .function = ip4_map_t, + .name = "ip4-map-t", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP4_MAPT_N_NEXT, + .next_nodes = { + [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp", + [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp", + [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented", + [IP4_MAPT_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c new file mode 100644 index 00000000000..5cdd9d94582 --- /dev/null +++ b/src/plugins/map/ip6_map.c @@ -0,0 +1,1265 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "map.h" + +#include +#include +#include + +enum ip6_map_next_e +{ + IP6_MAP_NEXT_IP4_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP6_MAP_NEXT_IP4_REWRITE, +#endif + IP6_MAP_NEXT_IP6_REASS, + IP6_MAP_NEXT_IP4_REASS, + IP6_MAP_NEXT_IP4_FRAGMENT, + IP6_MAP_NEXT_IP6_ICMP_RELAY, + IP6_MAP_NEXT_IP6_LOCAL, + IP6_MAP_NEXT_DROP, + IP6_MAP_NEXT_ICMP, + IP6_MAP_N_NEXT, +}; + +enum ip6_map_ip6_reass_next_e +{ + IP6_MAP_IP6_REASS_NEXT_IP6_MAP, + IP6_MAP_IP6_REASS_NEXT_DROP, + IP6_MAP_IP6_REASS_N_NEXT, +}; + +enum ip6_map_ip4_reass_next_e +{ + IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP, + IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT, + IP6_MAP_IP4_REASS_NEXT_DROP, + IP6_MAP_IP4_REASS_N_NEXT, +}; + +enum ip6_icmp_relay_next_e +{ + IP6_ICMP_RELAY_NEXT_IP4_LOOKUP, + IP6_ICMP_RELAY_NEXT_DROP, + IP6_ICMP_RELAY_N_NEXT, +}; + +vlib_node_registration_t ip6_map_ip4_reass_node; +vlib_node_registration_t ip6_map_ip6_reass_node; +static vlib_node_registration_t ip6_map_icmp_relay_node; + +typedef struct +{ + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip6_map_ip4_reass_trace_t; + +u8 * +format_ip6_map_ip4_reass_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip4_reass_trace_t *t = + va_arg (*args, map_ip6_map_ip4_reass_trace_t *); + return format (s, "MAP domain index: %d L4 port: %u Status: %s", + t->map_domain_index, t->port, + t->cached ? "cached" : "forwarded"); +} + +typedef struct +{ + u16 offset; + u16 frag_len; + u8 out; +} map_ip6_map_ip6_reass_trace_t; + +u8 * +format_ip6_map_ip6_reass_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip6_reass_trace_t *t = + va_arg (*args, map_ip6_map_ip6_reass_trace_t *); + return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset, + t->frag_len, t->out ? 
"out" : "in"); +} + +/* + * ip6_map_sec_check + */ +static_always_inline bool +ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4, + ip6_header_t * ip6) +{ + u16 sp4 = clib_net_to_host_u16 (port); + u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32); + u64 sal6 = map_get_pfx (d, sa4, sp4); + u64 sar6 = map_get_sfx (d, sa4, sp4); + + if (PREDICT_FALSE + (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0]) + || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1]))) + return (false); + return (true); +} + +static_always_inline void +ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4, + ip6_header_t * ip6, u32 * next, u8 * error) +{ + map_main_t *mm = &map_main; + if (d->ea_bits_len || d->rules) + { + if (d->psid_length > 0) + { + if (!ip4_is_fragment (ip4)) + { + u16 port = ip4_get_port (ip4, 1); + if (port) + { + if (mm->sec_check) + *error = + ip6_map_sec_check (d, port, ip4, + ip6) ? MAP_ERROR_NONE : + MAP_ERROR_DECAP_SEC_CHECK; + } + else + { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } + else + { + *next = mm->sec_check_frag ? 
IP6_MAP_NEXT_IP4_REASS : *next; + } + } + } +} + +static_always_inline bool +ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei) + { + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index; + return (true); + } +#endif + return (false); +} + +/* + * ip6_map + */ +static uword +ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + u8 error0 = MAP_ERROR_NONE; + u8 error1 = MAP_ERROR_NONE; + map_domain_t *d0 = 0, *d1 = 0; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + u16 port0 = 0, port1 = 0; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* IPv6 + IPv4 header + 8 bytes of ULP */ + CLIB_PREFETCH (p2->data, 68, LOAD); + CLIB_PREFETCH (p3->data, 68, LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + ip60 = vlib_buffer_get_current (p0); + ip61 = vlib_buffer_get_current (p1); + vlib_buffer_advance (p0, sizeof (ip6_header_t)); + vlib_buffer_advance (p1, sizeof (ip6_header_t)); + ip40 = vlib_buffer_get_current (p0); + ip41 = vlib_buffer_get_current (p1); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE + (ip60->protocol == IP_PROTOCOL_IP_IN_IP + && clib_net_to_host_u16 (ip60->payload_length) > 20)) + { + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & ip40-> + src_address.as_u32, &map_domain_index0, + &error0); + } + else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16 (ip60->payload_length) > + sizeof (icmp46_header_t)) + { + icmp46_header_t *icmp = (void *) (ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request + || icmp->type == + ICMP6_echo_reply) ? 
IP6_MAP_NEXT_IP6_LOCAL : + IP6_MAP_NEXT_IP6_ICMP_RELAY; + } + else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + next0 = IP6_MAP_NEXT_IP6_REASS; + } + else + { + error0 = MAP_ERROR_BAD_PROTOCOL; + } + if (PREDICT_TRUE + (ip61->protocol == IP_PROTOCOL_IP_IN_IP + && clib_net_to_host_u16 (ip61->payload_length) > 20)) + { + d1 = + ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & ip41-> + src_address.as_u32, &map_domain_index1, + &error1); + } + else if (ip61->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16 (ip61->payload_length) > + sizeof (icmp46_header_t)) + { + icmp46_header_t *icmp = (void *) (ip61 + 1); + next1 = (icmp->type == ICMP6_echo_request + || icmp->type == + ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL : + IP6_MAP_NEXT_IP6_ICMP_RELAY; + } + else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + next1 = IP6_MAP_NEXT_IP6_REASS; + } + else + { + error1 = MAP_ERROR_BAD_PROTOCOL; + } + + if (d0) + { + /* MAP inbound security check */ + ip6_map_security_check (d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) + { + if (PREDICT_FALSE + (d0->mtu + && (clib_host_to_net_u16 (ip40->length) > d0->mtu))) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.flags = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } + else + { + next0 = + ip6_map_ip4_lookup_bypass (p0, + ip40) ? 
+ IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip40->length)); + } + } + if (d1) + { + /* MAP inbound security check */ + ip6_map_security_check (d1, ip41, ip61, &next1, &error1); + + if (PREDICT_TRUE (error1 == MAP_ERROR_NONE && + next1 == IP6_MAP_NEXT_IP4_LOOKUP)) + { + if (PREDICT_FALSE + (d1->mtu + && (clib_host_to_net_u16 (ip41->length) > d1->mtu))) + { + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.flags = 0; + vnet_buffer (p1)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p1)->ip_frag.mtu = d1->mtu; + next1 = IP6_MAP_NEXT_IP4_FRAGMENT; + } + else + { + next1 = + ip6_map_ip4_lookup_bypass (p1, + ip41) ? + IP6_MAP_NEXT_IP4_REWRITE : next1; + } + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + map_domain_index1, 1, + clib_net_to_host_u16 + (ip41->length)); + } + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) + { + /* Set ICMP parameters */ + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, + 0); + next0 = IP6_MAP_NEXT_ICMP; + } + else + { + next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_NEXT_DROP; + } + + if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) + { + /* Set ICMP parameters */ + vlib_buffer_advance (p1, -sizeof (ip6_header_t)); + icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, + 0); + next1 = IP6_MAP_NEXT_ICMP; + } + else + { + next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP; + } + + /* Reset packet */ + if (next0 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + if (next1 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance (p1, -sizeof (ip6_header_t)); + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + vlib_buffer_advance (p0, sizeof (ip6_header_t)); + ip40 = vlib_buffer_get_current (p0); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE + (ip60->protocol == IP_PROTOCOL_IP_IN_IP + && clib_net_to_host_u16 (ip60->payload_length) > 20)) + { + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & ip40-> + src_address.as_u32, &map_domain_index0, + 
&error0); + } + else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16 (ip60->payload_length) > + sizeof (icmp46_header_t)) + { + icmp46_header_t *icmp = (void *) (ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request + || icmp->type == + ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL : + IP6_MAP_NEXT_IP6_ICMP_RELAY; + } + else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION && + (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr == + IP_PROTOCOL_IP_IN_IP)) + { + next0 = IP6_MAP_NEXT_IP6_REASS; + } + else + { + error0 = MAP_ERROR_BAD_PROTOCOL; + } + + if (d0) + { + /* MAP inbound security check */ + ip6_map_security_check (d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) + { + if (PREDICT_FALSE + (d0->mtu + && (clib_host_to_net_u16 (ip40->length) > d0->mtu))) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.flags = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } + else + { + next0 = + ip6_map_ip4_lookup_bypass (p0, + ip40) ? + IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip40->length)); + } + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = (u16) port0; + } + + if (mm->icmp6_enabled && + (error0 == MAP_ERROR_DECAP_SEC_CHECK + || error0 == MAP_ERROR_NO_DOMAIN)) + { + /* Set ICMP parameters */ + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, + 0); + next0 = IP6_MAP_NEXT_ICMP; + } + else + { + next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_NEXT_DROP; + } + + /* Reset packet */ + if (next0 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +static_always_inline void +ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node, + map_ip6_reass_t * r, u32 ** fragments_ready, + u32 ** fragments_to_drop) +{ + ip4_header_t *ip40; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + vlib_buffer_t *p0; + + if (!r->ip4_header.ip_version_and_header_length) + return; + + //The IP header is here, we need to check for packets + //that can be forwarded + int i; + for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + { + if (r->fragments[i].pi == ~0 || + ((!r->fragments[i].next_data_len) + && (r->fragments[i].next_data_offset != (0xffff)))) + continue; + + p0 = vlib_get_buffer (vm, r->fragments[i].pi); + ip60 = vlib_buffer_get_current (p0); + frag0 = (ip6_frag_hdr_t *) (ip60 + 1); + ip40 = (ip4_header_t *) (frag0 + 1); + + if (ip6_frag_hdr_offset (frag0)) + { + //Not first fragment, add the IPv4 header + clib_memcpy (ip40, &r->ip4_header, 20); + } + +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->forwarded += + clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0); +#endif + + if (ip6_frag_hdr_more (frag0)) + { + //Not last fragment, we copy end of next + clib_memcpy (u8_ptr_add (ip60, p0->current_length), + r->fragments[i].next_data, 20); + p0->current_length += 20; + ip60->payload_length = u16_net_add (ip60->payload_length, 20); + } + + if (!ip4_is_fragment (ip40)) + { + ip40->fragment_id = frag_id_6to4 (frag0->identification); + ip40->flags_and_fragment_offset = + clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0)); + } + else + { + ip40->flags_and_fragment_offset = + clib_host_to_net_u16 
(ip4_get_fragment_offset (ip40) + + ip6_frag_hdr_offset (frag0)); + } + + if (ip6_frag_hdr_more (frag0)) + ip40->flags_and_fragment_offset |= + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip40->length = + clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) - + sizeof (*frag0)); + ip40->checksum = ip4_header_checksum (ip40); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_ip6_map_ip6_reass_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->offset = ip4_get_fragment_offset (ip40); + tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40); + tr->out = 1; + } + + vec_add1 (*fragments_ready, r->fragments[i].pi); + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + map_main.ip6_reass_buffered_counter--; + + //TODO: Best solution would be that ip6_map handles extension headers + // and ignores atomic fragment. But in the meantime, let's just copy the header. + + u8 protocol = frag0->next_hdr; + memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60)); + ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol = + protocol; + vlib_buffer_advance (p0, sizeof (*frag0)); + } +} + +void +map_ip6_drop_pi (u32 pi) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_runtime_t *n = + vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index); + vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi); +} + +void +map_ip4_drop_pi (u32 pi) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_runtime_t *n = + vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); + vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi); +} + +/* + * ip6_reass + * TODO: We should count the number of successfully + * transmitted fragment bytes and compare that to the last fragment + * offset such that we can free the reassembly structure when all fragments + * have been forwarded. 
+ */ +static uword +ip6_map_ip6_reass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index); + u32 *fragments_to_drop = NULL; + u32 *fragments_ready = NULL; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + u16 offset; + u16 next_offset; + u16 frag_len; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + frag0 = (ip6_frag_hdr_t *) (ip60 + 1); + offset = + clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7); + frag_len = + clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0); + next_offset = + ip6_frag_hdr_more (frag0) ? 
(offset + frag_len) : (0xffff); + + //FIXME: Support other extension headers, maybe + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_ip6_map_ip6_reass_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->offset = offset; + tr->frag_len = frag_len; + tr->out = 0; + } + + map_ip6_reass_lock (); + map_ip6_reass_t *r = + map_ip6_reass_get (&ip60->src_address, &ip60->dst_address, + frag0->identification, frag0->next_hdr, + &fragments_to_drop); + //FIXME: Use better error codes + if (PREDICT_FALSE (!r)) + { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + else if (PREDICT_FALSE ((frag_len <= 20 && + (ip6_frag_hdr_more (frag0) || (!offset))))) + { + //Very small fragment are restricted to the last one and + //can't be the first one + error0 = MAP_ERROR_FRAGMENT_MALFORMED; + } + else + if (map_ip6_reass_add_fragment + (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len)) + { + map_ip6_reass_free (r, &fragments_to_drop); + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + else + { +#ifdef MAP_IP6_REASS_COUNT_BYTES + if (!ip6_frag_hdr_more (frag0)) + r->expected_total = offset + frag_len; +#endif + ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready, + &fragments_to_drop); +#ifdef MAP_IP6_REASS_COUNT_BYTES + if (r->forwarded >= r->expected_total) + map_ip6_reass_free (r, &fragments_to_drop); +#endif + } + map_ip6_reass_unlock (); + + if (error0 == MAP_ERROR_NONE) + { + if (frag_len > 20) + { + //Dequeue the packet + n_left_to_next++; + to_next--; + } + else + { + //All data from that packet was copied no need to keep it, but this is not an error + p0->error = error_node->errors[MAP_ERROR_NONE]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, + IP6_MAP_IP6_REASS_NEXT_DROP); + } + } + else + { + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, + IP6_MAP_IP6_REASS_NEXT_DROP); + } + } + 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node (vm, fragments_ready, node, + &error_node->errors[MAP_ERROR_NONE], + IP6_MAP_IP6_REASS_NEXT_IP6_MAP); + map_send_all_to_node (vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP6_REASS_NEXT_DROP); + + vec_free (fragments_to_drop); + vec_free (fragments_ready); + return frame->n_vectors; +} + +/* + * ip6_ip4_virt_reass + */ +static uword +ip6_map_ip4_reass (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 thread_index = vlib_get_thread_index (); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0 = ~0; + u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip40 = vlib_buffer_get_current (p0); + ip60 = ((ip6_header_t *) ip40) - 1; + + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & ip40->src_address.as_u32, + &map_domain_index0, &error0); + + map_ip4_reass_lock (); + //This node only deals with fragmented ip4 + map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, + ip40->dst_address.as_u32, + 
ip40->fragment_id, + ip40->protocol, + &fragments_to_drop); + if (PREDICT_FALSE (!r)) + { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) + { + // This is a fragment + if (r->port >= 0) + { + // We know the port already + port0 = r->port; + } + else if (map_ip4_reass_add_fragment (r, pi0)) + { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free (r, &fragments_to_drop); + } + else + { + cached = 1; + } + } + else if ((port0 = ip4_get_port (ip40, 1)) == 0) + { + // Could not find port from first fragment. Stop reassembling. + error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free (r, &fragments_to_drop); + } + else + { + // Found port. Remember it and loopback saved fragments + r->port = port0; + map_ip4_reass_get_fragments (r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) + { + r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; + if (!ip4_get_fragment_more (ip40)) + r->expected_total = + ip4_get_fragment_offset (ip40) * 8 + + clib_host_to_net_u16 (ip40->length) - 20; + if (r->forwarded >= r->expected_total) + map_ip4_reass_free (r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock (); + + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) + error0 = + ip6_map_sec_check (d0, port0, ip40, + ip60) ? 
MAP_ERROR_NONE : + MAP_ERROR_DECAP_SEC_CHECK; + + if (PREDICT_FALSE + (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu) + && error0 == MAP_ERROR_NONE && !cached)) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.flags = 0; + vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer (p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_ip6_map_ip4_reass_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if (cached) + { + //Dequeue the packet + n_left_to_next++; + to_next--; + } + else + { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + map_domain_index0, 1, + clib_net_to_host_u16 + (ip40->length)); + next0 = + (error0 == + MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the inpu vector + if (n_left_from == 0 && vec_len (fragments_to_loopback)) + { + from = vlib_frame_vector_args (frame); + u32 len = vec_len (fragments_to_loopback); + if (len <= VLIB_FRAME_SIZE) + { + clib_memcpy (from, fragments_to_loopback, + sizeof (u32) * len); + n_left_from = len; + vec_reset_length (fragments_to_loopback); + } + else + { + clib_memcpy (from, + fragments_to_loopback + (len - + VLIB_FRAME_SIZE), + sizeof (u32) * VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + map_send_all_to_node (vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP4_REASS_NEXT_DROP); + + vec_free (fragments_to_drop); + 
vec_free (fragments_to_loopback); + return frame->n_vectors; +} + +/* + * ip6_icmp_relay + */ +static uword +ip6_map_icmp_relay (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index); + map_main_t *mm = &map_main; + u32 thread_index = vlib_get_thread_index (); + u16 *fragment_ids, *fid; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + /* Get random fragment IDs for replies. */ + fid = fragment_ids = + clib_random_buffer_get_data (&vm->random_buffer, + n_left_from * sizeof (fragment_ids[0])); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP; + u32 mtu; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + u16 tlen = clib_net_to_host_u16 (ip60->payload_length); + + /* + * In: + * IPv6 header (40) + * ICMPv6 header (8) + * IPv6 header (40) + * Original IPv4 header / packet + * Out: + * New IPv4 header + * New ICMP header + * Original IPv4 header / packet + */ + + /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */ + if (tlen < 76) + { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1); + ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2); + + if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) + { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1); + vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + 
IPv6 - IPv4 - ICMP ) */ + ip4_header_t *new_ip40 = vlib_buffer_get_current (p0); + icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1); + + /* + * Relay according to RFC2473, section 8.3 + */ + switch (icmp60->type) + { + case ICMP6_destination_unreachable: + case ICMP6_time_exceeded: + case ICMP6_parameter_problem: + /* Type 3 - destination unreachable, Code 1 - host unreachable */ + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = + ICMP4_destination_unreachable_destination_unreachable_host; + break; + + case ICMP6_packet_too_big: + /* Type 3 - destination unreachable, Code 4 - packet too big */ + /* Potential TODO: Adjust domain tunnel MTU based on the value received here */ + mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1))); + + /* Check DF flag */ + if (! + (inner_ip40->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))) + { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set; + *((u32 *) (new_icmp40 + 1)) = + clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu); + break; + + default: + error0 = MAP_ERROR_ICMP_RELAY; + break; + } + + /* + * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812) + */ + new_ip40->ip_version_and_header_length = 0x45; + new_ip40->tos = 0; + u16 nlen = (tlen - 20) > 576 ? 
576 : tlen - 20; + new_ip40->length = clib_host_to_net_u16 (nlen); + new_ip40->fragment_id = fid[0]; + fid++; + new_ip40->ttl = 64; + new_ip40->protocol = IP_PROTOCOL_ICMP; + new_ip40->src_address = mm->icmp4_src_address; + new_ip40->dst_address = inner_ip40->src_address; + new_ip40->checksum = ip4_header_checksum (new_ip40); + + new_icmp40->checksum = 0; + ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20); + new_icmp40->checksum = ~ip_csum_fold (sum); + + vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0, + 1); + + error: + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->map_domain_index = 0; + tr->port = 0; + } + + next0 = + (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; + +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_node) = { + .function = ip6_map, + .name = "ip6-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP6_MAP_N_NEXT, + .next_nodes = { + [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance", +#endif + [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", + [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", + [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", + [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", + [IP6_MAP_NEXT_DROP] = "error-drop", + [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ 
+VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = { + .function = ip6_map_ip6_reass, + .name = "ip6-map-ip6-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip6_map_ip6_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map", + [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = { + .function = ip6_map_ip4_reass, + .name = "ip6-map-ip4-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip6_map_ip4_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { + .function = ip6_map_icmp_relay, + .name = "ip6-map-icmp-relay", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, //FIXME + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_ICMP_RELAY_N_NEXT, + .next_nodes = { + [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c new file mode 100644 index 00000000000..852b1274bc0 --- /dev/null +++ b/src/plugins/map/ip6_map_t.c @@ -0,0 +1,1186 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include +#include +#include + +#define IP6_MAP_T_DUAL_LOOP + +typedef enum +{ + IP6_MAPT_NEXT_MAPT_TCP_UDP, + IP6_MAPT_NEXT_MAPT_ICMP, + IP6_MAPT_NEXT_MAPT_FRAGMENTED, + IP6_MAPT_NEXT_DROP, + IP6_MAPT_N_NEXT +} ip6_mapt_next_t; + +typedef enum +{ + IP6_MAPT_ICMP_NEXT_IP4_LOOKUP, + IP6_MAPT_ICMP_NEXT_IP4_FRAG, + IP6_MAPT_ICMP_NEXT_DROP, + IP6_MAPT_ICMP_N_NEXT +} ip6_mapt_icmp_next_t; + +typedef enum +{ + IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP, + IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG, + IP6_MAPT_TCP_UDP_NEXT_DROP, + IP6_MAPT_TCP_UDP_N_NEXT +} ip6_mapt_tcp_udp_next_t; + +typedef enum +{ + IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP, + IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG, + IP6_MAPT_FRAGMENTED_NEXT_DROP, + IP6_MAPT_FRAGMENTED_N_NEXT +} ip6_mapt_fragmented_next_t; + +static_always_inline int +ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag, + map_domain_t * d, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock (); + map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, + d->flags), + ip6_map_t_embedded_address (d, + &ip6-> + dst_address), + frag_id_6to4 (frag->identification), + (ip6->protocol == + IP_PROTOCOL_ICMP6) ? 
+ IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock (); + return !r; +} + +/* Returns the associated port or -1 */ +static_always_inline i32 +ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag, + map_domain_t * d) +{ + u32 *ignore = NULL; + map_ip4_reass_lock (); + map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, + d->flags), + ip6_map_t_embedded_address (d, + &ip6-> + dst_address), + frag_id_6to4 (frag->identification), + (ip6->protocol == + IP_PROTOCOL_ICMP6) ? + IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + i32 ret = r ? r->port : -1; + map_ip4_reass_unlock (); + return ret; +} + +typedef struct +{ + map_domain_t *d; + u16 id; +} icmp6_to_icmp_ctx_t; + +static int +ip6_to_ip4_set_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) +{ + icmp6_to_icmp_ctx_t *ctx = arg; + map_main_t *mm = &map_main; + + if (mm->is_ce) + { + u32 ip4_dadr; + + //Security check + //Note that this prevents an intermediate IPv6 router from answering the request + ip4_dadr = map_get_ip4 (&ip6->dst_address, ctx->d->flags); + if (ip6->dst_address.as_u64[0] != + map_get_pfx_net (ctx->d, ip4_dadr, ctx->id) + || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_dadr, + ctx->id)) + return -1; + + ip4->src_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->src_address); + ip4->dst_address.as_u32 = ip4_dadr; + } + else + { + u32 ip4_sadr; + + //Security check + //Note that this prevents an intermediate IPv6 router from answering the request + ip4_sadr = map_get_ip4 (&ip6->src_address, ctx->d->flags); + if (ip6->src_address.as_u64[0] != + map_get_pfx_net (ctx->d, ip4_sadr, ctx->id) + || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_sadr, + ctx->id)) + return -1; + + ip4->dst_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->dst_address); + ip4->src_address.as_u32 = ip4_sadr; + } + + return 0; +} + +static int +ip6_to_ip4_set_inner_icmp_cb (ip6_header_t * ip6, 
ip4_header_t * ip4, + void *arg) +{ + icmp6_to_icmp_ctx_t *ctx = arg; + map_main_t *mm = &map_main; + + if (mm->is_ce) + { + u32 inner_ip4_sadr; + + //Security check of inner packet + inner_ip4_sadr = map_get_ip4 (&ip6->src_address, ctx->d->flags); + if (ip6->src_address.as_u64[0] != + map_get_pfx_net (ctx->d, inner_ip4_sadr, ctx->id) + || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, + inner_ip4_sadr, + ctx->id)) + return -1; + + ip4->src_address.as_u32 = inner_ip4_sadr; + ip4->dst_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->dst_address); + } + else + { + u32 inner_ip4_dadr; + + //Security check of inner packet + inner_ip4_dadr = map_get_ip4 (&ip6->dst_address, ctx->d->flags); + if (ip6->dst_address.as_u64[0] != + map_get_pfx_net (ctx->d, inner_ip4_dadr, ctx->id) + || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d, + inner_ip4_dadr, + ctx->id)) + return -1; + + ip4->dst_address.as_u32 = inner_ip4_dadr; + ip4->src_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->src_address); + } + + return 0; +} + +static uword +ip6_map_t_icmp (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + u8 error0; + ip6_mapt_icmp_next_t next0; + map_domain_t *d0; + u16 len0; + icmp6_to_icmp_ctx_t ctx0; + ip6_header_t *ip60; + icmp46_header_t *icmp0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + error0 = 
MAP_ERROR_NONE; + next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16 (ip60->payload_length); + icmp0 = (icmp46_header_t *) (ip60 + 1); + d0 = + pool_elt_at_index (map_main.domains, + vnet_buffer (p0)->map_t.map_domain_index); + + ctx0.id = + ip6_get_port (ip60, icmp0->type == ICMP6_echo_request, + p0->current_length); + ctx0.d = d0; + if (ctx0.id == 0) + { + // In case of 1:1 mapping, we don't care about the port + if (!(d0->ea_bits_len == 0 && d0->rules)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + } + + if (icmp6_to_icmp + (p0, ip6_to_ip4_set_icmp_cb, &ctx0, + ip6_to_ip4_set_inner_icmp_cb, &ctx0)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; + } + err0: + if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + len0); + } + else + { + next0 = IP6_MAPT_ICMP_NEXT_DROP; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static int +ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx) +{ + vlib_buffer_t *p = ctx; + + ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr; + ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr; + + return 0; +} + +static uword +ip6_map_t_fragmented (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, 
n_left_to_next; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_fragmented_node.index); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + u32 next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) + { + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + } + + if (ip6_to_ip4_fragmented (p1, ip6_to_ip4_set_cb, p1)) + { + p1->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next1 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + 
vlib_buffer_t *p0; + u32 next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = vlib_get_buffer (vm, pi0); + + if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) + { + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip6_map_t_tcp_udp (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_tcp_udp_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) + { + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + 
next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + } + + if (ip6_to_ip4_tcp_udp (p1, ip6_to_ip4_set_cb, p1, 1)) + { + p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next1 = IP6_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip6_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer (vm, pi0); + + if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) + { + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return 
frame->n_vectors; +} + +static_always_inline void +ip6_map_t_classify (vlib_buffer_t * p0, ip6_header_t * ip60, + map_domain_t * d0, i32 * map_port0, + u8 * error0, ip6_mapt_next_t * next0, + u32 l4_len0, ip6_frag_hdr_t * frag0) +{ + map_main_t *mm = &map_main; + u32 port_offset; + + if (mm->is_ce) + port_offset = 2; + else + port_offset = 0; + + if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset (frag0))) + { + *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + if (d0->ea_bits_len == 0 && d0->rules) + { + *map_port0 = 0; + } + else + { + *map_port0 = ip6_map_fragment_get (ip60, frag0, d0); + *error0 = (*map_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED; + } + } + else + if (PREDICT_TRUE + (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) + { + *error0 = + l4_len0 < sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer (p0)->map_t.checksum_offset = + vnet_buffer (p0)->map_t.v6.l4_offset + 16; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset + port_offset)); + } + else + if (PREDICT_TRUE + (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) + { + *error0 = + l4_len0 < sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer (p0)->map_t.checksum_offset = + vnet_buffer (p0)->map_t.v6.l4_offset + 6; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset + port_offset)); + } + else if (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) + { + *error0 = + l4_len0 < sizeof (icmp46_header_t) ? 
MAP_ERROR_MALFORMED : *error0; + *next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if (d0->ea_bits_len == 0 && d0->rules) + { + *map_port0 = 0; + } + else + if (((icmp46_header_t *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset))->code == + ICMP6_echo_reply + || ((icmp46_header_t *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset))->code == + ICMP6_echo_request) + { + *map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset + 6)); + } + } + else + { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. + *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + u8 error0, error1; + ip6_mapt_next_t next0, next1; + u32 l4_len0, l4_len1; + i32 map_port0, map_port1; + map_domain_t *d0, *d1; + ip6_frag_hdr_t *frag0, *frag1; + next0 = next1 = 0; //Because compiler whines + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + ip60 = vlib_buffer_get_current (p0); + ip61 = vlib_buffer_get_current (p1); + + if (mm->is_ce) + { + u32 daddr0, daddr1; + daddr0 = 
0; /* TODO */ + daddr1 = 0; /* TODO */ + /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ + + daddr0 = map_get_ip4 (&ip60->dst_address, 0 /*TODO*/); + daddr1 = map_get_ip4 (&ip61->dst_address, 0 /*TODO*/); + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & daddr0, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + d1 = + ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & daddr1, + &vnet_buffer (p1)->map_t.map_domain_index, + &error1); + + daddr0 = map_get_ip4 (&ip60->dst_address, d0->flags); + daddr1 = map_get_ip4 (&ip61->dst_address, d1->flags); + + vnet_buffer (p0)->map_t.v6.daddr = daddr0; + vnet_buffer (p1)->map_t.v6.daddr = daddr1; + vnet_buffer (p0)->map_t.v6.saddr = + ip6_map_t_embedded_address (d0, &ip60->src_address); + vnet_buffer (p1)->map_t.v6.saddr = + ip6_map_t_embedded_address (d1, &ip61->src_address); + } + else + { + u32 saddr0, saddr1; + saddr0 = 0; /* TODO */ + saddr1 = 0; /* TODO */ + /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ + + saddr0 = map_get_ip4 (&ip60->src_address, 0 /*TODO*/); + saddr1 = map_get_ip4 (&ip61->src_address, 0 /*TODO*/); + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & saddr0, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + d1 = + ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & saddr1, + &vnet_buffer (p1)->map_t.map_domain_index, + &error1); + + saddr0 = map_get_ip4 (&ip60->src_address, d0->flags); + saddr1 = map_get_ip4 (&ip61->src_address, d1->flags); + + vnet_buffer (p0)->map_t.v6.saddr = saddr0; + vnet_buffer (p1)->map_t.v6.saddr = saddr1; + vnet_buffer (p0)->map_t.v6.daddr = + ip6_map_t_embedded_address (d0, &ip60->dst_address); + vnet_buffer (p1)->map_t.v6.daddr = + ip6_map_t_embedded_address (d1, &ip61->dst_address); + } + + vnet_buffer (p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; + vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; + + if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length, + &(vnet_buffer (p0)->map_t. + v6.l4_protocol), + &(vnet_buffer (p0)->map_t. + v6.l4_offset), + &(vnet_buffer (p0)->map_t. + v6.frag_offset)))) + { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE (ip6_parse (ip61, p1->current_length, + &(vnet_buffer (p1)->map_t. + v6.l4_protocol), + &(vnet_buffer (p1)->map_t. + v6.l4_offset), + &(vnet_buffer (p1)->map_t. + v6.frag_offset)))) + { + error1 = MAP_ERROR_MALFORMED; + next1 = IP6_MAPT_NEXT_DROP; + } + + map_port0 = map_port1 = -1; + l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) + + sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset; + l4_len1 = (u32) clib_net_to_host_u16 (ip61->payload_length) + + sizeof (*ip60) - vnet_buffer (p1)->map_t.v6.l4_offset; + frag0 = + (ip6_frag_hdr_t *) u8_ptr_add (ip60, + vnet_buffer (p0)->map_t. + v6.frag_offset); + frag1 = + (ip6_frag_hdr_t *) u8_ptr_add (ip61, + vnet_buffer (p1)->map_t. 
+ v6.frag_offset); + + ip6_map_t_classify (p0, ip60, d0, &map_port0, &error0, &next0, + l4_len0, frag0); + ip6_map_t_classify (p1, ip61, d1, &map_port1, &error1, &next1, + l4_len1, frag1); + + if (PREDICT_FALSE + ((map_port0 != -1) + && (ip60->src_address.as_u64[0] != + map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, + map_port0) + || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, + vnet_buffer + (p0)->map_t.v6.saddr, + map_port0)))) + { + error0 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE + ((map_port1 != -1) + && (ip61->src_address.as_u64[0] != + map_get_pfx_net (d1, vnet_buffer (p1)->map_t.v6.saddr, + map_port1) + || ip61->src_address.as_u64[1] != map_get_sfx_net (d1, + vnet_buffer + (p1)->map_t.v6.saddr, + map_port1)))) + { + error1 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) + u8_ptr_add (ip60, + vnet_buffer + (p0)->map_t. + v6.frag_offset))) + && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) + && (error0 == MAP_ERROR_NONE)) + { + ip6_map_fragment_cache (ip60, + (ip6_frag_hdr_t *) u8_ptr_add (ip60, + vnet_buffer + (p0)->map_t. + v6.frag_offset), + d0, map_port0); + } + + if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) + u8_ptr_add (ip61, + vnet_buffer + (p1)->map_t. + v6.frag_offset))) + && (map_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules) + && (error1 == MAP_ERROR_NONE)) + { + ip6_map_fragment_cache (ip61, + (ip6_frag_hdr_t *) u8_ptr_add (ip61, + vnet_buffer + (p1)->map_t. 
+ v6.frag_offset), + d1, map_port1); + } + + if (PREDICT_TRUE + (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip60->payload_length)); + } + + if (PREDICT_TRUE + (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + vnet_buffer (p1)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip61->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, + next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + u8 error0; + u32 l4_len0; + i32 map_port0; + map_domain_t *d0; + ip6_frag_hdr_t *frag0; + u32 port_offset; + ip6_mapt_next_t next0 = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + ip60 = vlib_buffer_get_current (p0); + + if (mm->is_ce) + { + u32 daddr; + //Save daddr in a different variable to not overwrite ip.adj_index + daddr = 0; /* TODO */ + /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ + + daddr = map_get_ip4 (&ip60->dst_address, 0 /*TODO*/); + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & daddr, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + + daddr = map_get_ip4 (&ip60->dst_address, d0->flags); + + //FIXME: What if d0 is null + vnet_buffer (p0)->map_t.v6.daddr = daddr; + vnet_buffer (p0)->map_t.v6.saddr = + 
ip6_map_t_embedded_address (d0, &ip60->src_address); + + port_offset = 2; + } + else + { + u32 saddr; + //Save saddr in a different variable to not overwrite ip.adj_index + saddr = 0; /* TODO */ + /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ + + saddr = map_get_ip4 (&ip60->src_address, 0 /*TODO*/); + d0 = + ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *) & saddr, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + + saddr = map_get_ip4 (&ip60->src_address, d0->flags); + + //FIXME: What if d0 is null + vnet_buffer (p0)->map_t.v6.saddr = saddr; + vnet_buffer (p0)->map_t.v6.daddr = + ip6_map_t_embedded_address (d0, &ip60->dst_address); + + port_offset = 0; + } + + vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + + if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length, + &(vnet_buffer (p0)->map_t. + v6.l4_protocol), + &(vnet_buffer (p0)->map_t. + v6.l4_offset), + &(vnet_buffer (p0)->map_t. + v6.frag_offset)))) + { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + map_port0 = -1; + l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) + + sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset; + frag0 = + (ip6_frag_hdr_t *) u8_ptr_add (ip60, + vnet_buffer (p0)->map_t. + v6.frag_offset); + + + if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset (frag0))) + { + map_port0 = ip6_map_fragment_get (ip60, frag0, d0); + error0 = (map_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY; + next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + } + else + if (PREDICT_TRUE + (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) + { + error0 = + l4_len0 < + sizeof (tcp_header_t) ? 
MAP_ERROR_MALFORMED : error0; + vnet_buffer (p0)->map_t.checksum_offset = + vnet_buffer (p0)->map_t.v6.l4_offset + 16; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset + + port_offset)); + } + else + if (PREDICT_TRUE + (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) + { + error0 = + l4_len0 < + sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : error0; + vnet_buffer (p0)->map_t.checksum_offset = + vnet_buffer (p0)->map_t.v6.l4_offset + 6; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset + + port_offset)); + } + else if (vnet_buffer (p0)->map_t.v6.l4_protocol == + IP_PROTOCOL_ICMP6) + { + error0 = + l4_len0 < + sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : error0; + next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if (((icmp46_header_t *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset))->code == + ICMP6_echo_reply + || ((icmp46_header_t *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6. + l4_offset))->code == ICMP6_echo_request) + map_port0 = + (i32) * + ((u16 *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t.v6.l4_offset + 6)); + } + else + { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. 
+ error0 = MAP_ERROR_BAD_PROTOCOL; + } + + //Security check + if (PREDICT_FALSE + ((!mm->is_ce) && (map_port0 != -1) + && (ip60->src_address.as_u64[0] != + map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, + map_port0) + || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, + vnet_buffer + (p0)->map_t.v6.saddr, + map_port0)))) + { + //Security check when src_port0 is not zero (non-first fragment, UDP or TCP) + error0 = MAP_ERROR_SEC_CHECK; + } + + //Fragmented first packet needs to be cached for following packets + if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) + u8_ptr_add (ip60, + vnet_buffer + (p0)->map_t. + v6.frag_offset))) + && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) + && (error0 == MAP_ERROR_NONE)) + { + ip6_map_fragment_cache (ip60, + (ip6_frag_hdr_t *) u8_ptr_add (ip60, + vnet_buffer + (p0)->map_t. + v6.frag_offset), + d0, map_port0); + } + + if (PREDICT_TRUE + (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) + { + vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, + thread_index, + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip60->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? 
IP6_MAPT_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static char *map_t_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { + .function = ip6_map_t_fragmented, + .name = "ip6-map-t-fragmented", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT, + .next_nodes = { + [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { + .function = ip6_map_t_icmp, + .name = "ip6-map-t-icmp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_ICMP_N_NEXT, + .next_nodes = { + [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { + .function = ip6_map_t_tcp_udp, + .name = "ip6-map-t-tcp-udp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT, + .next_nodes = { + [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_TCP_UDP_NEXT_DROP] = 
"error-drop", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE(ip6_map_t_node) = { + .function = ip6_map_t, + .name = "ip6-map-t", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_N_NEXT, + .next_nodes = { + [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", + [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", + [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented", + [IP6_MAPT_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/map.api b/src/plugins/map/map.api new file mode 100644 index 00000000000..a066b688514 --- /dev/null +++ b/src/plugins/map/map.api @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +option version = "1.1.0"; + +/** \brief Add MAP domains + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip6_prefix - Rule IPv6 prefix + @param ip4_prefix - Rule IPv4 prefix + @param ip6_src - MAP domain IPv6 BR address / Tunnel source + @param ip6_prefix_len - Rule IPv6 prefix length + @param ip4_prefix_len - Rule IPv4 prefix length + @param ea_bits_len - Embedded Address bits length + @param psid_offset - Port Set Identifier (PSID) offset + @param psid_length - PSID length + @param is_translation - MAP-E / MAP-T + @param is_rfc6052 - rfc6052 translation + @param mtu - MTU +*/ +define map_add_domain +{ + u32 client_index; + u32 context; + u8 ip6_prefix[16]; + u8 ip4_prefix[4]; + u8 ip6_src[16]; + u8 ip6_prefix_len; + u8 ip4_prefix_len; + u8 ip6_src_prefix_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + u8 is_translation; + u8 is_rfc6052; + u16 mtu; +}; + +/** \brief Reply for MAP domain add + @param context - returned sender context, to match reply w/ request + @param index - MAP domain index + @param retval - return code +*/ +define map_add_domain_reply +{ + u32 context; + u32 index; + i32 retval; +}; + +/** \brief Delete MAP domain + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param index - MAP Domain index +*/ +autoreply define map_del_domain +{ + u32 client_index; + u32 context; + u32 index; +}; + + +/** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param index - MAP Domain index + @param is_add - If 1 add rule, if 0 delete rule + @param ip6_dst - MAP CE IPv6 address + @param psid - Rule PSID +*/ +autoreply define map_add_del_rule +{ + u32 client_index; + u32 context; + u32 index; + u8 is_add; + u8 ip6_dst[16]; + u16 
psid; +}; + + +/** \brief Get list of map domains + @param client_index - opaque cookie to identify the sender +*/ +define map_domain_dump +{ + u32 client_index; + u32 context; +}; + +define map_domain_details +{ + u32 context; + u32 domain_index; + u8 ip6_prefix[16]; + u8 ip4_prefix[4]; + u8 ip6_src[16]; + u8 ip6_prefix_len; + u8 ip4_prefix_len; + u8 ip6_src_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + u8 flags; + u16 mtu; + u8 is_translation; +}; + +define map_rule_dump +{ + u32 client_index; + u32 context; + u32 domain_index; +}; + +define map_rule_details +{ + u32 context; + u8 ip6_dst[16]; + u16 psid; +}; + +/** \brief Request for a single block of summary stats + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define map_summary_stats +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply for map_summary_stats request + @param context - sender context, to match reply w/ request + @param retval - return code for request + @param total_bindings - + @param total_pkts - + @param total_ip4_fragments - + @param total_security_check - +*/ +define map_summary_stats_reply +{ + u32 context; + i32 retval; + u64 total_bindings; + u64 total_pkts[2]; + u64 total_bytes[2]; + u64 total_ip4_fragments; + u64 total_security_check[2]; +}; diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c new file mode 100644 index 00000000000..6d9730f3b06 --- /dev/null +++ b/src/plugins/map/map.c @@ -0,0 +1,2373 @@ +/* + * map.c : MAP support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "map.h" + +map_main_t map_main; + +/* + * This code supports the following MAP modes: + * + * Algorithmic Shared IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix > 32 + * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic Full IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix = 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic IPv4 prefix (ea_bits_len > 0): + * ea_bits_len + ip4_prefix < 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * + * Independent Shared IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length > 0 + * Rule IPv6 address = 128, Rule PSID Set + * Independent Full IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length = 0, ip6_prefix = 128 + * Independent IPv4 prefix (ea_bits_len = 0): + * ip4_prefix < 32 + * psid_length = 0, ip6_prefix = 128 + * + */ + +/* + * This code supports MAP-T: + * + * With DMR prefix length equal to 96. 
+ * + */ + + + +int +map_create_domain (ip4_address_t * ip4_prefix, + u8 ip4_prefix_len, + ip6_address_t * ip6_prefix, + u8 ip6_prefix_len, + ip6_address_t * ip6_src, + u8 ip6_src_len, + u8 ea_bits_len, + u8 psid_offset, + u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags) +{ + u8 suffix_len, suffix_shift; + map_main_t *mm = &map_main; + dpo_id_t dpo_v4 = DPO_INVALID; + dpo_id_t dpo_v6 = DPO_INVALID; + map_domain_t *d; + + /* Sanity check on the src prefix length */ + if (flags & MAP_DOMAIN_TRANSLATION) + { + if (ip6_src_len != 96) + { + clib_warning ("MAP-T only supports ip6_src_len = 96 for now."); + return -1; + } + if ((flags & MAP_DOMAIN_RFC6052) && ip6_prefix_len != 96) + { + clib_warning ("RFC6052 translation only supports ip6_prefix_len = " + "96 for now"); + return -1; + } + } + else + { + if (ip6_src_len != 128) + { + clib_warning + ("MAP-E requires a BR address, not a prefix (ip6_src_len should " + "be 128)."); + return -1; + } + } + + /* How many, and which bits to grab from the IPv4 DA */ + if (ip4_prefix_len + ea_bits_len < 32) + { + if (!(flags & MAP_DOMAIN_TRANSLATION)) + flags |= MAP_DOMAIN_PREFIX; + suffix_shift = 32 - ip4_prefix_len - ea_bits_len; + suffix_len = ea_bits_len; + } + else + { + suffix_shift = 0; + suffix_len = 32 - ip4_prefix_len; + } + + /* EA bits must be within the first 64 bits */ + if (ea_bits_len > 0 && ((ip6_prefix_len + ea_bits_len) > 64 || + ip6_prefix_len + suffix_len + psid_length > 64)) + { + clib_warning + ("Embedded Address bits must be within the first 64 bits of " + "the IPv6 prefix"); + return -1; + } + + if (mm->is_ce && !(flags & MAP_DOMAIN_TRANSLATION)) + { + clib_warning ("MAP-E CE is not supported yet"); + return -1; + } + + /* Get domain index */ + pool_get_aligned (mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset (d, 0, sizeof (*d)); + *map_domain_index = d - mm->domains; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = 
*ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip6_src = *ip6_src; + d->ip6_src_len = ip6_src_len; + d->ea_bits_len = ea_bits_len; + d->psid_offset = psid_offset; + d->psid_length = psid_length; + d->mtu = mtu; + d->flags = flags; + d->suffix_shift = suffix_shift; + d->suffix_mask = (1 << suffix_len) - 1; + + d->psid_shift = 16 - psid_length - psid_offset; + d->psid_mask = (1 << d->psid_length) - 1; + d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; + + /* MAP data-plane object */ + if (d->flags & MAP_DOMAIN_TRANSLATION) + map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); + else + map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); + + /* Create ip4 route */ + u8 ip4_pfx_len; + ip4_address_t ip4_pfx; + if (mm->is_ce) + { + ip4_pfx_len = 0; + ip4_pfx.as_u32 = 0; + } + else + { + ip4_pfx_len = d->ip4_prefix_len; + ip4_pfx = d->ip4_prefix; + } + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = ip4_pfx_len, + .fp_addr = { + .ip4 = ip4_pfx, + } + , + }; + fib_table_entry_special_dpo_add (0, &pfx, + FIB_SOURCE_MAP, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4); + dpo_reset (&dpo_v4); + + /* + * construct a DPO to use the v6 domain + */ + if (d->flags & MAP_DOMAIN_TRANSLATION) + map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); + else + map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); + + /* + * Multiple MAP domains may share same source IPv6 TEP. Which is just dandy. + * We are not tracking the sharing. So a v4 lookup to find the correct + * domain post decap/trnaslate is always done + * + * Create ip6 route. This is a reference counted add. If the prefix + * already exists and is MAP sourced, it is now MAP source n+1 times + * and will need to be removed n+1 times. 
+ */ + u8 ip6_pfx_len; + ip6_address_t ip6_pfx; + if (mm->is_ce) + { + ip6_pfx_len = d->ip6_prefix_len; + ip6_pfx = d->ip6_prefix; + } + else + { + ip6_pfx_len = d->ip6_src_len; + ip6_pfx = d->ip6_src; + } + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = ip6_pfx_len, + .fp_addr.ip6 = ip6_pfx, + }; + + fib_table_entry_special_dpo_add (0, &pfx6, + FIB_SOURCE_MAP, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); + dpo_reset (&dpo_v6); + + /* Validate packet/byte counters */ + map_domain_counter_lock (mm); + int i; + for (i = 0; i < vec_len (mm->simple_domain_counters); i++) + { + vlib_validate_simple_counter (&mm->simple_domain_counters[i], + *map_domain_index); + vlib_zero_simple_counter (&mm->simple_domain_counters[i], + *map_domain_index); + } + for (i = 0; i < vec_len (mm->domain_counters); i++) + { + vlib_validate_combined_counter (&mm->domain_counters[i], + *map_domain_index); + vlib_zero_combined_counter (&mm->domain_counters[i], *map_domain_index); + } + map_domain_counter_unlock (mm); + + return 0; +} + +/* + * map_delete_domain + */ +int +map_delete_domain (u32 map_domain_index) +{ + map_main_t *mm = &map_main; + map_domain_t *d; + + if (pool_is_free_index (mm->domains, map_domain_index)) + { + clib_warning ("MAP domain delete: domain does not exist: %d", + map_domain_index); + return -1; + } + + d = pool_elt_at_index (mm->domains, map_domain_index); + + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = d->ip4_prefix_len, + .fp_addr = { + .ip4 = d->ip4_prefix, + } + , + }; + fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP); + + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = d->ip6_src_len, + .fp_addr = { + .ip6 = d->ip6_src, + } + , + }; + fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP); + + /* Deleting rules */ + if (d->rules) + clib_mem_free (d->rules); + + pool_put (mm->domains, d); + + return 0; +} + +int +map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, + u8 is_add) +{ + 
map_domain_t *d; + map_main_t *mm = &map_main; + + if (pool_is_free_index (mm->domains, map_domain_index)) + { + clib_warning ("MAP rule: domain does not exist: %d", map_domain_index); + return -1; + } + d = pool_elt_at_index (mm->domains, map_domain_index); + + /* Rules are only used in 1:1 independent case */ + if (d->ea_bits_len > 0) + return (-1); + + if (!d->rules) + { + u32 l = (0x1 << d->psid_length) * sizeof (ip6_address_t); + d->rules = clib_mem_alloc_aligned (l, CLIB_CACHE_LINE_BYTES); + if (!d->rules) + return -1; + memset (d->rules, 0, l); + } + + if (psid >= (0x1 << d->psid_length)) + { + clib_warning ("MAP rule: PSID outside bounds: %d [%d]", psid, + 0x1 << d->psid_length); + return -1; + } + + if (is_add) + { + d->rules[psid] = *tep; + } + else + { + memset (&d->rules[psid], 0, sizeof (ip6_address_t)); + } + return 0; +} + +#ifdef MAP_SKIP_IP6_LOOKUP +/** + * Pre-resolvd per-protocol global next-hops + */ +map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; + +static void +map_pre_resolve_init (map_main_pre_resolved_t * pr) +{ + pr->fei = FIB_NODE_INDEX_INVALID; + fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E); +} + +static u8 * +format_map_pre_resolve (u8 * s, va_list * ap) +{ + map_main_pre_resolved_t *pr = va_arg (*ap, map_main_pre_resolved_t *); + + if (FIB_NODE_INDEX_INVALID != pr->fei) + { + fib_prefix_t pfx; + + fib_entry_get_prefix (pr->fei, &pfx); + + return (format (s, "%U (%u)", + format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY, + pr->dpo.dpoi_index)); + } + else + { + return (format (s, "un-set")); + } +} + + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +map_last_lock_gone (fib_node_t * node) +{ + /* + * The MAP is a root of the graph. As such + * it never has children and thus is never locked. 
+ */ + ASSERT (0); +} + +static map_main_pre_resolved_t * +map_from_fib_node (fib_node_t * node) +{ + ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type); + return ((map_main_pre_resolved_t *) + (((char *) node) - + STRUCT_OFFSET_OF (map_main_pre_resolved_t, node))); +} + +static void +map_stack (map_main_pre_resolved_t * pr) +{ + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding (pr->fei); + + dpo_copy (&pr->dpo, dpo); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + map_stack (map_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +map_fib_node_get (fib_node_index_t index) +{ + return (&pre_resolved[index].node); +} + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t map_vft = { + .fnv_get = map_fib_node_get, + .fnv_last_lock = map_last_lock_gone, + .fnv_back_walk = map_back_walk, +}; + +static void +map_fib_resolve (map_main_pre_resolved_t * pr, + fib_protocol_t proto, u8 len, const ip46_address_t * addr) +{ + fib_prefix_t pfx = { + .fp_proto = proto, + .fp_len = len, + .fp_addr = *addr, + }; + + pr->fei = fib_table_entry_special_add (0, // default fib + &pfx, + FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); + pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto); + map_stack (pr); +} + +static void +map_fib_unresolve (map_main_pre_resolved_t * pr, + fib_protocol_t proto, u8 len, const ip46_address_t * addr) +{ + fib_prefix_t pfx = { + .fp_proto = proto, + .fp_len = len, + .fp_addr = *addr, + }; + + fib_entry_child_remove (pr->fei, pr->sibling); + + fib_table_entry_special_remove (0, // default fib + &pfx, FIB_SOURCE_RR); + dpo_reset (&pr->dpo); + + pr->fei = FIB_NODE_INDEX_INVALID; + pr->sibling = FIB_NODE_INDEX_INVALID; +} + 
+static void +map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del) +{ + if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)) + { + ip46_address_t addr = { + .ip6 = *ip6, + }; + if (is_del) + map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP6], + FIB_PROTOCOL_IP6, 128, &addr); + else + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6], + FIB_PROTOCOL_IP6, 128, &addr); + } + if (ip4 && (ip4->as_u32 != 0)) + { + ip46_address_t addr = { + .ip4 = *ip4, + }; + if (is_del) + map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP4], + FIB_PROTOCOL_IP4, 32, &addr); + else + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4], + FIB_PROTOCOL_IP4, 32, &addr); + } +} +#endif + +static clib_error_t * +map_security_check_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "off")) + mm->sec_check = false; + else if (unformat (line_input, "on")) + mm->sec_check = true; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_security_check_frag_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + clib_error_t *error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "off")) + mm->sec_check_frag = false; + else if (unformat (line_input, "on")) + mm->sec_check_frag = true; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_add_domain_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + u32 ip6_prefix_len = 0, ip4_prefix_len = 0, map_domain_index, ip6_src_len; + u32 num_m_args = 0; + /* Optional arguments */ + u32 ea_bits_len = 0, psid_offset = 0, psid_length = 0; + u32 mtu = 0; + u8 flags = 0; + ip6_src_len = 128; + clib_error_t *error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, + &ip4_prefix_len)) + num_m_args++; + else + if (unformat + (line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, + &ip6_prefix_len)) + num_m_args++; + else + if (unformat + (line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, + &ip6_src_len)) + num_m_args++; + else + if (unformat + (line_input, "ip6-src %U", unformat_ip6_address, &ip6_src)) + num_m_args++; + else if (unformat (line_input, "ea-bits-len %d", &ea_bits_len)) + num_m_args++; + else if (unformat (line_input, "psid-offset %d", &psid_offset)) + num_m_args++; + else if (unformat (line_input, "psid-len %d", &psid_length)) + num_m_args++; + else if (unformat (line_input, "mtu %d", &mtu)) + num_m_args++; + else if (unformat (line_input, "map-t")) + flags |= MAP_DOMAIN_TRANSLATION; + else if (unformat (line_input, "rfc6052")) + flags |= (MAP_DOMAIN_TRANSLATION | MAP_DOMAIN_RFC6052); + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (num_m_args < 3) + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } + + map_create_domain (&ip4_prefix, ip4_prefix_len, + &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, + ea_bits_len, psid_offset, psid_length, &map_domain_index, + mtu, flags); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_del_domain_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 num_m_args = 0; + u32 map_domain_index; + clib_error_t *error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "index %d", &map_domain_index)) + num_m_args++; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (num_m_args != 1) + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } + + map_delete_domain (map_domain_index); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_add_rule_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip6_address_t tep; + u32 num_m_args = 0; + u32 psid = 0, map_domain_index; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "index %d", &map_domain_index)) + num_m_args++; + else if (unformat (line_input, "psid %d", &psid)) + num_m_args++; + else + if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep)) + num_m_args++; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (num_m_args != 3) + { + error = clib_error_return (0, "mandatory argument(s) missing"); + goto done; + } + + if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0) + { + error = clib_error_return (0, "Failing to add Mapping Rule"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +#if MAP_SKIP_IP6_LOOKUP +static clib_error_t * +map_pre_resolve_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4nh, *p_v4 = NULL; + ip6_address_t 
ip6nh, *p_v6 = NULL; + clib_error_t *error = NULL; + int is_del = 0; + + memset (&ip4nh, 0, sizeof (ip4nh)); + memset (&ip6nh, 0, sizeof (ip6nh)); + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) + p_v4 = &ip4nh; + else + if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) + p_v6 = &ip6nh; + else if (unformat (line_input, "del")) + is_del = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + map_pre_resolve (p_v4, p_v6, is_del); + +done: + unformat_free (line_input); + + return error; +} +#endif + +static clib_error_t * +map_icmp_relay_source_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t icmp_src_address; + map_main_t *mm = &map_main; + clib_error_t *error = NULL; + + mm->icmp4_src_address.as_u32 = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_ip4_address, &icmp_src_address)) + mm->icmp4_src_address = icmp_src_address; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_icmp_unreachables_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + int num_m_args = 0; + clib_error_t *error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + num_m_args++; + if (unformat (line_input, "on")) + mm->icmp6_enabled = true; + else if (unformat (line_input, "off")) + mm->icmp6_enabled = false; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + + if (num_m_args != 1) + error = clib_error_return (0, "mandatory argument(s) missing"); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_fragment_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "inner")) + mm->frag_inner = true; + else if (unformat (line_input, "outer")) + mm->frag_inner = false; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_fragment_df_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + clib_error_t *error = NULL; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on")) + mm->frag_ignore_df = true; + else if (unformat (line_input, "off")) + mm->frag_ignore_df = false; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +map_traffic_class_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + u32 tc = 0; + clib_error_t *error = NULL; + + mm->tc_copy = false; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "copy")) + mm->tc_copy = true; + else if (unformat (line_input, "%x", &tc)) + mm->tc = tc & 0xff; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + +done: + unformat_free (line_input); + + return error; +} + +static char * +map_flags_to_string (u32 flags) +{ + if (flags & MAP_DOMAIN_RFC6052) + return "rfc6052"; + if (flags & MAP_DOMAIN_PREFIX) + return "prefix"; + if (flags & MAP_DOMAIN_TRANSLATION) + return "map-t"; + return ""; +} + +static u8 * +format_map_domain (u8 * s, va_list * args) +{ + map_domain_t *d = va_arg (*args, map_domain_t *); + bool counters = va_arg (*args, int); + map_main_t *mm = &map_main; + ip6_address_t ip6_prefix; + + if (d->rules) + memset (&ip6_prefix, 0, sizeof (ip6_prefix)); + else + ip6_prefix = d->ip6_prefix; + + s = format (s, + "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d " + "psid-offset %d psid-len %d mtu %d %s", + d - mm->domains, + format_ip4_address, &d->ip4_prefix, 
d->ip4_prefix_len, + format_ip6_address, &ip6_prefix, d->ip6_prefix_len, + format_ip6_address, &d->ip6_src, d->ip6_src_len, + d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu, + map_flags_to_string (d->flags)); + + if (counters) + { + map_domain_counter_lock (mm); + vlib_counter_t v; + vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_TX], + d - mm->domains, &v); + s = format (s, " TX: %lld/%lld", v.packets, v.bytes); + vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_RX], + d - mm->domains, &v); + s = format (s, " RX: %lld/%lld", v.packets, v.bytes); + map_domain_counter_unlock (mm); + } + s = format (s, "\n"); + + if (d->rules) + { + int i; + ip6_address_t dst; + for (i = 0; i < (0x1 << d->psid_length); i++) + { + dst = d->rules[i]; + if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0) + continue; + s = format (s, + " rule psid: %d ip6-dst %U\n", i, format_ip6_address, + &dst); + } + } + return s; +} + +static u8 * +format_map_ip4_reass (u8 * s, va_list * args) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *r = va_arg (*args, map_ip4_reass_t *); + map_ip4_reass_key_t *k = &r->key; + f64 now = vlib_time_now (mm->vlib_main); + f64 lifetime = (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; + s = format (s, + "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n", + format_ip4_address, &k->src.as_u8, format_ip4_address, + &k->dst.as_u8, k->protocol, + clib_net_to_host_u16 (k->fragment_id), + (r->port >= 0) ? clib_net_to_host_u16 (r->port) : -1, dt); + return s; +} + +static u8 * +format_map_ip6_reass (u8 * s, va_list * args) +{ + map_main_t *mm = &map_main; + map_ip6_reass_t *r = va_arg (*args, map_ip6_reass_t *); + map_ip6_reass_key_t *k = &r->key; + f64 now = vlib_time_now (mm->vlib_main); + f64 lifetime = (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? 
(r->ts + lifetime - now) : -1; + s = format (s, + "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n", + format_ip6_address, &k->src.as_u8, format_ip6_address, + &k->dst.as_u8, k->protocol, + clib_net_to_host_u32 (k->fragment_id), dt); + return s; +} + +static clib_error_t * +show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + map_domain_t *d; + bool counters = false; + u32 map_domain_index = ~0; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "counters")) + counters = true; + else if (unformat (line_input, "index %d", &map_domain_index)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (pool_elts (mm->domains) == 0) + vlib_cli_output (vm, "No MAP domains are configured..."); + + if (map_domain_index == ~0) + { + /* *INDENT-OFF* */ + pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); + /* *INDENT-ON* */ + } + else + { + if (pool_is_free_index (mm->domains, map_domain_index)) + { + error = clib_error_return (0, "MAP domain does not exists %d", + map_domain_index); + goto done; + } + + d = pool_elt_at_index (mm->domains, map_domain_index); + vlib_cli_output (vm, "%U", format_map_domain, d, counters); + } + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +show_map_fragments_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *f4; + map_ip6_reass_t *f6; + + /* *INDENT-OFF* */ + pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);})); + /* 
*INDENT-ON* */ + /* *INDENT-OFF* */ + pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);})); + /* *INDENT-ON* */ + return (0); +} + +u64 +map_error_counter_get (u32 node_index, map_error_t map_error) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index); + vlib_error_main_t *em = &vm->error_main; + vlib_error_t e = error_node->errors[map_error]; + vlib_node_t *n = vlib_get_node (vm, node_index); + u32 ci; + + ci = vlib_error_get_code (e); + ASSERT (ci < n->n_errors); + ci += n->error_heap_index; + + return (em->counters[ci]); +} + +static clib_error_t * +show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + map_main_t *mm = &map_main; + map_domain_t *d; + int domains = 0, rules = 0, domaincount = 0, rulecount = 0; + if (pool_elts (mm->domains) == 0) + { + vlib_cli_output (vm, "No MAP domains are configured..."); + return 0; + } + + /* *INDENT-OFF* */ + pool_foreach(d, mm->domains, ({ + if (d->rules) { + rulecount+= 0x1 << d->psid_length; + rules += sizeof(ip6_address_t) * 0x1 << d->psid_length; + } + domains += sizeof(*d); + domaincount++; + })); + /* *INDENT-ON* */ + + vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t)); + vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains); + vlib_cli_output (vm, "MAP rules: %d (%d bytes)\n", rulecount, rules); + vlib_cli_output (vm, "Total: %d bytes)\n", rules + domains); + +#if MAP_SKIP_IP6_LOOKUP + vlib_cli_output (vm, + "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n", + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6], + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]); + +#endif + + if (mm->tc_copy) + vlib_cli_output (vm, "MAP traffic-class: copy"); + else + vlib_cli_output (vm, "MAP traffic-class: %x", mm->tc); + + vlib_cli_output (vm, + "MAP IPv6 inbound security check: %s, fragmented packet security 
check: %s", + mm->sec_check ? "enabled" : "disabled", + mm->sec_check_frag ? "enabled" : "disabled"); + + vlib_cli_output (vm, "ICMP-relay IPv4 source address: %U\n", + format_ip4_address, &mm->icmp4_src_address); + vlib_cli_output (vm, "ICMP6 unreachables sent for unmatched packets: %s\n", + mm->icmp6_enabled ? "enabled" : "disabled"); + vlib_cli_output (vm, "Inner fragmentation: %s\n", + mm->frag_inner ? "enabled" : "disabled"); + vlib_cli_output (vm, "Fragment packets regardless of DF flag: %s\n", + mm->frag_ignore_df ? "enabled" : "disabled"); + + /* + * Counters + */ + vlib_combined_counter_main_t *cm = mm->domain_counters; + u64 total_pkts[MAP_N_DOMAIN_COUNTER]; + u64 total_bytes[MAP_N_DOMAIN_COUNTER]; + int which, i; + vlib_counter_t v; + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + map_domain_counter_lock (mm); + vec_foreach (cm, mm->domain_counters) + { + which = cm - mm->domain_counters; + + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) + { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + map_domain_counter_unlock (mm); + + vlib_cli_output (vm, "Encapsulated packets: %lld bytes: %lld\n", + total_pkts[MAP_DOMAIN_COUNTER_TX], + total_bytes[MAP_DOMAIN_COUNTER_TX]); + vlib_cli_output (vm, "Decapsulated packets: %lld bytes: %lld\n", + total_pkts[MAP_DOMAIN_COUNTER_RX], + total_bytes[MAP_DOMAIN_COUNTER_RX]); + + vlib_cli_output (vm, "ICMP relayed packets: %d\n", + vlib_get_simple_counter (&mm->icmp_relayed, 0)); + + return 0; +} + +static clib_error_t * +map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 lifetime = ~0; + f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1); + u32 pool_size = ~0; + u64 buffers = ~(0ull); + u8 ip4 = 0, ip6 = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + 
return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "lifetime %u", &lifetime)) + ; + else if (unformat (line_input, "ht-ratio %lf", &ht_ratio)) + ; + else if (unformat (line_input, "pool-size %u", &pool_size)) + ; + else if (unformat (line_input, "buffers %llu", &buffers)) + ; + else if (unformat (line_input, "ip4")) + ip4 = 1; + else if (unformat (line_input, "ip6")) + ip6 = 1; + else + { + unformat_free (line_input); + return clib_error_return (0, "invalid input"); + } + } + unformat_free (line_input); + + if (!ip4 && !ip6) + return clib_error_return (0, "must specify ip4 and/or ip6"); + + if (ip4) + { + if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return (0, "invalid ip4-reass pool-size ( > %d)", + MAP_IP4_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) + && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return clib_error_return (0, "invalid ip4-reass ht-ratio ( > %d)", + MAP_IP4_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX) + return clib_error_return (0, "invalid ip4-reass lifetime ( > %d)", + MAP_IP4_REASS_CONF_LIFETIME_MAX); + if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) + return clib_error_return (0, "invalid ip4-reass buffers ( > %ld)", + MAP_IP4_REASS_CONF_BUFFERS_MAX); + } + + if (ip6) + { + if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return (0, "invalid ip6-reass pool-size ( > %d)", + MAP_IP6_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) + && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return clib_error_return (0, "invalid ip6-reass ht-log2len ( > %d)", + MAP_IP6_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX) + return clib_error_return (0, "invalid ip6-reass lifetime ( > %d)", + MAP_IP6_REASS_CONF_LIFETIME_MAX); + if 
(buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX) + return clib_error_return (0, "invalid ip6-reass buffers ( > %ld)", + MAP_IP6_REASS_CONF_BUFFERS_MAX); + } + + if (ip4) + { + u32 reass = 0, packets = 0; + if (pool_size != ~0) + { + if (map_ip4_reass_conf_pool_size (pool_size, &reass, &packets)) + { + vlib_cli_output (vm, "Could not set ip4-reass pool-size"); + } + else + { + vlib_cli_output (vm, + "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", + reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)) + { + if (map_ip4_reass_conf_ht_ratio (ht_ratio, &reass, &packets)) + { + vlib_cli_output (vm, "Could not set ip4-reass ht-log2len"); + } + else + { + vlib_cli_output (vm, + "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", + reass, packets); + } + } + if (lifetime != ~0) + { + if (map_ip4_reass_conf_lifetime (lifetime)) + vlib_cli_output (vm, "Could not set ip4-reass lifetime"); + else + vlib_cli_output (vm, "Setting ip4-reass lifetime"); + } + if (buffers != ~(0ull)) + { + if (map_ip4_reass_conf_buffers (buffers)) + vlib_cli_output (vm, "Could not set ip4-reass buffers"); + else + vlib_cli_output (vm, "Setting ip4-reass buffers"); + } + + if (map_main.ip4_reass_conf_buffers > + map_main.ip4_reass_conf_pool_size * + MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) + { + vlib_cli_output (vm, + "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly."); + } + } + + if (ip6) + { + u32 reass = 0, packets = 0; + if (pool_size != ~0) + { + if (map_ip6_reass_conf_pool_size (pool_size, &reass, &packets)) + { + vlib_cli_output (vm, "Could not set ip6-reass pool-size"); + } + else + { + vlib_cli_output (vm, + "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", + reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)) + { + if (map_ip6_reass_conf_ht_ratio (ht_ratio, &reass, &packets)) + { + vlib_cli_output (vm, 
"Could not set ip6-reass ht-log2len"); + } + else + { + vlib_cli_output (vm, + "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", + reass, packets); + } + } + if (lifetime != ~0) + { + if (map_ip6_reass_conf_lifetime (lifetime)) + vlib_cli_output (vm, "Could not set ip6-reass lifetime"); + else + vlib_cli_output (vm, "Setting ip6-reass lifetime"); + } + if (buffers != ~(0ull)) + { + if (map_ip6_reass_conf_buffers (buffers)) + vlib_cli_output (vm, "Could not set ip6-reass buffers"); + else + vlib_cli_output (vm, "Setting ip6-reass buffers"); + } + + if (map_main.ip6_reass_conf_buffers > + map_main.ip6_reass_conf_pool_size * + MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) + { + vlib_cli_output (vm, + "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly."); + } + } + + return 0; +} + + +/* + * packet trace format function + */ +u8 * +format_map_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + map_trace_t *t = va_arg (*args, map_trace_t *); + u32 map_domain_index = t->map_domain_index; + u16 port = t->port; + + s = + format (s, "MAP domain index: %d L4 port: %u", map_domain_index, + clib_net_to_host_u16 (port)); + + return s; +} + +static_always_inline map_ip4_reass_t * +map_ip4_reass_lookup (map_ip4_reass_key_t * k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip4_reass_hash_table[bucket]; + while (ri != MAP_REASS_INDEX_NONE) + { + map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); + if (r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + now < r->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000)) + { + return r; + } + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool) + +void +map_ip4_reass_free (map_ip4_reass_t * r, u32 ** pi_to_drop) +{ + map_main_t *mm = &map_main; + 
map_ip4_reass_get_fragments (r, pi_to_drop); + + // Unlink in hash bucket + map_ip4_reass_t *r2 = NULL; + u32 r2i = mm->ip4_reass_hash_table[r->bucket]; + while (r2i != map_ip4_reass_pool_index (r)) + { + ASSERT (r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index (mm->ip4_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) + { + r2->bucket_next = r->bucket_next; + } + else + { + mm->ip4_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip4_reass_pool_index (r)) + { + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + } + else + { + if (mm->ip4_reass_fifo_last == map_ip4_reass_pool_index (r)) + mm->ip4_reass_fifo_last = r->fifo_prev; + pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = + r->fifo_next; + pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = + r->fifo_prev; + } + + pool_put (mm->ip4_reass_pool, r); + mm->ip4_reass_allocated--; +} + +map_ip4_reass_t * +map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 ** pi_to_drop) +{ + map_ip4_reass_t *r; + map_main_t *mm = &map_main; + map_ip4_reass_key_t k = {.src.data_u32 = src, + .dst.data_u32 = dst, + .fragment_id = fragment_id, + .protocol = protocol + }; + + u32 h = 0; +#ifdef clib_crc32c_uses_intrinsics + h = clib_crc32c ((u8 *) k.as_u32, 16); +#else + u64 tmp = k.as_u32[0] ^ k.as_u32[1] ^ k.as_u32[2] ^ k.as_u32[3]; + h = clib_xxhash (tmp); +#endif + h = h >> (32 - mm->ip4_reass_ht_log2len); + + f64 now = vlib_time_now (mm->vlib_main); + + //Cache garbage collection + while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) + { + map_ip4_reass_t *last = + pool_elt_at_index (mm->ip4_reass_pool, mm->ip4_reass_fifo_last); + if (last->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000) < now) + map_ip4_reass_free (last, pi_to_drop); + else + break; + } + + if ((r = map_ip4_reass_lookup (&k, h, now))) + return r; + + if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size) + return NULL; + + pool_get 
(mm->ip4_reass_pool, r); + mm->ip4_reass_allocated++; + int i; + for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + r->fragments[i] = ~0; + + u32 ri = map_ip4_reass_pool_index (r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip4_reass_hash_table[h]; + mm->ip4_reass_hash_table[h] = ri; + + //Link in fifo + if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) + { + r->fifo_next = + pool_elt_at_index (mm->ip4_reass_pool, + mm->ip4_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip4_reass_fifo_last; + pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri; + } + else + { + r->fifo_next = r->fifo_prev = ri; + mm->ip4_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + r->port = -1; +#ifdef MAP_IP4_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; +#endif + + return r; +} + +int +map_ip4_reass_add_fragment (map_ip4_reass_t * r, u32 pi) +{ + if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers) + return -1; + + int i; + for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i] == ~0) + { + r->fragments[i] = pi; + map_main.ip4_reass_buffered_counter++; + return 0; + } + return -1; +} + +static_always_inline map_ip6_reass_t * +map_ip6_reass_lookup (map_ip6_reass_key_t * k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip6_reass_hash_table[bucket]; + while (ri != MAP_REASS_INDEX_NONE) + { + map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri); + if (now < r->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) && + r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + r->key.as_u64[2] == k->as_u64[2] && + r->key.as_u64[3] == k->as_u64[3] && + r->key.as_u64[4] == k->as_u64[4]) + return r; + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool) + 
+void +map_ip6_reass_free (map_ip6_reass_t * r, u32 ** pi_to_drop) +{ + map_main_t *mm = &map_main; + int i; + for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i].pi != ~0) + { + vec_add1 (*pi_to_drop, r->fragments[i].pi); + r->fragments[i].pi = ~0; + map_main.ip6_reass_buffered_counter--; + } + + // Unlink in hash bucket + map_ip6_reass_t *r2 = NULL; + u32 r2i = mm->ip6_reass_hash_table[r->bucket]; + while (r2i != map_ip6_reass_pool_index (r)) + { + ASSERT (r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index (mm->ip6_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) + { + r2->bucket_next = r->bucket_next; + } + else + { + mm->ip6_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip6_reass_pool_index (r)) + { + //Single element in the list, list is now empty + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } + else + { + if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index (r)) //First element + mm->ip6_reass_fifo_last = r->fifo_prev; + pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = + r->fifo_next; + pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = + r->fifo_prev; + } + + // Free from pool if necessary + pool_put (mm->ip6_reass_pool, r); + mm->ip6_reass_allocated--; +} + +map_ip6_reass_t * +map_ip6_reass_get (ip6_address_t * src, ip6_address_t * dst, u32 fragment_id, + u8 protocol, u32 ** pi_to_drop) +{ + map_ip6_reass_t *r; + map_main_t *mm = &map_main; + map_ip6_reass_key_t k = { + .src = *src, + .dst = *dst, + .fragment_id = fragment_id, + .protocol = protocol + }; + + u32 h = 0; + int i; + +#ifdef clib_crc32c_uses_intrinsics + h = clib_crc32c ((u8 *) k.as_u32, 40); +#else + u64 tmp = + k.as_u64[0] ^ k.as_u64[1] ^ k.as_u64[2] ^ k.as_u64[3] ^ k.as_u64[4]; + h = clib_xxhash (tmp); +#endif + + h = h >> (32 - mm->ip6_reass_ht_log2len); + + f64 now = vlib_time_now (mm->vlib_main); + + //Cache garbage collection + while 
(mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) + { + map_ip6_reass_t *last = + pool_elt_at_index (mm->ip6_reass_pool, mm->ip6_reass_fifo_last); + if (last->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) < now) + map_ip6_reass_free (last, pi_to_drop); + else + break; + } + + if ((r = map_ip6_reass_lookup (&k, h, now))) + return r; + + if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size) + return NULL; + + pool_get (mm->ip6_reass_pool, r); + mm->ip6_reass_allocated++; + for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + { + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + } + + u32 ri = map_ip6_reass_pool_index (r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip6_reass_hash_table[h]; + mm->ip6_reass_hash_table[h] = ri; + + //Link in fifo + if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) + { + r->fifo_next = + pool_elt_at_index (mm->ip6_reass_pool, + mm->ip6_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip6_reass_fifo_last; + pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri; + } + else + { + r->fifo_next = r->fifo_prev = ri; + mm->ip6_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + r->ip4_header.ip_version_and_header_length = 0; +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; +#endif + return r; +} + +int +map_ip6_reass_add_fragment (map_ip6_reass_t * r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 * data_start, u16 data_len) +{ + map_ip6_fragment_t *f = NULL, *prev_f = NULL; + u16 copied_len = (data_len > 20) ? 
20 : data_len; + + if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers) + return -1; + + //Lookup for fragments for the current buffer + //and the one before that + int i; + for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + { + if (data_offset && r->fragments[i].next_data_offset == data_offset) + { + prev_f = &r->fragments[i]; // This is buffer for previous packet + } + else if (r->fragments[i].next_data_offset == next_data_offset) + { + f = &r->fragments[i]; // This is a buffer for the current packet + } + else if (r->fragments[i].next_data_offset == 0) + { //Available + if (f == NULL) + f = &r->fragments[i]; + else if (prev_f == NULL) + prev_f = &r->fragments[i]; + } + } + + if (!f || f->pi != ~0) + return -1; + + if (data_offset) + { + if (!prev_f) + return -1; + + clib_memcpy (prev_f->next_data, data_start, copied_len); + prev_f->next_data_len = copied_len; + prev_f->next_data_offset = data_offset; + } + else + { + if (((ip4_header_t *) data_start)->ip_version_and_header_length != 0x45) + return -1; + + if (r->ip4_header.ip_version_and_header_length == 0) + clib_memcpy (&r->ip4_header, data_start, sizeof (ip4_header_t)); + } + + if (data_len > 20) + { + f->next_data_offset = next_data_offset; + f->pi = pi; + map_main.ip6_reass_buffered_counter++; + } + return 0; +} + +void +map_ip4_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + int i; + + if (dropped_packets) + *dropped_packets = mm->ip4_reass_buffered_counter; + if (trashed_reass) + *trashed_reass = mm->ip4_reass_allocated; + if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) + { + u16 ri = mm->ip4_reass_fifo_last; + do + { + map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); + for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i] != ~0) + map_ip4_drop_pi (r->fragments[i]); + + ri = r->fifo_next; + pool_put (mm->ip4_reass_pool, r); + } + while (ri != mm->ip4_reass_fifo_last); + 
} + + vec_free (mm->ip4_reass_hash_table); + vec_resize (mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len); + for (i = 0; i < (1 << mm->ip4_reass_ht_log2len); i++) + mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free (mm->ip4_reass_pool); + pool_alloc (mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size); + + mm->ip4_reass_allocated = 0; + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + mm->ip4_reass_buffered_counter = 0; +} + +u8 +map_get_ht_log2len (f32 ht_ratio, u16 pool_size) +{ + u32 desired_size = (u32) (pool_size * ht_ratio); + u8 i; + for (i = 1; i < 31; i++) + if ((1 << i) >= desired_size) + return i; + return 4; +} + +int +map_ip4_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass, + u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip4_reass_lock (); + mm->ip4_reass_conf_ht_ratio = ht_ratio; + mm->ip4_reass_ht_log2len = + map_get_ht_log2len (ht_ratio, mm->ip4_reass_conf_pool_size); + map_ip4_reass_reinit (trashed_reass, dropped_packets); + map_ip4_reass_unlock (); + return 0; +} + +int +map_ip4_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass, + u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip4_reass_lock (); + mm->ip4_reass_conf_pool_size = pool_size; + map_ip4_reass_reinit (trashed_reass, dropped_packets); + map_ip4_reass_unlock (); + return 0; +} + +int +map_ip4_reass_conf_lifetime (u16 lifetime_ms) +{ + map_main.ip4_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +int +map_ip4_reass_conf_buffers (u32 buffers) +{ + map_main.ip4_reass_conf_buffers = buffers; + return 0; +} + +void +map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + if (dropped_packets) + *dropped_packets = mm->ip6_reass_buffered_counter; + if (trashed_reass) + *trashed_reass = mm->ip6_reass_allocated; + int i; + if (mm->ip6_reass_fifo_last != 
MAP_REASS_INDEX_NONE) + { + u16 ri = mm->ip6_reass_fifo_last; + do + { + map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri); + for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i].pi != ~0) + map_ip6_drop_pi (r->fragments[i].pi); + + ri = r->fifo_next; + pool_put (mm->ip6_reass_pool, r); + } + while (ri != mm->ip6_reass_fifo_last); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } + + vec_free (mm->ip6_reass_hash_table); + vec_resize (mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len); + for (i = 0; i < (1 << mm->ip6_reass_ht_log2len); i++) + mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free (mm->ip6_reass_pool); + /* Pre-allocate from the IPv6 pool-size setting; this used to read the IPv4 setting by mistake. */ + pool_alloc (mm->ip6_reass_pool, mm->ip6_reass_conf_pool_size); + + mm->ip6_reass_allocated = 0; + mm->ip6_reass_buffered_counter = 0; +} + +/* Set the IPv6 hash-table ratio, recompute ht_log2len and rebuild the reassembly state under the ip6 reass lock. Returns -1 if ht_ratio exceeds MAP_IP6_REASS_CONF_HT_RATIO_MAX, 0 otherwise. */ +int +map_ip6_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass, + u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip6_reass_lock (); + mm->ip6_reass_conf_ht_ratio = ht_ratio; + mm->ip6_reass_ht_log2len = + map_get_ht_log2len (ht_ratio, mm->ip6_reass_conf_pool_size); + map_ip6_reass_reinit (trashed_reass, dropped_packets); + map_ip6_reass_unlock (); + return 0; +} + +/* Set the IPv6 reassembly pool size and rebuild the reassembly state under the ip6 reass lock. Returns -1 if pool_size exceeds MAP_IP6_REASS_CONF_POOL_SIZE_MAX, 0 otherwise. */ +int +map_ip6_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass, + u32 * dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip6_reass_lock (); + mm->ip6_reass_conf_pool_size = pool_size; + map_ip6_reass_reinit (trashed_reass, dropped_packets); + map_ip6_reass_unlock (); + return 0; +} + +/* Store the IPv6 reassembly lifetime (milliseconds); no validation here, always returns 0. */ +int +map_ip6_reass_conf_lifetime (u16 lifetime_ms) +{ + map_main.ip6_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +/* Store the IPv6 reassembly buffer limit; no validation here, always returns 0. */ +int +map_ip6_reass_conf_buffers (u32 buffers) +{ + map_main.ip6_reass_conf_buffers = buffers; + return 0; +} + +/* *INDENT-OFF* */ + +/*? 
+ * Configure MAP reassembly behaviour + * + * @cliexpar + * @cliexstart{map params reassembly} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = { + .path = "map params reassembly", + .short_help = "map params reassembly [ip4 | ip6] [lifetime ] " + "[pool-size ] [buffers ] " + "[ht-ratio ]", + .function = map_params_reass_command_fn, +}; + +/*? + * Set or copy the IP TOS/Traffic Class field + * + * @cliexpar + * @cliexstart{map params traffic-class} + * + * This command is used to set the traffic-class field in translated + * or encapsulated packets. If copy is specified (the default) then the + * traffic-class/TOS field is copied from the original packet to the + * translated / encapsulating header. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_traffic_class_command, static) = { + .path = "map params traffic-class", + .short_help = "map params traffic-class {0x0-0xff | copy}", + .function = map_traffic_class_command_fn, +}; + +/*? + * Bypass IP4/IP6 lookup + * + * @cliexpar + * @cliexstart{map params pre-resolve} + * + * Bypass a second FIB lookup of the translated or encapsulated + * packet, and forward the packet directly to the specified + * next-hop. This optimization trades forwarding flexibility for + * performance. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_pre_resolve_command, static) = { + .path = "map params pre-resolve", + .short_help = " map params pre-resolve {ip4-nh
} " + "| {ip6-nh
}", + .function = map_pre_resolve_command_fn, +}; + +/*? + * Enable or disable the MAP-E inbound security check + * + * @cliexpar + * @cliexstart{map params security-check} + * + * By default, a decapsulated packet's IPv4 source address will be + * verified against the outer header's IPv6 source address. Disabling + * this feature will allow IPv4 source address spoofing. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_security_check_command, static) = { + .path = "map params security-check", + .short_help = "map params security-check on|off", + .function = map_security_check_command_fn, +}; + +/*? + * Specify the IPv4 source address used for relayed ICMP error messages + * + * @cliexpar + * @cliexstart{map params icmp source-address} + * + * This command specifies which IPv4 source address (must be local to + * the system), that is used for relayed received IPv6 ICMP error + * messages. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = { + .path = "map params icmp source-address", + .short_help = "map params icmp source-address ", + .function = map_icmp_relay_source_address_command_fn, +}; + +/*? + * Send IPv6 ICMP unreachables + * + * @cliexpar + * @cliexstart{map params icmp6 unreachables} + * + * Send IPv6 ICMP unreachable messages back if security check fails or + * no MAP domain exists. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_icmp_unreachables_command, static) = { + .path = "map params icmp6 unreachables", + .short_help = "map params icmp6 unreachables {on|off}", + .function = map_icmp_unreachables_command_fn, +}; + +/*? + * Configure MAP fragmentation behaviour + * + * @cliexpar + * @cliexstart{map params fragment} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_fragment_command, static) = { + .path = "map params fragment", + .short_help = "map params fragment inner|outer", + .function = map_fragment_command_fn, +}; + +/*? 
+ * Ignore the IPv4 Don't fragment bit + * + * @cliexpar + * @cliexstart{map params fragment ignore-df} + * + * Allows fragmentation of the IPv4 packet even if the DF bit is + * set. The choice between inner or outer fragmentation of tunnel + * packets is complicated. The benefit of inner fragmentation is that + * the ultimate endpoint must reassemble, instead of the tunnel + * endpoint. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_fragment_df_command, static) = { + .path = "map params fragment ignore-df", + .short_help = "map params fragment ignore-df on|off", + .function = map_fragment_df_command_fn, +}; + +/*? + * Specify if the inbound security check should be done on fragments + * + * @cliexpar + * @cliexstart{map params security-check fragments} + * + * Typically the inbound on-decapsulation security check is only done + * on the first packet, i.e. the packet that contains the L4 + * information. While a security check on every fragment is possible, + * it has a cost. State must be created on the first fragment. + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_security_check_frag_command, static) = { + .path = "map params security-check fragments", + .short_help = "map params security-check fragments on|off", + .function = map_security_check_frag_command_fn, +}; + +/*? + * Add MAP domain + * + * @cliexpar + * @cliexstart{map add domain} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_add_domain_command, static) = { + .path = "map add domain", + .short_help = "map add domain ip4-pfx ip6-pfx " + "ip6-src ea-bits-len psid-offset psid-len " + "[map-t] [map-ce] [mtu ]", + .function = map_add_domain_command_fn, +}; + +/*? + * Add MAP rule to a domain + * + * @cliexpar + * @cliexstart{map add rule} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_add_rule_command, static) = { + .path = "map add rule", + .short_help = "map add rule index psid ip6-dst ", + .function = map_add_rule_command_fn, +}; + +/*? 
+ * Delete MAP domain + * + * @cliexpar + * @cliexstart{map del domain} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(map_del_command, static) = { + .path = "map del domain", + .short_help = "map del domain index ", + .function = map_del_domain_command_fn, +}; + +/*? + * Show MAP domains + * + * @cliexpar + * @cliexstart{show map domain} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(show_map_domain_command, static) = { + .path = "show map domain", + .short_help = "show map domain index [counters]", + .function = show_map_domain_command_fn, +}; + +/*? + * Show MAP statistics + * + * @cliexpar + * @cliexstart{show map stats} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(show_map_stats_command, static) = { + .path = "show map stats", + .short_help = "show map stats", + .function = show_map_stats_command_fn, +}; + +/*? + * Show MAP fragmentation information + * + * @cliexpar + * @cliexstart{show map fragments} + * @cliexend + ?*/ +VLIB_CLI_COMMAND(show_map_fragments_command, static) = { + .path = "show map fragments", + .short_help = "show map fragments", + .function = show_map_fragments_command_fn, +}; + +VLIB_PLUGIN_REGISTER() = { + .version = VPP_BUILD_VER, + .description = "Mapping of address and port (MAP)", +}; + +/* *INDENT-ON* */ + +static clib_error_t * +map_config (vlib_main_t * vm, unformat_input_t * input) +{ + map_main_t *mm = &map_main; + u8 is_ce = false; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "customer edge")) + is_ce = true; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + mm->is_ce = is_ce; + + return 0; +} + +VLIB_CONFIG_FUNCTION (map_config, "map"); + +/* + * map_init + */ +clib_error_t * +map_init (vlib_main_t * vm) +{ + map_main_t *mm = &map_main; + clib_error_t *error = 0; + mm->vnet_main = vnet_get_main (); + mm->vlib_main = vm; + +#ifdef MAP_SKIP_IP6_LOOKUP + fib_protocol_t proto; + + FOR_EACH_FIB_PROTOCOL (proto) + { + map_pre_resolve_init (&pre_resolved[proto]); 
+ } +#endif + + /* traffic class */ + mm->tc = 0; + mm->tc_copy = true; + + /* Inbound security check */ + mm->sec_check = true; + mm->sec_check_frag = false; + + /* ICMP6 Type 1, Code 5 for security check failure */ + mm->icmp6_enabled = false; + + mm->is_ce = false; + + /* Inner or outer fragmentation */ + mm->frag_inner = false; + mm->frag_ignore_df = false; + + vec_validate (mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1); + mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx"; + mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx"; + + vlib_validate_simple_counter (&mm->icmp_relayed, 0); + vlib_zero_simple_counter (&mm->icmp_relayed, 0); + + /* IP4 virtual reassembly */ + mm->ip4_reass_hash_table = 0; + mm->ip4_reass_pool = 0; + mm->ip4_reass_lock = + clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + *mm->ip4_reass_lock = 0; + mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; + mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; + mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; + mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT; + mm->ip4_reass_ht_log2len = + map_get_ht_log2len (mm->ip4_reass_conf_ht_ratio, + mm->ip4_reass_conf_pool_size); + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip4_reass_reinit (NULL, NULL); + + /* IP6 virtual reassembly */ + mm->ip6_reass_hash_table = 0; + mm->ip6_reass_pool = 0; + mm->ip6_reass_lock = + clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + *mm->ip6_reass_lock = 0; + mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT; + mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT; + mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT; + mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT; + mm->ip6_reass_ht_log2len = + map_get_ht_log2len (mm->ip6_reass_conf_ht_ratio, + mm->ip6_reass_conf_pool_size); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip6_reass_reinit (NULL, NULL); 
+ +#ifdef MAP_SKIP_IP6_LOOKUP + fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft); +#endif + map_dpo_module_init (); + + error = map_plugin_api_hookup (vm); + + return error; +} + +VLIB_INIT_FUNCTION (map_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h new file mode 100644 index 00000000000..45959f0d0e1 --- /dev/null +++ b/src/plugins/map/map.h @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAP_SKIP_IP6_LOOKUP 1 + +int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, + ip6_address_t * ip6_prefix, u8 ip6_prefix_len, + ip6_address_t * ip6_src, u8 ip6_src_len, + u8 ea_bits_len, u8 psid_offset, u8 psid_length, + u32 * map_domain_index, u16 mtu, u8 flags); +int map_delete_domain (u32 map_domain_index); +int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, + u8 is_add); +u8 *format_map_trace (u8 * s, va_list * args); + +typedef enum +{ + MAP_DOMAIN_PREFIX = 1 << 0, + MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T + MAP_DOMAIN_RFC6052 = 1 << 2, +} __attribute__ ((__packed__)) map_domain_flags_e; + +/** + * IP4 reassembly logic: + * One virtually reassembled flow requires a map_ip4_reass_t structure in order + * to keep the first-fragment port number and, optionally, cache out of sequence + * packets. + * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. + * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. + * When a new structure needs to be used, it is allocated from available ones. + * If there is no structure available, the oldest in use is selected and used if and + * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. + * In case no structure can be allocated, the fragment is dropped. 
+ */ + +#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly + +#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 + +#define MAP_IP6_REASS_COUNT_BYTES +#define MAP_IP4_REASS_COUNT_BYTES + +//#define IP6_MAP_T_OVERRIDE_TOS 0 + +/* + * This structure _MUST_ be no larger than a single cache line (64 bytes). + * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive. + */ +typedef struct +{ + /* Required for pool_get_aligned */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + ip6_address_t ip6_src; + ip6_address_t ip6_prefix; + ip6_address_t *rules; + u32 suffix_mask; + ip4_address_t ip4_prefix; + u16 psid_mask; + u16 mtu; + map_domain_flags_e flags; + u8 ip6_prefix_len; + u8 ip6_src_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + + /* helpers */ + u8 psid_shift; + u8 suffix_shift; + u8 ea_shift; + + /* not used by forwarding */ + u8 ip4_prefix_len; +} map_domain_t; + +STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES), + "MAP domain fits in one cacheline"); + +#define MAP_REASS_INDEX_NONE ((u16)0xffff) + +/* + * Hash key, padded out to 16 bytes for fast compare + */ +/* *INDENT-OFF* */ +typedef union { + CLIB_PACKED (struct { + ip4_address_t src; + ip4_address_t dst; + u16 fragment_id; + u8 protocol; + }); + u64 as_u64[2]; + u32 as_u32[4]; +} map_ip4_reass_key_t; +/* *INDENT-ON* */ + +typedef struct +{ + map_ip4_reass_key_t key; + f64 ts; +#ifdef MAP_IP4_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + i32 
port; + u16 bucket; + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip4_reass_t; + +/* + * MAP domain counters + */ +typedef enum +{ + /* Simple counters */ + MAP_DOMAIN_IPV4_FRAGMENT = 0, + /* Combined counters */ + MAP_DOMAIN_COUNTER_RX = 0, + MAP_DOMAIN_COUNTER_TX, + MAP_N_DOMAIN_COUNTER +} map_domain_counter_t; + +/* + * IPv6 reassembly hash key, padded out to 40 bytes (5 x u64) for fast compare + */ +/* *INDENT-OFF* */ +typedef union { + CLIB_PACKED (struct { + ip6_address_t src; + ip6_address_t dst; + u32 fragment_id; + u8 protocol; + }); + u64 as_u64[5]; + u32 as_u32[10]; +} map_ip6_reass_key_t; +/* *INDENT-OFF* */ + +typedef struct { + u32 pi; //Cached packet or ~0 + u16 next_data_offset; //The data offset of the additional 20 bytes; 0 marks an available slot + u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment) + u8 next_data[20]; //The 20 additional bytes +} map_ip6_fragment_t; + +typedef struct { + map_ip6_reass_key_t key; + f64 ts; +#ifdef MAP_IP6_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + u16 bucket; //What hash bucket this element is linked in + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + ip4_header_t ip4_header; + map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip6_reass_t; + +#ifdef MAP_SKIP_IP6_LOOKUP +/** + * A pre-resolved next-hop + */ +typedef struct map_main_pre_resolved_t_ +{ + /** + * Linkage into the FIB graph + */ + fib_node_t node; + + /** + * The FIB entry index of the next-hop + */ + fib_node_index_t fei; + + /** + * This object sibling index on the FIB entry's child dependency list + */ + u32 sibling; + + /** + * The Load-balance object index to use to forward + */ + dpo_id_t dpo; +} map_main_pre_resolved_t; + +/** + * Pre-resolved next hops for v4 and v6. Why these are global and not + * per-domain is beyond me. 
+ */ +extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; +#endif + +typedef struct { + /* pool of MAP domains */ + map_domain_t *domains; + + /* MAP Domain packet/byte counters indexed by map domain index */ + vlib_simple_counter_main_t *simple_domain_counters; + vlib_combined_counter_main_t *domain_counters; + volatile u32 *counter_lock; + + /* API message id base */ + u16 msg_id_base; + + /* Traffic class: zero, copy (~0) or fixed value */ + u8 tc; + bool tc_copy; + + bool sec_check; /* Inbound security check */ + bool sec_check_frag; /* Inbound security check for (subsequent) fragments */ + bool icmp6_enabled; /* Send destination unreachable for security check failure */ + + bool is_ce; /* If this MAP node is a Customer Edge router*/ + + /* ICMPv6 -> ICMPv4 relay parameters */ + ip4_address_t icmp4_src_address; + vlib_simple_counter_main_t icmp_relayed; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* + * IPv4 encap and decap reassembly + */ + /* Configuration */ + f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly + + /* Runtime */ + map_ip4_reass_t *ip4_reass_pool; + u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip4_reass_allocated; + u16 *ip4_reass_hash_table; + u16 ip4_reass_fifo_last; + volatile u32 *ip4_reass_lock; + + /* Counters */ + u32 ip4_reass_buffered_counter; + + bool frag_inner; /* Inner or outer fragmentation */ + bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */ + + /* + * IPv6 decap reassembly + */ + /* Configuration */ + f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip6_reass_conf_lifetime_ms; 
//Time a reassembly struct is considered valid in ms + u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly + + /* Runtime */ + map_ip6_reass_t *ip6_reass_pool; + u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip6_reass_allocated; + u16 *ip6_reass_hash_table; + u16 ip6_reass_fifo_last; + volatile u32 *ip6_reass_lock; + + /* Counters */ + u32 ip6_reass_buffered_counter; + +} map_main_t; + +/* + * MAP Error counters/messages + */ +#define foreach_map_error \ + /* Must be first. */ \ + _(NONE, "valid MAP packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(SEC_CHECK, "security check failed") \ + _(ENCAP_SEC_CHECK, "encap security check failed") \ + _(DECAP_SEC_CHECK, "decap security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(ICMP_RELAY, "unable to relay ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_BINDING, "no binding") \ + _(NO_DOMAIN, "no domain") \ + _(FRAGMENTED, "packet is a fragment") \ + _(FRAGMENT_MEMORY, "could not cache fragment") \ + _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ + _(FRAGMENT_DROPPED, "dropped cached fragment") \ + _(MALFORMED, "malformed packet") \ + _(DF_SET, "can't fragment, DF set") + +typedef enum { +#define _(sym,str) MAP_ERROR_##sym, + foreach_map_error +#undef _ + MAP_N_ERROR, + } map_error_t; + +u64 map_error_counter_get(u32 node_index, map_error_t map_error); + +typedef struct { + u32 map_domain_index; + u16 port; +} map_trace_t; + +extern map_main_t map_main; + +extern vlib_node_registration_t ip4_map_node; +extern vlib_node_registration_t ip6_map_node; + +extern vlib_node_registration_t ip4_map_t_node; +extern vlib_node_registration_t ip4_map_t_fragmented_node; +extern vlib_node_registration_t ip4_map_t_tcp_udp_node; +extern vlib_node_registration_t ip4_map_t_icmp_node; + +extern vlib_node_registration_t ip6_map_t_node; +extern vlib_node_registration_t ip6_map_t_fragmented_node; +extern vlib_node_registration_t ip6_map_t_tcp_udp_node; +extern 
vlib_node_registration_t ip6_map_t_icmp_node; + +/* + * map_get_pfx + */ +static_always_inline u64 +map_get_pfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[0]); + + u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask; + u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid; + + return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift; +} + +static_always_inline u64 +map_get_pfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +/* + * map_get_sfx + */ +static_always_inline u64 +map_get_sfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + /* Shared 1:1 mode. */ + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[1]); + if (d->ip6_prefix_len == 128) + return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); + + if (d->flags & MAP_DOMAIN_RFC6052) + return (clib_net_to_host_u64(d->ip6_prefix.as_u64[1]) | addr); + + /* IPv4 prefix */ + if (d->flags & MAP_DOMAIN_PREFIX) + return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16; + + /* Shared or full IPv4 address */ + return ((u64) addr << 16) | psid; +} + +static_always_inline u64 +map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +static_always_inline u32 +map_get_ip4 (ip6_address_t *addr, map_domain_flags_e flags) +{ + if (flags & MAP_DOMAIN_RFC6052) + return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1])); + else + return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); +} + +/* + * Get the MAP domain from an IPv4 lookup adjacency. 
+ */ +static_always_inline map_domain_t * +ip4_map_get_domain (u32 mdi) +{ + map_main_t *mm = &map_main; + + return pool_elt_at_index(mm->domains, mdi); +} + +/* + * Get the MAP domain from an IPv6 lookup adjacency. + * If the IPv6 address or prefix is not shared, no lookup is required. + * The IPv4 address is used otherwise. + */ +static_always_inline map_domain_t * +ip6_map_get_domain (u32 mdi, + ip4_address_t *addr, + u32 *map_domain_index, + u8 *error) +{ + map_main_t *mm = &map_main; + +#ifdef TODO + /* + * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. + * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) + */ + //#ifdef MAP_NONSHARED_DOMAIN_ENABLED + //#error "How can you be sure this domain is not shared?" +#endif + + *map_domain_index = mdi; + return pool_elt_at_index(mm->domains, mdi); + +#ifdef TODO + u32 lbi = ip4_fib_forwarding_lookup(0, addr); + const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0); + if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || + dpo->dpoi_type == map_t_dpo_type)) + { + *map_domain_index = dpo->dpoi_index; + return pool_elt_at_index(mm->domains, *map_domain_index); + } + *error = MAP_ERROR_NO_DOMAIN; + return NULL; +#endif +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); + +#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {} +#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0) + +static_always_inline void +map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) +{ + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i] != ~0) { + vec_add1(*pi, r->fragments[i]); + r->fragments[i] = ~0; + map_main.ip4_reass_buffered_counter--; + } +} + +clib_error_t * map_plugin_api_hookup (vlib_main_t * vm); + +int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); + 
+map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop); + +#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {} +#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0) + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len); + +void map_ip4_drop_pi(u32 pi); + +int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip4_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip4_reass_conf_buffers(u32 buffers); +#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) + +void map_ip6_drop_pi(u32 pi); + + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100 +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip6_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip6_reass_conf_buffers(u32 buffers); +#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) + +static_always_inline void +ip4_map_t_embedded_address (map_domain_t *d, + ip6_address_t *ip6, const ip4_address_t *ip4) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + ip6->as_u64[0] = d->ip6_src.as_u64[0]; + ip6->as_u32[2] = d->ip6_src.as_u32[2]; + ip6->as_u32[3] = ip4->as_u32; +} + +static_always_inline u32 +ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr) +{ + ASSERT(d->ip6_src_len == 96); //No 
support for other lengths for now + return addr->as_u32[3]; +} + +static inline void +map_domain_counter_lock (map_main_t *mm) +{ + if (mm->counter_lock) + while (__sync_lock_test_and_set(mm->counter_lock, 1)) + /* zzzz */ ; +} +static inline void +map_domain_counter_unlock (map_main_t *mm) +{ + if (mm->counter_lock) + *mm->counter_lock = 0; +} + + +static_always_inline void +map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, + vlib_node_runtime_t *node, vlib_error_t *error, + u32 next) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + //Deal with fragments that are ready + from = pi_vector; + n_left_from = vec_len(pi_vector); + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0); + p0->error = *error; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/map_all_api_h.h b/src/plugins/map/map_all_api_h.h new file mode 100644 index 00000000000..4fb3e199bb2 --- /dev/null +++ b/src/plugins/map/map_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * map_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c new file mode 100644 index 00000000000..90dbd1bcfd6 --- /dev/null +++ b/src/plugins/map/map_api.c @@ -0,0 +1,311 @@ +/* + *------------------------------------------------------------------ + * map_api.c - vnet map api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +#define REPLY_MSG_ID_BASE mm->msg_id_base +#include + +static void +vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp) +{ + map_main_t *mm = &map_main; + vl_api_map_add_domain_reply_t *rmp; + int rv = 0; + u32 index; + u8 flags = 0; + + if (mp->is_translation) + flags |= MAP_DOMAIN_TRANSLATION; + + if (mp->is_rfc6052) + flags |= MAP_DOMAIN_RFC6052; + + rv = + map_create_domain ((ip4_address_t *) & mp->ip4_prefix, mp->ip4_prefix_len, + (ip6_address_t *) & mp->ip6_prefix, mp->ip6_prefix_len, + (ip6_address_t *) & mp->ip6_src, + mp->ip6_src_prefix_len, mp->ea_bits_len, + mp->psid_offset, mp->psid_length, &index, + ntohs (mp->mtu), flags); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MAP_ADD_DOMAIN_REPLY, + ({ + rmp->index = ntohl(index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_map_del_domain_t_handler (vl_api_map_del_domain_t * mp) +{ + map_main_t *mm = &map_main; + vl_api_map_del_domain_reply_t *rmp; + int rv = 0; + + rv = map_delete_domain (ntohl (mp->index)); + + REPLY_MACRO (VL_API_MAP_DEL_DOMAIN_REPLY); +} + +static void +vl_api_map_add_del_rule_t_handler (vl_api_map_add_del_rule_t * mp) +{ + map_main_t *mm = &map_main; + vl_api_map_del_domain_reply_t *rmp; + int rv = 0; + + rv = + map_add_del_psid (ntohl (mp->index), ntohs (mp->psid), + (ip6_address_t *) mp->ip6_dst, mp->is_add); + + REPLY_MACRO (VL_API_MAP_ADD_DEL_RULE_REPLY); +} + +static void +vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp) +{ + vl_api_map_domain_details_t *rmp; + map_main_t *mm = &map_main; + map_domain_t *d; + vl_api_registration_t *reg; + + if (pool_elts (mm->domains) == 0) + return; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + /* *INDENT-OFF* */ + pool_foreach(d, mm->domains, + ({ + /* 
Make sure every field is initiated (or don't skip the memset()) */ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons(VL_API_MAP_DOMAIN_DETAILS + mm->msg_id_base); + rmp->context = mp->context; + rmp->domain_index = htonl(d - mm->domains); + clib_memcpy(rmp->ip6_prefix, &d->ip6_prefix, sizeof(rmp->ip6_prefix)); + clib_memcpy(rmp->ip4_prefix, &d->ip4_prefix, sizeof(rmp->ip4_prefix)); + clib_memcpy(rmp->ip6_src, &d->ip6_src, sizeof(rmp->ip6_src)); + rmp->ip6_prefix_len = d->ip6_prefix_len; + rmp->ip4_prefix_len = d->ip4_prefix_len; + rmp->ip6_src_len = d->ip6_src_len; + rmp->ea_bits_len = d->ea_bits_len; + rmp->psid_offset = d->psid_offset; + rmp->psid_length = d->psid_length; + rmp->flags = d->flags; + rmp->mtu = htons(d->mtu); + rmp->is_translation = (d->flags & MAP_DOMAIN_TRANSLATION); // Redundant + + vl_api_send_msg (reg, (u8 *) rmp); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_map_rule_dump_t_handler (vl_api_map_rule_dump_t * mp) +{ + vl_api_registration_t *reg; + u16 i; + ip6_address_t dst; + vl_api_map_rule_details_t *rmp; + map_main_t *mm = &map_main; + u32 domain_index = ntohl (mp->domain_index); + map_domain_t *d; + + if (pool_elts (mm->domains) == 0) + return; + + d = pool_elt_at_index (mm->domains, domain_index); + if (!d || !d->rules) + { + return; + } + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + for (i = 0; i < (0x1 << d->psid_length); i++) + { + dst = d->rules[i]; + if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0) + { + continue; + } + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MAP_RULE_DETAILS + mm->msg_id_base); + rmp->psid = htons (i); + clib_memcpy (rmp->ip6_dst, &dst, sizeof (rmp->ip6_dst)); + rmp->context = mp->context; + vl_api_send_msg (reg, (u8 *) rmp); + } +} + +static void +vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp) +{ + vl_api_map_summary_stats_reply_t *rmp; + 
vlib_combined_counter_main_t *cm; + vlib_counter_t v; + int i, which; + u64 total_pkts[VLIB_N_RX_TX]; + u64 total_bytes[VLIB_N_RX_TX]; + map_main_t *mm = &map_main; + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_MAP_SUMMARY_STATS_REPLY + mm->msg_id_base); + rmp->context = mp->context; + rmp->retval = 0; + + if (pool_elts (mm->domains) == 0) + { + rmp->retval = -1; + goto out; + } + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + map_domain_counter_lock (mm); + vec_foreach (cm, mm->domain_counters) + { + which = cm - mm->domain_counters; + + for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) + { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + + map_domain_counter_unlock (mm); + + /* Note: in network byte order! */ + rmp->total_pkts[MAP_DOMAIN_COUNTER_RX] = + clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_RX]); + rmp->total_bytes[MAP_DOMAIN_COUNTER_RX] = + clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_RX]); + rmp->total_pkts[MAP_DOMAIN_COUNTER_TX] = + clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_TX]); + rmp->total_bytes[MAP_DOMAIN_COUNTER_TX] = + clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_TX]); + rmp->total_bindings = clib_host_to_net_u64 (pool_elts (mm->domains)); + rmp->total_ip4_fragments = 0; // Not yet implemented. Should be a simple counter. 
+ rmp->total_security_check[MAP_DOMAIN_COUNTER_TX] = + clib_host_to_net_u64 (map_error_counter_get + (ip4_map_node.index, MAP_ERROR_ENCAP_SEC_CHECK)); + rmp->total_security_check[MAP_DOMAIN_COUNTER_RX] = + clib_host_to_net_u64 (map_error_counter_get + (ip4_map_node.index, MAP_ERROR_DECAP_SEC_CHECK)); + +out: + vl_api_send_msg (reg, (u8 *) rmp); +} + +#define foreach_map_plugin_api_msg \ +_(MAP_ADD_DOMAIN, map_add_domain) \ +_(MAP_DEL_DOMAIN, map_del_domain) \ +_(MAP_ADD_DEL_RULE, map_add_del_rule) \ +_(MAP_DOMAIN_DUMP, map_domain_dump) \ +_(MAP_RULE_DUMP, map_rule_dump) \ +_(MAP_SUMMARY_STATS, map_summary_stats) + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (map_main_t * mm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + mm->msg_id_base); + foreach_vl_msg_name_crc_map; +#undef _ +} + +/* Set up the API message handling tables */ +clib_error_t * +map_plugin_api_hookup (vlib_main_t * vm) +{ + map_main_t *mm = &map_main; + u8 *name = format (0, "map_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + mm->msg_id_base = + vl_msg_api_get_msg_ids ((char *) name, VL_MSG_FIRST_AVAILABLE); +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_map_plugin_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (mm, &api_main); + + vec_free (name); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/map/map_doc.md b/src/plugins/map/map_doc.md new file mode 100644 index 00000000000..17f3c51174b --- /dev/null +++ b/src/plugins/map/map_doc.md @@ -0,0 +1,69 @@ +# VPP MAP and Lw4o6 implementation 
{#map_doc} + +This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations. +Everything that is not directly obvious should come here. + + + +## MAP-E Virtual Reassembly + +The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments. + +Fragment caching and handling is not always necessary. It is performed when: +* An IPv4 fragment is received and the destination IPv4 address is shared. +* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on. +* An IPv6 fragment is received. + +There are 3 dedicated nodes: +* ip4-map-reass +* ip6-map-ip4-reass +* ip6-map-ip6-reass + +ip4-map sends all fragments to ip4-map-reass. +ip6-map sends all inner-fragments to ip6-map-ip4-reass. +ip6-map sends all outer-fragments to ip6-map-ip6-reass. + +IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes. + +An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received. 
+ +#### Virtual Reassembly configuration + +IPv4 and IPv6 virtual reassembly support the following configuration: + map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>] + +lifetime: + The time in milliseconds a reassembly structure is considered valid. The longer the lifetime, the more reliable reassembly is, but the more likely it is to exhaust the pool of reassembly structures. The IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases. + +buffers: + The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool. + +pool-size: + The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total. + +ht-ratio: + The number of buckets in the hash-table is pool-size * ht-ratio. + + +Any time pool-size or ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost. + + +##### Additional considerations + +Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that are wide apart. + +Let: +R be the packet rate at which fragments are received. +F be the number of fragments per packet. + +Assuming the first fragment is always received last, we should have: +buffers > lifetime * R / F * (F - 1) +pool-size > lifetime * R/F + +This is a worst case. Receiving the first fragment earlier helps reduce the number of required buffers. 
Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reduce 'pool-size'. + +But the formula shows that it is challenging to forward a significant number of fragmented packets at high rates. For instance, with a lifetime of 1 second, a 5 Mpps packet rate would require buffering up to 2.5 million fragments. + +If you want to do that, be prepared to configure a lot of fragments. + + diff --git a/src/plugins/map/map_dpo.c b/src/plugins/map/map_dpo.c new file mode 100644 index 00000000000..059a4df0a44 --- /dev/null +++ b/src/plugins/map/map_dpo.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +/** + * The register MAP DPO type + */ +dpo_type_t map_dpo_type; +dpo_type_t map_t_dpo_type; + +void +map_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo) +{ + dpo_set(dpo, + map_dpo_type, + dproto, + domain_index); +} + +void +map_t_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo) +{ + dpo_set(dpo, + map_t_dpo_type, + dproto, + domain_index); +} + + +u8* +format_map_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + + return (format(s, "map: domain:%d", index)); +} + +u8* +format_map_t_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + + return (format(s, "map-t: domain:%d", index)); +} + + +static void +map_dpo_lock (dpo_id_t *dpo) +{ +} + +static void +map_dpo_unlock (dpo_id_t *dpo) +{ +} + +const static dpo_vft_t md_vft = { + .dv_lock = map_dpo_lock, + .dv_unlock = map_dpo_unlock, + .dv_format = format_map_dpo, +}; + +const static char* const map_ip4_nodes[] = +{ + "ip4-map", + NULL, +}; +const static char* const map_ip6_nodes[] = +{ + "ip6-map", + NULL, +}; + +const static char* const * const map_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = map_ip4_nodes, + [DPO_PROTO_IP6] = map_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +const static dpo_vft_t md_t_vft = { + .dv_lock = map_dpo_lock, + .dv_unlock = map_dpo_unlock, + .dv_format = format_map_t_dpo, +}; + +const static char* const map_t_ip4_nodes[] = +{ + "ip4-map-t", + NULL, +}; +const static char* const map_t_ip6_nodes[] = +{ + "ip6-map-t", + NULL, +}; + +const static char* const * const map_t_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = map_t_ip4_nodes, + [DPO_PROTO_IP6] = map_t_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +map_dpo_module_init (void) +{ + map_dpo_type = dpo_register_new_type(&md_vft, map_nodes); + map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes); +} diff --git 
a/src/plugins/map/map_dpo.h b/src/plugins/map/map_dpo.h new file mode 100644 index 00000000000..63bf4787383 --- /dev/null +++ b/src/plugins/map/map_dpo.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MAP_DPO_H__ +#define __MAP_DPO_H__ + +#include +#include + +/** + * A representation of a MAP DPO + */ + +extern void map_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo); +extern void map_t_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo); + +extern u8* format_map_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern dpo_type_t map_dpo_type; +extern dpo_type_t map_t_dpo_type; + +extern void map_dpo_module_init(void); + +#endif diff --git a/src/plugins/map/map_msg_enum.h b/src/plugins/map/map_msg_enum.h new file mode 100644 index 00000000000..b135cfc7510 --- /dev/null +++ b/src/plugins/map/map_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * map_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_map_msg_enum_h +#define included_map_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum { +#include + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif diff --git a/src/plugins/map/map_test.c b/src/plugins/map/map_test.c new file mode 100644 index 00000000000..bb69cbcd29d --- /dev/null +++ b/src/plugins/map/map_test.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + *------------------------------------------------------------------ + * map_test.c - test harness plugin + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include + +#define __plugin_msg_base map_test_main.msg_id_base +#include + +/* Declare message IDs */ +#include + +/* Get CRC codes of the messages defined outside of this plugin */ +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +/* define message structures */ +#define vl_typedefs +#include +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} map_test_main_t; + +map_test_main_t map_test_main; + +#define foreach_standard_reply_retval_handler \ +_(map_del_domain_reply) \ +_(map_add_del_rule_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = map_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->json_output) { \ + vat_json_node_t node; \ + vat_json_init_object (&node); \ + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); \ + vat_json_print (vam->ofp, &node); \ + vat_json_free (&node); \ + return; \ + } \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ 
+_(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ +_(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ +_(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ +_(MAP_DOMAIN_DETAILS, map_domain_details) + +static int +api_map_add_domain (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_map_add_domain_t *mp; + + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + u32 num_m_args = 0; + u32 ip6_prefix_len = 0, ip4_prefix_len = 0, ea_bits_len = 0, psid_offset = + 0, psid_length = 0; + u8 is_translation = 0; + u32 mtu = 0; + u32 ip6_src_len = 128; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ip4-pfx %U/%d", unformat_ip4_address, + &ip4_prefix, &ip4_prefix_len)) + num_m_args++; + else if (unformat (i, "ip6-pfx %U/%d", unformat_ip6_address, + &ip6_prefix, &ip6_prefix_len)) + num_m_args++; + else + if (unformat + (i, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, + &ip6_src_len)) + num_m_args++; + else if (unformat (i, "ip6-src %U", unformat_ip6_address, &ip6_src)) + num_m_args++; + else if (unformat (i, "ea-bits-len %d", &ea_bits_len)) + num_m_args++; + else if (unformat (i, "psid-offset %d", &psid_offset)) + num_m_args++; + else if (unformat (i, "psid-len %d", &psid_length)) + num_m_args++; + else if (unformat (i, "mtu %d", &mtu)) + num_m_args++; + else if (unformat (i, "map-t")) + is_translation = 1; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (num_m_args < 3) + { + errmsg ("mandatory argument(s) missing"); + return -99; + } + + /* Construct the API message */ + M (MAP_ADD_DOMAIN, mp); + + clib_memcpy (mp->ip4_prefix, &ip4_prefix, sizeof (ip4_prefix)); + mp->ip4_prefix_len = ip4_prefix_len; + + clib_memcpy (mp->ip6_prefix, &ip6_prefix, sizeof (ip6_prefix)); + mp->ip6_prefix_len = ip6_prefix_len; + + clib_memcpy (mp->ip6_src, &ip6_src, sizeof (ip6_src)); + mp->ip6_src_prefix_len = ip6_src_len; + + mp->ea_bits_len = 
ea_bits_len; + mp->psid_offset = psid_offset; + mp->psid_length = psid_length; + mp->is_translation = is_translation; + mp->mtu = htons (mtu); + + /* send it... */ + S (mp); + + /* Wait for a reply, return good/bad news */ + W (ret); + return ret; +} +static int +api_map_del_domain (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_map_del_domain_t *mp; + + u32 num_m_args = 0; + u32 index; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "index %d", &index)) + num_m_args++; + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + if (num_m_args != 1) + { + errmsg ("mandatory argument(s) missing"); + return -99; + } + + /* Construct the API message */ + M (MAP_DEL_DOMAIN, mp); + + mp->index = ntohl (index); + + /* send it... */ + S (mp); + + /* Wait for a reply, return good/bad news */ + W (ret); + return ret; +} + +static int +api_map_add_del_rule (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_map_add_del_rule_t *mp; + u8 is_add = 1; + ip6_address_t ip6_dst; + u32 num_m_args = 0, index, psid = 0; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "index %d", &index)) + num_m_args++; + else if (unformat (i, "psid %d", &psid)) + num_m_args++; + else if (unformat (i, "dst %U", unformat_ip6_address, &ip6_dst)) + num_m_args++; + else if (unformat (i, "del")) + { + is_add = 0; + } + else + { + clib_warning ("parse error '%U'", format_unformat_error, i); + return -99; + } + } + + /* Construct the API message */ + M (MAP_ADD_DEL_RULE, mp); + + mp->index = ntohl (index); + mp->is_add = is_add; + clib_memcpy (mp->ip6_dst, &ip6_dst, sizeof (ip6_dst)); + mp->psid = ntohs (psid); + + /* send it... 
*/ + S (mp); + + /* Wait for a reply, return good/bad news */ + W (ret); + return ret; +} +static int +api_map_domain_dump (vat_main_t * vam) +{ + map_test_main_t *mm = &map_test_main; + vl_api_map_domain_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + /* Construct the API message */ + M (MAP_DOMAIN_DUMP, mp); + + /* send it... */ + S (mp); + + /* Use a control ping for synchronization */ + mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping)); + mp_ping->_vl_msg_id = htons (mm->ping_id); + mp_ping->client_index = vam->my_client_index; + + fformat (vam->ofp, "Sending ping id=%d\n", mm->ping_id); + + vam->result_ready = 0; + S (mp_ping); + + W (ret); + + return ret; +} + +static int +api_map_rule_dump (vat_main_t * vam) +{ + map_test_main_t *mm = &map_test_main; + unformat_input_t *i = vam->input; + vl_api_map_rule_dump_t *mp; + vl_api_control_ping_t *mp_ping; + u32 domain_index = ~0; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "index %u", &domain_index)) + ; + else + break; + } + + if (domain_index == ~0) + { + clib_warning ("parse error: domain index expected"); + return -99; + } + + /* Construct the API message */ + M (MAP_RULE_DUMP, mp); + + mp->domain_index = htonl (domain_index); + + /* send it... 
*/ + S (mp); + + /* Use a control ping for synchronization */ + /* Use a control ping for synchronization */ + mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping)); + mp_ping->_vl_msg_id = htons (mm->ping_id); + mp_ping->client_index = vam->my_client_index; + + vam->result_ready = 0; + S (mp_ping); + + W (ret); + return ret; +} + +static void vl_api_map_add_domain_reply_t_handler + (vl_api_map_add_domain_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + + if (vam->json_output) { + vat_json_node_t node; + vat_json_init_object (&node); + vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); + vat_json_object_add_uint (&node, "index", ntohl (mp->index)); + vat_json_print (vam->ofp, &node); + vat_json_free (&node); + } + + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_map_domain_details_t_handler_json + (vl_api_map_domain_details_t * mp) +{ + vat_json_node_t *node = NULL; + vat_main_t *vam = &vat_main; + struct in6_addr ip6; + struct in_addr ip4; + + if (VAT_JSON_ARRAY != vam->json_tree.type) + { + ASSERT (VAT_JSON_NONE == vam->json_tree.type); + vat_json_init_array (&vam->json_tree); + } + + node = vat_json_array_add (&vam->json_tree); + vat_json_init_object (node); + + vat_json_object_add_uint (node, "domain_index", + clib_net_to_host_u32 (mp->domain_index)); + clib_memcpy (&ip6, mp->ip6_prefix, sizeof (ip6)); + vat_json_object_add_ip6 (node, "ip6_prefix", ip6); + clib_memcpy (&ip4, mp->ip4_prefix, sizeof (ip4)); + vat_json_object_add_ip4 (node, "ip4_prefix", ip4); + clib_memcpy (&ip6, mp->ip6_src, sizeof (ip6)); + vat_json_object_add_ip6 (node, "ip6_src", ip6); + vat_json_object_add_int (node, "ip6_prefix_len", mp->ip6_prefix_len); + vat_json_object_add_int (node, "ip4_prefix_len", mp->ip4_prefix_len); + vat_json_object_add_int (node, "ip6_src_len", mp->ip6_src_len); + vat_json_object_add_int (node, 
"ea_bits_len", mp->ea_bits_len); + vat_json_object_add_int (node, "psid_offset", mp->psid_offset); + vat_json_object_add_int (node, "psid_length", mp->psid_length); + vat_json_object_add_uint (node, "flags", mp->flags); + vat_json_object_add_uint (node, "mtu", clib_net_to_host_u16 (mp->mtu)); + vat_json_object_add_int (node, "is_translation", mp->is_translation); +} + +static void vl_api_map_domain_details_t_handler + (vl_api_map_domain_details_t * mp) +{ + vat_main_t *vam = &vat_main; + + if (vam->json_output) + return vl_api_map_domain_details_t_handler_json (mp); + + if (mp->is_translation) + { + print (vam->ofp, + "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U/%d (ip6-src) index: %u", + format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len, + format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len, + format_ip6_address, mp->ip6_src, mp->ip6_src_len, + clib_net_to_host_u32 (mp->domain_index)); + } + else + { + print (vam->ofp, + "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U (ip6-src) index: %u", + format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len, + format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len, + format_ip6_address, mp->ip6_src, + clib_net_to_host_u32 (mp->domain_index)); + } + print (vam->ofp, " ea-len %d psid-offset %d psid-len %d mtu %d %s", + mp->ea_bits_len, mp->psid_offset, mp->psid_length, mp->mtu, + mp->is_translation ? 
"map-t" : ""); +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(map_add_domain, \ + "ip4-pfx ip6-pfx " \ + "ip6-src " \ + "ea-bits-len psid-offset psid-len ") \ +_(map_del_domain, "index ") \ +_(map_add_del_rule, \ + "index psid dst [del]") \ +_(map_domain_dump, "") \ +_(map_rule_dump, "index ") + +static void map_api_hookup (vat_main_t *vam) +{ + map_test_main_t * mm = &map_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) \ + hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + map_test_main_t * mm = &map_test_main; + u8 * name; + + mm->vat_main = vam; + + name = format (0, "map_%08x%c", api_version, 0); + mm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + /* Get the control ping ID */ +#define _(id,n,crc) \ + const char *id ## _CRC __attribute__ ((unused)) = #n "_" #crc; + foreach_vl_msg_name_crc_vpe; +#undef _ + mm->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC)); + + if (mm->msg_id_base != (u16) ~0) + map_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/map/test.c b/src/plugins/map/test.c new file mode 100644 index 00000000000..ff10fc929a6 --- /dev/null +++ b/src/plugins/map/test.c @@ -0,0 +1,206 @@ +/* + * test.c : MAP unit tests + * + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "map.h" + +static map_domain_t * +get_domain(ip4_address_t * ip4_prefix, u8 ip4_prefix_len, + ip6_address_t * ip6_prefix, u8 ip6_prefix_len, + ip6_address_t * ip6_src, u8 ip6_src_len, + u8 ea_bits_len, u8 psid_offset, + u8 psid_length, u16 mtu, u8 flags) +{ + map_domain_t * d = malloc(sizeof(*d)); + u8 suffix_len; + + /* EA bits must be within the first 64 bits */ + if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64) + return NULL; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = *ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip6_src = *ip6_src; + d->ip6_src_len = ip6_src_len; + d->ea_bits_len = ea_bits_len; + d->psid_offset = psid_offset; + d->psid_length = psid_length; + d->mtu = mtu; + d->flags = flags; + + /* How many, and which bits to grab from the IPv4 DA */ + if (ip4_prefix_len + ea_bits_len < 32) + { + if (!(flags & MAP_DOMAIN_TRANSLATION)) + d->flags |= MAP_DOMAIN_PREFIX; + d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len; + suffix_len = ea_bits_len; + } + else + { + d->suffix_shift = 0; + suffix_len = 32 - ip4_prefix_len; + } + d->suffix_mask = (1 << suffix_len) - 1; + + d->psid_shift = 16 - psid_length - psid_offset; + d->psid_mask = (1 << d->psid_length) - 1; + + if (ip6_prefix_len + suffix_len + d->psid_length > 64) + return NULL; + + d->ea_shift = 64 - ip6_prefix_len - 
suffix_len - d->psid_length; + + return d; +} + + +/* + * VPP-340: + * map_add_domain ip4-pfx 20.0.0.0/8 ip6-pfx 2001:db8::/40 ip6-src 2001:db8:ffff::/96 ea-bits-len 24 psid-offset 0 psid-len 0 map-t + * IPv4 src = 100.0.0.1 + * IPv4 dst = 20.169.201.219 + * UDP dest port = 1232 + * IPv6 src = 2001:db8:ffff::6400:1 + * IPv6 dst = a9c9:dfb8::14a9:c9db:0 + * a9c9:dfb8::14a9:c9db:0 != 2001:db8:a9:c9db:0:14a9:c9db:0 + */ +static void +test_map_t_destaddr (void) +{ + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + + ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000); + ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000); + ip6_prefix.as_u64[1] = 0; + ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000); + map_domain_t * d = get_domain (&ip4_prefix, 8, &ip6_prefix, 40, &ip6_src, 96, 24, 0, 0, 0, MAP_DOMAIN_TRANSLATION); + + ip6_address_t dst6; + + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db800a9c9db); + assert(dst6.as_u64[1] == 0x000014a9c9db0000); +} + +/* + * VPP-228 + * ip4-pfx 20.0.0.0/8 + * ip6-pfx 2001:db8::/ + * ip6-src 2001:db8:ffff::1 + * ea-bits-len 16 psid-offset 6 psid-len 8 + * 20.169.201.219 port 1232 + */ +static void +test_map_eabits (void) +{ + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + ip6_address_t dst6; + + ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000); + ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000); + ip6_prefix.as_u64[1] = 0; + ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000); + ip6_src.as_u64[1] = clib_host_to_net_u64(0x0000000000000001); + map_domain_t * d = get_domain (&ip4_prefix, 16, &ip6_prefix, 48, &ip6_src, + 128, 16, 6, 8, 0, 0); + assert(!d); + + //20.0.0.0/8 2001:db8::/32 4 2001:db8:a000::14a0:0:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 4, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 
0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a0000000); + assert(dst6.as_u64[1] == 0x000014a000000000); + + //20.0.0.0/8 2001:db8::/32 8 2001:db8:a900::14a9:0:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 8, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a9000000); + assert(dst6.as_u64[1] == 0x000014a900000000); + + //20.0.0.0/8 2001:db8::/32 10 2001:db8:a9c0::14a9:c000:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 10, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a9c00000); + assert(dst6.as_u64[1] == 0x000014a9c0000000); + + //20.0.0.0/8 2001:db8::/32 16 2001:db8:a9c9::14a9:c900:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 16, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a9c90000); + assert(dst6.as_u64[1] == 0x000014a9c9000000); + + //20.0.0.0/8 2001:db8::/32 20 2001:db8:a9c9:d000:0:14a9:c9d0:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 20, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a9c9d000); + assert(dst6.as_u64[1] == 0x000014a9c9d00000); + + //20.0.0.0/8 2001:db8::/32 23 2001:db8:a9c9:da00:0:14a9:c9da:0 + d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 23, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a9c9da00); + assert(dst6.as_u64[1] == 0x000014a9c9da0000); + + //20.169.201.0/24 2001:db8::/32 7 2001:db8:da00::14a9:c9da:0 + d = get_domain 
(&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, + 128, 7, 0, 0, 0, 0); + dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); + dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); + assert(dst6.as_u64[0] == 0x20010db8a8000000); + assert(dst6.as_u64[1] == 0x000014a800000000); +} + +#define foreach_test_case \ + _(map_t_destaddr) \ + _(map_eabits) + +static void +run_tests (void) +{ +#define _(_test_name) \ + test_ ## _test_name (); + + foreach_test_case +#undef _ +} + +int main() +{ + run_tests (); + return 0; +} diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 14f66c31fa7..b87ec932c28 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -2615,99 +2614,6 @@ vl_api_ip_details_t_handler_json (vl_api_ip_details_t * mp) clib_net_to_host_u32 (mp->sw_if_index)); } -static void vl_api_map_domain_details_t_handler_json - (vl_api_map_domain_details_t * mp) -{ - vat_json_node_t *node = NULL; - vat_main_t *vam = &vat_main; - struct in6_addr ip6; - struct in_addr ip4; - - if (VAT_JSON_ARRAY != vam->json_tree.type) - { - ASSERT (VAT_JSON_NONE == vam->json_tree.type); - vat_json_init_array (&vam->json_tree); - } - - node = vat_json_array_add (&vam->json_tree); - vat_json_init_object (node); - - vat_json_object_add_uint (node, "domain_index", - clib_net_to_host_u32 (mp->domain_index)); - clib_memcpy (&ip6, mp->ip6_prefix, sizeof (ip6)); - vat_json_object_add_ip6 (node, "ip6_prefix", ip6); - clib_memcpy (&ip4, mp->ip4_prefix, sizeof (ip4)); - vat_json_object_add_ip4 (node, "ip4_prefix", ip4); - clib_memcpy (&ip6, mp->ip6_src, sizeof (ip6)); - vat_json_object_add_ip6 (node, "ip6_src", ip6); - vat_json_object_add_int (node, "ip6_prefix_len", mp->ip6_prefix_len); - vat_json_object_add_int (node, "ip4_prefix_len", mp->ip4_prefix_len); - vat_json_object_add_int (node, "ip6_src_len", mp->ip6_src_len); - vat_json_object_add_int (node, "ea_bits_len", mp->ea_bits_len); - 
vat_json_object_add_int (node, "psid_offset", mp->psid_offset); - vat_json_object_add_int (node, "psid_length", mp->psid_length); - vat_json_object_add_uint (node, "flags", mp->flags); - vat_json_object_add_uint (node, "mtu", clib_net_to_host_u16 (mp->mtu)); - vat_json_object_add_int (node, "is_translation", mp->is_translation); -} - -static void vl_api_map_domain_details_t_handler - (vl_api_map_domain_details_t * mp) -{ - vat_main_t *vam = &vat_main; - - if (mp->is_translation) - { - print (vam->ofp, - "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U/%d (ip6-src) index: %u", - format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len, - format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len, - format_ip6_address, mp->ip6_src, mp->ip6_src_len, - clib_net_to_host_u32 (mp->domain_index)); - } - else - { - print (vam->ofp, - "* %U/%d (ipv4-prefix) %U/%d (ipv6-prefix) %U (ip6-src) index: %u", - format_ip4_address, mp->ip4_prefix, mp->ip4_prefix_len, - format_ip6_address, mp->ip6_prefix, mp->ip6_prefix_len, - format_ip6_address, mp->ip6_src, - clib_net_to_host_u32 (mp->domain_index)); - } - print (vam->ofp, " ea-len %d psid-offset %d psid-len %d mtu %d %s", - mp->ea_bits_len, mp->psid_offset, mp->psid_length, mp->mtu, - mp->is_translation ? 
"map-t" : ""); -} - -static void vl_api_map_rule_details_t_handler_json - (vl_api_map_rule_details_t * mp) -{ - struct in6_addr ip6; - vat_json_node_t *node = NULL; - vat_main_t *vam = &vat_main; - - if (VAT_JSON_ARRAY != vam->json_tree.type) - { - ASSERT (VAT_JSON_NONE == vam->json_tree.type); - vat_json_init_array (&vam->json_tree); - } - - node = vat_json_array_add (&vam->json_tree); - vat_json_init_object (node); - - vat_json_object_add_uint (node, "psid", clib_net_to_host_u16 (mp->psid)); - clib_memcpy (&ip6, mp->ip6_dst, sizeof (ip6)); - vat_json_object_add_ip6 (node, "ip6_dst", ip6); -} - -static void -vl_api_map_rule_details_t_handler (vl_api_map_rule_details_t * mp) -{ - vat_main_t *vam = &vat_main; - print (vam->ofp, " %d (psid) %U (ip6-dst)", - clib_net_to_host_u16 (mp->psid), format_ip6_address, mp->ip6_dst); -} - static void vl_api_dhcp_compl_event_t_handler (vl_api_dhcp_compl_event_t * mp) { @@ -5525,8 +5431,6 @@ _(ikev2_initiate_del_child_sa_reply) \ _(ikev2_initiate_rekey_child_sa_reply) \ _(delete_loopback_reply) \ _(bd_ip_mac_add_del_reply) \ -_(map_del_domain_reply) \ -_(map_add_del_rule_reply) \ _(want_interface_events_reply) \ _(want_stats_reply) \ _(cop_interface_enable_disable_reply) \ @@ -5781,11 +5685,6 @@ _(IKEV2_INITIATE_REKEY_CHILD_SA_REPLY, ikev2_initiate_rekey_child_sa_reply) \ _(DELETE_LOOPBACK_REPLY, delete_loopback_reply) \ _(BD_IP_MAC_ADD_DEL_REPLY, bd_ip_mac_add_del_reply) \ _(DHCP_COMPL_EVENT, dhcp_compl_event) \ -_(MAP_ADD_DOMAIN_REPLY, map_add_domain_reply) \ -_(MAP_DEL_DOMAIN_REPLY, map_del_domain_reply) \ -_(MAP_ADD_DEL_RULE_REPLY, map_add_del_rule_reply) \ -_(MAP_DOMAIN_DETAILS, map_domain_details) \ -_(MAP_RULE_DETAILS, map_rule_details) \ _(WANT_INTERFACE_EVENTS_REPLY, want_interface_events_reply) \ _(WANT_STATS_REPLY, want_stats_reply) \ _(GET_FIRST_MSG_ID_REPLY, get_first_msg_id_reply) \ @@ -16368,269 +16267,6 @@ api_ikev2_initiate_rekey_child_sa (vat_main_t * vam) return ret; } -/* - * MAP - */ -static int 
-api_map_add_domain (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_map_add_domain_t *mp; - - ip4_address_t ip4_prefix; - ip6_address_t ip6_prefix; - ip6_address_t ip6_src; - u32 num_m_args = 0; - u32 ip6_prefix_len = 0, ip4_prefix_len = 0, ea_bits_len = 0, psid_offset = - 0, psid_length = 0; - u8 is_translation = 0; - u32 mtu = 0; - u32 ip6_src_len = 128; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "ip4-pfx %U/%d", unformat_ip4_address, - &ip4_prefix, &ip4_prefix_len)) - num_m_args++; - else if (unformat (i, "ip6-pfx %U/%d", unformat_ip6_address, - &ip6_prefix, &ip6_prefix_len)) - num_m_args++; - else - if (unformat - (i, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, - &ip6_src_len)) - num_m_args++; - else if (unformat (i, "ip6-src %U", unformat_ip6_address, &ip6_src)) - num_m_args++; - else if (unformat (i, "ea-bits-len %d", &ea_bits_len)) - num_m_args++; - else if (unformat (i, "psid-offset %d", &psid_offset)) - num_m_args++; - else if (unformat (i, "psid-len %d", &psid_length)) - num_m_args++; - else if (unformat (i, "mtu %d", &mtu)) - num_m_args++; - else if (unformat (i, "map-t")) - is_translation = 1; - else - { - clib_warning ("parse error '%U'", format_unformat_error, i); - return -99; - } - } - - if (num_m_args < 3) - { - errmsg ("mandatory argument(s) missing"); - return -99; - } - - /* Construct the API message */ - M (MAP_ADD_DOMAIN, mp); - - clib_memcpy (mp->ip4_prefix, &ip4_prefix, sizeof (ip4_prefix)); - mp->ip4_prefix_len = ip4_prefix_len; - - clib_memcpy (mp->ip6_prefix, &ip6_prefix, sizeof (ip6_prefix)); - mp->ip6_prefix_len = ip6_prefix_len; - - clib_memcpy (mp->ip6_src, &ip6_src, sizeof (ip6_src)); - mp->ip6_src_prefix_len = ip6_src_len; - - mp->ea_bits_len = ea_bits_len; - mp->psid_offset = psid_offset; - mp->psid_length = psid_length; - mp->is_translation = is_translation; - mp->mtu = htons (mtu); - - /* send it... 
*/ - S (mp); - - /* Wait for a reply, return good/bad news */ - W (ret); - return ret; -} - -static int -api_map_del_domain (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_map_del_domain_t *mp; - - u32 num_m_args = 0; - u32 index; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "index %d", &index)) - num_m_args++; - else - { - clib_warning ("parse error '%U'", format_unformat_error, i); - return -99; - } - } - - if (num_m_args != 1) - { - errmsg ("mandatory argument(s) missing"); - return -99; - } - - /* Construct the API message */ - M (MAP_DEL_DOMAIN, mp); - - mp->index = ntohl (index); - - /* send it... */ - S (mp); - - /* Wait for a reply, return good/bad news */ - W (ret); - return ret; -} - -static int -api_map_add_del_rule (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_map_add_del_rule_t *mp; - u8 is_add = 1; - ip6_address_t ip6_dst; - u32 num_m_args = 0, index, psid = 0; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "index %d", &index)) - num_m_args++; - else if (unformat (i, "psid %d", &psid)) - num_m_args++; - else if (unformat (i, "dst %U", unformat_ip6_address, &ip6_dst)) - num_m_args++; - else if (unformat (i, "del")) - { - is_add = 0; - } - else - { - clib_warning ("parse error '%U'", format_unformat_error, i); - return -99; - } - } - - /* Construct the API message */ - M (MAP_ADD_DEL_RULE, mp); - - mp->index = ntohl (index); - mp->is_add = is_add; - clib_memcpy (mp->ip6_dst, &ip6_dst, sizeof (ip6_dst)); - mp->psid = ntohs (psid); - - /* send it... */ - S (mp); - - /* Wait for a reply, return good/bad news */ - W (ret); - return ret; -} - -static int -api_map_domain_dump (vat_main_t * vam) -{ - vl_api_map_domain_dump_t *mp; - vl_api_control_ping_t *mp_ping; - int ret; - - /* Construct the API message */ - M (MAP_DOMAIN_DUMP, mp); - - /* send it... 
*/ - S (mp); - - /* Use a control ping for synchronization */ - MPING (CONTROL_PING, mp_ping); - S (mp_ping); - - W (ret); - return ret; -} - -static int -api_map_rule_dump (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_map_rule_dump_t *mp; - vl_api_control_ping_t *mp_ping; - u32 domain_index = ~0; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "index %u", &domain_index)) - ; - else - break; - } - - if (domain_index == ~0) - { - clib_warning ("parse error: domain index expected"); - return -99; - } - - /* Construct the API message */ - M (MAP_RULE_DUMP, mp); - - mp->domain_index = htonl (domain_index); - - /* send it... */ - S (mp); - - /* Use a control ping for synchronization */ - MPING (CONTROL_PING, mp_ping); - S (mp_ping); - - W (ret); - return ret; -} - -static void vl_api_map_add_domain_reply_t_handler - (vl_api_map_add_domain_reply_t * mp) -{ - vat_main_t *vam = &vat_main; - i32 retval = ntohl (mp->retval); - - if (vam->async_mode) - { - vam->async_errors += (retval < 0); - } - else - { - vam->retval = retval; - vam->result_ready = 1; - } -} - -static void vl_api_map_add_domain_reply_t_handler_json - (vl_api_map_add_domain_reply_t * mp) -{ - vat_main_t *vam = &vat_main; - vat_json_node_t node; - - vat_json_init_object (&node); - vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); - vat_json_object_add_uint (&node, "index", ntohl (mp->index)); - - vat_json_print (vam->ofp, &node); - vat_json_free (&node); - - vam->retval = ntohl (mp->retval); - vam->result_ready = 1; -} - static int api_get_first_msg_id (vat_main_t * vam) { @@ -23761,15 +23397,6 @@ _(ikev2_initiate_del_child_sa, "") \ _(ikev2_initiate_rekey_child_sa, "") \ _(delete_loopback,"sw_if_index ") \ _(bd_ip_mac_add_del, "bd_id [del]") \ -_(map_add_domain, \ - "ip4-pfx ip6-pfx " \ - "ip6-src " \ - "ea-bits-len psid-offset psid-len ") \ -_(map_del_domain, "index ") \ -_(map_add_del_rule, \ - "index psid dst [del]") \ 
-_(map_domain_dump, "") \ -_(map_rule_dump, "index ") \ _(want_interface_events, "enable|disable") \ _(want_stats,"enable|disable") \ _(get_first_msg_id, "client ") \ diff --git a/src/vnet.am b/src/vnet.am index 95b94c3c09c..78eb481bc05 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -488,37 +488,6 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/osi/osi.h -######################################## -# Layer 3 protocol: MAP -######################################## -libvnet_la_SOURCES += \ - vnet/map/map.c \ - vnet/map/map_dpo.c \ - vnet/map/ip4_map.c \ - vnet/map/ip6_map.c \ - vnet/map/ip4_map_t.c \ - vnet/map/ip6_map_t.c \ - vnet/map/map_api.c - -nobase_include_HEADERS += \ - vnet/map/map.h \ - vnet/map/map_dpo.h \ - vnet/map/map.api.h - -API_FILES += vnet/map/map.api - -if ENABLE_TESTS -TESTS += test_map -test_map_SOURCES = \ - vnet/map/test.c -test_map_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG - -test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \ - -lpthread -lvlibmemory -ldl -lsvm -lrt - -test_map_LDFLAGS = -static -endif - ######################################## # Layer 4 protocol: tcp ######################################## diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 2ddea865024..60efe6e4027 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -85,7 +85,6 @@ enum #define foreach_buffer_opaque_union_subtype \ _(ip) \ -_(swt) \ _(l2) \ _(l2t) \ _(l2_classify) \ @@ -210,13 +209,6 @@ typedef struct } bier; } mpls; - /* ip4-in-ip6 softwire termination, only valid there */ - struct - { - u8 swt_disable; - u32 mapping_index; - } swt; - /* l2 bridging path, only valid there */ struct opaque_l2 { diff --git a/src/vnet/map/examples/gen-rules.py b/src/vnet/map/examples/gen-rules.py deleted file mode 100755 index 7964aa9a359..00000000000 --- a/src/vnet/map/examples/gen-rules.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2015 Cisco and/or its affiliates. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import ipaddress -import argparse -import sys - -# map add domain ip4-pfx ip6-pfx ::/0 ip6-src ea-bits-len 0 psid-offset 6 psid-len 6 -# map add rule index <0> psid ip6-dst - -def_ip4_pfx = '192.0.2.0/24' -def_ip6_pfx = '2001:db8::/32' -def_ip6_src = '2001:db8::1' -def_psid_offset = 6 -def_psid_len = 6 -def_ea_bits_len = 0 - -parser = argparse.ArgumentParser(description='MAP VPP configuration generator') -parser.add_argument('-t', action="store", dest="mapmode") -parser.add_argument('-f', action="store", dest="format", default="vpp") -parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx) -parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx) -parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src) -parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len) -parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset) -parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len) -args = parser.parse_args() - -# -# Print domain -# -def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len): - if format == 'vpp': - print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src + - " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) - if format == 
'confd': - print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src + - " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx + - " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) - if format == 'xml': - print("") - print("", i, ""); - print(" " + ip6_src + "") - print(" " + ip6_pfx + "") - print(" " + ip4_pfx + "") - print(" ", eabits_len, "") - print(" ", psid_len, "") - print(" ", psid_offset, "") - -def domain_print_end(): - if format == 'xml': - print("") - -def rule_print(i, psid, dst): - if format == 'vpp': - print("map add rule index", i, "psid", psid, "ip6-dst", dst) - if format == 'confd': - print("binding", psid, "ipv6-addr", dst) - if format == 'xml': - print(" ") - print(" ", psid, "") - print(" ", dst, "") - print(" ") - -# -# Algorithmic mapping Shared IPv4 address -# -def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): - domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len) - domain_print_end() - -# -# 1:1 Full IPv4 address -# -def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): - ip4_pfx = ipaddress.ip_network(ip4_pfx_str) - ip6_src = ipaddress.ip_address(ip6_src_str) - ip6_dst = ipaddress.ip_network(ip6_pfx_str) - psid_len = 0 - mod = ip4_pfx.num_addresses / 1024 - - for i in range(ip4_pfx.num_addresses): - domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0) - domain_print_end() - if ip6_src_ecmp and not i % mod: - ip6_src = ip6_src + 1 - -# -# 1:1 Shared IPv4 address, shared BR (16) VPP CLI -# -def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): - ip4_pfx = ipaddress.ip_network(ip4_pfx_str) - ip6_src = ipaddress.ip_address(ip6_src_str) - ip6_dst = ipaddress.ip_network(ip6_pfx_str) - mod = ip4_pfx.num_addresses / 1024 - - for i in 
range(ip4_pfx.num_addresses): - domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) - for psid in range(0x1 << int(psid_len)): - rule_print(i, psid, str(ip6_dst[(i * (0x1< - - - urn:ietf:params:netconf:base:1.0 - - -]]>]]> - - - - - - - - - - - - - - ''') - -def xml_footer_print(): - print(''' - - - - - - - -]]>]]> - - - - - - -]]>]]> - ''') - - -format = args.format -if format == 'xml': - xml_header_print() -globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len) -if format == 'xml': - xml_footer_print() diff --git a/src/vnet/map/examples/health_check.c b/src/vnet/map/examples/health_check.c deleted file mode 100644 index 5f0d85fec08..00000000000 --- a/src/vnet/map/examples/health_check.c +++ /dev/null @@ -1,109 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void -usage (void) { - fprintf(stderr, - "Usage: health_check" - " -d debug" - " -I interface" - "\n"); - exit(2); -} - -int -main (int argc, char **argv) -{ - int sd, ch; - uint8_t *opt, *pkt; - struct ifreq ifr; - char *interface = NULL; - bool debug = false; - - while ((ch = getopt(argc, argv, "h?" "I:" "d")) != EOF) { - switch(ch) { - case 'I': - interface = optarg; - break; - case 'd': - debug = true; - break; - default: - usage(); - break; - } - } - - argc -= optind; - argv += optind; - - if (!interface) - usage(); - - /* Request a socket descriptor sd. */ - if ((sd = socket (AF_INET6, SOCK_RAW, IPPROTO_IPIP)) < 0) { - perror ("Failed to get socket descriptor "); - exit (EXIT_FAILURE); - } - - memset(&ifr, 0, sizeof(ifr)); - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", interface); - - /* Bind socket to interface of this node. 
*/ - if (setsockopt (sd, SOL_SOCKET, SO_BINDTODEVICE, (void *) &ifr, sizeof (ifr)) < 0) { - perror ("SO_BINDTODEVICE failed"); - exit (EXIT_FAILURE); - } - if (debug) printf("Binding to interface %s\n", interface); - - while (1) { - struct sockaddr_in6 src_addr; - socklen_t addrlen = sizeof(src_addr); - char source[INET6_ADDRSTRLEN+1]; - int len; - uint8_t inpack[IP_MAXPACKET]; - - if ((len = recvfrom(sd, inpack, sizeof(inpack), 0, (struct sockaddr *)&src_addr, &addrlen)) < 0) { - perror("recvfrom failed "); - } - if (inet_ntop(AF_INET6, &src_addr.sin6_addr, source, INET6_ADDRSTRLEN) == NULL) { - perror("inet_ntop() failed."); - exit(EXIT_FAILURE); - } - - /* Reply */ - struct iphdr *ip = (struct iphdr *)inpack; - uint32_t saddr; - struct icmphdr *icmp; - - saddr = ip->saddr; - ip->saddr = ip->daddr; - ip->daddr = saddr; - - switch (ip->protocol) { - case 1: - if (debug) printf ("ICMP Echo request from %s\n", source); - icmp = (struct icmphdr *)&ip[1]; - icmp->type = ICMP_ECHOREPLY; - break; - default: - fprintf(stderr, "Unsupported protocol %d", ip->protocol); - } - if (len = sendto(sd, inpack, len, 0, (struct sockaddr *)&src_addr, addrlen) < 0) { - perror("sendto failed "); - } - } - - close (sd); - - return (EXIT_SUCCESS); -} diff --git a/src/vnet/map/examples/test_map.py b/src/vnet/map/examples/test_map.py deleted file mode 100755 index 21388d49526..00000000000 --- a/src/vnet/map/examples/test_map.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import time,argparse,sys,cmd, unittest -from ipaddress import * - -parser = argparse.ArgumentParser(description='VPP MAP test') -parser.add_argument('-i', nargs='*', action="store", dest="inputdir") -args = parser.parse_args() - -for dir in args.inputdir: - sys.path.append(dir) -from vpp_papi import * - -# -# 1:1 Shared IPv4 address, shared BR (16) VPP CLI -# -def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): - ip4_pfx = ip_network(ip4_pfx_str) 
- ip6_src = ip_address(ip6_src_str) - ip6_dst = ip_network(ip6_pfx_str) - ip6_nul = IPv6Address(u'0::0') - mod = ip4_pfx.num_addresses / 1024 - - for i in range(ip4_pfx.num_addresses): - a = time.clock() - t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0) - #print "Return from map_add_domain", t - if t == None: - print "map_add_domain failed" - continue - if t.retval != 0: - print "map_add_domain failed", t - continue - for psid in range(0x1 << int(psid_len)): - r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<H', msg[0:2]) - size = unpack('>H', msg[2:4]) - print "Received", id, "of size", size - i += 1 - #del msg - continue - - #time.sleep(0.001) - return - -# Create RX thread -rxthread = RXThread() -rxthread.setDaemon(True) - -print "Connect", connect_to_vpe("client124") -import timeit -rxthread.start() -print "After thread started" - -#pneum_kill_thread() -print "After thread killed" - -#t = show_version(0) -#print "Result from show version", t - -print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version") -time.sleep(10) -#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping") - - -disconnect_from_vpe() -sys.exit() - - -print t.program, t.version,t.builddate,t.builddirectory - -''' - -t = map_domain_dump(0) -if not t: - print('show map domain failed') - -for d in t: - print("IP6 prefix:",str(IPv6Address(d.ip6prefix))) - print( "IP4 prefix:",str(IPv4Address(d.ip4prefix))) -''' - -suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP) -unittest.TextTestRunner(verbosity=2).run(suite) - -disconnect_from_vpe() - - diff --git a/src/vnet/map/gen-rules.py b/src/vnet/map/gen-rules.py deleted file mode 100755 index 533a8e237f7..00000000000 --- a/src/vnet/map/gen-rules.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2015 Cisco and/or its affiliates. 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import ipaddress -import argparse -import sys - -# map add domain ip4-pfx ip6-pfx ::/0 ip6-src ea-bits-len 0 psid-offset 6 psid-len 6 -# map add rule index <0> psid ip6-dst - -parser = argparse.ArgumentParser(description='MAP VPP configuration generator') -parser.add_argument('-t', action="store", dest="mapmode") -args = parser.parse_args() - -# -# 1:1 Shared IPv4 address, shared BR -# -def shared11br(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') - ip6_dst = ipaddress.ip_network('bbbb::/32') - psid_len = 6 - for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", - "ea-bits-len 0 psid-offset 6 psid-len", psid_len) - for psid in range(0x1 << psid_len): - print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1< - -vlib_node_registration_t ip4_map_reass_node; - -enum ip4_map_next_e -{ - IP4_MAP_NEXT_IP6_LOOKUP, -#ifdef MAP_SKIP_IP6_LOOKUP - IP4_MAP_NEXT_IP6_REWRITE, -#endif - IP4_MAP_NEXT_IP4_FRAGMENT, - IP4_MAP_NEXT_IP6_FRAGMENT, - IP4_MAP_NEXT_REASS, - IP4_MAP_NEXT_ICMP_ERROR, - IP4_MAP_NEXT_DROP, - IP4_MAP_N_NEXT, -}; - -enum ip4_map_reass_next_t -{ - IP4_MAP_REASS_NEXT_IP6_LOOKUP, - IP4_MAP_REASS_NEXT_IP4_FRAGMENT, - IP4_MAP_REASS_NEXT_DROP, - IP4_MAP_REASS_N_NEXT, -}; - -typedef struct -{ - u32 map_domain_index; - u16 port; - u8 cached; -} map_ip4_map_reass_trace_t; - -u8 * -format_ip4_map_reass_trace (u8 * s, 
va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *); - return format (s, "MAP domain index: %d L4 port: %u Status: %s", - t->map_domain_index, t->port, - t->cached ? "cached" : "forwarded"); -} - -static_always_inline u16 -ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip, - u32 * next, u8 * error) -{ - u16 port = 0; - - if (d->psid_length > 0) - { - if (ip4_get_fragment_offset (ip) == 0) - { - if (PREDICT_FALSE - ((ip->ip_version_and_header_length != 0x45) - || clib_host_to_net_u16 (ip->length) < 28)) - { - return 0; - } - port = ip4_get_port (ip, 0); - if (port) - { - /* Verify that port is not among the well-known ports */ - if ((d->psid_offset > 0) - && (clib_net_to_host_u16 (port) < - (0x1 << (16 - d->psid_offset)))) - { - *error = MAP_ERROR_ENCAP_SEC_CHECK; - } - else - { - if (ip4_get_fragment_more (ip)) - *next = IP4_MAP_NEXT_REASS; - return (port); - } - } - else - { - *error = MAP_ERROR_BAD_PROTOCOL; - } - } - else - { - *next = IP4_MAP_NEXT_REASS; - } - } - return (0); -} - -/* - * ip4_map_vtcfl - */ -static_always_inline u32 -ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p) -{ - map_main_t *mm = &map_main; - u8 tc = mm->tc_copy ? 
ip4->tos : mm->tc; - u32 vtcfl = 0x6 << 28; - vtcfl |= tc << 20; - vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff; - - return (clib_host_to_net_u32 (vtcfl)); -} - -static_always_inline bool -ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) -{ -#ifdef MAP_SKIP_IP6_LOOKUP - if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei) - { - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = - pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index; - return (true); - } -#endif - return (false); -} - -/* - * ip4_map_ttl - */ -static inline void -ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error) -{ - i32 ttl = ip->ttl; - - /* Input node should have reject packets with ttl 0. */ - ASSERT (ip->ttl > 0); - - u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100); - checksum += checksum >= 0xffff; - ip->checksum = checksum; - ttl -= 1; - ip->ttl = ttl; - *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error; - - /* Verify checksum. */ - ASSERT (ip->checksum == ip4_header_checksum (ip)); -} - -static u32 -ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error) -{ - map_main_t *mm = &map_main; - - if (mm->frag_inner) - { - ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu, - IP4_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP4_FRAGMENT); - } - else - { - if (df && !mm->frag_ignore_df) - { - icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable, - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, - mtu); - vlib_buffer_advance (b, sizeof (ip6_header_t)); - *error = MAP_ERROR_DF_SET; - return (IP4_MAP_NEXT_ICMP_ERROR); - } - ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, - IP_FRAG_FLAG_IP6_HEADER); - return (IP4_MAP_NEXT_IP6_FRAGMENT); - } -} - -/* - * ip4_map - */ -static uword -ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - 
vlib_node_get_runtime (vm, ip4_map_node.index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Dual loop */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - map_domain_t *d0, *d1; - u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; - ip4_header_t *ip40, *ip41; - u16 port0 = 0, port1 = 0; - ip6_header_t *ip6h0, *ip6h1; - u32 map_domain_index0 = ~0, map_domain_index1 = ~0; - u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = - IP4_MAP_NEXT_IP6_LOOKUP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - /* IPv4 + 8 = 28. 
possibly plus -40 */ - CLIB_PREFETCH (p2->data - 40, 68, STORE); - CLIB_PREFETCH (p3->data - 40, 68, STORE); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); - map_domain_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - d1 = ip4_map_get_domain (map_domain_index1); - ASSERT (d0); - ASSERT (d1); - - /* - * Shared IPv4 address - */ - port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); - port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1); - - /* Decrement IPv4 TTL */ - ip4_map_decrement_ttl (ip40, &error0); - ip4_map_decrement_ttl (ip41, &error1); - bool df0 = - ip40->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - bool df1 = - ip41->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - - /* MAP calc */ - u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); - u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32); - u16 dp40 = clib_net_to_host_u16 (port0); - u16 dp41 = clib_net_to_host_u16 (port1); - u64 dal60 = map_get_pfx (d0, da40, dp40); - u64 dal61 = map_get_pfx (d1, da41, dp41); - u64 dar60 = map_get_sfx (d0, da40, dp40); - u64 dar61 = map_get_sfx (d1, da41, dp41); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE - && next0 != IP4_MAP_NEXT_REASS) - error0 = MAP_ERROR_NO_BINDING; - if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE - && next1 != IP4_MAP_NEXT_REASS) - error1 = MAP_ERROR_NO_BINDING; - - /* construct ipv6 header */ - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - vlib_buffer_advance (p1, -sizeof (ip6_header_t)); - ip6h0 = vlib_buffer_get_current (p0); - ip6h1 = 
vlib_buffer_get_current (p1); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - ip6h0->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip40, p0); - ip6h1->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip41, p1); - ip6h0->payload_length = ip40->length; - ip6h1->payload_length = ip41->length; - ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h0->hop_limit = 0x40; - ip6h1->hop_limit = 0x40; - ip6h0->src_address = d0->ip6_src; - ip6h1->src_address = d1->ip6_src; - ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); - ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); - ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61); - ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61); - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop - */ - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip6h0->payload_length) + - sizeof (*ip6h0) > d0->mtu))) - { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); - } - else - { - next0 = - ip4_map_ip6_lookup_bypass (p0, - ip40) ? - IP4_MAP_NEXT_IP6_REWRITE : next0; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip6h0->payload_length) + - 40); - } - } - else - { - next0 = IP4_MAP_NEXT_DROP; - } - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop - */ - if (PREDICT_TRUE (error1 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d1->mtu - && (clib_net_to_host_u16 (ip6h1->payload_length) + - sizeof (*ip6h1) > d1->mtu))) - { - next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1); - } - else - { - next1 = - ip4_map_ip6_lookup_bypass (p1, - ip41) ? 
- IP4_MAP_NEXT_IP6_REWRITE : next1; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index1, 1, - clib_net_to_host_u16 - (ip6h1->payload_length) + - 40); - } - } - else - { - next1 = IP4_MAP_NEXT_DROP; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - } - if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr)); - tr->map_domain_index = map_domain_index1; - tr->port = port1; - } - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - map_domain_t *d0; - u8 error0 = MAP_ERROR_NONE; - ip4_header_t *ip40; - u16 port0 = 0; - ip6_header_t *ip6h0; - u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; - u32 map_domain_index0 = ~0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip40 = vlib_buffer_get_current (p0); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); - ASSERT (d0); - - /* - * Shared IPv4 address - */ - port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0); - - /* Decrement IPv4 TTL */ - ip4_map_decrement_ttl (ip40, &error0); - bool df0 = - ip40->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); - - /* MAP calc */ - u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32); - u16 dp40 = clib_net_to_host_u16 (port0); - u64 dal60 = map_get_pfx (d0, da40, dp40); - u64 dar60 = map_get_sfx (d0, da40, dp40); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE - && next0 != IP4_MAP_NEXT_REASS) 
- error0 = MAP_ERROR_NO_BINDING; - - /* construct ipv6 header */ - vlib_buffer_advance (p0, -(sizeof (ip6_header_t))); - ip6h0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - ip6h0->ip_version_traffic_class_and_flow_label = - ip4_map_vtcfl (ip40, p0); - ip6h0->payload_length = ip40->length; - ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; - ip6h0->hop_limit = 0x40; - ip6h0->src_address = d0->ip6_src; - ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60); - ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60); - - /* - * Determine next node. Can be one of: - * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop - */ - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip6h0->payload_length) + - sizeof (*ip6h0) > d0->mtu))) - { - next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0); - } - else - { - next0 = - ip4_map_ip6_lookup_bypass (p0, - ip40) ? - IP4_MAP_NEXT_IP6_REWRITE : next0; - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip6h0->payload_length) + - 40); - } - } - else - { - next0 = IP4_MAP_NEXT_DROP; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - } - - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; -} - -/* - * ip4_map_reass - */ -static uword -ip4_map_reass (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_reass_node.index); - from = vlib_frame_vector_args (frame); - 
n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vlib_get_thread_index (); - u32 *fragments_to_drop = NULL; - u32 *fragments_to_loopback = NULL; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - map_domain_t *d0; - u8 error0 = MAP_ERROR_NONE; - ip4_header_t *ip40; - i32 port0 = 0; - ip6_header_t *ip60; - u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; - u32 map_domain_index0; - u8 cached = 0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - ip40 = (ip4_header_t *) (ip60 + 1); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); - - map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, - ip40->dst_address.as_u32, - ip40->fragment_id, - ip40->protocol, - &fragments_to_drop); - if (PREDICT_FALSE (!r)) - { - // Could not create a caching entry - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) - { - if (r->port >= 0) - { - // We know the port already - port0 = r->port; - } - else if (map_ip4_reass_add_fragment (r, pi0)) - { - // Not enough space for caching - error0 = MAP_ERROR_FRAGMENT_MEMORY; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - cached = 1; - } - } - else if ((port0 = ip4_get_port (ip40, 0)) == 0) - { - // Could not find port. We'll free the reassembly. 
- error0 = MAP_ERROR_BAD_PROTOCOL; - port0 = 0; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - r->port = port0; - map_ip4_reass_get_fragments (r, &fragments_to_loopback); - } - -#ifdef MAP_IP4_REASS_COUNT_BYTES - if (!cached && r) - { - r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; - if (!ip4_get_fragment_more (ip40)) - r->expected_total = - ip4_get_fragment_offset (ip40) * 8 + - clib_host_to_net_u16 (ip40->length) - 20; - if (r->forwarded >= r->expected_total) - map_ip4_reass_free (r, &fragments_to_drop); - } -#endif - - map_ip4_reass_unlock (); - - // NOTE: Most operations have already been performed by ip4_map - // All we need is the right destination address - ip60->dst_address.as_u64[0] = - map_get_pfx_net (d0, ip40->dst_address.as_u32, port0); - ip60->dst_address.as_u64[1] = - map_get_sfx_net (d0, ip40->dst_address.as_u32, port0); - - if (PREDICT_FALSE - (d0->mtu - && (clib_net_to_host_u16 (ip60->payload_length) + - sizeof (*ip60) > d0->mtu))) - { - vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60); - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; - vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; - next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_ip4_map_reass_trace_t *tr = - vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - tr->cached = cached; - } - - if (cached) - { - //Dequeue the packet - n_left_to_next++; - to_next--; - } - else - { - if (error0 == MAP_ERROR_NONE) - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip60->payload_length) + 40); - next0 = - (error0 == MAP_ERROR_NONE) ? 
next0 : IP4_MAP_REASS_NEXT_DROP; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - - //Loopback when we reach the end of the inpu vector - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy (from, - fragments_to_loopback + (len - - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - map_send_all_to_node (vm, fragments_to_drop, node, - &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], - IP4_MAP_REASS_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -static char *map_error_strings[] = { -#define _(sym,string) string, - foreach_map_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_node) = { - .function = ip4_map, - .name = "ip4-map", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - - .n_next_nodes = IP4_MAP_N_NEXT, - .next_nodes = { - [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup", -#ifdef MAP_SKIP_IP6_LOOKUP - [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance", -#endif - [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", - [IP4_MAP_NEXT_REASS] = "ip4-map-reass", - [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", - [IP4_MAP_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_reass_node) = { - .function = ip4_map_reass, - .name = 
"ip4-map-reass", - .vector_size = sizeof(u32), - .format_trace = format_ip4_map_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - - .n_next_nodes = IP4_MAP_REASS_N_NEXT, - .next_nodes = { - [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP4_MAP_REASS_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c deleted file mode 100644 index 0a1dc8f3212..00000000000 --- a/src/vnet/map/ip4_map_t.c +++ /dev/null @@ -1,898 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "map.h" - -#include "../ip/ip_frag.h" -#include - -#define IP4_MAP_T_DUAL_LOOP 1 - -typedef enum -{ - IP4_MAPT_NEXT_MAPT_TCP_UDP, - IP4_MAPT_NEXT_MAPT_ICMP, - IP4_MAPT_NEXT_MAPT_FRAGMENTED, - IP4_MAPT_NEXT_DROP, - IP4_MAPT_N_NEXT -} ip4_mapt_next_t; - -typedef enum -{ - IP4_MAPT_ICMP_NEXT_IP6_LOOKUP, - IP4_MAPT_ICMP_NEXT_IP6_FRAG, - IP4_MAPT_ICMP_NEXT_DROP, - IP4_MAPT_ICMP_N_NEXT -} ip4_mapt_icmp_next_t; - -typedef enum -{ - IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP, - IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG, - IP4_MAPT_TCP_UDP_NEXT_DROP, - IP4_MAPT_TCP_UDP_N_NEXT -} ip4_mapt_tcp_udp_next_t; - -typedef enum -{ - IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP, - IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG, - IP4_MAPT_FRAGMENTED_NEXT_DROP, - IP4_MAPT_FRAGMENTED_N_NEXT -} ip4_mapt_fragmented_next_t; - -//This is used to pass information within the buffer data. -//Buffer structure being too small to contain big structures like this. -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { - ip6_address_t daddr; - ip6_address_t saddr; - //IPv6 header + Fragmentation header will be here - //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) - u8 unused[28]; -}) ip4_mapt_pseudo_header_t; -/* *INDENT-ON* */ - - -static_always_inline int -ip4_map_fragment_cache (ip4_header_t * ip4, u16 port) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - if (r) - r->port = port; - - map_ip4_reass_unlock (); - return !r; -} - -static_always_inline i32 -ip4_map_fragment_get_port (ip4_header_t * ip4) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = - map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32, - ip4->fragment_id, - (ip4->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, - &ignore); - i32 ret = r ? 
r->port : -1; - map_ip4_reass_unlock (); - return ret; -} - -typedef struct -{ - map_domain_t *d; - u16 id; -} icmp_to_icmp6_ctx_t; - -static int -ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) -{ - icmp_to_icmp6_ctx_t *ctx = arg; - map_main_t *mm = &map_main; - - if (mm->is_ce) - { - ip6->src_address.as_u64[0] = - map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); - ip6->src_address.as_u64[1] = - map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); - ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, - &ip4->dst_address); - } - else - { - ip4_map_t_embedded_address (ctx->d, &ip6->src_address, - &ip4->src_address); - ip6->dst_address.as_u64[0] = - map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); - ip6->dst_address.as_u64[1] = - map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); - } - - return 0; -} - -static int -ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, - void *arg) -{ - icmp_to_icmp6_ctx_t *ctx = arg; - map_main_t *mm = &map_main; - - if (mm->is_ce) - { - //Note that the destination address is within the domain - //while the source address is the one outside the domain - ip4_map_t_embedded_address (ctx->d, &ip6->src_address, - &ip4->src_address); - ip6->dst_address.as_u64[0] = - map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); - ip6->dst_address.as_u64[1] = - map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->id); - } - else - { - //Note that the source address is within the domain - //while the destination address is the one outside the domain - ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, - &ip4->dst_address); - ip6->src_address.as_u64[0] = - map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); - ip6->src_address.as_u64[1] = - map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->id); - } - - return 0; -} - -static uword -ip4_map_t_icmp (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, 
next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip4_mapt_icmp_next_t next0; - u8 error0; - map_domain_t *d0; - u16 len0; - icmp_to_icmp6_ctx_t ctx0; - ip4_header_t *ip40; - icmp46_header_t *icmp0; - - next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP; - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - error0 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t)); //The pseudo-header is not used - len0 = - clib_net_to_host_u16 (((ip4_header_t *) - vlib_buffer_get_current (p0))->length); - d0 = - pool_elt_at_index (map_main.domains, - vnet_buffer (p0)->map_t.map_domain_index); - - ip40 = vlib_buffer_get_current (p0); - icmp0 = (icmp46_header_t *) (ip40 + 1); - - ctx0.id = ip4_get_port (ip40, icmp0->type == ICMP6_echo_request); - ctx0.d = d0; - if (ctx0.id == 0) - { - // In case of 1:1 mapping, we don't care about the port - if (!(d0->ea_bits_len == 0 && d0->rules)) - { - error0 = MAP_ERROR_ICMP; - goto err0; - } - } - - if (icmp_to_icmp6 - (p0, ip4_to_ip6_set_icmp_cb, &ctx0, - ip4_to_ip6_set_inner_icmp_cb, &ctx0)) - { - error0 = MAP_ERROR_ICMP; - goto err0; - } - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; - } - err0: - if (PREDICT_TRUE (error0 == 
MAP_ERROR_NONE)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - len0); - } - else - { - next0 = IP4_MAPT_ICMP_NEXT_DROP; - } - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static int -ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx) -{ - ip4_mapt_pseudo_header_t *pheader = ctx; - - ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; - ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; - ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0]; - ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1]; - - return 0; -} - -static uword -ip4_map_t_fragmented (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip4_mapt_pseudo_header_t *pheader0; - ip4_mapt_fragmented_next_t next0; - - next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP; - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - - //Accessing pseudo header - pheader0 = vlib_buffer_get_current (p0); - vlib_buffer_advance (p0, sizeof (*pheader0)); - - if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0)) - { - p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; - next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP; - } - else - { - if (vnet_buffer 
(p0)->map_t.mtu < p0->current_length) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static uword -ip4_map_t_tcp_udp (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index); - - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#ifdef IP4_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip4_mapt_pseudo_header_t *pheader0, *pheader1; - ip4_mapt_tcp_udp_next_t next0, next1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; - next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - //Accessing pseudo header - pheader0 = vlib_buffer_get_current (p0); - pheader1 = vlib_buffer_get_current (p1); - vlib_buffer_advance (p0, sizeof (*pheader0)); - vlib_buffer_advance (p1, sizeof (*pheader1)); - - if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) - { - p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer 
(p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - } - - if (ip4_to_ip6_tcp_udp (p1, ip4_to_ip6_set_cb, pheader1)) - { - p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next1 = IP4_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, pi0, pi1, - next0, next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip4_mapt_pseudo_header_t *pheader0; - ip4_mapt_tcp_udp_next_t next0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; - p0 = vlib_get_buffer (vm, pi0); - - //Accessing pseudo header - pheader0 = vlib_buffer_get_current (p0); - vlib_buffer_advance (p0, sizeof (*pheader0)); - - if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) - { - p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = - IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return 
frame->n_vectors; -} - -static_always_inline void -ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0, - ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0, - u8 * error0, ip4_mapt_next_t * next0) -{ - map_main_t *mm = &map_main; - u32 port_offset; - - if (mm->is_ce) - port_offset = 0; - else - port_offset = 2; - - if (PREDICT_FALSE (ip4_get_fragment_offset (ip40))) - { - *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED; - if (d0->ea_bits_len == 0 && d0->rules) - { - *dst_port0 = 0; - } - else - { - *dst_port0 = ip4_map_fragment_get_port (ip40); - *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0; - } - } - else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP)) - { - vnet_buffer (p0)->map_t.checksum_offset = 36; - *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; - *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; - *dst_port0 = - (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + port_offset)); - } - else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP)) - { - vnet_buffer (p0)->map_t.checksum_offset = 26; - *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; - *error0 = ip4_len0 < 28 ? 
MAP_ERROR_MALFORMED : *error0; - *dst_port0 = - (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + port_offset)); - } - else if (ip40->protocol == IP_PROTOCOL_ICMP) - { - *next0 = IP4_MAPT_NEXT_MAPT_ICMP; - if (d0->ea_bits_len == 0 && d0->rules) - *dst_port0 = 0; - else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->code - == ICMP4_echo_reply - || ((icmp46_header_t *) - u8_ptr_add (ip40, - sizeof (*ip40)))->code == ICMP4_echo_request) - *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6)); - } - else - { - *error0 = MAP_ERROR_BAD_PROTOCOL; - } -} - -static uword -ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_map_t_node.index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#ifdef IP4_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip4_header_t *ip40, *ip41; - map_domain_t *d0, *d1; - ip4_mapt_next_t next0 = 0, next1 = 0; - u16 ip4_len0, ip4_len1; - u8 error0, error1; - i32 map_port0, map_port1; - ip4_mapt_pseudo_header_t *pheader0, *pheader1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - error0 = MAP_ERROR_NONE; - error1 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - ip4_len0 = clib_host_to_net_u16 (ip40->length); - ip4_len1 = clib_host_to_net_u16 (ip41->length); - - if (PREDICT_FALSE 
(p0->current_length < ip4_len0 || - ip40->ip_version_and_header_length != 0x45)) - { - error0 = MAP_ERROR_UNKNOWN; - next0 = IP4_MAPT_NEXT_DROP; - } - - if (PREDICT_FALSE (p1->current_length < ip4_len1 || - ip41->ip_version_and_header_length != 0x45)) - { - error1 = MAP_ERROR_UNKNOWN; - next1 = IP4_MAPT_NEXT_DROP; - } - - vnet_buffer (p0)->map_t.map_domain_index = - vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); - vnet_buffer (p1)->map_t.map_domain_index = - vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - d1 = ip4_map_get_domain (vnet_buffer (p1)->map_t.map_domain_index); - - vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; - vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; - - map_port0 = -1; - map_port1 = -1; - - ip4_map_t_classify (p0, d0, ip40, ip4_len0, &map_port0, &error0, - &next0); - ip4_map_t_classify (p1, d1, ip41, ip4_len1, &map_port1, &error1, - &next1); - - //Add MAP-T pseudo header in front of the packet - vlib_buffer_advance (p0, -sizeof (*pheader0)); - vlib_buffer_advance (p1, -sizeof (*pheader1)); - pheader0 = vlib_buffer_get_current (p0); - pheader1 = vlib_buffer_get_current (p1); - - //Save addresses within the packet - if (mm->is_ce) - { - ip4_map_t_embedded_address (d0, &pheader0->daddr, - &ip40->dst_address); - ip4_map_t_embedded_address (d1, &pheader1->daddr, - &ip41->dst_address); - pheader0->saddr.as_u64[0] = - map_get_pfx_net (d0, ip40->src_address.as_u32, - (u16) map_port0); - pheader0->saddr.as_u64[1] = - map_get_sfx_net (d0, ip40->src_address.as_u32, - (u16) map_port0); - pheader1->saddr.as_u64[0] = - map_get_pfx_net (d1, ip41->src_address.as_u32, - (u16) map_port1); - pheader1->saddr.as_u64[1] = - map_get_sfx_net (d1, ip41->src_address.as_u32, - (u16) map_port1); - } - else - { - ip4_map_t_embedded_address (d0, &pheader0->saddr, - &ip40->src_address); - ip4_map_t_embedded_address (d1, &pheader1->saddr, - &ip41->src_address); - pheader0->daddr.as_u64[0] = - 
map_get_pfx_net (d0, ip40->dst_address.as_u32, - (u16) map_port0); - pheader0->daddr.as_u64[1] = - map_get_sfx_net (d0, ip40->dst_address.as_u32, - (u16) map_port0); - pheader1->daddr.as_u64[0] = - map_get_pfx_net (d1, ip41->dst_address.as_u32, - (u16) map_port1); - pheader1->daddr.as_u64[1] = - map_get_sfx_net (d1, ip41->dst_address.as_u32, - (u16) map_port1); - } - - if (PREDICT_FALSE - (ip4_is_first_fragment (ip40) && (map_port0 != -1) - && (d0->ea_bits_len != 0 || !d0->rules) - && ip4_map_fragment_cache (ip40, map_port0))) - { - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - - if (PREDICT_FALSE - (ip4_is_first_fragment (ip41) && (map_port1 != -1) - && (d1->ea_bits_len != 0 || !d1->rules) - && ip4_map_fragment_cache (ip41, map_port1))) - { - error1 = MAP_ERROR_FRAGMENT_MEMORY; - } - - if (PREDICT_TRUE - (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip40->length)); - } - - if (PREDICT_TRUE - (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p1)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip41->length)); - } - - next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; - next1 = (error1 != MAP_ERROR_NONE) ? 
IP4_MAPT_NEXT_DROP : next1; - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip4_header_t *ip40; - map_domain_t *d0; - ip4_mapt_next_t next0; - u16 ip4_len0; - u8 error0; - i32 map_port0; - ip4_mapt_pseudo_header_t *pheader0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - error0 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - ip40 = vlib_buffer_get_current (p0); - ip4_len0 = clib_host_to_net_u16 (ip40->length); - if (PREDICT_FALSE (p0->current_length < ip4_len0 || - ip40->ip_version_and_header_length != 0x45)) - { - error0 = MAP_ERROR_UNKNOWN; - next0 = IP4_MAPT_NEXT_DROP; - } - - vnet_buffer (p0)->map_t.map_domain_index = - vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); - - vnet_buffer (p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; - - map_port0 = -1; - ip4_map_t_classify (p0, d0, ip40, ip4_len0, &map_port0, &error0, - &next0); - - //Add MAP-T pseudo header in front of the packet - vlib_buffer_advance (p0, -sizeof (*pheader0)); - pheader0 = vlib_buffer_get_current (p0); - - //Save addresses within the packet - if (mm->is_ce) - { - ip4_map_t_embedded_address (d0, &pheader0->daddr, - &ip40->dst_address); - pheader0->saddr.as_u64[0] = - map_get_pfx_net (d0, ip40->src_address.as_u32, - (u16) map_port0); - pheader0->saddr.as_u64[1] = - map_get_sfx_net (d0, ip40->src_address.as_u32, - (u16) map_port0); - } - else - { - ip4_map_t_embedded_address (d0, &pheader0->saddr, - &ip40->src_address); - pheader0->daddr.as_u64[0] = - map_get_pfx_net (d0, ip40->dst_address.as_u32, - (u16) map_port0); - pheader0->daddr.as_u64[1] = - map_get_sfx_net (d0, ip40->dst_address.as_u32, - (u16) map_port0); - } - - //It is important to cache at this stage because the result might be necessary - //for packets within the same vector. - //Actually, this approach even provides some limited out-of-order fragments support - if (PREDICT_FALSE - (ip4_is_first_fragment (ip40) && (map_port0 != -1) - && (d0->ea_bits_len != 0 || !d0->rules) - && ip4_map_fragment_cache (ip40, map_port0))) - { - error0 = MAP_ERROR_UNKNOWN; - } - - if (PREDICT_TRUE - (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip40->length)); - } - - next0 = (error0 != MAP_ERROR_NONE) ? 
IP4_MAPT_NEXT_DROP : next0; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static char *map_t_error_strings[] = { -#define _(sym,string) string, - foreach_map_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { - .function = ip4_map_t_fragmented, - .name = "ip4-map-t-fragmented", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT, - .next_nodes = { - [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, - [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { - .function = ip4_map_t_icmp, - .name = "ip4-map-t-icmp", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP4_MAPT_ICMP_N_NEXT, - .next_nodes = { - [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, - [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { - .function = ip4_map_t_tcp_udp, - .name = "ip4-map-t-tcp-udp", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT, - .next_nodes = { - [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup", - [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME, - [IP4_MAPT_TCP_UDP_NEXT_DROP] = 
"error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip4_map_t_node) = { - .function = ip4_map_t, - .name = "ip4-map-t", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP4_MAPT_N_NEXT, - .next_nodes = { - [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp", - [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp", - [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented", - [IP4_MAPT_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c deleted file mode 100644 index 720d13c2879..00000000000 --- a/src/vnet/map/ip6_map.c +++ /dev/null @@ -1,1265 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "map.h" - -#include "../ip/ip_frag.h" -#include -#include - -enum ip6_map_next_e -{ - IP6_MAP_NEXT_IP4_LOOKUP, -#ifdef MAP_SKIP_IP6_LOOKUP - IP6_MAP_NEXT_IP4_REWRITE, -#endif - IP6_MAP_NEXT_IP6_REASS, - IP6_MAP_NEXT_IP4_REASS, - IP6_MAP_NEXT_IP4_FRAGMENT, - IP6_MAP_NEXT_IP6_ICMP_RELAY, - IP6_MAP_NEXT_IP6_LOCAL, - IP6_MAP_NEXT_DROP, - IP6_MAP_NEXT_ICMP, - IP6_MAP_N_NEXT, -}; - -enum ip6_map_ip6_reass_next_e -{ - IP6_MAP_IP6_REASS_NEXT_IP6_MAP, - IP6_MAP_IP6_REASS_NEXT_DROP, - IP6_MAP_IP6_REASS_N_NEXT, -}; - -enum ip6_map_ip4_reass_next_e -{ - IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP, - IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT, - IP6_MAP_IP4_REASS_NEXT_DROP, - IP6_MAP_IP4_REASS_N_NEXT, -}; - -enum ip6_icmp_relay_next_e -{ - IP6_ICMP_RELAY_NEXT_IP4_LOOKUP, - IP6_ICMP_RELAY_NEXT_DROP, - IP6_ICMP_RELAY_N_NEXT, -}; - -vlib_node_registration_t ip6_map_ip4_reass_node; -vlib_node_registration_t ip6_map_ip6_reass_node; -static vlib_node_registration_t ip6_map_icmp_relay_node; - -typedef struct -{ - u32 map_domain_index; - u16 port; - u8 cached; -} map_ip6_map_ip4_reass_trace_t; - -u8 * -format_ip6_map_ip4_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - map_ip6_map_ip4_reass_trace_t *t = - va_arg (*args, map_ip6_map_ip4_reass_trace_t *); - return format (s, "MAP domain index: %d L4 port: %u Status: %s", - t->map_domain_index, t->port, - t->cached ? 
"cached" : "forwarded"); -} - -typedef struct -{ - u16 offset; - u16 frag_len; - u8 out; -} map_ip6_map_ip6_reass_trace_t; - -u8 * -format_ip6_map_ip6_reass_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - map_ip6_map_ip6_reass_trace_t *t = - va_arg (*args, map_ip6_map_ip6_reass_trace_t *); - return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset, - t->frag_len, t->out ? "out" : "in"); -} - -/* - * ip6_map_sec_check - */ -static_always_inline bool -ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4, - ip6_header_t * ip6) -{ - u16 sp4 = clib_net_to_host_u16 (port); - u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32); - u64 sal6 = map_get_pfx (d, sa4, sp4); - u64 sar6 = map_get_sfx (d, sa4, sp4); - - if (PREDICT_FALSE - (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0]) - || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1]))) - return (false); - return (true); -} - -static_always_inline void -ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4, - ip6_header_t * ip6, u32 * next, u8 * error) -{ - map_main_t *mm = &map_main; - if (d->ea_bits_len || d->rules) - { - if (d->psid_length > 0) - { - if (!ip4_is_fragment (ip4)) - { - u16 port = ip4_get_port (ip4, 1); - if (port) - { - if (mm->sec_check) - *error = - ip6_map_sec_check (d, port, ip4, - ip6) ? MAP_ERROR_NONE : - MAP_ERROR_DECAP_SEC_CHECK; - } - else - { - *error = MAP_ERROR_BAD_PROTOCOL; - } - } - else - { - *next = mm->sec_check_frag ? 
IP6_MAP_NEXT_IP4_REASS : *next; - } - } - } -} - -static_always_inline bool -ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) -{ -#ifdef MAP_SKIP_IP6_LOOKUP - if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei) - { - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = - pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index; - return (true); - } -#endif - return (false); -} - -/* - * ip6_map - */ -static uword -ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_node.index); - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vlib_get_thread_index (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Dual loop */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - u8 error0 = MAP_ERROR_NONE; - u8 error1 = MAP_ERROR_NONE; - map_domain_t *d0 = 0, *d1 = 0; - ip4_header_t *ip40, *ip41; - ip6_header_t *ip60, *ip61; - u16 port0 = 0, port1 = 0; - u32 map_domain_index0 = ~0, map_domain_index1 = ~0; - u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; - u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - /* IPv6 + IPv4 header + 8 bytes of ULP */ - CLIB_PREFETCH (p2->data, 68, LOAD); - CLIB_PREFETCH (p3->data, 68, LOAD); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip60 = vlib_buffer_get_current (p0); - ip61 = vlib_buffer_get_current (p1); - vlib_buffer_advance (p0, sizeof (ip6_header_t)); - vlib_buffer_advance (p1, sizeof (ip6_header_t)); - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - - /* - * Encapsulated IPv4 packet - * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled - * - Lookup/Rewrite or Fragment node in case of packet > MTU - * Fragmented IPv6 packet - * ICMP IPv6 packet - * - Error -> Pass to ICMPv6/ICMPv4 relay - * - Info -> Pass to IPv6 local - * Anything else -> drop - */ - if (PREDICT_TRUE - (ip60->protocol == IP_PROTOCOL_IP_IN_IP - && clib_net_to_host_u16 (ip60->payload_length) > 20)) - { - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40-> - src_address.as_u32, &map_domain_index0, - &error0); - } - else if (ip60->protocol == IP_PROTOCOL_ICMP6 && - clib_net_to_host_u16 (ip60->payload_length) > - sizeof (icmp46_header_t)) - { - icmp46_header_t *icmp = (void *) (ip60 + 1); - next0 = (icmp->type == ICMP6_echo_request - || icmp->type == - ICMP6_echo_reply) ? 
IP6_MAP_NEXT_IP6_LOCAL : - IP6_MAP_NEXT_IP6_ICMP_RELAY; - } - else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) - { - next0 = IP6_MAP_NEXT_IP6_REASS; - } - else - { - error0 = MAP_ERROR_BAD_PROTOCOL; - } - if (PREDICT_TRUE - (ip61->protocol == IP_PROTOCOL_IP_IN_IP - && clib_net_to_host_u16 (ip61->payload_length) > 20)) - { - d1 = - ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip41-> - src_address.as_u32, &map_domain_index1, - &error1); - } - else if (ip61->protocol == IP_PROTOCOL_ICMP6 && - clib_net_to_host_u16 (ip61->payload_length) > - sizeof (icmp46_header_t)) - { - icmp46_header_t *icmp = (void *) (ip61 + 1); - next1 = (icmp->type == ICMP6_echo_request - || icmp->type == - ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL : - IP6_MAP_NEXT_IP6_ICMP_RELAY; - } - else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) - { - next1 = IP6_MAP_NEXT_IP6_REASS; - } - else - { - error1 = MAP_ERROR_BAD_PROTOCOL; - } - - if (d0) - { - /* MAP inbound security check */ - ip6_map_security_check (d0, ip40, ip60, &next0, &error0); - - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && - next0 == IP6_MAP_NEXT_IP4_LOOKUP)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_host_to_net_u16 (ip40->length) > d0->mtu))) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.flags = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - next0 = IP6_MAP_NEXT_IP4_FRAGMENT; - } - else - { - next0 = - ip6_map_ip4_lookup_bypass (p0, - ip40) ? 
- IP6_MAP_NEXT_IP4_REWRITE : next0; - } - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip40->length)); - } - } - if (d1) - { - /* MAP inbound security check */ - ip6_map_security_check (d1, ip41, ip61, &next1, &error1); - - if (PREDICT_TRUE (error1 == MAP_ERROR_NONE && - next1 == IP6_MAP_NEXT_IP4_LOOKUP)) - { - if (PREDICT_FALSE - (d1->mtu - && (clib_host_to_net_u16 (ip41->length) > d1->mtu))) - { - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.flags = 0; - vnet_buffer (p1)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - vnet_buffer (p1)->ip_frag.mtu = d1->mtu; - next1 = IP6_MAP_NEXT_IP4_FRAGMENT; - } - else - { - next1 = - ip6_map_ip4_lookup_bypass (p1, - ip41) ? - IP6_MAP_NEXT_IP4_REWRITE : next1; - } - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - map_domain_index1, 1, - clib_net_to_host_u16 - (ip41->length)); - } - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - } - - if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr)); - tr->map_domain_index = map_domain_index1; - tr->port = port1; - } - - if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) - { - /* Set ICMP parameters */ - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable, - ICMP6_destination_unreachable_source_address_failed_policy, - 0); - next0 = IP6_MAP_NEXT_ICMP; - } - else - { - next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_NEXT_DROP; - } - - if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) - { - /* Set ICMP parameters */ - vlib_buffer_advance (p1, -sizeof (ip6_header_t)); - icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable, - ICMP6_destination_unreachable_source_address_failed_policy, - 0); - next1 = IP6_MAP_NEXT_ICMP; - } - else - { - next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP; - } - - /* Reset packet */ - if (next0 == IP6_MAP_NEXT_IP6_LOCAL) - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - if (next1 == IP6_MAP_NEXT_IP6_LOCAL) - vlib_buffer_advance (p1, -sizeof (ip6_header_t)); - - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } - - /* Single loop */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - u8 error0 = MAP_ERROR_NONE; - map_domain_t *d0 = 0; - ip4_header_t *ip40; - ip6_header_t *ip60; - i32 port0 = 0; - u32 map_domain_index0 = ~0; - u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - vlib_buffer_advance (p0, sizeof (ip6_header_t)); - ip40 = vlib_buffer_get_current (p0); - - /* - * Encapsulated IPv4 packet - * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled - * - Lookup/Rewrite or Fragment node in case of packet > MTU - * Fragmented IPv6 packet - * ICMP IPv6 packet - * - Error -> Pass to ICMPv6/ICMPv4 relay - * - Info -> Pass to IPv6 local - * Anything else -> drop - */ - if (PREDICT_TRUE - (ip60->protocol == IP_PROTOCOL_IP_IN_IP - && clib_net_to_host_u16 (ip60->payload_length) > 20)) - { - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40-> - src_address.as_u32, &map_domain_index0, - 
&error0); - } - else if (ip60->protocol == IP_PROTOCOL_ICMP6 && - clib_net_to_host_u16 (ip60->payload_length) > - sizeof (icmp46_header_t)) - { - icmp46_header_t *icmp = (void *) (ip60 + 1); - next0 = (icmp->type == ICMP6_echo_request - || icmp->type == - ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL : - IP6_MAP_NEXT_IP6_ICMP_RELAY; - } - else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION && - (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr == - IP_PROTOCOL_IP_IN_IP)) - { - next0 = IP6_MAP_NEXT_IP6_REASS; - } - else - { - error0 = MAP_ERROR_BAD_PROTOCOL; - } - - if (d0) - { - /* MAP inbound security check */ - ip6_map_security_check (d0, ip40, ip60, &next0, &error0); - - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE && - next0 == IP6_MAP_NEXT_IP4_LOOKUP)) - { - if (PREDICT_FALSE - (d0->mtu - && (clib_host_to_net_u16 (ip40->length) > d0->mtu))) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.flags = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - next0 = IP6_MAP_NEXT_IP4_FRAGMENT; - } - else - { - next0 = - ip6_map_ip4_lookup_bypass (p0, - ip40) ? - IP6_MAP_NEXT_IP4_REWRITE : next0; - } - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip40->length)); - } - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = (u16) port0; - } - - if (mm->icmp6_enabled && - (error0 == MAP_ERROR_DECAP_SEC_CHECK - || error0 == MAP_ERROR_NO_DOMAIN)) - { - /* Set ICMP parameters */ - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable, - ICMP6_destination_unreachable_source_address_failed_policy, - 0); - next0 = IP6_MAP_NEXT_ICMP; - } - else - { - next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP6_MAP_NEXT_DROP; - } - - /* Reset packet */ - if (next0 == IP6_MAP_NEXT_IP6_LOCAL) - vlib_buffer_advance (p0, -sizeof (ip6_header_t)); - - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; -} - - -static_always_inline void -ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node, - map_ip6_reass_t * r, u32 ** fragments_ready, - u32 ** fragments_to_drop) -{ - ip4_header_t *ip40; - ip6_header_t *ip60; - ip6_frag_hdr_t *frag0; - vlib_buffer_t *p0; - - if (!r->ip4_header.ip_version_and_header_length) - return; - - //The IP header is here, we need to check for packets - //that can be forwarded - int i; - for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - { - if (r->fragments[i].pi == ~0 || - ((!r->fragments[i].next_data_len) - && (r->fragments[i].next_data_offset != (0xffff)))) - continue; - - p0 = vlib_get_buffer (vm, r->fragments[i].pi); - ip60 = vlib_buffer_get_current (p0); - frag0 = (ip6_frag_hdr_t *) (ip60 + 1); - ip40 = (ip4_header_t *) (frag0 + 1); - - if (ip6_frag_hdr_offset (frag0)) - { - //Not first fragment, add the IPv4 header - clib_memcpy (ip40, &r->ip4_header, 20); - } - -#ifdef MAP_IP6_REASS_COUNT_BYTES - r->forwarded += - clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0); -#endif - - if (ip6_frag_hdr_more (frag0)) - { - //Not last fragment, we copy end of next - clib_memcpy (u8_ptr_add (ip60, p0->current_length), - r->fragments[i].next_data, 20); - p0->current_length += 20; - ip60->payload_length = u16_net_add (ip60->payload_length, 20); - } - - if (!ip4_is_fragment (ip40)) - { - ip40->fragment_id = frag_id_6to4 (frag0->identification); - ip40->flags_and_fragment_offset = - clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0)); - } - else - { - ip40->flags_and_fragment_offset = - clib_host_to_net_u16 
(ip4_get_fragment_offset (ip40) + - ip6_frag_hdr_offset (frag0)); - } - - if (ip6_frag_hdr_more (frag0)) - ip40->flags_and_fragment_offset |= - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - - ip40->length = - clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) - - sizeof (*frag0)); - ip40->checksum = ip4_header_checksum (ip40); - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_ip6_map_ip6_reass_trace_t *tr = - vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->offset = ip4_get_fragment_offset (ip40); - tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40); - tr->out = 1; - } - - vec_add1 (*fragments_ready, r->fragments[i].pi); - r->fragments[i].pi = ~0; - r->fragments[i].next_data_len = 0; - r->fragments[i].next_data_offset = 0; - map_main.ip6_reass_buffered_counter--; - - //TODO: Best solution would be that ip6_map handles extension headers - // and ignores atomic fragment. But in the meantime, let's just copy the header. - - u8 protocol = frag0->next_hdr; - memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60)); - ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol = - protocol; - vlib_buffer_advance (p0, sizeof (*frag0)); - } -} - -void -map_ip6_drop_pi (u32 pi) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_runtime_t *n = - vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index); - vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi); -} - -void -map_ip4_drop_pi (u32 pi) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_runtime_t *n = - vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); - vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi); -} - -/* - * ip6_reass - * TODO: We should count the number of successfully - * transmitted fragment bytes and compare that to the last fragment - * offset such that we can free the reassembly structure when all fragments - * have been forwarded. 
- */ -static uword -ip6_map_ip6_reass (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index); - u32 *fragments_to_drop = NULL; - u32 *fragments_ready = NULL; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Single loop */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - u8 error0 = MAP_ERROR_NONE; - ip6_header_t *ip60; - ip6_frag_hdr_t *frag0; - u16 offset; - u16 next_offset; - u16 frag_len; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - frag0 = (ip6_frag_hdr_t *) (ip60 + 1); - offset = - clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7); - frag_len = - clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0); - next_offset = - ip6_frag_hdr_more (frag0) ? 
(offset + frag_len) : (0xffff); - - //FIXME: Support other extension headers, maybe - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_ip6_map_ip6_reass_trace_t *tr = - vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->offset = offset; - tr->frag_len = frag_len; - tr->out = 0; - } - - map_ip6_reass_lock (); - map_ip6_reass_t *r = - map_ip6_reass_get (&ip60->src_address, &ip60->dst_address, - frag0->identification, frag0->next_hdr, - &fragments_to_drop); - //FIXME: Use better error codes - if (PREDICT_FALSE (!r)) - { - // Could not create a caching entry - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else if (PREDICT_FALSE ((frag_len <= 20 && - (ip6_frag_hdr_more (frag0) || (!offset))))) - { - //Very small fragment are restricted to the last one and - //can't be the first one - error0 = MAP_ERROR_FRAGMENT_MALFORMED; - } - else - if (map_ip6_reass_add_fragment - (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len)) - { - map_ip6_reass_free (r, &fragments_to_drop); - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else - { -#ifdef MAP_IP6_REASS_COUNT_BYTES - if (!ip6_frag_hdr_more (frag0)) - r->expected_total = offset + frag_len; -#endif - ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready, - &fragments_to_drop); -#ifdef MAP_IP6_REASS_COUNT_BYTES - if (r->forwarded >= r->expected_total) - map_ip6_reass_free (r, &fragments_to_drop); -#endif - } - map_ip6_reass_unlock (); - - if (error0 == MAP_ERROR_NONE) - { - if (frag_len > 20) - { - //Dequeue the packet - n_left_to_next++; - to_next--; - } - else - { - //All data from that packet was copied no need to keep it, but this is not an error - p0->error = error_node->errors[MAP_ERROR_NONE]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - pi0, - IP6_MAP_IP6_REASS_NEXT_DROP); - } - } - else - { - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, - IP6_MAP_IP6_REASS_NEXT_DROP); - } - } - 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - map_send_all_to_node (vm, fragments_ready, node, - &error_node->errors[MAP_ERROR_NONE], - IP6_MAP_IP6_REASS_NEXT_IP6_MAP); - map_send_all_to_node (vm, fragments_to_drop, node, - &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], - IP6_MAP_IP6_REASS_NEXT_DROP); - - vec_free (fragments_to_drop); - vec_free (fragments_ready); - return frame->n_vectors; -} - -/* - * ip6_ip4_virt_reass - */ -static uword -ip6_map_ip4_reass (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 thread_index = vlib_get_thread_index (); - u32 *fragments_to_drop = NULL; - u32 *fragments_to_loopback = NULL; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Single loop */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - u8 error0 = MAP_ERROR_NONE; - map_domain_t *d0; - ip4_header_t *ip40; - ip6_header_t *ip60; - i32 port0 = 0; - u32 map_domain_index0 = ~0; - u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; - u8 cached = 0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip40 = vlib_buffer_get_current (p0); - ip60 = ((ip6_header_t *) ip40) - 1; - - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40->src_address.as_u32, - &map_domain_index0, &error0); - - map_ip4_reass_lock (); - //This node only deals with fragmented ip4 - map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, - ip40->dst_address.as_u32, - 
ip40->fragment_id, - ip40->protocol, - &fragments_to_drop); - if (PREDICT_FALSE (!r)) - { - // Could not create a caching entry - error0 = MAP_ERROR_FRAGMENT_MEMORY; - } - else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40))) - { - // This is a fragment - if (r->port >= 0) - { - // We know the port already - port0 = r->port; - } - else if (map_ip4_reass_add_fragment (r, pi0)) - { - // Not enough space for caching - error0 = MAP_ERROR_FRAGMENT_MEMORY; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - cached = 1; - } - } - else if ((port0 = ip4_get_port (ip40, 1)) == 0) - { - // Could not find port from first fragment. Stop reassembling. - error0 = MAP_ERROR_BAD_PROTOCOL; - port0 = 0; - map_ip4_reass_free (r, &fragments_to_drop); - } - else - { - // Found port. Remember it and loopback saved fragments - r->port = port0; - map_ip4_reass_get_fragments (r, &fragments_to_loopback); - } - -#ifdef MAP_IP4_REASS_COUNT_BYTES - if (!cached && r) - { - r->forwarded += clib_host_to_net_u16 (ip40->length) - 20; - if (!ip4_get_fragment_more (ip40)) - r->expected_total = - ip4_get_fragment_offset (ip40) * 8 + - clib_host_to_net_u16 (ip40->length) - 20; - if (r->forwarded >= r->expected_total) - map_ip4_reass_free (r, &fragments_to_drop); - } -#endif - - map_ip4_reass_unlock (); - - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - error0 = - ip6_map_sec_check (d0, port0, ip40, - ip60) ? 
MAP_ERROR_NONE : - MAP_ERROR_DECAP_SEC_CHECK; - - if (PREDICT_FALSE - (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu) - && error0 == MAP_ERROR_NONE && !cached)) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.flags = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - vnet_buffer (p0)->ip_frag.mtu = d0->mtu; - next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT; - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_ip6_map_ip4_reass_trace_t *tr = - vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = map_domain_index0; - tr->port = port0; - tr->cached = cached; - } - - if (cached) - { - //Dequeue the packet - n_left_to_next++; - to_next--; - } - else - { - if (error0 == MAP_ERROR_NONE) - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - map_domain_index0, 1, - clib_net_to_host_u16 - (ip40->length)); - next0 = - (error0 == - MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - - //Loopback when we reach the end of the inpu vector - if (n_left_from == 0 && vec_len (fragments_to_loopback)) - { - from = vlib_frame_vector_args (frame); - u32 len = vec_len (fragments_to_loopback); - if (len <= VLIB_FRAME_SIZE) - { - clib_memcpy (from, fragments_to_loopback, - sizeof (u32) * len); - n_left_from = len; - vec_reset_length (fragments_to_loopback); - } - else - { - clib_memcpy (from, - fragments_to_loopback + (len - - VLIB_FRAME_SIZE), - sizeof (u32) * VLIB_FRAME_SIZE); - n_left_from = VLIB_FRAME_SIZE; - _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE; - } - } - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - map_send_all_to_node (vm, fragments_to_drop, node, - &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], - IP6_MAP_IP4_REASS_NEXT_DROP); - - vec_free (fragments_to_drop); - 
vec_free (fragments_to_loopback); - return frame->n_vectors; -} - -/* - * ip6_icmp_relay - */ -static uword -ip6_map_icmp_relay (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index); - map_main_t *mm = &map_main; - u32 thread_index = vlib_get_thread_index (); - u16 *fragment_ids, *fid; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - /* Get random fragment IDs for replies. */ - fid = fragment_ids = - clib_random_buffer_get_data (&vm->random_buffer, - n_left_from * sizeof (fragment_ids[0])); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Single loop */ - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - u8 error0 = MAP_ERROR_NONE; - ip6_header_t *ip60; - u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP; - u32 mtu; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - u16 tlen = clib_net_to_host_u16 (ip60->payload_length); - - /* - * In: - * IPv6 header (40) - * ICMPv6 header (8) - * IPv6 header (40) - * Original IPv4 header / packet - * Out: - * New IPv4 header - * New ICMP header - * Original IPv4 header / packet - */ - - /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */ - if (tlen < 76) - { - error0 = MAP_ERROR_ICMP_RELAY; - goto error; - } - - icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1); - ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2); - - if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) - { - error0 = MAP_ERROR_ICMP_RELAY; - goto error; - } - - ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1); - vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + 
IPv6 - IPv4 - ICMP ) */ - ip4_header_t *new_ip40 = vlib_buffer_get_current (p0); - icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1); - - /* - * Relay according to RFC2473, section 8.3 - */ - switch (icmp60->type) - { - case ICMP6_destination_unreachable: - case ICMP6_time_exceeded: - case ICMP6_parameter_problem: - /* Type 3 - destination unreachable, Code 1 - host unreachable */ - new_icmp40->type = ICMP4_destination_unreachable; - new_icmp40->code = - ICMP4_destination_unreachable_destination_unreachable_host; - break; - - case ICMP6_packet_too_big: - /* Type 3 - destination unreachable, Code 4 - packet too big */ - /* Potential TODO: Adjust domain tunnel MTU based on the value received here */ - mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1))); - - /* Check DF flag */ - if (! - (inner_ip40->flags_and_fragment_offset & - clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))) - { - error0 = MAP_ERROR_ICMP_RELAY; - goto error; - } - - new_icmp40->type = ICMP4_destination_unreachable; - new_icmp40->code = - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set; - *((u32 *) (new_icmp40 + 1)) = - clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu); - break; - - default: - error0 = MAP_ERROR_ICMP_RELAY; - break; - } - - /* - * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812) - */ - new_ip40->ip_version_and_header_length = 0x45; - new_ip40->tos = 0; - u16 nlen = (tlen - 20) > 576 ? 
576 : tlen - 20; - new_ip40->length = clib_host_to_net_u16 (nlen); - new_ip40->fragment_id = fid[0]; - fid++; - new_ip40->ttl = 64; - new_ip40->protocol = IP_PROTOCOL_ICMP; - new_ip40->src_address = mm->icmp4_src_address; - new_ip40->dst_address = inner_ip40->src_address; - new_ip40->checksum = ip4_header_checksum (new_ip40); - - new_icmp40->checksum = 0; - ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20); - new_icmp40->checksum = ~ip_csum_fold (sum); - - vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0, - 1); - - error: - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->map_domain_index = 0; - tr->port = 0; - } - - next0 = - (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; - -} - -static char *map_error_strings[] = { -#define _(sym,string) string, - foreach_map_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_node) = { - .function = ip6_map, - .name = "ip6-map", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - - .n_next_nodes = IP6_MAP_N_NEXT, - .next_nodes = { - [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", -#ifdef MAP_SKIP_IP6_LOOKUP - [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance", -#endif - [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", - [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", - [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", - [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", - [IP6_MAP_NEXT_DROP] = "error-drop", - [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ 
-VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = { - .function = ip6_map_ip6_reass, - .name = "ip6-map-ip6-reass", - .vector_size = sizeof(u32), - .format_trace = format_ip6_map_ip6_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT, - .next_nodes = { - [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map", - [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = { - .function = ip6_map_ip4_reass, - .name = "ip6-map-ip4-reass", - .vector_size = sizeof(u32), - .format_trace = format_ip6_map_ip4_reass_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT, - .next_nodes = { - [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", - [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { - .function = ip6_map_icmp_relay, - .name = "ip6-map-icmp-relay", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, //FIXME - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = MAP_N_ERROR, - .error_strings = map_error_strings, - .n_next_nodes = IP6_ICMP_RELAY_N_NEXT, - .next_nodes = { - [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c deleted file mode 100644 index 05bac1d429d..00000000000 --- a/src/vnet/map/ip6_map_t.c +++ /dev/null @@ -1,1186 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "map.h" - -#include "../ip/ip_frag.h" -#include -#include - -#define IP6_MAP_T_DUAL_LOOP - -typedef enum -{ - IP6_MAPT_NEXT_MAPT_TCP_UDP, - IP6_MAPT_NEXT_MAPT_ICMP, - IP6_MAPT_NEXT_MAPT_FRAGMENTED, - IP6_MAPT_NEXT_DROP, - IP6_MAPT_N_NEXT -} ip6_mapt_next_t; - -typedef enum -{ - IP6_MAPT_ICMP_NEXT_IP4_LOOKUP, - IP6_MAPT_ICMP_NEXT_IP4_FRAG, - IP6_MAPT_ICMP_NEXT_DROP, - IP6_MAPT_ICMP_N_NEXT -} ip6_mapt_icmp_next_t; - -typedef enum -{ - IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP, - IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG, - IP6_MAPT_TCP_UDP_NEXT_DROP, - IP6_MAPT_TCP_UDP_N_NEXT -} ip6_mapt_tcp_udp_next_t; - -typedef enum -{ - IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP, - IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG, - IP6_MAPT_FRAGMENTED_NEXT_DROP, - IP6_MAPT_FRAGMENTED_N_NEXT -} ip6_mapt_fragmented_next_t; - -static_always_inline int -ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag, - map_domain_t * d, u16 port) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, - d->flags), - ip6_map_t_embedded_address (d, - &ip6-> - dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? 
- IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); - if (r) - r->port = port; - - map_ip4_reass_unlock (); - return !r; -} - -/* Returns the associated port or -1 */ -static_always_inline i32 -ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag, - map_domain_t * d) -{ - u32 *ignore = NULL; - map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, - d->flags), - ip6_map_t_embedded_address (d, - &ip6-> - dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? - IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); - i32 ret = r ? r->port : -1; - map_ip4_reass_unlock (); - return ret; -} - -typedef struct -{ - map_domain_t *d; - u16 id; -} icmp6_to_icmp_ctx_t; - -static int -ip6_to_ip4_set_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) -{ - icmp6_to_icmp_ctx_t *ctx = arg; - map_main_t *mm = &map_main; - - if (mm->is_ce) - { - u32 ip4_dadr; - - //Security check - //Note that this prevents an intermediate IPv6 router from answering the request - ip4_dadr = map_get_ip4 (&ip6->dst_address, ctx->d->flags); - if (ip6->dst_address.as_u64[0] != - map_get_pfx_net (ctx->d, ip4_dadr, ctx->id) - || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_dadr, - ctx->id)) - return -1; - - ip4->src_address.as_u32 = - ip6_map_t_embedded_address (ctx->d, &ip6->src_address); - ip4->dst_address.as_u32 = ip4_dadr; - } - else - { - u32 ip4_sadr; - - //Security check - //Note that this prevents an intermediate IPv6 router from answering the request - ip4_sadr = map_get_ip4 (&ip6->src_address, ctx->d->flags); - if (ip6->src_address.as_u64[0] != - map_get_pfx_net (ctx->d, ip4_sadr, ctx->id) - || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_sadr, - ctx->id)) - return -1; - - ip4->dst_address.as_u32 = - ip6_map_t_embedded_address (ctx->d, &ip6->dst_address); - ip4->src_address.as_u32 = ip4_sadr; - } - - return 0; -} - -static int -ip6_to_ip4_set_inner_icmp_cb (ip6_header_t * ip6, 
ip4_header_t * ip4, - void *arg) -{ - icmp6_to_icmp_ctx_t *ctx = arg; - map_main_t *mm = &map_main; - - if (mm->is_ce) - { - u32 inner_ip4_sadr; - - //Security check of inner packet - inner_ip4_sadr = map_get_ip4 (&ip6->src_address, ctx->d->flags); - if (ip6->src_address.as_u64[0] != - map_get_pfx_net (ctx->d, inner_ip4_sadr, ctx->id) - || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, - inner_ip4_sadr, - ctx->id)) - return -1; - - ip4->src_address.as_u32 = inner_ip4_sadr; - ip4->dst_address.as_u32 = - ip6_map_t_embedded_address (ctx->d, &ip6->dst_address); - } - else - { - u32 inner_ip4_dadr; - - //Security check of inner packet - inner_ip4_dadr = map_get_ip4 (&ip6->dst_address, ctx->d->flags); - if (ip6->dst_address.as_u64[0] != - map_get_pfx_net (ctx->d, inner_ip4_dadr, ctx->id) - || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d, - inner_ip4_dadr, - ctx->id)) - return -1; - - ip4->dst_address.as_u32 = inner_ip4_dadr; - ip4->src_address.as_u32 = - ip6_map_t_embedded_address (ctx->d, &ip6->src_address); - } - - return 0; -} - -static uword -ip6_map_t_icmp (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vlib_get_thread_index (); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - u8 error0; - ip6_mapt_icmp_next_t next0; - map_domain_t *d0; - u16 len0; - icmp6_to_icmp_ctx_t ctx0; - ip6_header_t *ip60; - icmp46_header_t *icmp0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - error0 = 
MAP_ERROR_NONE; - next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - len0 = clib_net_to_host_u16 (ip60->payload_length); - icmp0 = (icmp46_header_t *) (ip60 + 1); - d0 = - pool_elt_at_index (map_main.domains, - vnet_buffer (p0)->map_t.map_domain_index); - - ctx0.id = - ip6_get_port (ip60, icmp0->type == ICMP6_echo_request, - p0->current_length); - ctx0.d = d0; - if (ctx0.id == 0) - { - // In case of 1:1 mapping, we don't care about the port - if (!(d0->ea_bits_len == 0 && d0->rules)) - { - error0 = MAP_ERROR_ICMP; - goto err0; - } - } - - if (icmp6_to_icmp - (p0, ip6_to_ip4_set_icmp_cb, &ctx0, - ip6_to_ip4_set_inner_icmp_cb, &ctx0)) - { - error0 = MAP_ERROR_ICMP; - goto err0; - } - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; - } - err0: - if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - len0); - } - else - { - next0 = IP6_MAPT_ICMP_NEXT_DROP; - } - - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static int -ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx) -{ - vlib_buffer_t *p = ctx; - - ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr; - ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr; - - return 0; -} - -static uword -ip6_map_t_fragmented (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, 
n_left_to_next; - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_t_fragmented_node.index); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#ifdef IP6_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - u32 next0, next1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) - { - p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; - next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; - } - } - - if (ip6_to_ip4_fragmented (p1, ip6_to_ip4_set_cb, p1)) - { - p1->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; - next1 = IP6_MAPT_FRAGMENTED_NEXT_DROP; - } - else - { - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, pi0, pi1, - next0, next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - 
vlib_buffer_t *p0; - u32 next0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - p0 = vlib_get_buffer (vm, pi0); - - if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) - { - p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; - next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static uword -ip6_map_t_tcp_udp (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_t_tcp_udp_node.index); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#ifdef IP6_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip6_mapt_tcp_udp_next_t next0, next1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) - { - p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; - 
next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; - } - } - - if (ip6_to_ip4_tcp_udp (p1, ip6_to_ip4_set_cb, p1, 1)) - { - p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next1 = IP6_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip6_mapt_tcp_udp_next_t next0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; - - p0 = vlib_get_buffer (vm, pi0); - - if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) - { - p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; - next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; - } - else - { - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = - IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return 
frame->n_vectors; -} - -static_always_inline void -ip6_map_t_classify (vlib_buffer_t * p0, ip6_header_t * ip60, - map_domain_t * d0, i32 * map_port0, - u8 * error0, ip6_mapt_next_t * next0, - u32 l4_len0, ip6_frag_hdr_t * frag0) -{ - map_main_t *mm = &map_main; - u32 port_offset; - - if (mm->is_ce) - port_offset = 2; - else - port_offset = 0; - - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - ip6_frag_hdr_offset (frag0))) - { - *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; - if (d0->ea_bits_len == 0 && d0->rules) - { - *map_port0 = 0; - } - else - { - *map_port0 = ip6_map_fragment_get (ip60, frag0, d0); - *error0 = (*map_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED; - } - } - else - if (PREDICT_TRUE - (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) - { - *error0 = - l4_len0 < sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : *error0; - vnet_buffer (p0)->map_t.checksum_offset = - vnet_buffer (p0)->map_t.v6.l4_offset + 16; - *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - *map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + port_offset)); - } - else - if (PREDICT_TRUE - (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) - { - *error0 = - l4_len0 < sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : *error0; - vnet_buffer (p0)->map_t.checksum_offset = - vnet_buffer (p0)->map_t.v6.l4_offset + 6; - *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - *map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + port_offset)); - } - else if (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) - { - *error0 = - l4_len0 < sizeof (icmp46_header_t) ? 
MAP_ERROR_MALFORMED : *error0; - *next0 = IP6_MAPT_NEXT_MAPT_ICMP; - if (d0->ea_bits_len == 0 && d0->rules) - { - *map_port0 = 0; - } - else - if (((icmp46_header_t *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset))->code == - ICMP6_echo_reply - || ((icmp46_header_t *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset))->code == - ICMP6_echo_request) - { - *map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset + 6)); - } - } - else - { - //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. - *error0 = MAP_ERROR_BAD_PROTOCOL; - } -} - -static uword -ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip6_map_t_node.index); - map_main_t *mm = &map_main; - vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 thread_index = vlib_get_thread_index (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#ifdef IP6_MAP_T_DUAL_LOOP - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 pi0, pi1; - vlib_buffer_t *p0, *p1; - ip6_header_t *ip60, *ip61; - u8 error0, error1; - ip6_mapt_next_t next0, next1; - u32 l4_len0, l4_len1; - i32 map_port0, map_port1; - map_domain_t *d0, *d1; - ip6_frag_hdr_t *frag0, *frag1; - next0 = next1 = 0; //Because compiler whines - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - error0 = MAP_ERROR_NONE; - error1 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - ip60 = vlib_buffer_get_current (p0); - ip61 = vlib_buffer_get_current (p1); - - if (mm->is_ce) - { - u32 daddr0, daddr1; - daddr0 = 
0; /* TODO */ - daddr1 = 0; /* TODO */ - /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ - - daddr0 = map_get_ip4 (&ip60->dst_address, 0 /*TODO*/); - daddr1 = map_get_ip4 (&ip61->dst_address, 0 /*TODO*/); - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & daddr0, - &vnet_buffer (p0)->map_t.map_domain_index, - &error0); - d1 = - ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & daddr1, - &vnet_buffer (p1)->map_t.map_domain_index, - &error1); - - daddr0 = map_get_ip4 (&ip60->dst_address, d0->flags); - daddr1 = map_get_ip4 (&ip61->dst_address, d1->flags); - - vnet_buffer (p0)->map_t.v6.daddr = daddr0; - vnet_buffer (p1)->map_t.v6.daddr = daddr1; - vnet_buffer (p0)->map_t.v6.saddr = - ip6_map_t_embedded_address (d0, &ip60->src_address); - vnet_buffer (p1)->map_t.v6.saddr = - ip6_map_t_embedded_address (d1, &ip61->src_address); - } - else - { - u32 saddr0, saddr1; - saddr0 = 0; /* TODO */ - saddr1 = 0; /* TODO */ - /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ - - saddr0 = map_get_ip4 (&ip60->src_address, 0 /*TODO*/); - saddr1 = map_get_ip4 (&ip61->src_address, 0 /*TODO*/); - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & saddr0, - &vnet_buffer (p0)->map_t.map_domain_index, - &error0); - d1 = - ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & saddr1, - &vnet_buffer (p1)->map_t.map_domain_index, - &error1); - - saddr0 = map_get_ip4 (&ip60->src_address, d0->flags); - saddr1 = map_get_ip4 (&ip61->src_address, d1->flags); - - vnet_buffer (p0)->map_t.v6.saddr = saddr0; - vnet_buffer (p1)->map_t.v6.saddr = saddr1; - vnet_buffer (p0)->map_t.v6.daddr = - ip6_map_t_embedded_address (d0, &ip60->dst_address); - vnet_buffer (p1)->map_t.v6.daddr = - ip6_map_t_embedded_address (d1, &ip61->dst_address); - } - - vnet_buffer (p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; - vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; - - if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length, - &(vnet_buffer (p0)->map_t. - v6.l4_protocol), - &(vnet_buffer (p0)->map_t. - v6.l4_offset), - &(vnet_buffer (p0)->map_t. - v6.frag_offset)))) - { - error0 = MAP_ERROR_MALFORMED; - next0 = IP6_MAPT_NEXT_DROP; - } - - if (PREDICT_FALSE (ip6_parse (ip61, p1->current_length, - &(vnet_buffer (p1)->map_t. - v6.l4_protocol), - &(vnet_buffer (p1)->map_t. - v6.l4_offset), - &(vnet_buffer (p1)->map_t. - v6.frag_offset)))) - { - error1 = MAP_ERROR_MALFORMED; - next1 = IP6_MAPT_NEXT_DROP; - } - - map_port0 = map_port1 = -1; - l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) + - sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset; - l4_len1 = (u32) clib_net_to_host_u16 (ip61->payload_length) + - sizeof (*ip60) - vnet_buffer (p1)->map_t.v6.l4_offset; - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset); - frag1 = - (ip6_frag_hdr_t *) u8_ptr_add (ip61, - vnet_buffer (p1)->map_t. 
- v6.frag_offset); - - ip6_map_t_classify (p0, ip60, d0, &map_port0, &error0, &next0, - l4_len0, frag0); - ip6_map_t_classify (p1, ip61, d1, &map_port1, &error1, &next1, - l4_len1, frag1); - - if (PREDICT_FALSE - ((map_port0 != -1) - && (ip60->src_address.as_u64[0] != - map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, - map_port0) - || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, - vnet_buffer - (p0)->map_t.v6.saddr, - map_port0)))) - { - error0 = MAP_ERROR_SEC_CHECK; - } - - if (PREDICT_FALSE - ((map_port1 != -1) - && (ip61->src_address.as_u64[0] != - map_get_pfx_net (d1, vnet_buffer (p1)->map_t.v6.saddr, - map_port1) - || ip61->src_address.as_u64[1] != map_get_sfx_net (d1, - vnet_buffer - (p1)->map_t.v6.saddr, - map_port1)))) - { - error1 = MAP_ERROR_SEC_CHECK; - } - - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) - u8_ptr_add (ip60, - vnet_buffer - (p0)->map_t. - v6.frag_offset))) - && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) - && (error0 == MAP_ERROR_NONE)) - { - ip6_map_fragment_cache (ip60, - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer - (p0)->map_t. - v6.frag_offset), - d0, map_port0); - } - - if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset && - !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) - u8_ptr_add (ip61, - vnet_buffer - (p1)->map_t. - v6.frag_offset))) - && (map_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules) - && (error1 == MAP_ERROR_NONE)) - { - ip6_map_fragment_cache (ip61, - (ip6_frag_hdr_t *) u8_ptr_add (ip61, - vnet_buffer - (p1)->map_t. 
- v6.frag_offset), - d1, map_port1); - } - - if (PREDICT_TRUE - (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip60->payload_length)); - } - - if (PREDICT_TRUE - (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - vnet_buffer (p1)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip61->payload_length)); - } - - next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; - next1 = (error1 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next1; - p0->error = error_node->errors[error0]; - p1->error = error_node->errors[error1]; - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, next0, - next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - ip6_header_t *ip60; - u8 error0; - u32 l4_len0; - i32 map_port0; - map_domain_t *d0; - ip6_frag_hdr_t *frag0; - u32 port_offset; - ip6_mapt_next_t next0 = 0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - error0 = MAP_ERROR_NONE; - - p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - - if (mm->is_ce) - { - u32 daddr; - //Save daddr in a different variable to not overwrite ip.adj_index - daddr = 0; /* TODO */ - /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ - - daddr = map_get_ip4 (&ip60->dst_address, 0 /*TODO*/); - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & daddr, - &vnet_buffer (p0)->map_t.map_domain_index, - &error0); - - daddr = map_get_ip4 (&ip60->dst_address, d0->flags); - - //FIXME: What if d0 is null - vnet_buffer (p0)->map_t.v6.daddr = daddr; - vnet_buffer (p0)->map_t.v6.saddr = - 
ip6_map_t_embedded_address (d0, &ip60->src_address); - - port_offset = 2; - } - else - { - u32 saddr; - //Save saddr in a different variable to not overwrite ip.adj_index - saddr = 0; /* TODO */ - /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ - - saddr = map_get_ip4 (&ip60->src_address, 0 /*TODO*/); - d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & saddr, - &vnet_buffer (p0)->map_t.map_domain_index, - &error0); - - saddr = map_get_ip4 (&ip60->src_address, d0->flags); - - //FIXME: What if d0 is null - vnet_buffer (p0)->map_t.v6.saddr = saddr; - vnet_buffer (p0)->map_t.v6.daddr = - ip6_map_t_embedded_address (d0, &ip60->dst_address); - - port_offset = 0; - } - - vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; - - if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length, - &(vnet_buffer (p0)->map_t. - v6.l4_protocol), - &(vnet_buffer (p0)->map_t. - v6.l4_offset), - &(vnet_buffer (p0)->map_t. - v6.frag_offset)))) - { - error0 = MAP_ERROR_MALFORMED; - next0 = IP6_MAPT_NEXT_DROP; - } - - map_port0 = -1; - l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) + - sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset; - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset); - - - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - ip6_frag_hdr_offset (frag0))) - { - map_port0 = ip6_map_fragment_get (ip60, frag0, d0); - error0 = (map_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY; - next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; - } - else - if (PREDICT_TRUE - (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) - { - error0 = - l4_len0 < - sizeof (tcp_header_t) ? 
MAP_ERROR_MALFORMED : error0; - vnet_buffer (p0)->map_t.checksum_offset = - vnet_buffer (p0)->map_t.v6.l4_offset + 16; - next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + - port_offset)); - } - else - if (PREDICT_TRUE - (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) - { - error0 = - l4_len0 < - sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : error0; - vnet_buffer (p0)->map_t.checksum_offset = - vnet_buffer (p0)->map_t.v6.l4_offset + 6; - next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + - port_offset)); - } - else if (vnet_buffer (p0)->map_t.v6.l4_protocol == - IP_PROTOCOL_ICMP6) - { - error0 = - l4_len0 < - sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : error0; - next0 = IP6_MAPT_NEXT_MAPT_ICMP; - if (((icmp46_header_t *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset))->code == - ICMP6_echo_reply - || ((icmp46_header_t *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6. - l4_offset))->code == ICMP6_echo_request) - map_port0 = - (i32) * - ((u16 *) - u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6.l4_offset + 6)); - } - else - { - //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. 
- error0 = MAP_ERROR_BAD_PROTOCOL; - } - - //Security check - if (PREDICT_FALSE - ((!mm->is_ce) && (map_port0 != -1) - && (ip60->src_address.as_u64[0] != - map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, - map_port0) - || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, - vnet_buffer - (p0)->map_t.v6.saddr, - map_port0)))) - { - //Security check when src_port0 is not zero (non-first fragment, UDP or TCP) - error0 = MAP_ERROR_SEC_CHECK; - } - - //Fragmented first packet needs to be cached for following packets - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) - u8_ptr_add (ip60, - vnet_buffer - (p0)->map_t. - v6.frag_offset))) - && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) - && (error0 == MAP_ERROR_NONE)) - { - ip6_map_fragment_cache (ip60, - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer - (p0)->map_t. - v6.frag_offset), - d0, map_port0); - } - - if (PREDICT_TRUE - (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) - { - vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip60->payload_length)); - } - - next0 = (error0 != MAP_ERROR_NONE) ? 
IP6_MAPT_NEXT_DROP : next0; - p0->error = error_node->errors[error0]; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, pi0, - next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return frame->n_vectors; -} - -static char *map_t_error_strings[] = { -#define _(sym,string) string, - foreach_map_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { - .function = ip6_map_t_fragmented, - .name = "ip6-map-t-fragmented", - .vector_size = sizeof (u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT, - .next_nodes = { - [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { - .function = ip6_map_t_icmp, - .name = "ip6-map-t-icmp", - .vector_size = sizeof (u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP6_MAPT_ICMP_N_NEXT, - .next_nodes = { - [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { - .function = ip6_map_t_tcp_udp, - .name = "ip6-map-t-tcp-udp", - .vector_size = sizeof (u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT, - .next_nodes = { - [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_TCP_UDP_NEXT_DROP] = 
"error-drop", - }, -}; -/* *INDENT-ON* */ - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE(ip6_map_t_node) = { - .function = ip6_map_t, - .name = "ip6-map-t", - .vector_size = sizeof(u32), - .format_trace = format_map_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = MAP_N_ERROR, - .error_strings = map_t_error_strings, - - .n_next_nodes = IP6_MAPT_N_NEXT, - .next_nodes = { - [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", - [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", - [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented", - [IP6_MAPT_NEXT_DROP] = "error-drop", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api deleted file mode 100644 index a066b688514..00000000000 --- a/src/vnet/map/map.api +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -option version = "1.1.0"; - -/** \brief Add MAP domains - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param ip6_prefix - Rule IPv6 prefix - @param ip4_prefix - Rule IPv4 prefix - @param ip6_src - MAP domain IPv6 BR address / Tunnel source - @param ip6_prefix_len - Rule IPv6 prefix length - @param ip4_prefix_len - Rule IPv4 prefix length - @param ea_bits_len - Embedded Address bits length - @param psid_offset - Port Set Identifider (PSID) offset - @param psid_length - PSID length - @param is_translation - MAP-E / MAP-T - @param is_rfc6052 - rfc6052 translation - @param mtu - MTU -*/ -define map_add_domain -{ - u32 client_index; - u32 context; - u8 ip6_prefix[16]; - u8 ip4_prefix[4]; - u8 ip6_src[16]; - u8 ip6_prefix_len; - u8 ip4_prefix_len; - u8 ip6_src_prefix_len; - u8 ea_bits_len; - u8 psid_offset; - u8 psid_length; - u8 is_translation; - u8 is_rfc6052; - u16 mtu; -}; - -/** \brief Reply for MAP domain add - @param context - returned sender context, to match reply w/ request - @param index - MAP domain index - @param retval - return code -*/ -define map_add_domain_reply -{ - u32 context; - u32 index; - i32 retval; -}; - -/** \brief Delete MAP domain - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param index - MAP Domain index -*/ -autoreply define map_del_domain -{ - u32 client_index; - u32 context; - u32 index; -}; - - -/** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param index - MAP Domain index - @param is_add - If 1 add rule, if 0 delete rule - @param ip6_dst - MAP CE IPv6 address - @param psid - Rule PSID -*/ -autoreply define map_add_del_rule -{ - u32 client_index; - u32 context; - u32 index; - u8 is_add; - u8 ip6_dst[16]; - u16 
psid; -}; - - -/** \brief Get list of map domains - @param client_index - opaque cookie to identify the sender -*/ -define map_domain_dump -{ - u32 client_index; - u32 context; -}; - -define map_domain_details -{ - u32 context; - u32 domain_index; - u8 ip6_prefix[16]; - u8 ip4_prefix[4]; - u8 ip6_src[16]; - u8 ip6_prefix_len; - u8 ip4_prefix_len; - u8 ip6_src_len; - u8 ea_bits_len; - u8 psid_offset; - u8 psid_length; - u8 flags; - u16 mtu; - u8 is_translation; -}; - -define map_rule_dump -{ - u32 client_index; - u32 context; - u32 domain_index; -}; - -define map_rule_details -{ - u32 context; - u8 ip6_dst[16]; - u16 psid; -}; - -/** \brief Request for a single block of summary stats - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request -*/ -define map_summary_stats -{ - u32 client_index; - u32 context; -}; - -/** \brief Reply for map_summary_stats request - @param context - sender context, to match reply w/ request - @param retval - return code for request - @param total_bindings - - @param total_pkts - - @param total_ip4_fragments - - @param total_security_check - -*/ -define map_summary_stats_reply -{ - u32 context; - i32 retval; - u64 total_bindings; - u64 total_pkts[2]; - u64 total_bytes[2]; - u64 total_ip4_fragments; - u64 total_security_check[2]; -}; diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c deleted file mode 100644 index 702b5f80e52..00000000000 --- a/src/vnet/map/map.c +++ /dev/null @@ -1,2363 +0,0 @@ -/* - * map.c : MAP support - * - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include "map.h" - -map_main_t map_main; - -/* - * This code supports the following MAP modes: - * - * Algorithmic Shared IPv4 address (ea_bits_len > 0): - * ea_bits_len + ip4_prefix > 32 - * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32 - * Algorithmic Full IPv4 address (ea_bits_len > 0): - * ea_bits_len + ip4_prefix = 32 - * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 - * Algorithmic IPv4 prefix (ea_bits_len > 0): - * ea_bits_len + ip4_prefix < 32 - * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 - * - * Independent Shared IPv4 address (ea_bits_len = 0): - * ip4_prefix = 32 - * psid_length > 0 - * Rule IPv6 address = 128, Rule PSID Set - * Independent Full IPv4 address (ea_bits_len = 0): - * ip4_prefix = 32 - * psid_length = 0, ip6_prefix = 128 - * Independent IPv4 prefix (ea_bits_len = 0): - * ip4_prefix < 32 - * psid_length = 0, ip6_prefix = 128 - * - */ - -/* - * This code supports MAP-T: - * - * With DMR prefix length equal to 96. 
- * - */ - - - -int -map_create_domain (ip4_address_t * ip4_prefix, - u8 ip4_prefix_len, - ip6_address_t * ip6_prefix, - u8 ip6_prefix_len, - ip6_address_t * ip6_src, - u8 ip6_src_len, - u8 ea_bits_len, - u8 psid_offset, - u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags) -{ - u8 suffix_len, suffix_shift; - map_main_t *mm = &map_main; - dpo_id_t dpo_v4 = DPO_INVALID; - dpo_id_t dpo_v6 = DPO_INVALID; - map_domain_t *d; - - /* Sanity check on the src prefix length */ - if (flags & MAP_DOMAIN_TRANSLATION) - { - if (ip6_src_len != 96) - { - clib_warning ("MAP-T only supports ip6_src_len = 96 for now."); - return -1; - } - if ((flags & MAP_DOMAIN_RFC6052) && ip6_prefix_len != 96) - { - clib_warning ("RFC6052 translation only supports ip6_prefix_len = " - "96 for now"); - return -1; - } - } - else - { - if (ip6_src_len != 128) - { - clib_warning - ("MAP-E requires a BR address, not a prefix (ip6_src_len should " - "be 128)."); - return -1; - } - } - - /* How many, and which bits to grab from the IPv4 DA */ - if (ip4_prefix_len + ea_bits_len < 32) - { - if (!(flags & MAP_DOMAIN_TRANSLATION)) - flags |= MAP_DOMAIN_PREFIX; - suffix_shift = 32 - ip4_prefix_len - ea_bits_len; - suffix_len = ea_bits_len; - } - else - { - suffix_shift = 0; - suffix_len = 32 - ip4_prefix_len; - } - - /* EA bits must be within the first 64 bits */ - if (ea_bits_len > 0 && ((ip6_prefix_len + ea_bits_len) > 64 || - ip6_prefix_len + suffix_len + psid_length > 64)) - { - clib_warning - ("Embedded Address bits must be within the first 64 bits of " - "the IPv6 prefix"); - return -1; - } - - if (mm->is_ce && !(flags & MAP_DOMAIN_TRANSLATION)) - { - clib_warning ("MAP-E CE is not supported yet"); - return -1; - } - - /* Get domain index */ - pool_get_aligned (mm->domains, d, CLIB_CACHE_LINE_BYTES); - memset (d, 0, sizeof (*d)); - *map_domain_index = d - mm->domains; - - /* Init domain struct */ - d->ip4_prefix.as_u32 = ip4_prefix->as_u32; - d->ip4_prefix_len = ip4_prefix_len; - d->ip6_prefix = 
*ip6_prefix; - d->ip6_prefix_len = ip6_prefix_len; - d->ip6_src = *ip6_src; - d->ip6_src_len = ip6_src_len; - d->ea_bits_len = ea_bits_len; - d->psid_offset = psid_offset; - d->psid_length = psid_length; - d->mtu = mtu; - d->flags = flags; - d->suffix_shift = suffix_shift; - d->suffix_mask = (1 << suffix_len) - 1; - - d->psid_shift = 16 - psid_length - psid_offset; - d->psid_mask = (1 << d->psid_length) - 1; - d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; - - /* MAP data-plane object */ - if (d->flags & MAP_DOMAIN_TRANSLATION) - map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); - else - map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); - - /* Create ip4 route */ - u8 ip4_pfx_len; - ip4_address_t ip4_pfx; - if (mm->is_ce) - { - ip4_pfx_len = 0; - ip4_pfx.as_u32 = 0; - } - else - { - ip4_pfx_len = d->ip4_prefix_len; - ip4_pfx = d->ip4_prefix; - } - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP4, - .fp_len = ip4_pfx_len, - .fp_addr = { - .ip4 = ip4_pfx, - } - , - }; - fib_table_entry_special_dpo_add (0, &pfx, - FIB_SOURCE_MAP, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4); - dpo_reset (&dpo_v4); - - /* - * construct a DPO to use the v6 domain - */ - if (d->flags & MAP_DOMAIN_TRANSLATION) - map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); - else - map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); - - /* - * Multiple MAP domains may share same source IPv6 TEP. Which is just dandy. - * We are not tracking the sharing. So a v4 lookup to find the correct - * domain post decap/trnaslate is always done - * - * Create ip6 route. This is a reference counted add. If the prefix - * already exists and is MAP sourced, it is now MAP source n+1 times - * and will need to be removed n+1 times. 
- */ - u8 ip6_pfx_len; - ip6_address_t ip6_pfx; - if (mm->is_ce) - { - ip6_pfx_len = d->ip6_prefix_len; - ip6_pfx = d->ip6_prefix; - } - else - { - ip6_pfx_len = d->ip6_src_len; - ip6_pfx = d->ip6_src; - } - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = ip6_pfx_len, - .fp_addr.ip6 = ip6_pfx, - }; - - fib_table_entry_special_dpo_add (0, &pfx6, - FIB_SOURCE_MAP, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); - dpo_reset (&dpo_v6); - - /* Validate packet/byte counters */ - map_domain_counter_lock (mm); - int i; - for (i = 0; i < vec_len (mm->simple_domain_counters); i++) - { - vlib_validate_simple_counter (&mm->simple_domain_counters[i], - *map_domain_index); - vlib_zero_simple_counter (&mm->simple_domain_counters[i], - *map_domain_index); - } - for (i = 0; i < vec_len (mm->domain_counters); i++) - { - vlib_validate_combined_counter (&mm->domain_counters[i], - *map_domain_index); - vlib_zero_combined_counter (&mm->domain_counters[i], *map_domain_index); - } - map_domain_counter_unlock (mm); - - return 0; -} - -/* - * map_delete_domain - */ -int -map_delete_domain (u32 map_domain_index) -{ - map_main_t *mm = &map_main; - map_domain_t *d; - - if (pool_is_free_index (mm->domains, map_domain_index)) - { - clib_warning ("MAP domain delete: domain does not exist: %d", - map_domain_index); - return -1; - } - - d = pool_elt_at_index (mm->domains, map_domain_index); - - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP4, - .fp_len = d->ip4_prefix_len, - .fp_addr = { - .ip4 = d->ip4_prefix, - } - , - }; - fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP); - - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = d->ip6_src_len, - .fp_addr = { - .ip6 = d->ip6_src, - } - , - }; - fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP); - - /* Deleting rules */ - if (d->rules) - clib_mem_free (d->rules); - - pool_put (mm->domains, d); - - return 0; -} - -int -map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, - u8 is_add) -{ - 
map_domain_t *d; - map_main_t *mm = &map_main; - - if (pool_is_free_index (mm->domains, map_domain_index)) - { - clib_warning ("MAP rule: domain does not exist: %d", map_domain_index); - return -1; - } - d = pool_elt_at_index (mm->domains, map_domain_index); - - /* Rules are only used in 1:1 independent case */ - if (d->ea_bits_len > 0) - return (-1); - - if (!d->rules) - { - u32 l = (0x1 << d->psid_length) * sizeof (ip6_address_t); - d->rules = clib_mem_alloc_aligned (l, CLIB_CACHE_LINE_BYTES); - if (!d->rules) - return -1; - memset (d->rules, 0, l); - } - - if (psid >= (0x1 << d->psid_length)) - { - clib_warning ("MAP rule: PSID outside bounds: %d [%d]", psid, - 0x1 << d->psid_length); - return -1; - } - - if (is_add) - { - d->rules[psid] = *tep; - } - else - { - memset (&d->rules[psid], 0, sizeof (ip6_address_t)); - } - return 0; -} - -#ifdef MAP_SKIP_IP6_LOOKUP -/** - * Pre-resolvd per-protocol global next-hops - */ -map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; - -static void -map_pre_resolve_init (map_main_pre_resolved_t * pr) -{ - pr->fei = FIB_NODE_INDEX_INVALID; - fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E); -} - -static u8 * -format_map_pre_resolve (u8 * s, va_list * ap) -{ - map_main_pre_resolved_t *pr = va_arg (*ap, map_main_pre_resolved_t *); - - if (FIB_NODE_INDEX_INVALID != pr->fei) - { - fib_prefix_t pfx; - - fib_entry_get_prefix (pr->fei, &pfx); - - return (format (s, "%U (%u)", - format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY, - pr->dpo.dpoi_index)); - } - else - { - return (format (s, "un-set")); - } -} - - -/** - * Function definition to inform the FIB node that its last lock has gone. - */ -static void -map_last_lock_gone (fib_node_t * node) -{ - /* - * The MAP is a root of the graph. As such - * it never has children and thus is never locked. 
- */ - ASSERT (0); -} - -static map_main_pre_resolved_t * -map_from_fib_node (fib_node_t * node) -{ - ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type); - return ((map_main_pre_resolved_t *) - (((char *) node) - - STRUCT_OFFSET_OF (map_main_pre_resolved_t, node))); -} - -static void -map_stack (map_main_pre_resolved_t * pr) -{ - const dpo_id_t *dpo; - - dpo = fib_entry_contribute_ip_forwarding (pr->fei); - - dpo_copy (&pr->dpo, dpo); -} - -/** - * Function definition to backwalk a FIB node - */ -static fib_node_back_walk_rc_t -map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) -{ - map_stack (map_from_fib_node (node)); - - return (FIB_NODE_BACK_WALK_CONTINUE); -} - -/** - * Function definition to get a FIB node from its index - */ -static fib_node_t * -map_fib_node_get (fib_node_index_t index) -{ - return (&pre_resolved[index].node); -} - -/* - * Virtual function table registered by MPLS GRE tunnels - * for participation in the FIB object graph. - */ -const static fib_node_vft_t map_vft = { - .fnv_get = map_fib_node_get, - .fnv_last_lock = map_last_lock_gone, - .fnv_back_walk = map_back_walk, -}; - -static void -map_fib_resolve (map_main_pre_resolved_t * pr, - fib_protocol_t proto, u8 len, const ip46_address_t * addr) -{ - fib_prefix_t pfx = { - .fp_proto = proto, - .fp_len = len, - .fp_addr = *addr, - }; - - pr->fei = fib_table_entry_special_add (0, // default fib - &pfx, - FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); - pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto); - map_stack (pr); -} - -static void -map_fib_unresolve (map_main_pre_resolved_t * pr, - fib_protocol_t proto, u8 len, const ip46_address_t * addr) -{ - fib_prefix_t pfx = { - .fp_proto = proto, - .fp_len = len, - .fp_addr = *addr, - }; - - fib_entry_child_remove (pr->fei, pr->sibling); - - fib_table_entry_special_remove (0, // default fib - &pfx, FIB_SOURCE_RR); - dpo_reset (&pr->dpo); - - pr->fei = FIB_NODE_INDEX_INVALID; - pr->sibling = FIB_NODE_INDEX_INVALID; -} - 
-static void -map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del) -{ - if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)) - { - ip46_address_t addr = { - .ip6 = *ip6, - }; - if (is_del) - map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP6], - FIB_PROTOCOL_IP6, 128, &addr); - else - map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6], - FIB_PROTOCOL_IP6, 128, &addr); - } - if (ip4 && (ip4->as_u32 != 0)) - { - ip46_address_t addr = { - .ip4 = *ip4, - }; - if (is_del) - map_fib_unresolve (&pre_resolved[FIB_PROTOCOL_IP4], - FIB_PROTOCOL_IP4, 32, &addr); - else - map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4], - FIB_PROTOCOL_IP4, 32, &addr); - } -} -#endif - -static clib_error_t * -map_security_check_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "off")) - mm->sec_check = false; - else if (unformat (line_input, "on")) - mm->sec_check = true; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_security_check_frag_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "off")) - mm->sec_check_frag = false; - else if (unformat (line_input, "on")) - mm->sec_check_frag = true; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_add_domain_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t ip4_prefix; - ip6_address_t ip6_prefix; - ip6_address_t ip6_src; - u32 ip6_prefix_len = 0, ip4_prefix_len = 0, map_domain_index, ip6_src_len; - u32 num_m_args = 0; - /* Optional arguments */ - u32 ea_bits_len = 0, psid_offset = 0, psid_length = 0; - u32 mtu = 0; - u8 flags = 0; - ip6_src_len = 128; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, - &ip4_prefix_len)) - num_m_args++; - else - if (unformat - (line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, - &ip6_prefix_len)) - num_m_args++; - else - if (unformat - (line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, - &ip6_src_len)) - num_m_args++; - else - if (unformat - (line_input, "ip6-src %U", unformat_ip6_address, &ip6_src)) - num_m_args++; - else if (unformat (line_input, "ea-bits-len %d", &ea_bits_len)) - num_m_args++; - else if (unformat (line_input, "psid-offset %d", &psid_offset)) - num_m_args++; - else if (unformat (line_input, "psid-len %d", &psid_length)) - num_m_args++; - else if (unformat (line_input, "mtu %d", &mtu)) - num_m_args++; - else if (unformat (line_input, "map-t")) - flags |= MAP_DOMAIN_TRANSLATION; - else if (unformat (line_input, "rfc6052")) - flags |= (MAP_DOMAIN_TRANSLATION | MAP_DOMAIN_RFC6052); - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (num_m_args < 3) - { - error = clib_error_return (0, "mandatory argument(s) missing"); - goto done; - } - - map_create_domain (&ip4_prefix, ip4_prefix_len, - &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, - ea_bits_len, psid_offset, psid_length, &map_domain_index, - mtu, flags); - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_del_domain_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 num_m_args = 0; - u32 map_domain_index; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "index %d", &map_domain_index)) - num_m_args++; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (num_m_args != 1) - { - error = clib_error_return (0, "mandatory argument(s) missing"); - goto done; - } - - map_delete_domain (map_domain_index); - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_add_rule_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip6_address_t tep; - u32 num_m_args = 0; - u32 psid = 0, map_domain_index; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "index %d", &map_domain_index)) - num_m_args++; - else if (unformat (line_input, "psid %d", &psid)) - num_m_args++; - else - if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep)) - num_m_args++; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (num_m_args != 3) - { - error = clib_error_return (0, "mandatory argument(s) missing"); - goto done; - } - - if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0) - { - error = clib_error_return (0, "Failing to add Mapping Rule"); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - -#if MAP_SKIP_IP6_LOOKUP -static clib_error_t * -map_pre_resolve_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t ip4nh, *p_v4 = NULL; - ip6_address_t 
ip6nh, *p_v6 = NULL; - clib_error_t *error = NULL; - int is_del = 0; - - memset (&ip4nh, 0, sizeof (ip4nh)); - memset (&ip6nh, 0, sizeof (ip6nh)); - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) - p_v4 = &ip4nh; - else - if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) - p_v6 = &ip6nh; - else if (unformat (line_input, "del")) - is_del = 1; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - map_pre_resolve (p_v4, p_v6, is_del); - -done: - unformat_free (line_input); - - return error; -} -#endif - -static clib_error_t * -map_icmp_relay_source_address_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t icmp_src_address; - map_main_t *mm = &map_main; - clib_error_t *error = NULL; - - mm->icmp4_src_address.as_u32 = 0; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat - (line_input, "%U", unformat_ip4_address, &icmp_src_address)) - mm->icmp4_src_address = icmp_src_address; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_icmp_unreachables_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - int num_m_args = 0; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - num_m_args++; - if (unformat (line_input, "on")) - mm->icmp6_enabled = true; - else if (unformat (line_input, "off")) - mm->icmp6_enabled = false; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - - if (num_m_args != 1) - error = clib_error_return (0, "mandatory argument(s) missing"); - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_fragment_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "inner")) - mm->frag_inner = true; - else if (unformat (line_input, "outer")) - mm->frag_inner = false; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_fragment_df_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - clib_error_t *error = NULL; - - /* Get a line of input. 
*/ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "on")) - mm->frag_ignore_df = true; - else if (unformat (line_input, "off")) - mm->frag_ignore_df = false; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -map_traffic_class_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - u32 tc = 0; - clib_error_t *error = NULL; - - mm->tc_copy = false; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "copy")) - mm->tc_copy = true; - else if (unformat (line_input, "%x", &tc)) - mm->tc = tc & 0xff; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - -done: - unformat_free (line_input); - - return error; -} - -static char * -map_flags_to_string (u32 flags) -{ - if (flags & MAP_DOMAIN_RFC6052) - return "rfc6052"; - if (flags & MAP_DOMAIN_PREFIX) - return "prefix"; - if (flags & MAP_DOMAIN_TRANSLATION) - return "map-t"; - return ""; -} - -static u8 * -format_map_domain (u8 * s, va_list * args) -{ - map_domain_t *d = va_arg (*args, map_domain_t *); - bool counters = va_arg (*args, int); - map_main_t *mm = &map_main; - ip6_address_t ip6_prefix; - - if (d->rules) - memset (&ip6_prefix, 0, sizeof (ip6_prefix)); - else - ip6_prefix = d->ip6_prefix; - - s = format (s, - "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d " - "psid-offset %d psid-len %d mtu %d %s", - d - mm->domains, - format_ip4_address, &d->ip4_prefix, 
d->ip4_prefix_len, - format_ip6_address, &ip6_prefix, d->ip6_prefix_len, - format_ip6_address, &d->ip6_src, d->ip6_src_len, - d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu, - map_flags_to_string (d->flags)); - - if (counters) - { - map_domain_counter_lock (mm); - vlib_counter_t v; - vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_TX], - d - mm->domains, &v); - s = format (s, " TX: %lld/%lld", v.packets, v.bytes); - vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_RX], - d - mm->domains, &v); - s = format (s, " RX: %lld/%lld", v.packets, v.bytes); - map_domain_counter_unlock (mm); - } - s = format (s, "\n"); - - if (d->rules) - { - int i; - ip6_address_t dst; - for (i = 0; i < (0x1 << d->psid_length); i++) - { - dst = d->rules[i]; - if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0) - continue; - s = format (s, - " rule psid: %d ip6-dst %U\n", i, format_ip6_address, - &dst); - } - } - return s; -} - -static u8 * -format_map_ip4_reass (u8 * s, va_list * args) -{ - map_main_t *mm = &map_main; - map_ip4_reass_t *r = va_arg (*args, map_ip4_reass_t *); - map_ip4_reass_key_t *k = &r->key; - f64 now = vlib_time_now (mm->vlib_main); - f64 lifetime = (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000); - f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; - s = format (s, - "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n", - format_ip4_address, &k->src.as_u8, format_ip4_address, - &k->dst.as_u8, k->protocol, - clib_net_to_host_u16 (k->fragment_id), - (r->port >= 0) ? clib_net_to_host_u16 (r->port) : -1, dt); - return s; -} - -static u8 * -format_map_ip6_reass (u8 * s, va_list * args) -{ - map_main_t *mm = &map_main; - map_ip6_reass_t *r = va_arg (*args, map_ip6_reass_t *); - map_ip6_reass_key_t *k = &r->key; - f64 now = vlib_time_now (mm->vlib_main); - f64 lifetime = (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000); - f64 dt = (r->ts + lifetime > now) ? 
(r->ts + lifetime - now) : -1; - s = format (s, - "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n", - format_ip6_address, &k->src.as_u8, format_ip6_address, - &k->dst.as_u8, k->protocol, - clib_net_to_host_u32 (k->fragment_id), dt); - return s; -} - -static clib_error_t * -show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - map_main_t *mm = &map_main; - map_domain_t *d; - bool counters = false; - u32 map_domain_index = ~0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "counters")) - counters = true; - else if (unformat (line_input, "index %d", &map_domain_index)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (pool_elts (mm->domains) == 0) - vlib_cli_output (vm, "No MAP domains are configured..."); - - if (map_domain_index == ~0) - { - /* *INDENT-OFF* */ - pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); - /* *INDENT-ON* */ - } - else - { - if (pool_is_free_index (mm->domains, map_domain_index)) - { - error = clib_error_return (0, "MAP domain does not exists %d", - map_domain_index); - goto done; - } - - d = pool_elt_at_index (mm->domains, map_domain_index); - vlib_cli_output (vm, "%U", format_map_domain, d, counters); - } - -done: - unformat_free (line_input); - - return error; -} - -static clib_error_t * -show_map_fragments_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - map_main_t *mm = &map_main; - map_ip4_reass_t *f4; - map_ip6_reass_t *f6; - - /* *INDENT-OFF* */ - pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);})); - /* 
*INDENT-ON* */ - /* *INDENT-OFF* */ - pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);})); - /* *INDENT-ON* */ - return (0); -} - -u64 -map_error_counter_get (u32 node_index, map_error_t map_error) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index); - vlib_error_main_t *em = &vm->error_main; - vlib_error_t e = error_node->errors[map_error]; - vlib_node_t *n = vlib_get_node (vm, node_index); - u32 ci; - - ci = vlib_error_get_code (e); - ASSERT (ci < n->n_errors); - ci += n->error_heap_index; - - return (em->counters[ci]); -} - -static clib_error_t * -show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - map_main_t *mm = &map_main; - map_domain_t *d; - int domains = 0, rules = 0, domaincount = 0, rulecount = 0; - if (pool_elts (mm->domains) == 0) - { - vlib_cli_output (vm, "No MAP domains are configured..."); - return 0; - } - - /* *INDENT-OFF* */ - pool_foreach(d, mm->domains, ({ - if (d->rules) { - rulecount+= 0x1 << d->psid_length; - rules += sizeof(ip6_address_t) * 0x1 << d->psid_length; - } - domains += sizeof(*d); - domaincount++; - })); - /* *INDENT-ON* */ - - vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t)); - vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains); - vlib_cli_output (vm, "MAP rules: %d (%d bytes)\n", rulecount, rules); - vlib_cli_output (vm, "Total: %d bytes)\n", rules + domains); - -#if MAP_SKIP_IP6_LOOKUP - vlib_cli_output (vm, - "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n", - format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6], - format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]); - -#endif - - if (mm->tc_copy) - vlib_cli_output (vm, "MAP traffic-class: copy"); - else - vlib_cli_output (vm, "MAP traffic-class: %x", mm->tc); - - vlib_cli_output (vm, - "MAP IPv6 inbound security check: %s, fragmented packet security 
check: %s", - mm->sec_check ? "enabled" : "disabled", - mm->sec_check_frag ? "enabled" : "disabled"); - - vlib_cli_output (vm, "ICMP-relay IPv4 source address: %U\n", - format_ip4_address, &mm->icmp4_src_address); - vlib_cli_output (vm, "ICMP6 unreachables sent for unmatched packets: %s\n", - mm->icmp6_enabled ? "enabled" : "disabled"); - vlib_cli_output (vm, "Inner fragmentation: %s\n", - mm->frag_inner ? "enabled" : "disabled"); - vlib_cli_output (vm, "Fragment packets regardless of DF flag: %s\n", - mm->frag_ignore_df ? "enabled" : "disabled"); - - /* - * Counters - */ - vlib_combined_counter_main_t *cm = mm->domain_counters; - u64 total_pkts[MAP_N_DOMAIN_COUNTER]; - u64 total_bytes[MAP_N_DOMAIN_COUNTER]; - int which, i; - vlib_counter_t v; - - memset (total_pkts, 0, sizeof (total_pkts)); - memset (total_bytes, 0, sizeof (total_bytes)); - - map_domain_counter_lock (mm); - vec_foreach (cm, mm->domain_counters) - { - which = cm - mm->domain_counters; - - for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) - { - vlib_get_combined_counter (cm, i, &v); - total_pkts[which] += v.packets; - total_bytes[which] += v.bytes; - } - } - map_domain_counter_unlock (mm); - - vlib_cli_output (vm, "Encapsulated packets: %lld bytes: %lld\n", - total_pkts[MAP_DOMAIN_COUNTER_TX], - total_bytes[MAP_DOMAIN_COUNTER_TX]); - vlib_cli_output (vm, "Decapsulated packets: %lld bytes: %lld\n", - total_pkts[MAP_DOMAIN_COUNTER_RX], - total_bytes[MAP_DOMAIN_COUNTER_RX]); - - vlib_cli_output (vm, "ICMP relayed packets: %d\n", - vlib_get_simple_counter (&mm->icmp_relayed, 0)); - - return 0; -} - -static clib_error_t * -map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 lifetime = ~0; - f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1); - u32 pool_size = ~0; - u64 buffers = ~(0ull); - u8 ip4 = 0, ip6 = 0; - - if (!unformat_user (input, unformat_line_input, line_input)) - 
return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "lifetime %u", &lifetime)) - ; - else if (unformat (line_input, "ht-ratio %lf", &ht_ratio)) - ; - else if (unformat (line_input, "pool-size %u", &pool_size)) - ; - else if (unformat (line_input, "buffers %llu", &buffers)) - ; - else if (unformat (line_input, "ip4")) - ip4 = 1; - else if (unformat (line_input, "ip6")) - ip6 = 1; - else - { - unformat_free (line_input); - return clib_error_return (0, "invalid input"); - } - } - unformat_free (line_input); - - if (!ip4 && !ip6) - return clib_error_return (0, "must specify ip4 and/or ip6"); - - if (ip4) - { - if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) - return clib_error_return (0, "invalid ip4-reass pool-size ( > %d)", - MAP_IP4_REASS_CONF_POOL_SIZE_MAX); - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) - && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) - return clib_error_return (0, "invalid ip4-reass ht-ratio ( > %d)", - MAP_IP4_REASS_CONF_HT_RATIO_MAX); - if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX) - return clib_error_return (0, "invalid ip4-reass lifetime ( > %d)", - MAP_IP4_REASS_CONF_LIFETIME_MAX); - if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) - return clib_error_return (0, "invalid ip4-reass buffers ( > %ld)", - MAP_IP4_REASS_CONF_BUFFERS_MAX); - } - - if (ip6) - { - if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) - return clib_error_return (0, "invalid ip6-reass pool-size ( > %d)", - MAP_IP6_REASS_CONF_POOL_SIZE_MAX); - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1) - && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) - return clib_error_return (0, "invalid ip6-reass ht-log2len ( > %d)", - MAP_IP6_REASS_CONF_HT_RATIO_MAX); - if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX) - return clib_error_return (0, "invalid ip6-reass lifetime ( > %d)", - MAP_IP6_REASS_CONF_LIFETIME_MAX); - if 
(buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX) - return clib_error_return (0, "invalid ip6-reass buffers ( > %ld)", - MAP_IP6_REASS_CONF_BUFFERS_MAX); - } - - if (ip4) - { - u32 reass = 0, packets = 0; - if (pool_size != ~0) - { - if (map_ip4_reass_conf_pool_size (pool_size, &reass, &packets)) - { - vlib_cli_output (vm, "Could not set ip4-reass pool-size"); - } - else - { - vlib_cli_output (vm, - "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", - reass, packets); - } - } - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)) - { - if (map_ip4_reass_conf_ht_ratio (ht_ratio, &reass, &packets)) - { - vlib_cli_output (vm, "Could not set ip4-reass ht-log2len"); - } - else - { - vlib_cli_output (vm, - "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", - reass, packets); - } - } - if (lifetime != ~0) - { - if (map_ip4_reass_conf_lifetime (lifetime)) - vlib_cli_output (vm, "Could not set ip4-reass lifetime"); - else - vlib_cli_output (vm, "Setting ip4-reass lifetime"); - } - if (buffers != ~(0ull)) - { - if (map_ip4_reass_conf_buffers (buffers)) - vlib_cli_output (vm, "Could not set ip4-reass buffers"); - else - vlib_cli_output (vm, "Setting ip4-reass buffers"); - } - - if (map_main.ip4_reass_conf_buffers > - map_main.ip4_reass_conf_pool_size * - MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) - { - vlib_cli_output (vm, - "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly."); - } - } - - if (ip6) - { - u32 reass = 0, packets = 0; - if (pool_size != ~0) - { - if (map_ip6_reass_conf_pool_size (pool_size, &reass, &packets)) - { - vlib_cli_output (vm, "Could not set ip6-reass pool-size"); - } - else - { - vlib_cli_output (vm, - "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", - reass, packets); - } - } - if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)) - { - if (map_ip6_reass_conf_ht_ratio (ht_ratio, &reass, &packets)) - { - vlib_cli_output (vm, 
"Could not set ip6-reass ht-log2len"); - } - else - { - vlib_cli_output (vm, - "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", - reass, packets); - } - } - if (lifetime != ~0) - { - if (map_ip6_reass_conf_lifetime (lifetime)) - vlib_cli_output (vm, "Could not set ip6-reass lifetime"); - else - vlib_cli_output (vm, "Setting ip6-reass lifetime"); - } - if (buffers != ~(0ull)) - { - if (map_ip6_reass_conf_buffers (buffers)) - vlib_cli_output (vm, "Could not set ip6-reass buffers"); - else - vlib_cli_output (vm, "Setting ip6-reass buffers"); - } - - if (map_main.ip6_reass_conf_buffers > - map_main.ip6_reass_conf_pool_size * - MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) - { - vlib_cli_output (vm, - "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly."); - } - } - - return 0; -} - - -/* - * packet trace format function - */ -u8 * -format_map_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - map_trace_t *t = va_arg (*args, map_trace_t *); - u32 map_domain_index = t->map_domain_index; - u16 port = t->port; - - s = - format (s, "MAP domain index: %d L4 port: %u", map_domain_index, - clib_net_to_host_u16 (port)); - - return s; -} - -static_always_inline map_ip4_reass_t * -map_ip4_reass_lookup (map_ip4_reass_key_t * k, u32 bucket, f64 now) -{ - map_main_t *mm = &map_main; - u32 ri = mm->ip4_reass_hash_table[bucket]; - while (ri != MAP_REASS_INDEX_NONE) - { - map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); - if (r->key.as_u64[0] == k->as_u64[0] && - r->key.as_u64[1] == k->as_u64[1] && - now < r->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000)) - { - return r; - } - ri = r->bucket_next; - } - return NULL; -} - -#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool) - -void -map_ip4_reass_free (map_ip4_reass_t * r, u32 ** pi_to_drop) -{ - map_main_t *mm = &map_main; - 
map_ip4_reass_get_fragments (r, pi_to_drop); - - // Unlink in hash bucket - map_ip4_reass_t *r2 = NULL; - u32 r2i = mm->ip4_reass_hash_table[r->bucket]; - while (r2i != map_ip4_reass_pool_index (r)) - { - ASSERT (r2i != MAP_REASS_INDEX_NONE); - r2 = pool_elt_at_index (mm->ip4_reass_pool, r2i); - r2i = r2->bucket_next; - } - if (r2) - { - r2->bucket_next = r->bucket_next; - } - else - { - mm->ip4_reass_hash_table[r->bucket] = r->bucket_next; - } - - // Unlink in list - if (r->fifo_next == map_ip4_reass_pool_index (r)) - { - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - } - else - { - if (mm->ip4_reass_fifo_last == map_ip4_reass_pool_index (r)) - mm->ip4_reass_fifo_last = r->fifo_prev; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = - r->fifo_next; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = - r->fifo_prev; - } - - pool_put (mm->ip4_reass_pool, r); - mm->ip4_reass_allocated--; -} - -map_ip4_reass_t * -map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id, - u8 protocol, u32 ** pi_to_drop) -{ - map_ip4_reass_t *r; - map_main_t *mm = &map_main; - map_ip4_reass_key_t k = {.src.data_u32 = src, - .dst.data_u32 = dst, - .fragment_id = fragment_id, - .protocol = protocol - }; - - u32 h = 0; -#ifdef clib_crc32c_uses_intrinsics - h = clib_crc32c ((u8 *) k.as_u32, 16); -#else - u64 tmp = k.as_u32[0] ^ k.as_u32[1] ^ k.as_u32[2] ^ k.as_u32[3]; - h = clib_xxhash (tmp); -#endif - h = h >> (32 - mm->ip4_reass_ht_log2len); - - f64 now = vlib_time_now (mm->vlib_main); - - //Cache garbage collection - while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - map_ip4_reass_t *last = - pool_elt_at_index (mm->ip4_reass_pool, mm->ip4_reass_fifo_last); - if (last->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000) < now) - map_ip4_reass_free (last, pi_to_drop); - else - break; - } - - if ((r = map_ip4_reass_lookup (&k, h, now))) - return r; - - if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size) - return NULL; - - pool_get 
(mm->ip4_reass_pool, r); - mm->ip4_reass_allocated++; - int i; - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - r->fragments[i] = ~0; - - u32 ri = map_ip4_reass_pool_index (r); - - //Link in new bucket - r->bucket = h; - r->bucket_next = mm->ip4_reass_hash_table[h]; - mm->ip4_reass_hash_table[h] = ri; - - //Link in fifo - if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - r->fifo_next = - pool_elt_at_index (mm->ip4_reass_pool, - mm->ip4_reass_fifo_last)->fifo_next; - r->fifo_prev = mm->ip4_reass_fifo_last; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri; - pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri; - } - else - { - r->fifo_next = r->fifo_prev = ri; - mm->ip4_reass_fifo_last = ri; - } - - //Set other fields - r->ts = now; - r->key = k; - r->port = -1; -#ifdef MAP_IP4_REASS_COUNT_BYTES - r->expected_total = 0xffff; - r->forwarded = 0; -#endif - - return r; -} - -int -map_ip4_reass_add_fragment (map_ip4_reass_t * r, u32 pi) -{ - if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers) - return -1; - - int i; - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i] == ~0) - { - r->fragments[i] = pi; - map_main.ip4_reass_buffered_counter++; - return 0; - } - return -1; -} - -static_always_inline map_ip6_reass_t * -map_ip6_reass_lookup (map_ip6_reass_key_t * k, u32 bucket, f64 now) -{ - map_main_t *mm = &map_main; - u32 ri = mm->ip6_reass_hash_table[bucket]; - while (ri != MAP_REASS_INDEX_NONE) - { - map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri); - if (now < r->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) && - r->key.as_u64[0] == k->as_u64[0] && - r->key.as_u64[1] == k->as_u64[1] && - r->key.as_u64[2] == k->as_u64[2] && - r->key.as_u64[3] == k->as_u64[3] && - r->key.as_u64[4] == k->as_u64[4]) - return r; - ri = r->bucket_next; - } - return NULL; -} - -#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool) - 
-void -map_ip6_reass_free (map_ip6_reass_t * r, u32 ** pi_to_drop) -{ - map_main_t *mm = &map_main; - int i; - for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i].pi != ~0) - { - vec_add1 (*pi_to_drop, r->fragments[i].pi); - r->fragments[i].pi = ~0; - map_main.ip6_reass_buffered_counter--; - } - - // Unlink in hash bucket - map_ip6_reass_t *r2 = NULL; - u32 r2i = mm->ip6_reass_hash_table[r->bucket]; - while (r2i != map_ip6_reass_pool_index (r)) - { - ASSERT (r2i != MAP_REASS_INDEX_NONE); - r2 = pool_elt_at_index (mm->ip6_reass_pool, r2i); - r2i = r2->bucket_next; - } - if (r2) - { - r2->bucket_next = r->bucket_next; - } - else - { - mm->ip6_reass_hash_table[r->bucket] = r->bucket_next; - } - - // Unlink in list - if (r->fifo_next == map_ip6_reass_pool_index (r)) - { - //Single element in the list, list is now empty - mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; - } - else - { - if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index (r)) //First element - mm->ip6_reass_fifo_last = r->fifo_prev; - pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = - r->fifo_next; - pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = - r->fifo_prev; - } - - // Free from pool if necessary - pool_put (mm->ip6_reass_pool, r); - mm->ip6_reass_allocated--; -} - -map_ip6_reass_t * -map_ip6_reass_get (ip6_address_t * src, ip6_address_t * dst, u32 fragment_id, - u8 protocol, u32 ** pi_to_drop) -{ - map_ip6_reass_t *r; - map_main_t *mm = &map_main; - map_ip6_reass_key_t k = { - .src = *src, - .dst = *dst, - .fragment_id = fragment_id, - .protocol = protocol - }; - - u32 h = 0; - int i; - -#ifdef clib_crc32c_uses_intrinsics - h = clib_crc32c ((u8 *) k.as_u32, 40); -#else - u64 tmp = - k.as_u64[0] ^ k.as_u64[1] ^ k.as_u64[2] ^ k.as_u64[3] ^ k.as_u64[4]; - h = clib_xxhash (tmp); -#endif - - h = h >> (32 - mm->ip6_reass_ht_log2len); - - f64 now = vlib_time_now (mm->vlib_main); - - //Cache garbage collection - while 
(mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - map_ip6_reass_t *last = - pool_elt_at_index (mm->ip6_reass_pool, mm->ip6_reass_fifo_last); - if (last->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) < now) - map_ip6_reass_free (last, pi_to_drop); - else - break; - } - - if ((r = map_ip6_reass_lookup (&k, h, now))) - return r; - - if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size) - return NULL; - - pool_get (mm->ip6_reass_pool, r); - mm->ip6_reass_allocated++; - for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - { - r->fragments[i].pi = ~0; - r->fragments[i].next_data_len = 0; - r->fragments[i].next_data_offset = 0; - } - - u32 ri = map_ip6_reass_pool_index (r); - - //Link in new bucket - r->bucket = h; - r->bucket_next = mm->ip6_reass_hash_table[h]; - mm->ip6_reass_hash_table[h] = ri; - - //Link in fifo - if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - r->fifo_next = - pool_elt_at_index (mm->ip6_reass_pool, - mm->ip6_reass_fifo_last)->fifo_next; - r->fifo_prev = mm->ip6_reass_fifo_last; - pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri; - pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri; - } - else - { - r->fifo_next = r->fifo_prev = ri; - mm->ip6_reass_fifo_last = ri; - } - - //Set other fields - r->ts = now; - r->key = k; - r->ip4_header.ip_version_and_header_length = 0; -#ifdef MAP_IP6_REASS_COUNT_BYTES - r->expected_total = 0xffff; - r->forwarded = 0; -#endif - return r; -} - -int -map_ip6_reass_add_fragment (map_ip6_reass_t * r, u32 pi, - u16 data_offset, u16 next_data_offset, - u8 * data_start, u16 data_len) -{ - map_ip6_fragment_t *f = NULL, *prev_f = NULL; - u16 copied_len = (data_len > 20) ? 
20 : data_len; - - if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers) - return -1; - - //Lookup for fragments for the current buffer - //and the one before that - int i; - for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - { - if (data_offset && r->fragments[i].next_data_offset == data_offset) - { - prev_f = &r->fragments[i]; // This is buffer for previous packet - } - else if (r->fragments[i].next_data_offset == next_data_offset) - { - f = &r->fragments[i]; // This is a buffer for the current packet - } - else if (r->fragments[i].next_data_offset == 0) - { //Available - if (f == NULL) - f = &r->fragments[i]; - else if (prev_f == NULL) - prev_f = &r->fragments[i]; - } - } - - if (!f || f->pi != ~0) - return -1; - - if (data_offset) - { - if (!prev_f) - return -1; - - clib_memcpy (prev_f->next_data, data_start, copied_len); - prev_f->next_data_len = copied_len; - prev_f->next_data_offset = data_offset; - } - else - { - if (((ip4_header_t *) data_start)->ip_version_and_header_length != 0x45) - return -1; - - if (r->ip4_header.ip_version_and_header_length == 0) - clib_memcpy (&r->ip4_header, data_start, sizeof (ip4_header_t)); - } - - if (data_len > 20) - { - f->next_data_offset = next_data_offset; - f->pi = pi; - map_main.ip6_reass_buffered_counter++; - } - return 0; -} - -void -map_ip4_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - int i; - - if (dropped_packets) - *dropped_packets = mm->ip4_reass_buffered_counter; - if (trashed_reass) - *trashed_reass = mm->ip4_reass_allocated; - if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) - { - u16 ri = mm->ip4_reass_fifo_last; - do - { - map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri); - for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i] != ~0) - map_ip4_drop_pi (r->fragments[i]); - - ri = r->fifo_next; - pool_put (mm->ip4_reass_pool, r); - } - while (ri != mm->ip4_reass_fifo_last); - 
} - - vec_free (mm->ip4_reass_hash_table); - vec_resize (mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len); - for (i = 0; i < (1 << mm->ip4_reass_ht_log2len); i++) - mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE; - pool_free (mm->ip4_reass_pool); - pool_alloc (mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size); - - mm->ip4_reass_allocated = 0; - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - mm->ip4_reass_buffered_counter = 0; -} - -u8 -map_get_ht_log2len (f32 ht_ratio, u16 pool_size) -{ - u32 desired_size = (u32) (pool_size * ht_ratio); - u8 i; - for (i = 1; i < 31; i++) - if ((1 << i) >= desired_size) - return i; - return 4; -} - -int -map_ip4_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) - return -1; - - map_ip4_reass_lock (); - mm->ip4_reass_conf_ht_ratio = ht_ratio; - mm->ip4_reass_ht_log2len = - map_get_ht_log2len (ht_ratio, mm->ip4_reass_conf_pool_size); - map_ip4_reass_reinit (trashed_reass, dropped_packets); - map_ip4_reass_unlock (); - return 0; -} - -int -map_ip4_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) - return -1; - - map_ip4_reass_lock (); - mm->ip4_reass_conf_pool_size = pool_size; - map_ip4_reass_reinit (trashed_reass, dropped_packets); - map_ip4_reass_unlock (); - return 0; -} - -int -map_ip4_reass_conf_lifetime (u16 lifetime_ms) -{ - map_main.ip4_reass_conf_lifetime_ms = lifetime_ms; - return 0; -} - -int -map_ip4_reass_conf_buffers (u32 buffers) -{ - map_main.ip4_reass_conf_buffers = buffers; - return 0; -} - -void -map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (dropped_packets) - *dropped_packets = mm->ip6_reass_buffered_counter; - if (trashed_reass) - *trashed_reass = mm->ip6_reass_allocated; - int i; - if (mm->ip6_reass_fifo_last != 
MAP_REASS_INDEX_NONE) - { - u16 ri = mm->ip6_reass_fifo_last; - do - { - map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri); - for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) - if (r->fragments[i].pi != ~0) - map_ip6_drop_pi (r->fragments[i].pi); - - ri = r->fifo_next; - pool_put (mm->ip6_reass_pool, r); - } - while (ri != mm->ip6_reass_fifo_last); - mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; - } - - vec_free (mm->ip6_reass_hash_table); - vec_resize (mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len); - for (i = 0; i < (1 << mm->ip6_reass_ht_log2len); i++) - mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE; - pool_free (mm->ip6_reass_pool); - pool_alloc (mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size); - - mm->ip6_reass_allocated = 0; - mm->ip6_reass_buffered_counter = 0; -} - -int -map_ip6_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) - return -1; - - map_ip6_reass_lock (); - mm->ip6_reass_conf_ht_ratio = ht_ratio; - mm->ip6_reass_ht_log2len = - map_get_ht_log2len (ht_ratio, mm->ip6_reass_conf_pool_size); - map_ip6_reass_reinit (trashed_reass, dropped_packets); - map_ip6_reass_unlock (); - return 0; -} - -int -map_ip6_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass, - u32 * dropped_packets) -{ - map_main_t *mm = &map_main; - if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) - return -1; - - map_ip6_reass_lock (); - mm->ip6_reass_conf_pool_size = pool_size; - map_ip6_reass_reinit (trashed_reass, dropped_packets); - map_ip6_reass_unlock (); - return 0; -} - -int -map_ip6_reass_conf_lifetime (u16 lifetime_ms) -{ - map_main.ip6_reass_conf_lifetime_ms = lifetime_ms; - return 0; -} - -int -map_ip6_reass_conf_buffers (u32 buffers) -{ - map_main.ip6_reass_conf_buffers = buffers; - return 0; -} - -/* *INDENT-OFF* */ - -/*? 
- * Configure MAP reassembly behaviour - * - * @cliexpar - * @cliexstart{map params reassembly} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = { - .path = "map params reassembly", - .short_help = "map params reassembly [ip4 | ip6] [lifetime ] " - "[pool-size ] [buffers ] " - "[ht-ratio ]", - .function = map_params_reass_command_fn, -}; - -/*? - * Set or copy the IP TOS/Traffic Class field - * - * @cliexpar - * @cliexstart{map params traffic-class} - * - * This command is used to set the traffic-class field in translated - * or encapsulated packets. If copy is specifed (the default) then the - * traffic-class/TOS field is copied from the original packet to the - * translated / encapsulating header. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_traffic_class_command, static) = { - .path = "map params traffic-class", - .short_help = "map params traffic-class {0x0-0xff | copy}", - .function = map_traffic_class_command_fn, -}; - -/*? - * Bypass IP4/IP6 lookup - * - * @cliexpar - * @cliexstart{map params pre-resolve} - * - * Bypass a second FIB lookup of the translated or encapsulated - * packet, and forward the packet directly to the specified - * next-hop. This optimization trades forwarding flexibility for - * performance. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_pre_resolve_command, static) = { - .path = "map params pre-resolve", - .short_help = " map params pre-resolve {ip4-nh
} " - "| {ip6-nh
}", - .function = map_pre_resolve_command_fn, -}; - -/*? - * Enable or disable the MAP-E inbound security check - * - * @cliexpar - * @cliexstart{map params security-check} - * - * By default, a decapsulated packet's IPv4 source address will be - * verified against the outer header's IPv6 source address. Disabling - * this feature will allow IPv4 source address spoofing. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_security_check_command, static) = { - .path = "map params security-check", - .short_help = "map params security-check on|off", - .function = map_security_check_command_fn, -}; - -/*? - * Specifiy the IPv4 source address used for relayed ICMP error messages - * - * @cliexpar - * @cliexstart{map params icmp source-address} - * - * This command specifies which IPv4 source address (must be local to - * the system), that is used for relayed received IPv6 ICMP error - * messages. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = { - .path = "map params icmp source-address", - .short_help = "map params icmp source-address ", - .function = map_icmp_relay_source_address_command_fn, -}; - -/*? - * Send IPv6 ICMP unreachables - * - * @cliexpar - * @cliexstart{map params icmp6 unreachables} - * - * Send IPv6 ICMP unreachable messages back if security check fails or - * no MAP domain exists. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_icmp_unreachables_command, static) = { - .path = "map params icmp6 unreachables", - .short_help = "map params icmp6 unreachables {on|off}", - .function = map_icmp_unreachables_command_fn, -}; - -/*? - * Configure MAP fragmentation behaviour - * - * @cliexpar - * @cliexstart{map params fragment} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_fragment_command, static) = { - .path = "map params fragment", - .short_help = "map params fragment inner|outer", - .function = map_fragment_command_fn, -}; - -/*? 
- * Ignore the IPv4 Don't fragment bit - * - * @cliexpar - * @cliexstart{map params fragment ignore-df} - * - * Allows fragmentation of the IPv4 packet even if the DF bit is - * set. The choice between inner or outer fragmentation of tunnel - * packets is complicated. The benefit of inner fragmentation is that - * the ultimate endpoint must reassemble, instead of the tunnel - * endpoint. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_fragment_df_command, static) = { - .path = "map params fragment ignore-df", - .short_help = "map params fragment ignore-df on|off", - .function = map_fragment_df_command_fn, -}; - -/*? - * Specifiy if the inbound security check should be done on fragments - * - * @cliexpar - * @cliexstart{map params security-check fragments} - * - * Typically the inbound on-decapsulation security check is only done - * on the first packet. The packet that contains the L4 - * information. While a security check on every fragment is possible, - * it has a cost. State must be created on the first fragment. - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_security_check_frag_command, static) = { - .path = "map params security-check fragments", - .short_help = "map params security-check fragments on|off", - .function = map_security_check_frag_command_fn, -}; - -/*? - * Add MAP domain - * - * @cliexpar - * @cliexstart{map add domain} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_add_domain_command, static) = { - .path = "map add domain", - .short_help = "map add domain ip4-pfx ip6-pfx " - "ip6-src ea-bits-len psid-offset psid-len " - "[map-t] [map-ce] [mtu ]", - .function = map_add_domain_command_fn, -}; - -/*? - * Add MAP rule to a domain - * - * @cliexpar - * @cliexstart{map add rule} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_add_rule_command, static) = { - .path = "map add rule", - .short_help = "map add rule index psid ip6-dst ", - .function = map_add_rule_command_fn, -}; - -/*? 
- * Delete MAP domain - * - * @cliexpar - * @cliexstart{map del domain} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(map_del_command, static) = { - .path = "map del domain", - .short_help = "map del domain index ", - .function = map_del_domain_command_fn, -}; - -/*? - * Show MAP domains - * - * @cliexpar - * @cliexstart{show map domain} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(show_map_domain_command, static) = { - .path = "show map domain", - .short_help = "show map domain index [counters]", - .function = show_map_domain_command_fn, -}; - -/*? - * Show MAP statistics - * - * @cliexpar - * @cliexstart{show map stats} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(show_map_stats_command, static) = { - .path = "show map stats", - .short_help = "show map stats", - .function = show_map_stats_command_fn, -}; - -/*? - * Show MAP fragmentation information - * - * @cliexpar - * @cliexstart{show map fragments} - * @cliexend - ?*/ -VLIB_CLI_COMMAND(show_map_fragments_command, static) = { - .path = "show map fragments", - .short_help = "show map fragments", - .function = show_map_fragments_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -map_config (vlib_main_t * vm, unformat_input_t * input) -{ - map_main_t *mm = &map_main; - u8 is_ce = false; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "customer edge")) - is_ce = true; - else - return clib_error_return (0, "unknown input '%U'", - format_unformat_error, input); - } - - mm->is_ce = is_ce; - - return 0; -} - -VLIB_CONFIG_FUNCTION (map_config, "map"); - -/* - * map_init - */ -clib_error_t * -map_init (vlib_main_t * vm) -{ - map_main_t *mm = &map_main; - mm->vnet_main = vnet_get_main (); - mm->vlib_main = vm; - -#ifdef MAP_SKIP_IP6_LOOKUP - fib_protocol_t proto; - - FOR_EACH_FIB_PROTOCOL (proto) - { - map_pre_resolve_init (&pre_resolved[proto]); - } -#endif - - /* traffic class */ - mm->tc = 0; - mm->tc_copy = true; - - /* Inbound security check */ - mm->sec_check = true; - 
mm->sec_check_frag = false; - - /* ICMP6 Type 1, Code 5 for security check failure */ - mm->icmp6_enabled = false; - - mm->is_ce = false; - - /* Inner or outer fragmentation */ - mm->frag_inner = false; - mm->frag_ignore_df = false; - - vec_validate (mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1); - mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx"; - mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx"; - - vlib_validate_simple_counter (&mm->icmp_relayed, 0); - vlib_zero_simple_counter (&mm->icmp_relayed, 0); - - /* IP4 virtual reassembly */ - mm->ip4_reass_hash_table = 0; - mm->ip4_reass_pool = 0; - mm->ip4_reass_lock = - clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); - *mm->ip4_reass_lock = 0; - mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; - mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; - mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; - mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT; - mm->ip4_reass_ht_log2len = - map_get_ht_log2len (mm->ip4_reass_conf_ht_ratio, - mm->ip4_reass_conf_pool_size); - mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; - map_ip4_reass_reinit (NULL, NULL); - - /* IP6 virtual reassembly */ - mm->ip6_reass_hash_table = 0; - mm->ip6_reass_pool = 0; - mm->ip6_reass_lock = - clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); - *mm->ip6_reass_lock = 0; - mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT; - mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT; - mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT; - mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT; - mm->ip6_reass_ht_log2len = - map_get_ht_log2len (mm->ip6_reass_conf_ht_ratio, - mm->ip6_reass_conf_pool_size); - mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; - map_ip6_reass_reinit (NULL, NULL); - -#ifdef MAP_SKIP_IP6_LOOKUP - fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft); -#endif - map_dpo_module_init (); - - 
return 0; -} - -VLIB_INIT_FUNCTION (map_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h deleted file mode 100644 index c304a1ea38a..00000000000 --- a/src/vnet/map/map.h +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAP_SKIP_IP6_LOOKUP 1 - -int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, - ip6_address_t * ip6_prefix, u8 ip6_prefix_len, - ip6_address_t * ip6_src, u8 ip6_src_len, - u8 ea_bits_len, u8 psid_offset, u8 psid_length, - u32 * map_domain_index, u16 mtu, u8 flags); -int map_delete_domain (u32 map_domain_index); -int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, - u8 is_add); -u8 *format_map_trace (u8 * s, va_list * args); - -typedef enum -{ - MAP_DOMAIN_PREFIX = 1 << 0, - MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T - MAP_DOMAIN_RFC6052 = 1 << 2, -} __attribute__ ((__packed__)) map_domain_flags_e; - -/** - * IP4 reassembly logic: - * One virtually reassembled flow requires a map_ip4_reass_t structure in order - * to keep the first-fragment port number and, optionally, cache out of sequence - * packets. 
- * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. - * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. - * When a new structure needs to be used, it is allocated from available ones. - * If there is no structure available, the oldest in use is selected and used if and - * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. - * In case no structure can be allocated, the fragment is dropped. - */ - -#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ -#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) -#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures -#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 - -#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly - -#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ -#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) -#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures -#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048 - -#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 - -#define MAP_IP6_REASS_COUNT_BYTES -#define MAP_IP4_REASS_COUNT_BYTES - -//#define IP6_MAP_T_OVERRIDE_TOS 0 - -/* - * This structure _MUST_ be no larger than a single cache line (64 bytes). - * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive. 
- */ -typedef struct -{ - /* Required for pool_get_aligned */ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - ip6_address_t ip6_src; - ip6_address_t ip6_prefix; - ip6_address_t *rules; - u32 suffix_mask; - ip4_address_t ip4_prefix; - u16 psid_mask; - u16 mtu; - map_domain_flags_e flags; - u8 ip6_prefix_len; - u8 ip6_src_len; - u8 ea_bits_len; - u8 psid_offset; - u8 psid_length; - - /* helpers */ - u8 psid_shift; - u8 suffix_shift; - u8 ea_shift; - - /* not used by forwarding */ - u8 ip4_prefix_len; -} map_domain_t; - -STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES), - "MAP domain fits in one cacheline"); - -#define MAP_REASS_INDEX_NONE ((u16)0xffff) - -/* - * Hash key, padded out to 16 bytes for fast compare - */ -/* *INDENT-OFF* */ -typedef union { - CLIB_PACKED (struct { - ip4_address_t src; - ip4_address_t dst; - u16 fragment_id; - u8 protocol; - }); - u64 as_u64[2]; - u32 as_u32[4]; -} map_ip4_reass_key_t; -/* *INDENT-ON* */ - -typedef struct -{ - map_ip4_reass_key_t key; - f64 ts; -#ifdef MAP_IP4_REASS_COUNT_BYTES - u16 expected_total; - u16 forwarded; -#endif - i32 port; - u16 bucket; - u16 bucket_next; - u16 fifo_prev; - u16 fifo_next; - u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; -} map_ip4_reass_t; - -/* - * MAP domain counters - */ -typedef enum -{ - /* Simple counters */ - MAP_DOMAIN_IPV4_FRAGMENT = 0, - /* Combined counters */ - MAP_DOMAIN_COUNTER_RX = 0, - MAP_DOMAIN_COUNTER_TX, - MAP_N_DOMAIN_COUNTER -} map_domain_counter_t; - -/* - * main_main_t - */ -/* *INDENT-OFF* */ -typedef union { - CLIB_PACKED (struct { - ip6_address_t src; - ip6_address_t dst; - u32 fragment_id; - u8 protocol; - }); - u64 as_u64[5]; - u32 as_u32[10]; -} map_ip6_reass_key_t; -/* *INDENT-OFF* */ - -typedef struct { - u32 pi; //Cached packet or ~0 - u16 next_data_offset; //The data offset of the additional 20 bytes or ~0 - u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment) - u8 next_data[20]; //The 20 additional bytes -} 
map_ip6_fragment_t; - -typedef struct { - map_ip6_reass_key_t key; - f64 ts; -#ifdef MAP_IP6_REASS_COUNT_BYTES - u16 expected_total; - u16 forwarded; -#endif - u16 bucket; //What hash bucket this element is linked in - u16 bucket_next; - u16 fifo_prev; - u16 fifo_next; - ip4_header_t ip4_header; - map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; -} map_ip6_reass_t; - -#ifdef MAP_SKIP_IP6_LOOKUP -/** - * A pre-resolved next-hop - */ -typedef struct map_main_pre_resolved_t_ -{ - /** - * Linkage into the FIB graph - */ - fib_node_t node; - - /** - * The FIB entry index of the next-hop - */ - fib_node_index_t fei; - - /** - * This object sibling index on the FIB entry's child dependency list - */ - u32 sibling; - - /** - * The Load-balance object index to use to forward - */ - dpo_id_t dpo; -} map_main_pre_resolved_t; - -/** - * Pre-resolved next hops for v4 and v6. Why these are global and not - * per-domain is beyond me. - */ -extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; -#endif - -typedef struct { - /* pool of MAP domains */ - map_domain_t *domains; - - /* MAP Domain packet/byte counters indexed by map domain index */ - vlib_simple_counter_main_t *simple_domain_counters; - vlib_combined_counter_main_t *domain_counters; - volatile u32 *counter_lock; - - /* Traffic class: zero, copy (~0) or fixed value */ - u8 tc; - bool tc_copy; - - bool sec_check; /* Inbound security check */ - bool sec_check_frag; /* Inbound security check for (subsequent) fragments */ - bool icmp6_enabled; /* Send destination unreachable for security check failure */ - - bool is_ce; /* If this MAP node is a Customer Edge router*/ - - /* ICMPv6 -> ICMPv4 relay parameters */ - ip4_address_t icmp4_src_address; - vlib_simple_counter_main_t icmp_relayed; - - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - - /* - * IPv4 encap and decap reassembly - */ - /* Configuration */ - f32 ip4_reass_conf_ht_ratio; //Size of ht is 
2^ceil(log2(ratio*pool_size)) - u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures - u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms - u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly - - /* Runtime */ - map_ip4_reass_t *ip4_reass_pool; - u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len - u16 ip4_reass_allocated; - u16 *ip4_reass_hash_table; - u16 ip4_reass_fifo_last; - volatile u32 *ip4_reass_lock; - - /* Counters */ - u32 ip4_reass_buffered_counter; - - bool frag_inner; /* Inner or outer fragmentation */ - bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */ - - /* - * IPv6 decap reassembly - */ - /* Configuration */ - f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) - u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures - u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms - u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly - - /* Runtime */ - map_ip6_reass_t *ip6_reass_pool; - u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len - u16 ip6_reass_allocated; - u16 *ip6_reass_hash_table; - u16 ip6_reass_fifo_last; - volatile u32 *ip6_reass_lock; - - /* Counters */ - u32 ip6_reass_buffered_counter; - -} map_main_t; - -/* - * MAP Error counters/messages - */ -#define foreach_map_error \ - /* Must be first. 
*/ \ - _(NONE, "valid MAP packets") \ - _(BAD_PROTOCOL, "bad protocol") \ - _(SEC_CHECK, "security check failed") \ - _(ENCAP_SEC_CHECK, "encap security check failed") \ - _(DECAP_SEC_CHECK, "decap security check failed") \ - _(ICMP, "unable to translate ICMP") \ - _(ICMP_RELAY, "unable to relay ICMP") \ - _(UNKNOWN, "unknown") \ - _(NO_BINDING, "no binding") \ - _(NO_DOMAIN, "no domain") \ - _(FRAGMENTED, "packet is a fragment") \ - _(FRAGMENT_MEMORY, "could not cache fragment") \ - _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ - _(FRAGMENT_DROPPED, "dropped cached fragment") \ - _(MALFORMED, "malformed packet") \ - _(DF_SET, "can't fragment, DF set") - -typedef enum { -#define _(sym,str) MAP_ERROR_##sym, - foreach_map_error -#undef _ - MAP_N_ERROR, - } map_error_t; - -u64 map_error_counter_get(u32 node_index, map_error_t map_error); - -typedef struct { - u32 map_domain_index; - u16 port; -} map_trace_t; - -extern map_main_t map_main; - -extern vlib_node_registration_t ip4_map_node; -extern vlib_node_registration_t ip6_map_node; - -extern vlib_node_registration_t ip4_map_t_node; -extern vlib_node_registration_t ip4_map_t_fragmented_node; -extern vlib_node_registration_t ip4_map_t_tcp_udp_node; -extern vlib_node_registration_t ip4_map_t_icmp_node; - -extern vlib_node_registration_t ip6_map_t_node; -extern vlib_node_registration_t ip6_map_t_fragmented_node; -extern vlib_node_registration_t ip6_map_t_tcp_udp_node; -extern vlib_node_registration_t ip6_map_t_icmp_node; - -/* - * map_get_pfx - */ -static_always_inline u64 -map_get_pfx (map_domain_t *d, u32 addr, u16 port) -{ - u16 psid = (port >> d->psid_shift) & d->psid_mask; - - if (d->ea_bits_len == 0 && d->rules) - return clib_net_to_host_u64(d->rules[psid].as_u64[0]); - - u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask; - u64 ea = d->ea_bits_len == 0 ? 
0 : (((u64) suffix << d->psid_length)) | psid; - - return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift; -} - -static_always_inline u64 -map_get_pfx_net (map_domain_t *d, u32 addr, u16 port) -{ - return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr), - clib_net_to_host_u16(port))); -} - -/* - * map_get_sfx - */ -static_always_inline u64 -map_get_sfx (map_domain_t *d, u32 addr, u16 port) -{ - u16 psid = (port >> d->psid_shift) & d->psid_mask; - - /* Shared 1:1 mode. */ - if (d->ea_bits_len == 0 && d->rules) - return clib_net_to_host_u64(d->rules[psid].as_u64[1]); - if (d->ip6_prefix_len == 128) - return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); - - if (d->flags & MAP_DOMAIN_RFC6052) - return (clib_net_to_host_u64(d->ip6_prefix.as_u64[1]) | addr); - - /* IPv4 prefix */ - if (d->flags & MAP_DOMAIN_PREFIX) - return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16; - - /* Shared or full IPv4 address */ - return ((u64) addr << 16) | psid; -} - -static_always_inline u64 -map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) -{ - return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr), - clib_net_to_host_u16(port))); -} - -static_always_inline u32 -map_get_ip4 (ip6_address_t *addr, map_domain_flags_e flags) -{ - if (flags & MAP_DOMAIN_RFC6052) - return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1])); - else - return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); -} - -/* - * Get the MAP domain from an IPv4 lookup adjacency. - */ -static_always_inline map_domain_t * -ip4_map_get_domain (u32 mdi) -{ - map_main_t *mm = &map_main; - - return pool_elt_at_index(mm->domains, mdi); -} - -/* - * Get the MAP domain from an IPv6 lookup adjacency. - * If the IPv6 address or prefix is not shared, no lookup is required. - * The IPv4 address is used otherwise. 
- */ -static_always_inline map_domain_t * -ip6_map_get_domain (u32 mdi, - ip4_address_t *addr, - u32 *map_domain_index, - u8 *error) -{ - map_main_t *mm = &map_main; - -#ifdef TODO - /* - * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. - * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) - */ - //#ifdef MAP_NONSHARED_DOMAIN_ENABLED - //#error "How can you be sure this domain is not shared?" -#endif - - *map_domain_index = mdi; - return pool_elt_at_index(mm->domains, mdi); - -#ifdef TODO - u32 lbi = ip4_fib_forwarding_lookup(0, addr); - const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0); - if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || - dpo->dpoi_type == map_t_dpo_type)) - { - *map_domain_index = dpo->dpoi_index; - return pool_elt_at_index(mm->domains, *map_domain_index); - } - *error = MAP_ERROR_NO_DOMAIN; - return NULL; -#endif -} - -map_ip4_reass_t * -map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, - u8 protocol, u32 **pi_to_drop); -void -map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); - -#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {} -#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0) - -static_always_inline void -map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) -{ - int i; - for (i=0; ifragments[i] != ~0) { - vec_add1(*pi, r->fragments[i]); - r->fragments[i] = ~0; - map_main.ip4_reass_buffered_counter--; - } -} - -int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); - -map_ip6_reass_t * -map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, - u8 protocol, u32 **pi_to_drop); -void -map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop); - -#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {} -#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0) - -int 
-map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, - u16 data_offset, u16 next_data_offset, - u8 *data_start, u16 data_len); - -void map_ip4_drop_pi(u32 pi); - -int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 -int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) -int map_ip4_reass_conf_lifetime(u16 lifetime_ms); -#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff -int map_ip4_reass_conf_buffers(u32 buffers); -#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) - -void map_ip6_drop_pi(u32 pi); - - -int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100 -int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); -#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff) -int map_ip6_reass_conf_lifetime(u16 lifetime_ms); -#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff -int map_ip6_reass_conf_buffers(u32 buffers); -#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) - -static_always_inline void -ip4_map_t_embedded_address (map_domain_t *d, - ip6_address_t *ip6, const ip4_address_t *ip4) -{ - ASSERT(d->ip6_src_len == 96); //No support for other lengths for now - ip6->as_u64[0] = d->ip6_src.as_u64[0]; - ip6->as_u32[2] = d->ip6_src.as_u32[2]; - ip6->as_u32[3] = ip4->as_u32; -} - -static_always_inline u32 -ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr) -{ - ASSERT(d->ip6_src_len == 96); //No support for other lengths for now - return addr->as_u32[3]; -} - -static inline void -map_domain_counter_lock (map_main_t *mm) -{ - if (mm->counter_lock) - while (__sync_lock_test_and_set(mm->counter_lock, 1)) - /* zzzz */ ; -} -static inline void -map_domain_counter_unlock (map_main_t *mm) -{ - if (mm->counter_lock) - *mm->counter_lock = 0; -} - - -static_always_inline void 
-map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, - vlib_node_runtime_t *node, vlib_error_t *error, - u32 next) -{ - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - //Deal with fragments that are ready - from = pi_vector; - n_left_from = vec_len(pi_vector); - next_index = node->cached_next_index; - while (n_left_from > 0) { - vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - while (n_left_from > 0 && n_left_to_next > 0) { - u32 pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0); - p0->error = *error; - vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next); - } - vlib_put_next_frame(vm, node, next_index, n_left_to_next); - } -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c deleted file mode 100644 index e06dd594d59..00000000000 --- a/src/vnet/map/map_api.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - *------------------------------------------------------------------ - * map_api.c - vnet map api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- *------------------------------------------------------------------ - */ - -#include -#include - -#include "map.h" -#include -#include -#include -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(MAP_ADD_DOMAIN, map_add_domain) \ -_(MAP_DEL_DOMAIN, map_del_domain) \ -_(MAP_ADD_DEL_RULE, map_add_del_rule) \ -_(MAP_DOMAIN_DUMP, map_domain_dump) \ -_(MAP_RULE_DUMP, map_rule_dump) \ -_(MAP_SUMMARY_STATS, map_summary_stats) - -static void -vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp) -{ - vl_api_map_add_domain_reply_t *rmp; - int rv = 0; - u32 index; - u8 flags = 0; - - if (mp->is_translation) - flags |= MAP_DOMAIN_TRANSLATION; - - if (mp->is_rfc6052) - flags |= MAP_DOMAIN_RFC6052; - - rv = - map_create_domain ((ip4_address_t *) & mp->ip4_prefix, mp->ip4_prefix_len, - (ip6_address_t *) & mp->ip6_prefix, mp->ip6_prefix_len, - (ip6_address_t *) & mp->ip6_src, - mp->ip6_src_prefix_len, mp->ea_bits_len, - mp->psid_offset, mp->psid_length, &index, - ntohs (mp->mtu), flags); - - /* *INDENT-OFF* */ - REPLY_MACRO2(VL_API_MAP_ADD_DOMAIN_REPLY, - ({ - rmp->index = ntohl(index); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_map_del_domain_t_handler (vl_api_map_del_domain_t * mp) -{ - vl_api_map_del_domain_reply_t *rmp; - int rv = 0; - - rv = map_delete_domain (ntohl (mp->index)); - - REPLY_MACRO (VL_API_MAP_DEL_DOMAIN_REPLY); -} - -static void -vl_api_map_add_del_rule_t_handler (vl_api_map_add_del_rule_t * mp) -{ - vl_api_map_del_domain_reply_t *rmp; - int rv = 0; - - rv = - map_add_del_psid (ntohl (mp->index), ntohs (mp->psid), - (ip6_address_t *) mp->ip6_dst, mp->is_add); - - REPLY_MACRO 
(VL_API_MAP_ADD_DEL_RULE_REPLY); -} - -static void -vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp) -{ - vl_api_map_domain_details_t *rmp; - map_main_t *mm = &map_main; - map_domain_t *d; - vl_api_registration_t *reg; - - if (pool_elts (mm->domains) == 0) - return; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - /* *INDENT-OFF* */ - pool_foreach(d, mm->domains, - ({ - /* Make sure every field is initiated (or don't skip the memset()) */ - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs(VL_API_MAP_DOMAIN_DETAILS); - rmp->domain_index = htonl(d - mm->domains); - rmp->ea_bits_len = d->ea_bits_len; - rmp->psid_offset = d->psid_offset; - rmp->psid_length = d->psid_length; - clib_memcpy(rmp->ip4_prefix, &d->ip4_prefix, sizeof(rmp->ip4_prefix)); - rmp->ip4_prefix_len = d->ip4_prefix_len; - clib_memcpy(rmp->ip6_prefix, &d->ip6_prefix, sizeof(rmp->ip6_prefix)); - rmp->ip6_prefix_len = d->ip6_prefix_len; - clib_memcpy(rmp->ip6_src, &d->ip6_src, sizeof(rmp->ip6_src)); - rmp->ip6_src_len = d->ip6_src_len; - rmp->mtu = htons(d->mtu); - rmp->is_translation = (d->flags & MAP_DOMAIN_TRANSLATION); - rmp->context = mp->context; - - vl_api_send_msg (reg, (u8 *)&rmp); - })); - /* *INDENT-ON* */ -} - -static void -vl_api_map_rule_dump_t_handler (vl_api_map_rule_dump_t * mp) -{ - vl_api_registration_t *reg; - u16 i; - ip6_address_t dst; - vl_api_map_rule_details_t *rmp; - map_main_t *mm = &map_main; - u32 domain_index = ntohl (mp->domain_index); - map_domain_t *d; - - if (pool_elts (mm->domains) == 0) - return; - - d = pool_elt_at_index (mm->domains, domain_index); - if (!d || !d->rules) - { - return; - } - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - for (i = 0; i < (0x1 << d->psid_length); i++) - { - dst = d->rules[i]; - if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0) - { - continue; - } - rmp = vl_msg_api_alloc (sizeof (*rmp)); - memset (rmp, 0, sizeof (*rmp)); - 
rmp->_vl_msg_id = ntohs (VL_API_MAP_RULE_DETAILS); - rmp->psid = htons (i); - clib_memcpy (rmp->ip6_dst, &dst, sizeof (rmp->ip6_dst)); - rmp->context = mp->context; - vl_api_send_msg (reg, (u8 *) rmp); - } -} - -static void -vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp) -{ - vl_api_map_summary_stats_reply_t *rmp; - vlib_combined_counter_main_t *cm; - vlib_counter_t v; - int i, which; - u64 total_pkts[VLIB_N_RX_TX]; - u64 total_bytes[VLIB_N_RX_TX]; - map_main_t *mm = &map_main; - vl_api_registration_t *reg; - - reg = vl_api_client_index_to_registration (mp->client_index); - if (!reg) - return; - - rmp = vl_msg_api_alloc (sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_MAP_SUMMARY_STATS_REPLY); - rmp->context = mp->context; - rmp->retval = 0; - - if (pool_elts (mm->domains) == 0) - { - rmp->retval = -1; - goto out; - } - - memset (total_pkts, 0, sizeof (total_pkts)); - memset (total_bytes, 0, sizeof (total_bytes)); - - map_domain_counter_lock (mm); - vec_foreach (cm, mm->domain_counters) - { - which = cm - mm->domain_counters; - - for (i = 0; i < vlib_combined_counter_n_counters (cm); i++) - { - vlib_get_combined_counter (cm, i, &v); - total_pkts[which] += v.packets; - total_bytes[which] += v.bytes; - } - } - - map_domain_counter_unlock (mm); - - /* Note: in network byte order! */ - rmp->total_pkts[MAP_DOMAIN_COUNTER_RX] = - clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_RX]); - rmp->total_bytes[MAP_DOMAIN_COUNTER_RX] = - clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_RX]); - rmp->total_pkts[MAP_DOMAIN_COUNTER_TX] = - clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_TX]); - rmp->total_bytes[MAP_DOMAIN_COUNTER_TX] = - clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_TX]); - rmp->total_bindings = clib_host_to_net_u64 (pool_elts (mm->domains)); - rmp->total_ip4_fragments = 0; // Not yet implemented. Should be a simple counter. 
- rmp->total_security_check[MAP_DOMAIN_COUNTER_TX] = - clib_host_to_net_u64 (map_error_counter_get - (ip4_map_node.index, MAP_ERROR_ENCAP_SEC_CHECK)); - rmp->total_security_check[MAP_DOMAIN_COUNTER_RX] = - clib_host_to_net_u64 (map_error_counter_get - (ip4_map_node.index, MAP_ERROR_DECAP_SEC_CHECK)); - -out: - vl_api_send_msg (reg, (u8 *) rmp); -} - -/* - * vpe_api_hookup - * Add vpe's API message handlers to the table. - * vlib has alread mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_map; -#undef _ -} - -static clib_error_t * -map_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = &api_main; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (map_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/map/map_doc.md b/src/vnet/map/map_doc.md deleted file mode 100644 index 17f3c51174b..00000000000 --- a/src/vnet/map/map_doc.md +++ /dev/null @@ -1,69 +0,0 @@ -# VPP MAP and Lw4o6 implementation {#map_doc} - -This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations. -Everything that is not directly obvious should come here. - - - -## MAP-E Virtual Reassembly - -The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. 
This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments. - -Fragment caching and handling is not always necessary. It is performed when: -* An IPv4 fragment is received and the destination IPv4 address is shared. -* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on. -* An IPv6 fragment is received. - -There are 3 dedicated nodes: -* ip4-map-reass -* ip6-map-ip4-reass -* ip6-map-ip6-reass - -ip4-map sends all fragments to ip4-map-reass. -ip6-map sends all inner-fragments to ip6-map-ip4-reass. -ip6-map sends all outer-fragments to ip6-map-ip6-reass. - -IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes. - -An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received. - -#### Virtual Reassembly configuration - -IPv4 and IPv6 virtual reassembly support the following configuration: - map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>] - -lifetime: - The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds.
Those values are not realistic for high-throughput cases. - -buffers: - The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffer pool. - -pool-size: - The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total. - -ht-ratio: - The number of buckets in the hash-table is pool-size * ht-ratio. - - -Any time pool-size or ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost. - - -##### Additional considerations - -Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that are far apart. - -Let: -R be the packet rate at which fragments are received. -F be the number of fragments per packet. - -Assuming the first fragment is always received last, we should have: -buffers > lifetime * R / F * (F - 1) -pool-size > lifetime * R/F - -This is a worst case. Receiving the first fragment earlier helps reduce the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reduce 'pool-size'. - -But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, 5Mpps packet rate would require buffering up to 2.5 million fragments. - -If you want to do that, be prepared to configure a lot of fragments.
- - diff --git a/src/vnet/map/map_dpo.c b/src/vnet/map/map_dpo.c deleted file mode 100644 index 430c1fbf266..00000000000 --- a/src/vnet/map/map_dpo.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -/** - * The register MAP DPO type - */ -dpo_type_t map_dpo_type; -dpo_type_t map_t_dpo_type; - -void -map_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo) -{ - dpo_set(dpo, - map_dpo_type, - dproto, - domain_index); -} - -void -map_t_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo) -{ - dpo_set(dpo, - map_t_dpo_type, - dproto, - domain_index); -} - - -u8* -format_map_dpo (u8 *s, va_list *args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - - return (format(s, "map: domain:%d", index)); -} - -u8* -format_map_t_dpo (u8 *s, va_list *args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - - return (format(s, "map-t: domain:%d", index)); -} - - -static void -map_dpo_lock (dpo_id_t *dpo) -{ -} - -static void -map_dpo_unlock (dpo_id_t *dpo) -{ -} - -const static dpo_vft_t md_vft = { - .dv_lock = map_dpo_lock, - .dv_unlock = map_dpo_unlock, - .dv_format = format_map_dpo, -}; - -const static char* const map_ip4_nodes[] = -{ - "ip4-map", - NULL, -}; -const static char* const map_ip6_nodes[] = -{ - "ip6-map", - NULL, -}; - 
-const static char* const * const map_nodes[DPO_PROTO_NUM] = -{ - [DPO_PROTO_IP4] = map_ip4_nodes, - [DPO_PROTO_IP6] = map_ip6_nodes, - [DPO_PROTO_MPLS] = NULL, -}; - -const static dpo_vft_t md_t_vft = { - .dv_lock = map_dpo_lock, - .dv_unlock = map_dpo_unlock, - .dv_format = format_map_t_dpo, -}; - -const static char* const map_t_ip4_nodes[] = -{ - "ip4-map-t", - NULL, -}; -const static char* const map_t_ip6_nodes[] = -{ - "ip6-map-t", - NULL, -}; - -const static char* const * const map_t_nodes[DPO_PROTO_NUM] = -{ - [DPO_PROTO_IP4] = map_t_ip4_nodes, - [DPO_PROTO_IP6] = map_t_ip6_nodes, - [DPO_PROTO_MPLS] = NULL, -}; - -void -map_dpo_module_init (void) -{ - map_dpo_type = dpo_register_new_type(&md_vft, map_nodes); - map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes); -} diff --git a/src/vnet/map/map_dpo.h b/src/vnet/map/map_dpo.h deleted file mode 100644 index 63bf4787383..00000000000 --- a/src/vnet/map/map_dpo.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __MAP_DPO_H__ -#define __MAP_DPO_H__ - -#include -#include - -/** - * A representation of a MAP DPO - */ - -extern void map_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo); -extern void map_t_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo); - -extern u8* format_map_dpo(u8 *s, va_list *args); - -/* - * Encapsulation violation for fast data-path access - */ -extern dpo_type_t map_dpo_type; -extern dpo_type_t map_t_dpo_type; - -extern void map_dpo_module_init(void); - -#endif diff --git a/src/vnet/map/test.c b/src/vnet/map/test.c deleted file mode 100644 index ff10fc929a6..00000000000 --- a/src/vnet/map/test.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * test.c : MAP unit tests - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include "map.h" - -static map_domain_t * -get_domain(ip4_address_t * ip4_prefix, u8 ip4_prefix_len, - ip6_address_t * ip6_prefix, u8 ip6_prefix_len, - ip6_address_t * ip6_src, u8 ip6_src_len, - u8 ea_bits_len, u8 psid_offset, - u8 psid_length, u16 mtu, u8 flags) -{ - map_domain_t * d = malloc(sizeof(*d)); - u8 suffix_len; - - /* EA bits must be within the first 64 bits */ - if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64) - return NULL; - - /* Init domain struct */ - d->ip4_prefix.as_u32 = ip4_prefix->as_u32; - d->ip4_prefix_len = ip4_prefix_len; - d->ip6_prefix = *ip6_prefix; - d->ip6_prefix_len = ip6_prefix_len; - d->ip6_src = *ip6_src; - d->ip6_src_len = ip6_src_len; - d->ea_bits_len = ea_bits_len; - d->psid_offset = psid_offset; - d->psid_length = psid_length; - d->mtu = mtu; - d->flags = flags; - - /* How many, and which bits to grab from the IPv4 DA */ - if (ip4_prefix_len + ea_bits_len < 32) - { - if (!(flags & MAP_DOMAIN_TRANSLATION)) - d->flags |= MAP_DOMAIN_PREFIX; - d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len; - suffix_len = ea_bits_len; - } - else - { - d->suffix_shift = 0; - suffix_len = 32 - ip4_prefix_len; - } - d->suffix_mask = (1 << suffix_len) - 1; - - d->psid_shift = 16 - psid_length - psid_offset; - d->psid_mask = (1 << d->psid_length) - 1; - - if (ip6_prefix_len + suffix_len + d->psid_length > 64) - return NULL; - - d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; - - return d; -} - - -/* - * VPP-340: - * map_add_domain ip4-pfx 20.0.0.0/8 ip6-pfx 2001:db8::/40 ip6-src 2001:db8:ffff::/96 ea-bits-len 24 psid-offset 0 psid-len 0 map-t - * IPv4 src = 100.0.0.1 - * IPv4 dst = 20.169.201.219 - * UDP dest port = 1232 - * IPv6 src = 2001:db8:ffff::6400:1 - * IPv6 dst = a9c9:dfb8::14a9:c9db:0 - * a9c9:dfb8::14a9:c9db:0 != 2001:db8:a9:c9db:0:14a9:c9db:0 - */ -static void -test_map_t_destaddr (void) -{ - ip4_address_t ip4_prefix; - ip6_address_t ip6_prefix; - ip6_address_t ip6_src; - - 
ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000); - ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000); - ip6_prefix.as_u64[1] = 0; - ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000); - map_domain_t * d = get_domain (&ip4_prefix, 8, &ip6_prefix, 40, &ip6_src, 96, 24, 0, 0, 0, MAP_DOMAIN_TRANSLATION); - - ip6_address_t dst6; - - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db800a9c9db); - assert(dst6.as_u64[1] == 0x000014a9c9db0000); -} - -/* - * VPP-228 - * ip4-pfx 20.0.0.0/8 - * ip6-pfx 2001:db8::/ - * ip6-src 2001:db8:ffff::1 - * ea-bits-len 16 psid-offset 6 psid-len 8 - * 20.169.201.219 port 1232 - */ -static void -test_map_eabits (void) -{ - ip4_address_t ip4_prefix; - ip6_address_t ip6_prefix; - ip6_address_t ip6_src; - ip6_address_t dst6; - - ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000); - ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000); - ip6_prefix.as_u64[1] = 0; - ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000); - ip6_src.as_u64[1] = clib_host_to_net_u64(0x0000000000000001); - map_domain_t * d = get_domain (&ip4_prefix, 16, &ip6_prefix, 48, &ip6_src, - 128, 16, 6, 8, 0, 0); - assert(!d); - - //20.0.0.0/8 2001:db8::/32 4 2001:db8:a000::14a0:0:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 4, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a0000000); - assert(dst6.as_u64[1] == 0x000014a000000000); - - //20.0.0.0/8 2001:db8::/32 8 2001:db8:a900::14a9:0:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 8, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a9000000); - assert(dst6.as_u64[1] == 0x000014a900000000); - - //20.0.0.0/8 2001:db8::/32 10 
2001:db8:a9c0::14a9:c000:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 10, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a9c00000); - assert(dst6.as_u64[1] == 0x000014a9c0000000); - - //20.0.0.0/8 2001:db8::/32 16 2001:db8:a9c9::14a9:c900:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 16, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a9c90000); - assert(dst6.as_u64[1] == 0x000014a9c9000000); - - //20.0.0.0/8 2001:db8::/32 20 2001:db8:a9c9:d000:0:14a9:c9d0:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 20, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a9c9d000); - assert(dst6.as_u64[1] == 0x000014a9c9d00000); - - //20.0.0.0/8 2001:db8::/32 23 2001:db8:a9c9:da00:0:14a9:c9da:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 23, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a9c9da00); - assert(dst6.as_u64[1] == 0x000014a9c9da0000); - - //20.169.201.0/24 2001:db8::/32 7 2001:db8:da00::14a9:c9da:0 - d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src, - 128, 7, 0, 0, 0, 0); - dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232); - dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232); - assert(dst6.as_u64[0] == 0x20010db8a8000000); - assert(dst6.as_u64[1] == 0x000014a800000000); -} - -#define foreach_test_case \ - _(map_t_destaddr) \ - _(map_eabits) - -static void -run_tests (void) -{ -#define _(_test_name) \ - test_ ## _test_name (); - - foreach_test_case -#undef _ -} - -int main() -{ - run_tests (); - return 0; -} diff --git 
a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index c5c47e810b6..06b76a5e926 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include -- cgit 1.2.3-korg