diff options
Diffstat (limited to 'src/plugins/map')
-rwxr-xr-x | src/plugins/map/examples/gen-rules.py | 161 | ||||
-rwxr-xr-x | src/plugins/map/examples/test_map.py | 120 | ||||
-rwxr-xr-x | src/plugins/map/gen-rules.py | 124 | ||||
-rw-r--r-- | src/plugins/map/ip4_map.c | 5 | ||||
-rw-r--r-- | src/plugins/map/ip4_map_t.c | 10 | ||||
-rw-r--r-- | src/plugins/map/ip6_map.c | 6 | ||||
-rw-r--r-- | src/plugins/map/ip6_map_t.c | 13 | ||||
-rw-r--r-- | src/plugins/map/lpm.c | 28 | ||||
-rw-r--r-- | src/plugins/map/map.c | 20 | ||||
-rw-r--r-- | src/plugins/map/map.h | 6 | ||||
-rw-r--r-- | src/plugins/map/map_api.c | 8 | ||||
-rw-r--r-- | src/plugins/map/map_doc.md | 69 | ||||
-rw-r--r-- | src/plugins/map/map_doc.rst | 99 |
13 files changed, 429 insertions, 240 deletions
diff --git a/src/plugins/map/examples/gen-rules.py b/src/plugins/map/examples/gen-rules.py index 7964aa9a359..3d98f65b95d 100755 --- a/src/plugins/map/examples/gen-rules.py +++ b/src/plugins/map/examples/gen-rules.py @@ -20,38 +20,64 @@ import sys # map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 # map add rule index <0> psid <psid> ip6-dst <ip6-dst> -def_ip4_pfx = '192.0.2.0/24' -def_ip6_pfx = '2001:db8::/32' -def_ip6_src = '2001:db8::1' +def_ip4_pfx = "192.0.2.0/24" +def_ip6_pfx = "2001:db8::/32" +def_ip6_src = "2001:db8::1" def_psid_offset = 6 def_psid_len = 6 def_ea_bits_len = 0 -parser = argparse.ArgumentParser(description='MAP VPP configuration generator') -parser.add_argument('-t', action="store", dest="mapmode") -parser.add_argument('-f', action="store", dest="format", default="vpp") -parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx) -parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx) -parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src) -parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len) -parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset) -parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len) +parser = argparse.ArgumentParser(description="MAP VPP configuration generator") +parser.add_argument("-t", action="store", dest="mapmode") +parser.add_argument("-f", action="store", dest="format", default="vpp") +parser.add_argument("--ip4-prefix", action="store", dest="ip4_pfx", default=def_ip4_pfx) +parser.add_argument("--ip6-prefix", action="store", dest="ip6_pfx", default=def_ip6_pfx) +parser.add_argument("--ip6-src", action="store", dest="ip6_src", default=def_ip6_src) +parser.add_argument("--psid-len", action="store", dest="psid_len", default=def_psid_len) 
+parser.add_argument( + "--psid-offset", action="store", dest="psid_offset", default=def_psid_offset +) +parser.add_argument( + "--ea-bits-len", action="store", dest="ea_bits_len", default=def_ea_bits_len +) args = parser.parse_args() + # # Print domain # def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len): - if format == 'vpp': - print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src + - " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) - if format == 'confd': - print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src + - " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx + - " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) - if format == 'xml': + if format == "vpp": + print( + "map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", + ip6_pfx, + "ip6-src " + ip6_src + " ea-bits-len", + eabits_len, + "psid-offset", + psid_offset, + "psid-len", + psid_len, + ) + if format == "confd": + print( + "vpp softwire softwire-instances softwire-instance", + i, + "br-ipv6 " + + ip6_src + + " ipv6-prefix " + + ip6_pfx + + " ipv4-prefix " + + ip4_pfx + + " ea-bits-len", + eabits_len, + "psid-offset", + psid_offset, + "psid-len", + psid_len, + ) + if format == "xml": print("<softwire-instance>") - print("<id>", i, "</id>"); + print("<id>", i, "</id>") print(" <br-ipv6>" + ip6_src + "</br-ipv6>") print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>") print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>") @@ -59,32 +85,54 @@ def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len print(" <psid-len>", psid_len, "</psid-len>") print(" <psid-offset>", psid_offset, "</psid-offset>") + def domain_print_end(): - if format == 'xml': + if format == "xml": print("</softwire-instance>") + def rule_print(i, psid, dst): - if format == 'vpp': + if format == "vpp": print("map add rule index", i, "psid", psid, "ip6-dst", dst) - if 
format == 'confd': + if format == "confd": print("binding", psid, "ipv6-addr", dst) - if format == 'xml': + if format == "xml": print(" <binding>") print(" <psid>", psid, "</psid>") print(" <ipv6-addr>", dst, "</ipv6-addr>") print(" </binding>") + # # Algorithmic mapping Shared IPv4 address # -def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): - domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len) +def algo( + ip4_pfx_str, + ip6_pfx_str, + ip6_src_str, + ea_bits_len, + psid_offset, + psid_len, + ip6_src_ecmp=False, +): + domain_print( + 0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len + ) domain_print_end() + # # 1:1 Full IPv4 address # -def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): +def lw46( + ip4_pfx_str, + ip6_pfx_str, + ip6_src_str, + ea_bits_len, + psid_offset, + psid_len, + ip6_src_ecmp=False, +): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = ipaddress.ip_network(ip6_pfx_str) @@ -92,15 +140,26 @@ def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_l mod = ip4_pfx.num_addresses / 1024 for i in range(ip4_pfx.num_addresses): - domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0) + domain_print( + i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0 + ) domain_print_end() if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 + # # 1:1 Shared IPv4 address, shared BR (16) VPP CLI # -def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): +def lw46_shared( + ip4_pfx_str, + ip6_pfx_str, + ip6_src_str, + ea_bits_len, + psid_offset, + psid_len, + ip6_src_ecmp=False, +): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = 
ipaddress.ip_network(ip6_pfx_str) @@ -109,7 +168,7 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, for i in range(ip4_pfx.num_addresses): domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) for psid in range(0x1 << int(psid_len)): - rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid])) + rule_print(i, psid, str(ip6_dst[(i * (0x1 << int(psid_len))) + psid])) domain_print_end() if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 @@ -118,7 +177,15 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, # # 1:1 Shared IPv4 address, shared BR # -def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): +def lw46_shared_b( + ip4_pfx_str, + ip6_pfx_str, + ip6_src_str, + ea_bits_len, + psid_offset, + psid_len, + ip6_src_ecmp=False, +): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56)) @@ -127,15 +194,16 @@ def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offse for i in range(ip4_pfx.num_addresses): domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) for psid in range(0x1 << psid_len): - enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1] - rule_print(i, psid, enduserprefix[(i * (0x1<<psid_len)) + psid]) + enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255 - 1] + rule_print(i, psid, enduserprefix[(i * (0x1 << psid_len)) + psid]) domain_print_end() if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 def xml_header_print(): - print(''' + print( + """ <?xml version="1.0" encoding="UTF-8"?> <hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0"> <capabilities> @@ -156,10 +224,13 @@ def xml_header_print(): <softwire> <softwire-instances> - ''') + """ + ) + def xml_footer_print(): - print(''' + print( + """ 
</softwire-instances> </softwire> </vpp> @@ -175,12 +246,20 @@ def xml_footer_print(): </rpc> ]]>]]> - ''') + """ + ) format = args.format -if format == 'xml': +if format == "xml": xml_header_print() -globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len) -if format == 'xml': +globals()[args.mapmode]( + args.ip4_pfx, + args.ip6_pfx, + args.ip6_src, + args.ea_bits_len, + args.psid_offset, + args.psid_len, +) +if format == "xml": xml_footer_print() diff --git a/src/plugins/map/examples/test_map.py b/src/plugins/map/examples/test_map.py index 7a48964b3f2..f141ba3338c 100755 --- a/src/plugins/map/examples/test_map.py +++ b/src/plugins/map/examples/test_map.py @@ -1,128 +1,164 @@ #!/usr/bin/env python3 -import time,argparse,sys,cmd, unittest +import time, argparse, sys, cmd, unittest from ipaddress import * -parser = argparse.ArgumentParser(description='VPP MAP test') -parser.add_argument('-i', nargs='*', action="store", dest="inputdir") +parser = argparse.ArgumentParser(description="VPP MAP test") +parser.add_argument("-i", nargs="*", action="store", dest="inputdir") args = parser.parse_args() for dir in args.inputdir: sys.path.append(dir) from vpp_papi import * + # # 1:1 Shared IPv4 address, shared BR (16) VPP CLI # -def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): +def lw46_shared( + ip4_pfx_str, + ip6_pfx_str, + ip6_src_str, + ea_bits_len, + psid_offset, + psid_len, + ip6_src_ecmp=False, +): ip4_pfx = ip_network(ip4_pfx_str) ip6_src = ip_address(ip6_src_str) ip6_dst = ip_network(ip6_pfx_str) - ip6_nul = IPv6Address(u'0::0') + ip6_nul = IPv6Address("0::0") mod = ip4_pfx.num_addresses / 1024 for i in range(ip4_pfx.num_addresses): a = time.clock() - t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0) - #print "Return from map_add_domain", t + t = map_add_domain( + 0, + 
ip6_nul.packed, + ip4_pfx[i].packed, + ip6_src.packed, + 0, + 32, + 128, + ea_bits_len, + psid_offset, + psid_len, + 0, + 0, + ) + # print "Return from map_add_domain", t if t == None: - print "map_add_domain failed" + print("map_add_domain failed") continue if t.retval != 0: - print "map_add_domain failed", t + print(f"map_add_domain failed, {t}") continue for psid in range(0x1 << int(psid_len)): - r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<<int(psid_len))) + psid]).packed, psid) - #print "Return from map_add_del_rule", r + r = map_add_del_rule( + 0, + t.index, + 1, + (ip6_dst[(i * (0x1 << int(psid_len))) + psid]).packed, + psid, + ) + # print "Return from map_add_del_rule", r if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 - print "Running time:", time.clock() - a + print(f"Running time: {time.clock() - a}") + class TestMAP(unittest.TestCase): - ''' + """ def test_delete_all(self): t = map_domain_dump(0) self.assertNotEqual(t, None) - print "Number of domains configured: ", len(t) + print(f"Number of domains configured: {len(t)}") for d in t: ts = map_del_domain(0, d.domainindex) self.assertNotEqual(ts, None) t = map_domain_dump(0) self.assertNotEqual(t, None) - print "Number of domains configured: ", len(t) - self.assertEqual(len(t), 0) + print(f"Number of domains configured: {len(t)}") + self.assertEqual(len(t), 0)/ - ''' + """ def test_a_million_rules(self): - ip4_pfx = u'192.0.2.0/24' - ip6_pfx = u'2001:db8::/32' - ip6_src = u'2001:db8::1' + ip4_pfx = "192.0.2.0/24" + ip6_pfx = "2001:db8::/32" + ip6_src = "2001:db8::1" psid_offset = 6 psid_len = 6 ea_bits_len = 0 lw46_shared(ip4_pfx, ip6_pfx, ip6_src, ea_bits_len, psid_offset, psid_len) + # # RX thread, that should sit on blocking vpe_api_read() -# +# # # # import threading -class RXThread (threading.Thread): + + +class RXThread(threading.Thread): def __init__(self): threading.Thread.__init__(self) def run(self): - print "Starting " + print("Starting ") i = 0 while True: msg = 
vpe_api_read() if msg: - #print msg - id = unpack('>H', msg[0:2]) - size = unpack('>H', msg[2:4]) - print "Received", id, "of size", size + # print msg + id = unpack(">H", msg[0:2]) + size = unpack(">H", msg[2:4]) + print(f"Received {id} of size {size}") i += 1 - #del msg + # del msg continue - #time.sleep(0.001) + # time.sleep(0.001) return + # Create RX thread rxthread = RXThread() rxthread.setDaemon(True) - -print "Connect", connect_to_vpe("client124") + +print(f"Connect {connect_to_vpe('client124')}") import timeit + rxthread.start() -print "After thread started" +print("After thread started") -#pneum_kill_thread() -print "After thread killed" +# pneum_kill_thread() +print("After thread killed") -#t = show_version(0) -#print "Result from show version", t +# t = show_version(0) +# print "Result from show version", t -print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version") +print( + f"{timeit.timeit('t = show_version(0)', number=1000, setup='from __main__ import show_version')}" +) time.sleep(10) -#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping") +# print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping") disconnect_from_vpe() sys.exit() -print t.program, t.version,t.builddate,t.builddirectory +print(f"{t.program} {t.version}{t.builddate}{t.builddirectory}") -''' +""" t = map_domain_dump(0) if not t: @@ -131,11 +167,9 @@ if not t: for d in t: print("IP6 prefix:",str(IPv6Address(d.ip6prefix))) print( "IP4 prefix:",str(IPv4Address(d.ip4prefix))) -''' +""" suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP) unittest.TextTestRunner(verbosity=2).run(suite) disconnect_from_vpe() - - diff --git a/src/plugins/map/gen-rules.py b/src/plugins/map/gen-rules.py index e43b8e155be..e804763d0cf 100755 --- a/src/plugins/map/gen-rules.py +++ b/src/plugins/map/gen-rules.py @@ -21,87 +21,143 @@ import sys # map add domain ip4-pfx <pfx> ip6-pfx ::/0 
ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 # map add rule index <0> psid <psid> ip6-dst <ip6-dst> -parser = argparse.ArgumentParser(description='MAP VPP configuration generator') -parser.add_argument('-t', action="store", dest="mapmode") +parser = argparse.ArgumentParser(description="MAP VPP configuration generator") +parser.add_argument("-t", action="store", dest="mapmode") args = parser.parse_args() + # # 1:1 Shared IPv4 address, shared BR # def shared11br(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') - ip6_dst = ipaddress.ip_network('bbbb::/32') + ip4_pfx = ipaddress.ip_network("20.0.0.0/16") + ip6_dst = ipaddress.ip_network("bbbb::/32") psid_len = 6 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", - "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + print( + "map add domain ip4-pfx " + + str(ip4_pfx[i]) + + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", + psid_len, + ) for psid in range(0x1 << psid_len): - print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + print( + "map add rule index", + i, + "psid", + psid, + "ip6-dst", + ip6_dst[(i * (0x1 << psid_len)) + psid], + ) # # 1:1 Shared IPv4 address # def shared11(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') - ip6_src = ipaddress.ip_network('cccc:bbbb::/64') - ip6_dst = ipaddress.ip_network('bbbb::/32') + ip4_pfx = ipaddress.ip_network("20.0.0.0/16") + ip6_src = ipaddress.ip_network("cccc:bbbb::/64") + ip6_dst = ipaddress.ip_network("bbbb::/32") psid_len = 6 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], - "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + print( + "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", + ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", + psid_len, + ) for psid in 
range(0x1 << psid_len): - print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + print( + "map add rule index", + i, + "psid", + psid, + "ip6-dst", + ip6_dst[(i * (0x1 << psid_len)) + psid], + ) + # # 1:1 Shared IPv4 address small # def smallshared11(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/24') - ip6_src = ipaddress.ip_network('cccc:bbbb::/64') - ip6_dst = ipaddress.ip_network('bbbb::/32') + ip4_pfx = ipaddress.ip_network("20.0.0.0/24") + ip6_src = ipaddress.ip_network("cccc:bbbb::/64") + ip6_dst = ipaddress.ip_network("bbbb::/32") psid_len = 6 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], - "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + print( + "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", + ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", + psid_len, + ) for psid in range(0x1 << psid_len): - print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + print( + "map add rule index", + i, + "psid", + psid, + "ip6-dst", + ip6_dst[(i * (0x1 << psid_len)) + psid], + ) + # # 1:1 Full IPv4 address # def full11(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') - ip6_src = ipaddress.ip_network('cccc:bbbb::/64') - ip6_dst = ipaddress.ip_network('bbbb::/32') + ip4_pfx = ipaddress.ip_network("20.0.0.0/16") + ip6_src = ipaddress.ip_network("cccc:bbbb::/64") + ip6_dst = ipaddress.ip_network("bbbb::/32") psid_len = 0 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i], - "ea-bits-len 0 psid-offset 0 psid-len 0") + print( + "map add domain ip4-pfx " + + str(ip4_pfx[i]) + + "/32 ip6-pfx " + + str(ip6_dst[i]) + + "/128 ip6-src", + ip6_src[i], + "ea-bits-len 0 psid-offset 0 psid-len 0", + ) + + def full11br(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') - ip6_dst = 
ipaddress.ip_network('bbbb::/32') + ip4_pfx = ipaddress.ip_network("20.0.0.0/16") + ip6_dst = ipaddress.ip_network("bbbb::/32") psid_len = 0 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1", - "ea-bits-len 0 psid-offset 0 psid-len 0") + print( + "map add domain ip4-pfx " + + str(ip4_pfx[i]) + + "/32 ip6-pfx " + + str(ip6_dst[i]) + + "/128 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 0 psid-len 0", + ) + # # Algorithmic mapping Shared IPv4 address # def algo(): - print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8") - print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0") + print( + "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8" + ) + print( + "map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0" + ) + # # IP4 forwarding # def ip4(): - ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip4_pfx = ipaddress.ip_network("20.0.0.0/16") for i in range(ip4_pfx.num_addresses): - print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") + print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") globals()[args.mapmode]() - - diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index 1ab5cc2dc4f..652808e6d37 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -155,6 +155,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; u32 thread_index = vm->thread_index; + u32 *buffer0 = 0; while (n_left_from > 0) { @@ -170,7 +171,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip6_header_t *ip6h0; u32 next0 
= IP4_MAP_NEXT_IP6_LOOKUP; u32 map_domain_index0 = ~0; - u32 *buffer0 = 0; bool free_original_buffer0 = false; u32 *frag_from0, frag_left0; @@ -322,10 +322,10 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_put_next_frame (vm, node, next_index, n_left_to_next); } + vec_free (buffer0); return frame->n_vectors; } -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_map_feature, static) = { .arc_name = "ip4-unicast", @@ -354,7 +354,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = { [IP4_MAP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index 8ae76f331f6..fe29af458a2 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -56,7 +56,6 @@ typedef enum //This is used to pass information within the buffer data. //Buffer structure being too small to contain big structures like this. -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { ip6_address_t daddr; ip6_address_t saddr; @@ -64,7 +63,6 @@ typedef CLIB_PACKED (struct { //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) u8 unused[28]; }) ip4_mapt_pseudo_header_t; -/* *INDENT-ON* */ typedef struct { @@ -684,7 +682,6 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip4_map_t_feature, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-map-t", @@ -710,9 +707,7 @@ VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { .function = ip4_map_t_icmp, .name = "ip4-map-t-icmp", @@ -731,9 +726,7 @@ VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = { [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { .function = ip4_map_t_tcp_udp, .name = "ip4-map-t-tcp-udp", @@ -752,9 +745,7 
@@ VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = { [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip4_map_t_node) = { .function = ip4_map_t, .name = "ip4-map-t", @@ -774,7 +765,6 @@ VLIB_REGISTER_NODE(ip4_map_t_node) = { [IP4_MAPT_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index 1193dda0a80..3d9b21dfcd9 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -803,7 +803,6 @@ ip6_map_icmp_relay (vlib_main_t * vm, } -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip6_map_feature, static) = { .arc_name = "ip6-unicast", @@ -836,9 +835,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = { [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = { .function = ip6_map_post_ip4_reass, .name = "ip6-map-post-ip4-reass", @@ -854,9 +851,7 @@ VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = { [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { .function = ip6_map_icmp_relay, .name = "ip6-map-icmp-relay", @@ -871,7 +866,6 @@ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ clib_error_t * ip6_map_init (vlib_main_t * vm) diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 861c049b0f4..51853d619e6 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -529,7 +529,10 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip60 = vlib_buffer_get_current (p0); d0 = - ip6_map_get_domain (&ip60->dst_address, + /* Originally using the IPv6 dest for rule lookup, now source + * [dgeist] ip6_map_get_domain (&ip60->dst_address, + */ + ip6_map_get_domain (&ip60->src_address, &vnet_buffer 
(p0)->map_t.map_domain_index, &error0); if (!d0) @@ -687,7 +690,6 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { .function = ip6_map_t_fragmented, .name = "ip6-map-t-fragmented", @@ -707,9 +709,7 @@ VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { .function = ip6_map_t_icmp, .name = "ip6-map-t-icmp", @@ -729,9 +729,7 @@ VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { .function = ip6_map_t_tcp_udp, .name = "ip6-map-t-tcp-udp", @@ -751,9 +749,7 @@ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", }, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VNET_FEATURE_INIT (ip6_map_t_feature, static) = { .arc_name = "ip6-unicast", .node_name = "ip6-map-t", @@ -781,7 +777,6 @@ VLIB_REGISTER_NODE(ip6_map_t_node) = { [IP6_MAPT_NEXT_ICMP] = "ip6-icmp-error", }, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/map/lpm.c b/src/plugins/map/lpm.c index c0e5bad1417..a2fc3337167 100644 --- a/src/plugins/map/lpm.c +++ b/src/plugins/map/lpm.c @@ -28,7 +28,13 @@ masked_address32 (uint32_t addr, uint8_t len) static uint64_t masked_address64 (uint64_t addr, uint8_t len) { - return len == 64 ? addr : addr & ~(~0ull >> len); + /* This was originally causing non-64-bit masks to not match due to LSB vs + * MSB masking (0s at the head of the value) Probably needs some corner case + * checking in case my masking logic was off [dgeist] + * + * return len == 64 ? addr : addr & ~(~0ull >> len); + */ + return len == 64 ? 
addr : addr & ((1ull << (len)) - 1); } static void @@ -126,13 +132,25 @@ lpm_128_add (lpm_t *lpm, void *addr_v, u8 pfxlen, u32 value) BVT(clib_bihash_kv) kv; ip6_address_t *addr = addr_v; - kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen); + /* This is a quick hack. It works for pfxlen < 64 but needs validation for + * other [dgeist] + * + * kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen); + */ + kv.key[0] = masked_address64 (addr->as_u64[0], pfxlen > 64 ? 64 : 64); kv.key[1] = masked_address64(addr->as_u64[1], pfxlen > 64 ? pfxlen - 64 : 0); kv.key[2] = pfxlen; kv.value = value; BV(clib_bihash_add_del)(&lpm->bihash, &kv, 1); lpm->prefix_length_refcount[pfxlen]++; - lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 1); + /* Populating the lengths bitmap table with prefix of 48 instead of 80 + * (128 - 48) [dgeist] + * + * lpm->prefix_lengths_bitmap = clib_bitmap_set ( + * lpm->prefix_lengths_bitmap, 128 - pfxlen, 1); + */ + lpm->prefix_lengths_bitmap = clib_bitmap_set ( + lpm->prefix_lengths_bitmap, pfxlen > 64 ? 
128 - pfxlen : pfxlen, 1); } static void @@ -148,8 +166,8 @@ lpm_128_delete (lpm_t *lpm, void *addr_v, u8 pfxlen) /* refcount accounting */ ASSERT (lpm->prefix_length_refcount[pfxlen] > 0); if (--lpm->prefix_length_refcount[pfxlen] == 0) { - lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap, - 128 - pfxlen, 0); + lpm->prefix_lengths_bitmap = + clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 0); } } diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c index 938793c8947..3cffadd39e8 100644 --- a/src/plugins/map/map.c +++ b/src/plugins/map/map.c @@ -176,6 +176,10 @@ map_create_domain (ip4_address_t * ip4_prefix, mm->ip6_src_prefix_tbl->add (mm->ip6_src_prefix_tbl, &d->ip6_src, d->ip6_src_len, *map_domain_index); + /* Let's build a table with the MAP rule ip6 prefixes as well [dgeist] */ + mm->ip6_prefix_tbl->add (mm->ip6_prefix_tbl, &d->ip6_prefix, + d->ip6_prefix_len, *map_domain_index); + /* Validate packet/byte counters */ map_domain_counter_lock (mm); int i; @@ -218,6 +222,9 @@ map_delete_domain (u32 map_domain_index) d->ip4_prefix_len); mm->ip6_src_prefix_tbl->delete (mm->ip6_src_prefix_tbl, &d->ip6_src, d->ip6_src_len); + /* Addition to remove the new table [dgeist] */ + mm->ip6_prefix_tbl->delete (mm->ip6_prefix_tbl, &d->ip6_prefix, + d->ip6_prefix_len); /* Release user-assigned MAP domain name. */ map_free_extras (map_domain_index); @@ -979,10 +986,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) { - /* *INDENT-OFF* */ pool_foreach (d, mm->domains) {vlib_cli_output(vm, "%U", format_map_domain, d, counters);} - /* *INDENT-ON* */ return 0; } @@ -1008,10 +1013,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, if (map_domain_index == ~0) { - /* *INDENT-OFF* */ pool_foreach (d, mm->domains) {vlib_cli_output(vm, "%U", format_map_domain, d, counters);} - /* *INDENT-ON* */ } else { @@ -1062,7 +1065,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } - /* *INDENT-OFF* */ pool_foreach (d, mm->domains) { if (d->rules) { rulecount+= 0x1 << d->psid_length; @@ -1071,7 +1073,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, domains += sizeof(*d); domaincount++; } - /* *INDENT-ON* */ vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t)); vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains); @@ -1255,7 +1256,6 @@ done: } -/* *INDENT-OFF* */ /*? * Set or copy the IP TOS/Traffic Class field @@ -1312,7 +1312,7 @@ VLIB_CLI_COMMAND(map_pre_resolve_command, static) = { /*? * Enable or disable the MAP-E inbound security check - * Specifiy if the inbound security check should be done on fragments + * Specify if the inbound security check should be done on fragments * * @cliexpar * @cliexstart{map params security-check} @@ -1333,9 +1333,8 @@ VLIB_CLI_COMMAND(map_security_check_command, static) = { .function = map_security_check_command_fn, }; - /*? 
- * Specifiy the IPv4 source address used for relayed ICMP error messages + * Specify the IPv4 source address used for relayed ICMP error messages * * @cliexpar * @cliexstart{map params icmp source-address} @@ -1470,7 +1469,6 @@ VLIB_PLUGIN_REGISTER() = { .description = "Mapping of Address and Port (MAP)", }; -/* *INDENT-ON* */ /* * map_init diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h index d874aa47b3a..86b8ec22df4 100644 --- a/src/plugins/map/map.h +++ b/src/plugins/map/map.h @@ -335,7 +335,11 @@ ip6_map_get_domain (ip6_address_t * addr, u32 * map_domain_index, u8 * error) { map_main_t *mm = &map_main; u32 mdi = - mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128); + /* This is the old src (ip6 destination) hash lookup [dgeist] + * + * mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128); + */ + mm->ip6_prefix_tbl->lookup (mm->ip6_prefix_tbl, addr, 128); if (mdi == ~0) { *error = MAP_ERROR_NO_DOMAIN; diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c index f81216dddbd..1dbff4ca0d1 100644 --- a/src/plugins/map/map_api.c +++ b/src/plugins/map/map_api.c @@ -50,13 +50,11 @@ vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp) mp->ip6_src.len, mp->ea_bits_len, mp->psid_offset, mp->psid_length, &index, mp->mtu, flags, mp->tag); - /* *INDENT-OFF* */ REPLY_MACRO2_END(VL_API_MAP_ADD_DOMAIN_REPLY, ({ rmp->index = index; })); - /* *INDENT-ON* */ } static void @@ -98,7 +96,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp, vec_elt_at_index (mm->domain_extras, map_domain_index); int tag_len = clib_min (ARRAY_LEN (rmp->tag), vec_len (de->tag) + 1); - /* *INDENT-OFF* */ REPLY_MACRO_DETAILS4(VL_API_MAP_DOMAIN_DETAILS, rp, context, ({ rmp->domain_index = htonl (map_domain_index); @@ -119,7 +116,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp, memcpy (rmp->tag, de->tag, tag_len - 1); rmp->tag[tag_len - 1] = '\0'; })); - /* *INDENT-ON* */ } static void @@ 
-136,12 +132,10 @@ vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach_index (i, mm->domains) { send_domain_details(i, reg, mp->context); } - /* *INDENT-ON* */ } static void @@ -152,12 +146,10 @@ vl_api_map_domains_get_t_handler (vl_api_map_domains_get_t * mp) i32 rv = 0; - /* *INDENT-OFF* */ REPLY_AND_DETAILS_MACRO (VL_API_MAP_DOMAINS_GET_REPLY, mm->domains, ({ send_domain_details (cursor, rp, mp->context); })); - /* *INDENT-ON* */ } static void diff --git a/src/plugins/map/map_doc.md b/src/plugins/map/map_doc.md deleted file mode 100644 index f3e2a56706d..00000000000 --- a/src/plugins/map/map_doc.md +++ /dev/null @@ -1,69 +0,0 @@ -# VPP MAP and Lw4o6 implementation {#map_doc} - -This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations. -Everything that is not directly obvious should come here. - - - -## MAP-E Virtual Reassembly - -The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments. - -Fragment caching and handling is not always necessary. It is performed when: -* An IPv4 fragment is received and the destination IPv4 address is shared. -* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on. -* An IPv6 fragment is received. - -There are 3 dedicated nodes: -* ip4-map-reass -* ip6-map-ip4-reass -* ip6-map-ip6-reass - -ip4-map sends all fragments to ip4-map-reass. -ip6-map sends all inner-fragments to ip6-map-ip4-reass. -ip6-map sends all outer-fragments to ip6-map-ip6-reass. - -IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. 
The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes. - -An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received. - -#### Virtual Reassembly configuration - -IPv4 and IPv6 virtual reassembly support the following configuration: - map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>] - -lifetime: - The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases. - -buffers: - The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool. - -pool-size: - The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total. - -ht-ratio: - The amount of buckets in the hash-table is pool-size * ht-ratio. - - -Any time pool-size and ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost. - - -##### Additional considerations - -Reassembly at high rate is expensive in terms of buffers. 
There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of loosing state for fragments that are wide appart. - -Let: -R be the packet rate at which fragments are received. -F be the number of fragments per packet. - -Assuming the first fragment is always received last. We should have: -buffers > lifetime * R / F * (F - 1) -pool-size > lifetime * R/F - -This is a worst case. Receiving the first fragment earlier helps reducing the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reducing 'pool-size'. - -But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, 5Mpps packet rate would require buffering up to 2.5 millions fragments. - -If you want to do that, be prepared to configure a lot of fragments. - - diff --git a/src/plugins/map/map_doc.rst b/src/plugins/map/map_doc.rst new file mode 100644 index 00000000000..663e815d545 --- /dev/null +++ b/src/plugins/map/map_doc.rst @@ -0,0 +1,99 @@ +MAP and Lw4o6 +============= + +This is a memo intended to contain documentation of the VPP MAP and +Lw4o6 implementations. Everything that is not directly obvious should +come here. + +MAP-E Virtual Reassembly +------------------------ + +The MAP-E implementation supports handling of IPv4 fragments as well as +IPv4-in-IPv6 inner and outer fragments. This is called virtual +reassembly because the fragments are not actually reassembled. Instead, +some meta-data are kept about the first fragment and reused for +subsequent fragments. + +Fragment caching and handling is not always necessary. It is performed +when: \* An IPv4 fragment is received and the destination IPv4 address +is shared. 
\* An IPv6 packet is received with an inner IPv4 fragment, +the IPv4 source address is shared, and ‘security-check fragments’ is on. +\* An IPv6 fragment is received. + +There are 3 dedicated nodes: \* ip4-map-reass \* ip6-map-ip4-reass \* +ip6-map-ip6-reass + +ip4-map sends all fragments to ip4-map-reass. ip6-map sends all +inner-fragments to ip6-map-ip4-reass. ip6-map sends all outer-fragments +to ip6-map-ip6-reass. + +IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order +to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based +on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. +IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). +Therefore, each packet reassembly makes use of exactly one reassembly +structure. When such a structure is allocated, it is timestamped with +the current time. Finally, those structures are capable of storing a +limited number of buffer indexes. + +An IPv4 (resp. IPv6) reassembly structure can cache up to +MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. +MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached +until the first fragment is received. + +Virtual Reassembly configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IPv4 and IPv6 virtual reassembly support the following configuration: +map params reassembly [ip4 \| ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] +[ht-ratio <ht-ratio>] + +lifetime: The time in milliseconds a reassembly structure is considered +valid. The longer, the more reliable is reassembly, but the more likely +it is to exhaust the pool of reassembly structures. IPv4 standard +suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 +seconds. Those values are not realistic for high-throughput cases. + +buffers: The upper limit of buffers that are allowed to be cached. It +can be used to protect against fragmentation attacks which would aim to +exhaust the global buffers pool. + +pool-size: The number of reassembly structures that can be allocated.
As +each structure can store a small fixed number of fragments, it also sets +an upper-bound of ‘pool-size \* +MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY’ buffers that can be cached +in total. + +ht-ratio: The number of buckets in the hash-table is pool-size \* +ht-ratio. + +Any time pool-size or ht-ratio is modified, the hash-table is destroyed +and created again, which means all current state is lost. + +Additional considerations +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Reassembly at high rate is expensive in terms of buffers. There is a +trade-off between the lifetime and number of allocated buffers. Reducing +the lifetime helps, but at the cost of losing state for fragments that +are wide apart. + +Let: R be the packet rate at which fragments are received. F be the +number of fragments per packet. + +Assuming the first fragment is always received last, we should have: +buffers > lifetime \* R / F \* (F - 1) and pool-size > lifetime \* R/F + +This is a worst case. Receiving the first fragment earlier helps +reduce the number of required buffers. Also, an optimization is +implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) +which counts the number of transmitted bytes and remembers the total +number of bytes which should be transmitted based on the last fragment, +and therefore helps reduce ‘pool-size’. + +But the formula shows that it is challenging to forward a significant +number of fragmented packets at high rates. For instance, with a +lifetime of 1 second, 5Mpps packet rate would require buffering up to +2.5 million fragments. + +If you want to do that, be prepared to configure a lot of fragments. |