diff options
Diffstat (limited to 'src/plugins/nat/nat64/nat64.c')
-rw-r--r-- | src/plugins/nat/nat64/nat64.c | 1646 |
1 files changed, 1646 insertions, 0 deletions
diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c new file mode 100644 index 00000000000..5da498670f6 --- /dev/null +++ b/src/plugins/nat/nat64/nat64.c @@ -0,0 +1,1646 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/crc32.h> +#include <vnet/fib/ip4_fib.h> + +#include <vnet/ip/reass/ip4_sv_reass.h> +#include <vnet/ip/reass/ip6_sv_reass.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +#include <nat/nat64/nat64.h> + +nat64_main_t nat64_main; + +/* *INDENT-OFF* */ +/* Hook up input features */ +VNET_FEATURE_INIT (nat64_in2out, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_in2out_handoff, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out-handoff", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_out2in_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "NAT64", +}; +static u8 well_known_prefix[] = { + 0x00, 0x64, 0xff, 0x9b, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; +/* *INDENT-ON* */ + +#define nat_elog_str(_str) \ +do \ + { \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "nat-msg " _str, \ + .format_args = "", \ + }; \ + ELOG_DATA (&vlib_global_main.elog_main, e); \ + } while (0); + +static void +nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) +{ + nat64_main_t *nm = &nat64_main; + int i, j; + + if (plugin_enabled () == 0) + return; + + for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++) + { + if (sw_if_index == nm->auto_add_sw_if_indices[i]) + { + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + for (j = 0; j < vec_len (nm->addr_pool); j++) + if (nm->addr_pool[j].addr.as_u32 == address->as_u32) + return; + + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 1); + return; + } + else + { + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 0); + return; + } + } + } +} + +u32 +nat64_get_worker_in2out (ip6_address_t * addr) +{ + nat64_main_t *nm = &nat64_main; + u32 next_worker_index = nm->first_worker_index; + u32 hash; + +#ifdef clib_crc32c_uses_intrinsics + hash = clib_crc32c ((u8 *) addr->as_u32, 16); +#else + u64 tmp = addr->as_u64[0] ^ addr->as_u64[1]; + hash = clib_xxhash (tmp); +#endif + + if (PREDICT_TRUE (is_pow2 (_vec_len (nm->workers)))) + next_worker_index += nm->workers[hash & (_vec_len (nm->workers) - 1)]; + else + next_worker_index += nm->workers[hash % _vec_len (nm->workers)]; + + return next_worker_index; +} + +u32 +nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip) +{ + nat64_main_t *nm = &nat64_main; + udp_header_t *udp; + u16 port; + u32 proto; + + proto = ip_proto_to_nat_proto (ip->protocol); + udp = ip4_next_header (ip); + port = udp->dst_port; + + /* unknown protocol */ + if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER)) + { + nat64_db_t *db; + ip46_address_t daddr; + nat64_db_bib_entry_t *bibe; + + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip->dst_address.as_u32; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0); + if (bibe) + return (u32) (db - nm->db); + } + /* *INDENT-ON* */ + return vlib_get_thread_index (); + } + + /* ICMP */ + if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; + else + { + /* if error message, then it's not fragmented and we can access it */ + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_nat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case NAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } + } + } + + /* worker by outside port (TCP/UDP) */ + port = clib_net_to_host_u16 (port); + if (port > 1024) + return nm->first_worker_index + ((port - 1024) / nm->port_per_thread); + + return vlib_get_thread_index (); +} + +clib_error_t * +nat64_init (vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + ip4_add_del_interface_address_callback_t cb4; + vlib_node_t *node; + + clib_memset (nm, 0, sizeof (*nm)); + + nm->ip4_main = &ip4_main; + nm->log_class = vlib_log_register_class ("nat64", 0); + + nm->port_per_thread = 0xffff - 1024; + + nm->fq_in2out_index = ~0; + nm->fq_out2in_index = ~0; + + node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + nm->error_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out"); + nm->in2out_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath"); + nm->in2out_slowpath_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in"); + nm->out2in_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat64-expire-worker-walk"); + nm->expire_worker_walk_node_index = node->index; + + nm->fib_src_hi = fib_source_allocate ("nat64-hi", + FIB_SOURCE_PRIORITY_HI, + FIB_SOURCE_BH_SIMPLE); + nm->fib_src_low = fib_source_allocate ("nat64-low", + FIB_SOURCE_PRIORITY_LOW, + FIB_SOURCE_BH_SIMPLE); + + // set protocol timeouts to defaults + nat64_reset_timeouts (); + + /* Set up the interface address add/del callback */ + cb4.function = nat64_ip4_add_del_interface_address_cb; + cb4.function_opaque = 0; + vec_add1 (nm->ip4_main->add_del_interface_address_callbacks, cb4); + + /* Init counters */ + nm->total_bibs.name = "total-bibs"; + nm->total_bibs.stat_segment_name = "/nat64/total-bibs"; + vlib_validate_simple_counter (&nm->total_bibs, 0); + vlib_zero_simple_counter (&nm->total_bibs, 0); + nm->total_sessions.name = "total-sessions"; + nm->total_sessions.stat_segment_name = "/nat64/total-sessions"; + vlib_validate_simple_counter (&nm->total_sessions, 0); + vlib_zero_simple_counter (&nm->total_sessions, 0); + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + vlib_thread_registration_t *tr; + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + nm->num_workers = tr->count; + nm->first_worker_index = tr->first_index; + } + } + + if (nm->num_workers > 1) + { + int i; + uword *bitmap = 0; + + for (i = 0; i < nm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(nm->workers, i); + })); + /* *INDENT-ON* */ + + clib_bitmap_free (bitmap); + + nm->port_per_thread = (0xffff - 1024) / _vec_len (nm->workers); + } + + // TODO: ipfix needs to be separated from NAT base plugin + /* Init IPFIX logging */ + //snat_ipfix_logging_init (vm); + +#define _(x) \ + nm->counters.in2out.x.name = #x; \ + nm->counters.in2out.x.stat_segment_name = "/nat64/in2out/" #x; \ + nm->counters.out2in.x.name = #x; \ + nm->counters.out2in.x.stat_segment_name = "/nat64/out2in/" #x; + foreach_nat_counter; +#undef _ + return nat64_api_hookup (vm); +} + +VLIB_INIT_FUNCTION (nat64_init); + +static void nat64_free_out_addr_and_port (struct nat64_db_s *db, + ip4_address_t * addr, u16 port, + u8 protocol); + +int +nat64_init_hash (nat64_config_t c) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + int rv = 0; + + vec_validate (nm->db, tm->n_vlib_mains - 1); + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_init (db, c, nat64_free_out_addr_and_port)) + { + nat64_log_err ("NAT64 DB init failed"); + rv = 1; + } + } + /* *INDENT-ON* */ + + return rv; +} + +int +nat64_free_hash () +{ + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + int rv = 0; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_free (db)) + { + nat64_log_err ("NAT64 DB free failed"); + rv = 1; + } + } + /* *INDENT-ON* */ + + vec_free (nm->db); + + return rv; +} + +int +nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + nat64_address_t *a = 0; + nat64_interface_t *interface; + int i; + nat64_db_t *db; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* Check if address already exists */ + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + if (nm->addr_pool[i].addr.as_u32 == addr->as_u32) + { + a = nm->addr_pool + i; + break; + } + } + + if (is_add) + { + if (a) + return VNET_API_ERROR_VALUE_EXIST; + + vec_add2 (nm->addr_pool, a, 1); + a->addr = *addr; + a->fib_index = ~0; + if (vrf_id != ~0) + a->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); +#define _(N, id, n, s) \ + clib_memset (a->busy_##n##_port_refcounts, 0, sizeof(a->busy_##n##_port_refcounts)); \ + a->busy_##n##_ports = 0; \ + vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); + foreach_nat_protocol +#undef _ + } + else + { + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi); + /* Delete sessions using address */ + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + nat64_db_free_out_addr (thread_index, db, &a->addr); + vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0, + db->st.st_entries_num); + } + /* *INDENT-ON* */ + vec_del1 (nm->addr_pool, i); + } + + /* Add/del external address to FIB */ + /* *INDENT-OFF* */ + pool_foreach (interface, nm->interfaces, + ({ + if (nat64_interface_is_inside(interface)) + continue; + + nat64_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add); + break; + })); + /* *INDENT-ON* */ + + return 0; +} + +void +nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_address_t *a = 0; + + /* *INDENT-OFF* */ + vec_foreach (a, nm->addr_pool) + { + if (fn (a, ctx)) + break; + }; + /* *INDENT-ON* */ +} + +int +nat64_add_interface_address (u32 sw_if_index, int is_add) +{ + nat64_main_t *nm = &nat64_main; + ip4_main_t *ip4_main = nm->ip4_main; + ip4_address_t *first_int_addr; + int i; + + first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0); + + for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++) + { + if (nm->auto_add_sw_if_indices[i] == sw_if_index) + { + if (is_add) + return VNET_API_ERROR_VALUE_EXIST; + else + { + /* if have address remove it */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 0); + vec_del1 (nm->auto_add_sw_if_indices, i); + return 0; + } + } + } + + if (!is_add) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add to the auto-address list */ + vec_add1 (nm->auto_add_sw_if_indices, sw_if_index); + + /* If the address is already bound - or static - add it now */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 1); + + return 0; +} + +static void +nat64_validate_counters (nat64_main_t * nm, u32 sw_if_index) +{ +#define _(x) \ + vlib_validate_simple_counter (&nm->counters.in2out.x, sw_if_index); \ + vlib_zero_simple_counter (&nm->counters.in2out.x, sw_if_index); \ + vlib_validate_simple_counter (&nm->counters.out2in.x, sw_if_index); \ + vlib_zero_simple_counter (&nm->counters.out2in.x, sw_if_index); + foreach_nat_counter; +#undef _ +} + +void +nat64_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add) +{ + nat64_main_t *nm = &nat64_main; + fib_prefix_t prefix = { + .fp_len = p_len, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = addr->as_u32, + }, + }; + u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (is_add) + fib_table_entry_update_one_path (fib_index, + &prefix, + nm->fib_src_low, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL | + FIB_ENTRY_FLAG_EXCLUSIVE), + DPO_PROTO_IP4, + NULL, + sw_if_index, + ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); + else + fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low); +} + +int +nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add) +{ + vlib_main_t *vm = vlib_get_main (); + nat64_main_t *nm = &nat64_main; + nat64_interface_t *interface = 0, *i; + nat64_address_t *ap; + const char *feature_name, *arc_name; + + // TODO: is enabled ? we can't signal if it is not + + /* Check if interface already exists */ + /* *INDENT-OFF* */ + pool_foreach (i, nm->interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + interface = i; + break; + } + })); + /* *INDENT-ON* */ + + if (is_add) + { + if (interface) + goto set_flags; + + pool_get (nm->interfaces, interface); + interface->sw_if_index = sw_if_index; + interface->flags = 0; + nat64_validate_counters (nm, sw_if_index); + set_flags: + if (is_inside) + interface->flags |= NAT64_INTERFACE_FLAG_IS_INSIDE; + else + interface->flags |= NAT64_INTERFACE_FLAG_IS_OUTSIDE; + + nm->total_enabled_count++; + vlib_process_signal_event (vm, + nm->expire_walk_node_index, + NAT64_CLEANER_RESCHEDULE, 0); + + } + else + { + if (!interface) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if ((nat64_interface_is_inside (interface) + && nat64_interface_is_outside (interface))) + interface->flags &= + is_inside ? ~NAT64_INTERFACE_FLAG_IS_INSIDE : + ~NAT64_INTERFACE_FLAG_IS_OUTSIDE; + else + pool_put (nm->interfaces, interface); + + nm->total_enabled_count--; + } + + if (!is_inside) + { + /* *INDENT-OFF* */ + vec_foreach (ap, nm->addr_pool) + nat64_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add); + /* *INDENT-ON* */ + } + + if (nm->num_workers > 1) + { + feature_name = + is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff"; + if (nm->fq_in2out_index == ~0) + nm->fq_in2out_index = + vlib_frame_queue_main_init (nat64_in2out_node.index, 0); + if (nm->fq_out2in_index == ~0) + nm->fq_out2in_index = + vlib_frame_queue_main_init (nat64_out2in_node.index, 0); + } + else + feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; + + arc_name = is_inside ? "ip6-unicast" : "ip4-unicast"; + + if (is_inside) + { + int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + else + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + + return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index, + is_add, 0, 0); +} + +void +nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_interface_t *i = 0; + + /* *INDENT-OFF* */ + pool_foreach (i, nm->interfaces, + ({ + if (fn (i, ctx)) + break; + })); + /* *INDENT-ON* */ +} + +// TODO: plugin independent +static_always_inline u16 +nat64_random_port (u16 min, u16 max) +{ + nat64_main_t *nm = &nat64_main; + u32 rwide; + u16 r; + + rwide = random_u32 (&nm->random_seed); + r = rwide & 0xFFFF; + if (r >= min && r <= max) + return r; + + return min + (rwide % (max - min + 1)); +} + +static_always_inline int +nat64_alloc_addr_and_port_default (nat64_address_t * addresses, + u32 fib_index, + u32 thread_index, + nat_protocol_t proto, + ip4_address_t * addr, + u16 * port, + u16 port_per_thread, u32 nat_thread_index) +{ + int i; + nat64_address_t *a, *ga = 0; + u32 portnum; + + for (i = 0; i < vec_len (addresses); i++) + { + a = addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ + { \ + if (a->fib_index == fib_index) \ + { \ + while (1) \ + { \ + portnum = (port_per_thread * \ + nat_thread_index) + \ + nat64_random_port(0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + --a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16(portnum); \ + return 0; \ + } \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default: + return 1; + } + + } + + if (ga) + { + a = ga; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + while (1) \ + { \ + portnum = (port_per_thread * \ + nat_thread_index) + \ + nat64_random_port(0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16(portnum); \ + return 0; \ + } + break; + foreach_nat_protocol +#undef _ + default: + return 1; + } + } + + /* Totally out of translations to use... */ + //snat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +int +nat64_alloc_out_addr_and_port (u32 fib_index, nat_protocol_t proto, + ip4_address_t * addr, u16 * port, + u32 thread_index) +{ + nat64_main_t *nm = &nat64_main; + u32 worker_index = 0; + int rv; + + if (nm->num_workers > 1) + worker_index = thread_index - nm->first_worker_index; + + rv = nat64_alloc_addr_and_port_default (nm->addr_pool, fib_index, + thread_index, + proto, addr, port, + nm->port_per_thread, worker_index); + + return rv; +} + +static void +nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr, + u16 port, u8 protocol) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = db - nm->db; + nat_protocol_t proto = ip_proto_to_nat_proto (protocol); + u16 port_host_byte_order = clib_net_to_host_u16 (port); + nat64_address_t *a; + int i; + + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + a = nm->addr_pool + i; + if (addr->as_u32 != a->addr.as_u32) + continue; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \ + --a->busy_##n##_port_refcounts[port_host_byte_order]; \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ + break; + foreach_nat_protocol +#undef _ + default: + nat_elog_str ("unknown protocol"); + return; + } + break; + } +} + +/** + * @brief Add/delete static BIB entry in worker thread. + */ +static uword +nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db = &nm->db[thread_index]; + nat64_static_bib_to_update_t *static_bib; + nat64_db_bib_entry_t *bibe; + ip46_address_t addr; + + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if ((static_bib->thread_index != thread_index) || (static_bib->done)) + continue; + + if (static_bib->is_add) + { + (void) nat64_db_bib_entry_create (thread_index, db, + &static_bib->in_addr, + &static_bib->out_addr, + static_bib->in_port, + static_bib->out_port, + static_bib->fib_index, + static_bib->proto, 1); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + else + { + addr.as_u64[0] = static_bib->in_addr.as_u64[0]; + addr.as_u64[1] = static_bib->in_addr.as_u64[1]; + bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port, + static_bib->proto, + static_bib->fib_index, 1); + if (bibe) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + } + } + + static_bib->done = 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +static vlib_node_registration_t nat64_static_bib_worker_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = { + .function = nat64_static_bib_worker_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-static-bib-worker", +}; +/* *INDENT-ON* */ + +int +nat64_add_del_static_bib_entry (ip6_address_t * in_addr, + ip4_address_t * out_addr, u16 in_port, + u16 out_port, u8 proto, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); + nat_protocol_t p = ip_proto_to_nat_proto (proto); + ip46_address_t addr; + int i; + nat64_address_t *a; + u32 thread_index = 0; + nat64_db_t *db; + nat64_static_bib_to_update_t *static_bib; + vlib_main_t *worker_vm; + u32 *to_be_free = 0, *index; + + if (nm->num_workers > 1) + { + thread_index = nat64_get_worker_in2out (in_addr); + db = &nm->db[thread_index]; + } + else + db = &nm->db[nm->num_workers]; + + addr.as_u64[0] = in_addr->as_u64[0]; + addr.as_u64[1] = in_addr->as_u64[1]; + bibe = + nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port), + proto, fib_index, 1); + + if (is_add) + { + if (bibe) + return VNET_API_ERROR_VALUE_EXIST; + + /* outside port must be assigned to same thread as internall address */ + if ((out_port > 1024) && (nm->num_workers > 1)) + { + if (thread_index != ((out_port - 1024) / nm->port_per_thread)) + return VNET_API_ERROR_INVALID_VALUE_2; + } + + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + a = nm->addr_pool + i; + if (out_addr->as_u32 != a->addr.as_u32) + continue; + switch (p) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_port_refcounts[out_port]) \ + return VNET_API_ERROR_INVALID_VALUE; \ + ++a->busy_##n##_port_refcounts[out_port]; \ + if (out_port > 1024) \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + } \ + break; + foreach_nat_protocol +#undef _ + default: + clib_memset (&addr, 0, sizeof (addr)); + addr.ip4.as_u32 = out_addr->as_u32; + if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0)) + return VNET_API_ERROR_INVALID_VALUE; + } + break; + } + if (!nm->num_workers) + { + bibe = + nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr, + clib_host_to_net_u16 (in_port), + clib_host_to_net_u16 (out_port), + fib_index, proto, 1); + if (!bibe) + return VNET_API_ERROR_UNSPECIFIED; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + } + else + { + if (!bibe) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (!nm->num_workers) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + } + + if (nm->num_workers) + { + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if (static_bib->done) + vec_add1 (to_be_free, static_bib - nm->static_bibs); + })); + vec_foreach (index, to_be_free) + pool_put_index (nm->static_bibs, index[0]); + /* *INDENT-ON* */ + vec_free (to_be_free); + pool_get (nm->static_bibs, static_bib); + static_bib->in_addr.as_u64[0] = in_addr->as_u64[0]; + static_bib->in_addr.as_u64[1] = in_addr->as_u64[1]; + static_bib->in_port = clib_host_to_net_u16 (in_port); + static_bib->out_addr.as_u32 = out_addr->as_u32; + static_bib->out_port = clib_host_to_net_u16 (out_port); + static_bib->fib_index = fib_index; + static_bib->proto = proto; + static_bib->is_add = is_add; + static_bib->thread_index = thread_index; + static_bib->done = 0; + worker_vm = vlib_mains[thread_index]; + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + nat64_static_bib_worker_node.index); + else + return VNET_API_ERROR_UNSPECIFIED; + } + + return 0; +} + +int +nat64_set_udp_timeout (u32 timeout) +{ + nat64_main_t *nm = &nat64_main; + + if (timeout == 0) + nm->udp_timeout = NAT_UDP_TIMEOUT; + else + nm->udp_timeout = timeout; + + return 0; +} + +u32 +nat64_get_udp_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->udp_timeout; +} + +int +nat64_set_icmp_timeout (u32 timeout) +{ + nat64_main_t *nm = &nat64_main; + + if (timeout == 0) + nm->icmp_timeout = NAT_ICMP_TIMEOUT; + else + nm->icmp_timeout = timeout; + + return 0; +} + +void +nat64_reset_timeouts () +{ + nat64_main_t *nm = &nat64_main; + + nm->udp_timeout = NAT_UDP_TIMEOUT; + nm->icmp_timeout = NAT_ICMP_TIMEOUT; + nm->tcp_est_timeout = NAT_TCP_ESTABLISHED_TIMEOUT; + nm->tcp_trans_timeout = NAT_TCP_TRANSITORY_TIMEOUT; +} + +u32 +nat64_get_icmp_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->icmp_timeout; +} + +int +nat64_set_tcp_timeouts (u32 trans, u32 est) +{ + nat64_main_t *nm = &nat64_main; + + if (trans == 0) + nm->tcp_trans_timeout = NAT_TCP_TRANSITORY_TIMEOUT; + else + nm->tcp_trans_timeout = trans; + + if (est == 0) + nm->tcp_est_timeout = NAT_TCP_ESTABLISHED_TIMEOUT; + else + nm->tcp_est_timeout = est; + + return 0; +} + +u32 +nat64_get_tcp_trans_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->tcp_trans_timeout; +} + +u32 +nat64_get_tcp_est_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->tcp_est_timeout; +} + +void +nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + u32 now = (u32) vlib_time_now (vm); + + switch (ip_proto_to_nat_proto (ste->proto)) + { + case NAT_PROTOCOL_ICMP: + ste->expire = now + nm->icmp_timeout; + return; + case NAT_PROTOCOL_TCP: + { + switch (ste->tcp_state) + { + case NAT64_TCP_STATE_V4_INIT: + case NAT64_TCP_STATE_V6_INIT: + case NAT64_TCP_STATE_V4_FIN_RCV: + case NAT64_TCP_STATE_V6_FIN_RCV: + case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV: + case NAT64_TCP_STATE_TRANS: + ste->expire = now + nm->tcp_trans_timeout; + return; + case NAT64_TCP_STATE_ESTABLISHED: + ste->expire = now + nm->tcp_est_timeout; + return; + default: + return; + } + } + case NAT_PROTOCOL_UDP: + ste->expire = now + nm->udp_timeout; + return; + default: + ste->expire = now + nm->udp_timeout; + return; + } +} + +void +nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp, + u8 is_ip6) +{ + switch (ste->tcp_state) + { + case NAT64_TCP_STATE_CLOSED: + { + if (tcp->flags & TCP_FLAG_SYN) + { + if (is_ip6) + ste->tcp_state = NAT64_TCP_STATE_V6_INIT; + else + ste->tcp_state = NAT64_TCP_STATE_V4_INIT; + } + return; + } + case NAT64_TCP_STATE_V4_INIT: + { + if (is_ip6 && (tcp->flags & TCP_FLAG_SYN)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + case NAT64_TCP_STATE_V6_INIT: + { + if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + case NAT64_TCP_STATE_ESTABLISHED: + { + if (tcp->flags & TCP_FLAG_FIN) + { + if (is_ip6) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV; + else + ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV; + } + else if (tcp->flags & TCP_FLAG_RST) + { + ste->tcp_state = NAT64_TCP_STATE_TRANS; + } + return; + } + case NAT64_TCP_STATE_V4_FIN_RCV: + { + if (is_ip6 && (tcp->flags & TCP_FLAG_FIN)) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV; + return; + } + case NAT64_TCP_STATE_V6_FIN_RCV: + { + if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN)) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV; + return; + } + case NAT64_TCP_STATE_TRANS: + { + if (!(tcp->flags & TCP_FLAG_RST)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + default: + return; + } +} + +int +nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p = 0; + int i; + + /* Verify prefix length */ + if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64 + && plen != 96) + return VNET_API_ERROR_INVALID_VALUE; + + /* Check if tenant already have prefix */ + for (i = 0; i < vec_len (nm->pref64); i++) + { + if (nm->pref64[i].vrf_id == vrf_id) + { + p = nm->pref64 + i; + break; + } + } + + if (is_add) + { + if (!p) + { + vec_add2 (nm->pref64, p, 1); + p->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); + p->vrf_id = vrf_id; + } + + p->prefix.as_u64[0] = prefix->as_u64[0]; + p->prefix.as_u64[1] = prefix->as_u64[1]; + p->plen = plen; + } + else + { + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + // TODO: missing fib_table_unlock ? + + vec_del1 (nm->pref64, i); + } + + return 0; +} + +void +nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (fn (p, ctx)) + break; + }; + /* *INDENT-ON* */ +} + +void +nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p, *gp = 0, *prefix = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (p->fib_index == fib_index) + { + prefix = p; + break; + } + + if (p->fib_index == 0) + gp = p; + }; + /* *INDENT-ON* */ + + if (!prefix) + prefix = gp; + + if (prefix) + { + clib_memcpy_fast (ip6, &p->prefix, sizeof (ip6_address_t)); + switch (p->plen) + { + case 32: + ip6->as_u32[1] = ip4->as_u32; + break; + case 40: + ip6->as_u8[5] = ip4->as_u8[0]; + ip6->as_u8[6] = ip4->as_u8[1]; + ip6->as_u8[7] = ip4->as_u8[2]; + ip6->as_u8[9] = ip4->as_u8[3]; + break; + case 48: + ip6->as_u8[6] = ip4->as_u8[0]; + ip6->as_u8[7] = ip4->as_u8[1]; + ip6->as_u8[9] = ip4->as_u8[2]; + ip6->as_u8[10] = ip4->as_u8[3]; + break; + case 56: + ip6->as_u8[7] = ip4->as_u8[0]; + ip6->as_u8[9] = ip4->as_u8[1]; + ip6->as_u8[10] = ip4->as_u8[2]; + ip6->as_u8[11] = ip4->as_u8[3]; + break; + case 64: + ip6->as_u8[9] = ip4->as_u8[0]; + ip6->as_u8[10] = ip4->as_u8[1]; + ip6->as_u8[11] = ip4->as_u8[2]; + ip6->as_u8[12] = ip4->as_u8[3]; + break; + case 96: + ip6->as_u32[3] = ip4->as_u32; + break; + default: + nat_elog_str ("invalid prefix length"); + break; + } + } + else + { + clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t)); + ip6->as_u32[3] = ip4->as_u32; + } +} + +void +nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p, *gp = 0; + u8 plen = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (p->fib_index == fib_index) + { + plen = p->plen; + break; + } + + if (p->vrf_id == 0) + gp = p; + }; + /* *INDENT-ON* */ + + if (!plen) + { + if (gp) + plen = gp->plen; + else + plen = 96; + } + + switch (plen) + { + case 32: + ip4->as_u32 = ip6->as_u32[1]; + break; + case 40: + ip4->as_u8[0] = ip6->as_u8[5]; + ip4->as_u8[1] = ip6->as_u8[6]; + ip4->as_u8[2] = ip6->as_u8[7]; + ip4->as_u8[3] = ip6->as_u8[9]; + break; + case 48: + ip4->as_u8[0] = ip6->as_u8[6]; + ip4->as_u8[1] = ip6->as_u8[7]; + ip4->as_u8[2] = ip6->as_u8[9]; + ip4->as_u8[3] = ip6->as_u8[10]; + break; + case 56: + ip4->as_u8[0] = ip6->as_u8[7]; + ip4->as_u8[1] = ip6->as_u8[9]; + ip4->as_u8[2] = ip6->as_u8[10]; + ip4->as_u8[3] = ip6->as_u8[11]; + break; + case 64: + ip4->as_u8[0] = ip6->as_u8[9]; + ip4->as_u8[1] = ip6->as_u8[10]; + ip4->as_u8[2] = ip6->as_u8[11]; + ip4->as_u8[3] = ip6->as_u8[12]; + break; + case 96: + ip4->as_u32 = ip6->as_u32[3]; + break; + default: + nat_elog_str ("invalid prefix length"); + break; + } +} + +/** + * @brief Per worker process checking expire time for NAT64 sessions. + */ +static uword +nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db; + u32 now; + + // TODO: barier sync on plugin enabled + if (plugin_enabled () == 0) + return 0; + + db = &nm->db[thread_index]; + now = (u32) vlib_time_now (vm); + + nad64_db_st_free_expired (thread_index, db, now); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = { + .function = nat64_expire_worker_walk_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-expire-worker-walk", +}; +/* *INDENT-ON* */ + +/** + * @brief Centralized process to drive per worker expire walk. + */ +static uword +nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + vlib_main_t **worker_vms = 0, *worker_vm; + int i; + uword event_type, *event_data = 0; + + if (vec_len (vlib_mains) == 0) + vec_add1 (worker_vms, vm); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + { + worker_vm = vlib_mains[i]; + if (worker_vm) + vec_add1 (worker_vms, worker_vm); + } + } + + while (1) + { + if (nm->total_enabled_count) + { + vlib_process_wait_for_event_or_clock (vm, 10.0); + event_type = vlib_process_get_events (vm, &event_data); + } + else + { + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } + + switch (event_type) + { + case ~0: + break; + case NAT64_CLEANER_RESCHEDULE: + break; + default: + nat64_log_err ("unknown event %u", event_type); + break; + } + + for (i = 0; i < vec_len (worker_vms); i++) + { + worker_vm = worker_vms[i]; + vlib_node_set_interrupt_pending (worker_vm, + nm->expire_worker_walk_node_index); + } + } + + return 0; +} + +void +nat64_create_expire_walk_process () +{ + nat64_main_t *nm = &nat64_main; + + if (nm->expire_walk_node_index) + return; + nm->expire_walk_node_index = vlib_process_create (vlib_get_main (), + "nat64-expire-walk", + nat64_expire_walk_fn, + 16 /* stack_bytes */ ); +} + +int +nat64_plugin_enable (nat64_config_t c) +{ + nat64_main_t *nm = &nat64_main; + + if (plugin_enabled () == 1) + { + nat64_log_err ("plugin already enabled!"); + return 1; + } + + if (!c.bib_buckets) + c.bib_buckets = 1024; + + if (!c.bib_memory_size) + c.bib_memory_size = 128 << 20; + + if (!c.st_buckets) + c.st_buckets = 2048; + + if (!c.st_memory_size) + c.st_memory_size = 256 << 20; + + nm->config = c; + + if (nat64_init_hash (c)) + { + nat64_log_err ("initializing hashes failed!"); + return 1; + } + + nat64_create_expire_walk_process (); + + nm->enabled = 1; + return 0; +} + +int +nat64_plugin_disable () +{ + nat64_main_t *nm = &nat64_main; + vnet_main_t *vnm = vnet_get_main (); + int rv = 0; + + nat64_address_t *a; + nat64_interface_t *i, *interfaces; + + if (plugin_enabled () == 0) + { + nat64_log_err ("plugin already disabled!"); + return 1; + } + nm->enabled = 0; + + interfaces = vec_dup (nm->interfaces); + vec_foreach (i, interfaces) + { + rv = nat64_interface_add_del (i->sw_if_index, i->flags, 1); + if (rv) + { + nat64_log_err ("%U %s interface del failed", + unformat_vnet_sw_interface, + i->flags & NAT64_INTERFACE_FLAG_IS_INSIDE ? + "inside" : "outside", vnm, i->sw_if_index); + } + } + vec_free (interfaces); + pool_free (nm->interfaces); + + nat64_reset_timeouts (); + + if (nat64_free_hash ()) + { + rv = 1; + nat64_log_err ("freeing hashes failed!"); + } + + // TODO: based on nat64_add_del_prefix fib_table_unlock is not called + vec_free (nm->pref64); + + if (vec_len (nm->addr_pool)) + { + vec_foreach (a, nm->addr_pool) + { + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi); + } + vec_free (nm->addr_pool); + } + return rv; +} + +uword +unformat_nat_protocol (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(N, i, n, s) else if (unformat (input, s)) *r = NAT_PROTOCOL_##N; + foreach_nat_protocol +#undef _ + else + return 0; + return 1; +} + +u8 * +format_nat_protocol (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(N, j, n, str) case NAT_PROTOCOL_##N: t = (u8 *) str; break; + foreach_nat_protocol +#undef _ + default: + s = format (s, "unknown"); + return s; + } + s = format (s, "%s", t); + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |