diff options
author | Filip Varga <fivarga@cisco.com> | 2020-09-14 11:20:16 +0200 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2020-10-07 09:05:50 +0000 |
commit | 1f36023d29137825b8a09578d09b955046c2415b (patch) | |
tree | c00726718d9d8b4f5ecc74c45d9a2d790deb0250 /src/plugins/nat/nat64 | |
parent | d1762e614d1e05cbeda4d91e921fa663b2b46c03 (diff) |
nat: move nat64 to a subfeature
Type: refactor
Change-Id: I3b9e17164647d2019b1f40cffeed63393345219e
Signed-off-by: Filip Varga <fivarga@cisco.com>
Diffstat (limited to 'src/plugins/nat/nat64')
-rw-r--r-- | src/plugins/nat/nat64/nat64.api | 316 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64.c | 1646 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64.h | 531 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_api.c | 458 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_cli.c | 992 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_db.c | 742 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_db.h | 380 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_doc.md | 73 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_in2out.c | 1411 | ||||
-rw-r--r-- | src/plugins/nat/nat64/nat64_out2in.c | 796 |
10 files changed, 7345 insertions, 0 deletions
diff --git a/src/plugins/nat/nat64/nat64.api b/src/plugins/nat/nat64/nat64.api new file mode 100644 index 00000000000..5fc4129d041 --- /dev/null +++ b/src/plugins/nat/nat64/nat64.api @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; +import "vnet/ip/ip_types.api"; +import "vnet/interface_types.api"; +import "plugins/nat/nat_types.api"; + +/** + * @file nat64.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane API messages which are generally + * called through a shared memory interface. 
+ */ + +/** \brief Enable/disable NAT64 plugin + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bib_buckets - Number of BIB hash buckets + @param bib_memory_size - Memory size of BIB hash + @param st_buckets - Number of session table hash buckets + @param st_memory_size - Memory size of session table hash + @param enable - true if enable, false if disable +*/ +autoreply define nat64_plugin_enable_disable { + u32 client_index; + u32 context; + u32 bib_buckets; + u32 bib_memory_size; + u32 st_buckets; + u32 st_memory_size; + bool enable; + option status="in_progress"; +}; + +/** \brief Set values of timeouts for NAT64 sessions (seconds) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param udp - UDP timeout (default 300sec) + @param tcp_established - TCP established timeout (default 7440sec) + @param tcp_transitory - TCP transitory timeout (default 240sec) + @param icmp - ICMP timeout (default 60sec) +*/ +autoreply define nat64_set_timeouts { + u32 client_index; + u32 context; + u32 udp; + u32 tcp_established; + u32 tcp_transitory; + u32 icmp; +}; + +/** \brief Get values of timeouts for NAT64 sessions (seconds) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat64_get_timeouts { + u32 client_index; + u32 context; +}; + +/** \brief Get values of timeouts for NAT64 sessions reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param udp - UDP timeout + @param tcp_established - TCP established timeout + @param tcp_transitory - TCP transitory timeout + @param icmp - ICMP timeout +*/ +define nat64_get_timeouts_reply { + u32 context; + i32 retval; + u32 udp; + u32 tcp_established; + u32 tcp_transitory; + u32 icmp; +}; + +/** \brief Add/delete address range to NAT64 pool + @param 
client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param start_addr - start IPv4 address of the range + @param end_addr - end IPv4 address of the range + @param vrf_id - VRF id of tenant, ~0 means independent of VRF + @param is_add - true if add, false if delete +*/ +autoreply define nat64_add_del_pool_addr_range { + u32 client_index; + u32 context; + vl_api_ip4_address_t start_addr; + vl_api_ip4_address_t end_addr; + u32 vrf_id; + bool is_add; +}; + +/** \brief Dump NAT64 pool addresses + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat64_pool_addr_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT64 pool address details response + @param context - sender context, to match reply w/ request + @param address - IPv4 address + @param vrf_id - VRF id of tenant, ~0 means independent of VRF +*/ +define nat64_pool_addr_details { + u32 context; + vl_api_ip4_address_t address; + u32 vrf_id; +}; + +/** \brief Enable/disable NAT64 feature on the interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param flags - flag NAT_IS_INSIDE if interface is inside else + interface is outside + @param sw_if_index - index of the interface +*/ +autoreply define nat64_add_del_interface { + u32 client_index; + u32 context; + bool is_add; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Dump interfaces with NAT64 feature + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat64_interface_dump { + u32 client_index; + u32 context; +}; + +/** \brief NAT64 interface details response + @param context - sender context, to match reply w/ request + @param flags - flag NAT_IS_INSIDE if 
interface is inside, + flag NAT_IS_OUTSIDE if interface is outside + and if both flags are set the interface is + both inside and outside + @param sw_if_index - index of the interface +*/ +define nat64_interface_details { + u32 context; + vl_api_nat_config_flags_t flags; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Add/delete NAT64 static BIB entry + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param i_addr - inside IPv6 address + @param o_addr - outside IPv4 address + @param i_port - inside port number + @param o_port - outside port number + @param vrf_id - VRF id of tenant + @param proto - protocol number + @param is_add - true if add, false if delete +*/ + autoreply define nat64_add_del_static_bib { + u32 client_index; + u32 context; + vl_api_ip6_address_t i_addr; + vl_api_ip4_address_t o_addr; + u16 i_port; + u16 o_port; + u32 vrf_id; + u8 proto; + bool is_add; +}; + +/** \brief Dump NAT64 BIB + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param proto - protocol of the BIB: 255 - all BIBs + 6 - TCP BIB + 17 - UDP BIB + 1/58 - ICMP BIB + otherwise - "unknown" protocol BIB +*/ +define nat64_bib_dump { + u32 client_index; + u32 context; + u8 proto; +}; + +/** \brief NAT64 BIB details response + @param context - sender context, to match reply w/ request + @param i_addr - inside IPv6 address + @param o_addr - outside IPv4 address + @param i_port - inside port number + @param o_port - outside port number + @param vrf_id - VRF id of tenant + @param proto - protocol number + @param flags - flag NAT_IS_STATIC if BIB entry is static + or BIB entry is dynamic + @param ses_num - number of sessions associated with the BIB entry +*/ +define nat64_bib_details { + u32 context; + vl_api_ip6_address_t i_addr; + vl_api_ip4_address_t o_addr; + u16 i_port; + u16 o_port; + u32 vrf_id; + u8 proto; + 
vl_api_nat_config_flags_t flags; + u32 ses_num; +}; + +/** \brief Dump NAT64 session table + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param proto - protocol of the session table: 255 - all STs + 6 - TCP ST + 17 - UDP ST + 1/58 - ICMP ST + otherwise - "unknown" proto ST +*/ +define nat64_st_dump { + u32 client_index; + u32 context; + u8 proto; +}; + +/** \brief NAT64 session table details response + @param context - sender context, to match reply w/ request + @param il_addr - inside IPv6 address of the local host + @param ol_addr - outside IPv4 address of the local host + @param il_port - inside port number id of the local host/inside ICMP id + @param ol_port - outside port number of the local host/outside ICMP id + @param ir_addr - inside IPv6 address of the remote host + @param or_addr - outside IPv4 address of the remote host + @param r_port - port number of the remote host (not used for ICMP) + @param vrf_id - VRF id of tenant + @param proto - protocol number +*/ +define nat64_st_details { + u32 context; + vl_api_ip6_address_t il_addr; + vl_api_ip4_address_t ol_addr; + u16 il_port; + u16 ol_port; + vl_api_ip6_address_t ir_addr; + vl_api_ip4_address_t or_addr; + u16 r_port; + u32 vrf_id; + u8 proto; +}; + +/** \brief Add/del NAT64 prefix + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param prefix - NAT64 prefix + @param vrf_id - VRF id of tenant + @param is_add - true if add, false if delete +*/ +autoreply define nat64_add_del_prefix { + u32 client_index; + u32 context; + vl_api_ip6_prefix_t prefix; + u32 vrf_id; + bool is_add; +}; + +/** \brief Dump NAT64 prefix + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define nat64_prefix_dump { + u32 client_index; + u32 context; +}; + +/** \brief Dump NAT64 prefix details response + 
@param context - sender context, to match reply w/ request + @param prefix - NAT64 prefix + @param vrf_id - VRF id of tenant +*/ +define nat64_prefix_details { + u32 context; + vl_api_ip6_prefix_t prefix; + u32 vrf_id; +}; + +/** \brief Add/delete NAT64 pool address from specific interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - true if add, false if delete + @param sw_if_index - software index of the interface +*/ +autoreply define nat64_add_del_interface_addr { + u32 client_index; + u32 context; + bool is_add; + vl_api_interface_index_t sw_if_index; +}; diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c new file mode 100644 index 00000000000..5da498670f6 --- /dev/null +++ b/src/plugins/nat/nat64/nat64.c @@ -0,0 +1,1646 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/crc32.h> +#include <vnet/fib/ip4_fib.h> + +#include <vnet/ip/reass/ip4_sv_reass.h> +#include <vnet/ip/reass/ip6_sv_reass.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +#include <nat/nat64/nat64.h> + +nat64_main_t nat64_main; + +/* *INDENT-OFF* */ +/* Hook up input features */ +VNET_FEATURE_INIT (nat64_in2out, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_in2out_handoff, static) = { + .arc_name = "ip6-unicast", + .node_name = "nat64-in2out-handoff", + .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_after = VNET_FEATURES ("ip6-sv-reassembly-feature"), +}; +VNET_FEATURE_INIT (nat64_out2in_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat64-out2in-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), + .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"), +}; +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "NAT64", +}; +static u8 well_known_prefix[] = { + 0x00, 0x64, 0xff, 0x9b, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; +/* *INDENT-ON* */ + +#define nat_elog_str(_str) \ +do \ + { \ + ELOG_TYPE_DECLARE (e) = \ + { \ + .format = "nat-msg " _str, \ + .format_args = "", \ + }; \ + ELOG_DATA (&vlib_global_main.elog_main, e); \ + } while (0); + +static void +nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) +{ + nat64_main_t *nm = &nat64_main; + int i, j; + + if (plugin_enabled () == 0) + return; + + for (i = 0; i < vec_len 
(nm->auto_add_sw_if_indices); i++) + { + if (sw_if_index == nm->auto_add_sw_if_indices[i]) + { + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + for (j = 0; j < vec_len (nm->addr_pool); j++) + if (nm->addr_pool[j].addr.as_u32 == address->as_u32) + return; + + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 1); + return; + } + else + { + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + address, ~0, 0); + return; + } + } + } +} + +u32 +nat64_get_worker_in2out (ip6_address_t * addr) +{ + nat64_main_t *nm = &nat64_main; + u32 next_worker_index = nm->first_worker_index; + u32 hash; + +#ifdef clib_crc32c_uses_intrinsics + hash = clib_crc32c ((u8 *) addr->as_u32, 16); +#else + u64 tmp = addr->as_u64[0] ^ addr->as_u64[1]; + hash = clib_xxhash (tmp); +#endif + + if (PREDICT_TRUE (is_pow2 (_vec_len (nm->workers)))) + next_worker_index += nm->workers[hash & (_vec_len (nm->workers) - 1)]; + else + next_worker_index += nm->workers[hash % _vec_len (nm->workers)]; + + return next_worker_index; +} + +u32 +nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip) +{ + nat64_main_t *nm = &nat64_main; + udp_header_t *udp; + u16 port; + u32 proto; + + proto = ip_proto_to_nat_proto (ip->protocol); + udp = ip4_next_header (ip); + port = udp->dst_port; + + /* unknown protocol */ + if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER)) + { + nat64_db_t *db; + ip46_address_t daddr; + nat64_db_bib_entry_t *bibe; + + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip->dst_address.as_u32; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0); + if (bibe) + return (u32) (db - nm->db); + } + /* *INDENT-ON* */ + return vlib_get_thread_index (); + } + + /* ICMP */ + if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if 
(!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; + else + { + /* if error message, then it's not fragmented and we can access it */ + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_nat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case NAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } + } + } + + /* worker by outside port (TCP/UDP) */ + port = clib_net_to_host_u16 (port); + if (port > 1024) + return nm->first_worker_index + ((port - 1024) / nm->port_per_thread); + + return vlib_get_thread_index (); +} + +clib_error_t * +nat64_init (vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + ip4_add_del_interface_address_callback_t cb4; + vlib_node_t *node; + + clib_memset (nm, 0, sizeof (*nm)); + + nm->ip4_main = &ip4_main; + nm->log_class = vlib_log_register_class ("nat64", 0); + + nm->port_per_thread = 0xffff - 1024; + + nm->fq_in2out_index = ~0; + nm->fq_out2in_index = ~0; + + node = vlib_get_node_by_name (vm, (u8 *) "error-drop"); + nm->error_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out"); + nm->in2out_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath"); + nm->in2out_slowpath_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in"); + nm->out2in_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "nat64-expire-worker-walk"); + nm->expire_worker_walk_node_index = node->index; + + nm->fib_src_hi = fib_source_allocate ("nat64-hi", + FIB_SOURCE_PRIORITY_HI, + FIB_SOURCE_BH_SIMPLE); + 
nm->fib_src_low = fib_source_allocate ("nat64-low", + FIB_SOURCE_PRIORITY_LOW, + FIB_SOURCE_BH_SIMPLE); + + // set protocol timeouts to defaults + nat64_reset_timeouts (); + + /* Set up the interface address add/del callback */ + cb4.function = nat64_ip4_add_del_interface_address_cb; + cb4.function_opaque = 0; + vec_add1 (nm->ip4_main->add_del_interface_address_callbacks, cb4); + + /* Init counters */ + nm->total_bibs.name = "total-bibs"; + nm->total_bibs.stat_segment_name = "/nat64/total-bibs"; + vlib_validate_simple_counter (&nm->total_bibs, 0); + vlib_zero_simple_counter (&nm->total_bibs, 0); + nm->total_sessions.name = "total-sessions"; + nm->total_sessions.stat_segment_name = "/nat64/total-sessions"; + vlib_validate_simple_counter (&nm->total_sessions, 0); + vlib_zero_simple_counter (&nm->total_sessions, 0); + + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + vlib_thread_registration_t *tr; + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + nm->num_workers = tr->count; + nm->first_worker_index = tr->first_index; + } + } + + if (nm->num_workers > 1) + { + int i; + uword *bitmap = 0; + + for (i = 0; i < nm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(nm->workers, i); + })); + /* *INDENT-ON* */ + + clib_bitmap_free (bitmap); + + nm->port_per_thread = (0xffff - 1024) / _vec_len (nm->workers); + } + + // TODO: ipfix needs to be separated from NAT base plugin + /* Init IPFIX logging */ + //snat_ipfix_logging_init (vm); + +#define _(x) \ + nm->counters.in2out.x.name = #x; \ + nm->counters.in2out.x.stat_segment_name = "/nat64/in2out/" #x; \ + nm->counters.out2in.x.name = #x; \ + nm->counters.out2in.x.stat_segment_name = "/nat64/out2in/" #x; + foreach_nat_counter; +#undef _ + return nat64_api_hookup (vm); +} + +VLIB_INIT_FUNCTION (nat64_init); + +static void nat64_free_out_addr_and_port (struct nat64_db_s *db, + ip4_address_t * addr, 
u16 port, + u8 protocol); + +int +nat64_init_hash (nat64_config_t c) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + int rv = 0; + + vec_validate (nm->db, tm->n_vlib_mains - 1); + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_init (db, c, nat64_free_out_addr_and_port)) + { + nat64_log_err ("NAT64 DB init failed"); + rv = 1; + } + } + /* *INDENT-ON* */ + + return rv; +} + +int +nat64_free_hash () +{ + nat64_main_t *nm = &nat64_main; + nat64_db_t *db; + int rv = 0; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + if (nat64_db_free (db)) + { + nat64_log_err ("NAT64 DB free failed"); + rv = 1; + } + } + /* *INDENT-ON* */ + + vec_free (nm->db); + + return rv; +} + +int +nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + nat64_address_t *a = 0; + nat64_interface_t *interface; + int i; + nat64_db_t *db; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* Check if address already exists */ + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + if (nm->addr_pool[i].addr.as_u32 == addr->as_u32) + { + a = nm->addr_pool + i; + break; + } + } + + if (is_add) + { + if (a) + return VNET_API_ERROR_VALUE_EXIST; + + vec_add2 (nm->addr_pool, a, 1); + a->addr = *addr; + a->fib_index = ~0; + if (vrf_id != ~0) + a->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); +#define _(N, id, n, s) \ + clib_memset (a->busy_##n##_port_refcounts, 0, sizeof(a->busy_##n##_port_refcounts)); \ + a->busy_##n##_ports = 0; \ + vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); + foreach_nat_protocol +#undef _ + } + else + { + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi); + /* Delete sessions using address */ + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + 
nat64_db_free_out_addr (thread_index, db, &a->addr); + vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0, + db->st.st_entries_num); + } + /* *INDENT-ON* */ + vec_del1 (nm->addr_pool, i); + } + + /* Add/del external address to FIB */ + /* *INDENT-OFF* */ + pool_foreach (interface, nm->interfaces, + ({ + if (nat64_interface_is_inside(interface)) + continue; + + nat64_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add); + break; + })); + /* *INDENT-ON* */ + + return 0; +} + +void +nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_address_t *a = 0; + + /* *INDENT-OFF* */ + vec_foreach (a, nm->addr_pool) + { + if (fn (a, ctx)) + break; + }; + /* *INDENT-ON* */ +} + +int +nat64_add_interface_address (u32 sw_if_index, int is_add) +{ + nat64_main_t *nm = &nat64_main; + ip4_main_t *ip4_main = nm->ip4_main; + ip4_address_t *first_int_addr; + int i; + + first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0); + + for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++) + { + if (nm->auto_add_sw_if_indices[i] == sw_if_index) + { + if (is_add) + return VNET_API_ERROR_VALUE_EXIST; + else + { + /* if have address remove it */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 0); + vec_del1 (nm->auto_add_sw_if_indices, i); + return 0; + } + } + } + + if (!is_add) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add to the auto-address list */ + vec_add1 (nm->auto_add_sw_if_indices, sw_if_index); + + /* If the address is already bound - or static - add it now */ + if (first_int_addr) + (void) nat64_add_del_pool_addr (vlib_get_thread_index (), + first_int_addr, ~0, 1); + + return 0; +} + +static void +nat64_validate_counters (nat64_main_t * nm, u32 sw_if_index) +{ +#define _(x) \ + vlib_validate_simple_counter (&nm->counters.in2out.x, sw_if_index); \ 
+ vlib_zero_simple_counter (&nm->counters.in2out.x, sw_if_index); \ + vlib_validate_simple_counter (&nm->counters.out2in.x, sw_if_index); \ + vlib_zero_simple_counter (&nm->counters.out2in.x, sw_if_index); + foreach_nat_counter; +#undef _ +} + +void +nat64_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add) +{ + nat64_main_t *nm = &nat64_main; + fib_prefix_t prefix = { + .fp_len = p_len, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = addr->as_u32, + }, + }; + u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (is_add) + fib_table_entry_update_one_path (fib_index, + &prefix, + nm->fib_src_low, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL | + FIB_ENTRY_FLAG_EXCLUSIVE), + DPO_PROTO_IP4, + NULL, + sw_if_index, + ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); + else + fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low); +} + +int +nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add) +{ + vlib_main_t *vm = vlib_get_main (); + nat64_main_t *nm = &nat64_main; + nat64_interface_t *interface = 0, *i; + nat64_address_t *ap; + const char *feature_name, *arc_name; + + // TODO: is enabled ? 
we can't signal if it is not + + /* Check if interface already exists */ + /* *INDENT-OFF* */ + pool_foreach (i, nm->interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + interface = i; + break; + } + })); + /* *INDENT-ON* */ + + if (is_add) + { + if (interface) + goto set_flags; + + pool_get (nm->interfaces, interface); + interface->sw_if_index = sw_if_index; + interface->flags = 0; + nat64_validate_counters (nm, sw_if_index); + set_flags: + if (is_inside) + interface->flags |= NAT64_INTERFACE_FLAG_IS_INSIDE; + else + interface->flags |= NAT64_INTERFACE_FLAG_IS_OUTSIDE; + + nm->total_enabled_count++; + vlib_process_signal_event (vm, + nm->expire_walk_node_index, + NAT64_CLEANER_RESCHEDULE, 0); + + } + else + { + if (!interface) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if ((nat64_interface_is_inside (interface) + && nat64_interface_is_outside (interface))) + interface->flags &= + is_inside ? ~NAT64_INTERFACE_FLAG_IS_INSIDE : + ~NAT64_INTERFACE_FLAG_IS_OUTSIDE; + else + pool_put (nm->interfaces, interface); + + nm->total_enabled_count--; + } + + if (!is_inside) + { + /* *INDENT-OFF* */ + vec_foreach (ap, nm->addr_pool) + nat64_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add); + /* *INDENT-ON* */ + } + + if (nm->num_workers > 1) + { + feature_name = + is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff"; + if (nm->fq_in2out_index == ~0) + nm->fq_in2out_index = + vlib_frame_queue_main_init (nat64_in2out_node.index, 0); + if (nm->fq_out2in_index == ~0) + nm->fq_out2in_index = + vlib_frame_queue_main_init (nat64_out2in_node.index, 0); + } + else + feature_name = is_inside ? "nat64-in2out" : "nat64-out2in"; + + arc_name = is_inside ? 
"ip6-unicast" : "ip4-unicast"; + + if (is_inside) + { + int rv = ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + else + { + int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, is_add); + if (rv) + return rv; + } + + return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index, + is_add, 0, 0); +} + +void +nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_interface_t *i = 0; + + /* *INDENT-OFF* */ + pool_foreach (i, nm->interfaces, + ({ + if (fn (i, ctx)) + break; + })); + /* *INDENT-ON* */ +} + +// TODO: plugin independent +static_always_inline u16 +nat64_random_port (u16 min, u16 max) +{ + nat64_main_t *nm = &nat64_main; + u32 rwide; + u16 r; + + rwide = random_u32 (&nm->random_seed); + r = rwide & 0xFFFF; + if (r >= min && r <= max) + return r; + + return min + (rwide % (max - min + 1)); +} + +/** + * @brief Allocate an outside IPv4 address and port for a new translation. + * + * Walks @c addresses; an address is usable while its per-thread busy count + * for @c proto is below @c port_per_thread. An address whose fib_index equals + * @c fib_index is used as soon as it is found; one with fib_index == ~0 is + * remembered as a fallback. The port is drawn at random from this worker's + * slice (port_per_thread * nat_thread_index + 1024 upward) until one with a + * zero refcount is found. + * + * @param[out] addr selected address + * @param[out] port selected port, network byte order + * @return 0 on success, 1 if the protocol is not handled or nothing is free + */ +static_always_inline int +nat64_alloc_addr_and_port_default (nat64_address_t * addresses, + u32 fib_index, + u32 thread_index, + nat_protocol_t proto, + ip4_address_t * addr, + u16 * port, + u16 port_per_thread, u32 nat_thread_index) +{ + int i; + nat64_address_t *a, *ga = 0; + u32 portnum; + + for (i = 0; i < vec_len (addresses); i++) + { + a = addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ + { \ + if (a->fib_index == fib_index) \ + { \ + while (1) \ + { \ + portnum = (port_per_thread * \ + nat_thread_index) + \ + nat64_random_port(0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + /* take a reference (was '--', which never balanced the decrement done on release) */ \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16(portnum); \ + return 0; \ + } \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + 
default: + return 1; + } + + } + + if (ga) + { + a = ga; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + while (1) \ + { \ + portnum = (port_per_thread * \ + nat_thread_index) + \ + nat64_random_port(0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16(portnum); \ + return 0; \ + } + break; + foreach_nat_protocol +#undef _ + default: + return 1; + } + } + + /* Totally out of translations to use... */ + //snat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +int +nat64_alloc_out_addr_and_port (u32 fib_index, nat_protocol_t proto, + ip4_address_t * addr, u16 * port, + u32 thread_index) +{ + nat64_main_t *nm = &nat64_main; + u32 worker_index = 0; + int rv; + + if (nm->num_workers > 1) + worker_index = thread_index - nm->first_worker_index; + + rv = nat64_alloc_addr_and_port_default (nm->addr_pool, fib_index, + thread_index, + proto, addr, port, + nm->port_per_thread, worker_index); + + return rv; +} + +static void +nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr, + u16 port, u8 protocol) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = db - nm->db; + nat_protocol_t proto = ip_proto_to_nat_proto (protocol); + u16 port_host_byte_order = clib_net_to_host_u16 (port); + nat64_address_t *a; + int i; + + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + a = nm->addr_pool + i; + if (addr->as_u32 != a->addr.as_u32) + continue; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \ + --a->busy_##n##_port_refcounts[port_host_byte_order]; \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ + break; + foreach_nat_protocol +#undef _ + default: + nat_elog_str ("unknown 
protocol"); + return; + } + break; + } +} + +/** + * @brief Add/delete static BIB entry in worker thread. + */ +static uword +nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db = &nm->db[thread_index]; + nat64_static_bib_to_update_t *static_bib; + nat64_db_bib_entry_t *bibe; + ip46_address_t addr; + + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if ((static_bib->thread_index != thread_index) || (static_bib->done)) + continue; + + if (static_bib->is_add) + { + (void) nat64_db_bib_entry_create (thread_index, db, + &static_bib->in_addr, + &static_bib->out_addr, + static_bib->in_port, + static_bib->out_port, + static_bib->fib_index, + static_bib->proto, 1); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + else + { + addr.as_u64[0] = static_bib->in_addr.as_u64[0]; + addr.as_u64[1] = static_bib->in_addr.as_u64[1]; + bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port, + static_bib->proto, + static_bib->fib_index, 1); + if (bibe) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + } + } + + static_bib->done = 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +static vlib_node_registration_t nat64_static_bib_worker_node; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = { + .function = nat64_static_bib_worker_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-static-bib-worker", +}; +/* *INDENT-ON* */ + +int +nat64_add_del_static_bib_entry (ip6_address_t * in_addr, + ip4_address_t * out_addr, u16 in_port, + u16 out_port, u8 proto, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + 
nat64_db_bib_entry_t *bibe; + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); + nat_protocol_t p = ip_proto_to_nat_proto (proto); + ip46_address_t addr; + int i; + nat64_address_t *a; + u32 thread_index = 0; + nat64_db_t *db; + nat64_static_bib_to_update_t *static_bib; + vlib_main_t *worker_vm; + u32 *to_be_free = 0, *index; + + if (nm->num_workers > 1) + { + thread_index = nat64_get_worker_in2out (in_addr); + db = &nm->db[thread_index]; + } + else + db = &nm->db[nm->num_workers]; + + addr.as_u64[0] = in_addr->as_u64[0]; + addr.as_u64[1] = in_addr->as_u64[1]; + bibe = + nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port), + proto, fib_index, 1); + + if (is_add) + { + if (bibe) + return VNET_API_ERROR_VALUE_EXIST; + + /* outside port must be assigned to same thread as internall address */ + if ((out_port > 1024) && (nm->num_workers > 1)) + { + if (thread_index != ((out_port - 1024) / nm->port_per_thread)) + return VNET_API_ERROR_INVALID_VALUE_2; + } + + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + a = nm->addr_pool + i; + if (out_addr->as_u32 != a->addr.as_u32) + continue; + switch (p) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_port_refcounts[out_port]) \ + return VNET_API_ERROR_INVALID_VALUE; \ + ++a->busy_##n##_port_refcounts[out_port]; \ + if (out_port > 1024) \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + } \ + break; + foreach_nat_protocol +#undef _ + default: + clib_memset (&addr, 0, sizeof (addr)); + addr.ip4.as_u32 = out_addr->as_u32; + if (nat64_db_bib_entry_find (db, &addr, 0, proto, fib_index, 0)) + return VNET_API_ERROR_INVALID_VALUE; + } + break; + } + if (!nm->num_workers) + { + bibe = + nat64_db_bib_entry_create (thread_index, db, in_addr, out_addr, + clib_host_to_net_u16 (in_port), + clib_host_to_net_u16 (out_port), + fib_index, proto, 1); + if (!bibe) + return VNET_API_ERROR_UNSPECIFIED; + + 
vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + } + else + { + if (!bibe) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (!nm->num_workers) + { + nat64_db_bib_entry_free (thread_index, db, bibe); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + } + + if (nm->num_workers) + { + /* *INDENT-OFF* */ + pool_foreach (static_bib, nm->static_bibs, + ({ + if (static_bib->done) + vec_add1 (to_be_free, static_bib - nm->static_bibs); + })); + vec_foreach (index, to_be_free) + pool_put_index (nm->static_bibs, index[0]); + /* *INDENT-ON* */ + vec_free (to_be_free); + pool_get (nm->static_bibs, static_bib); + static_bib->in_addr.as_u64[0] = in_addr->as_u64[0]; + static_bib->in_addr.as_u64[1] = in_addr->as_u64[1]; + static_bib->in_port = clib_host_to_net_u16 (in_port); + static_bib->out_addr.as_u32 = out_addr->as_u32; + static_bib->out_port = clib_host_to_net_u16 (out_port); + static_bib->fib_index = fib_index; + static_bib->proto = proto; + static_bib->is_add = is_add; + static_bib->thread_index = thread_index; + static_bib->done = 0; + worker_vm = vlib_mains[thread_index]; + if (worker_vm) + vlib_node_set_interrupt_pending (worker_vm, + nat64_static_bib_worker_node.index); + else + return VNET_API_ERROR_UNSPECIFIED; + } + + return 0; +} + +int +nat64_set_udp_timeout (u32 timeout) +{ + nat64_main_t *nm = &nat64_main; + + if (timeout == 0) + nm->udp_timeout = NAT_UDP_TIMEOUT; + else + nm->udp_timeout = timeout; + + return 0; +} + +u32 +nat64_get_udp_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->udp_timeout; +} + +int +nat64_set_icmp_timeout (u32 timeout) +{ + nat64_main_t *nm = &nat64_main; + + if (timeout == 0) + nm->icmp_timeout = NAT_ICMP_TIMEOUT; + else + nm->icmp_timeout = timeout; + + return 0; +} + +void +nat64_reset_timeouts () +{ + nat64_main_t *nm = &nat64_main; + + nm->udp_timeout = NAT_UDP_TIMEOUT; + nm->icmp_timeout = NAT_ICMP_TIMEOUT; + nm->tcp_est_timeout = 
NAT_TCP_ESTABLISHED_TIMEOUT; + nm->tcp_trans_timeout = NAT_TCP_TRANSITORY_TIMEOUT; +} + +u32 +nat64_get_icmp_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->icmp_timeout; +} + +int +nat64_set_tcp_timeouts (u32 trans, u32 est) +{ + nat64_main_t *nm = &nat64_main; + + if (trans == 0) + nm->tcp_trans_timeout = NAT_TCP_TRANSITORY_TIMEOUT; + else + nm->tcp_trans_timeout = trans; + + if (est == 0) + nm->tcp_est_timeout = NAT_TCP_ESTABLISHED_TIMEOUT; + else + nm->tcp_est_timeout = est; + + return 0; +} + +u32 +nat64_get_tcp_trans_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->tcp_trans_timeout; +} + +u32 +nat64_get_tcp_est_timeout (void) +{ + nat64_main_t *nm = &nat64_main; + + return nm->tcp_est_timeout; +} + +void +nat64_session_reset_timeout (nat64_db_st_entry_t * ste, vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + u32 now = (u32) vlib_time_now (vm); + + switch (ip_proto_to_nat_proto (ste->proto)) + { + case NAT_PROTOCOL_ICMP: + ste->expire = now + nm->icmp_timeout; + return; + case NAT_PROTOCOL_TCP: + { + switch (ste->tcp_state) + { + case NAT64_TCP_STATE_V4_INIT: + case NAT64_TCP_STATE_V6_INIT: + case NAT64_TCP_STATE_V4_FIN_RCV: + case NAT64_TCP_STATE_V6_FIN_RCV: + case NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV: + case NAT64_TCP_STATE_TRANS: + ste->expire = now + nm->tcp_trans_timeout; + return; + case NAT64_TCP_STATE_ESTABLISHED: + ste->expire = now + nm->tcp_est_timeout; + return; + default: + return; + } + } + case NAT_PROTOCOL_UDP: + ste->expire = now + nm->udp_timeout; + return; + default: + ste->expire = now + nm->udp_timeout; + return; + } +} + +void +nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, tcp_header_t * tcp, + u8 is_ip6) +{ + switch (ste->tcp_state) + { + case NAT64_TCP_STATE_CLOSED: + { + if (tcp->flags & TCP_FLAG_SYN) + { + if (is_ip6) + ste->tcp_state = NAT64_TCP_STATE_V6_INIT; + else + ste->tcp_state = NAT64_TCP_STATE_V4_INIT; + } + return; + } + case NAT64_TCP_STATE_V4_INIT: + { + if (is_ip6 && 
(tcp->flags & TCP_FLAG_SYN)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + case NAT64_TCP_STATE_V6_INIT: + { + if (!is_ip6 && (tcp->flags & TCP_FLAG_SYN)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + case NAT64_TCP_STATE_ESTABLISHED: + { + if (tcp->flags & TCP_FLAG_FIN) + { + if (is_ip6) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_RCV; + else + ste->tcp_state = NAT64_TCP_STATE_V4_FIN_RCV; + } + else if (tcp->flags & TCP_FLAG_RST) + { + ste->tcp_state = NAT64_TCP_STATE_TRANS; + } + return; + } + case NAT64_TCP_STATE_V4_FIN_RCV: + { + if (is_ip6 && (tcp->flags & TCP_FLAG_FIN)) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV; + return; + } + case NAT64_TCP_STATE_V6_FIN_RCV: + { + if (!is_ip6 && (tcp->flags & TCP_FLAG_FIN)) + ste->tcp_state = NAT64_TCP_STATE_V6_FIN_V4_FIN_RCV; + return; + } + case NAT64_TCP_STATE_TRANS: + { + if (!(tcp->flags & TCP_FLAG_RST)) + ste->tcp_state = NAT64_TCP_STATE_ESTABLISHED; + return; + } + default: + return; + } +} + +int +nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p = 0; + int i; + + /* Verify prefix length */ + if (plen != 32 && plen != 40 && plen != 48 && plen != 56 && plen != 64 + && plen != 96) + return VNET_API_ERROR_INVALID_VALUE; + + /* Check if tenant already have prefix */ + for (i = 0; i < vec_len (nm->pref64); i++) + { + if (nm->pref64[i].vrf_id == vrf_id) + { + p = nm->pref64 + i; + break; + } + } + + if (is_add) + { + if (!p) + { + vec_add2 (nm->pref64, p, 1); + p->fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + nm->fib_src_hi); + p->vrf_id = vrf_id; + } + + p->prefix.as_u64[0] = prefix->as_u64[0]; + p->prefix.as_u64[1] = prefix->as_u64[1]; + p->plen = plen; + } + else + { + if (!p) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + // TODO: missing fib_table_unlock ? 
+ + vec_del1 (nm->pref64, i); + } + + return 0; +} + +void +nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (fn (p, ctx)) + break; + }; + /* *INDENT-ON* */ +} + +void +nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p, *gp = 0, *prefix = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (p->fib_index == fib_index) + { + prefix = p; + break; + } + + if (p->fib_index == 0) + gp = p; + }; + /* *INDENT-ON* */ + + if (!prefix) + prefix = gp; + + if (prefix) + { + clib_memcpy_fast (ip6, &p->prefix, sizeof (ip6_address_t)); + switch (p->plen) + { + case 32: + ip6->as_u32[1] = ip4->as_u32; + break; + case 40: + ip6->as_u8[5] = ip4->as_u8[0]; + ip6->as_u8[6] = ip4->as_u8[1]; + ip6->as_u8[7] = ip4->as_u8[2]; + ip6->as_u8[9] = ip4->as_u8[3]; + break; + case 48: + ip6->as_u8[6] = ip4->as_u8[0]; + ip6->as_u8[7] = ip4->as_u8[1]; + ip6->as_u8[9] = ip4->as_u8[2]; + ip6->as_u8[10] = ip4->as_u8[3]; + break; + case 56: + ip6->as_u8[7] = ip4->as_u8[0]; + ip6->as_u8[9] = ip4->as_u8[1]; + ip6->as_u8[10] = ip4->as_u8[2]; + ip6->as_u8[11] = ip4->as_u8[3]; + break; + case 64: + ip6->as_u8[9] = ip4->as_u8[0]; + ip6->as_u8[10] = ip4->as_u8[1]; + ip6->as_u8[11] = ip4->as_u8[2]; + ip6->as_u8[12] = ip4->as_u8[3]; + break; + case 96: + ip6->as_u32[3] = ip4->as_u32; + break; + default: + nat_elog_str ("invalid prefix length"); + break; + } + } + else + { + clib_memcpy_fast (ip6, well_known_prefix, sizeof (ip6_address_t)); + ip6->as_u32[3] = ip4->as_u32; + } +} + +void +nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_prefix_t *p, *gp = 0; + u8 plen = 0; + + /* *INDENT-OFF* */ + vec_foreach (p, nm->pref64) + { + if (p->fib_index == fib_index) + { + plen = p->plen; + break; + } + + if (p->vrf_id == 0) 
+ gp = p; + }; + /* *INDENT-ON* */ + + if (!plen) + { + if (gp) + plen = gp->plen; + else + plen = 96; + } + + switch (plen) + { + case 32: + ip4->as_u32 = ip6->as_u32[1]; + break; + case 40: + ip4->as_u8[0] = ip6->as_u8[5]; + ip4->as_u8[1] = ip6->as_u8[6]; + ip4->as_u8[2] = ip6->as_u8[7]; + ip4->as_u8[3] = ip6->as_u8[9]; + break; + case 48: + ip4->as_u8[0] = ip6->as_u8[6]; + ip4->as_u8[1] = ip6->as_u8[7]; + ip4->as_u8[2] = ip6->as_u8[9]; + ip4->as_u8[3] = ip6->as_u8[10]; + break; + case 56: + ip4->as_u8[0] = ip6->as_u8[7]; + ip4->as_u8[1] = ip6->as_u8[9]; + ip4->as_u8[2] = ip6->as_u8[10]; + ip4->as_u8[3] = ip6->as_u8[11]; + break; + case 64: + ip4->as_u8[0] = ip6->as_u8[9]; + ip4->as_u8[1] = ip6->as_u8[10]; + ip4->as_u8[2] = ip6->as_u8[11]; + ip4->as_u8[3] = ip6->as_u8[12]; + break; + case 96: + ip4->as_u32 = ip6->as_u32[3]; + break; + default: + nat_elog_str ("invalid prefix length"); + break; + } +} + +/** + * @brief Per worker process checking expire time for NAT64 sessions. + */ +static uword +nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + nat64_db_t *db; + u32 now; + + // TODO: barier sync on plugin enabled + if (plugin_enabled () == 0) + return 0; + + db = &nm->db[thread_index]; + now = (u32) vlib_time_now (vm); + + nad64_db_st_free_expired (thread_index, db, now); + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = { + .function = nat64_expire_worker_walk_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat64-expire-worker-walk", +}; +/* *INDENT-ON* */ + +/** + * @brief Centralized process to drive per worker expire walk. 
+ */ +static uword +nat64_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + nat64_main_t *nm = &nat64_main; + vlib_main_t **worker_vms = 0, *worker_vm; + int i; + uword event_type, *event_data = 0; + + if (vec_len (vlib_mains) == 0) + vec_add1 (worker_vms, vm); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + { + worker_vm = vlib_mains[i]; + if (worker_vm) + vec_add1 (worker_vms, worker_vm); + } + } + + while (1) + { + if (nm->total_enabled_count) + { + vlib_process_wait_for_event_or_clock (vm, 10.0); + event_type = vlib_process_get_events (vm, &event_data); + } + else + { + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } + + switch (event_type) + { + case ~0: + break; + case NAT64_CLEANER_RESCHEDULE: + break; + default: + nat64_log_err ("unknown event %u", event_type); + break; + } + + for (i = 0; i < vec_len (worker_vms); i++) + { + worker_vm = worker_vms[i]; + vlib_node_set_interrupt_pending (worker_vm, + nm->expire_worker_walk_node_index); + } + } + + return 0; +} + +void +nat64_create_expire_walk_process () +{ + nat64_main_t *nm = &nat64_main; + + if (nm->expire_walk_node_index) + return; + nm->expire_walk_node_index = vlib_process_create (vlib_get_main (), + "nat64-expire-walk", + nat64_expire_walk_fn, + 16 /* stack_bytes */ ); +} + +int +nat64_plugin_enable (nat64_config_t c) +{ + nat64_main_t *nm = &nat64_main; + + if (plugin_enabled () == 1) + { + nat64_log_err ("plugin already enabled!"); + return 1; + } + + if (!c.bib_buckets) + c.bib_buckets = 1024; + + if (!c.bib_memory_size) + c.bib_memory_size = 128 << 20; + + if (!c.st_buckets) + c.st_buckets = 2048; + + if (!c.st_memory_size) + c.st_memory_size = 256 << 20; + + nm->config = c; + + if (nat64_init_hash (c)) + { + nat64_log_err ("initializing hashes failed!"); + return 1; + } + + nat64_create_expire_walk_process (); + + nm->enabled = 1; + return 0; +} + +int +nat64_plugin_disable () +{ + nat64_main_t *nm = 
&nat64_main; + vnet_main_t *vnm = vnet_get_main (); + int rv = 0; + + nat64_address_t *a; + nat64_interface_t *i, *interfaces; + + if (plugin_enabled () == 0) + { + nat64_log_err ("plugin already disabled!"); + return 1; + } + nm->enabled = 0; + + interfaces = vec_dup (nm->interfaces); + vec_foreach (i, interfaces) + { + rv = nat64_interface_add_del (i->sw_if_index, i->flags, 1); + if (rv) + { + nat64_log_err ("%U %s interface del failed", + unformat_vnet_sw_interface, + i->flags & NAT64_INTERFACE_FLAG_IS_INSIDE ? + "inside" : "outside", vnm, i->sw_if_index); + } + } + vec_free (interfaces); + pool_free (nm->interfaces); + + nat64_reset_timeouts (); + + if (nat64_free_hash ()) + { + rv = 1; + nat64_log_err ("freeing hashes failed!"); + } + + // TODO: based on nat64_add_del_prefix fib_table_unlock is not called + vec_free (nm->pref64); + + if (vec_len (nm->addr_pool)) + { + vec_foreach (a, nm->addr_pool) + { + if (a->fib_index != ~0) + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi); + } + vec_free (nm->addr_pool); + } + return rv; +} + +uword +unformat_nat_protocol (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(N, i, n, s) else if (unformat (input, s)) *r = NAT_PROTOCOL_##N; + foreach_nat_protocol +#undef _ + else + return 0; + return 1; +} + +u8 * +format_nat_protocol (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(N, j, n, str) case NAT_PROTOCOL_##N: t = (u8 *) str; break; + foreach_nat_protocol +#undef _ + default: + s = format (s, "unknown"); + return s; + } + s = format (s, "%s", t); + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64.h b/src/plugins/nat/nat64/nat64.h new file mode 100644 index 00000000000..1180f9df778 --- /dev/null +++ b/src/plugins/nat/nat64/nat64.h @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2017 
Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_nat64_h__ +#define __included_nat64_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/api_errno.h> +#include <vnet/fib/fib_source.h> +#include <vppinfra/dlist.h> +#include <vppinfra/error.h> +#include <vlibapi/api.h> +#include <vlib/log.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/ip/reass/ip4_sv_reass.h> + +#include <nat/lib/lib.h> +#include <nat/lib/inlines.h> +#include <nat/lib/nat_inlines.h> + +#include <nat/nat64/nat64_db.h> + +typedef struct +{ + u16 identifier; + u16 sequence; +} icmp_echo_header_t; + +typedef struct +{ + u16 src_port, dst_port; +} tcp_udp_header_t; + +#define foreach_nat64_tcp_ses_state \ + _(0, CLOSED, "closed") \ + _(1, V4_INIT, "v4-init") \ + _(2, V6_INIT, "v6-init") \ + _(3, ESTABLISHED, "established") \ + _(4, V4_FIN_RCV, "v4-fin-rcv") \ + _(5, V6_FIN_RCV, "v6-fin-rcv") \ + _(6, V6_FIN_V4_FIN_RCV, "v6-fin-v4-fin-rcv") \ + _(7, TRANS, "trans") + +typedef enum +{ +#define _(v, N, s) NAT64_TCP_STATE_##N = v, + foreach_nat64_tcp_ses_state +#undef _ +} nat64_tcp_ses_state_t; + +typedef enum +{ + NAT64_CLEANER_RESCHEDULE = 1, +} nat64_cleaner_process_event_e; + +typedef struct +{ + ip6_address_t prefix; + u8 plen; + u32 vrf_id; + u32 fib_index; +} nat64_prefix_t; + +typedef struct +{ + ip6_address_t 
in_addr; + u16 in_port; + ip4_address_t out_addr; + u16 out_port; + u32 fib_index; + u32 thread_index; + u8 proto; + u8 is_add; + u8 done; +} nat64_static_bib_to_update_t; + +typedef struct +{ + ip4_address_t addr; + u32 fib_index; +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + u16 busy_##n##_ports; \ + u16 * busy_##n##_ports_per_thread; \ + u32 busy_##n##_port_refcounts[65535]; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ +} nat64_address_t; + +typedef struct +{ + u32 sw_if_index; + u8 flags; +} nat64_interface_t; + +typedef struct +{ + u32 enabled; + + nat64_config_t config; + + /* API message ID base */ + u16 msg_id_base; + + /* log class */ + vlib_log_class_t log_class; + + /** Interface pool */ + nat64_interface_t *interfaces; + + /** Address pool vector */ + nat64_address_t *addr_pool; + + /** sw_if_indices whose interface addresses should be auto-added */ + u32 *auto_add_sw_if_indices; + + /** Pref64 vector */ + nat64_prefix_t *pref64; + + /** BIB and session DB per thread */ + nat64_db_t *db; + + /** Worker handoff */ + u32 fq_in2out_index; + u32 fq_out2in_index; + + /** Pool of static BIB entries to be added/deleted in worker threads */ + nat64_static_bib_to_update_t *static_bibs; + + /** config parameters */ + u32 bib_buckets; + uword bib_memory_size; + u32 st_buckets; + uword st_memory_size; + + /** values of various timeouts */ + u32 udp_timeout; + u32 icmp_timeout; + u32 tcp_trans_timeout; + u32 tcp_est_timeout; + + /* Total count of interfaces enabled */ + u32 total_enabled_count; + + /* Expire walk process node index */ + u32 expire_walk_node_index; + + /* Expire worker walk process node index */ + u32 expire_worker_walk_node_index; + + /* counters/gauges */ + vlib_simple_counter_main_t total_bibs; + vlib_simple_counter_main_t total_sessions; + + /** node index **/ + u32 error_node_index; + + u32 in2out_node_index; + u32 in2out_slowpath_node_index; + + u32 out2in_node_index; + +#define _(x) vlib_simple_counter_main_t x; + struct + { + struct + 
{ + foreach_nat_counter; + } in2out; + + struct + { + foreach_nat_counter; + } out2in; + } counters; +#undef _ + + /* convenience */ + ip4_main_t *ip4_main; + + /* required */ + vnet_main_t *vnet_main; + + /* Randomize port allocation order */ + u32 random_seed; + + /* TCP MSS clamping */ + u16 mss_clamping; + + fib_source_t fib_src_hi; + fib_source_t fib_src_low; + + /* Thread settings */ + u32 num_workers; + u32 first_worker_index; + u32 *workers; + u16 port_per_thread; + +} nat64_main_t; + +extern nat64_main_t nat64_main; +extern vlib_node_registration_t nat64_in2out_node; +extern vlib_node_registration_t nat64_out2in_node; + +/** + * @brief Add/delete address to NAT64 pool. + * + * @param thread_index Thread index used by ipfix nat logging (not address per thread). + * @param addr IPv4 address. + * @param vrf_id VRF id of tenant, ~0 means independent of VRF. + * @param is_add 1 if add, 0 if delete. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_add_del_pool_addr (u32 thread_index, + ip4_address_t * addr, u32 vrf_id, u8 is_add); + +/** + * @brief Call back function when walking addresses in NAT64 pool, non-zero + * return value stop walk. + */ +typedef int (*nat64_pool_addr_walk_fn_t) (nat64_address_t * addr, void *ctx); + +/** + * @brief Walk NAT64 pool. + * + * @param fn The function to invoke on each entry visited. + * @param ctx A context passed in the visit function. + */ +void nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx); + +/** + * @brief NAT64 pool address from specific (DHCP addressed) interface. + * + * @param sw_if_index Index of the interface. + * @param is_add 1 if add, 0 if delete. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_add_interface_address (u32 sw_if_index, int is_add); + +/** + * @brief Enable/disable NAT64 feature on the interface. + * + * @param sw_if_index Index of the interface. + * @param is_inside 1 if inside, 0 if outside. 
+ * @param is_add 1 if add, 0 if delete. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add); + +/** + * @brief Call back function when walking interfaces with NAT64 feature, + * non-zero return value stop walk. + */ +typedef int (*nat64_interface_walk_fn_t) (nat64_interface_t * i, void *ctx); + +/** + * @brief Walk NAT64 interfaces. + * + * @param fn The function to invoke on each entry visited. + * @param ctx A context passed in the visit function. + */ +void nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx); + +/** + * @brief Initialize NAT64. + * + * @param vm vlib main. + * + * @return error code. + */ +clib_error_t *nat64_init (vlib_main_t * vm); + +/** + * @brief Add/delete static NAT64 BIB entry. + * + * @param in_addr Inside IPv6 address. + * @param out_addr Outside IPv4 address. + * @param in_port Inside port number. + * @param out_port Outside port number. + * @param proto L4 protocol. + * @param vrf_id VRF id of tenant. + * @param is_add 1 if add, 0 if delete. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_add_del_static_bib_entry (ip6_address_t * in_addr, + ip4_address_t * out_addr, u16 in_port, + u16 out_port, u8 proto, u32 vrf_id, + u8 is_add); + +/** + * @brief Alloce IPv4 address and port pair from NAT64 pool. + * + * @param fib_index FIB index of tenant. + * @param proto L4 protocol. + * @param addr Allocated IPv4 address. + * @param port Allocated port number. + * @param thread_index Thread index. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_alloc_out_addr_and_port (u32 fib_index, nat_protocol_t proto, + ip4_address_t * addr, u16 * port, + u32 thread_index); + +/** + * @brief Set UDP session timeout. + * + * @param timeout Timeout value in seconds (if 0 reset to default value 300sec). + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +int nat64_set_udp_timeout (u32 timeout); + +/** + * @brief Get UDP session timeout. + * + * @returns UDP session timeout in seconds. + */ +u32 nat64_get_udp_timeout (void); + +/** + * @brief Set ICMP session timeout. + * + * @param timeout Timeout value in seconds (if 0 reset to default value 60sec). + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_set_icmp_timeout (u32 timeout); + +/** + * @brief Get ICMP session timeout. + * + * @returns ICMP session timeout in seconds. + */ +u32 nat64_get_icmp_timeout (void); + +/** + * @brief Set TCP session timeouts. + * + * @param trans Transitory timeout in seconds (if 0 reset to default value 240sec). + * @param est Established timeout in seconds (if 0 reset to default value 7440sec). + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_set_tcp_timeouts (u32 trans, u32 est); + +/** + * @brief Get TCP transitory timeout. + * + * @returns TCP transitory timeout in seconds. + */ +u32 nat64_get_tcp_trans_timeout (void); + +/** + * @brief Get TCP established timeout. + * + * @returns TCP established timeout in seconds. + */ +u32 nat64_get_tcp_est_timeout (void); + +/** + * @brief Reset NAT64 session timeout. + * + * @param ste Session table entry. + * @param vm VLIB main. + **/ +void nat64_session_reset_timeout (nat64_db_st_entry_t * ste, + vlib_main_t * vm); + +/** + * @brief Set NAT64 TCP session state. + * + * @param ste Session table entry. + * @param tcp TCP header. + * @param is_ip6 1 if IPv6 packet, 0 if IPv4. + */ +void nat64_tcp_session_set_state (nat64_db_st_entry_t * ste, + tcp_header_t * tcp, u8 is_ip6); + +/** + * @brief Add/delete NAT64 prefix. + * + * @param prefix NAT64 prefix. + * @param plen Prefix length. + * @param vrf_id VRF id of tenant. + * @param is_add 1 if add, 0 if delete. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +int nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, + u8 is_add); + +/** + * @brief Call back function when walking addresses in NAT64 prefixes, non-zero + * return value stop walk. + */ +typedef int (*nat64_prefix_walk_fn_t) (nat64_prefix_t * pref64, void *ctx); + +/** + * @brief Walk NAT64 prefixes. + * + * @param fn The function to invoke on each entry visited. + * @param ctx A context passed in the visit function. + */ +void nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx); + +/** + * Compose IPv4-embedded IPv6 addresses. + * @param ip6 IPv4-embedded IPv6 addresses. + * @param ip4 IPv4 address. + * @param fib_index Tenant FIB index. + */ +void nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, + u32 fib_index); + +/** + * Extract IPv4 address from the IPv4-embedded IPv6 addresses. + * + * @param ip6 IPv4-embedded IPv6 addresses. + * @param ip4 IPv4 address. + * @param fib_index Tenant FIB index. + */ +void nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, + u32 fib_index); + +/** + * @brief Set NAT64 hash tables configuration. + * + * @param bib_buckets Number of BIB hash buckets. + * @param bib_memory_size Memory size of BIB hash. + * @param st_buckets Number of session table hash buckets. + * @param st_memory_size Memory size of session table hash. + */ +void nat64_set_hash (u32 bib_buckets, uword bib_memory_size, u32 st_buckets, + uword st_memory_size); + +/** + * @brief Get worker thread index for NAT64 in2out. + * + * @param addr IPv6 src address. + * + * @returns worker thread index. + */ +u32 nat64_get_worker_in2out (ip6_address_t * addr); + +/** + * @brief Get worker thread index for NAT64 out2in. + * + * @param ip IPv4 header. + * + * @returns worker thread index. + */ +u32 nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip); + +/* NAT64 interface flags */ +#define NAT64_INTERFACE_FLAG_IS_INSIDE 1 +#define NAT64_INTERFACE_FLAG_IS_OUTSIDE 2 + +/** \brief Check if NAT64 interface is inside. 
+ @param i NAT64 interface + @return 1 if inside interface +*/ +#define nat64_interface_is_inside(i) i->flags & NAT64_INTERFACE_FLAG_IS_INSIDE + +/** \brief Check if NAT64 interface is outside. + @param i NAT64 interface + @return 1 if outside interface +*/ +#define nat64_interface_is_outside(i) i->flags & NAT64_INTERFACE_FLAG_IS_OUTSIDE + +static_always_inline u8 +plugin_enabled () +{ + nat64_main_t *nm = &nat64_main; + return nm->enabled; +} + +void +nat64_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add); + +int nat64_plugin_enable (nat64_config_t c); +int nat64_plugin_disable (); +void nat64_reset_timeouts (); + +format_function_t format_nat_protocol; +unformat_function_t unformat_nat_protocol; + +/* logging */ +#define nat64_log_err(...) \ + vlib_log(VLIB_LOG_LEVEL_ERR, nat64_main.log_class, __VA_ARGS__) +#define nat64_log_warn(...) \ + vlib_log(VLIB_LOG_LEVEL_WARNING, nat64_main.log_class, __VA_ARGS__) +#define nat64_log_notice(...) \ + vlib_log(VLIB_LOG_LEVEL_NOTICE, nat64_main.log_class, __VA_ARGS__) +#define nat64_log_info(...) \ + vlib_log(VLIB_LOG_LEVEL_INFO, nat64_main.log_class, __VA_ARGS__) +#define nat64_log_debug(...)\ + vlib_log(VLIB_LOG_LEVEL_DEBUG, nat64_main.log_class, __VA_ARGS__) + +clib_error_t *nat64_api_hookup (vlib_main_t * vm); + +#endif /* __included_nat64_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_api.c b/src/plugins/nat/nat64/nat64_api.c new file mode 100644 index 00000000000..e64b6434fd2 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_api.c @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip_types_api.h> +#include <vlibmemory/api.h> +#include <nat/nat64/nat64.h> +#include <nat/nat64/nat64.api_enum.h> +#include <nat/nat64/nat64.api_types.h> +#include <vnet/fib/fib_table.h> +#include <vnet/ip/ip.h> + +#define REPLY_MSG_ID_BASE nm->msg_id_base +#include <vlibapi/api_helper_macros.h> + +static void + vl_api_nat64_plugin_enable_disable_t_handler + (vl_api_nat64_plugin_enable_disable_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_plugin_enable_disable_reply_t *rmp; + nat64_config_t c = { 0 }; + int rv = 0; + if (mp->enable) + { + c.bib_buckets = ntohl (mp->bib_buckets); + c.bib_memory_size = ntohl (mp->bib_memory_size); + c.st_buckets = ntohl (mp->st_buckets); + c.st_memory_size = ntohl (mp->st_memory_size); + rv = nat64_plugin_enable (c); + } + else + { + rv = nat64_plugin_disable (); + } + REPLY_MACRO (VL_API_NAT64_PLUGIN_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_nat64_set_timeouts_t_handler (vl_api_nat64_set_timeouts_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_set_timeouts_reply_t *rmp; + int rv = 0; + + nm->udp_timeout = ntohl (mp->udp); + nm->tcp_est_timeout = ntohl (mp->tcp_established); + nm->tcp_trans_timeout = ntohl (mp->tcp_transitory); + nm->icmp_timeout = ntohl (mp->icmp); + + REPLY_MACRO (VL_API_NAT64_SET_TIMEOUTS_REPLY); +} + +static void +vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_get_timeouts_reply_t *rmp; + int rv = 0; + + /* *INDENT-OFF* */ + REPLY_MACRO2 
(VL_API_NAT64_GET_TIMEOUTS_REPLY, + ({ + rmp->udp = htonl (nm->udp_timeout); + rmp->tcp_established = htonl (nm->tcp_est_timeout); + rmp->tcp_transitory = htonl (nm->tcp_trans_timeout); + rmp->icmp = htonl (nm->icmp_timeout); + })) + /* *INDENT-ON* */ +} + +static void + vl_api_nat64_add_del_pool_addr_range_t_handler + (vl_api_nat64_add_del_pool_addr_range_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_add_del_pool_addr_range_reply_t *rmp; + int rv = 0; + ip4_address_t this_addr; + u32 start_host_order, end_host_order; + u32 vrf_id; + int i, count; + u32 *tmp; + + tmp = (u32 *) mp->start_addr; + start_host_order = clib_host_to_net_u32 (tmp[0]); + tmp = (u32 *) mp->end_addr; + end_host_order = clib_host_to_net_u32 (tmp[0]); + + count = (end_host_order - start_host_order) + 1; + + vrf_id = clib_host_to_net_u32 (mp->vrf_id); + + memcpy (&this_addr.as_u8, mp->start_addr, 4); + + for (i = 0; i < count; i++) + { + if ((rv = nat64_add_del_pool_addr (0, &this_addr, vrf_id, mp->is_add))) + goto send_reply; + + increment_v4_address (&this_addr); + } + +send_reply: + REPLY_MACRO (VL_API_NAT64_ADD_DEL_POOL_ADDR_RANGE_REPLY); +} + +typedef struct nat64_api_walk_ctx_t_ +{ + vl_api_registration_t *reg; + u32 context; + nat64_db_t *db; +} nat64_api_walk_ctx_t; + +static int +nat64_api_pool_walk (nat64_address_t * a, void *arg) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_pool_addr_details_t *rmp; + nat64_api_walk_ctx_t *ctx = arg; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT64_POOL_ADDR_DETAILS + nm->msg_id_base); + clib_memcpy (rmp->address, &(a->addr), 4); + if (a->fib_index != ~0) + { + fib_table_t *fib = fib_table_get (a->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + rmp->vrf_id = ntohl (fib->ft_table_id); + } + else + rmp->vrf_id = ~0; + rmp->context = ctx->context; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return 0; +} + +static void 
+vl_api_nat64_pool_addr_dump_t_handler (vl_api_nat64_pool_addr_dump_t * mp) +{ + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + nat64_api_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + nat64_pool_addr_walk (nat64_api_pool_walk, &ctx); +} + +static void +vl_api_nat64_add_del_interface_t_handler (vl_api_nat64_add_del_interface_t * + mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_add_del_interface_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = + nat64_interface_add_del (ntohl (mp->sw_if_index), + mp->flags & NAT_API_IS_INSIDE, mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_NAT64_ADD_DEL_INTERFACE_REPLY); +} + +static int +nat64_api_interface_walk (nat64_interface_t * i, void *arg) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_interface_details_t *rmp; + nat64_api_walk_ctx_t *ctx = arg; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT64_INTERFACE_DETAILS + nm->msg_id_base); + rmp->sw_if_index = ntohl (i->sw_if_index); + + if (nat64_interface_is_inside (i)) + rmp->flags |= NAT_API_IS_INSIDE; + if (nat64_interface_is_outside (i)) + rmp->flags |= NAT_API_IS_OUTSIDE; + + rmp->context = ctx->context; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return 0; +} + +static void +vl_api_nat64_interface_dump_t_handler (vl_api_nat64_interface_dump_t * mp) +{ + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + nat64_api_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + nat64_interfaces_walk (nat64_api_interface_walk, &ctx); +} + +static void + vl_api_nat64_add_del_static_bib_t_handler + (vl_api_nat64_add_del_static_bib_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_add_del_static_bib_reply_t *rmp; + ip6_address_t in_addr; + ip4_address_t out_addr; + int rv = 0; + + 
memcpy (&in_addr.as_u8, mp->i_addr, 16); + memcpy (&out_addr.as_u8, mp->o_addr, 4); + + rv = + nat64_add_del_static_bib_entry (&in_addr, &out_addr, + clib_net_to_host_u16 (mp->i_port), + clib_net_to_host_u16 (mp->o_port), + mp->proto, + clib_net_to_host_u32 (mp->vrf_id), + mp->is_add); + + REPLY_MACRO (VL_API_NAT64_ADD_DEL_STATIC_BIB_REPLY); +} + +static int +nat64_api_bib_walk (nat64_db_bib_entry_t * bibe, void *arg) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_bib_details_t *rmp; + nat64_api_walk_ctx_t *ctx = arg; + fib_table_t *fib; + + fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT64_BIB_DETAILS + nm->msg_id_base); + rmp->context = ctx->context; + clib_memcpy (rmp->i_addr, &(bibe->in_addr), 16); + clib_memcpy (rmp->o_addr, &(bibe->out_addr), 4); + rmp->i_port = bibe->in_port; + rmp->o_port = bibe->out_port; + rmp->vrf_id = ntohl (fib->ft_table_id); + rmp->proto = bibe->proto; + if (bibe->is_static) + rmp->flags |= NAT_API_IS_STATIC; + rmp->ses_num = ntohl (bibe->ses_num); + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return 0; +} + +static void +vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_registration_t *reg; + nat64_db_t *db; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + nat64_api_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + nat64_db_bib_walk (db, mp->proto, nat64_api_bib_walk, &ctx); + /* *INDENT-ON* */ +} + +static int +nat64_api_st_walk (nat64_db_st_entry_t * ste, void *arg) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_st_details_t *rmp; + nat64_api_walk_ctx_t *ctx = arg; + nat64_db_bib_entry_t *bibe; + fib_table_t *fib; + + bibe = nat64_db_bib_entry_by_index (ctx->db, ste->proto, ste->bibe_index); + if 
(!bibe) + return -1; + + fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT64_ST_DETAILS + nm->msg_id_base); + rmp->context = ctx->context; + clib_memcpy (rmp->il_addr, &(bibe->in_addr), 16); + clib_memcpy (rmp->ol_addr, &(bibe->out_addr), 4); + rmp->il_port = bibe->in_port; + rmp->ol_port = bibe->out_port; + clib_memcpy (rmp->ir_addr, &(ste->in_r_addr), 16); + clib_memcpy (rmp->or_addr, &(ste->out_r_addr), 4); + /* + * The session's remote port goes in r_port. It used to be stored in + * il_port, clobbering the inside port copied from the BIB entry above. + * NOTE(review): assumes nat64_st_details declares r_port - confirm in nat64.api. + */ + rmp->r_port = ste->r_port; + rmp->vrf_id = ntohl (fib->ft_table_id); + rmp->proto = ste->proto; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return 0; +} + +static void +vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_registration_t *reg; + nat64_db_t *db; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + nat64_api_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ctx.db = db; + nat64_db_st_walk (db, mp->proto, nat64_api_st_walk, &ctx); + } + /* *INDENT-ON* */ +} + +static void +vl_api_nat64_add_del_prefix_t_handler (vl_api_nat64_add_del_prefix_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_add_del_prefix_reply_t *rmp; + ip6_address_t prefix; + int rv = 0; + + memcpy (&prefix.as_u8, mp->prefix.address, 16); + + rv = + nat64_add_del_prefix (&prefix, mp->prefix.len, + clib_net_to_host_u32 (mp->vrf_id), mp->is_add); + REPLY_MACRO (VL_API_NAT64_ADD_DEL_PREFIX_REPLY); +} + +static int +nat64_api_prefix_walk (nat64_prefix_t * p, void *arg) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_prefix_details_t *rmp; + nat64_api_walk_ctx_t *ctx = arg; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + clib_memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_NAT64_PREFIX_DETAILS + nm->msg_id_base); + clib_memcpy
(rmp->prefix.address, &(p->prefix), 16); + rmp->prefix.len = p->plen; + rmp->vrf_id = ntohl (p->vrf_id); + rmp->context = ctx->context; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return 0; +} + +static void +vl_api_nat64_prefix_dump_t_handler (vl_api_nat64_prefix_dump_t * mp) +{ + vl_api_registration_t *reg; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + + nat64_api_walk_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + nat64_prefix_walk (nat64_api_prefix_walk, &ctx); +} + +static void + vl_api_nat64_add_del_interface_addr_t_handler + (vl_api_nat64_add_del_interface_addr_t * mp) +{ + nat64_main_t *nm = &nat64_main; + vl_api_nat64_add_del_interface_addr_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = nat64_add_interface_address (sw_if_index, mp->is_add); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_NAT64_ADD_DEL_INTERFACE_ADDR_REPLY); +} + +/* API definitions */ +#include <vnet/format_fns.h> +#include <nat/nat64/nat64.api.c> + +/* Set up the API message handling tables */ +clib_error_t * +nat64_api_hookup (vlib_main_t * vm) +{ + nat64_main_t *nm = &nat64_main; + nm->msg_id_base = setup_message_id_table (); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_cli.c b/src/plugins/nat/nat64/nat64_cli.c new file mode 100644 index 00000000000..a7dd9ab9147 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_cli.c @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_table.h> +#include <nat/nat64/nat64.h> + +static clib_error_t * +nat64_plugin_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 enable = 0, is_set = 0; + clib_error_t *error = 0; + nat64_config_t c = { 0 }; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (!is_set && unformat (line_input, "enable")) + { + unformat (line_input, "bib-buckets %u", &c.bib_buckets); + unformat (line_input, "bib-memory %u", &c.bib_memory_size); + unformat (line_input, "st-buckets %u", &c.st_buckets); + unformat (line_input, "st-memory %u", &c.st_memory_size); + enable = 1; + } + else if (!is_set && unformat (line_input, "disable")); + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + is_set = 1; + } + + if (enable) + { + if (nat64_plugin_enable (c)) + error = clib_error_return (0, "plugin enable failed"); + } + else + { + if (nat64_plugin_disable ()) + error = clib_error_return (0, "plugin disable failed"); + } +done: + unformat_free (line_input); + return error; +} + +static clib_error_t * +nat64_add_del_pool_addr_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; 
+ int i, count, rv; + u32 vrf_id = ~0; + u8 is_add = 1; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "tenant-vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } + + count = (end_host_order - start_host_order) + 1; + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + rv = nat64_add_del_pool_addr (0, &this_addr, vrf_id, is_add); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = + clib_error_return (0, "NAT64 pool address %U not exist.", + format_ip4_address, &this_addr); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = + clib_error_return (0, "NAT64 pool address %U exist.", + format_ip4_address, &this_addr); + goto done; + default: + break; + + } + increment_v4_address (&this_addr); + } + +done: + unformat_free (line_input); + + return error; +} + +static int +nat64_cli_pool_walk (nat64_address_t * ap, void *ctx) +{ + vlib_main_t *vm = ctx; + + if (ap->fib_index != ~0) + { + fib_table_t *fib; + fib = fib_table_get (ap->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + vlib_cli_output (vm, " %U tenant VRF: %u", format_ip4_address, + &ap->addr, fib->ft_table_id); + } + else + vlib_cli_output (vm, " %U", 
format_ip4_address, &ap->addr); + +#define _(N, i, n, s) \ + vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s); + foreach_nat_protocol +#undef _ + return 0; +} + +static clib_error_t * +nat64_show_pool_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "NAT64 pool:"); + nat64_pool_addr_walk (nat64_cli_pool_walk, vm); + + return 0; +} + +static clib_error_t * +nat64_interface_feature_command_fn (vlib_main_t * vm, + unformat_input_t * + input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + u32 *inside_sw_if_indices = 0; + u32 *outside_sw_if_indices = 0; + u8 is_add = 1; + int i, rv; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (inside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "out %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (outside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (inside_sw_if_indices)) + { + for (i = 0; i < vec_len (inside_sw_if_indices); i++) + { + sw_if_index = inside_sw_if_indices[i]; + rv = nat64_interface_add_del (sw_if_index, 1, is_add); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = + clib_error_return (0, "%U NAT64 feature not enabled.", + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = + clib_error_return (0, "%U NAT64 feature already enabled.", + format_vnet_sw_if_index_name, vnm, + vnm, sw_if_index); + goto done; + 
case VNET_API_ERROR_INVALID_VALUE: + case VNET_API_ERROR_INVALID_VALUE_2: + error = + clib_error_return (0, + "%U NAT64 feature enable/disable failed.", + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + default: + break; + + } + } + } + + if (vec_len (outside_sw_if_indices)) + { + for (i = 0; i < vec_len (outside_sw_if_indices); i++) + { + sw_if_index = outside_sw_if_indices[i]; + rv = nat64_interface_add_del (sw_if_index, 0, is_add); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = + clib_error_return (0, "%U NAT64 feature not enabled.", + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = + clib_error_return (0, "%U NAT64 feature already enabled.", + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + case VNET_API_ERROR_INVALID_VALUE: + case VNET_API_ERROR_INVALID_VALUE_2: + error = + clib_error_return (0, + "%U NAT64 feature enable/disable failed.", + format_vnet_sw_if_index_name, vnm, + sw_if_index); + goto done; + default: + break; + + } + } + } + +done: + unformat_free (line_input); + vec_free (inside_sw_if_indices); + vec_free (outside_sw_if_indices); + + return error; +} + +static int +nat64_cli_interface_walk (nat64_interface_t * i, void *ctx) +{ + vlib_main_t *vm = ctx; + vnet_main_t *vnm = vnet_get_main (); + + vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm, + i->sw_if_index, + (nat64_interface_is_inside (i) + && nat64_interface_is_outside (i)) ? "in out" : + nat64_interface_is_inside (i) ? 
"in" : "out"); + return 0; +} + +static clib_error_t * +nat64_show_interfaces_command_fn (vlib_main_t * vm, + unformat_input_t * + input, vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "NAT64 interfaces:"); + nat64_interfaces_walk (nat64_cli_interface_walk, vm); + + return 0; +} + +static clib_error_t * +nat64_add_del_static_bib_command_fn (vlib_main_t * + vm, + unformat_input_t + * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u8 is_add = 1; + ip6_address_t in_addr; + ip4_address_t out_addr; + u32 in_port = 0; + u32 out_port = 0; + u32 vrf_id = 0, protocol; + nat_protocol_t proto = 0; + u8 p = 0; + int rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U %u", unformat_ip6_address, + &in_addr, &in_port)) + ; + else if (unformat (line_input, "%U %u", unformat_ip4_address, + &out_addr, &out_port)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_nat_protocol, &proto)) + ; + else + if (unformat + (line_input, "%U %U %u", unformat_ip6_address, &in_addr, + unformat_ip4_address, &out_addr, &protocol)) + p = (u8) protocol; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!p) + { + if (!in_port) + { + error = + clib_error_return (0, "inside port and address must be set"); + goto done; + } + + if (!out_port) + { + error = + clib_error_return (0, "outside port and address must be set"); + goto done; + } + + p = nat_proto_to_ip_proto (proto); + } + + rv = + nat64_add_del_static_bib_entry (&in_addr, &out_addr, (u16) in_port, + (u16) out_port, p, vrf_id, is_add); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "NAT64 BIB entry 
not exist."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "NAT64 BIB entry exist."); + goto done; + case VNET_API_ERROR_UNSPECIFIED: + error = clib_error_return (0, "Crerate NAT64 BIB entry failed."); + goto done; + case VNET_API_ERROR_INVALID_VALUE: + error = + clib_error_return (0, + "Outside address %U and port %u already in use.", + format_ip4_address, &out_addr, out_port); + goto done; + case VNET_API_ERROR_INVALID_VALUE_2: + error = clib_error_return (0, "Invalid outside port."); + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +static int +nat64_cli_bib_walk (nat64_db_bib_entry_t * bibe, void *ctx) +{ + vlib_main_t *vm = ctx; + fib_table_t *fib; + + fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + + switch (bibe->proto) + { + case IP_PROTOCOL_ICMP: + case IP_PROTOCOL_TCP: + case IP_PROTOCOL_UDP: + vlib_cli_output (vm, " %U %u %U %u protocol %U vrf %u %s %u sessions", + format_ip6_address, &bibe->in_addr, + clib_net_to_host_u16 (bibe->in_port), + format_ip4_address, &bibe->out_addr, + clib_net_to_host_u16 (bibe->out_port), + format_nat_protocol, + ip_proto_to_nat_proto (bibe->proto), fib->ft_table_id, + bibe->is_static ? "static" : "dynamic", bibe->ses_num); + break; + default: + vlib_cli_output (vm, " %U %U protocol %u vrf %u %s %u sessions", + format_ip6_address, &bibe->in_addr, + format_ip4_address, &bibe->out_addr, + bibe->proto, fib->ft_table_id, + bibe->is_static ? 
"static" : "dynamic", bibe->ses_num); + } + return 0; +} + +static clib_error_t * +nat64_show_bib_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + nat64_main_t *nm = &nat64_main; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u32 proto = NAT_PROTOCOL_OTHER; + u8 p = 255; + nat64_db_t *db; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (unformat (line_input, "%U", unformat_nat_protocol, &proto)) + p = nat_proto_to_ip_proto (proto); + else if (unformat (line_input, "unknown")) + p = 0; + else if (unformat (line_input, "all")) + ; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + + if (p == 255) + vlib_cli_output (vm, "NAT64 BIB entries:"); + else + vlib_cli_output (vm, "NAT64 %U BIB entries:", format_nat_protocol, proto); + + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + nat64_db_bib_walk (db, p, nat64_cli_bib_walk, vm); + /* *INDENT-ON* */ + +done: + unformat_free (line_input); + + return error; +} + +typedef struct nat64_cli_st_walk_ctx_t_ +{ + vlib_main_t *vm; + nat64_db_t *db; +} nat64_cli_st_walk_ctx_t; + +static int +nat64_cli_st_walk (nat64_db_st_entry_t * ste, void *arg) +{ + nat64_cli_st_walk_ctx_t *ctx = arg; + vlib_main_t *vm = ctx->vm; + nat64_db_bib_entry_t *bibe; + fib_table_t *fib; + + bibe = nat64_db_bib_entry_by_index (ctx->db, ste->proto, ste->bibe_index); + if (!bibe) + return -1; + + fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + if (!fib) + return -1; + + u32 vrf_id = fib->ft_table_id; + + if (ste->proto == IP_PROTOCOL_ICMP) + vlib_cli_output (vm, " %U %U %u %U %U %u protocol %U vrf %u", + format_ip6_address, &bibe->in_addr, + format_ip6_address, &ste->in_r_addr, + clib_net_to_host_u16 (bibe->in_port), + format_ip4_address, &bibe->out_addr, + format_ip4_address, &ste->out_r_addr, + clib_net_to_host_u16 (bibe->out_port), + format_nat_protocol, + 
ip_proto_to_nat_proto (bibe->proto), vrf_id); + else if (ste->proto == IP_PROTOCOL_TCP || ste->proto == IP_PROTOCOL_UDP) + vlib_cli_output (vm, " %U %u %U %u %U %u %U %u protcol %U vrf %u", + format_ip6_address, &bibe->in_addr, + clib_net_to_host_u16 (bibe->in_port), + format_ip6_address, &ste->in_r_addr, + clib_net_to_host_u16 (ste->r_port), + format_ip4_address, &bibe->out_addr, + clib_net_to_host_u16 (bibe->out_port), + format_ip4_address, &ste->out_r_addr, + clib_net_to_host_u16 (ste->r_port), + format_nat_protocol, + ip_proto_to_nat_proto (bibe->proto), vrf_id); + else + vlib_cli_output (vm, " %U %U %U %U protocol %u vrf %u", + format_ip6_address, &bibe->in_addr, + format_ip6_address, &ste->in_r_addr, + format_ip4_address, &bibe->out_addr, + format_ip4_address, &ste->out_r_addr, + bibe->proto, vrf_id); + + return 0; +} + +static clib_error_t * +nat64_show_st_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + nat64_main_t *nm = &nat64_main; + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + u32 proto = NAT_PROTOCOL_OTHER; + u8 p = 255; + nat64_db_t *db; + nat64_cli_st_walk_ctx_t ctx = { + .vm = vm, + }; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (unformat (line_input, "%U", unformat_nat_protocol, &proto)) + p = nat_proto_to_ip_proto (proto); + else if (unformat (line_input, "unknown")) + p = 0; + else if (unformat (line_input, "all")) + ; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + + if (p == 255) + vlib_cli_output (vm, "NAT64 sessions:"); + else + vlib_cli_output (vm, "NAT64 %U sessions:", format_nat_protocol, proto); + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ctx.db = db; + nat64_db_st_walk (db, p, nat64_cli_st_walk, &ctx); + } + /* *INDENT-ON* */ + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * 
+nat64_add_del_prefix_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + nat64_main_t *nm = &nat64_main; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + unformat_input_t _line_input, *line_input = &_line_input; + u8 is_add = 1; + u32 vrf_id = 0, sw_if_index = ~0; + ip6_address_t prefix; + u32 plen = 0; + int rv; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U/%u", unformat_ip6_address, &prefix, &plen)) + ; + else if (unformat (line_input, "tenant-vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + if (unformat + (line_input, "interface %U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!plen) + { + error = clib_error_return (0, "NAT64 prefix must be set."); + goto done; + } + + rv = nat64_add_del_prefix (&prefix, (u8) plen, vrf_id, is_add); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "NAT64 prefix not exist."); + goto done; + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "Invalid prefix length."); + goto done; + default: + break; + } + + /* + * Add RX interface route, whenNAT isn't running on the real input + * interface + */ + if (sw_if_index != ~0) + { + u32 fib_index; + fib_prefix_t fibpfx = { + .fp_len = plen, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = prefix} + }; + + if (is_add) + { + fib_index = + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + vrf_id, nm->fib_src_hi); + fib_table_entry_update_one_path (fib_index, &fibpfx, + nm->fib_src_hi, + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP6, NULL, + sw_if_index, ~0, 0, + NULL, FIB_ROUTE_PATH_INTF_RX); + } + else + { + fib_index = fib_table_find (FIB_PROTOCOL_IP6, 
vrf_id); + fib_table_entry_path_remove (fib_index, &fibpfx, + nm->fib_src_hi, + DPO_PROTO_IP6, NULL, + sw_if_index, ~0, 1, + FIB_ROUTE_PATH_INTF_RX); + fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi); + } + } + +done: + unformat_free (line_input); + + return error; +} + +static int +nat64_cli_prefix_walk (nat64_prefix_t * p, void *ctx) +{ + vlib_main_t *vm = ctx; + + vlib_cli_output (vm, " %U/%u tenant-vrf %u", + format_ip6_address, &p->prefix, p->plen, p->vrf_id); + + return 0; +} + +static clib_error_t * +nat64_show_prefix_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_cli_output (vm, "NAT64 prefix:"); + nat64_prefix_walk (nat64_cli_prefix_walk, vm); + + return 0; +} + +static clib_error_t * +nat64_add_interface_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index; + int rv; + int is_add = 1; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)); + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = nat64_add_interface_address (sw_if_index, is_add); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "entry not exist"); + break; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "entry exist"); + break; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +/* *INDENT-OFF* */ +/*? + * @cliexpar + * @cliexstart{nat64 plugin} + * Enable/disable NAT64 plugin. 
+ * To enable NAT64 plugin use: + * vpp# nat64 plugin enable + * To enable NAT64 plugin and configure buckets/memory: + * vpp# nat64 plugin enable bib-buckets <n> bib-memory <s> \ + * st-buckets <n> st-memory <s> + * To disable NAT64 plugin: + * vpp# nat64 plugin disable + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat64_plugin_enable_disable_command, static) = +{ + .path = "nat64 plugin", + .short_help = "nat64 plugin <enable " + "[bib-buckets <count>] [bib-memory <size>] " + "[st-buckets <count>] [st-memory <size>] | disable>", + .function = nat64_plugin_enable_disable_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat64 add pool address} + * Add/delete NAT64 pool address. + * To add single NAT64 pool address use: + * vpp# nat64 add pool address 10.1.1.10 + * To add NAT64 pool address range use: + * vpp# nat64 add pool address 10.1.1.2 - 10.1.1.5 + * To add NAT64 pool address for specific tenant use: + * vpp# nat64 add pool address 10.1.1.100 tenant-vrf 100 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat64_add_pool_address_command, static) = { + .path = "nat64 add pool address", + .short_help = "nat64 add pool address <ip4-range-start> [- <ip4-range-end>] " + "[tenant-vrf <vrf-id>] [del]", + .function = nat64_add_del_pool_addr_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat64 pool} + * Show NAT64 pool. + * vpp# show nat64 pool + * NAT64 pool: + * 10.1.1.3 tenant VRF: 0 + * 10.1.1.10 tenant VRF: 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_nat64_pool_command, static) = { + .path = "show nat64 pool", + .short_help = "show nat64 pool", + .function = nat64_show_pool_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{set interface nat64} + * Enable/disable NAT64 feature on the interface. 
+ * To enable NAT64 feature with local (IPv6) network interface + * GigabitEthernet0/8/0 and external (IPv4) network interface + * GigabitEthernet0/a/0 use: + * vpp# set interface nat64 in GigabitEthernet0/8/0 out GigabitEthernet0/a/0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_interface_nat64_command, static) = { + .path = "set interface nat64", + .short_help = "set interface nat64 in|out <intfc> [del]", + .function = nat64_interface_feature_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat64 interfaces} + * Show interfaces with NAT64 feature. + * To show interfaces with NAT64 feature use: + * vpp# show nat64 interfaces + * NAT64 interfaces: + * GigabitEthernet0/8/0 in + * GigabitEthernet0/a/0 out + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_nat64_interfaces_command, static) = { + .path = "show nat64 interfaces", + .short_help = "show nat64 interfaces", + .function = nat64_show_interfaces_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat64 add static bib} + * Add/delete NAT64 static BIB entry. + * To create NAT64 static BIB entry use: + * vpp# nat64 add static bib 2001:db8:c000:221:: 1234 10.1.1.3 5678 tcp + * vpp# nat64 add static bib 2001:db8:c000:221:: 1234 10.1.1.3 5678 udp vrf 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat64_add_del_static_bib_command, static) = { + .path = "nat64 add static bib", + .short_help = "nat64 add static bib <ip6-addr> <port> <ip4-addr> <port> " + "tcp|udp|icmp [vrf <table-id>] [del]", + .function = nat64_add_del_static_bib_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat64 bib} + * Show NAT64 BIB entries.
+ * To show NAT64 TCP BIB entries use: + * vpp# show nat64 bib tcp + * NAT64 tcp BIB: + * fd01:1::2 6303 10.0.0.3 62303 tcp vrf 0 dynamic 1 sessions + * 2001:db8:c000:221:: 1234 10.1.1.3 5678 tcp vrf 0 static 2 sessions + * To show NAT64 UDP BIB entries use: + * vpp# show nat64 bib udp + * NAT64 udp BIB: + * fd01:1::2 6304 10.0.0.3 10546 udp vrf 0 dynamic 10 sessions + * 2001:db8:c000:221:: 1234 10.1.1.3 5678 udp vrf 10 static 0 sessions + * To show NAT64 ICMP BIB entries use: + * vpp# show nat64 bib icmp + * NAT64 icmp BIB: + * fd01:1::2 6305 10.0.0.3 63209 icmp vrf 10 dynamic 1 sessions + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_nat64_bib_command, static) = { + .path = "show nat64 bib", + .short_help = "show nat64 bib all|tcp|udp|icmp|unknown", + .function = nat64_show_bib_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat64 session table} + * Show NAT64 session table. + * To show NAT64 TCP session table use: + * vpp# show nat64 session table tcp + * NAT64 tcp session table: + * fd01:1::2 6303 64:ff9b::ac10:202 20 10.0.0.3 62303 172.16.2.2 20 tcp vrf 0 + * fd01:3::2 6303 64:ff9b::ac10:202 20 10.0.10.3 21300 172.16.2.2 20 tcp vrf 10 + * To show NAT64 UDP session table use: + * #vpp show nat64 session table udp + * NAT64 udp session table: + * fd01:1::2 6304 64:ff9b::ac10:202 20 10.0.0.3 10546 172.16.2.2 20 udp vrf 0 + * fd01:3::2 6304 64:ff9b::ac10:202 20 10.0.10.3 58627 172.16.2.2 20 udp vrf 10 + * fd01:1::2 1235 64:ff9b::a00:3 4023 10.0.0.3 24488 10.0.0.3 4023 udp vrf 0 + * fd01:1::3 23 64:ff9b::a00:3 24488 10.0.0.3 4023 10.0.0.3 24488 udp vrf 0 + * To show NAT64 ICMP session table use: + * #vpp show nat64 session table icmp + * NAT64 icmp session table: + * fd01:1::2 64:ff9b::ac10:202 6305 10.0.0.3 172.16.2.2 63209 icmp vrf 0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_nat64_st_command, static) = { + .path = "show nat64 session table", + .short_help = "show nat64 session table all|tcp|udp|icmp|unknown", + .function = nat64_show_st_command_fn, +}; + 
+/*? + * @cliexpar + * @cliexstart{nat64 add prefix} + * Set NAT64 prefix for generating IPv6 representations of IPv4 addresses. + * To set NAT64 global prefix use: + * vpp# nat64 add prefix 2001:db8::/32 + * To set NAT64 prefix for specific tenant use: + * vpp# nat64 add prefix 2001:db8:122:300::/56 tenant-vrf 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat64_add_del_prefix_command, static) = { + .path = "nat64 add prefix", + .short_help = "nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] " + "[del] [interface <interface>]", + .function = nat64_add_del_prefix_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{show nat64 prefix} + * Show NAT64 prefix. + * To show NAT64 prefix use: + * vpp# show nat64 prefix + * NAT64 prefix: + * 2001:db8::/32 tenant-vrf 0 + * 2001:db8:122:300::/56 tenant-vrf 10 + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_nat64_prefix_command, static) = { + .path = "show nat64 prefix", + .short_help = "show nat64 prefix", + .function = nat64_show_prefix_command_fn, +}; + +/*? + * @cliexpar + * @cliexstart{nat64 add interface address} + * Add/delete NAT64 pool address from specific (DHCP addressed) interface. + * To add NAT64 pool address from specific interface use: + * vpp# nat64 add interface address GigabitEthernet0/8/0 + * @cliexend +?*/ +VLIB_CLI_COMMAND (nat64_add_interface_address_command, static) = { + .path = "nat64 add interface address", + .short_help = "nat64 add interface address <interface> [del]", + .function = nat64_add_interface_address_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_db.c b/src/plugins/nat/nat64/nat64_db.c new file mode 100644 index 00000000000..ffc5e7e7e84 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_db.c @@ -0,0 +1,742 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_table.h> +//#include <nat/nat_ipfix_logging.h> +#include <nat/nat_syslog.h> +#include <nat/lib/inlines.h> +#include <nat/nat64/nat64_db.h> + +int +nat64_db_init (nat64_db_t * db, nat64_config_t c, + nat64_db_free_addr_port_function_t free_addr_port_cb) +{ + clib_bihash_init_24_8 (&db->bib.in2out, "bib-in2out", c.bib_buckets, + c.bib_memory_size); + + clib_bihash_init_24_8 (&db->bib.out2in, "bib-out2in", c.bib_buckets, + c.bib_memory_size); + + clib_bihash_init_48_8 (&db->st.in2out, "st-in2out", c.st_buckets, + c.st_memory_size); + + clib_bihash_init_48_8 (&db->st.out2in, "st-out2in", c.st_buckets, + c.st_memory_size); + + db->free_addr_port_cb = free_addr_port_cb; + db->bib.limit = 10 * c.bib_buckets; + db->bib.bib_entries_num = 0; + db->st.limit = 10 * c.st_buckets; + db->st.st_entries_num = 0; + db->addr_free = 0; + + return 0; +} + +int +nat64_db_free (nat64_db_t * db) +{ + clib_bihash_free_24_8 (&db->bib.in2out); + clib_bihash_free_24_8 (&db->bib.out2in); + + clib_bihash_free_48_8 (&db->st.in2out); + clib_bihash_free_48_8 (&db->st.out2in); + +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + pool_free (db->bib._##n##_bib); \ + pool_free (db->st._##n##_st); + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + + pool_free (db->bib._unk_proto_bib); + pool_free (db->st._unk_proto_st); + + return 0; +} + +nat64_db_bib_entry_t * +nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * 
db, + ip6_address_t * in_addr, + ip4_address_t * out_addr, u16 in_port, + u16 out_port, u32 fib_index, u8 proto, + u8 is_static) +{ + nat64_db_bib_entry_t *bibe; + nat64_db_bib_entry_key_t bibe_key; + clib_bihash_kv_24_8_t kv; + + if (db->bib.bib_entries_num >= db->bib.limit) + { + db->free_addr_port_cb (db, out_addr, out_port, proto); + //nat_ipfix_logging_max_bibs (thread_index, db->bib.limit); + return 0; + } + + /* create pool entry */ + switch (ip_proto_to_nat_proto (proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + pool_get (db->bib._##n##_bib, bibe); \ + kv.value = bibe - db->bib._##n##_bib; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + pool_get (db->bib._unk_proto_bib, bibe); + kv.value = bibe - db->bib._unk_proto_bib; + break; + } + + db->bib.bib_entries_num++; + + clib_memset (bibe, 0, sizeof (*bibe)); + bibe->in_addr.as_u64[0] = in_addr->as_u64[0]; + bibe->in_addr.as_u64[1] = in_addr->as_u64[1]; + bibe->in_port = in_port; + bibe->out_addr.as_u32 = out_addr->as_u32; + bibe->out_port = out_port; + bibe->fib_index = fib_index; + bibe->proto = proto; + bibe->is_static = is_static; + + /* create hash lookup */ + bibe_key.addr.as_u64[0] = bibe->in_addr.as_u64[0]; + bibe_key.addr.as_u64[1] = bibe->in_addr.as_u64[1]; + bibe_key.fib_index = bibe->fib_index; + bibe_key.port = bibe->in_port; + bibe_key.proto = bibe->proto; + bibe_key.rsvd = 0; + kv.key[0] = bibe_key.as_u64[0]; + kv.key[1] = bibe_key.as_u64[1]; + kv.key[2] = bibe_key.as_u64[2]; + clib_bihash_add_del_24_8 (&db->bib.in2out, &kv, 1); + + clib_memset (&bibe_key.addr, 0, sizeof (bibe_key.addr)); + bibe_key.addr.ip4.as_u32 = bibe->out_addr.as_u32; + bibe_key.fib_index = 0; + bibe_key.port = bibe->out_port; + kv.key[0] = bibe_key.as_u64[0]; + kv.key[1] = bibe_key.as_u64[1]; + kv.key[2] = bibe_key.as_u64[2]; + clib_bihash_add_del_24_8 (&db->bib.out2in, &kv, 1); + + /*fib_table_t *fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + 
nat_ipfix_logging_nat64_bib (thread_index, in_addr, out_addr, proto, + in_port, out_port, fib->ft_table_id, 1); */ + return bibe; +} + +void +nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe) +{ + nat64_db_bib_entry_key_t bibe_key; + clib_bihash_kv_24_8_t kv; + nat64_db_bib_entry_t *bib; + u32 *ste_to_be_free = 0, *ste_index, bibe_index; + nat64_db_st_entry_t *st, *ste; + + switch (ip_proto_to_nat_proto (bibe->proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + bib = db->bib._##n##_bib; \ + st = db->st._##n##_st; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + bib = db->bib._unk_proto_bib; + st = db->st._unk_proto_st; + break; + } + + db->bib.bib_entries_num--; + + bibe_index = bibe - bib; + + /* delete ST entries for static BIB entry */ + if (bibe->is_static) + { + pool_foreach (ste, st, ( + { + if (ste->bibe_index == bibe_index) + vec_add1 (ste_to_be_free, ste - st);} + )); + vec_foreach (ste_index, ste_to_be_free) + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index (st, ste_index[0])); + vec_free (ste_to_be_free); + } + + /* delete hash lookup */ + bibe_key.addr.as_u64[0] = bibe->in_addr.as_u64[0]; + bibe_key.addr.as_u64[1] = bibe->in_addr.as_u64[1]; + bibe_key.fib_index = bibe->fib_index; + bibe_key.port = bibe->in_port; + bibe_key.proto = bibe->proto; + bibe_key.rsvd = 0; + kv.key[0] = bibe_key.as_u64[0]; + kv.key[1] = bibe_key.as_u64[1]; + kv.key[2] = bibe_key.as_u64[2]; + clib_bihash_add_del_24_8 (&db->bib.in2out, &kv, 0); + + clib_memset (&bibe_key.addr, 0, sizeof (bibe_key.addr)); + bibe_key.addr.ip4.as_u32 = bibe->out_addr.as_u32; + bibe_key.fib_index = 0; + bibe_key.port = bibe->out_port; + kv.key[0] = bibe_key.as_u64[0]; + kv.key[1] = bibe_key.as_u64[1]; + kv.key[2] = bibe_key.as_u64[2]; + clib_bihash_add_del_24_8 (&db->bib.out2in, &kv, 0); + + if (!db->addr_free) + db->free_addr_port_cb (db, &bibe->out_addr, bibe->out_port, bibe->proto); + 
+ /*fib_table_t *fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + nat_ipfix_logging_nat64_bib (thread_index, &bibe->in_addr, &bibe->out_addr, + bibe->proto, bibe->in_port, bibe->out_port, + fib->ft_table_id, 0); */ + + /* delete from pool */ + pool_put (bib, bibe); +} + +nat64_db_bib_entry_t * +nat64_db_bib_entry_find (nat64_db_t * db, ip46_address_t * addr, u16 port, + u8 proto, u32 fib_index, u8 is_ip6) +{ + nat64_db_bib_entry_t *bibe = 0; + nat64_db_bib_entry_key_t bibe_key; + clib_bihash_kv_24_8_t kv, value; + nat64_db_bib_entry_t *bib; + + switch (ip_proto_to_nat_proto (proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + bib = db->bib._##n##_bib; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + bib = db->bib._unk_proto_bib; + break; + } + + bibe_key.addr.as_u64[0] = addr->as_u64[0]; + bibe_key.addr.as_u64[1] = addr->as_u64[1]; + bibe_key.fib_index = fib_index; + bibe_key.port = port; + bibe_key.proto = proto; + bibe_key.rsvd = 0; + + kv.key[0] = bibe_key.as_u64[0]; + kv.key[1] = bibe_key.as_u64[1]; + kv.key[2] = bibe_key.as_u64[2]; + + if (!clib_bihash_search_24_8 + (is_ip6 ? 
&db->bib.in2out : &db->bib.out2in, &kv, &value)) + bibe = pool_elt_at_index (bib, value.value); + + return bibe; +} + +void +nat64_db_bib_walk (nat64_db_t * db, u8 proto, + nat64_db_bib_walk_fn_t fn, void *ctx) +{ + nat64_db_bib_entry_t *bib, *bibe; + + if (proto == 255) + { + /* *INDENT-OFF* */ + #define _(N, i, n, s) \ + bib = db->bib._##n##_bib; \ + pool_foreach (bibe, bib, ({ \ + if (fn (bibe, ctx)) \ + return; \ + })); + foreach_nat_protocol + #undef _ + bib = db->bib._unk_proto_bib; + pool_foreach (bibe, bib, ({ + if (fn (bibe, ctx)) + return; + })); + /* *INDENT-ON* */ + } + else + { + switch (ip_proto_to_nat_proto (proto)) + { + /* *INDENT-OFF* */ + #define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + bib = db->bib._##n##_bib; \ + break; + foreach_nat_protocol + #undef _ + /* *INDENT-ON* */ + default: + bib = db->bib._unk_proto_bib; + break; + } + + /* *INDENT-OFF* */ + pool_foreach (bibe, bib, + ({ + if (fn (bibe, ctx)) + return; + })); + /* *INDENT-ON* */ + } +} + +nat64_db_bib_entry_t * +nat64_db_bib_entry_by_index (nat64_db_t * db, u8 proto, u32 bibe_index) +{ + nat64_db_bib_entry_t *bib; + + switch (ip_proto_to_nat_proto (proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + bib = db->bib._##n##_bib; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + bib = db->bib._unk_proto_bib; + break; + } + + return pool_elt_at_index (bib, bibe_index); +} + +void +nat64_db_st_walk (nat64_db_t * db, u8 proto, + nat64_db_st_walk_fn_t fn, void *ctx) +{ + nat64_db_st_entry_t *st, *ste; + + if (proto == 255) + { + /* *INDENT-OFF* */ + #define _(N, i, n, s) \ + st = db->st._##n##_st; \ + pool_foreach (ste, st, ({ \ + if (fn (ste, ctx)) \ + return; \ + })); + foreach_nat_protocol + #undef _ + st = db->st._unk_proto_st; + pool_foreach (ste, st, ({ + if (fn (ste, ctx)) + return; + })); + /* *INDENT-ON* */ + } + else + { + switch (ip_proto_to_nat_proto (proto)) + { + /* *INDENT-OFF* */ + #define _(N, i, n, s) \ + case 
NAT_PROTOCOL_##N: \ + st = db->st._##n##_st; \ + break; + foreach_nat_protocol + #undef _ + /* *INDENT-ON* */ + default: + st = db->st._unk_proto_st; + break; + } + + /* *INDENT-OFF* */ + pool_foreach (ste, st, + ({ + if (fn (ste, ctx)) + return; + })); + /* *INDENT-ON* */ + } +} + +nat64_db_st_entry_t * +nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe, + ip6_address_t * in_r_addr, + ip4_address_t * out_r_addr, u16 r_port) +{ + nat64_db_st_entry_t *ste; + nat64_db_bib_entry_t *bib; + nat64_db_st_entry_key_t ste_key; + clib_bihash_kv_48_8_t kv; + + if (db->st.st_entries_num >= db->st.limit) + { + //nat_ipfix_logging_max_sessions (thread_index, db->st.limit); + return 0; + } + + /* create pool entry */ + switch (ip_proto_to_nat_proto (bibe->proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + pool_get (db->st._##n##_st, ste); \ + kv.value = ste - db->st._##n##_st; \ + bib = db->bib._##n##_bib; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + pool_get (db->st._unk_proto_st, ste); + kv.value = ste - db->st._unk_proto_st; + bib = db->bib._unk_proto_bib; + break; + } + + db->st.st_entries_num++; + + clib_memset (ste, 0, sizeof (*ste)); + ste->in_r_addr.as_u64[0] = in_r_addr->as_u64[0]; + ste->in_r_addr.as_u64[1] = in_r_addr->as_u64[1]; + ste->out_r_addr.as_u32 = out_r_addr->as_u32; + ste->r_port = r_port; + ste->bibe_index = bibe - bib; + ste->proto = bibe->proto; + + /* increment session number for BIB entry */ + bibe->ses_num++; + + /* create hash lookup */ + clib_memset (&ste_key, 0, sizeof (ste_key)); + ste_key.l_addr.as_u64[0] = bibe->in_addr.as_u64[0]; + ste_key.l_addr.as_u64[1] = bibe->in_addr.as_u64[1]; + ste_key.r_addr.as_u64[0] = ste->in_r_addr.as_u64[0]; + ste_key.r_addr.as_u64[1] = ste->in_r_addr.as_u64[1]; + ste_key.fib_index = bibe->fib_index; + ste_key.l_port = bibe->in_port; + ste_key.r_port = ste->r_port; + ste_key.proto = ste->proto; + kv.key[0] = 
ste_key.as_u64[0]; + kv.key[1] = ste_key.as_u64[1]; + kv.key[2] = ste_key.as_u64[2]; + kv.key[3] = ste_key.as_u64[3]; + kv.key[4] = ste_key.as_u64[4]; + kv.key[5] = ste_key.as_u64[5]; + clib_bihash_add_del_48_8 (&db->st.in2out, &kv, 1); + + clib_memset (&ste_key, 0, sizeof (ste_key)); + ste_key.l_addr.ip4.as_u32 = bibe->out_addr.as_u32; + ste_key.r_addr.ip4.as_u32 = ste->out_r_addr.as_u32; + ste_key.l_port = bibe->out_port; + ste_key.r_port = ste->r_port; + ste_key.proto = ste->proto; + kv.key[0] = ste_key.as_u64[0]; + kv.key[1] = ste_key.as_u64[1]; + kv.key[2] = ste_key.as_u64[2]; + kv.key[3] = ste_key.as_u64[3]; + kv.key[4] = ste_key.as_u64[4]; + kv.key[5] = ste_key.as_u64[5]; + clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 1); + + /*fib_table_t *fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + nat_ipfix_logging_nat64_session (thread_index, &bibe->in_addr, + &bibe->out_addr, bibe->proto, + bibe->in_port, bibe->out_port, + &ste->in_r_addr, &ste->out_r_addr, + ste->r_port, ste->r_port, fib->ft_table_id, + 1); */ + nat_syslog_nat64_sadd (bibe->fib_index, &bibe->in_addr, bibe->in_port, + &bibe->out_addr, bibe->out_port, &ste->out_r_addr, + ste->r_port, bibe->proto); + return ste; +} + +void +nat64_db_st_entry_free (u32 thread_index, + nat64_db_t * db, nat64_db_st_entry_t * ste) +{ + nat64_db_st_entry_t *st; + nat64_db_bib_entry_t *bib, *bibe; + nat64_db_st_entry_key_t ste_key; + clib_bihash_kv_48_8_t kv; + + switch (ip_proto_to_nat_proto (ste->proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + st = db->st._##n##_st; \ + bib = db->bib._##n##_bib; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + st = db->st._unk_proto_st; + bib = db->bib._unk_proto_bib; + break; + } + + bibe = pool_elt_at_index (bib, ste->bibe_index); + + db->st.st_entries_num--; + + /* delete hash lookup */ + clib_memset (&ste_key, 0, sizeof (ste_key)); + ste_key.l_addr.as_u64[0] = bibe->in_addr.as_u64[0]; + 
ste_key.l_addr.as_u64[1] = bibe->in_addr.as_u64[1]; + ste_key.r_addr.as_u64[0] = ste->in_r_addr.as_u64[0]; + ste_key.r_addr.as_u64[1] = ste->in_r_addr.as_u64[1]; + ste_key.fib_index = bibe->fib_index; + ste_key.l_port = bibe->in_port; + ste_key.r_port = ste->r_port; + ste_key.proto = ste->proto; + kv.key[0] = ste_key.as_u64[0]; + kv.key[1] = ste_key.as_u64[1]; + kv.key[2] = ste_key.as_u64[2]; + kv.key[3] = ste_key.as_u64[3]; + kv.key[4] = ste_key.as_u64[4]; + kv.key[5] = ste_key.as_u64[5]; + clib_bihash_add_del_48_8 (&db->st.in2out, &kv, 0); + + clib_memset (&ste_key, 0, sizeof (ste_key)); + ste_key.l_addr.ip4.as_u32 = bibe->out_addr.as_u32; + ste_key.r_addr.ip4.as_u32 = ste->out_r_addr.as_u32; + ste_key.l_port = bibe->out_port; + ste_key.r_port = ste->r_port; + ste_key.proto = ste->proto; + kv.key[0] = ste_key.as_u64[0]; + kv.key[1] = ste_key.as_u64[1]; + kv.key[2] = ste_key.as_u64[2]; + kv.key[3] = ste_key.as_u64[3]; + kv.key[4] = ste_key.as_u64[4]; + kv.key[5] = ste_key.as_u64[5]; + clib_bihash_add_del_48_8 (&db->st.out2in, &kv, 0); + + /*fib_table_t *fib = fib_table_get (bibe->fib_index, FIB_PROTOCOL_IP6); + nat_ipfix_logging_nat64_session (thread_index, &bibe->in_addr, + &bibe->out_addr, bibe->proto, + bibe->in_port, bibe->out_port, + &ste->in_r_addr, &ste->out_r_addr, + ste->r_port, ste->r_port, fib->ft_table_id, + 0); */ + nat_syslog_nat64_sdel (bibe->fib_index, &bibe->in_addr, bibe->in_port, + &bibe->out_addr, bibe->out_port, &ste->out_r_addr, + ste->r_port, bibe->proto); + + /* delete from pool */ + pool_put (st, ste); + + /* decrement session number for BIB entry */ + bibe->ses_num--; + + /* delete BIB entry if last session and dynamic */ + if (!bibe->is_static && !bibe->ses_num) + nat64_db_bib_entry_free (thread_index, db, bibe); +} + +nat64_db_st_entry_t * +nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr, + ip46_address_t * r_addr, u16 l_port, u16 r_port, + u8 proto, u32 fib_index, u8 is_ip6) +{ + nat64_db_st_entry_t *ste = 0; + 
nat64_db_st_entry_t *st; + nat64_db_st_entry_key_t ste_key; + clib_bihash_kv_48_8_t kv, value; + + switch (ip_proto_to_nat_proto (proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + st = db->st._##n##_st; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + st = db->st._unk_proto_st; + break; + } + + clib_memset (&ste_key, 0, sizeof (ste_key)); + ste_key.l_addr.as_u64[0] = l_addr->as_u64[0]; + ste_key.l_addr.as_u64[1] = l_addr->as_u64[1]; + ste_key.r_addr.as_u64[0] = r_addr->as_u64[0]; + ste_key.r_addr.as_u64[1] = r_addr->as_u64[1]; + ste_key.fib_index = fib_index; + ste_key.l_port = l_port; + ste_key.r_port = r_port; + ste_key.proto = proto; + kv.key[0] = ste_key.as_u64[0]; + kv.key[1] = ste_key.as_u64[1]; + kv.key[2] = ste_key.as_u64[2]; + kv.key[3] = ste_key.as_u64[3]; + kv.key[4] = ste_key.as_u64[4]; + kv.key[5] = ste_key.as_u64[5]; + + if (!clib_bihash_search_48_8 + (is_ip6 ? &db->st.in2out : &db->st.out2in, &kv, &value)) + ste = pool_elt_at_index (st, value.value); + + return ste; +} + +u32 +nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste) +{ + nat64_db_st_entry_t *st; + + switch (ip_proto_to_nat_proto (ste->proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + st = db->st._##n##_st; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + st = db->st._unk_proto_st; + return (u32) ~ 0; + } + + return ste - st; +} + +nat64_db_st_entry_t * +nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index) +{ + nat64_db_st_entry_t *st; + + switch (ip_proto_to_nat_proto (proto)) + { +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + st = db->st._##n##_st; \ + break; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + default: + st = db->st._unk_proto_st; + break; + } + + return pool_elt_at_index (st, ste_index); +} + +void +nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now) +{ + u32 
*ste_to_be_free = 0, *ste_index; + nat64_db_st_entry_t *st, *ste; + +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + st = db->st._##n##_st; \ + pool_foreach (ste, st, ({\ + if (i == NAT_PROTOCOL_TCP && !ste->tcp_state) \ + continue; \ + if (ste->expire < now) \ + vec_add1 (ste_to_be_free, ste - st); \ + })); \ + vec_foreach (ste_index, ste_to_be_free) \ + nat64_db_st_entry_free (thread_index, db, \ + pool_elt_at_index(st, ste_index[0])); \ + vec_free (ste_to_be_free); \ + ste_to_be_free = 0; + foreach_nat_protocol +#undef _ + st = db->st._unk_proto_st; + pool_foreach (ste, st, ({ + if (ste->expire < now) + vec_add1 (ste_to_be_free, ste - st); + })); + vec_foreach (ste_index, ste_to_be_free) + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index(st, ste_index[0])); + vec_free (ste_to_be_free); +/* *INDENT-ON* */ +} + +void +nat64_db_free_out_addr (u32 thread_index, + nat64_db_t * db, ip4_address_t * out_addr) +{ + u32 *ste_to_be_free = 0, *ste_index; + nat64_db_st_entry_t *st, *ste; + nat64_db_bib_entry_t *bibe; + + db->addr_free = 1; +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + st = db->st._##n##_st; \ + pool_foreach (ste, st, ({ \ + bibe = pool_elt_at_index (db->bib._##n##_bib, ste->bibe_index); \ + if (bibe->out_addr.as_u32 == out_addr->as_u32) \ + vec_add1 (ste_to_be_free, ste - st); \ + })); \ + vec_foreach (ste_index, ste_to_be_free) \ + nat64_db_st_entry_free (thread_index, db, \ + pool_elt_at_index(st, ste_index[0])); \ + vec_free (ste_to_be_free); \ + ste_to_be_free = 0; + foreach_nat_protocol +#undef _ + st = db->st._unk_proto_st; + pool_foreach (ste, st, ({ + bibe = pool_elt_at_index (db->bib._unk_proto_bib, ste->bibe_index); + if (bibe->out_addr.as_u32 == out_addr->as_u32) + vec_add1 (ste_to_be_free, ste - st); + })); + vec_foreach (ste_index, ste_to_be_free) + nat64_db_st_entry_free (thread_index, db, + pool_elt_at_index(st, ste_index[0])); + vec_free (ste_to_be_free); + db->addr_free = 0; +/* *INDENT-ON* */ +} + +/* + * fd.io 
coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_db.h b/src/plugins/nat/nat64/nat64_db.h new file mode 100644 index 00000000000..711b6bf6b03 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_db.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_nat64_db_h__ +#define __included_nat64_db_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib_source.h> + +#include <vppinfra/bihash_24_8.h> +#include <vppinfra/bihash_48_8.h> + +typedef struct +{ + u32 bib_buckets; + u32 bib_memory_size; + u32 st_buckets; + u32 st_memory_size; +} nat64_config_t; + +typedef struct +{ + union + { + struct + { + ip46_address_t addr; + u32 fib_index; + u16 port; + u8 proto; + u8 rsvd; + }; + u64 as_u64[3]; + }; +} nat64_db_bib_entry_key_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct +{ + ip6_address_t in_addr; + u16 in_port; + ip4_address_t out_addr; + u16 out_port; + u32 fib_index; + u32 ses_num; + u8 proto; + u8 is_static; +}) nat64_db_bib_entry_t; +/* *INDENT-ON* */ + +typedef struct +{ + /* BIBs */ +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + nat64_db_bib_entry_t *_##n##_bib; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + nat64_db_bib_entry_t *_unk_proto_bib; + + /* BIB lookup */ + clib_bihash_24_8_t in2out; + clib_bihash_24_8_t out2in; + + u32 limit; + 
u32 bib_entries_num; +} nat64_db_bib_t; + +typedef struct +{ + union + { + struct + { + ip46_address_t l_addr; + ip46_address_t r_addr; + u32 fib_index; + u16 l_port; + u16 r_port; + u8 proto; + u8 rsvd[7]; + }; + u64 as_u64[6]; + }; +} nat64_db_st_entry_key_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct +{ + ip6_address_t in_r_addr; + ip4_address_t out_r_addr; + u16 r_port; + u32 bibe_index; + u32 expire; + u8 proto; + u8 tcp_state; +}) nat64_db_st_entry_t; +/* *INDENT-ON* */ + +typedef struct +{ + /* session tables */ +/* *INDENT-OFF* */ +#define _(N, i, n, s) \ + nat64_db_st_entry_t *_##n##_st; + foreach_nat_protocol +#undef _ +/* *INDENT-ON* */ + nat64_db_st_entry_t *_unk_proto_st; + + /* session lookup */ + clib_bihash_48_8_t in2out; + clib_bihash_48_8_t out2in; + + u32 limit; + u32 st_entries_num; +} nat64_db_st_t; + +struct nat64_db_s; + +/** + * @brief Call back function to free NAT64 pool address and port when BIB + * entry is deleted. + */ +typedef void (*nat64_db_free_addr_port_function_t) (struct nat64_db_s * db, + ip4_address_t * addr, + u16 port, u8 proto); + +typedef struct nat64_db_s +{ + nat64_db_bib_t bib; + nat64_db_st_t st; + nat64_db_free_addr_port_function_t free_addr_port_cb; + u8 addr_free; +} nat64_db_t; + +/** + * @brief Initialize NAT64 DB. + * + * @param db NAT64 DB. + * @param c.bib_buckets Number of BIB hash buckets. + * @param c.bib_memory_size Memory size of BIB hash. + * @param c.st_buckets Number of session table hash buckets. + * @param c.st_memory_size Memory size of session table hash. + * @param free_addr_port_cb Call back function to free address and port. + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat64_db_init (nat64_db_t * db, nat64_config_t c, + nat64_db_free_addr_port_function_t free_addr_port_cb); + +/** + * @brief Free NAT64 DB. + * + * @param db NAT64 DB. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +int nat64_db_free (nat64_db_t * db); + + +/** + * @brief Create new NAT64 BIB entry. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param in_addr Inside IPv6 address. + * @param out_addr Outside IPv4 address. + * @param in_port Inside port number. + * @param out_port Outside port number. + * @param fib_index FIB index. + * @param proto L4 protocol. + * @param is_static 1 if static, 0 if dynamic. + * + * @returns BIB entry on success, 0 otherwise. + */ +nat64_db_bib_entry_t *nat64_db_bib_entry_create (u32 thread_index, + nat64_db_t * db, + ip6_address_t * in_addr, + ip4_address_t * out_addr, + u16 in_port, u16 out_port, + u32 fib_index, u8 proto, + u8 is_static); + +/** + * @brief Free NAT64 BIB entry. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param bibe BIB entry. + */ +void nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_bib_entry_t * bibe); + +/** + * @brief Call back function when walking NAT64 BIB, non-zero + * return value stop walk. + */ +typedef int (*nat64_db_bib_walk_fn_t) (nat64_db_bib_entry_t * bibe, + void *ctx); +/** + * @brief Walk NAT64 BIB. + * + * @param db NAT64 DB. + * @param proto BIB L4 protocol: + * - 255 all BIBs + * - 6 TCP BIB + * - 17 UDP BIB + * - 1/58 ICMP BIB + * + * - otherwise "unknown" protocol BIB + * @param fn The function to invoke on each entry visited. + * @param ctx A context passed in the visit function. + */ +void nat64_db_bib_walk (nat64_db_t * db, u8 proto, + nat64_db_bib_walk_fn_t fn, void *ctx); + +/** + * @brief Find NAT64 BIB entry. + * + * @param db NAT64 DB. + * @param addr IP address. + * @param port Port number. + * @param proto L4 protocol. + * @param fib_index FIB index. + * @param is_ip6 1 if find by IPv6 (inside) address, 0 by IPv4 (outside). + * + * @return BIB entry if found. 
+ */ +nat64_db_bib_entry_t *nat64_db_bib_entry_find (nat64_db_t * db, + ip46_address_t * addr, + u16 port, + u8 proto, + u32 fib_index, u8 is_ip6); + +/** + * @brief Get BIB entry by index and protocol. + * + * @param db NAT64 DB. + * @param proto L4 protocol. + * @param bibe_index BIB entry index. + * + * @return BIB entry if found. + */ +nat64_db_bib_entry_t *nat64_db_bib_entry_by_index (nat64_db_t * db, + u8 proto, u32 bibe_index); +/** + * @brief Create new NAT64 session table entry. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param bibe Corresponding BIB entry. + * @param in_r_addr Inside IPv6 address of the remote host. + * @param out_r_addr Outside IPv4 address of the remote host. + * @param r_port Remote host port number. + * + * @returns ST entry on success, 0 otherwise. + */ +nat64_db_st_entry_t *nat64_db_st_entry_create (u32 thread_index, + nat64_db_t * db, + nat64_db_bib_entry_t * bibe, + ip6_address_t * in_r_addr, + ip4_address_t * out_r_addr, + u16 r_port); + +/** + * @brief Free NAT64 session table entry. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param ste Session table entry. + */ +void nat64_db_st_entry_free (u32 thread_index, nat64_db_t * db, + nat64_db_st_entry_t * ste); + +/** + * @brief Find NAT64 session table entry. + * + * @param db NAT64 DB. + * @param l_addr Local host address. + * @param r_addr Remote host address. + * @param l_port Local host port number. + * @param r_port Remote host port number. + * @param proto L4 protocol. + * @param fib_index FIB index. + * @param is_ip6 1 if find by IPv6 (inside) address, 0 by IPv4 (outside). + * + * @return ST entry if found. + */ +nat64_db_st_entry_t *nat64_db_st_entry_find (nat64_db_t * db, + ip46_address_t * l_addr, + ip46_address_t * r_addr, + u16 l_port, u16 r_port, + u8 proto, + u32 fib_index, u8 is_ip6); + +/** + * @brief Call back function when walking NAT64 session table, non-zero + * return value stop walk. 
+ */ +typedef int (*nat64_db_st_walk_fn_t) (nat64_db_st_entry_t * ste, void *ctx); + +/** + * @brief Walk NAT64 session table. + * + * @param db NAT64 DB. + * @param proto L4 protocol: + * - 255 all session tables + * - 6 TCP session table + * - 17 UDP session table + * - 1/58 ICMP session table + * - otherwise "unknown" protocol session table + * @param fn The function to invoke on each entry visited. + * @param ctx A context passed in the visit function. + */ +void nat64_db_st_walk (nat64_db_t * db, u8 proto, + nat64_db_st_walk_fn_t fn, void *ctx); + +/** + * @brief Free expired session entries in session tables. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param now Current time. + */ +void nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now); + +/** + * @brief Free sessions using specific outside address. + * + * @param thread_index thread index. + * @param db NAT64 DB. + * @param out_addr Outside address to match. + */ +void nat64_db_free_out_addr (u32 thread_index, nat64_db_t * db, + ip4_address_t * out_addr); + +/** + * @brief Get ST entry index. + * + * @param db NAT64 DB. + * @param ste ST entry. + * + * @return ST entry index on success, ~0 otherwise. + */ +u32 nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste); + +/** + * @brief Get ST entry by index and protocol. + * + * @param db NAT64 DB. + * @param proto L4 protocol. + * @param ste_index ST entry index. + * + * @return ST entry if found. 
+ */ +nat64_db_st_entry_t *nat64_db_st_entry_by_index (nat64_db_t * db, + u8 proto, u32 ste_index); +#endif /* __included_nat64_db_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_doc.md b/src/plugins/nat/nat64/nat64_doc.md new file mode 100644 index 00000000000..f65b46338b0 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_doc.md @@ -0,0 +1,73 @@ +# Stateful NAT64: Network Address and Protocol Translation from IPv6 Clients to IPv4 Servers {#nat64_doc} + +## Introduction + +Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers using unicast UDP, TCP, or ICMP based on RFC 6146. + +## Configuration + +### Enable/disable NAT64 feature on the interface + +> set interface nat64 in|out <intfc> [del] + +in: inside/local/IPv6 network +out: outside/external/IPv4 network +intfc: interface name + +### Add/delete NAT64 pool address + +One or more public IPv4 addresses assigned to a NAT64 are shared among several IPv6-only clients. + +> nat64 add pool address <ip4-range-start> [- <ip4-range-end>] [tenant-vrf <tenant-vrf-id>] [del] + +ip4-range-start: First IPv4 address of the range +ip4-range-end: Last IPv4 address of the range (optional, not used for single address) +tenant-vrf-id: VRF id of the tenant associated with the pool address (optional, if not set pool address is global) + +### Add/delete static BIB entry + +Stateful NAT64 also supports IPv4-initiated communications to a subset of the IPv6 hosts through statically configured bindings. 
+ +> nat64 add static bib <ip6-addr> <in-port> <ip4-addr> <out-port> tcp|udp|icmp [vrf <table-id>] [del] + +ip6-addr: inside IPv6 address of the host +in-port: inside port or ICMPv6 identifier +ip4-addr: outside IPv4 address of the host +out-port: outside port or ICMPv4 identifier +table-id: VRF id of the tenant associated with the BIB entry (optional, defaults to the global VRF) + +### Set NAT64 session timeouts + +A session is deleted when its timer expires. If all sessions corresponding to a dynamically created BIB entry are deleted, then the BIB entry is also deleted. When packets are flowing, the session timer is refreshed to keep the session alive. + +> set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> tcp-est <sec> tcp-incoming-syn <sec> | reset + +udp: UDP session timeout value (default 300sec) +icmp: ICMP session timeout value (default 60sec) +tcp-trans: transitory TCP session timeout value (default 240sec) +tcp-est: established TCP session timeout value (default 7440sec) +tcp-incoming-syn: incoming SYN TCP session timeout value (default 6sec) +reset: reset timers to default values + +### Set NAT64 prefix + +Stateful NAT64 supports the algorithm for generating IPv6 representations of IPv4 addresses defined in RFC 6052. If no prefix is configured, the Well-Known Prefix (64:ff9b::/96) is used. + +> nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] [del] + +ip6-prefix: IPv6 prefix +plen: prefix length (valid values: 32, 40, 48, 56, 64, or 96) +tenant-vrf: VRF id of the tenant associated with the prefix + +### Show commands + +> show nat64 pool +> show nat64 interfaces +> show nat64 bib tcp|udp|icmp +> show nat64 session table tcp|udp|icmp +> show nat64 timeouts +> show nat64 prefix + +## Notes + +Multi-threading is not supported yet (CLI/API commands are disabled when VPP runs with multiple threads). 
diff --git a/src/plugins/nat/nat64/nat64_in2out.c b/src/plugins/nat/nat64/nat64_in2out.c new file mode 100644 index 00000000000..5d98277d443 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_in2out.c @@ -0,0 +1,1411 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <nat/nat64/nat64.h> +#include <vnet/ip/ip6_to_ip4.h> +#include <vnet/fib/fib_table.h> +#include <nat/lib/nat_inlines.h> + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u8 is_slow_path; +} nat64_in2out_trace_t; + +static u8 * +format_nat64_in2out_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_in2out_trace_t *t = va_arg (*args, nat64_in2out_trace_t *); + char *tag; + + tag = t->is_slow_path ? 
"NAT64-in2out-slowpath" : "NAT64-in2out"; + + s = + format (s, "%s: sw_if_index %d, next index %d", tag, t->sw_if_index, + t->next_index); + + return s; +} + +#define foreach_nat64_in2out_error \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ +_(NO_TRANSLATION, "no translation") \ +_(UNKNOWN, "unknown") + + +typedef enum +{ +#define _(sym,str) NAT64_IN2OUT_ERROR_##sym, + foreach_nat64_in2out_error +#undef _ + NAT64_IN2OUT_N_ERROR, +} nat64_in2out_error_t; + +static char *nat64_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_nat64_in2out_error +#undef _ +}; + +typedef enum +{ + NAT64_IN2OUT_NEXT_IP4_LOOKUP, + NAT64_IN2OUT_NEXT_IP6_LOOKUP, + NAT64_IN2OUT_NEXT_DROP, + NAT64_IN2OUT_NEXT_SLOWPATH, + NAT64_IN2OUT_N_NEXT, +} nat64_in2out_next_t; + +typedef struct nat64_in2out_set_ctx_t_ +{ + vlib_buffer_t *b; + vlib_main_t *vm; + u32 thread_index; +} nat64_in2out_set_ctx_t; + +static inline u8 +nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr) +{ + ip6_address_t *addr; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm6 = &im6->lookup_main; + ip_interface_address_t *ia = 0; + + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 0, + ({ + addr = ip_interface_address_get_address (lm6, ia); + if (0 == ip6_address_compare (addr, &ip6_addr)) + return 1; + })); + /* *INDENT-ON* */ + + return 0; +} + +/** + * @brief Check whether is a hairpinning. + * + * If the destination IP address of the packet is an IPv4 address assigned to + * the NAT64 itself, then the packet is a hairpin packet. + * + * param dst_addr Destination address of the packet. + * + * @returns 1 if hairpinning, otherwise 0. 
+ */ +static_always_inline int +is_hairpinning (ip6_address_t * dst_addr) +{ + nat64_main_t *nm = &nat64_main; + int i; + + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + if (nm->addr_pool[i].addr.as_u32 == dst_addr->as_u32[3]) + return 1; + } + + return 0; +} + +static int +nat64_in2out_tcp_udp (vlib_main_t * vm, vlib_buffer_t * p, u16 l4_offset, + u16 frag_hdr_offset, nat64_in2out_set_ctx_t * ctx) +{ + ip6_header_t *ip6; + ip_csum_t csum = 0; + ip4_header_t *ip4; + u16 fragment_id; + u8 frag_more; + u16 frag_offset; + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t old_saddr, old_daddr; + ip4_address_t new_daddr; + u32 sw_if_index, fib_index; + u8 proto = vnet_buffer (p)->ip.reass.ip_proto; + u16 sport = vnet_buffer (p)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (p)->ip.reass.l4_dst_port; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + ip6 = vlib_buffer_get_current (p); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + ip4 = vlib_buffer_get_current (p); + + u32 ip_version_traffic_class_and_flow_label = + ip6->ip_version_traffic_class_and_flow_label; + u16 payload_length = ip6->payload_length; + u8 hop_limit = ip6->hop_limit; + + old_saddr.as_u64[0] = ip6->src_address.as_u64[0]; + old_saddr.as_u64[1] = ip6->src_address.as_u64[1]; + old_daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + old_daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_more = ip6_frag_hdr_more (hdr); + frag_offset = ip6_frag_hdr_offset (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip_version_traffic_class_and_flow_label); + ip4->length = + u16_net_add 
(payload_length, sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = hop_limit; + ip4->protocol = (proto == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : proto; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + ste = + nat64_db_st_entry_find (db, &old_saddr, &old_daddr, sport, dport, proto, + fib_index, 1); + + if (ste) + { + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = + nat64_db_bib_entry_find (db, &old_saddr, sport, proto, fib_index, 1); + + if (!bibe) + { + u16 out_port; + ip4_address_t out_addr; + if (nat64_alloc_out_addr_and_port + (fib_index, ip_proto_to_nat_proto (proto), &out_addr, + &out_port, ctx->thread_index)) + return -1; + + bibe = + nat64_db_bib_entry_create (ctx->thread_index, db, + &old_saddr.ip6, &out_addr, sport, + out_port, fib_index, proto, 0); + if (!bibe) + return -1; + + vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0, + db->bib.bib_entries_num); + } + + nat64_extract_ip4 (&old_daddr.ip6, &new_daddr, fib_index); + ste = + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &old_daddr.ip6, &new_daddr, dport); + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); + } + + ip4->src_address.as_u32 = bibe->out_addr.as_u32; + ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; + + ip4->checksum = ip4_header_checksum (ip4); + + if (!vnet_buffer (p)->ip.reass.is_non_first_fragment) + { + udp_header_t *udp = (udp_header_t *) (ip4 + 1); + udp->src_port = bibe->out_port; + + //UDP checksum is optional over IPv4 + if (proto == IP_PROTOCOL_UDP) + { + udp->checksum = 0; + } + else + { + tcp_header_t *tcp = (tcp_header_t *) (ip4 + 1); + csum = 
ip_csum_sub_even (tcp->checksum, old_saddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_saddr.as_u64[1]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[0]); + csum = ip_csum_sub_even (csum, old_daddr.as_u64[1]); + csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_add_even (csum, udp->src_port); + mss_clamping (nm->mss_clamping, tcp, &csum); + tcp->checksum = ip_csum_fold (csum); + + nat64_tcp_session_set_state (ste, tcp, 1); + } + } + + nat64_session_reset_timeout (ste, ctx->vm); + + return 0; +} + +static int +nat64_in2out_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) +{ + nat64_main_t *nm = &nat64_main; + nat64_in2out_set_ctx_t *ctx = arg; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr; + u32 sw_if_index, fib_index; + icmp46_header_t *icmp = ip6_next_header (ip6); + nat64_db_t *db = &nm->db[ctx->thread_index]; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = ip6->src_address.as_u64[0]; + saddr.as_u64[1] = ip6->src_address.as_u64[1]; + daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) + { + u16 in_id = ((u16 *) (icmp))[2]; + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, in_id, 0, + IP_PROTOCOL_ICMP, fib_index, 1); + + if (ste) + { + bibe = + nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP, + ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = + nat64_db_bib_entry_find (db, &saddr, in_id, + IP_PROTOCOL_ICMP, fib_index, 1); + + if (!bibe) + { + u16 out_id; + ip4_address_t out_addr; + if (nat64_alloc_out_addr_and_port + (fib_index, NAT_PROTOCOL_ICMP, &out_addr, &out_id, + ctx->thread_index)) + return -1; + + bibe = + 
nat64_db_bib_entry_create (ctx->thread_index, db, + &ip6->src_address, &out_addr, + in_id, out_id, fib_index, + IP_PROTOCOL_ICMP, 0); + if (!bibe) + return -1; + + vlib_set_simple_counter (&nm->total_bibs, ctx->thread_index, 0, + db->bib.bib_entries_num); + } + + nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + ste = + nat64_db_st_entry_create (ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); + } + + nat64_session_reset_timeout (ste, ctx->vm); + + ip4->src_address.as_u32 = bibe->out_addr.as_u32; + ((u16 *) (icmp))[2] = bibe->out_port; + + ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; + } + else + { + if (!vec_len (nm->addr_pool)) + return -1; + + ip4->src_address.as_u32 = nm->addr_pool[0].addr.as_u32; + nat64_extract_ip4 (&ip6->dst_address, &ip4->dst_address, fib_index); + } + + return 0; +} + +static int +nat64_in2out_inner_icmp_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, + void *arg) +{ + nat64_main_t *nm = &nat64_main; + nat64_in2out_set_ctx_t *ctx = arg; + nat64_db_st_entry_t *ste; + nat64_db_bib_entry_t *bibe; + ip46_address_t saddr, daddr; + u32 sw_if_index, fib_index; + u8 proto = ip6->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = ip6->src_address.as_u64[0]; + saddr.as_u64[1] = ip6->src_address.as_u64[1]; + daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (proto == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *icmp = ip6_next_header (ip6); + u16 in_id = ((u16 *) (icmp))[2]; + proto = IP_PROTOCOL_ICMP; + + if (! 
+ (icmp->type == ICMP4_echo_request + || icmp->type == ICMP4_echo_reply)) + return -1; + + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, in_id, 0, proto, + fib_index, 1); + if (!ste) + return -1; + + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + ip4->dst_address.as_u32 = bibe->out_addr.as_u32; + ((u16 *) (icmp))[2] = bibe->out_port; + ip4->src_address.as_u32 = ste->out_r_addr.as_u32; + } + else + { + udp_header_t *udp = ip6_next_header (ip6); + tcp_header_t *tcp = ip6_next_header (ip6); + u16 *checksum; + ip_csum_t csum; + + u16 sport = udp->src_port; + u16 dport = udp->dst_port; + + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, + fib_index, 1); + if (!ste) + return -1; + + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + ip4->dst_address.as_u32 = bibe->out_addr.as_u32; + udp->dst_port = bibe->out_port; + ip4->src_address.as_u32 = ste->out_r_addr.as_u32; + + if (proto == IP_PROTOCOL_TCP) + checksum = &tcp->checksum; + else + checksum = &udp->checksum; + csum = ip_csum_sub_even (*checksum, dport); + csum = ip_csum_add_even (csum, udp->dst_port); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + +typedef struct unk_proto_st_walk_ctx_t_ +{ + ip6_address_t src_addr; + ip6_address_t dst_addr; + ip4_address_t out_addr; + u32 fib_index; + u32 thread_index; + u8 proto; +} unk_proto_st_walk_ctx_t; + +static int +unk_proto_st_walk (nat64_db_st_entry_t * ste, void *arg) +{ + nat64_main_t *nm = &nat64_main; + unk_proto_st_walk_ctx_t *ctx = arg; + nat64_db_bib_entry_t *bibe; + ip46_address_t saddr, daddr; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + if (ip6_address_is_equal (&ste->in_r_addr, &ctx->dst_addr)) + { + bibe = nat64_db_bib_entry_by_index (db, ste->proto, ste->bibe_index); + if (!bibe) + return -1; + + if (ip6_address_is_equal (&bibe->in_addr, &ctx->src_addr) + && bibe->fib_index == ctx->fib_index) + { + clib_memset (&saddr, 0, 
sizeof (saddr)); + saddr.ip4.as_u32 = bibe->out_addr.as_u32; + clib_memset (&daddr, 0, sizeof (daddr)); + nat64_extract_ip4 (&ctx->dst_addr, &daddr.ip4, ctx->fib_index); + + if (nat64_db_st_entry_find + (db, &daddr, &saddr, 0, 0, ctx->proto, ctx->fib_index, 0)) + return -1; + + ctx->out_addr.as_u32 = bibe->out_addr.as_u32; + return 1; + } + } + + return 0; +} + +static int +nat64_in2out_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, u8 l4_protocol, + u16 l4_offset, u16 frag_hdr_offset, + nat64_in2out_set_ctx_t * s_ctx) +{ + ip6_header_t *ip6; + ip4_header_t *ip4; + u16 fragment_id; + u16 frag_offset; + u8 frag_more; + + ip6 = vlib_buffer_get_current (p); + + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_hdr_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = + (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset); + fragment_id = frag_id_6to4 (hdr->identification); + frag_offset = ip6_frag_hdr_offset (hdr); + frag_more = ip6_frag_hdr_more (hdr); + } + else + { + fragment_id = 0; + frag_offset = 0; + frag_more = 0; + } + + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr, addr; + u32 sw_if_index, fib_index; + int i; + nat64_db_t *db = &nm->db[s_ctx->thread_index]; + + sw_if_index = vnet_buffer (s_ctx->b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = ip6->src_address.as_u64[0]; + saddr.as_u64[1] = ip6->src_address.as_u64[1]; + daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, l4_protocol, fib_index, + 1); + + if (ste) + { + bibe = nat64_db_bib_entry_by_index (db, l4_protocol, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = + nat64_db_bib_entry_find (db, &saddr, 0, 
l4_protocol, fib_index, 1); + + if (!bibe) + { + /* Choose same out address as for TCP/UDP session to same dst */ + unk_proto_st_walk_ctx_t ctx = { + .src_addr.as_u64[0] = ip6->src_address.as_u64[0], + .src_addr.as_u64[1] = ip6->src_address.as_u64[1], + .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0], + .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1], + .out_addr.as_u32 = 0, + .fib_index = fib_index, + .proto = l4_protocol, + .thread_index = s_ctx->thread_index, + }; + + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); + + if (!ctx.out_addr.as_u32) + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); + + /* Verify if out address is not already in use for protocol */ + clib_memset (&addr, 0, sizeof (addr)); + addr.ip4.as_u32 = ctx.out_addr.as_u32; + if (nat64_db_bib_entry_find (db, &addr, 0, l4_protocol, 0, 0)) + ctx.out_addr.as_u32 = 0; + + if (!ctx.out_addr.as_u32) + { + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; + if (!nat64_db_bib_entry_find + (db, &addr, 0, l4_protocol, 0, 0)) + break; + } + } + + if (!ctx.out_addr.as_u32) + return -1; + + bibe = + nat64_db_bib_entry_create (s_ctx->thread_index, db, + &ip6->src_address, &ctx.out_addr, + 0, 0, fib_index, l4_protocol, 0); + if (!bibe) + return -1; + + vlib_set_simple_counter (&nm->total_bibs, s_ctx->thread_index, 0, + db->bib.bib_entries_num); + } + + nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + ste = + nat64_db_st_entry_create (s_ctx->thread_index, db, bibe, + &ip6->dst_address, &daddr.ip4, 0); + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, s_ctx->thread_index, 0, + db->st.st_entries_num); + } + + nat64_session_reset_timeout (ste, s_ctx->vm); + + ip4->src_address.as_u32 = bibe->out_addr.as_u32; + ip4->dst_address.as_u32 = ste->out_r_addr.as_u32; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos 
(ip6->ip_version_traffic_class_and_flow_label); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + return 0; +} + +static int +nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, + ip6_header_t * ip6, u32 l4_offset, + u32 thread_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr; + u32 sw_if_index, fib_index; + udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset); + tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset); + u8 proto = vnet_buffer (b)->ip.reass.ip_proto; + u16 sport = vnet_buffer (b)->ip.reass.l4_src_port; + u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port; + u16 *checksum = NULL; + ip_csum_t csum = 0; + nat64_db_t *db = &nm->db[thread_index]; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = ip6->src_address.as_u64[0]; + saddr.as_u64[1] = ip6->src_address.as_u64[1]; + daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (proto == IP_PROTOCOL_UDP) + checksum = &udp->checksum; + else + checksum = &tcp->checksum; + csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); + } + + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + fib_index, 1); + + if (ste) + { + bibe = 
nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = nat64_db_bib_entry_find (db, &saddr, sport, proto, fib_index, 1); + + if (!bibe) + { + u16 out_port; + ip4_address_t out_addr; + if (nat64_alloc_out_addr_and_port + (fib_index, ip_proto_to_nat_proto (proto), &out_addr, + &out_port, thread_index)) + return -1; + + bibe = + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, + &out_addr, sport, out_port, fib_index, + proto, 0); + if (!bibe) + return -1; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + + nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + ste = + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, + &daddr.ip4, dport); + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + } + + if (proto == IP_PROTOCOL_TCP) + nat64_tcp_session_set_state (ste, tcp, 1); + + nat64_session_reset_timeout (ste, vm); + + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + udp->src_port = bibe->out_port; + } + + nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); + + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ste->out_r_addr.as_u32; + + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0); + + if (bibe) + break; + } + /* *INDENT-ON* */ + + if (!bibe) + return -1; + + ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; + + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_sub_even (csum, 
dport); + udp->dst_port = bibe->in_port; + csum = ip_csum_add_even (csum, udp->src_port); + csum = ip_csum_add_even (csum, udp->dst_port); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + +static int +nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, + ip6_header_t * ip6, u32 thread_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + icmp46_header_t *icmp = ip6_next_header (ip6); + ip6_header_t *inner_ip6; + ip46_address_t saddr, daddr; + u32 sw_if_index, fib_index; + u8 proto; + udp_header_t *udp; + tcp_header_t *tcp; + u16 *checksum, sport, dport; + ip_csum_t csum; + nat64_db_t *db = &nm->db[thread_index]; + + if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) + return -1; + + inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + proto = inner_ip6->protocol; + + if (proto == IP_PROTOCOL_ICMP6) + return -1; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = inner_ip6->src_address.as_u64[0]; + saddr.as_u64[1] = inner_ip6->src_address.as_u64[1]; + daddr.as_u64[0] = inner_ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = inner_ip6->dst_address.as_u64[1]; + + udp = ip6_next_header (inner_ip6); + tcp = ip6_next_header (inner_ip6); + + sport = udp->src_port; + dport = udp->dst_port; + + if (proto == IP_PROTOCOL_UDP) + checksum = &udp->checksum; + else + checksum = &tcp->checksum; + + csum = ip_csum_sub_even (*checksum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]); + csum = ip_csum_sub_even (csum, sport); + csum = ip_csum_sub_even (csum, dport); + + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, + fib_index, 1); + if (!ste) + return -1; + + bibe = 
nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + dport = udp->dst_port = bibe->out_port; + nat64_compose_ip6 (&inner_ip6->dst_address, &bibe->out_addr, fib_index); + + clib_memset (&saddr, 0, sizeof (saddr)); + clib_memset (&daddr, 0, sizeof (daddr)); + saddr.ip4.as_u32 = ste->out_r_addr.as_u32; + daddr.ip4.as_u32 = bibe->out_addr.as_u32; + + ste = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + 0, 0); + + if (ste) + break; + } + /* *INDENT-ON* */ + + if (!ste) + return -1; + + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + inner_ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0]; + inner_ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1]; + udp->src_port = bibe->in_port; + + csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]); + csum = ip_csum_add_even (csum, udp->src_port); + csum = ip_csum_add_even (csum, udp->dst_port); + *checksum = ip_csum_fold (csum); + + if (!vec_len (nm->addr_pool)) + return -1; + + nat64_compose_ip6 (&ip6->src_address, &nm->addr_pool[0].addr, fib_index); + ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0]; + ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1]; + + icmp->checksum = 0; + csum = ip_csum_with_carry (0, ip6->payload_length); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol)); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]); + csum = + ip_incremental_checksum (csum, icmp, + clib_net_to_host_u16 
(ip6->payload_length)); + icmp->checksum = ~ip_csum_fold (csum); + + return 0; +} + +static int +nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b, + ip6_header_t * ip6, u32 thread_index) +{ + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr, addr; + u32 sw_if_index, fib_index; + u8 proto = ip6->protocol; + int i; + nat64_db_t *db = &nm->db[thread_index]; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + saddr.as_u64[0] = ip6->src_address.as_u64[0]; + saddr.as_u64[1] = ip6->src_address.as_u64[1]; + daddr.as_u64[0] = ip6->dst_address.as_u64[0]; + daddr.as_u64[1] = ip6->dst_address.as_u64[1]; + + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, 0, 0, proto, fib_index, 1); + + if (ste) + { + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = nat64_db_bib_entry_find (db, &saddr, 0, proto, fib_index, 1); + + if (!bibe) + { + /* Choose same out address as for TCP/UDP session to same dst */ + unk_proto_st_walk_ctx_t ctx = { + .src_addr.as_u64[0] = ip6->src_address.as_u64[0], + .src_addr.as_u64[1] = ip6->src_address.as_u64[1], + .dst_addr.as_u64[0] = ip6->dst_address.as_u64[0], + .dst_addr.as_u64[1] = ip6->dst_address.as_u64[1], + .out_addr.as_u32 = 0, + .fib_index = fib_index, + .proto = proto, + .thread_index = thread_index, + }; + + nat64_db_st_walk (db, IP_PROTOCOL_TCP, unk_proto_st_walk, &ctx); + + if (!ctx.out_addr.as_u32) + nat64_db_st_walk (db, IP_PROTOCOL_UDP, unk_proto_st_walk, &ctx); + + /* Verify if out address is not already in use for protocol */ + clib_memset (&addr, 0, sizeof (addr)); + addr.ip4.as_u32 = ctx.out_addr.as_u32; + if (nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + ctx.out_addr.as_u32 = 0; + + if (!ctx.out_addr.as_u32) + { + for (i = 0; i < vec_len (nm->addr_pool); i++) + { + 
addr.ip4.as_u32 = nm->addr_pool[i].addr.as_u32; + if (!nat64_db_bib_entry_find (db, &addr, 0, proto, 0, 0)) + break; + } + } + + if (!ctx.out_addr.as_u32) + return -1; + + bibe = + nat64_db_bib_entry_create (thread_index, db, &ip6->src_address, + &ctx.out_addr, 0, 0, fib_index, proto, + 0); + if (!bibe) + return -1; + + vlib_set_simple_counter (&nm->total_bibs, thread_index, 0, + db->bib.bib_entries_num); + } + + nat64_extract_ip4 (&ip6->dst_address, &daddr.ip4, fib_index); + ste = + nat64_db_st_entry_create (thread_index, db, bibe, &ip6->dst_address, + &daddr.ip4, 0); + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, thread_index, 0, + db->st.st_entries_num); + } + + nat64_session_reset_timeout (ste, vm); + + nat64_compose_ip6 (&ip6->src_address, &bibe->out_addr, fib_index); + + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ste->out_r_addr.as_u32; + + bibe = 0; + /* *INDENT-OFF* */ + vec_foreach (db, nm->db) + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0); + + if (bibe) + break; + } + /* *INDENT-ON* */ + + if (!bibe) + return -1; + + ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; + + return 0; +} + +static inline uword +nat64_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u8 is_slow_path) +{ + u32 n_left_from, *from, *to_next; + nat64_in2out_next_t next_index; + u32 thread_index = vm->thread_index; + nat64_main_t *nm = &nat64_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip6_header_t *ip60; + u16 l4_offset0, frag_hdr_offset0; + u8 l4_protocol0; + u32 proto0; + nat64_in2out_set_ctx_t ctx0; + u32 
sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip60 = vlib_buffer_get_current (b0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + ctx0.b = b0; + ctx0.vm = vm; + ctx0.thread_index = thread_index; + + next0 = NAT64_IN2OUT_NEXT_IP4_LOOKUP; + + if (PREDICT_FALSE + (ip6_parse + (vm, b0, ip60, b0->current_length, &l4_protocol0, &l4_offset0, + &frag_hdr_offset0))) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = node->errors[NAT64_IN2OUT_ERROR_UNKNOWN]; + goto trace0; + } + + if (nat64_not_translate (sw_if_index0, ip60->dst_address)) + { + next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (l4_protocol0); + + if (is_slow_path) + { + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_OTHER)) + { + vlib_increment_simple_counter (&nm->counters.in2out.other, + thread_index, sw_if_index0, + 1); + if (is_hairpinning (&ip60->dst_address)) + { + next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; + if (nat64_in2out_unk_proto_hairpinning + (vm, b0, ip60, thread_index)) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = + node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + } + goto trace0; + } + + if (nat64_in2out_unk_proto + (vm, b0, l4_protocol0, l4_offset0, frag_hdr_offset0, + &ctx0)) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = + node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace0; + } + } + goto trace0; + } + else + { + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + next0 = NAT64_IN2OUT_NEXT_SLOWPATH; + goto trace0; + } + } + + if (proto0 == NAT_PROTOCOL_ICMP) + { + vlib_increment_simple_counter (&nm->counters.in2out.icmp, + thread_index, sw_if_index0, 1); + if (is_hairpinning (&ip60->dst_address)) + { + next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; + if (nat64_in2out_icmp_hairpinning + (vm, b0, ip60, thread_index)) + { + next0 = 
NAT64_IN2OUT_NEXT_DROP; + b0->error = + node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + } + goto trace0; + } + + if (icmp6_to_icmp + (vm, b0, nat64_in2out_icmp_set_cb, &ctx0, + nat64_in2out_inner_icmp_set_cb, &ctx0)) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace0; + } + } + else if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP) + { + if (proto0 == NAT_PROTOCOL_TCP) + vlib_increment_simple_counter (&nm->counters.in2out.tcp, + thread_index, sw_if_index0, 1); + else + vlib_increment_simple_counter (&nm->counters.in2out.udp, + thread_index, sw_if_index0, 1); + + if (is_hairpinning (&ip60->dst_address)) + { + next0 = NAT64_IN2OUT_NEXT_IP6_LOOKUP; + if (nat64_in2out_tcp_udp_hairpinning + (vm, b0, ip60, l4_offset0, thread_index)) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = + node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + } + goto trace0; + } + + if (nat64_in2out_tcp_udp + (vm, b0, l4_offset0, frag_hdr_offset0, &ctx0)) + { + next0 = NAT64_IN2OUT_NEXT_DROP; + b0->error = node->errors[NAT64_IN2OUT_ERROR_NO_TRANSLATION]; + goto trace0; + } + } + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->is_slow_path = is_slow_path; + } + + if (next0 == NAT64_IN2OUT_NEXT_DROP) + { + vlib_increment_simple_counter (&nm->counters.in2out.drops, + thread_index, sw_if_index0, 1); + } + + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 
nat64_in2out_node_fn_inline (vm, node, frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_in2out_node) = { + .name = "nat64-in2out", + .vector_size = sizeof (u32), + .format_trace = format_nat64_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat64_in2out_error_strings), + .error_strings = nat64_in2out_error_strings, + .n_next_nodes = NAT64_IN2OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [NAT64_IN2OUT_NEXT_DROP] = "error-drop", + [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return nat64_in2out_node_fn_inline (vm, node, frame, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = { + .name = "nat64-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_nat64_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat64_in2out_error_strings), + .error_strings = nat64_in2out_error_strings, + .n_next_nodes = NAT64_IN2OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [NAT64_IN2OUT_NEXT_DROP] = "error-drop", + [NAT64_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [NAT64_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +typedef struct nat64_in2out_frag_set_ctx_t_ +{ + vlib_main_t *vm; + u32 sess_index; + u32 thread_index; + u16 l4_offset; + u8 proto; + u8 first_frag; +} nat64_in2out_frag_set_ctx_t; + + +#define foreach_nat64_in2out_handoff_error \ +_(CONGESTION_DROP, "congestion drop") \ +_(SAME_WORKER, "same worker") \ +_(DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym,str) NAT64_IN2OUT_HANDOFF_ERROR_##sym, + foreach_nat64_in2out_handoff_error +#undef _ + NAT64_IN2OUT_HANDOFF_N_ERROR, +} 
nat64_in2out_handoff_error_t; + +static char *nat64_in2out_handoff_error_strings[] = { +#define _(sym,string) string, + foreach_nat64_in2out_handoff_error +#undef _ +}; + +typedef struct +{ + u32 next_worker_index; +} nat64_in2out_handoff_trace_t; + +static u8 * +format_nat64_in2out_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_in2out_handoff_trace_t *t = + va_arg (*args, nat64_in2out_handoff_trace_t *); + + s = + format (s, "NAT64-IN2OUT-HANDOFF: next-worker %d", t->next_worker_index); + + return s; +} + +VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat64_main_t *nm = &nat64_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 n_enq, n_left_from, *from; + u16 thread_indices[VLIB_FRAME_SIZE], *ti; + u32 fq_index; + u32 thread_index = vm->thread_index; + u32 do_handoff = 0, same_worker = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); + + b = bufs; + ti = thread_indices; + + fq_index = nm->fq_in2out_index; + + while (n_left_from > 0) + { + ip6_header_t *ip0; + + ip0 = vlib_buffer_get_current (b[0]); + ti[0] = nat64_get_worker_in2out (&ip0->src_address); + + if (ti[0] != thread_index) + do_handoff++; + else + same_worker++; + + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_in2out_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->next_worker_index = ti[0]; + } + + n_left_from -= 1; + ti += 1; + b += 1; + } + + n_enq = + vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, + frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + 
vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_SAME_WORKER, + same_worker); + vlib_node_increment_counter (vm, node->node_index, + NAT64_IN2OUT_HANDOFF_ERROR_DO_HANDOFF, + do_handoff); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = { + .name = "nat64-in2out-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat64_in2out_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat64_in2out_handoff_error_strings), + .error_strings = nat64_in2out_handoff_error_strings, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat64/nat64_out2in.c b/src/plugins/nat/nat64/nat64_out2in.c new file mode 100644 index 00000000000..108edf0bdd1 --- /dev/null +++ b/src/plugins/nat/nat64/nat64_out2in.c @@ -0,0 +1,796 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <nat/nat64/nat64.h> +#include <vnet/ip/ip4_to_ip6.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/udp/udp.h> + +typedef struct +{ + u32 sw_if_index; + u32 next_index; +} nat64_out2in_trace_t; + +static u8 * +format_nat64_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_out2in_trace_t *t = va_arg (*args, nat64_out2in_trace_t *); + + s = + format (s, "NAT64-out2in: sw_if_index %d, next index %d", t->sw_if_index, + t->next_index); + + return s; +} + +#define foreach_nat64_out2in_error \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ +_(NO_TRANSLATION, "no translation") \ +_(UNKNOWN, "unknown") + +typedef enum +{ +#define _(sym,str) NAT64_OUT2IN_ERROR_##sym, + foreach_nat64_out2in_error +#undef _ + NAT64_OUT2IN_N_ERROR, +} nat64_out2in_error_t; + +static char *nat64_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_nat64_out2in_error +#undef _ +}; + +typedef enum +{ + NAT64_OUT2IN_NEXT_IP6_LOOKUP, + NAT64_OUT2IN_NEXT_IP4_LOOKUP, + NAT64_OUT2IN_NEXT_DROP, + NAT64_OUT2IN_N_NEXT, +} nat64_out2in_next_t; + +typedef struct nat64_out2in_set_ctx_t_ +{ + vlib_buffer_t *b; + vlib_main_t *vm; + u32 thread_index; +} nat64_out2in_set_ctx_t; + +static int +nat64_out2in_tcp_udp (vlib_main_t * vm, vlib_buffer_t * b, + nat64_out2in_set_ctx_t * ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum = NULL; + ip6_frag_hdr_t *frag; + u32 frag_id; + ip4_address_t old_src, old_dst; + + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr; + ip46_address_t daddr; + ip6_address_t ip6_saddr; + u8 proto = vnet_buffer (b)->ip.reass.ip_proto; + u16 dport = vnet_buffer (b)->ip.reass.l4_dst_port; + u16 sport = vnet_buffer (b)->ip.reass.l4_src_port; + u32 sw_if_index, fib_index; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + ip4 = 
vlib_buffer_get_current (b); + + udp_header_t *udp = ip4_next_header (ip4); + tcp_header_t *tcp = ip4_next_header (ip4); + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (ip4->protocol == IP_PROTOCOL_UDP) + { + checksum = &udp->checksum; + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE (!*checksum)) + { + u16 udp_len = + clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, + clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = + ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + checksum = &tcp->checksum; + } + } + + old_src.as_u32 = ip4->src_address.as_u32; + old_dst.as_u32 = ip4->dst_address.as_u32; + + // Deal with fragmented packets + u16 frag_offset = ip4_get_fragment_offset (ip4); + if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset)) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (b, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + clib_memset (&saddr, 0, sizeof (saddr)); + 
saddr.ip4.as_u32 = ip4->src_address.as_u32; + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip4->dst_address.as_u32; + + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, dport, sport, proto, + fib_index, 0); + if (ste) + { + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, fib_index, 0); + + if (!bibe) + return -1; + + nat64_compose_ip6 (&ip6_saddr, &old_src, bibe->fib_index); + ste = + nat64_db_st_entry_create (ctx->thread_index, db, bibe, &ip6_saddr, + &saddr.ip4, sport); + + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); + } + + ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0]; + ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1]; + + ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; + + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + + nat64_session_reset_timeout (ste, ctx->vm); + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (frag_offset, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + if (!vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + udp->dst_port = bibe->in_port; + + if (proto == IP_PROTOCOL_TCP) + { + nat64_tcp_session_set_state (ste, tcp, 0); + } + + csum = ip_csum_sub_even (*checksum, dport); + csum = ip_csum_add_even (csum, udp->dst_port); + csum = ip_csum_sub_even (csum, old_src.as_u32); + csum = ip_csum_sub_even (csum, old_dst.as_u32); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum 
= ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + +static int +nat64_out2in_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) +{ + nat64_main_t *nm = &nat64_main; + nat64_out2in_set_ctx_t *ctx = arg; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr; + ip6_address_t ip6_saddr; + u32 sw_if_index, fib_index; + icmp46_header_t *icmp = ip4_next_header (ip4); + nat64_db_t *db = &nm->db[ctx->thread_index]; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + clib_memset (&saddr, 0, sizeof (saddr)); + saddr.ip4.as_u32 = ip4->src_address.as_u32; + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip4->dst_address.as_u32; + + if (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) + { + u16 out_id = ((u16 *) (icmp))[2]; + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, out_id, 0, + IP_PROTOCOL_ICMP, fib_index, 0); + + if (ste) + { + bibe = + nat64_db_bib_entry_by_index (db, IP_PROTOCOL_ICMP, + ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = + nat64_db_bib_entry_find (db, &daddr, out_id, + IP_PROTOCOL_ICMP, fib_index, 0); + if (!bibe) + return -1; + + nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); + ste = + nat64_db_st_entry_create (ctx->thread_index, db, + bibe, &ip6_saddr, &saddr.ip4, 0); + + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); + } + + nat64_session_reset_timeout (ste, ctx->vm); + + ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0]; + ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1]; + + ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; + ((u16 *) (icmp))[2] = bibe->in_port; + + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = 
bibe->fib_index; + } + else + { + ip6_header_t *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + nat64_compose_ip6 (&ip6->src_address, &ip4->src_address, + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX]); + ip6->dst_address.as_u64[0] = inner_ip6->src_address.as_u64[0]; + ip6->dst_address.as_u64[1] = inner_ip6->src_address.as_u64[1]; + } + + return 0; +} + +static int +nat64_out2in_inner_icmp_set_cb (vlib_buffer_t * b, ip4_header_t * ip4, + ip6_header_t * ip6, void *arg) +{ + nat64_main_t *nm = &nat64_main; + nat64_out2in_set_ctx_t *ctx = arg; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr; + u32 sw_if_index, fib_index; + u8 proto = ip4->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = + fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index); + + clib_memset (&saddr, 0, sizeof (saddr)); + saddr.ip4.as_u32 = ip4->src_address.as_u32; + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip4->dst_address.as_u32; + + if (proto == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *icmp = ip4_next_header (ip4); + u16 out_id = ((u16 *) (icmp))[2]; + proto = IP_PROTOCOL_ICMP; + + if (! 
+ (icmp->type == ICMP6_echo_request + || icmp->type == ICMP6_echo_reply)) + return -1; + + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, out_id, 0, proto, + fib_index, 0); + if (!ste) + return -1; + + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + ip6->dst_address.as_u64[0] = ste->in_r_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = ste->in_r_addr.as_u64[1]; + ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1]; + ((u16 *) (icmp))[2] = bibe->in_port; + + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + } + else + { + udp_header_t *udp = ip4_next_header (ip4); + tcp_header_t *tcp = ip4_next_header (ip4); + u16 dport = udp->dst_port; + u16 sport = udp->src_port; + u16 *checksum; + ip_csum_t csum; + + ste = + nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto, + fib_index, 0); + if (!ste) + return -1; + + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + + nat64_compose_ip6 (&ip6->dst_address, &daddr.ip4, bibe->fib_index); + ip6->src_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->src_address.as_u64[1] = bibe->in_addr.as_u64[1]; + udp->src_port = bibe->in_port; + + if (proto == IP_PROTOCOL_UDP) + checksum = &udp->checksum; + else + checksum = &tcp->checksum; + if (*checksum) + { + csum = ip_csum_sub_even (*checksum, sport); + csum = ip_csum_add_even (csum, udp->src_port); + *checksum = ip_csum_fold (csum); + } + + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + } + + return 0; +} + +static int +nat64_out2in_unk_proto (vlib_main_t * vm, vlib_buffer_t * p, + nat64_out2in_set_ctx_t * ctx) +{ + ip4_header_t *ip4 = vlib_buffer_get_current (p); + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + u32 frag_id; + + nat64_main_t *nm = &nat64_main; + nat64_db_bib_entry_t *bibe; + nat64_db_st_entry_t *ste; + ip46_address_t saddr, daddr; + ip6_address_t ip6_saddr; + u32 sw_if_index, 
fib_index; + u8 proto = ip4->protocol; + nat64_db_t *db = &nm->db[ctx->thread_index]; + + // Deal with fragmented packets + u16 frag_offset = ip4_get_fragment_offset (ip4); + if (PREDICT_FALSE (ip4_get_fragment_more (ip4) || frag_offset)) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (frag_offset, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + sw_if_index = vnet_buffer (ctx->b)->sw_if_index[VLIB_RX]; + fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + clib_memset (&saddr, 0, sizeof (saddr)); + saddr.ip4.as_u32 = ip4->src_address.as_u32; + clib_memset (&daddr, 0, sizeof (daddr)); + daddr.ip4.as_u32 = ip4->dst_address.as_u32; + + ste = + nat64_db_st_entry_find (db, &daddr, &saddr, 0, 0, proto, fib_index, 0); + if (ste) + { + bibe = nat64_db_bib_entry_by_index (db, proto, ste->bibe_index); + if (!bibe) + return -1; + } + else + { + bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, fib_index, 0); + + if (!bibe) + return -1; + + nat64_compose_ip6 (&ip6_saddr, &ip4->src_address, bibe->fib_index); + ste = 
nat64_db_st_entry_create (ctx->thread_index, db, + bibe, &ip6_saddr, &saddr.ip4, 0); + + if (!ste) + return -1; + + vlib_set_simple_counter (&nm->total_sessions, ctx->thread_index, 0, + db->st.st_entries_num); + } + + nat64_session_reset_timeout (ste, ctx->vm); + + ip6->src_address.as_u64[0] = ste->in_r_addr.as_u64[0]; + ip6->src_address.as_u64[1] = ste->in_r_addr.as_u64[1]; + + ip6->dst_address.as_u64[0] = bibe->in_addr.as_u64[0]; + ip6->dst_address.as_u64[1] = bibe->in_addr.as_u64[1]; + + vnet_buffer (ctx->b)->sw_if_index[VLIB_TX] = bibe->fib_index; + + return 0; +} + +VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + nat64_out2in_next_t next_index; + nat64_main_t *nm = &nat64_main; + u32 thread_index = vm->thread_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip4_header_t *ip40; + u32 proto0; + nat64_out2in_set_ctx_t ctx0; + udp_header_t *udp0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip40 = vlib_buffer_get_current (b0); + + ctx0.b = b0; + ctx0.vm = vm; + ctx0.thread_index = thread_index; + + next0 = NAT64_OUT2IN_NEXT_IP6_LOOKUP; + + proto0 = ip_proto_to_nat_proto (ip40->protocol); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + if (nat64_out2in_unk_proto (vm, b0, &ctx0)) + { + next0 = NAT64_OUT2IN_NEXT_DROP; + b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; + } + vlib_increment_simple_counter 
(&nm->counters.out2in.other, + thread_index, sw_if_index0, 1); + goto trace0; + } + + if (proto0 == NAT_PROTOCOL_ICMP) + { + vlib_increment_simple_counter (&nm->counters.out2in.icmp, + thread_index, sw_if_index0, 1); + if (icmp_to_icmp6 + (b0, nat64_out2in_icmp_set_cb, &ctx0, + nat64_out2in_inner_icmp_set_cb, &ctx0)) + { + next0 = NAT64_OUT2IN_NEXT_DROP; + b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + } + else + { + if (proto0 == NAT_PROTOCOL_TCP) + vlib_increment_simple_counter (&nm->counters.out2in.tcp, + thread_index, sw_if_index0, 1); + else + vlib_increment_simple_counter (&nm->counters.out2in.udp, + thread_index, sw_if_index0, 1); + + if (nat64_out2in_tcp_udp (vm, b0, &ctx0)) + { + udp0 = ip4_next_header (ip40); + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if ((proto0 == NAT_PROTOCOL_UDP) + && (udp0->dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))) + { + next0 = NAT64_OUT2IN_NEXT_IP4_LOOKUP; + goto trace0; + } + next0 = NAT64_OUT2IN_NEXT_DROP; + b0->error = node->errors[NAT64_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + } + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + } + + if (next0 == NAT64_OUT2IN_NEXT_DROP) + { + vlib_increment_simple_counter (&nm->counters.out2in.drops, + thread_index, sw_if_index0, 1); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_out2in_node) = { + .name = "nat64-out2in", + .vector_size = sizeof (u32), + 
.format_trace = format_nat64_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat64_out2in_error_strings), + .error_strings = nat64_out2in_error_strings, + .n_next_nodes = NAT64_OUT2IN_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [NAT64_OUT2IN_NEXT_DROP] = "error-drop", + [NAT64_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup", + [NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup", + }, +}; +/* *INDENT-ON* */ + +typedef struct nat64_out2in_frag_set_ctx_t_ +{ + vlib_main_t *vm; + vlib_buffer_t *b; + u32 sess_index; + u32 thread_index; + u8 proto; + u8 first_frag; +} nat64_out2in_frag_set_ctx_t; + +#define foreach_nat64_out2in_handoff_error \ +_(CONGESTION_DROP, "congestion drop") \ +_(SAME_WORKER, "same worker") \ +_(DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym,str) NAT64_OUT2IN_HANDOFF_ERROR_##sym, + foreach_nat64_out2in_handoff_error +#undef _ + NAT64_OUT2IN_HANDOFF_N_ERROR, +} nat64_out2in_handoff_error_t; + +static char *nat64_out2in_handoff_error_strings[] = { +#define _(sym,string) string, + foreach_nat64_out2in_handoff_error +#undef _ +}; + +typedef struct +{ + u32 next_worker_index; +} nat64_out2in_handoff_trace_t; + +static u8 * +format_nat64_out2in_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat64_out2in_handoff_trace_t *t = + va_arg (*args, nat64_out2in_handoff_trace_t *); + + s = + format (s, "NAT64-OUT2IN-HANDOFF: next-worker %d", t->next_worker_index); + + return s; +} + +VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat64_main_t *nm = &nat64_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 n_enq, n_left_from, *from; + u16 thread_indices[VLIB_FRAME_SIZE], *ti; + u32 fq_index; + u32 thread_index = vm->thread_index; + u32 do_handoff = 0, same_worker = 0; + + from = vlib_frame_vector_args (frame); + 
n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); + + b = bufs; + ti = thread_indices; + + fq_index = nm->fq_out2in_index; + + while (n_left_from > 0) + { + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b[0]); + ti[0] = nat64_get_worker_out2in (b[0], ip0); + + if (ti[0] != thread_index) + do_handoff++; + else + same_worker++; + + if (PREDICT_FALSE + ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + { + nat64_out2in_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->next_worker_index = ti[0]; + } + + n_left_from -= 1; + ti += 1; + b += 1; + } + + n_enq = + vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, + frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + vlib_node_increment_counter (vm, node->node_index, + NAT64_OUT2IN_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + vlib_node_increment_counter (vm, node->node_index, + NAT64_OUT2IN_HANDOFF_ERROR_SAME_WORKER, + same_worker); + vlib_node_increment_counter (vm, node->node_index, + NAT64_OUT2IN_HANDOFF_ERROR_DO_HANDOFF, + do_handoff); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = { + .name = "nat64-out2in-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat64_out2in_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat64_out2in_handoff_error_strings), + .error_strings = nat64_out2in_handoff_error_strings, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |