/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief NAT64 implementation
 */

#include <nat/nat64.h>
#include <nat/nat64_db.h>
#include <nat/nat_reass.h>
#include <nat/nat_inlines.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/crc32.h>

nat64_main_t nat64_main;

/* *INDENT-OFF* */
/* Hook up input features */
VNET_FEATURE_INIT (nat64_in2out, static) = {
  .arc_name = "ip6-unicast",
  .node_name = "nat64-in2out",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};
VNET_FEATURE_INIT (nat64_out2in, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat64-out2in",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};
VNET_FEATURE_INIT (nat64_in2out_handoff, static) = {
  .arc_name = "ip6-unicast",
  .node_name = "nat64-in2out-handoff",
  .runs_before = VNET_FEATURES ("ip6-lookup"),
};
VNET_FEATURE_INIT (nat64_out2in_handoff, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "nat64-out2in-handoff",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

static u8 well_known_prefix[] = {
  0x00, 0x64, 0xff, 0x9b, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
/* *INDENT-ON* */

static void
nat64_ip4_add_del_interface_address_cb (ip4_main_t * im, uword opaque,
                                        u32 sw_if_index,
                                        ip4_address_t * address,
                                        u32 address_length,
                                        u32 if_address_index, u32 is_delete)
{
  nat64_main_t *nm = &nat64_main;
  int i, j;

  for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
    {
      if (sw_if_index == nm->auto_add_sw_if_indices[i])
        {
          if (!is_delete)
            {
              /* Don't trip over lease renewal, static config */
              for (j = 0; j < vec_len (nm->addr_pool); j++)
                if (nm->addr_pool[j].addr.as_u32 == address->as_u32)
                  return;

              (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                              address, ~0, 1);
              return;
            }
          else
            {
              (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                              address, ~0, 0);
              return;
            }
        }
    }
}

u32
nat64_get_worker_in2out (ip6_address_t * addr)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  u32 next_worker_index = nm->sm->first_worker_index;
  u32 hash;

#ifdef clib_crc32c_uses_intrinsics
  hash = clib_crc32c ((u8 *) addr->as_u32, 16);
#else
  u64 tmp = addr->as_u64[0] ^ addr->as_u64[1];
  hash = clib_xxhash (tmp);
#endif

  if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
    next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
  else
    next_worker_index += sm->workers[hash % _vec_len (sm->workers)];

  return next_worker_index;
}

u32
nat64_get_worker_out2in (ip4_header_t * ip)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  udp_header_t *udp;
  u16 port;
  u32 proto;

  proto = ip_proto_to_snat_proto (ip->protocol);
  udp = ip4_next_header (ip);
  port = udp->dst_port;

  /* fragments */
  if (PREDICT_FALSE (ip4_is_fragment (ip)))
    {
      if (PREDICT_FALSE (nat_reass_is_drop_frag (0)))
        return vlib_get_thread_index ();

      nat_reass_ip4_t *reass;
      reass = nat_ip4_reass_find (ip->src_address, ip->dst_address,
                                  ip->fragment_id, ip->protocol);

      if (reass && (reass->thread_index != (u32) ~ 0))
        return reass->thread_index;

      if (ip4_is_first_fragment (ip))
        {
          reass = nat_ip4_reass_create (ip->src_address, ip->dst_address,
                                        ip->fragment_id, ip->protocol);
          if (!reass)
            goto no_reass;

          port = clib_net_to_host_u16 (port);
          if (port > 1024)
            reass->thread_index =
              nm->sm->first_worker_index +
              ((port - 1024) / sm->port_per_thread);
          else
            reass->thread_index = vlib_get_thread_index ();
          return reass->thread_index;
        }
      else
        return vlib_get_thread_index ();
    }

no_reass:
  /* unknown protocol */
  if (PREDICT_FALSE (proto == ~0))
    {
      nat64_db_t *db;
      ip46_address_t daddr;
      nat64_db_bib_entry_t *bibe;

      clib_memset (&daddr, 0, sizeof (daddr));
      daddr.ip4.as_u32 = ip->dst_address.as_u32;

      /* *INDENT-OFF* */
      vec_foreach (db, nm->db)
        {
          bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
          if (bibe)
            return (u32) (db - nm->db);
        }
      /* *INDENT-ON* */
      return vlib_get_thread_index ();
    }

  /* ICMP */
  if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
    {
      icmp46_header_t *icmp = (icmp46_header_t *) udp;
      icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
      if (!icmp_is_error_message (icmp))
        port = echo->identifier;
      else
        {
          ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
          proto = ip_proto_to_snat_proto (inner_ip->protocol);
          void *l4_header = ip4_next_header (inner_ip);
          switch (proto)
            {
            case SNAT_PROTOCOL_ICMP:
              icmp = (icmp46_header_t *) l4_header;
              echo = (icmp_echo_header_t *) (icmp + 1);
              port = echo->identifier;
              break;
            case SNAT_PROTOCOL_UDP:
            case SNAT_PROTOCOL_TCP:
              port = ((tcp_udp_header_t *) l4_header)->src_port;
              break;
            default:
              return vlib_get_thread_index ();
            }
        }
    }

  /* worker by outside port (TCP/UDP) */
  port = clib_net_to_host_u16 (port);
  if (port > 1024)
    return nm->sm->first_worker_index + ((port - 1024) / sm->port_per_thread);

  return vlib_get_thread_index ();
}

clib_error_t *
nat64_init (vlib_main_t * vm)
{
  nat64_main_t *nm = &nat64_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  ip4_add_del_interface_address_callback_t cb4;
  ip4_main_t *im = &ip4_main;
  nm->sm = &snat_main;
  vlib_node_t *node;

  vec_validate (nm->db, tm->n_vlib_mains - 1);

  nm->fq_in2out_index = ~0;
  nm->fq_out2in_index = ~0;

  node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
  nm->error_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out");
  nm->in2out_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-slowpath");
  nm->in2out_slowpath_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-in2out-reass");
  nm->in2out_reass_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in");
  nm->out2in_node_index = node->index;

  node = vlib_get_node_by_name (vm, (u8 *) "nat64-out2in-reass");
  nm->out2in_reass_node_index = node->index;

  /* set session timeouts to default values */
  nm->udp_timeout = SNAT_UDP_TIMEOUT;
  nm->icmp_timeout = SNAT_ICMP_TIMEOUT;
  nm->tcp_trans_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
  nm->tcp_est_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;

  nm->total_enabled_count = 0;

  /* Set up the interface address add/del callback */
  cb4.function = nat64_ip4_add_del_interface_address_cb;
  cb4.function_opaque = 0;
  vec_add1 (im->add_del_interface_address_callbacks, cb4);
  nm->ip4_main = im;

  /* Init counters */
  nm->total_bibs.name = "total-bibs";
  nm->total_bibs.stat_segment_name = "/nat64/total-bibs";
  vlib_validate_simple_counter (&nm->total_bibs, 0);
  vlib_zero_simple_counter (&nm->total_bibs, 0);
  nm->total_sessions.name = "total-sessions";
  nm->total_sessions.stat_segment_name = "/nat64/total-sessions";
  vlib_validate_simple_counter (&nm->total_sessions, 0);
  vlib_zero_simple_counter (&nm->total_sessions, 0);

  return 0;
}

static void nat64_free_out_addr_and_port (struct nat64_db_s *db,
                                          ip4_address_t * addr, u16 port,
                                          u8 protocol);

void
nat64_set_hash (u32 bib_buckets, u32 bib_memory_size, u32 st_buckets,
                u32 st_memory_size)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_t *db;

  nm->bib_buckets = bib_buckets;
  nm->bib_memory_size = bib_memory_size;
  nm->st_buckets = st_buckets;
  nm->st_memory_size = st_memory_size;

  /* *INDENT-OFF* */
  vec_foreach (db, nm->db)
    {
      if (nat64_db_init (db, bib_buckets, bib_memory_size, st_buckets,
                         st_memory_size, nat64_free_out_addr_and_port))
        nat_elog_err ("NAT64 DB init failed");
    }
  /* *INDENT-ON* */
}

int
nat64_add_del_pool_addr (u32 thread_index,
                         ip4_address_t * addr, u32 vrf_id, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  snat_address_t *a = 0;
  snat_interface_t *interface;
  int i;
  nat64_db_t *db;
  vlib_thread_main_t *tm = vlib_get_thread_main ();

  /* Check if address already exists */
  for (i = 0; i < vec_len (nm->addr_pool); i++)
    {
      if (nm->addr_pool[i].addr.as_u32 == addr->as_u32)
        {
          a = nm->addr_pool + i;
          break;
        }
    }

  if (is_add)
    {
      if (a)
        return VNET_API_ERROR_VALUE_EXIST;

      vec_add2 (nm->addr_pool, a, 1);
      a->addr = *addr;
      a->fib_index = ~0;
      if (vrf_id != ~0)
        a->fib_index =
          fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
                                             FIB_SOURCE_PLUGIN_HI);
#define _(N, id, n, s) \
      clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); \
      a->busy_##n##_ports = 0; \
      vec_validate_init_empty (a->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
      foreach_snat_protocol
#undef _
    }
  else
    {
      if (!a)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      if (a->fib_index != ~0)
        fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6,
                          FIB_SOURCE_PLUGIN_HI);

      /* Delete sessions using address */
      /* *INDENT-OFF* */
      vec_foreach (db, nm->db)
        {
          nat64_db_free_out_addr (thread_index, db, &a->addr);
          vlib_set_simple_counter (&nm->total_bibs, db - nm->db, 0,
                                   db->bib.bib_entries_num);
          vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
                                   db->st.st_entries_num);
        }
#define _(N, id, n, s) \
      clib_bitmap_free (a->busy_##n##_port_bitmap);
      foreach_snat_protocol
#undef _
      /* *INDENT-ON* */
      vec_del1 (nm->addr_pool, i);
    }

  /* Add/del external address to FIB */
  /* *INDENT-OFF* */
  pool_foreach (interface, nm->interfaces,
  ({
    if (nat_interface_is_inside (interface))
      continue;

    snat_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
    break;
  }));
  /* *INDENT-ON* */

  return 0;
}

void
nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
{
  nat64_main_t *nm = &nat64_main;
  snat_address_t *a = 0;

  /* *INDENT-OFF* */
  vec_foreach (a, nm->addr_pool)
    {
      if (fn (a, ctx))
        break;
    };
  /* *INDENT-ON* */
}

int
nat64_add_interface_address (u32 sw_if_index, int is_add)
{
  nat64_main_t *nm = &nat64_main;
  ip4_main_t *ip4_main = nm->ip4_main;
  ip4_address_t *first_int_addr;
  int i;

  first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);

  for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
    {
      if (nm->auto_add_sw_if_indices[i] == sw_if_index)
        {
          if (is_add)
            return VNET_API_ERROR_VALUE_EXIST;
          else
            {
              /* if have address remove it */
              if (first_int_addr)
                (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                                first_int_addr, ~0, 0);
              vec_del1 (nm->auto_add_sw_if_indices, i);
              return 0;
            }
        }
    }

  if (!is_add)
    return VNET_API_ERROR_NO_SUCH_ENTRY;

  /* add to the auto-address list */
  vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);

  /* If the address is already bound - or static - add it now */
  if (first_int_addr)
    (void) nat64_add_del_pool_addr (vlib_get_thread_index (),
                                    first_int_addr, ~0, 1);

  return 0;
}

int
nat64_add_del_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  snat_interface_t *interface = 0, *i;
  snat_address_t *ap;
  const char *feature_name, *arc_name;

  /* Check if interface already exists */
  /* *INDENT-OFF* */
  pool_foreach (i, nm->interfaces,
  ({
    if (i->sw_if_index == sw_if_index)
      {
        interface = i;
        break;
      }
  }));
  /* *INDENT-ON* */

  if (is_add)
    {
      if (interface)
        goto set_flags;

      pool_get (nm->interfaces, interface);
      interface->sw_if_index = sw_if_index;
      interface->flags = 0;
    set_flags:
      if (is_inside)
        interface->flags |= NAT_INTERFACE_FLAG_IS_INSIDE;
      else
        interface->flags |= NAT_INTERFACE_FLAG_IS_OUTSIDE;

      nm->total_enabled_count++;
      vlib_process_signal_event (nm->sm->vlib_main,
                                 nm->nat64_expire_walk_node_index,
                                 NAT64_CLEANER_RESCHEDULE, 0);
    }
  else
    {
      if (!interface)
        return VNET_API_ERROR_NO_SUCH_ENTRY;

      if ((nat_interface_is_inside (interface)
           && nat_interface_is_outside (interface)))
        interface->flags &= is_inside ? ~NAT_INTERFACE_FLAG_IS_INSIDE :
          ~NAT_INTERFACE_FLAG_IS_OUTSIDE;
      else
        pool_put (nm->interfaces, interface);

      nm->total_enabled_count--;
    }

  if (!is_inside)
    {
      /* *INDENT-OFF* */
      vec_foreach (ap, nm->addr_pool)
        snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add);
      /* *INDENT-ON* */
    }

  if (nm->sm->num_workers > 1)
    {
      feature_name =
        is_inside ? "nat64-in2out-handoff" : "nat64-out2in-handoff";
      if (nm->fq_in2out_index == ~0)
        nm->fq_in2out_index =
          vlib_frame_queue_main_init (nat64_in2out_node.index, 0);
      if (nm->fq_out2in_index == ~0)
        nm->fq_out2in_index =
          vlib_frame_queue_main_init (nat64_out2in_node.index, 0);
    }
  else
    feature_name = is_inside ? "nat64-in2out" : "nat64-out2in";

  arc_name = is_inside ? "ip6-unicast" : "ip4-unicast";

  return vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
                                      is_add, 0, 0);
}

void
nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
{
  nat64_main_t *nm = &nat64_main;
  snat_interface_t *i = 0;

  /* *INDENT-OFF* */
  pool_foreach (i, nm->interfaces,
  ({
    if (fn (i, ctx))
      break;
  }));
  /* *INDENT-ON* */
}

int
nat64_alloc_out_addr_and_port (u32 fib_index, snat_protocol_t proto,
                               ip4_address_t * addr, u16 * port,
                               u32 thread_index)
{
  nat64_main_t *nm = &nat64_main;
  snat_main_t *sm = nm->sm;
  snat_session_key_t k;
  u32 worker_index = 0;
  int rv;

  k.protocol = proto;

  if (sm->num_workers > 1)
    worker_index = thread_index - sm->first_worker_index;

  rv =
    sm->alloc_addr_and_port (nm->addr_pool, fib_index, thread_index, &k,
                             sm->port_per_thread, worker_index);

  if (!rv)
    {
      *port = k.port;
      addr->as_u32 = k.addr.as_u32;
    }

  return rv;
}

static void
nat64_free_out_addr_and_port (struct nat64_db_s *db, ip4_address_t * addr,
                              u16 port, u8 protocol)
{
  nat64_main_t *nm = &nat64_main;
  int i;
  snat_address_t *a;
  u32 thread_index = db - nm->db;
  snat_protocol_t proto = ip_proto_to_snat_proto (protocol);
  u16 port_host_byte_order = clib_net_to_host_u16 (port);

  for (i = 0; i < vec_len (nm->addr_pool); i++)
    {
      a = nm->addr_pool + i;

      if (addr->as_u32 != a->addr.as_u32)
        continue;

      switch (proto)
        {
#define _(N, j, n, s) \
        case SNAT_PROTOCOL_##N: \
          ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \
                  port_host_byte_order) == 1); \
          clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, port_host_byte_order, 0); \
          a->busy_##n##_ports--; \
          a->busy_##n##_ports_per_thread[thread_index]--; \
          break;
          foreach_snat_protocol
#undef _
        default:
          nat_elog_notice ("unknown protocol");
          return;
        }
      break;
    }
}

/**
 * @brief Add/delete static BIB entry in worker thread.
 */
static uword
nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
                            vlib_frame_t * f)
{
  nat64_main_t *nm = &nat64_main;
  u32 thread_index = vm->thread_index;
  nat64_db_t *db = &nm->db[thread_index];
  nat64_static_bib_to_update_t *static_bib;
  nat64_db_bib_entry_t *bibe;
  ip46_address_t addr;

  /* *INDENT-OFF* */
  pool_foreach (static_bib, nm->static_bibs,
  ({
    if ((static_bib->thread_index != thread_index) || (static_bib->done))
      continue;

    if (static_bib->is_add)
      {
        (void) nat64_db_bib_entry_create (thread_index, db,
                                          &static_bib->in_addr,
                                          &static_bib->out_addr,
                                          static_bib->in_port,
                                          static_bib->out_port,
                                          static_bib->fib_index,
                                          static_bib->proto, 1);
        vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                 db->bib.bib_entries_num);
      }
    else
      {
        addr.as_u64[0] = static_bib->in_addr.as_u64[0];
        addr.as_u64[1] = static_bib->in_addr.as_u64[1];
        bibe = nat64_db_bib_entry_find (db, &addr, static_bib->in_port,
                                        static_bib->proto,
                                        static_bib->fib_index, 1);
        if (bibe)
          {
            nat64_db_bib_entry_free (thread_index, db, bibe);
            vlib_set_simple_counter (&nm->total_bibs, thread_index, 0,
                                     db->bib.bib_entries_num);
            vlib_set_simple_counter (&nm->total_sessions, thread_index, 0,
                                     db->st.st_entries_num);
          }
      }

    static_bib->done = 1;
  }));
  /* *INDENT-ON* */

  return 0;
}

static vlib_node_registration_t nat64_static_bib_worker_node;

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
  .function = nat64_static_bib_worker_fn,
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_INTERRUPT,
  .name = "nat64-static-bib-worker",
};
/* *INDENT-ON* */

int
nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
                                ip4_address_t * out_addr, u16 in_port,
                                u16 out_port, u8 proto, u32 vrf_id,
                                u8 is_add)
{
  nat64_main_t *nm = &nat64_main;
  nat64_db_bib_entry_t *bibe;
  u32 fib_index =
    fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id,
                                       FIB_SOURCE_PLUGIN_HI);
  snat_protocol_t p = ip_proto_to_snat_proto (proto);
  ip46_address_t addr;
  int i;
  snat_address_t *a;
  u32 thread_index = 0;
  nat64_db_t *db;
  nat64_static_bib_to_update_t *static_bib;
  vlib_main_t *worker_vm;
  u32 *to_be_free = 0, *index;

  if (nm->sm->num_workers > 1)
    {
      thread_index = nat64_get_worker_in2out (in_addr);
      db = &nm->db[thread_index];
    }
  else
    db = &nm->db[nm->sm->num_workers];

  addr.as_u64[0] = in_addr->as_u64[0];
  addr.as_u64[1] = in_addr->as_u64[1];
  bibe =
    nat64_db_bib_entry_find (db, &addr, clib_host_to_net_u16 (in_port),
                             proto, fib_index, 1);

  if (is_add)
    {
      if (bibe)
        return VNET_API_ERROR_VALUE_EXIST;

      /* outside port must be assigned to the same thread as the internal
         address */
      if ((out_port > 1024) && (nm->sm->num_workers > 1))
        {
          if (thread_index != ((out_port - 1024) / nm->sm->port_per_thread))
            return VNET_API_ERROR_INVALID_VALUE_2;
        }
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef VNET_CONTROL_H_
#define VNET_CONTROL_H_

#include <vnet/vnet.h>
#include <vnet/lisp-cp/gid_dictionary.h>
#include <vnet/lisp-cp/lisp_types.h>
#include <vppinfra/timing_wheel.h>

#define NUMBER_OF_RETRIES                   1
#define PENDING_MREQ_EXPIRATION_TIME        3.0	/* seconds */
#define PENDING_MREQ_QUEUE_LEN              5

#define RLOC_PROBING_INTERVAL               60.0

/* When map-registration is enabled, "quick registration" takes place first:
   in this mode the ETR sends map-register messages at an increased frequency
   until the specified message count is reached. */
#define QUICK_MAP_REGISTER_MSG_COUNT        5
#define QUICK_MAP_REGISTER_INTERVAL         3.0

/* normal map-register period */
#define MAP_REGISTER_INTERVAL               60.0
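
/* A minimal sketch, assuming a caller that tracks how many map-registers
 * have already been sent, of how the constants above combine: the first
 * QUICK_MAP_REGISTER_MSG_COUNT messages go out every
 * QUICK_MAP_REGISTER_INTERVAL seconds, after which the normal
 * MAP_REGISTER_INTERVAL period applies. The helper name is hypothetical,
 * not part of the LISP control-plane API. */
static inline f64
map_register_next_interval_sketch (u32 map_registers_sent)
{
  if (map_registers_sent < QUICK_MAP_REGISTER_MSG_COUNT)
    return QUICK_MAP_REGISTER_INTERVAL;
  return MAP_REGISTER_INTERVAL;
}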

/* how many map-registers may expire before the next map-server election */
#define MAX_EXPIRED_MAP_REGISTERS_DEFAULT   3

#define PENDING_MREG_EXPIRATION_TIME        3.0	/* seconds */

/* 24 hours */
#define MAP_REGISTER_DEFAULT_TTL            86400

typedef struct
{
  gid_address_t src;
  gid_address_t dst;
  u32 retries_num;
  f64 time_to_expire;
  u8 is_smr_invoked;
  u64 *nonces;
  u8 to_be_removed;
} pending_map_request_t;
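
/* A hedged sketch (hypothetical helper, not the VPP control-plane code) of
 * how the fields above relate to NUMBER_OF_RETRIES and
 * PENDING_MREQ_EXPIRATION_TIME: an expired pending map-request is resent
 * while its retry budget lasts and marked to_be_removed once the budget is
 * exhausted. */
static inline u8
pending_map_request_retry_sketch (pending_map_request_t * r, f64 now)
{
  if (r->to_be_removed || now < r->time_to_expire)
    return 0;			/* nothing to do yet */
  if (r->retries_num >= NUMBER_OF_RETRIES)
    {
      r->to_be_removed = 1;	/* give up; let the cleanup pass free it */
      return 0;
    }
  r->retries_num++;
  r->time_to_expire = now + PENDING_MREQ_EXPIRATION_TIME;
  return 1;			/* caller should resend the map-request */
}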

typedef struct
{
  f64 time_to_expire;
} pending_map_register_t;

typedef struct
{
  gid_address_t leid;
  gid_address_t reid;
  u8 is_src_dst;
  locator_pair_t *locator_pairs;
} fwd_entry_t;

typedef struct
{
  gid_address_t leid;
  gid_address_t reid;
} lisp_adjacency_t;

typedef enum
{
  IP4_MISS_PACKET,
  IP6_MISS_PACKET
} miss_packet_type_t;

/* map-server/map-resolver structure */
typedef struct
{
  u8 is_down;
  f64 last_update;
  ip_address_t address;
  char *key;
} lisp_msmr_t;

typedef struct
{
  /* headers */
  u8 data[100];
  u32 length;
  miss_packet_type_t type;
} miss_packet_t;

typedef struct
{
  u8 mac[6];
  u32 ip4;
} lisp_api_l2_arp_entry_t;

typedef struct
{
  u8 mac[6];
  u8 ip6[16];
} lisp_api_ndp_entry_t;
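
/* Map-request mode: build map-requests (and the resulting map-cache
   lookups) on the destination EID only, or on the (source, destination)
   EID pair. */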

typedef enum
{
  MR_MODE_DST_ONLY = 0,
  MR_MODE_SRC_DST,
  _MR_MODE_MAX
} map_request_mode_t;

#define foreach_lisp_flag_bit       \
  _(USE_PETR, "Use Proxy-ETR")                    \
  _(XTR_MODE, "ITR/ETR mode")                     \
  _(PETR_MODE, "Proxy-ETR mode")                   \
  _(PITR_MODE, "Proxy-ITR mode")                  \
  _(STATS_ENABLED, "Statistics enabled")

typedef enum lisp_flag_bits
{
#define _(sym, str) LISP_FLAG_BIT_##sym,