diff options
Diffstat (limited to 'src/plugins/nat/nat44-ei')
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei.c | 534 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei.h | 81 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_ha.c | 1316 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_ha.h | 179 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md | 70 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_in2out.c | 1972 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_inlines.h | 51 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_out2in.c | 1580 |
8 files changed, 5783 insertions, 0 deletions
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.c b/src/plugins/nat/nat44-ei/nat44_ei.c new file mode 100644 index 00000000000..f50ccb9ba1b --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei.c @@ -0,0 +1,534 @@ +/* + * nat44_ei.c - nat44 endpoint independent plugin + * + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4.h> +#include <vnet/plugin/plugin.h> +#include <nat/nat.h> +#include <nat/nat_dpo.h> +#include <nat/lib/ipfix_logging.h> +#include <nat/lib/nat_syslog.h> +#include <nat/nat_inlines.h> +#include <nat/nat44/inlines.h> +#include <nat/nat_affinity.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/ip/reass/ip4_sv_reass.h> +#include <vppinfra/bihash_16_8.h> +#include <nat/nat44/ed_inlines.h> +#include <vnet/ip/ip_table.h> + +#include <nat/nat44-ei/nat44_ei_inlines.h> +#include <nat/nat44-ei/nat44_ei.h> +/* + * Enable the plugin: select the default address and port allocation + * algorithm and enable NAT HA. Always returns 0. + */ +int +nat44_ei_plugin_enable () +{ + nat44_ei_set_alloc_default (); + nat_ha_enable (); + return 0; +} +/* + * Disable the plugin; the only action taken here is disabling NAT HA. + */ +void +nat44_ei_plugin_disable () +{ + nat_ha_disable (); +} +/* + * Release everything a session holds besides its pool slot: + * - remove its in2out and out2in hash entries (a failed removal is only + * logged as a warning), + * - unless is_ha is set, emit the syslog and IPFIX delete records and send + * the HA session-delete event, + * - for sessions that are not static, free the outside address and port. + * The session element itself is not freed here. + */ +void +nat44_ei_free_session_data (snat_main_t *sm, snat_session_t *s, + u32 thread_index, u8 is_ha) +{ + clib_bihash_kv_8_8_t kv; + + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_index); + + init_nat_i2o_k (&kv, s); + if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0)) + nat_elog_warn 
("in2out key del failed"); + + init_nat_o2i_k (&kv, s); + if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0)) + nat_elog_warn ("out2in key del failed"); + + if (!is_ha) + { + nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index, + &s->in2out.addr, s->in2out.port, + &s->out2in.addr, s->out2in.port, s->nat_proto); + + nat_ipfix_logging_nat44_ses_delete ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, + s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index); + + nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr, + s->ext_host_port, s->nat_proto, s->out2in.fib_index, + thread_index); + } + + if (snat_is_session_static (s)) + return; + + snat_free_outside_address_and_port (sm->addresses, thread_index, + &s->out2in.addr, s->out2in.port, + s->nat_proto); +} + +static_always_inline void +nat44_ei_user_del_sessions (snat_user_t *u, u32 thread_index) +{ + dlist_elt_t *elt; + snat_session_t *s; + + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + // get head + elt = + pool_elt_at_index (tsm->list_pool, u->sessions_per_user_list_head_index); + // get first element + elt = pool_elt_at_index (tsm->list_pool, elt->next); + + while (elt->value != ~0) + { + s = pool_elt_at_index (tsm->sessions, elt->value); + elt = pool_elt_at_index (tsm->list_pool, elt->next); + + nat44_ei_free_session_data (sm, s, thread_index, 0); + nat44_delete_session (sm, s, thread_index); + } +} + +int +nat44_ei_user_del (ip4_address_t *addr, u32 fib_index) +{ + int rv = 1; + + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm; + + snat_user_key_t user_key; + clib_bihash_kv_8_8_t kv, value; + + if (sm->endpoint_dependent) + return rv; + + user_key.addr.as_u32 = addr->as_u32; + user_key.fib_index = fib_index; + kv.key = user_key.as_u64; + + if (sm->num_workers > 1) + { + vec_foreach (tsm, sm->per_thread_data) + { + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) + { + 
nat44_ei_user_del_sessions ( + pool_elt_at_index (tsm->users, value.value), + tsm->thread_index); + rv = 0; + break; + } + } + } + else + { + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) + { + nat44_ei_user_del_sessions ( + pool_elt_at_index (tsm->users, value.value), tsm->thread_index); + rv = 0; + } + } + return rv; +} + +void +nat44_ei_static_mapping_del_sessions (snat_main_t *sm, + snat_main_per_thread_data_t *tsm, + snat_user_key_t u_key, int addr_only, + ip4_address_t e_addr, u16 e_port) +{ + clib_bihash_kv_8_8_t kv, value; + kv.key = u_key.as_u64; + u64 user_index; + dlist_elt_t *head, *elt; + snat_user_t *u; + snat_session_t *s; + u32 elt_index, head_index, ses_index; + + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) + { + user_index = value.value; + u = pool_elt_at_index (tsm->users, user_index); + if (u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (tsm->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + while (ses_index != ~0) + { + s = pool_elt_at_index (tsm->sessions, ses_index); + elt = pool_elt_at_index (tsm->list_pool, elt->next); + ses_index = elt->value; + + if (!addr_only) + { + if ((s->out2in.addr.as_u32 != e_addr.as_u32) || + (s->out2in.port != e_port)) + continue; + } + + if (is_lb_session (s)) + continue; + + if (!snat_is_session_static (s)) + continue; + + nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); + nat44_delete_session (sm, s, tsm - sm->per_thread_data); + + if (!addr_only) + break; + } + } + } +} + +u32 +nat44_ei_get_in2out_worker_index (ip4_header_t *ip0, u32 rx_fib_index0, + u8 is_output) +{ + snat_main_t *sm = &snat_main; + u32 next_worker_index = 0; + u32 hash; + + next_worker_index = sm->first_worker_index; + hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) + + 
(ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >> 24); + + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; + else + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; + + return next_worker_index; +} + +u32 +nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0, + u32 rx_fib_index0, u8 is_output) +{ + snat_main_t *sm = &snat_main; + udp_header_t *udp; + u16 port; + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + u32 proto; + u32 next_worker_index = 0; + + /* first try static mappings without port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv, ip0->dst_address, 0, rx_fib_index0, 0); + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + return m->workers[0]; + } + } + + proto = ip_proto_to_nat_proto (ip0->protocol); + udp = ip4_next_header (ip0); + port = udp->dst_port; + + /* unknown protocol */ + if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER)) + { + /* use current thread */ + return vlib_get_thread_index (); + } + + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t *icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + if (!icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + port = vnet_buffer (b)->ip.reass.l4_src_port; + else + { + /* if error message, then it's not fragmented and we can access it */ + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + proto = ip_proto_to_nat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case NAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t *) l4_header; + echo = (icmp_echo_header_t *) (icmp + 1); + port = echo->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t *) 
l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } + } + } + + /* try static mappings with port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + init_nat_k (&kv, ip0->dst_address, port, rx_fib_index0, proto); + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + return m->workers[0]; + } + } + + /* worker by outside port */ + next_worker_index = sm->first_worker_index; + next_worker_index += + sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread]; + return next_worker_index; +} + +/* + * Default address and port allocation callback. + * + * Walks 'addresses'. For the requested protocol an address is usable while + * its per-thread busy-port count is below 'port_per_thread'. A usable address + * whose fib_index equals 'fib_index' is used immediately; a usable address + * with fib_index ~0 is remembered as a fallback and tried after the walk. + * The port is drawn at random from this thread's slice, + * (port_per_thread * snat_thread_index) + [0, port_per_thread - 1] + 1024, + * retrying while the port's refcount is non-zero. + * + * On success the port refcount and the busy-port counters are incremented + * (the refcount is taken the same way in the primary and the fallback path), + * *addr and *port (network byte order) are filled in and 0 is returned. + * Returns 1 for an unknown protocol, or when no address could be used, in + * which case address exhaustion is logged via IPFIX first. + */ +static int +nat44_ei_alloc_default_cb (snat_address_t *addresses, u32 fib_index, + u32 thread_index, nat_protocol_t proto, + ip4_address_t *addr, u16 *port, u16 port_per_thread, + u32 snat_thread_index) +{ + int i; + snat_address_t *a, *ga = 0; + u32 portnum; + + for (i = 0; i < vec_len (addresses); i++) + { + a = addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \ + { \ + if (a->fib_index == fib_index) \ + { \ + while (1) \ + { \ + portnum = (port_per_thread * snat_thread_index) + \ + snat_random_port (0, port_per_thread - 1) + 1024; \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info ("unknown protocol"); + return 1; + } + } + + if (ga) + { + a = ga; + switch (proto) + { +#define _(N, j, n, s) \ + case NAT_PROTOCOL_##N: \ + while (1) \ + { \ + portnum = (port_per_thread * snat_thread_index) + \ + snat_random_port (0, port_per_thread - 1) + 1024; \ + if 
(a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info ("unknown protocol"); + return 1; + } + } + + /* Totally out of translations to use... */ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +static int +nat44_ei_alloc_range_cb (snat_address_t *addresses, u32 fib_index, + u32 thread_index, nat_protocol_t proto, + ip4_address_t *addr, u16 *port, u16 port_per_thread, + u32 snat_thread_index) +{ + snat_main_t *sm = &snat_main; + snat_address_t *a = addresses; + u16 portnum, ports; + + ports = sm->end_port - sm->start_port + 1; + + if (!vec_len (addresses)) + goto exhausted; + + switch (proto) + { +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports < ports) \ + { \ + while (1) \ + { \ + portnum = snat_random_port (sm->start_port, sm->end_port); \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info ("unknown protocol"); + return 1; + } + +exhausted: + /* Totally out of translations to use... 
*/ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +static int +nat44_ei_alloc_mape_cb (snat_address_t *addresses, u32 fib_index, + u32 thread_index, nat_protocol_t proto, + ip4_address_t *addr, u16 *port, u16 port_per_thread, + u32 snat_thread_index) +{ + snat_main_t *sm = &snat_main; + snat_address_t *a = addresses; + u16 m, ports, portnum, A, j; + m = 16 - (sm->psid_offset + sm->psid_length); + ports = (1 << (16 - sm->psid_length)) - (1 << m); + + if (!vec_len (addresses)) + goto exhausted; + + switch (proto) + { +#define _(N, i, n, s) \ + case NAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports < ports) \ + { \ + while (1) \ + { \ + A = snat_random_port (1, pow2_mask (sm->psid_offset)); \ + j = snat_random_port (0, pow2_mask (m)); \ + portnum = A | (sm->psid << sm->psid_offset) | (j << (16 - m)); \ + if (a->busy_##n##_port_refcounts[portnum]) \ + continue; \ + ++a->busy_##n##_port_refcounts[portnum]; \ + a->busy_##n##_ports++; \ + *addr = a->addr; \ + *port = clib_host_to_net_u16 (portnum); \ + return 0; \ + } \ + } \ + break; + foreach_nat_protocol +#undef _ + default : nat_elog_info ("unknown protocol"); + return 1; + } + +exhausted: + /* Totally out of translations to use... 
*/ + nat_ipfix_logging_addresses_exhausted (thread_index, 0); + return 1; +} + +void +nat44_ei_set_alloc_default () +{ + snat_main_t *sm = &snat_main; + + sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_DEFAULT; + sm->alloc_addr_and_port = nat44_ei_alloc_default_cb; +} + +void +nat44_ei_set_alloc_range (u16 start_port, u16 end_port) +{ + snat_main_t *sm = &snat_main; + + sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_RANGE; + sm->alloc_addr_and_port = nat44_ei_alloc_range_cb; + sm->start_port = start_port; + sm->end_port = end_port; +} + +void +nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length) +{ + snat_main_t *sm = &snat_main; + + sm->addr_and_port_alloc_alg = NAT_ADDR_AND_PORT_ALLOC_ALG_MAPE; + sm->alloc_addr_and_port = nat44_ei_alloc_mape_cb; + sm->psid = psid; + sm->psid_offset = psid_offset; + sm->psid_length = psid_length; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei.h b/src/plugins/nat/nat44-ei/nat44_ei.h new file mode 100644 index 00000000000..ac430bee5c3 --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file nat44_ei.h + * NAT44 endpoint independent plugin declarations + */ +#ifndef __included_nat44_ei_h__ +#define __included_nat44_ei_h__ +/** + * @brief Enable the NAT44 EI plugin (default allocator, NAT HA enabled) + * + * @return 0 + */ +int nat44_ei_plugin_enable (); +/** + * @brief Disable the NAT44 EI plugin (NAT HA disabled) + */ +void nat44_ei_plugin_disable (); + +/** + * @brief Delete specific NAT44 EI user and his sessions + * + * @param addr IPv4 address + * @param fib_index FIB table index + * + * @return 0 if the user was found and his sessions deleted, 1 otherwise + * (also 1 when endpoint dependent mode is active) + */ +int nat44_ei_user_del (ip4_address_t *addr, u32 fib_index); + +/** + * @brief Delete session for static mapping + * + * Looks up the user identified by u_key in the per-thread user hash and, + * if he has static sessions, frees and deletes the matching ones. Load + * balancing sessions and non-static sessions are skipped. + * + * @param sm NAT main + * @param tsm per-thread data holding the user and his sessions + * @param u_key user key (address and FIB index) to look up + * @param addr_only if non-zero delete every static session of the user, + * otherwise only the first one matching e_addr/e_port + * @param e_addr external IPv4 address (compared only if !addr_only) + * @param e_port external port (compared only if !addr_only) + */ +void nat44_ei_static_mapping_del_sessions (snat_main_t *sm, + snat_main_per_thread_data_t *tsm, + snat_user_key_t u_key, + int addr_only, ip4_address_t e_addr, + u16 e_port); +/** + * @brief Select the worker thread for an in2out packet + * + * The worker is chosen from a hash of the IPv4 source address. + * + * @param ip0 IPv4 header of the packet + * @param rx_fib_index0 RX FIB index (not used by the implementation) + * @param is_output output feature flag (not used by the implementation) + * + * @return worker thread index + */ +u32 nat44_ei_get_in2out_worker_index (ip4_header_t *ip0, u32 rx_fib_index0, + u8 is_output); +/** + * @brief Select the worker thread for an out2in packet + * + * Static mappings (first without, then with port) take precedence; + * otherwise the worker is derived from the outside port. Packets of an + * unknown protocol stay on the current thread. + * + * @param b packet buffer + * @param ip0 IPv4 header of the packet + * @param rx_fib_index0 RX FIB index used for the static mapping lookup + * @param is_output output feature flag (not used by the implementation) + * + * @return worker thread index + */ +u32 nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0, + u32 rx_fib_index0, u8 is_output); + +/** + * @brief Set address and port assignment algorithm to default/standard + */ +void nat44_ei_set_alloc_default (void); + +/** + * @brief Set address and port assignment algorithm for MAP-E CE + * + * @param psid Port Set Identifier value + * @param psid_offset number of offset bits + * @param psid_length length of PSID + */ +void nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length); + +/** + * @brief Set address and port assignment algorithm for port range + * + * @param start_port beginning of the port range + * @param end_port end of the port range + */ +void nat44_ei_set_alloc_range (u16 start_port, u16 end_port); + +#endif /* __included_nat44_ei_h__ */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.c b/src/plugins/nat/nat44-ei/nat44_ei_ha.c new file mode 100644 index 00000000000..0b904bf079b --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.c @@ -0,0 +1,1316 @@ +/* + * Copyright (c) 2019 Cisco 
and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <nat/nat_inlines.h> +#include <nat/nat44/ed_inlines.h> +#include <nat/nat44-ei/nat44_ei_ha.h> +#include <vnet/udp/udp_local.h> +#include <nat/nat.h> +#include <vppinfra/atomics.h> + +/* number of retries */ +#define NAT_HA_RETRIES 3 + +#define foreach_nat_ha_counter \ +_(RECV_ADD, "add-event-recv", 0) \ +_(RECV_DEL, "del-event-recv", 1) \ +_(RECV_REFRESH, "refresh-event-recv", 2) \ +_(SEND_ADD, "add-event-send", 3) \ +_(SEND_DEL, "del-event-send", 4) \ +_(SEND_REFRESH, "refresh-event-send", 5) \ +_(RECV_ACK, "ack-recv", 6) \ +_(SEND_ACK, "ack-send", 7) \ +_(RETRY_COUNT, "retry-count", 8) \ +_(MISSED_COUNT, "missed-count", 9) + +/* NAT HA protocol version */ +#define NAT_HA_VERSION 0x01 + +/* NAT HA protocol flags */ +#define NAT_HA_FLAG_ACK 0x01 + +/* NAT HA event types */ +typedef enum +{ + NAT_HA_ADD = 1, + NAT_HA_DEL, + NAT_HA_REFRESH, +} nat_ha_event_type_t; + +/* NAT HA protocol header */ +typedef struct +{ + /* version */ + u8 version; + /* flags */ + u8 flags; + /* event count */ + u16 count; + /* sequence number */ + u32 sequence_number; + /* thread index where events originated */ + u32 thread_index; +} __attribute__ ((packed)) nat_ha_message_header_t; + +/* NAT HA protocol event data */ +typedef struct +{ + /* event type */ + u8 event_type; + /* session data */ + u8 protocol; + u16 flags; + u32 in_addr; + u32 out_addr; + u16 in_port; + u16 out_port; + u32 eh_addr; 
+ u32 ehn_addr; + u16 eh_port; + u16 ehn_port; + u32 fib_index; + u32 total_pkts; + u64 total_bytes; +} __attribute__ ((packed)) nat_ha_event_t; + +typedef enum +{ +#define _(N, s, v) NAT_HA_COUNTER_##N = v, + foreach_nat_ha_counter +#undef _ + NAT_HA_N_COUNTERS +} nat_ha_counter_t; + +/* data waiting for ACK */ +typedef struct +{ + /* sequence number */ + u32 seq; + /* retry count */ + u32 retry_count; + /* next retry time */ + f64 retry_timer; + /* 1 if HA resync */ + u8 is_resync; + /* packet data */ + u8 *data; +} nat_ha_resend_entry_t; + +/* per thread data */ +typedef struct +{ + /* buffer under construction */ + vlib_buffer_t *state_sync_buffer; + /* frame containing NAT HA buffers */ + vlib_frame_t *state_sync_frame; + /* number of events */ + u16 state_sync_count; + /* next event offset */ + u32 state_sync_next_event_offset; + /* data waiting for ACK */ + nat_ha_resend_entry_t *resend_queue; +} nat_ha_per_thread_data_t; + +/* NAT HA settings */ +typedef struct nat_ha_main_s +{ + u8 enabled; /* set by nat_ha_enable (), cleared by nat_ha_disable () */ + /* local IP address and UDP port */ + ip4_address_t src_ip_address; + u16 src_port; + /* failover IP address and UDP port */ + ip4_address_t dst_ip_address; + u16 dst_port; + /* path MTU between local and failover */ + u32 state_sync_path_mtu; + /* number of seconds after which to send session counters refresh */ + u32 session_refresh_interval; + /* counters */ + vlib_simple_counter_main_t counters[NAT_HA_N_COUNTERS]; + vlib_main_t *vlib_main; + /* sequence number counter */ + u32 sequence_number; + /* 1 if resync in progress */ + u8 in_resync; + /* number of remaining ACK for resync */ + u32 resync_ack_count; + /* number of missed ACK for resync */ + u32 resync_ack_missed; + /* resync data */ + nat_ha_resync_event_cb_t event_callback; + u32 client_index; + u32 pid; + /* per thread data */ + u32 num_workers; + nat_ha_per_thread_data_t *per_thread_data; + /* graph node indexes, looked up by name in nat_ha_set_node_indexes () */ + u32 ha_handoff_node_index; + u32 ha_process_node_index; + u32 ha_worker_node_index; + u32 ha_node_index; + + /* 
worker handoff frame-queue index */ + u32 fq_index; +} nat_ha_main_t; + +nat_ha_main_t nat_ha_main; + +static_always_inline void +nat44_ei_ha_sadd (ip4_address_t *in_addr, u16 in_port, ip4_address_t *out_addr, + u16 out_port, ip4_address_t *eh_addr, u16 eh_port, + ip4_address_t *ehn_addr, u16 ehn_port, u8 proto, + u32 fib_index, u16 flags, u32 thread_index) +{ + snat_main_t *sm = &snat_main; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_user_t *u; + snat_session_t *s; + clib_bihash_kv_8_8_t kv; + vlib_main_t *vm = vlib_get_main (); + f64 now = vlib_time_now (vm); + nat_outside_fib_t *outside_fib; + fib_node_index_t fei = FIB_NODE_INDEX_INVALID; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = { + .ip4.as_u32 = eh_addr->as_u32, + }, + }; + + if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING)) + { + if (nat_set_outside_address_and_port (sm->addresses, thread_index, + *out_addr, out_port, proto)) + return; + } + + u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index); + if (!u) + return; + + s = nat_session_alloc_or_recycle (sm, u, thread_index, now); + if (!s) + return; + + if (sm->endpoint_dependent) + { + nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto)); + } + + s->out2in.addr.as_u32 = out_addr->as_u32; + s->out2in.port = out_port; + s->nat_proto = proto; + s->last_heard = now; + s->flags = flags; + s->ext_host_addr.as_u32 = eh_addr->as_u32; + s->ext_host_port = eh_port; + user_session_increment (sm, u, snat_is_session_static (s)); + switch (vec_len (sm->outside_fibs)) + { + case 0: + s->out2in.fib_index = sm->outside_fib_index; + break; + case 1: + s->out2in.fib_index = sm->outside_fibs[0].fib_index; + break; + default: + vec_foreach (outside_fib, sm->outside_fibs) + { + fei = fib_table_lookup (outside_fib->fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + if (fib_entry_get_resolving_interface (fei) != ~0) + { + s->out2in.fib_index = outside_fib->fib_index; + 
break; + } + } + } + break; + } + init_nat_o2i_kv (&kv, s, s - tsm->sessions); + if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1)) + nat_elog_warn ("out2in key add failed"); + + s->in2out.addr.as_u32 = in_addr->as_u32; + s->in2out.port = in_port; + s->in2out.fib_index = fib_index; + init_nat_i2o_kv (&kv, s, s - tsm->sessions); + if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1)) + nat_elog_warn ("in2out key add failed"); +} + +static_always_inline void +nat44_ei_ha_sdel (ip4_address_t *out_addr, u16 out_port, + ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index, + u32 ti) +{ + snat_main_t *sm = &snat_main; + clib_bihash_kv_8_8_t kv, value; + u32 thread_index; + snat_session_t *s; + snat_main_per_thread_data_t *tsm; + + if (sm->num_workers > 1) + thread_index = sm->first_worker_index + + (sm->workers[(clib_net_to_host_u16 (out_port) - 1024) / + sm->port_per_thread]); + else + thread_index = sm->num_workers; + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); + + init_nat_k (&kv, *out_addr, out_port, fib_index, proto); + if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value)) + return; + + s = pool_elt_at_index (tsm->sessions, value.value); + nat_free_session_data (sm, s, thread_index, 1); + nat44_delete_session (sm, s, thread_index); +} + +static_always_inline void +nat44_ei_ha_sref (ip4_address_t *out_addr, u16 out_port, + ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index, + u32 total_pkts, u64 total_bytes, u32 thread_index) +{ + snat_main_t *sm = &snat_main; + clib_bihash_kv_8_8_t kv, value; + snat_session_t *s; + snat_main_per_thread_data_t *tsm; + + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); + + init_nat_k (&kv, *out_addr, out_port, fib_index, proto); + if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value)) + return; + + s = pool_elt_at_index (tsm->sessions, value.value); + s->total_pkts = total_pkts; + s->total_bytes = total_bytes; +} + +static void +nat_ha_resync_fin (void) +{ + nat_ha_main_t *ha = 
&nat_ha_main; + + /* if no more resync ACK remaining we are done */ + if (ha->resync_ack_count) + return; + + ha->in_resync = 0; + if (ha->resync_ack_missed) + { + nat_elog_info ("resync completed with result FAILED"); + } + else + { + nat_elog_info ("resync completed with result SUCCESS"); + } + if (ha->event_callback) + ha->event_callback (ha->client_index, ha->pid, ha->resync_ack_missed); +} + +/* + * cache HA NAT data waiting for ACK + * + * Appends an entry to the per-thread resend queue holding a private copy of + * 'data'; the first retry is due 2 seconds from now. Always returns 0. + * + * NOTE(review): data_len is a u8, so a packet longer than 255 bytes would be + * cached truncated - confirm against the callers. + */ +static int +nat_ha_resend_queue_add (u32 seq, u8 * data, u8 data_len, u8 is_resync, + u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index]; + nat_ha_resend_entry_t *entry; + f64 now = vlib_time_now (ha->vlib_main); + + vec_add2 (td->resend_queue, entry, 1); + clib_memset (entry, 0, sizeof (*entry)); + entry->retry_timer = now + 2.0; + entry->seq = seq; + entry->is_resync = is_resync; + vec_add (entry->data, data, data_len); + + return 0; +} + +static_always_inline void +nat_ha_ack_recv (u32 seq, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index]; + u32 i; + + vec_foreach_index (i, td->resend_queue) + { + if (td->resend_queue[i].seq != seq) + continue; + + vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ACK], + thread_index, 0, 1); + /* ACK received remove cached data */ + if (td->resend_queue[i].is_resync) + { + clib_atomic_fetch_sub (&ha->resync_ack_count, 1); + nat_ha_resync_fin (); + } + vec_free (td->resend_queue[i].data); + vec_del1 (td->resend_queue, i); + nat_elog_debug_X1 ("ACK for seq %d received", "i4", + clib_net_to_host_u32 (seq)); + + return; + } +} + +/* + * scan non-ACKed HA NAT for retry + * + * For every queued entry whose retry timer has expired: + * - if it was already retried NAT_HA_RETRIES times it is counted as missed + * (updating the resync accounting for resync entries) and dropped, + * - otherwise the cached packet is copied into a fresh buffer, handed to + * ip4-lookup and the timer is re-armed 2 seconds ahead. + * If no buffer can be allocated the scan stops early; entries already marked + * as missed are still removed so they are not accounted a second time. + */ +static void +nat_ha_resend_scan (f64 now, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index]; + u32 i, *to_delete = 0; + vlib_main_t *vm = ha->vlib_main; + vlib_buffer_t *b = 0; + vlib_frame_t 
*f; + u32 bi, *to_next; + ip4_header_t *ip; + + vec_foreach_index (i, td->resend_queue) + { + if (td->resend_queue[i].retry_timer > now) + continue; + + /* maximum retry reached delete cached data */ + if (td->resend_queue[i].retry_count >= NAT_HA_RETRIES) + { + nat_elog_notice_X1 ("seq %d missed", "i4", + clib_net_to_host_u32 (td->resend_queue[i].seq)); + if (td->resend_queue[i].is_resync) + { + clib_atomic_fetch_add (&ha->resync_ack_missed, 1); + clib_atomic_fetch_sub (&ha->resync_ack_count, 1); + nat_ha_resync_fin (); + } + vec_add1 (to_delete, i); + vlib_increment_simple_counter (&ha->counters + [NAT_HA_COUNTER_MISSED_COUNT], + thread_index, 0, 1); + continue; + } + + /* retry to send non-ACKed data */ + nat_elog_debug_X1 ("state sync seq %d resend", "i4", + clib_net_to_host_u32 (td->resend_queue[i].seq)); + td->resend_queue[i].retry_count++; + vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RETRY_COUNT], + thread_index, 0, 1); + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + nat_elog_warn ("HA NAT state sync can't allocate buffer"); + /* stop resending, but still drop the entries marked above */ + break; + } + b = vlib_get_buffer (vm, bi); + b->current_length = vec_len (td->resend_queue[i].data); + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + ip = vlib_buffer_get_current (b); + clib_memcpy (ip, td->resend_queue[i].data, + vec_len (td->resend_queue[i].data)); + f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, ip4_lookup_node.index, f); + td->resend_queue[i].retry_timer = now + 2.0; + } + + /* + * Remove the marked entries from the highest index down: vec_del1 moves + * the last element into the freed slot, so deleting in ascending order + * would leave later recorded indexes pointing at the wrong (or no) + * element. + */ + for (i = vec_len (to_delete); i > 0; i--) + { + u32 qi = to_delete[i - 1]; + + vec_free (td->resend_queue[qi].data); + vec_del1 (td->resend_queue, qi); + } + vec_free (to_delete); +} + +void +nat_ha_enable () +{ + nat_ha_main_t *ha = &nat_ha_main; + ha->enabled = 1; +} + +void +nat_ha_disable () +{ + nat_ha_main_t 
*ha = &nat_ha_main; + ha->dst_port = 0; + ha->enabled = 0; +} + +void +nat_ha_set_node_indexes (nat_ha_main_t *ha, vlib_main_t *vm) +{ + vlib_node_t *node; + + node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-handoff"); + ha->ha_handoff_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-process"); + ha->ha_process_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-worker"); + ha->ha_worker_node_index = node->index; + node = vlib_get_node_by_name (vm, (u8 *) "nat-ha"); + ha->ha_node_index = node->index; +} + +void +nat_ha_init (vlib_main_t * vm, u32 num_workers, u32 num_threads) +{ + nat_ha_main_t *ha = &nat_ha_main; + clib_memset (ha, 0, sizeof (*ha)); + + nat_ha_set_node_indexes (ha, vm); + + ha->vlib_main = vm; + ha->fq_index = ~0; + + ha->num_workers = num_workers; + vec_validate (ha->per_thread_data, num_threads); + +#define _(N, s, v) ha->counters[v].name = s; \ + ha->counters[v].stat_segment_name = "/nat44/ha/" s; \ + vlib_validate_simple_counter(&ha->counters[v], 0); \ + vlib_zero_simple_counter(&ha->counters[v], 0); + foreach_nat_ha_counter +#undef _ +} + +int +nat_ha_set_listener (ip4_address_t * addr, u16 port, u32 path_mtu) +{ + nat_ha_main_t *ha = &nat_ha_main; + + /* unregister previously set UDP port */ + if (ha->src_port) + udp_unregister_dst_port (ha->vlib_main, ha->src_port, 1); + + ha->src_ip_address.as_u32 = addr->as_u32; + ha->src_port = port; + ha->state_sync_path_mtu = path_mtu; + + if (port) + { + /* if multiple worker threads first go to handoff node */ + if (ha->num_workers > 1) + { + if (ha->fq_index == ~0) + ha->fq_index = vlib_frame_queue_main_init (ha->ha_node_index, 0); + udp_register_dst_port (ha->vlib_main, port, + ha->ha_handoff_node_index, 1); + } + else + { + udp_register_dst_port (ha->vlib_main, port, ha->ha_node_index, 1); + } + nat_elog_info_X1 ("HA listening on port %d for state sync", "i4", port); + } + + return 0; +} + +void +nat_ha_get_listener (ip4_address_t * addr, u16 * 
port, u32 * path_mtu) +{ + nat_ha_main_t *ha = &nat_ha_main; + + addr->as_u32 = ha->src_ip_address.as_u32; + *port = ha->src_port; + *path_mtu = ha->state_sync_path_mtu; +} + +int +nat_ha_set_failover (ip4_address_t * addr, u16 port, + u32 session_refresh_interval) +{ + nat_ha_main_t *ha = &nat_ha_main; + + ha->dst_ip_address.as_u32 = addr->as_u32; + ha->dst_port = port; + ha->session_refresh_interval = session_refresh_interval; + + vlib_process_signal_event (ha->vlib_main, ha->ha_process_node_index, 1, 0); + + return 0; +} + +void +nat_ha_get_failover (ip4_address_t * addr, u16 * port, + u32 * session_refresh_interval) +{ + nat_ha_main_t *ha = &nat_ha_main; + + addr->as_u32 = ha->dst_ip_address.as_u32; + *port = ha->dst_port; + *session_refresh_interval = ha->session_refresh_interval; +} + +static_always_inline void +nat_ha_recv_add (nat_ha_event_t * event, f64 now, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + ip4_address_t in_addr, out_addr, eh_addr, ehn_addr; + u32 fib_index; + u16 flags; + + vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ADD], + thread_index, 0, 1); + + in_addr.as_u32 = event->in_addr; + out_addr.as_u32 = event->out_addr; + eh_addr.as_u32 = event->eh_addr; + ehn_addr.as_u32 = event->ehn_addr; + fib_index = clib_net_to_host_u32 (event->fib_index); + flags = clib_net_to_host_u16 (event->flags); + + nat44_ei_ha_sadd (&in_addr, event->in_port, &out_addr, event->out_port, + &eh_addr, event->eh_port, &ehn_addr, event->ehn_port, + event->protocol, fib_index, flags, thread_index); +} + +static_always_inline void +nat_ha_recv_del (nat_ha_event_t * event, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + ip4_address_t out_addr, eh_addr; + u32 fib_index; + + vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_DEL], + thread_index, 0, 1); + + out_addr.as_u32 = event->out_addr; + eh_addr.as_u32 = event->eh_addr; + fib_index = clib_net_to_host_u32 (event->fib_index); + + nat44_ei_ha_sdel (&out_addr, 
event->out_port, &eh_addr, event->eh_port, + event->protocol, fib_index, thread_index); +} + +static_always_inline void +nat_ha_recv_refresh (nat_ha_event_t * event, f64 now, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + ip4_address_t out_addr, eh_addr; + u32 fib_index, total_pkts; + u64 total_bytes; + + vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_REFRESH], + thread_index, 0, 1); + + out_addr.as_u32 = event->out_addr; + eh_addr.as_u32 = event->eh_addr; + fib_index = clib_net_to_host_u32 (event->fib_index); + total_pkts = clib_net_to_host_u32 (event->total_pkts); + total_bytes = clib_net_to_host_u64 (event->total_bytes); + + nat44_ei_ha_sref (&out_addr, event->out_port, &eh_addr, event->eh_port, + event->protocol, fib_index, total_pkts, total_bytes, + thread_index); +} + +/* process received NAT HA event */ +static_always_inline void +nat_ha_event_process (nat_ha_event_t * event, f64 now, u32 thread_index) +{ + switch (event->event_type) + { + case NAT_HA_ADD: + nat_ha_recv_add (event, now, thread_index); + break; + case NAT_HA_DEL: + nat_ha_recv_del (event, thread_index); + break; + case NAT_HA_REFRESH: + nat_ha_recv_refresh (event, now, thread_index); + break; + default: + nat_elog_notice_X1 ("Unsupported HA event type %d", "i4", + event->event_type); + break; + } +} + +static inline void +nat_ha_header_create (vlib_buffer_t * b, u32 * offset, u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_message_header_t *h; + ip4_header_t *ip; + udp_header_t *udp; + u32 sequence_number; + + b->current_data = 0; + b->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h); + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + h = (nat_ha_message_header_t *) (udp + 1); + + /* IP header */ + ip->ip_version_and_header_length = 
0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->flags_and_fragment_offset = + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + ip->src_address.as_u32 = ha->src_ip_address.as_u32; + ip->dst_address.as_u32 = ha->dst_ip_address.as_u32; + /* UDP header */ + udp->src_port = clib_host_to_net_u16 (ha->src_port); + udp->dst_port = clib_host_to_net_u16 (ha->dst_port); + udp->checksum = 0; + + /* NAT HA protocol header */ + h->version = NAT_HA_VERSION; + h->flags = 0; + h->count = 0; + h->thread_index = clib_host_to_net_u32 (thread_index); + sequence_number = clib_atomic_fetch_add (&ha->sequence_number, 1); + h->sequence_number = clib_host_to_net_u32 (sequence_number); + + *offset = + sizeof (ip4_header_t) + sizeof (udp_header_t) + + sizeof (nat_ha_message_header_t); +} + +static inline void +nat_ha_send (vlib_frame_t * f, vlib_buffer_t * b, u8 is_resync, + u32 thread_index) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index]; + nat_ha_message_header_t *h; + ip4_header_t *ip; + udp_header_t *udp; + vlib_main_t *vm = vlib_mains[thread_index]; + + ip = vlib_buffer_get_current (b); + udp = ip4_next_header (ip); + h = (nat_ha_message_header_t *) (udp + 1); + + h->count = clib_host_to_net_u16 (td->state_sync_count); + + ip->length = clib_host_to_net_u16 (b->current_length); + ip->checksum = ip4_header_checksum (ip); + udp->length = clib_host_to_net_u16 (b->current_length - sizeof (*ip)); + + nat_ha_resend_queue_add (h->sequence_number, (u8 *) ip, b->current_length, + is_resync, thread_index); + + vlib_put_frame_to_node (vm, ip4_lookup_node.index, f); +} + +/* add NAT HA protocol event */ +static_always_inline void +nat_ha_event_add (nat_ha_event_t * event, u8 do_flush, u32 thread_index, + u8 is_resync) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index]; + vlib_main_t *vm = vlib_mains[thread_index]; + vlib_buffer_t *b = 0; + vlib_frame_t *f; + u32 bi 
= ~0, offset; + + b = td->state_sync_buffer; + + if (PREDICT_FALSE (b == 0)) + { + if (do_flush) + return; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + nat_elog_warn ("HA NAT state sync can't allocate buffer"); + return; + } + + b = td->state_sync_buffer = vlib_get_buffer (vm, bi); + clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b))); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + offset = 0; + } + else + { + bi = vlib_get_buffer_index (vm, b); + offset = td->state_sync_next_event_offset; + } + + f = td->state_sync_frame; + if (PREDICT_FALSE (f == 0)) + { + u32 *to_next; + f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + td->state_sync_frame = f; + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + } + + if (PREDICT_FALSE (td->state_sync_count == 0)) + nat_ha_header_create (b, &offset, thread_index); + + if (PREDICT_TRUE (do_flush == 0)) + { + clib_memcpy_fast (b->data + offset, event, sizeof (*event)); + offset += sizeof (*event); + td->state_sync_count++; + b->current_length += sizeof (*event); + + switch (event->event_type) + { + case NAT_HA_ADD: + vlib_increment_simple_counter (&ha->counters + [NAT_HA_COUNTER_SEND_ADD], + thread_index, 0, 1); + break; + case NAT_HA_DEL: + vlib_increment_simple_counter (&ha->counters + [NAT_HA_COUNTER_SEND_DEL], + thread_index, 0, 1); + break; + case NAT_HA_REFRESH: + vlib_increment_simple_counter (&ha->counters + [NAT_HA_COUNTER_SEND_REFRESH], + thread_index, 0, 1); + break; + default: + break; + } + } + + if (PREDICT_FALSE + (do_flush || offset + (sizeof (*event)) > ha->state_sync_path_mtu)) + { + nat_ha_send (f, b, is_resync, thread_index); + td->state_sync_buffer = 0; + td->state_sync_frame = 0; + td->state_sync_count = 0; + offset = 0; + if (is_resync) + { + clib_atomic_fetch_add (&ha->resync_ack_count, 1); + nat_ha_resync_fin (); + } + } + + td->state_sync_next_event_offset = offset; +} + +#define skip_if_disabled() \ +do { \ + nat_ha_main_t *ha = &nat_ha_main; \ + if 
(PREDICT_TRUE (!ha->dst_port)) \ + return; \ +} while (0) + +void +nat_ha_flush (u8 is_resync) +{ + skip_if_disabled (); + nat_ha_event_add (0, 1, 0, is_resync); +} + +void +nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, ip4_address_t * out_addr, + u16 out_port, ip4_address_t * eh_addr, u16 eh_port, + ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index, + u16 flags, u32 thread_index, u8 is_resync) +{ + nat_ha_event_t event; + + skip_if_disabled (); + + clib_memset (&event, 0, sizeof (event)); + event.event_type = NAT_HA_ADD; + event.flags = clib_host_to_net_u16 (flags); + event.in_addr = in_addr->as_u32; + event.in_port = in_port; + event.out_addr = out_addr->as_u32; + event.out_port = out_port; + event.eh_addr = eh_addr->as_u32; + event.eh_port = eh_port; + event.ehn_addr = ehn_addr->as_u32; + event.ehn_port = ehn_port; + event.fib_index = clib_host_to_net_u32 (fib_index); + event.protocol = proto; + nat_ha_event_add (&event, 0, thread_index, is_resync); +} + +void +nat_ha_sdel (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr, + u16 eh_port, u8 proto, u32 fib_index, u32 thread_index) +{ + nat_ha_event_t event; + + skip_if_disabled (); + + clib_memset (&event, 0, sizeof (event)); + event.event_type = NAT_HA_DEL; + event.out_addr = out_addr->as_u32; + event.out_port = out_port; + event.eh_addr = eh_addr->as_u32; + event.eh_port = eh_port; + event.fib_index = clib_host_to_net_u32 (fib_index); + event.protocol = proto; + nat_ha_event_add (&event, 0, thread_index, 0); +} + +void +nat_ha_sref (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr, + u16 eh_port, u8 proto, u32 fib_index, u32 total_pkts, + u64 total_bytes, u32 thread_index, f64 * last_refreshed, f64 now) +{ + nat_ha_main_t *ha = &nat_ha_main; + nat_ha_event_t event; + + skip_if_disabled (); + + if ((*last_refreshed + ha->session_refresh_interval) > now) + return; + + *last_refreshed = now; + clib_memset (&event, 0, sizeof (event)); + event.event_type = 
NAT_HA_REFRESH; + event.out_addr = out_addr->as_u32; + event.out_port = out_port; + event.eh_addr = eh_addr->as_u32; + event.eh_port = eh_port; + event.fib_index = clib_host_to_net_u32 (fib_index); + event.protocol = proto; + event.total_pkts = clib_host_to_net_u32 (total_pkts); + event.total_bytes = clib_host_to_net_u64 (total_bytes); + nat_ha_event_add (&event, 0, thread_index, 0); +} + +static_always_inline u8 +plugin_enabled () +{ + nat_ha_main_t *ha = &nat_ha_main; + return ha->enabled; +} + +/* per thread process waiting for interrupt */ +static uword +nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + u32 thread_index = vm->thread_index; + + if (plugin_enabled () == 0) + return 0; + + /* flush HA NAT data under construction */ + nat_ha_event_add (0, 1, thread_index, 0); + /* scan if we need to resend some non-ACKed data */ + nat_ha_resend_scan (vlib_time_now (vm), thread_index); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat_ha_worker_node) = { + .function = nat_ha_worker_fn, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .name = "nat-ha-worker", +}; +/* *INDENT-ON* */ + +/* periodically send interrupt to each thread */ +static uword +nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + nat_ha_main_t *ha = &nat_ha_main; + uword event_type; + uword *event_data = 0; + u32 ti; + + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + if (event_type) + nat_elog_info ("nat-ha-process: bogus kickoff event received"); + vec_reset_length (event_data); + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + for (ti = 0; ti < vec_len (vlib_mains); ti++) + { + if (ti >= vec_len (ha->per_thread_data)) + continue; + + vlib_node_set_interrupt_pending (vlib_mains[ti], + nat_ha_worker_node.index); + } + } + + return 0; +} + +/* 
*INDENT-OFF* */ +VLIB_REGISTER_NODE (nat_ha_process_node) = { + .function = nat_ha_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "nat-ha-process", +}; +/* *INDENT-ON* */ + +void +nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed) +{ + nat_ha_main_t *ha = &nat_ha_main; + + *in_resync = ha->in_resync; + *resync_ack_missed = ha->resync_ack_missed; +} + +typedef struct +{ + ip4_address_t addr; + u32 event_count; +} nat_ha_trace_t; + +static u8 * +format_nat_ha_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat_ha_trace_t *t = va_arg (*args, nat_ha_trace_t *); + + s = + format (s, "nat-ha: %u events from %U", t->event_count, + format_ip4_address, &t->addr); + + return s; +} + +typedef enum +{ + NAT_HA_NEXT_IP4_LOOKUP, + NAT_HA_NEXT_DROP, + NAT_HA_N_NEXT, +} nat_ha_next_t; + +#define foreach_nat_ha_error \ +_(PROCESSED, "pkts-processed") \ +_(BAD_VERSION, "bad-version") + +typedef enum +{ +#define _(sym, str) NAT_HA_ERROR_##sym, + foreach_nat_ha_error +#undef _ + NAT_HA_N_ERROR, +} nat_ha_error_t; + +static char *nat_ha_error_strings[] = { +#define _(sym, str) str, + foreach_nat_ha_error +#undef _ +}; + +/* process received HA NAT protocol messages */ +static uword +nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + u32 pkts_processed = 0; + ip4_main_t *i4m = &ip4_main; + u8 host_config_ttl = i4m->host_config.ttl; + nat_ha_main_t *ha = &nat_ha_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, next0, src_addr0, 
dst_addr0;; + vlib_buffer_t *b0; + nat_ha_message_header_t *h0; + nat_ha_event_t *e0; + u16 event_count0, src_port0, dst_port0, old_len0; + ip4_header_t *ip0; + udp_header_t *udp0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + vlib_buffer_advance (b0, -sizeof (*udp0)); + udp0 = vlib_buffer_get_current (b0); + vlib_buffer_advance (b0, -sizeof (*ip0)); + ip0 = vlib_buffer_get_current (b0); + + next0 = NAT_HA_NEXT_DROP; + + if (h0->version != NAT_HA_VERSION) + { + b0->error = node->errors[NAT_HA_ERROR_BAD_VERSION]; + goto done0; + } + + event_count0 = clib_net_to_host_u16 (h0->count); + /* ACK for previously send data */ + if (!event_count0 && (h0->flags & NAT_HA_FLAG_ACK)) + { + nat_ha_ack_recv (h0->sequence_number, thread_index); + b0->error = node->errors[NAT_HA_ERROR_PROCESSED]; + goto done0; + } + + e0 = (nat_ha_event_t *) (h0 + 1); + + /* process each event */ + while (event_count0) + { + nat_ha_event_process (e0, now, thread_index); + event_count0--; + e0 = (nat_ha_event_t *) ((u8 *) e0 + sizeof (nat_ha_event_t)); + } + + next0 = NAT_HA_NEXT_IP4_LOOKUP; + pkts_processed++; + + /* reply with ACK */ + b0->current_length = sizeof (*ip0) + sizeof (*udp0) + sizeof (*h0); + + src_addr0 = ip0->src_address.data_u32; + dst_addr0 = ip0->dst_address.data_u32; + ip0->src_address.data_u32 = dst_addr0; + ip0->dst_address.data_u32 = src_addr0; + old_len0 = ip0->length; + ip0->length = clib_host_to_net_u16 (b0->current_length); + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl, + ip4_header_t, ttl); + ip0->ttl = host_config_ttl; + sum0 = + ip_csum_update (sum0, old_len0, ip0->length, ip4_header_t, + length); + ip0->checksum = ip_csum_fold (sum0); + + udp0->checksum = 0; + src_port0 = udp0->src_port; + dst_port0 = udp0->dst_port; + udp0->src_port = dst_port0; + udp0->dst_port = src_port0; + 
udp0->length = + clib_host_to_net_u16 (b0->current_length - sizeof (*ip0)); + + h0->flags = NAT_HA_FLAG_ACK; + h0->count = 0; + vlib_increment_simple_counter (&ha->counters + [NAT_HA_COUNTER_SEND_ACK], + thread_index, 0, 1); + + done0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + nat_ha_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + ip4_header_t *ip = + (void *) (b0->data + vnet_buffer (b0)->l3_hdr_offset); + t->event_count = clib_net_to_host_u16 (h0->count); + t->addr.as_u32 = ip->src_address.data_u32; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ha->ha_node_index, NAT_HA_ERROR_PROCESSED, + pkts_processed); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (nat_ha_node) = { + .function = nat_ha_node_fn, + .name = "nat-ha", + .vector_size = sizeof (u32), + .format_trace = format_nat_ha_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (nat_ha_error_strings), + .error_strings = nat_ha_error_strings, + .n_next_nodes = NAT_HA_N_NEXT, + .next_nodes = { + [NAT_HA_NEXT_IP4_LOOKUP] = "ip4-lookup", + [NAT_HA_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +typedef struct +{ + u32 next_worker_index; + u8 in2out; +} nat_ha_handoff_trace_t; + +#define foreach_nat_ha_handoff_error \ +_(CONGESTION_DROP, "congestion drop") \ +_(SAME_WORKER, "same worker") \ +_(DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym, + foreach_nat_ha_handoff_error +#undef _ + NAT_HA_HANDOFF_N_ERROR, +} nat_ha_handoff_error_t; + +static char *nat_ha_handoff_error_strings[] = { +#define _(sym,string) string, + foreach_nat_ha_handoff_error +#undef _ +}; + +static u8 * +format_nat_ha_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, 
vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + nat_ha_handoff_trace_t *t = va_arg (*args, nat_ha_handoff_trace_t *); + + s = + format (s, "NAT_HA_WORKER_HANDOFF: next-worker %d", t->next_worker_index); + + return s; +} + +/* do worker handoff based on thread_index in NAT HA protocol header */ +static uword +nat_ha_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + nat_ha_main_t *ha = &nat_ha_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 n_enq, n_left_from, *from; + u16 thread_indices[VLIB_FRAME_SIZE], *ti; + u32 thread_index = vm->thread_index; + u32 do_handoff = 0, same_worker = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); + + b = bufs; + ti = thread_indices; + + while (n_left_from > 0) + { + nat_ha_message_header_t *h0; + + h0 = vlib_buffer_get_current (b[0]); + ti[0] = clib_net_to_host_u32 (h0->thread_index); + + if (ti[0] != thread_index) + do_handoff++; + else + same_worker++; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b[0]->flags & VLIB_BUFFER_IS_TRACED))) + { + nat_ha_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->next_worker_index = ti[0]; + } + + n_left_from -= 1; + ti += 1; + b += 1; + } + + n_enq = + vlib_buffer_enqueue_to_thread (vm, ha->fq_index, from, thread_indices, + frame->n_vectors, 1); + + if (n_enq < frame->n_vectors) + vlib_node_increment_counter (vm, node->node_index, + NAT_HA_HANDOFF_ERROR_CONGESTION_DROP, + frame->n_vectors - n_enq); + vlib_node_increment_counter (vm, node->node_index, + NAT_HA_HANDOFF_ERROR_SAME_WORKER, same_worker); + vlib_node_increment_counter (vm, node->node_index, + NAT_HA_HANDOFF_ERROR_DO_HANDOFF, do_handoff); + return frame->n_vectors; +} + +int +nat_ha_resync (u32 client_index, u32 pid, + nat_ha_resync_event_cb_t event_callback) +{ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE 
(nat_ha_handoff_node) = { + .function = nat_ha_handoff_node_fn, + .name = "nat-ha-handoff", + .vector_size = sizeof (u32), + .format_trace = format_nat_ha_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(nat_ha_handoff_error_strings), + .error_strings = nat_ha_handoff_error_strings, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.h b/src/plugins/nat/nat44-ei/nat44_ei_ha.h new file mode 100644 index 00000000000..c466d4c9288 --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief NAT active-passive HA + */ + +#ifndef __included_nat_ha_h__ +#define __included_nat_ha_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +/* Call back functions for received HA events on passive/failover */ +typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port, + ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, + ip4_address_t * ehn_addr, u16 ehn_port, + u8 proto, u32 fib_index, u16 flags, + u32 thread_index); +typedef void (*nat_ha_sdel_cb_t) (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, + u8 proto, u32 fib_index, u32 thread_index); +typedef void (*nat_ha_sref_cb_t) (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, + u8 proto, u32 fib_index, u32 total_pkts, + u64 total_bytes, u32 thread_index); + +/** + * @brief Enable NAT HA + */ +void nat_ha_enable (); + +/** + * @brief Disable NAT HA + */ +void nat_ha_disable (); + +/** + * @brief Initialize NAT HA + */ +void nat_ha_init (vlib_main_t * vm, u32 num_workers, u32 num_threads); + +/** + * @brief Set HA listener (local settings) + * + * @param addr local IP4 address + * @param port local UDP port number + * @param path_mtu path MTU between local and failover + * + * @returns 0 on success, non-zero value otherwise. + */ +int nat_ha_set_listener (ip4_address_t * addr, u16 port, u32 path_mtu); + +/** + * @brief Get HA listener/local configuration + */ +void nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu); + +/** + * @brief Set HA failover (remote settings) + * + * @param addr failover IP4 address + * @param port failover UDP port number + * @param session_refresh_interval number of seconds after which to send + * session counters refresh + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +int nat_ha_set_failover (ip4_address_t * addr, u16 port, + u32 session_refresh_interval); + +/** + * @brief Get HA failover/remote settings + */ +void nat_ha_get_failover (ip4_address_t * addr, u16 * port, + u32 * session_refresh_interval); + +/** + * @brief Create session add HA event + * + * @param in_addr inside IPv4 address + * @param in_port inside L4 port number + * @param out_addr outside IPv4 address + * @param out_port outside L4 port number + * @param eh_addr external host IPv4 address + * @param eh_port external host L4 port number + * @param ehn_addr external host IPv4 address after translation + * @param ehn_port external host L4 port number after translation + * @param proto L4 protocol + * @param fib_index fib index + * @param flags session flags + * @param thread_index thread index + * @param is_resync 1 if HA resync + */ +void nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, + ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, + ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, + u32 fib_index, u16 flags, u32 thread_index, u8 is_resync); + +/** + * @brief Create session delete HA event + * + * @param out_addr outside IPv4 address + * @param out_port outside L4 port number + * @param eh_addr external host IPv4 address + * @param eh_port external host L4 port number + * @param proto L4 protocol + * @param fib_index fib index + * @param thread_index thread index + */ +void nat_ha_sdel (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 fib_index, u32 thread_index); + +/** + * @brief Create session refresh HA event + * + * @param out_addr outside IPv4 address + * @param out_port outside L4 port number + * @param eh_addr external host IPv4 address + * @param eh_port external host L4 port number + * @param proto L4 protocol + * @param fib_index fib index + * @param total_pkts total packets processed + * @param total_bytes total bytes processed + * @param thread_index thread 
index + * @param last_refreshed last session refresh time + * @param now current time + */ +void nat_ha_sref (ip4_address_t * out_addr, u16 out_port, + ip4_address_t * eh_addr, u16 eh_port, u8 proto, + u32 fib_index, u32 total_pkts, u64 total_bytes, + u32 thread_index, f64 * last_refreshed, f64 now); + +/** + * @brief Flush the current HA data (for testing) + */ +void nat_ha_flush (u8 is_resync); + +typedef void (*nat_ha_resync_event_cb_t) (u32 client_index, u32 pid, + u32 missed_count); + +/** + * @brief Resync HA (resend existing sessions to new failover) + */ +int nat_ha_resync (u32 client_index, u32 pid, + nat_ha_resync_event_cb_t event_callback); + +/** + * @brief Get resync status + * + * @param in_resync 1 if resync in progress + * @param resync_ack_missed number of missed (not ACKed) messages + */ +void nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed); + +#endif /* __included_nat_ha_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md new file mode 100644 index 00000000000..f0ea209e250 --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md @@ -0,0 +1,70 @@ +# Active-Passive NAT HA {#nat_ha_doc} + +## Introduction + +One NAT node actively manages traffic while the other is kept synchronized, ready to transition to the active state, take over seamlessly, and enforce the same NAT sessions when a failure occurs. Both nodes share the same configuration settings. + +## Configuration + +### NAT HA protocol +Session synchronization traffic is distributed through an IPv4 UDP connection. The active node sends NAT HA protocol events to the passive node. To achieve reliable transfer, the NAT HA protocol uses acknowledgement with re-transmission. This requires the passive node to respond with an acknowledgement message as it receives the data. 
The active node keeps a record of each packet it sends and maintains a timer from when the packet was sent. The active node re-transmits a packet if the timer expires before receiving the acknowledgement. + +### Topology + +The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to synchronize NAT sessions using NAT HA protocol. + +``` + +-----------------------+ + | outside network | + +-----------------------+ + / \ + / \ + / \ + / \ + / \ ++---------+ +---------+ +| GE0/0/1 | Active Passive | GE0/0/1 | +| | | | +| GE0/0/3|-------------------|GE0/0/3 | +| | sync network | | +| GE0/0/0 | | GE0/0/0 | ++---------+ +---------+ + \ / + \ / + \ / + \ / + \ / + +-----------------------+ + | inside network | + +-----------------------+ +``` + +### Active node configuration + +``` +set interface ip address GigabitEthernet0/0/1 10.15.7.101/24 +set interface ip address GigabitEthernet0/0/0 172.16.10.101/24 +set interface ip address GigabitEthernet0/0/3 10.0.0.1/24 +set interface state GigabitEthernet0/0/0 up +set interface state GigabitEthernet0/0/1 up +set interface state GigabitEthernet0/0/3 up +set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1 +nat44 add address 10.15.7.100 +nat ha listener 10.0.0.1:1234 +nat ha failover 10.0.0.2:2345 +``` + +### Passive node configuration + +``` +set interface ip address GigabitEthernet0/0/1 10.15.7.102/24 +set interface ip address GigabitEthernet0/0/0 172.16.10.102/24 +set interface ip address GigabitEthernet0/0/3 10.0.0.2/24 +set interface state GigabitEthernet0/0/0 up +set interface state GigabitEthernet0/0/1 up +set interface state GigabitEthernet0/0/3 up +set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1 +nat44 add address 10.15.7.100 +nat ha listener 10.0.0.2:2345 +``` + diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c new file mode 100644 index 00000000000..1c341318957 --- /dev/null +++ 
b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -0,0 +1,1972 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT44 inside to outside network translation + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/udp/udp_local.h> +#include <nat/nat.h> +#include <nat/lib/ipfix_logging.h> +#include <nat/nat_inlines.h> +#include <nat/lib/nat_syslog.h> +#include <nat/nat44-ei/nat44_ei_inlines.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> +#include <nat/lib/nat_inlines.h> + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u32 session_index; + u32 is_slow_path; + u32 is_hairpinning; +} snat_in2out_trace_t; + +/* packet trace format function */ +static u8 * +format_snat_in2out_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t *t = va_arg (*args, snat_in2out_trace_t *); + char *tag; + + tag = t->is_slow_path ? 
"NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH"; + + s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, + t->sw_if_index, t->next_index, t->session_index); + if (t->is_hairpinning) + { + s = format (s, ", with-hairpinning"); + } + + return s; +} + +static u8 * +format_snat_in2out_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t *t = va_arg (*args, snat_in2out_trace_t *); + + s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + + return s; +} + +#define foreach_snat_in2out_error \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ +_(OUT_OF_PORTS, "out of ports") \ +_(BAD_OUTSIDE_FIB, "outside VRF ID not found") \ +_(BAD_ICMP_TYPE, "unsupported ICMP type") \ +_(NO_TRANSLATION, "no translation") \ +_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ +_(CANNOT_CREATE_USER, "cannot create NAT user") + +typedef enum +{ +#define _(sym,str) SNAT_IN2OUT_ERROR_##sym, + foreach_snat_in2out_error +#undef _ + SNAT_IN2OUT_N_ERROR, +} snat_in2out_error_t; + +static char *snat_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_snat_in2out_error +#undef _ +}; + +typedef enum +{ + SNAT_IN2OUT_NEXT_LOOKUP, + SNAT_IN2OUT_NEXT_DROP, + SNAT_IN2OUT_NEXT_ICMP_ERROR, + SNAT_IN2OUT_NEXT_SLOW_PATH, + SNAT_IN2OUT_N_NEXT, +} snat_in2out_next_t; + +static inline int +snat_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, + u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, + u32 rx_fib_index0, u32 thread_index) +{ + udp_header_t *udp0 = ip4_next_header (ip0); + clib_bihash_kv_8_8_t kv0, value0; + + init_nat_k (&kv0, ip0->dst_address, udp0->dst_port, sm->outside_fib_index, + proto0); + + /* NAT packet aimed at external address if */ + /* has active sessions */ + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + &value0)) 
+ { + /* or is static mappings */ + ip4_address_t placeholder_addr; + u16 placeholder_port; + u32 placeholder_fib_index; + if (!snat_static_mapping_match + (sm, ip0->dst_address, udp0->dst_port, sm->outside_fib_index, + proto0, &placeholder_addr, &placeholder_port, + &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0)) + return 0; + } + else + return 0; + + if (sm->forwarding_enabled) + return 1; + + return snat_not_translate_fast (sm, node, sw_if_index0, ip0, proto0, + rx_fib_index0); +} + +static inline int +nat_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip0, + u32 proto0, u16 src_port, u16 dst_port, + u32 thread_index, u32 sw_if_index) +{ + clib_bihash_kv_8_8_t kv0, value0; + snat_interface_t *i; + + /* src NAT check */ + init_nat_k (&kv0, ip0->src_address, src_port, + ip4_fib_table_get_index_for_sw_if_index (sw_if_index), proto0); + + if (!clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].out2in, &kv0, &value0)) + return 1; + + /* dst NAT check */ + init_nat_k (&kv0, ip0->dst_address, dst_port, + ip4_fib_table_get_index_for_sw_if_index (sw_if_index), proto0); + if (!clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv0, &value0)) + { + /* hairpinning */ + /* *INDENT-OFF* */ + pool_foreach (i, sm->output_feature_interfaces) + { + if ((nat_interface_is_inside(i)) && (sw_if_index == i->sw_if_index)) + return 0; + } + /* *INDENT-ON* */ + return 1; + } + + return 0; +} + +#ifndef CLIB_MARCH_VARIANT +int +nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg) +{ + snat_main_t *sm = &snat_main; + nat44_is_idle_session_ctx_t *ctx = arg; + snat_session_t *s; + u64 sess_timeout_time; + snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, + ctx->thread_index); + clib_bihash_kv_8_8_t s_kv; + + s = pool_elt_at_index (tsm->sessions, kv->value); + sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); + if (ctx->now >= sess_timeout_time) + { + init_nat_o2i_k (&s_kv, s); + if 
(clib_bihash_add_del_8_8 (&tsm->out2in, &s_kv, 0)) + nat_elog_warn ("out2in key del failed"); + + nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + + nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index, + &s->in2out.addr, s->in2out.port, + &s->out2in.addr, s->out2in.port, s->nat_proto); + + nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr, + s->ext_host_port, s->nat_proto, s->out2in.fib_index, + ctx->thread_index); + + if (!snat_is_session_static (s)) + snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, + &s->out2in.addr, + s->out2in.port, s->nat_proto); + + nat44_delete_session (sm, s, ctx->thread_index); + return 1; + } + + return 0; +} +#endif + +static u32 +slow_path (snat_main_t * sm, vlib_buffer_t * b0, + ip4_header_t * ip0, + ip4_address_t i2o_addr, + u16 i2o_port, + u32 rx_fib_index0, + nat_protocol_t nat_proto, + snat_session_t ** sessionp, + vlib_node_runtime_t * node, u32 next0, u32 thread_index, f64 now) +{ + snat_user_t *u; + snat_session_t *s = 0; + clib_bihash_kv_8_8_t kv0; + u8 is_sm = 0; + nat_outside_fib_t *outside_fib; + fib_node_index_t fei = FIB_NODE_INDEX_INVALID; + u8 identity_nat; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + .fp_addr = { + .ip4.as_u32 = ip0->dst_address.as_u32, + }, + }; + nat44_is_idle_session_ctx_t ctx0; + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + + if (PREDICT_FALSE (nat44_ei_maximum_sessions_exceeded (sm, thread_index))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice ("maximum sessions exceeded"); + return SNAT_IN2OUT_NEXT_DROP; + } + + /* First try to match static mapping by local address and port */ + if (snat_static_mapping_match + (sm, i2o_addr, i2o_port, rx_fib_index0, 
nat_proto, &sm_addr, + &sm_port, &sm_fib_index, 0, 0, 0, 0, 0, &identity_nat, 0)) + { + /* Try to create dynamic translation */ + if (sm->alloc_addr_and_port ( + sm->addresses, rx_fib_index0, thread_index, nat_proto, &sm_addr, + &sm_port, sm->port_per_thread, + sm->per_thread_data[thread_index].snat_thread_index)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + return SNAT_IN2OUT_NEXT_DROP; + } + } + else + { + if (PREDICT_FALSE (identity_nat)) + { + *sessionp = s; + return next0; + } + + is_sm = 1; + } + + u = nat_user_get_or_create (sm, &ip0->src_address, rx_fib_index0, + thread_index); + if (!u) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_CANNOT_CREATE_USER]; + return SNAT_IN2OUT_NEXT_DROP; + } + + s = nat_session_alloc_or_recycle (sm, u, thread_index, now); + if (!s) + { + nat44_delete_user_with_no_session (sm, u, thread_index); + nat_elog_warn ("create NAT session failed"); + return SNAT_IN2OUT_NEXT_DROP; + } + + if (is_sm) + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + user_session_increment (sm, u, is_sm); + s->in2out.addr = i2o_addr; + s->in2out.port = i2o_port; + s->in2out.fib_index = rx_fib_index0; + s->nat_proto = nat_proto; + s->out2in.addr = sm_addr; + s->out2in.port = sm_port; + s->out2in.fib_index = sm->outside_fib_index; + switch (vec_len (sm->outside_fibs)) + { + case 0: + s->out2in.fib_index = sm->outside_fib_index; + break; + case 1: + s->out2in.fib_index = sm->outside_fibs[0].fib_index; + break; + default: + /* *INDENT-OFF* */ + vec_foreach (outside_fib, sm->outside_fibs) + { + fei = fib_table_lookup (outside_fib->fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + { + if (fib_entry_get_resolving_interface (fei) != ~0) + { + s->out2in.fib_index = outside_fib->fib_index; + break; + } + } + } + /* *INDENT-ON* */ + break; + } + s->ext_host_addr.as_u32 = ip0->dst_address.as_u32; + s->ext_host_port = vnet_buffer (b0)->ip.reass.l4_dst_port; + *sessionp = s; + + /* Add to translation hashes */ + ctx0.now = now; + 
ctx0.thread_index = thread_index; + init_nat_i2o_kv (&kv0, s, s - sm->per_thread_data[thread_index].sessions); + if (clib_bihash_add_or_overwrite_stale_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv0, + nat44_i2o_is_idle_session_cb, &ctx0)) + nat_elog_notice ("in2out key add failed"); + + init_nat_o2i_kv (&kv0, s, s - sm->per_thread_data[thread_index].sessions); + if (clib_bihash_add_or_overwrite_stale_8_8 + (&sm->per_thread_data[thread_index].out2in, &kv0, + nat44_o2i_is_idle_session_cb, &ctx0)) + nat_elog_notice ("out2in key add failed"); + + /* log NAT event */ + nat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index, + &s->in2out.addr, s->in2out.port, &s->out2in.addr, + s->out2in.port, s->nat_proto); + + nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr, + s->out2in.port, &s->ext_host_addr, s->ext_host_port, + &s->ext_host_nat_addr, s->ext_host_nat_port, + s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0); + + return next0; +} + +#ifndef CLIB_MARCH_VARIANT +static_always_inline snat_in2out_error_t +icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0, + ip4_address_t * addr, u16 * port, nat_protocol_t * nat_proto) +{ + icmp46_header_t *icmp0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0 = 0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *) (icmp0 + 1); + + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + { + *nat_proto = NAT_PROTOCOL_ICMP; + *addr = ip0->src_address; + *port = vnet_buffer (b)->ip.reass.l4_src_port; + } + else + { + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); + *nat_proto = ip_proto_to_nat_proto (inner_ip0->protocol); + *addr = 
inner_ip0->dst_address; + switch (*nat_proto) + { + case NAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + *port = inner_echo0->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + *port = ((tcp_udp_header_t *) l4_header)->dst_port; + break; + default: + return SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL; + } + } + return -1; /* success */ +} + +/** + * Get address and port values to be used for ICMP packet translation + * and create session if needed + * + * @param[in,out] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[in,out] ip0 ip header + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 +icmp_match_in2out_slow (snat_main_t * sm, vlib_node_runtime_t * node, + u32 thread_index, vlib_buffer_t * b0, + ip4_header_t * ip0, ip4_address_t * addr, u16 * port, + u32 * fib_index, nat_protocol_t * proto, void *d, + void *e, u8 * dont_translate) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 sw_if_index0; + snat_session_t *s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + u32 next0 = ~0; + int err; + vlib_main_t *vm = vlib_get_main (); + *dont_translate = 0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + *fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (b0, ip0, addr, port, proto); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + init_nat_k (&kv0, *addr, *port, *fib_index, *proto); + if (clib_bihash_search_8_8 (&tsm->in2out, &kv0, &value0)) + { + if (vnet_buffer (b0)->sw_if_index[VLIB_TX] != ~0) + { + if (PREDICT_FALSE 
+ (nat_not_translate_output_feature + (sm, ip0, *proto, *port, *port, thread_index, sw_if_index0))) + { + *dont_translate = 1; + goto out; + } + } + else + { + if (PREDICT_FALSE (snat_not_translate (sm, node, sw_if_index0, + ip0, NAT_PROTOCOL_ICMP, + *fib_index, thread_index))) + { + *dont_translate = 1; + goto out; + } + } + + if (PREDICT_FALSE + (icmp_type_is_error_message + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + next0 = + slow_path (sm, b0, ip0, *addr, *port, *fib_index, *proto, &s0, node, + next0, thread_index, vlib_time_now (vm)); + + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto out; + + if (!s0) + { + *dont_translate = 1; + goto out; + } + } + else + { + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + s0 = pool_elt_at_index (tsm->sessions, value0.value); + } + +out: + if (s0) + { + *addr = s0->out2in.addr; + *port = s0->out2in.port; + *fib_index = s0->out2in.fib_index; + } + if (d) + *(snat_session_t **) (d) = s0; + return next0; +} +#endif + +#ifndef CLIB_MARCH_VARIANT +/** + * Get address and port values to be used for ICMP packet translation + * + * @param[in] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[in,out] ip0 ip header + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 +icmp_match_in2out_fast (snat_main_t * sm, vlib_node_runtime_t * node, + u32 thread_index, vlib_buffer_t * b0, + ip4_header_t * ip0, ip4_address_t * addr, u16 * port, + u32 * fib_index, nat_protocol_t * proto, void *d, + void *e, u8 * dont_translate) +{ + u32 sw_if_index0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + *dont_translate = 0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + *fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (b0, ip0, addr, port, proto); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + + if (snat_static_mapping_match + (sm, *addr, *port, *fib_index, *proto, &sm_addr, &sm_port, + &sm_fib_index, 0, &is_addr_only, 0, 0, 0, 0, 0)) + { + if (PREDICT_FALSE (snat_not_translate_fast (sm, node, sw_if_index0, ip0, + IP_PROTOCOL_ICMP, + *fib_index))) + { + *dont_translate = 1; + goto out; + } + + if (icmp_type_is_error_message 
+ (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_request + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply || !is_addr_only) + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + +out: + return next0; +} +#endif + +#ifndef CLIB_MARCH_VARIANT +u32 +icmp_in2out (snat_main_t * sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, u32 thread_index, void *d, void *e) +{ + vlib_main_t *vm = vlib_get_main (); + ip4_address_t addr; + u16 port; + u32 fib_index; + nat_protocol_t protocol; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + u8 dont_translate; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + u16 old_checksum0, new_checksum0; + ip_csum_t sum0; + u16 checksum0; + u32 next0_tmp; + + echo0 = (icmp_echo_header_t *) (icmp0 + 1); + + next0_tmp = + sm->icmp_match_in2out_cb (sm, node, thread_index, b0, ip0, &addr, &port, + &fib_index, &protocol, d, e, &dont_translate); + if (next0_tmp != ~0) + next0 = next0_tmp; + if (next0 == SNAT_IN2OUT_NEXT_DROP || dont_translate) + goto out; + + if (PREDICT_TRUE (!ip4_is_fragment (ip0))) + { + sum0 = + ip_incremental_checksum_buffer (vm, b0, + (u8 *) icmp0 - + (u8 *) vlib_buffer_get_current (b0), + ntohs (ip0->length) - + ip4_header_bytes (ip0), 0); + checksum0 = ~ip_csum_fold (sum0); + if (PREDICT_FALSE (checksum0 != 0 && checksum0 != 0xffff)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + } + + old_addr0 
= ip0->src_address.as_u32; + new_addr0 = ip0->src_address.as_u32 = addr.as_u32; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + if (icmp0->checksum == 0) + icmp0->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp0->type)) + { + new_id0 = port; + if (PREDICT_FALSE (new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + } + else + { + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); + + if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto out; + } + + /* update inner destination IP address */ + old_addr0 = inner_ip0->dst_address.as_u32; + inner_ip0->dst_address = addr; + new_addr0 = inner_ip0->dst_address.as_u32; + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + icmp0->checksum = ip_csum_fold (sum0); + + /* update inner IP header checksum */ + old_checksum0 = inner_ip0->checksum; + sum0 = inner_ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + inner_ip0->checksum = ip_csum_fold (sum0); + new_checksum0 = inner_ip0->checksum; + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t, + checksum); + icmp0->checksum = ip_csum_fold (sum0); + + switch (protocol) + { + case NAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + + old_id0 = inner_echo0->identifier; + new_id0 = port; + inner_echo0->identifier = new_id0; + + 
sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t *) l4_header)->dst_port; + new_id0 = port; + ((tcp_udp_header_t *) l4_header)->dst_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + dst_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT (0); + } + } + } + + if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0) + { + if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0, + sm->endpoint_dependent)) + vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; + } + +out: + return next0; +} +#endif + +static inline u32 +icmp_in2out_slow_path (snat_main_t * sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + f64 now, u32 thread_index, snat_session_t ** p_s0) +{ + vlib_main_t *vm = vlib_get_main (); + + next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, thread_index, p_s0, 0); + snat_session_t *s0 = *p_s0; + if (PREDICT_TRUE (next0 != SNAT_IN2OUT_NEXT_DROP && s0)) + { + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + } + return next0; +} + +static int +nat_in2out_sm_unknown_proto (snat_main_t * sm, + vlib_buffer_t * b, + ip4_header_t * ip, u32 rx_fib_index) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + u32 old_addr, new_addr; + ip_csum_t sum; + + init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) + return 1; + + m = pool_elt_at_index (sm->static_mappings, value.value); + + old_addr = ip->src_address.as_u32; + 
new_addr = ip->src_address.as_u32 = m->external_addr.as_u32; + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip->checksum = ip_csum_fold (sum); + + + /* Hairpinning */ + if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) + { + vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index; + nat_hairpinning_sm_unknown_proto (sm, b, ip); + } + + return 0; +} + +static inline uword +snat_in2out_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_slow_path, + int is_output_feature) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from >= 2) + { + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, old_port1, new_port1; + udp_header_t *udp0, *udp1; + tcp_header_t *tcp0, *tcp1; + icmp46_header_t *icmp0, *icmp1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t *s0 = 0, *s1 = 0; + clib_bihash_kv_8_8_t kv0, value0, kv1, value1; + u32 iph_offset0 = 0, iph_offset1 = 0; + + b0 = *b; + b++; + b1 = *b; + b++; + + /* Prefetch next iteration. 
*/ + if (PREDICT_TRUE (n_left_from >= 4)) + { + vlib_buffer_t *p2, *p3; + + p2 = *b; + p3 = *(b + 1); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + if (is_output_feature) + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; + + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + iph_offset0); + + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace00; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = + node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; + } + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + other : &sm->counters.fastpath. + in2out.other, thread_index, + sw_if_index0, 1); + goto trace00; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, + node, next0, now, thread_index, &s0); + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + icmp : &sm->counters.fastpath. 
+ in2out.icmp, thread_index, + sw_if_index0, 1); + goto trace00; + } + } + else + { + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + + init_nat_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, + proto0); + if (PREDICT_FALSE + (clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0)) + { + if (is_slow_path) + { + if (is_output_feature) + { + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip0, proto0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0))) + goto trace00; + + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_server)) + && ip0->dst_address.as_u32 == 0xffffffff)) + goto trace00; + } + else + { + if (PREDICT_FALSE + (snat_not_translate + (sm, node, sw_if_index0, ip0, proto0, + rx_fib_index0, thread_index))) + goto trace00; + } + + next0 = slow_path (sm, b0, ip0, + ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, + rx_fib_index0, + proto0, &s0, node, next0, thread_index, now); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace00; + + if (PREDICT_FALSE (!s0)) + goto trace00; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + else + s0 = + pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + + b0->flags |= VNET_BUFFER_F_IS_NATED; + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + if (!is_output_feature) + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 
= ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, src_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + mss_clamping (sm->mss_clamping, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out.tcp : &sm-> + counters.fastpath.in2out.tcp, + thread_index, sw_if_index0, 1); + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + udp0->src_port = s0->out2in.port; + if (PREDICT_FALSE (udp0->checksum)) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port; + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + vlib_increment_simple_counter (is_slow_path ? 
&sm-> + counters.slowpath.in2out.udp : &sm-> + counters.fastpath.in2out.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + trace00: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - sm->per_thread_data[thread_index].sessions; + } + + if (next0 == SNAT_IN2OUT_NEXT_DROP) + { + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + drops : &sm->counters.fastpath. + in2out.drops, thread_index, + sw_if_index0, 1); + } + + if (is_output_feature) + iph_offset1 = vnet_buffer (b1)->ip.reass.save_rewrite_length; + + ip1 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b1) + + iph_offset1); + + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + if (PREDICT_FALSE (ip1->ttl == 1)) + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace01; + } + + proto1 = ip_proto_to_nat_proto (ip1->protocol); + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_OTHER)) + { + if (nat_in2out_sm_unknown_proto (sm, b1, ip1, rx_fib_index1)) + { + next1 = SNAT_IN2OUT_NEXT_DROP; + b1->error = + node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; + } + 
vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + other : &sm->counters.fastpath. + in2out.other, thread_index, + sw_if_index1, 1); + goto trace01; + } + + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP)) + { + next1 = icmp_in2out_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, thread_index, &s1); + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + icmp : &sm->counters.fastpath. + in2out.icmp, thread_index, + sw_if_index1, 1); + goto trace01; + } + } + else + { + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_OTHER)) + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP)) + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + + init_nat_k (&kv1, ip1->src_address, + vnet_buffer (b1)->ip.reass.l4_src_port, rx_fib_index1, + proto1); + if (PREDICT_FALSE + (clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0)) + { + if (is_slow_path) + { + if (is_output_feature) + { + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip1, proto1, + vnet_buffer (b1)->ip.reass.l4_src_port, + vnet_buffer (b1)->ip.reass.l4_dst_port, + thread_index, sw_if_index1))) + goto trace01; + + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto1 == NAT_PROTOCOL_UDP + && (vnet_buffer (b1)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + (UDP_DST_PORT_dhcp_to_server)) + && ip1->dst_address.as_u32 == 0xffffffff)) + goto trace01; + } + else + { + if (PREDICT_FALSE + (snat_not_translate + (sm, node, sw_if_index1, ip1, proto1, + rx_fib_index1, thread_index))) + goto trace01; + } + + next1 = + slow_path (sm, b1, ip1, ip1->src_address, + vnet_buffer (b1)->ip.reass.l4_src_port, + rx_fib_index1, proto1, &s1, node, next1, + thread_index, now); + if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) + goto 
trace01; + + if (PREDICT_FALSE (!s1)) + goto trace01; + } + else + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + else + s1 = + pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value1.value); + + b1->flags |= VNET_BUFFER_F_IS_NATED; + + old_addr1 = ip1->src_address.as_u32; + ip1->src_address = s1->out2in.addr; + new_addr1 = ip1->src_address.as_u32; + if (!is_output_feature) + vnet_buffer (b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, src_address /* changed member */ ); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE (proto1 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port; + new_port1 = udp1->src_port = s1->out2in.port; + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */ ); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + mss_clamping (sm->mss_clamping, tcp1, &sum1); + tcp1->checksum = ip_csum_fold (sum1); + } + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out.tcp : &sm-> + counters.fastpath.in2out.tcp, + thread_index, sw_if_index1, 1); + } + else + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + udp1->src_port = s1->out2in.port; + if (PREDICT_FALSE (udp1->checksum)) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_src_port; + new_port1 = udp1->src_port; + sum1 = udp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, ip4_header_t, dst_address /* changed member */ + ); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp1->checksum = ip_csum_fold (sum1); + } + } + vlib_increment_simple_counter (is_slow_path ? 
&sm-> + counters.slowpath.in2out.udp : &sm-> + counters.fastpath.in2out.udp, + thread_index, sw_if_index1, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s1, now, vlib_buffer_length_in_chain (vm, b1), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s1, thread_index); + trace01: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = + s1 - sm->per_thread_data[thread_index].sessions; + } + + if (next1 == SNAT_IN2OUT_NEXT_DROP) + { + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + drops : &sm->counters.fastpath. + in2out.drops, thread_index, + sw_if_index1, 1); + } + + n_left_from -= 2; + next[0] = next0; + next[1] = next1; + next += 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t *ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t *udp0; + tcp_header_t *tcp0; + icmp46_header_t *icmp0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t *s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + u32 iph_offset0 = 0; + + b0 = *b; + b++; + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + if (is_output_feature) + iph_offset0 = vnet_buffer (b0)->ip.reass.save_rewrite_length; + + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + iph_offset0); + + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + 
ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + if (nat_in2out_sm_unknown_proto (sm, b0, ip0, rx_fib_index0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + b0->error = + node->errors[SNAT_IN2OUT_ERROR_UNSUPPORTED_PROTOCOL]; + } + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + other : &sm->counters.fastpath. + in2out.other, thread_index, + sw_if_index0, 1); + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + icmp : &sm->counters.fastpath. + in2out.icmp, thread_index, + sw_if_index0, 1); + goto trace0; + } + } + else + { + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + + init_nat_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, + proto0); + + if (clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv0, &value0)) + { + if (is_slow_path) + { + if (is_output_feature) + { + if (PREDICT_FALSE + (nat_not_translate_output_feature + (sm, ip0, proto0, + vnet_buffer (b0)->ip.reass.l4_src_port, + vnet_buffer (b0)->ip.reass.l4_dst_port, + thread_index, sw_if_index0))) + goto trace0; + + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 + 
(UDP_DST_PORT_dhcp_to_server)) + && ip0->dst_address.as_u32 == 0xffffffff)) + goto trace0; + } + else + { + if (PREDICT_FALSE + (snat_not_translate + (sm, node, sw_if_index0, ip0, proto0, rx_fib_index0, + thread_index))) + goto trace0; + } + + next0 = + slow_path (sm, b0, ip0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, + rx_fib_index0, proto0, &s0, node, next0, + thread_index, now); + + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace0; + + if (PREDICT_FALSE (!s0)) + goto trace0; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + else + s0 = + pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + + b0->flags |= VNET_BUFFER_F_IS_NATED; + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + if (!is_output_feature) + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, src_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port = s0->out2in.port; + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + mss_clamping (sm->mss_clamping, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + vlib_increment_simple_counter (is_slow_path ? 
&sm-> + counters.slowpath.in2out.tcp : &sm-> + counters.fastpath.in2out.tcp, + thread_index, sw_if_index0, 1); + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + udp0->src_port = s0->out2in.port; + if (PREDICT_FALSE (udp0->checksum)) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + new_port0 = udp0->src_port; + sum0 = udp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out.udp : &sm-> + counters.fastpath.in2out.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - sm->per_thread_data[thread_index].sessions; + } + + if (next0 == SNAT_IN2OUT_NEXT_DROP) + { + vlib_increment_simple_counter (is_slow_path ? &sm-> + counters.slowpath.in2out. + drops : &sm->counters.fastpath. 
+ in2out.drops, thread_index, + sw_if_index0, 1); + } + + n_left_from--; + next[0] = next0; + next++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + return frame->n_vectors; +} + +VLIB_NODE_FN (snat_in2out_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */ , + 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_node) = { + .name = "nat44-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_in2out_output_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */ , + 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_output_node) = { + .name = "nat44-in2out-output", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + 
+VLIB_NODE_FN (snat_in2out_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */ , + 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { + .name = "nat44-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_in2out_output_slowpath_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */ , + 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_output_slowpath_node) = { + .name = "nat44-in2out-output-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "interface-output", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-output-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + snat_in2out_next_t 
next_index; + snat_main_t *sm = &snat_main; + int is_hairpinning = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t *ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t *udp0; + tcp_header_t *tcp0; + icmp46_header_t *icmp0; + u32 proto0; + u32 rx_fib_index0; + ip4_address_t sm0_addr; + u16 sm0_port; + u32 sm0_fib_index; + + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = + ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_IN2OUT_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + goto trace0; + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, ~0, 0, 0); + goto trace0; + } + + if (snat_static_mapping_match + (sm, ip0->src_address, udp0->src_port, rx_fib_index0, proto0, + &sm0_addr, &sm0_port, &sm0_fib_index, 0, 0, 0, 0, 0, 0, 0)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; 
+ next0 = SNAT_IN2OUT_NEXT_DROP; + goto trace0; + } + + new_addr0 = sm0_addr.as_u32; + new_port0 = sm0_port; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index; + old_addr0 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE (new_port0 != udp0->dst_port)) + { + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + mss_clamping (sm->mss_clamping, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + else if (udp0->checksum) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + else + { + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + mss_clamping (sm->mss_clamping, tcp0, &sum0); + tcp0->checksum = ip_csum_fold (sum0); + } + else if (udp0->checksum) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + + /* Hairpinning */ + is_hairpinning = + snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, 0, + 0 /* do_trace */ ); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { 
+ snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->is_hairpinning = is_hairpinning; + } + + if (next0 != SNAT_IN2OUT_NEXT_DROP) + { + + vlib_increment_simple_counter (&sm->counters.fastpath. + in2out.other, sw_if_index0, + vm->thread_index, 1); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_in2out_fast_node) = { + .name = "nat44-in2out-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "nat44-in2out-slowpath", + [SNAT_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_inlines.h b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h new file mode 100644 index 00000000000..f7089ea0c21 --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_nat44_ei_inlines_h__ +#define __included_nat44_ei_inlines_h__ + +#include <nat/nat44-ei/nat44_ei_ha.h> + +static_always_inline u8 +nat44_ei_maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index) +{ + if (pool_elts (sm->per_thread_data[thread_index].sessions) >= + sm->max_translations_per_thread) + return 1; + return 0; +} + +always_inline void +nat44_ei_session_update_counters (snat_session_t *s, f64 now, uword bytes, + u32 thread_index) +{ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += bytes; + nat_ha_sref (&s->out2in.addr, s->out2in.port, &s->ext_host_addr, + s->ext_host_port, s->nat_proto, s->out2in.fib_index, + s->total_pkts, s->total_bytes, thread_index, + &s->ha_last_refreshed, now); +} + +#endif /* __included_nat44_ei_inlines_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c new file mode 100644 index 00000000000..8220f0dc9d8 --- /dev/null +++ b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c @@ -0,0 +1,1580 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief NAT44 endpoint-dependent outside to inside network translation + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> + +#include <vnet/ip/ip.h> +#include <vnet/udp/udp_local.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <nat/nat.h> +#include <nat/lib/ipfix_logging.h> +#include <nat/nat_inlines.h> +#include <nat/lib/nat_syslog.h> +#include <nat/nat44-ei/nat44_ei_inlines.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct +{ + u32 sw_if_index; + u32 next_index; + u32 session_index; +} snat_out2in_trace_t; + +/* packet trace format function */ +static u8 * +format_snat_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t *t = va_arg (*args, snat_out2in_trace_t *); + + s = + format (s, + "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d", + t->sw_if_index, t->next_index, t->session_index); + return s; +} + +static u8 * +format_snat_out2in_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t *t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +#define foreach_snat_out2in_error \ +_(UNSUPPORTED_PROTOCOL, "unsupported protocol") 
\ +_(OUT_OF_PORTS, "out of ports") \ +_(BAD_ICMP_TYPE, "unsupported ICMP type") \ +_(NO_TRANSLATION, "no translation") \ +_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ +_(CANNOT_CREATE_USER, "cannot create NAT user") + +typedef enum +{ +#define _(sym,str) SNAT_OUT2IN_ERROR_##sym, + foreach_snat_out2in_error +#undef _ + SNAT_OUT2IN_N_ERROR, +} snat_out2in_error_t; + +static char *snat_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_snat_out2in_error +#undef _ +}; + +typedef enum +{ + SNAT_OUT2IN_NEXT_DROP, + SNAT_OUT2IN_NEXT_LOOKUP, + SNAT_OUT2IN_NEXT_ICMP_ERROR, + SNAT_OUT2IN_N_NEXT, +} snat_out2in_next_t; + +#ifndef CLIB_MARCH_VARIANT +int +nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg) +{ + snat_main_t *sm = &snat_main; + nat44_is_idle_session_ctx_t *ctx = arg; + snat_session_t *s; + u64 sess_timeout_time; + snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, + ctx->thread_index); + clib_bihash_kv_8_8_t s_kv; + + s = pool_elt_at_index (tsm->sessions, kv->value); + sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); + if (ctx->now >= sess_timeout_time) + { + init_nat_i2o_k (&s_kv, s); + if (clib_bihash_add_del_8_8 (&tsm->in2out, &s_kv, 0)) + nat_elog_warn ("out2in key del failed"); + + nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + + nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index, + &s->in2out.addr, s->in2out.port, + &s->out2in.addr, s->out2in.port, s->nat_proto); + + nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr, + s->ext_host_port, s->nat_proto, s->out2in.fib_index, + ctx->thread_index); + + if (!snat_is_session_static (s)) + snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, + &s->out2in.addr, s->out2in.port, + s->nat_proto); + + nat44_delete_session (sm, s, ctx->thread_index); + 
return 1; + } + + return 0; +} +#endif + +/** + * @brief Create session for static mapping. + * + * Create NAT session initiated by host from external network with static + * mapping. + * + * @param sm NAT main. + * @param b0 Vlib buffer. + * @param in2out In2out NAT44 session key. + * @param out2in Out2in NAT44 session key. + * @param node Vlib node. + * + * @returns SNAT session if successfully created otherwise 0. + */ +static inline snat_session_t * +create_session_for_static_mapping (snat_main_t * sm, + vlib_buffer_t * b0, + ip4_address_t i2o_addr, + u16 i2o_port, + u32 i2o_fib_index, + ip4_address_t o2i_addr, + u16 o2i_port, + u32 o2i_fib_index, + nat_protocol_t proto, + vlib_node_runtime_t * node, + u32 thread_index, f64 now) +{ + snat_user_t *u; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0; + ip4_header_t *ip0; + udp_header_t *udp0; + nat44_is_idle_session_ctx_t ctx0; + + if (PREDICT_FALSE (nat44_ei_maximum_sessions_exceeded (sm, thread_index))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_notice ("maximum sessions exceeded"); + return 0; + } + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + + u = nat_user_get_or_create (sm, &i2o_addr, i2o_fib_index, thread_index); + if (!u) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_CANNOT_CREATE_USER]; + return 0; + } + + s = nat_session_alloc_or_recycle (sm, u, thread_index, now); + if (!s) + { + nat44_delete_user_with_no_session (sm, u, thread_index); + nat_elog_warn ("create NAT session failed"); + return 0; + } + + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->ext_host_addr.as_u32 = ip0->src_address.as_u32; + s->ext_host_port = udp0->src_port; + user_session_increment (sm, u, 1 /* static */ ); + s->in2out.addr = i2o_addr; + s->in2out.port = i2o_port; + s->in2out.fib_index = i2o_fib_index; + s->out2in.addr = o2i_addr; + s->out2in.port = o2i_port; + s->out2in.fib_index = o2i_fib_index; + s->nat_proto = proto; + + /* Add to translation hashes */ 
+ ctx0.now = now; + ctx0.thread_index = thread_index; + init_nat_i2o_kv (&kv0, s, s - sm->per_thread_data[thread_index].sessions); + if (clib_bihash_add_or_overwrite_stale_8_8 + (&sm->per_thread_data[thread_index].in2out, &kv0, + nat44_i2o_is_idle_session_cb, &ctx0)) + nat_elog_notice ("in2out key add failed"); + + init_nat_o2i_kv (&kv0, s, s - sm->per_thread_data[thread_index].sessions); + if (clib_bihash_add_or_overwrite_stale_8_8 + (&sm->per_thread_data[thread_index].out2in, &kv0, + nat44_o2i_is_idle_session_cb, &ctx0)) + nat_elog_notice ("out2in key add failed"); + + /* log NAT event */ + nat_ipfix_logging_nat44_ses_create (thread_index, + s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->nat_proto, + s->in2out.port, + s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index, + &s->in2out.addr, s->in2out.port, &s->out2in.addr, + s->out2in.port, s->nat_proto); + + nat_ha_sadd (&s->in2out.addr, s->in2out.port, &s->out2in.addr, + s->out2in.port, &s->ext_host_addr, s->ext_host_port, + &s->ext_host_nat_addr, s->ext_host_nat_port, + s->nat_proto, s->in2out.fib_index, s->flags, thread_index, 0); + + return s; +} + +#ifndef CLIB_MARCH_VARIANT +static_always_inline snat_out2in_error_t +icmp_get_key (vlib_buffer_t * b, ip4_header_t * ip0, + ip4_address_t * addr, u16 * port, nat_protocol_t * nat_proto) +{ + icmp46_header_t *icmp0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *) (icmp0 + 1); + + if (!icmp_type_is_error_message + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) + { + *nat_proto = NAT_PROTOCOL_ICMP; + *addr = ip0->dst_address; + *port = vnet_buffer (b)->ip.reass.l4_src_port; + } + else + { + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); + *nat_proto = ip_proto_to_nat_proto (inner_ip0->protocol); + *addr 
= inner_ip0->src_address; + switch (*nat_proto) + { + case NAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + *port = inner_echo0->identifier; + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + *port = ((tcp_udp_header_t *) l4_header)->src_port; + break; + default: + return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL; + } + } + return -1; /* success */ +} + +/** + * Get address and port values to be used for ICMP packet translation + * and create session if needed + * + * @param[in,out] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[in,out] ip0 ip header + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 +icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, + u32 thread_index, vlib_buffer_t * b0, + ip4_header_t * ip0, ip4_address_t * addr, + u16 * port, u32 * fib_index, + nat_protocol_t * proto, void *d, void *e, + u8 * dont_translate) +{ + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 sw_if_index0; + snat_session_t *s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + u8 identity_nat; + vlib_main_t *vm = vlib_get_main (); + *dont_translate = 0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + *fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + *proto = 0; + + err = icmp_get_key (b0, ip0, addr, port, proto); + if (err != -1) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + ip4_address_t mapping_addr; + u16 mapping_port; + u32 mapping_fib_index; + + init_nat_k (&kv0, 
*addr, *port, *fib_index, *proto); + if (clib_bihash_search_8_8 (&tsm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match + (sm, *addr, *port, *fib_index, *proto, + &mapping_addr, &mapping_port, &mapping_fib_index, 1, &is_addr_only, + 0, 0, 0, &identity_nat, 0)) + { + if (!sm->forwarding_enabled) + { + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index0, + ip0->dst_address.as_u32))) + { + *dont_translate = 1; + goto out; + } + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + else + { + *dont_translate = 1; + goto out; + } + } + + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || !is_addr_only))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE (identity_nat)) + { + *dont_translate = 1; + goto out; + } + /* Create session initiated by host from external network */ + s0 = + create_session_for_static_mapping (sm, b0, mapping_addr, mapping_port, + mapping_fib_index, *addr, *port, + *fib_index, *proto, node, + thread_index, vlib_time_now (vm)); + + if (!s0) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + } + else + { + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply + && vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. 
+ reass.icmp_type_or_tcp_flags))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + s0 = pool_elt_at_index (tsm->sessions, value0.value); + } + +out: + if (s0) + { + *addr = s0->in2out.addr; + *port = s0->in2out.port; + *fib_index = s0->in2out.fib_index; + } + if (d) + *(snat_session_t **) d = s0; + return next0; +} +#endif + +#ifndef CLIB_MARCH_VARIANT +/** + * Get address and port values to be used for ICMP packet translation + * + * @param[in] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[in,out] ip0 ip header + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 +icmp_match_out2in_fast (snat_main_t * sm, vlib_node_runtime_t * node, + u32 thread_index, vlib_buffer_t * b0, + ip4_header_t * ip0, ip4_address_t * mapping_addr, + u16 * mapping_port, u32 * mapping_fib_index, + nat_protocol_t * proto, void *d, void *e, + u8 * dont_translate) +{ + u32 sw_if_index0; + u32 rx_fib_index0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + ip4_address_t addr; + u16 port; + *dont_translate = 0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (b0, ip0, &addr, &port, proto); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + if (snat_static_mapping_match + (sm, addr, port, rx_fib_index0, *proto, mapping_addr, mapping_port, + mapping_fib_index, 1, &is_addr_only, 0, 0, 0, 0, 0)) + { + /* Don't NAT packet aimed at the intfc address */ + if (is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)) + { + *dont_translate = 1; + 
goto out; + } + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE + (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != ICMP4_echo_reply + && (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || !is_addr_only) + && !icmp_type_is_error_message (vnet_buffer (b0)->ip. + reass.icmp_type_or_tcp_flags))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + +out: + return next0; +} +#endif + +#ifndef CLIB_MARCH_VARIANT +u32 +icmp_out2in (snat_main_t * sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, u32 thread_index, void *d, void *e) +{ + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0 = 0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + u8 dont_translate; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + u16 checksum0; + u32 next0_tmp; + vlib_main_t *vm = vlib_get_main (); + ip4_address_t addr; + u16 port; + u32 fib_index; + nat_protocol_t proto; + + echo0 = (icmp_echo_header_t *) (icmp0 + 1); + + next0_tmp = sm->icmp_match_out2in_cb (sm, node, thread_index, b0, ip0, + &addr, &port, &fib_index, &proto, + d, e, &dont_translate); + if (next0_tmp != ~0) + next0 = next0_tmp; + if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate) + goto out; + + if (PREDICT_TRUE (!ip4_is_fragment (ip0))) + { + sum0 = + ip_incremental_checksum_buffer (vm, b0, + (u8 *) icmp0 - + (u8 *) vlib_buffer_get_current (b0), + ntohs (ip0->length) - + ip4_header_bytes (ip0), 0); + checksum0 = ~ip_csum_fold (sum0); + if (checksum0 != 0 && checksum0 != 0xffff) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + } + + old_addr0 = ip0->dst_address.as_u32; + new_addr0 = ip0->dst_address.as_u32 = addr.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; + + sum0 = 
ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + if (icmp0->checksum == 0) + icmp0->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp0->type)) + { + new_id0 = port; + if (PREDICT_FALSE (new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier /* changed member */ ); + icmp0->checksum = ip_csum_fold (sum0); + } + } + else + { + inner_ip0 = (ip4_header_t *) (echo0 + 1); + l4_header = ip4_next_header (inner_ip0); + + if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + old_addr0 = inner_ip0->src_address.as_u32; + inner_ip0->src_address = addr; + new_addr0 = inner_ip0->src_address.as_u32; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address /* changed member */ ); + icmp0->checksum = ip_csum_fold (sum0); + + switch (proto) + { + case NAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t *) l4_header; + inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); + + old_id0 = inner_echo0->identifier; + new_id0 = port; + inner_echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = + ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t *) l4_header)->src_port; + new_id0 = port; + ((tcp_udp_header_t *) l4_header)->src_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + src_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT (0); + } + } + } + +out: + return next0; +} +#endif + 
+static inline u32 +icmp_out2in_slow_path (snat_main_t * sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, f64 now, + u32 thread_index, snat_session_t ** p_s0) +{ + vlib_main_t *vm = vlib_get_main (); + + next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, thread_index, p_s0, 0); + snat_session_t *s0 = *p_s0; + if (PREDICT_TRUE (next0 != SNAT_OUT2IN_NEXT_DROP && s0)) + { + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, thread_index); + } + return next0; +} + +static int +nat_out2in_sm_unknown_proto (snat_main_t * sm, + vlib_buffer_t * b, + ip4_header_t * ip, u32 rx_fib_index) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + u32 old_addr, new_addr; + ip_csum_t sum; + + init_nat_k (&kv, ip->dst_address, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + return 1; + + m = pool_elt_at_index (sm->static_mappings, value.value); + + old_addr = ip->dst_address.as_u32; + new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip->checksum = ip_csum_fold (sum); + + vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index; + return 0; +} + +VLIB_NODE_FN (snat_out2in_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vm->thread_index; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, 
from, b, n_left_from); + + while (n_left_from >= 2) + { + vlib_buffer_t *b0, *b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + u32 new_addr1, old_addr1; + u16 new_port1, old_port1; + udp_header_t *udp0, *udp1; + tcp_header_t *tcp0, *tcp1; + icmp46_header_t *icmp0, *icmp1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t *s0 = 0, *s1 = 0; + clib_bihash_kv_8_8_t kv0, kv1, value0, value1; + u8 identity_nat0, identity_nat1; + ip4_address_t sm_addr0, sm_addr1; + u16 sm_port0, sm_port1; + u32 sm_fib_index0, sm_fib_index1; + + b0 = *b; + b++; + b1 = *b; + b++; + + /* Prefetch next iteration. */ + if (PREDICT_TRUE (n_left_from >= 4)) + { + vlib_buffer_t *p2, *p3; + + p2 = *b; + p3 = *(b + 1); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + vnet_buffer (b0)->snat.flags = 0; + vnet_buffer (b1)->snat.flags = 0; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + if (nat_out2in_sm_unknown_proto (sm, b0, ip0, rx_fib_index0)) + { + if (!sm->forwarding_enabled) + { + b0->error = + 
node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_OUT2IN_NEXT_DROP; + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.other, + thread_index, sw_if_index0, 1); + + goto trace0; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp, + thread_index, sw_if_index0, 1); + goto trace0; + } + + init_nat_k (&kv0, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + proto0); + if (clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match + (sm, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + proto0, &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, 0, 0, + 0, &identity_nat0, 0)) + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + vnet_feature_next (&next0, b0); + goto trace0; + } + + if (!sm->forwarding_enabled) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + } + goto trace0; + } + + if (PREDICT_FALSE (identity_nat0)) + goto trace0; + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping (sm, b0, + sm_addr0, sm_port0, + sm_fib_index0, + ip0->dst_address, + vnet_buffer (b0)->ip. 
+ reass.l4_dst_port, + rx_fib_index0, proto0, node, + thread_index, now); + if (!s0) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace0; + } + } + else + s0 = pool_elt_at_index (tsm->sessions, value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, dst_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + tcp0->checksum = ip_csum_fold (sum0); + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp, + thread_index, sw_if_index0, 1); + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s0, 
thread_index); + trace0: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - sm->per_thread_data[thread_index].sessions; + } + + if (next0 == SNAT_OUT2IN_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.drops, + thread_index, sw_if_index0, 1); + } + + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + if (PREDICT_FALSE (ip1->ttl == 1)) + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace1; + } + + proto1 = ip_proto_to_nat_proto (ip1->protocol); + + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_OTHER)) + { + if (nat_out2in_sm_unknown_proto (sm, b1, ip1, rx_fib_index1)) + { + if (!sm->forwarding_enabled) + { + b1->error = + node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next1 = SNAT_OUT2IN_NEXT_DROP; + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.other, + thread_index, sw_if_index1, 1); + goto trace1; + } + + if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP)) + { + next1 = icmp_out2in_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, thread_index, &s1); + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp, + thread_index, sw_if_index1, 1); + goto trace1; + } + + init_nat_k (&kv1, ip1->dst_address, + vnet_buffer (b1)->ip.reass.l4_dst_port, rx_fib_index1, + proto1); + + if (clib_bihash_search_8_8 + 
(&sm->per_thread_data[thread_index].out2in, &kv1, &value1)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match + (sm, ip1->dst_address, + vnet_buffer (b1)->ip.reass.l4_dst_port, proto1, + rx_fib_index1, &sm_addr1, &sm_port1, &sm_fib_index1, 1, 0, + 0, 0, 0, &identity_nat1, 0)) + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto1 == NAT_PROTOCOL_UDP + && (vnet_buffer (b1)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + vnet_feature_next (&next1, b1); + goto trace1; + } + + if (!sm->forwarding_enabled) + { + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next1 = SNAT_OUT2IN_NEXT_DROP; + } + goto trace1; + } + + if (PREDICT_FALSE (identity_nat1)) + goto trace1; + + /* Create session initiated by host from external network */ + s1 = + create_session_for_static_mapping (sm, b1, sm_addr1, sm_port1, + sm_fib_index1, + ip1->dst_address, + vnet_buffer (b1)->ip. 
+ reass.l4_dst_port, + rx_fib_index1, proto1, node, + thread_index, now); + if (!s1) + { + next1 = SNAT_OUT2IN_NEXT_DROP; + goto trace1; + } + } + else + s1 = + pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value1.value); + + old_addr1 = ip1->dst_address.as_u32; + ip1->dst_address = s1->in2out.addr; + new_addr1 = ip1->dst_address.as_u32; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, dst_address /* changed member */ ); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE (proto1 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; + new_port1 = udp1->dst_port = s1->in2out.port; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */ ); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + tcp1->checksum = ip_csum_fold (sum1); + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp, + thread_index, sw_if_index1, 1); + } + else + { + if (!vnet_buffer (b1)->ip.reass.is_non_first_fragment) + { + old_port1 = vnet_buffer (b1)->ip.reass.l4_dst_port; + new_port1 = udp1->dst_port = s1->in2out.port; + if (PREDICT_FALSE (udp1->checksum)) + { + + sum1 = udp1->checksum; + sum1 = + ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */ ); + sum1 = + ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp1->checksum = ip_csum_fold (sum1); + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.udp, + thread_index, sw_if_index1, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s1, now, vlib_buffer_length_in_chain (vm, b1), thread_index); + /* Per-user LRU list maintenance */ + 
nat44_session_update_lru (sm, s1, thread_index); + trace1: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = + s1 - sm->per_thread_data[thread_index].sessions; + } + + if (next1 == SNAT_OUT2IN_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.drops, + thread_index, sw_if_index1, 1); + } + + n_left_from -= 2; + next[0] = next0; + next[1] = next1; + next += 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t *ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t *udp0; + tcp_header_t *tcp0; + icmp46_header_t *icmp0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t *s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + u8 identity_nat0; + ip4_address_t sm_addr0; + u16 sm_port0; + u32 sm_fib_index0; + + b0 = *b; + ++b; + + vnet_buffer (b0)->snat.flags = 0; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + { + if (nat_out2in_sm_unknown_proto (sm, b0, ip0, rx_fib_index0)) + { + if (!sm->forwarding_enabled) + { + b0->error = + node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_OUT2IN_NEXT_DROP; + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.other, + thread_index, sw_if_index0, 1); + goto trace00; + } + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + 
icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace00; + } + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp, + thread_index, sw_if_index0, 1); + goto trace00; + } + + init_nat_k (&kv0, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + proto0); + + if (clib_bihash_search_8_8 + (&sm->per_thread_data[thread_index].out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match + (sm, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, + proto0, &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, 0, 0, + 0, &identity_nat0, 0)) + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE + (proto0 == NAT_PROTOCOL_UDP + && (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + vnet_feature_next (&next0, b0); + goto trace00; + } + + if (!sm->forwarding_enabled) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + } + goto trace00; + } + + if (PREDICT_FALSE (identity_nat0)) + goto trace00; + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping (sm, b0, + sm_addr0, sm_port0, + sm_fib_index0, + ip0->dst_address, + vnet_buffer (b0)->ip. 
+ reass.l4_dst_port, + rx_fib_index0, proto0, node, + thread_index, now); + if (!s0) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace00; + } + } + else + s0 = + pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, dst_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + tcp0->checksum = ip_csum_fold (sum0); + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp, + thread_index, sw_if_index0, 1); + } + else + { + if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + { + old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + new_port0 = udp0->dst_port = s0->in2out.port; + if (PREDICT_FALSE (udp0->checksum)) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, dst_address /* changed member */ + ); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.udp, + thread_index, sw_if_index0, 1); + } + + /* Accounting */ + nat44_ei_session_update_counters ( + s0, now, vlib_buffer_length_in_chain (vm, b0), thread_index); + /* Per-user LRU list maintenance */ + 
nat44_session_update_lru (sm, s0, thread_index); + trace00: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - sm->per_thread_data[thread_index].sessions; + } + + if (next0 == SNAT_OUT2IN_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.slowpath.out2in.drops, + thread_index, sw_if_index0, 1); + } + + n_left_from--; + next[0] = next0; + next++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_out2in_node) = { + .name = "nat44-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FN (snat_out2in_fast_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, *from; + snat_main_t *sm = &snat_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + vlib_get_buffers (vm, from, b, n_left_from); + while (n_left_from > 0) + { + vlib_buffer_t *b0; + u32 next0 = SNAT_OUT2IN_NEXT_DROP; + u32 sw_if_index0; + ip4_header_t *ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t *udp0; + tcp_header_t *tcp0; + 
icmp46_header_t *icmp0; + u32 proto0; + u32 rx_fib_index0; + ip4_address_t sm_addr0; + u16 sm_port0; + u32 sm_fib_index0; + + b0 = *b; + b++; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + vnet_feature_next (&next0, b0); + + if (PREDICT_FALSE (ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace00; + } + + proto0 = ip_proto_to_nat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + goto trace00; + + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, ~0, 0, 0); + goto trace00; + } + + if (snat_static_mapping_match + (sm, ip0->dst_address, udp0->dst_port, rx_fib_index0, proto0, + &sm_addr0, &sm_port0, &sm_fib_index0, 1, 0, 0, 0, 0, 0, 0)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + new_addr0 = sm_addr0.as_u32; + new_port0 = sm_port0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm_fib_index0; + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, dst_address /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE (new_port0 != udp0->dst_port)) + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* 
cheat */ , + length /* changed member */ ); + tcp0->checksum = ip_csum_fold (sum0); + } + else if (udp0->checksum) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */ , + length /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + else + { + if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + tcp0->checksum = ip_csum_fold (sum0); + } + else if (udp0->checksum) + { + sum0 = udp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */ ); + udp0->checksum = ip_csum_fold (sum0); + } + } + + trace00: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + if (next0 == SNAT_OUT2IN_NEXT_DROP) + { + vlib_increment_simple_counter (&sm->counters.fastpath.out2in.drops, + vm->thread_index, sw_if_index0, 1); + } + + n_left_from--; + next[0] = next0; + next++; + } + + vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts, + frame->n_vectors); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (snat_out2in_fast_node) = { + .name = "nat44-out2in-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + 
[SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |