diff options
Diffstat (limited to 'src/plugins/snat')
-rw-r--r-- | src/plugins/snat/in2out.c | 1597 | ||||
-rw-r--r-- | src/plugins/snat/out2in.c | 1261 | ||||
-rw-r--r-- | src/plugins/snat/snat.api | 283 | ||||
-rw-r--r-- | src/plugins/snat/snat.c | 1957 | ||||
-rw-r--r-- | src/plugins/snat/snat.h | 259 | ||||
-rw-r--r-- | src/plugins/snat/snat_all_api_h.h | 19 | ||||
-rw-r--r-- | src/plugins/snat/snat_msg_enum.h | 31 | ||||
-rw-r--r-- | src/plugins/snat/snat_test.c | 602 |
8 files changed, 6009 insertions, 0 deletions
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c new file mode 100644 index 00000000000..c78fdd76631 --- /dev/null +++ b/src/plugins/snat/in2out.c @@ -0,0 +1,1597 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/handoff.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <snat/snat.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 session_index; + u32 is_slow_path; +} snat_in2out_trace_t; + +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_in2out_worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_snat_in2out_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *); + char * tag; + + tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH"; + + s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, + t->sw_if_index, t->next_index, t->session_index); + + return s; +} + +static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *); + + s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + + return s; +} + +static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_worker_handoff_trace_t * t = + va_arg (*args, snat_in2out_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +vlib_node_registration_t snat_in2out_node; +vlib_node_registration_t snat_in2out_slowpath_node; +vlib_node_registration_t snat_in2out_fast_node; +vlib_node_registration_t snat_in2out_worker_handoff_node; + +#define foreach_snat_in2out_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(IN2OUT_PACKETS, "Good in2out packets processed") \ +_(OUT_OF_PORTS, "Out of ports") \ +_(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \ +_(BAD_ICMP_TYPE, "icmp type not echo-request") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) SNAT_IN2OUT_ERROR_##sym, + foreach_snat_in2out_error +#undef _ + SNAT_IN2OUT_N_ERROR, +} snat_in2out_error_t; + +static char * snat_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_snat_in2out_error +#undef _ +}; + +typedef enum { + SNAT_IN2OUT_NEXT_LOOKUP, + SNAT_IN2OUT_NEXT_DROP, + SNAT_IN2OUT_NEXT_SLOW_PATH, + SNAT_IN2OUT_N_NEXT, +} snat_in2out_next_t; + +static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, + ip4_header_t * ip0, + u32 rx_fib_index0, + snat_session_key_t * key0, + snat_session_t ** sessionp, + vlib_node_runtime_t * node, + u32 next0, + u32 cpu_index) +{ + snat_user_t *u; + snat_user_key_t user_key; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0, value0; + u32 oldest_per_user_translation_list_index; + dlist_elt_t * oldest_per_user_translation_list_elt; + dlist_elt_t * per_user_translation_list_elt; + dlist_elt_t * per_user_list_head_elt; + u32 session_index; + snat_session_key_t key1; + u32 address_index = ~0; + u32 outside_fib_index; + uword * p; + snat_static_mapping_key_t worker_by_out_key; + + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); + if (! p) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB]; + return SNAT_IN2OUT_NEXT_DROP; + } + outside_fib_index = p[0]; + + user_key.addr = ip0->src_address; + user_key.fib_index = rx_fib_index0; + kv0.key = user_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + { + /* no, make a new one */ + pool_get (sm->per_thread_data[cpu_index].users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip0->src_address; + + pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + + u->sessions_per_user_list_head_index = per_user_list_head_elt - + sm->per_thread_data[cpu_index].list_pool; + + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + kv0.value = u - sm->per_thread_data[cpu_index].users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + } + else + { + u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + value0.value); + } + + /* Over quota? Recycle the least recently used dynamic translation */ + if (u->nsessions >= sm->max_translations_per_user) + { + /* Remove the oldest dynamic translation */ + do { + oldest_per_user_translation_list_index = + clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + ASSERT (oldest_per_user_translation_list_index != ~0); + + /* add it back to the end of the LRU list */ + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index, + oldest_per_user_translation_list_index); + /* Get the list element */ + oldest_per_user_translation_list_elt = + pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool, + oldest_per_user_translation_list_index); + + /* Get the session index from the list element */ + session_index = oldest_per_user_translation_list_elt->value; + + /* Get the session */ + s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + session_index); + } while (snat_is_session_static (s)); + + /* Remove in2out, out2in keys */ + kv0.key = s->in2out.as_u64; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) + clib_warning ("in2out key delete failed"); + kv0.key = s->out2in.as_u64; + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) + clib_warning ("out2in key delete failed"); + + snat_free_outside_address_and_port + (sm, &s->out2in, s->outside_address_index); + s->outside_address_index = ~0; + + if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + { + ASSERT(0); + + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + return SNAT_IN2OUT_NEXT_DROP; + } + s->outside_address_index = address_index; + } + else + { + u8 static_mapping = 1; + + /* First try to match static mapping by local address and port */ + if (snat_static_mapping_match (sm, *key0, &key1, 0)) + { + static_mapping = 0; + /* Try to create dynamic translation */ + if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + return SNAT_IN2OUT_NEXT_DROP; + } + } + + /* Create a new session */ + pool_get (sm->per_thread_data[cpu_index].sessions, s); + memset (s, 0, sizeof (*s)); + + s->outside_address_index = address_index; + + if (static_mapping) + { + u->nstaticsessions++; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + } + else + { + u->nsessions++; + } + + /* Create list elts */ + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[cpu_index].sessions; + s->per_user_index = per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + } + + s->in2out = *key0; + s->out2in = key1; + s->out2in.protocol = key0->protocol; + s->out2in.fib_index = outside_fib_index; + *sessionp = s; + + /* Add to translation hashes */ + kv0.key = s->in2out.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + clib_warning ("in2out key add failed"); + + kv0.key = s->out2in.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + clib_warning ("out2in key add failed"); + + /* Add to translated packets worker lookup */ + worker_by_out_key.addr = s->out2in.addr; + worker_by_out_key.port = s->out2in.port; + worker_by_out_key.fib_index = s->out2in.fib_index; + kv0.key = worker_by_out_key.as_u64; + kv0.value = cpu_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + return next0; +} + +static inline u32 icmp_in2out_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + f64 now, + u32 cpu_index) +{ + snat_session_key_t key0; + icmp_echo_header_t *echo0; + clib_bihash_kv_8_8_t kv0, value0; + snat_session_t * s0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + return SNAT_IN2OUT_NEXT_DROP; + } + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->src_address; + key0.port = echo0->identifier; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + return next0; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + old_id0 = echo0->identifier; + new_id0 = s0->out2in.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translations */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + return next0; +} + +/** + * @brief Hairpinning + * + * Hairpinning allows two endpoints on the internal side of the NAT to + * communicate even if they only use each other's external IP addresses + * and ports. + * + * @param sm SNAT main. + * @param b0 Vlib buffer. + * @param ip0 IP header. + * @param udp0 UDP header. + * @param tcp0 TCP header. + * @param proto0 SNAT protocol. + */ +static inline void +snat_hairpinning (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + udp_header_t * udp0, + tcp_header_t * tcp0, + u32 proto0) +{ + snat_session_key_t key0, sm0; + snat_static_mapping_key_t k0; + snat_session_t * s0; + clib_bihash_kv_8_8_t kv0, value0; + ip_csum_t sum0; + u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si; + u16 new_dst_port0, old_dst_port0; + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = sm->outside_fib_index; + kv0.key = key0.as_u64; + + /* Check if destination is in active sessions */ + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* or static mappings */ + if (!snat_static_mapping_match(sm, key0, &sm0, 1)) + { + new_dst_addr0 = sm0.addr.as_u32; + new_dst_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + } + } + else + { + si = value0.value; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = udp0->dst_port; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + ASSERT(0); + else + ti = value0.value; + } + else + ti = sm->num_workers; + + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + new_dst_port0 = s0->in2out.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + } + + /* Destination is behind the same NAT, use internal address and port */ + if (new_dst_addr0) + { + old_dst_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_dst_addr0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + ip0->checksum = ip_csum_fold (sum0); + + old_dst_port0 = tcp0->ports.dst; + if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + tcp0->ports.dst = new_dst_port0; + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0, + ip4_header_t /* cheat */, length); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + udp0->dst_port = new_dst_port0; + udp0->checksum = 0; + } + } + } +} + +static inline uword +snat_in2out_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_slow_path) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + f64 now = vlib_time_now (vm); + u32 stats_node_index; + u32 cpu_index = os_get_cpu_number (); + + stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : + snat_in2out_node.index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, old_port1, new_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + icmp46_header_t * icmp0, * icmp1; + snat_session_key_t key0, key1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t * s0 = 0, * s1 = 0; + clib_bihash_kv_8_8_t kv0, value0, kv1, value1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, + node, next0, now, cpu_index); + goto trace00; + } + } + else + { + if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace00; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace00; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = s0->out2in.port; + new_port0 = tcp0->ports.src; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + proto1 = ~0; + proto1 = (ip1->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto1; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto1 == ~0)) + goto trace01; + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = icmp_in2out_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, cpu_index); + goto trace01; + } + } + else + { + if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + + key1.addr = ip1->src_address; + key1.port = udp1->src_port; + key1.protocol = proto1; + key1.fib_index = rx_fib_index1; + + kv1.key = key1.as_u64; + + if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index1, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index1; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip1->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace01; + + next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, + &s1, node, next1, cpu_index); + if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) + goto trace01; + } + else + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + else + s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value1.value); + + old_addr1 = ip1->src_address.as_u32; + ip1->src_address = s1->out2in.addr; + new_addr1 = ip1->src_address.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + src_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + old_port1 = tcp1->ports.src; + tcp1->ports.src = s1->out2in.port; + new_port1 = tcp1->ports.src; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->src_port; + udp1->src_port = s1->out2in.port; + udp1->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1); + + /* Accounting */ + s1->last_heard = now; + s1->total_pkts++; + s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s1)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_list_head_index, + s1->per_user_index); + } + trace01: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t * s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace0; + } + } + else + { + if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace0; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace0; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = s0->out2in.port; + new_port0 = tcp0->ports.src; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, stats_node_index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +static uword +snat_in2out_fast_path_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */); +} + +VLIB_REGISTER_NODE (snat_in2out_node) = { + .function = snat_in2out_fast_path_fn, + .name = "snat-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn); + +static uword +snat_in2out_slow_path_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */); +} + +VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { + .function = snat_in2out_slow_path_fn, + .name = "snat-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); + +static uword +snat_in2out_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number (); + + ASSERT (vec_len (sm->workers)); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + snat_user_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? */ + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + /* add non-traslated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + next_worker_index = value0.value; + + if (PREDICT_FALSE (next_worker_index != cpu_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, snat_in2out_node.index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, snat_in2out_node.index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { + .function = snat_in2out_worker_handoff_fn, + .name = "snat-in2out-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); + +static inline u32 icmp_in2out_static_map (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 rx_fib_index0) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->src_address; + key0.port = echo0->identifier; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + return SNAT_IN2OUT_NEXT_DROP; + } + + new_addr0 = sm0.addr.as_u32; + new_id0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + + return next0; +} + +static uword +snat_in2out_fast_static_map_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + u32 stats_node_index; + + stats_node_index = snat_in2out_fast_node.index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 proto0; + u32 rx_fib_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace0; + + next0 = icmp_in2out_static_map + (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + goto trace0; + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + next0= SNAT_IN2OUT_NEXT_DROP; + goto trace0; + } + + new_addr0 = sm0.addr.as_u32; + new_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_port0 != udp0->dst_port)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, stats_node_index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (snat_in2out_fast_node) = { + .function = snat_in2out_fast_static_map_fn, + .name = "snat-in2out-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn); diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c new file mode 100644 index 00000000000..f1f4159cdce --- /dev/null +++ b/src/plugins/snat/out2in.c @@ -0,0 +1,1261 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/handoff.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <snat/snat.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 session_index; +} snat_out2in_trace_t; + +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_out2in_worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_snat_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "SNAT_OUT2IN: sw_if_index %d, next index %d, session index %d", + t->sw_if_index, t->next_index, t->session_index); + return s; +} + +static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "SNAT_OUT2IN_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_worker_handoff_trace_t * t = + va_arg (*args, snat_out2in_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +vlib_node_registration_t snat_out2in_node; +vlib_node_registration_t snat_out2in_fast_node; +vlib_node_registration_t snat_out2in_worker_handoff_node; + +#define foreach_snat_out2in_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(OUT2IN_PACKETS, "Good out2in packets processed") \ +_(BAD_ICMP_TYPE, "icmp type not echo-reply") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) SNAT_OUT2IN_ERROR_##sym, + foreach_snat_out2in_error +#undef _ + SNAT_OUT2IN_N_ERROR, +} snat_out2in_error_t; + +static char * snat_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_snat_out2in_error +#undef _ +}; + +typedef enum { + SNAT_OUT2IN_NEXT_DROP, + SNAT_OUT2IN_NEXT_LOOKUP, + SNAT_OUT2IN_N_NEXT, +} snat_out2in_next_t; + +/** + * @brief Create session for static mapping. + * + * Create NAT session initiated by host from external network with static + * mapping. + * + * @param sm SNAT main. + * @param b0 Vlib buffer. + * @param in2out In2out SNAT session key. + * @param out2in Out2in SNAT session key. + * @param node Vlib node. + * + * @returns SNAT session if successfully created otherwise 0. + */ +static inline snat_session_t * +create_session_for_static_mapping (snat_main_t *sm, + vlib_buffer_t *b0, + snat_session_key_t in2out, + snat_session_key_t out2in, + vlib_node_runtime_t * node, + u32 cpu_index) +{ + snat_user_t *u; + snat_user_key_t user_key; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0, value0; + dlist_elt_t * per_user_translation_list_elt; + dlist_elt_t * per_user_list_head_elt; + + user_key.addr = in2out.addr; + user_key.fib_index = in2out.fib_index; + kv0.key = user_key.as_u64; + + /* Ever heard of the "user" = inside ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + { + /* no, make a new one */ + pool_get (sm->per_thread_data[cpu_index].users, u); + memset (u, 0, sizeof (*u)); + u->addr = in2out.addr; + + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_list_head_elt); + + u->sessions_per_user_list_head_index = per_user_list_head_elt - + sm->per_thread_data[cpu_index].list_pool; + + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + kv0.value = u - sm->per_thread_data[cpu_index].users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + + /* add non-traslated packets worker lookup */ + kv0.value = cpu_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + { + u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + value0.value); + } + + pool_get (sm->per_thread_data[cpu_index].sessions, s); + memset (s, 0, sizeof (*s)); + + s->outside_address_index = ~0; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[cpu_index].sessions; + s->per_user_index = + per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + s->in2out = in2out; + s->out2in = out2in; + s->in2out.protocol = out2in.protocol; + + /* Add to translation hashes */ + kv0.key = s->in2out.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + clib_warning ("in2out key add failed"); + + kv0.key = s->out2in.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + clib_warning ("out2in key add failed"); + + return s; +} + +static inline u32 icmp_out2in_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, f64 now, + u32 cpu_index) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + clib_bihash_kv_8_8_t kv0, value0; + snat_session_t * s0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->dst_address; + key0.port = echo0->identifier; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return SNAT_OUT2IN_NEXT_DROP; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, + node, cpu_index); + if (!s0) + return SNAT_OUT2IN_NEXT_DROP; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + old_id0 = echo0->identifier; + new_id0 = s0->in2out.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + return next0; +} + +static uword +snat_out2in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 cpu_index = os_get_cpu_number (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, *ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + u32 new_addr1, old_addr1; + u16 new_port1, old_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + icmp46_header_t * icmp0, * icmp1; + snat_session_key_t key0, key1, sm0, sm1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t * s0 = 0, * s1 = 0; + clib_bihash_kv_8_8_t kv0, kv1, value0, value1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace0; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); + if (!s0) + goto trace0; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = s0->in2out.port; + new_port0 = tcp0->ports.dst; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace0: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + proto1 = ~0; + proto1 = (ip1->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto1; + + if (PREDICT_FALSE (proto1 == ~0)) + goto trace1; + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = icmp_out2in_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, cpu_index); + goto trace1; + } + + key1.addr = ip1->dst_address; + key1.port = udp1->dst_port; + key1.protocol = proto1; + key1.fib_index = rx_fib_index1; + + kv1.key = key1.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key1, &sm1, 1)) + { + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace1; + } + + /* Create session initiated by host from external network */ + s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node, + cpu_index); + if (!s1) + goto trace1; + } + else + s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value1.value); + + old_addr1 = ip1->dst_address.as_u32; + ip1->dst_address = s1->in2out.addr; + new_addr1 = ip1->dst_address.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + old_port1 = tcp1->ports.dst; + tcp1->ports.dst = s1->in2out.port; + new_port1 = tcp1->ports.dst; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->dst_port; + udp1->dst_port = s1->in2out.port; + udp1->checksum = 0; + } + + /* Accounting */ + s1->last_heard = now; + s1->total_pkts++; + s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s1)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_list_head_index, + s1->per_user_index); + } + trace1: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t * s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); + if (!s0) + goto trace00; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = s0->in2out.port; + new_port0 = tcp0->ports.dst; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_node) = { + .function = snat_out2in_node_fn, + .name = "snat-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn); + +static uword +snat_out2in_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number (); + + ASSERT (vec_len (sm->workers)); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + udp_header_t * udp0; + snat_static_mapping_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? */ + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + key0.port = 0; + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + } + else + { + /* Static mapping without port */ + next_worker_index = value0.value; + } + + /* Add to translated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + } + else + next_worker_index = value0.value; + + if (PREDICT_FALSE (next_worker_index != cpu_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, snat_out2in_node.index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, snat_out2in_node.index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { + .function = snat_out2in_worker_handoff_fn, + .name = "snat-out2in-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn); + +static inline u32 icmp_out2in_fast (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 rx_fib_index0) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->dst_address; + key0.port = echo0->identifier; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return SNAT_OUT2IN_NEXT_DROP; + } + + new_addr0 = sm0.addr.as_u32; + new_id0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + + return next0; +} + +static uword +snat_out2in_fast_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_DROP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 proto0; + u32 rx_fib_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + vnet_feature_next (sw_if_index0, &next0, b0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_fast + (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + new_addr0 = sm0.addr.as_u32; + new_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_port0 != udp0->dst_port)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + tcp0->checksum = ip_csum_fold(sum0); + } + } + + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_fast_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_fast_node) = { + .function = snat_out2in_fast_node_fn, + .name = "snat-out2in-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn); diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api new file mode 100644 index 00000000000..a191eed5944 --- /dev/null +++ b/src/plugins/snat/snat.api @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file snat.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane API messages which are generally + * called through a shared memory interface. + */ + +/** \brief Add/del S-NAT address range + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @first_ip_address - first IP address + @last_ip_address - last IP address + @is_add - 1 if add, 0 if delete +*/ +define snat_add_address_range { + u32 client_index; + u32 context; + u8 is_ip4; + u8 first_ip_address[16]; + u8 last_ip_address[16]; + u8 is_add; +}; + +/** \brief Add S-NAT address range reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_add_address_range_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT addresses + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_address_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT address details response + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @param ip_address - IP address +*/ +define snat_address_details { + u32 context; + u8 is_ip4; + u8 ip_address[16]; +}; + +/** \brief Enable/disable S-NAT feature on the interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 1 if add, 0 if delete + @param is_inside - 1 if inside, 0 if outside + @param sw_if_index - software index of the interface +*/ +define snat_interface_add_del_feature { + u32 client_index; + u32 context; + u8 is_add; + u8 is_inside; + u32 sw_if_index; +}; + +/** \brief Enable/disable S-NAT feature on the interface reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_interface_add_del_feature_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump interfaces with S-NAT feature + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_interface_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT interface details response + @param context - sender context, to match reply w/ request + @param is_inside - 1 if inside, 0 if outside + @param sw_if_index - software index of the interface +*/ +define snat_interface_details { + u32 context; + u8 is_inside; + u32 sw_if_index; +}; + +/** \brief Add/delete S-NAT static mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 1 if add, 0 if delete + @param is_ip4 - 1 if address type is IPv4 + @param addr_only - 1 if address only mapping + @param local_ip_address - local IP address + @param external_ip_address - external IP address + @param local_port - local port number + @param external_port - external port number + @param vfr_id - VRF ID +*/ +define snat_add_static_mapping { + u32 client_index; + u32 context; + u8 is_add; + u8 is_ip4; + u8 addr_only; + u8 local_ip_address[16]; + u8 external_ip_address[16]; + u16 local_port; + u16 external_port; + u32 vrf_id; +}; + +/** \brief Add/delete S-NAT static mapping reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_add_static_mapping_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT static mappings + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_static_mapping_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT static mapping details response + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @param addr_only - 1 if address only mapping + @param local_ip_address - local IP address + @param external_ip_address - external IP address + @param local_port - local port number + @param external_port - external port number + @param vfr_id - VRF ID +*/ +define snat_static_mapping_details { + u32 context; + u8 is_ip4; + u8 addr_only; + u8 local_ip_address[16]; + u8 external_ip_address[16]; + u16 local_port; + u16 external_port; + u32 vrf_id; +}; + +/** \brief Control ping from client to api server request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_control_ping +{ + u32 client_index; + u32 context; +}; + +/** \brief Control ping from the client to the server response + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param vpe_pid - the pid of the vpe, returned by the server +*/ +define snat_control_ping_reply +{ + u32 context; + i32 retval; + u32 client_index; + u32 vpe_pid; +}; + +/** \brief Show S-NAT plugin startup config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_show_config +{ + u32 client_index; + u32 context; +}; + +/** \brief Show S-NAT plugin startup config reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param static_mapping_only - if 1 dynamic translations disabled + @param static_mapping_connection_tracking - if 1 create session data + @param translation_buckets - number of translation hash buckets + @param translation_memory_size - translation hash memory size + @param user_buckets - number of user hash buckets + @param user_memory_size - user hash memory size + @param max_translations_per_user - maximum number of translations per user + @param outside_vrf_id - outside VRF id + @param inside_vrf_id - default inside VRF id +*/ +define snat_show_config_reply +{ + u32 context; + i32 retval; + u8 static_mapping_only; + u8 static_mapping_connection_tracking; + u32 translation_buckets; + u32 translation_memory_size; + u32 user_buckets; + u32 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 inside_vrf_id; +}; + +/** \brief Set S-NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param worker_mask - S-NAT workers mask +*/ +define snat_set_workers { + u32 client_index; + u32 context; + u64 worker_mask; +}; + +/** \brief Set S-NAT workers reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_set_workers_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_worker_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT workers details response + @param context - sender context, to match reply w/ request + @param worker_index - worker index + @param lcore_id - lcore ID + @param name - worker name +*/ +define snat_worker_details { + u32 context; + u32 worker_index; + u32 lcore_id; + u8 name[64]; +}; diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c new file mode 100644 index 00000000000..bc9956841d2 --- /dev/null +++ b/src/plugins/snat/snat.c @@ -0,0 +1,1957 @@ +/* + * snat.c - simple nat plugin + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vlibapi/api.h> +#include <snat/snat.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +snat_main_t snat_main; + +/* define message IDs */ +#include <snat/snat_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <snat/snat_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <snat/snat_all_api_h.h> +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <snat/snat_all_api_h.h> +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* Hook up input features */ +VNET_FEATURE_INIT (ip4_snat_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out", + .runs_before = VNET_FEATURES ("snat-out2in"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out-worker-handoff", + .runs_before = VNET_FEATURES ("snat-out2in-worker-handoff"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in-worker-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out-fast", + .runs_before = VNET_FEATURES ("snat-out2in-fast"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in-fast", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + snat_main_t * sm = &snat_main; + clib_error_t * error = 0; + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + sm->ethernet_main = h->ethernet_main; + + return error; +} + +/*$$$$$ move to an installed header file */ +#if (1 || CLIB_DEBUG > 0) /* "trust, but verify" */ + +#define VALIDATE_SW_IF_INDEX(mp) \ + do { u32 __sw_if_index = ntohl(mp->sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_sw_if_index; \ + } \ +} while(0); + +#define BAD_SW_IF_INDEX_LABEL \ +do { \ +bad_sw_if_index: \ + ; \ +} while (0); + +#define VALIDATE_RX_SW_IF_INDEX(mp) \ + do { u32 __rx_sw_if_index = ntohl(mp->rx_sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __rx_sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_rx_sw_if_index; \ + } \ +} while(0); + +#define BAD_RX_SW_IF_INDEX_LABEL \ +do { \ +bad_rx_sw_if_index: \ + ; \ +} while (0); + +#define VALIDATE_TX_SW_IF_INDEX(mp) \ + do { u32 __tx_sw_if_index = ntohl(mp->tx_sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __tx_sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_tx_sw_if_index; \ + } \ +} while(0); + +#define BAD_TX_SW_IF_INDEX_LABEL \ +do { \ +bad_tx_sw_if_index: \ + ; \ +} while (0); + +#else + +#define VALIDATE_SW_IF_INDEX(mp) +#define BAD_SW_IF_INDEX_LABEL +#define VALIDATE_RX_SW_IF_INDEX(mp) +#define BAD_RX_SW_IF_INDEX_LABEL +#define VALIDATE_TX_SW_IF_INDEX(mp) +#define BAD_TX_SW_IF_INDEX_LABEL + +#endif /* CLIB_DEBUG > 0 */ + +void snat_add_address (snat_main_t *sm, ip4_address_t *addr) +{ + snat_address_t * ap; + + /* Check if address already exists */ + vec_foreach (ap, sm->addresses) + { + if (ap->addr.as_u32 == addr->as_u32) + return; + } + + vec_add2 (sm->addresses, ap, 1); + ap->addr = *addr; + clib_bitmap_alloc (ap->busy_port_bitmap, 65535); +} + +static int is_snat_address_used_in_static_mapping (snat_main_t *sm, + ip4_address_t addr) +{ + snat_static_mapping_t *m; + pool_foreach (m, sm->static_mappings, + ({ + if (m->external_addr.as_u32 == addr.as_u32) + return 1; + })); + + return 0; +} + +int snat_del_address (snat_main_t *sm, ip4_address_t addr) +{ + snat_address_t *a = 0; + snat_session_t *ses; + u32 *ses_to_be_removed = 0, *ses_index; + clib_bihash_kv_8_8_t kv, value; + snat_user_key_t user_key; + snat_user_t *u; + snat_main_per_thread_data_t *tsm; + + int i; + + /* Find SNAT address */ + for (i=0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == addr.as_u32) + { + a = sm->addresses + i; + break; + } + } + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Check if address is used in some static mapping */ + if (is_snat_address_used_in_static_mapping(sm, addr)) + { + clib_warning ("address used in static mapping"); + return VNET_API_ERROR_UNSPECIFIED; + } + + /* Delete sessions using address */ + if (a->busy_ports) + { + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (ses, tsm->sessions, ({ + if (ses->out2in.addr.as_u32 == addr.as_u32) + { + vec_add1 (ses_to_be_removed, ses - tsm->sessions); + kv.key = ses->in2out.as_u64; + clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0); + kv.key = ses->out2in.as_u64; + clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0); + clib_dlist_remove (tsm->list_pool, ses->per_user_index); + user_key.addr = ses->in2out.addr; + user_key.fib_index = ses->in2out.fib_index; + kv.key = user_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + u = pool_elt_at_index (tsm->users, value.value); + u->nsessions--; + } + } + })); + + vec_foreach (ses_index, ses_to_be_removed) + pool_put_index (tsm->sessions, ses_index[0]); + + vec_free (ses_to_be_removed); + } + } + + vec_del1 (sm->addresses, i); + + return 0; +} + +static void increment_v4_address (ip4_address_t * a) +{ + u32 v; + + v = clib_net_to_host_u32(a->as_u32) + 1; + a->as_u32 = clib_host_to_net_u32(v); +} + +/** + * @brief Add static mapping. + * + * Create static mapping between local addr+port and external addr+port. + * + * @param l_addr Local IPv4 address. + * @param e_addr External IPv4 address. + * @param l_port Local port number. + * @param e_port External port number. + * @param vrf_id VRF ID. + * @param addr_only If 0 address port and pair mapping, otherwise address only. + * @param is_add If 0 delete static mapping, otherwise add. + * + * @returns + */ +int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, + u16 l_port, u16 e_port, u32 vrf_id, int addr_only, + int is_add) +{ + snat_main_t * sm = &snat_main; + snat_static_mapping_t *m; + snat_static_mapping_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + snat_address_t *a = 0; + u32 fib_index = ~0; + uword * p; + int i; + + /* If outside FIB index is not resolved yet */ + if (sm->outside_fib_index == ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + sm->outside_fib_index = p[0]; + } + + m_key.addr = e_addr; + m_key.port = addr_only ? 0 : e_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + /* Convert VRF id to FIB index */ + if (vrf_id != ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + fib_index = p[0]; + } + /* If not specified use inside VRF id from SNAT plugin startup config */ + else + { + if (sm->inside_fib_index == ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->inside_vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + fib_index = p[0]; + sm->inside_fib_index = fib_index; + } + else + fib_index = sm->inside_fib_index; + + vrf_id = sm->inside_vrf_id; + } + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + if (clib_bitmap_get_no_check (a->busy_port_bitmap, e_port)) + return VNET_API_ERROR_INVALID_VALUE; + clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 1); + if (e_port > 1024) + a->busy_ports++; + + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + memset (m, 0, sizeof (*m)); + m->local_addr = l_addr; + m->external_addr = e_addr; + m->addr_only = addr_only; + m->vrf_id = vrf_id; + m->fib_index = fib_index; + if (!addr_only) + { + m->local_port = l_port; + m->external_port = e_port; + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1); + + /* Assign worker */ + if (sm->workers) + { + snat_user_key_t w_key0; + snat_static_mapping_key_t w_key1; + + w_key0.addr = m->local_addr; + w_key0.fib_index = m->fib_index; + kv.key = w_key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + { + kv.value = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1); + } + else + { + kv.value = value.value; + } + + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1); + } + } + else + { + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Free external address port */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 0); + a->busy_ports--; + + break; + } + } + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0); + + /* Delete session(s) for static mapping if exist */ + if (!(sm->static_mapping_only) || + (sm->static_mapping_only && sm->static_mapping_connection_tracking)) + { + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t * head, * elt; + u32 elt_index, head_index, del_elt_index; + u32 ses_index; + u64 user_index; + snat_session_t * s; + snat_main_per_thread_data_t *tsm; + + u_key.addr = m->local_addr; + u_key.fib_index = m->fib_index; + kv.key = u_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + user_index = value.value; + if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + u = pool_elt_at_index (tsm->users, user_index); + if (u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (tsm->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + while (ses_index != ~0) + { + s = pool_elt_at_index (tsm->sessions, ses_index); + del_elt_index = elt_index; + elt_index = elt->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + + if (!addr_only) + { + if ((s->out2in.addr.as_u32 != e_addr.as_u32) && + (clib_net_to_host_u16 (s->out2in.port) != e_port)) + continue; + } + + value.key = s->in2out.as_u64; + clib_bihash_add_del_8_8 (&sm->in2out, &value, 0); + value.key = s->out2in.as_u64; + clib_bihash_add_del_8_8 (&sm->out2in, &value, 0); + pool_put (tsm->sessions, s); + + clib_dlist_remove (tsm->list_pool, del_elt_index); + pool_put_index (tsm->list_pool, del_elt_index); + u->nstaticsessions--; + + if (!addr_only) + break; + } + if (addr_only) + { + pool_put (tsm->users, u); + clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0); + } + } + } + } + + /* Delete static mapping from pool */ + pool_put (sm->static_mappings, m); + } + + return 0; +} + +static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + const char * feature_name; + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + feature_name = is_inside ? "snat-in2out-fast" : "snat-out2in-fast"; + else + { + if (sm->num_workers > 1) + feature_name = is_inside ? "snat-in2out-worker-handoff" : "snat-out2in-worker-handoff"; + else + feature_name = is_inside ? "snat-in2out" : "snat-out2in"; + } + + vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, + !is_del, 0, 0); + + if (sm->fq_in2out_index == ~0) + sm->fq_in2out_index = vlib_frame_queue_main_init (snat_in2out_node.index, 0); + + if (sm->fq_out2in_index == ~0) + sm->fq_out2in_index = vlib_frame_queue_main_init (snat_out2in_node.index, 0); + + pool_foreach (i, sm->interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + if (is_del) + pool_put (sm->interfaces, i); + else + return VNET_API_ERROR_VALUE_EXIST; + + return 0; + } + })); + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get (sm->interfaces, i); + i->sw_if_index = sw_if_index; + i->is_inside = is_inside; + + return 0; +} + +static int snat_set_workers (uword * bitmap) +{ + snat_main_t *sm = &snat_main; + int i; + + if (sm->num_workers < 2) + return VNET_API_ERROR_FEATURE_DISABLED; + + if (clib_bitmap_last_set (bitmap) >= sm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + vec_free (sm->workers); + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(sm->workers, i); + })); + + return 0; +} + +static void +vl_api_snat_add_address_range_t_handler +(vl_api_snat_add_address_range_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_add_address_range_reply_t * rmp; + ip4_address_t this_addr; + u32 start_host_order, end_host_order; + int i, count; + int rv = 0; + u32 * tmp; + + if (mp->is_ip4 != 1) + { + rv = VNET_API_ERROR_UNIMPLEMENTED; + goto send_reply; + } + + if (sm->static_mapping_only) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + tmp = (u32 *) mp->first_ip_address; + start_host_order = clib_host_to_net_u32 (tmp[0]); + tmp = (u32 *) mp->last_ip_address; + end_host_order = clib_host_to_net_u32 (tmp[0]); + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + clib_warning ("%U - %U, %d addresses...", + format_ip4_address, mp->first_ip_address, + format_ip4_address, mp->last_ip_address, + count); + + memcpy (&this_addr.as_u8, mp->first_ip_address, 4); + + for (i = 0; i < count; i++) + { + if (mp->is_add) + snat_add_address (sm, &this_addr); + else + rv = snat_del_address (sm, this_addr); + + if (rv) + goto send_reply; + + increment_v4_address (&this_addr); + } + + send_reply: + REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY); +} + +static void *vl_api_snat_add_address_range_t_print +(vl_api_snat_add_address_range_t *mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_add_address_range "); + s = format (s, "%U ", format_ip4_address, mp->first_ip_address); + if (memcmp (mp->first_ip_address, mp->last_ip_address, 4)) + { + s = format (s, " - %U ", format_ip4_address, mp->last_ip_address); + } + FINISH; +} + +static void +send_snat_address_details +(snat_address_t * a, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_address_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_ADDRESS_DETAILS+sm->msg_id_base); + rmp->is_ip4 = 1; + clib_memcpy (rmp->ip_address, &(a->addr), 4); + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_address_dump_t_handler +(vl_api_snat_address_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_address_t * a; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_foreach (a, sm->addresses) + send_snat_address_details (a, q, mp->context); +} + +static void *vl_api_snat_address_dump_t_print +(vl_api_snat_address_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_address_dump "); + + FINISH; +} + +static void +vl_api_snat_interface_add_del_feature_t_handler +(vl_api_snat_interface_add_del_feature_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_interface_add_del_feature_reply_t * rmp; + u8 is_del = mp->is_add == 0; + u32 sw_if_index = ntohl(mp->sw_if_index); + int rv = 0; + + VALIDATE_SW_IF_INDEX(mp); + + rv = snat_interface_add_del (sw_if_index, mp->is_inside, is_del); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO(VL_API_SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY); +} + +static void *vl_api_snat_interface_add_del_feature_t_print +(vl_api_snat_interface_add_del_feature_t * mp, void *handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_interface_add_del_feature "); + s = format (s, "sw_if_index %d %s %s", + clib_host_to_net_u32(mp->sw_if_index), + mp->is_inside ? "in":"out", + mp->is_add ? "" : "del"); + + FINISH; +} + +static void +send_snat_interface_details +(snat_interface_t * i, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_interface_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_INTERFACE_DETAILS+sm->msg_id_base); + rmp->sw_if_index = ntohl (i->sw_if_index); + rmp->is_inside = i->is_inside; + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_interface_dump_t_handler +(vl_api_snat_interface_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_interface_t * i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + pool_foreach (i, sm->interfaces, + ({ + send_snat_interface_details(i, q, mp->context); + })); +} + +static void *vl_api_snat_interface_dump_t_print +(vl_api_snat_interface_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_interface_dump "); + + FINISH; +}static void + +vl_api_snat_add_static_mapping_t_handler +(vl_api_snat_add_static_mapping_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_add_static_mapping_reply_t * rmp; + ip4_address_t local_addr, external_addr; + u16 local_port = 0, external_port = 0; + u32 vrf_id; + int rv = 0; + + if (mp->is_ip4 != 1) + { + rv = VNET_API_ERROR_UNIMPLEMENTED; + goto send_reply; + } + + memcpy (&local_addr.as_u8, mp->local_ip_address, 4); + memcpy (&external_addr.as_u8, mp->external_ip_address, 4); + if (mp->addr_only == 0) + { + local_port = clib_net_to_host_u16 (mp->local_port); + external_port = clib_net_to_host_u16 (mp->external_port); + } + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + + rv = snat_add_static_mapping(local_addr, external_addr, local_port, + external_port, vrf_id, mp->addr_only, + mp->is_add); + + send_reply: + REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY); +} + +static void *vl_api_snat_add_static_mapping_t_print +(vl_api_snat_add_static_mapping_t *mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_add_static_mapping "); + s = format (s, "local_addr %U external_addr %U ", + format_ip4_address, mp->local_ip_address, + format_ip4_address, mp->external_ip_address); + + if (mp->addr_only == 0) + s = format (s, "local_port %d external_port %d ", + clib_net_to_host_u16 (mp->local_port), + clib_net_to_host_u16 (mp->external_port)); + + if (mp->vrf_id != ~0) + s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id)); + + FINISH; +} + +static void +send_snat_static_mapping_details +(snat_static_mapping_t * m, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_static_mapping_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_STATIC_MAPPING_DETAILS+sm->msg_id_base); + rmp->is_ip4 = 1; + rmp->addr_only = m->addr_only; + clib_memcpy (rmp->local_ip_address, &(m->local_addr), 4); + clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4); + rmp->local_port = htons (m->local_port); + rmp->external_port = htons (m->external_port); + rmp->vrf_id = htonl (m->vrf_id); + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_static_mapping_dump_t_handler +(vl_api_snat_static_mapping_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_static_mapping_t * m; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + pool_foreach (m, sm->static_mappings, + ({ + send_snat_static_mapping_details (m, q, mp->context); + })); +} + +static void *vl_api_snat_static_mapping_dump_t_print +(vl_api_snat_static_mapping_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_static_mapping_dump "); + + FINISH; +} + +static void +vl_api_snat_control_ping_t_handler +(vl_api_snat_control_ping_t * mp) +{ + vl_api_snat_control_ping_reply_t *rmp; + snat_main_t * sm = &snat_main; + int rv = 0; + + REPLY_MACRO2(VL_API_SNAT_CONTROL_PING_REPLY, + ({ + rmp->vpe_pid = ntohl (getpid()); + })); +} + +static void *vl_api_snat_control_ping_t_print +(vl_api_snat_control_ping_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_control_ping "); + + FINISH; +} + +static void +vl_api_snat_show_config_t_handler +(vl_api_snat_show_config_t * mp) +{ + vl_api_snat_show_config_reply_t *rmp; + snat_main_t * sm = &snat_main; + int rv = 0; + + REPLY_MACRO2(VL_API_SNAT_SHOW_CONFIG_REPLY, + ({ + rmp->translation_buckets = htonl (sm->translation_buckets); + rmp->translation_memory_size = htonl (sm->translation_memory_size); + rmp->user_buckets = htonl (sm->user_buckets); + rmp->user_memory_size = htonl (sm->user_memory_size); + rmp->max_translations_per_user = htonl (sm->max_translations_per_user); + rmp->outside_vrf_id = htonl (sm->outside_vrf_id); + rmp->inside_vrf_id = htonl (sm->inside_vrf_id); + rmp->static_mapping_only = sm->static_mapping_only; + rmp->static_mapping_connection_tracking = + sm->static_mapping_connection_tracking; + })); +} + +static void *vl_api_snat_show_config_t_print +(vl_api_snat_show_config_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_show_config "); + + FINISH; +} + +static void +vl_api_snat_set_workers_t_handler +(vl_api_snat_set_workers_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_set_workers_reply_t * rmp; + int rv = 0; + uword *bitmap = 0; + u64 mask = clib_net_to_host_u64 (mp->worker_mask); + + if (sm->num_workers < 2) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask)); + rv = snat_set_workers(bitmap); + clib_bitmap_free (bitmap); + + send_reply: + REPLY_MACRO (VL_API_SNAT_SET_WORKERS_REPLY); +} + +static void *vl_api_snat_set_workers_t_print +(vl_api_snat_set_workers_t *mp, void * handle) +{ + u8 * s; + uword *bitmap = 0; + u8 first = 1; + int i; + u64 mask = clib_net_to_host_u64 (mp->worker_mask); + + s = format (0, "SCRIPT: snat_set_workers "); + bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask)); + clib_bitmap_foreach (i, bitmap, + ({ + if (first) + s = format (s, "%d", i); + else + s = format (s, ",%d", i); + first = 0; + })); + clib_bitmap_free (bitmap); + FINISH; +} + +static void +send_snat_worker_details +(u32 worker_index, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_worker_details_t *rmp; + snat_main_t * sm = &snat_main; + vlib_worker_thread_t *w = + vlib_worker_threads + worker_index + sm->first_worker_index; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_WORKER_DETAILS+sm->msg_id_base); + rmp->context = context; + rmp->worker_index = htonl (worker_index); + rmp->lcore_id = htonl (w->lcore_id); + strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_worker_dump_t_handler +(vl_api_snat_worker_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + u32 * worker_index; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_foreach (worker_index, sm->workers) + { + send_snat_worker_details(*worker_index, q, mp->context); + } +} + +static void *vl_api_snat_worker_dump_t_print +(vl_api_snat_worker_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_worker_dump "); + + FINISH; +} + +/* List of message types that this plugin understands */ +#define foreach_snat_plugin_api_msg \ +_(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \ +_(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature) \ +_(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping) \ +_(SNAT_CONTROL_PING, snat_control_ping) \ +_(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump) \ +_(SNAT_SHOW_CONFIG, snat_show_config) \ +_(SNAT_ADDRESS_DUMP, snat_address_dump) \ +_(SNAT_INTERFACE_DUMP, snat_interface_dump) \ +_(SNAT_SET_WORKERS, snat_set_workers) \ +_(SNAT_WORKER_DUMP, snat_worker_dump) + +/* Set up the API message handling tables */ +static clib_error_t * +snat_plugin_api_hookup (vlib_main_t *vm) +{ + snat_main_t * sm __attribute__ ((unused)) = &snat_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_snat_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <snat/snat_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (snat_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_snat; +#undef _ +} + +static void plugin_custom_dump_configure (snat_main_t * sm) +{ +#define _(n,f) sm->api_main->msg_print_handlers \ + [VL_API_##n + sm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_snat_plugin_api_msg; +#undef _ +} + +static clib_error_t * snat_init (vlib_main_t * vm) +{ + snat_main_t * sm = &snat_main; + clib_error_t * error = 0; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u8 * name; + uword *p; + vlib_thread_registration_t *tr; + vlib_thread_main_t *tm = vlib_get_thread_main (); + uword *bitmap = 0; + u32 i; + + name = format (0, "snat_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main(); + sm->ip4_main = im; + sm->ip4_lookup_main = lm; + sm->api_main = &api_main; + sm->first_worker_index = 0; + sm->next_worker = 0; + sm->num_workers = 0; + sm->workers = 0; + sm->fq_in2out_index = ~0; + sm->fq_out2in_index = ~0; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + sm->num_workers = tr->count; + sm->first_worker_index = tr->first_index; + } + } + + /* Use all available workers by default */ + if (sm->num_workers > 1) + { + for (i=0; i < sm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + snat_set_workers(bitmap); + clib_bitmap_free (bitmap); + } + + error = snat_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (sm, &api_main); + + plugin_custom_dump_configure (sm); + vec_free(name); + + return error; +} + +VLIB_INIT_FUNCTION (snat_init); + +void snat_free_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 address_index) +{ + snat_address_t *a; + u16 port_host_byte_order = clib_net_to_host_u16 (k->port); + + ASSERT (address_index < vec_len (sm->addresses)); + + a = sm->addresses + address_index; + + ASSERT (clib_bitmap_get_no_check (a->busy_port_bitmap, + port_host_byte_order) == 1); + + clib_bitmap_set_no_check (a->busy_port_bitmap, port_host_byte_order, 0); + a->busy_ports--; +} + +/** + * @brief Match SNAT static mapping. + * + * @param sm SNAT main. + * @param match Address and port to match. + * @param mapping External or local address and port of the matched mapping. + * @param by_external If 0 match by local address otherwise match by external + * address. + * + * @returns 0 if match found otherwise 1. + */ +int snat_static_mapping_match (snat_main_t * sm, + snat_session_key_t match, + snat_session_key_t * mapping, + u8 by_external) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + snat_static_mapping_key_t m_key; + clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local; + + if (by_external) + mapping_hash = &sm->static_mapping_by_external; + + m_key.addr = match.addr; + m_key.port = clib_net_to_host_u16 (match.port); + m_key.fib_index = match.fib_index; + + kv.key = m_key.as_u64; + + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + { + /* Try address only mapping */ + m_key.port = 0; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + return 1; + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (by_external) + { + mapping->addr = m->local_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->local_port); + mapping->fib_index = m->fib_index; + } + else + { + mapping->addr = m->external_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->external_port); + mapping->fib_index = sm->outside_fib_index; + } + + return 0; +} + +int snat_alloc_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 * address_indexp) +{ + int i; + snat_address_t *a; + u32 portnum; + + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].busy_ports < (65535-1024)) + { + a = sm->addresses + i; + + while (1) + { + portnum = random_u32 (&sm->random_seed); + portnum &= 0xFFFF; + if (portnum < 1024) + continue; + if (clib_bitmap_get_no_check (a->busy_port_bitmap, portnum)) + continue; + clib_bitmap_set_no_check (a->busy_port_bitmap, portnum, 1); + a->busy_ports++; + /* Caller sets protocol and fib index */ + k->addr = a->addr; + k->port = clib_host_to_net_u16(portnum); + *address_indexp = i; + return 0; + } + } + } + /* Totally out of translations to use... */ + return 1; +} + + +static clib_error_t * +add_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t * sm = &snat_main; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; + int i, count; + int is_add = 1; + int rv = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "del")) + is_add = 0; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sm->static_mapping_only) + return clib_error_return (0, "static mapping only mode"); + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + return clib_error_return (0, "end address less than start address"); + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + clib_warning ("%U - %U, %d addresses...", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, + count); + + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + if (is_add) + snat_add_address (sm, &this_addr); + else + rv = snat_del_address (sm, this_addr); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "S-NAT address not exist."); + break; + case VNET_API_ERROR_UNSPECIFIED: + return clib_error_return (0, "S-NAT address used in static mapping."); + break; + default: + break; + } + + increment_v4_address (&this_addr); + } + + return 0; +} + +VLIB_CLI_COMMAND (add_address_command, static) = { + .path = "snat add address", + .short_help = "snat add addresses <ip4-range-start> [- <ip4-range-end>] [del]", + .function = add_address_command_fn, +}; + +static clib_error_t * +snat_feature_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 * inside_sw_if_indices = 0; + u32 * outside_sw_if_indices = 0; + int is_del = 0; + int i; + + sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (inside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "out %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (outside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (vec_len (inside_sw_if_indices)) + { + for (i = 0; i < vec_len(inside_sw_if_indices); i++) + { + sw_if_index = inside_sw_if_indices[i]; + snat_interface_add_del (sw_if_index, 1, is_del); + } + } + + if (vec_len (outside_sw_if_indices)) + { + for (i = 0; i < vec_len(outside_sw_if_indices); i++) + { + sw_if_index = outside_sw_if_indices[i]; + snat_interface_add_del (sw_if_index, 0, is_del); + } + } + + vec_free (inside_sw_if_indices); + vec_free (outside_sw_if_indices); + + return error; +} + +VLIB_CLI_COMMAND (set_interface_snat_command, static) = { + .path = "set interface snat", + .function = snat_feature_command_fn, + .short_help = "set interface snat in <intfc> out <intfc> [del]", +}; + +static clib_error_t * +add_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t * error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = ~0; + int is_add = 1; + int addr_only = 1; + int rv; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr, + &l_port)) + addr_only = 0; + else if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr)) + ; + else if (unformat (line_input, "external %U %u", unformat_ip4_address, + &e_addr, &e_port)) + addr_only = 0; + else if (unformat (line_input, "external %U", unformat_ip4_address, + &e_addr)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + return clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + } + unformat_free (line_input); + + rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port, + vrf_id, addr_only, is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "External port already in use."); + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + return clib_error_return (0, "External addres must be allocated."); + else + return clib_error_return (0, "Mapping not exist."); + break; + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "No such VRF id."); + case VNET_API_ERROR_VALUE_EXIST: + return clib_error_return (0, "Mapping already exist."); + default: + break; + } + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat add static mapping} + * Static mapping allows hosts on the external network to initiate connection + * to to the local network host. + * To create static mapping between local host address 10.0.0.3 port 6303 and + * external address 4.4.4.4 port 3606 use: + * vpp# snat add static mapping local 10.0.0.3 6303 external 4.4.4.4 3606 + * If not runnig "static mapping only" S-NAT plugin mode use before: + * vpp# snat add address 4.4.4.4 + * To create static mapping between local and external address use: + * vpp# snat add static mapping local 10.0.0.3 external 4.4.4.4 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_static_mapping_command, static) = { + .path = "snat add static mapping", + .function = add_static_mapping_command_fn, + .short_help = + "snat add static mapping local <addr> [<port>] external <addr> [<port>] [vrf <table-id>] [del]", +}; + +static clib_error_t * +set_workers_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + uword *bitmap = 0; + int rv = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) + ; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (bitmap == 0) + return clib_error_return (0, "List of workers must be specified."); + + rv = snat_set_workers(bitmap); + + clib_bitmap_free (bitmap); + + switch (rv) + { + case VNET_API_ERROR_INVALID_WORKER: + return clib_error_return (0, "Invalid worker(s)."); + break; + case VNET_API_ERROR_FEATURE_DISABLED: + return clib_error_return (0, + "Supported only if 2 or more workes available."); + break; + default: + break; + } + + return 0; +} + +/*? + * @cliexpar + * @cliexstart{set snat workers} + * Set SNAT workers if 2 or more workers available, use: + * vpp# set snat workers 0-2,5 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_workers_command, static) = { + .path = "set snat workers", + .function = set_workers_command_fn, + .short_help = + "set snat workers <workers-list>", +}; + +static clib_error_t * +snat_config (vlib_main_t * vm, unformat_input_t * input) +{ + snat_main_t * sm = &snat_main; + u32 translation_buckets = 1024; + u32 translation_memory_size = 128<<20; + u32 user_buckets = 128; + u32 user_memory_size = 64<<20; + u32 max_translations_per_user = 100; + u32 outside_vrf_id = 0; + u32 inside_vrf_id = 0; + u32 static_mapping_buckets = 1024; + u32 static_mapping_memory_size = 64<<20; + u8 static_mapping_only = 0; + u8 static_mapping_connection_tracking = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "translation hash buckets %d", &translation_buckets)) + ; + else if (unformat (input, "translation hash memory %d", + &translation_memory_size)); + else if (unformat (input, "user hash buckets %d", &user_buckets)) + ; + else if (unformat (input, "user hash memory %d", + &user_memory_size)) + ; + else if (unformat (input, "max translations per user %d", + &max_translations_per_user)) + ; + else if (unformat (input, "outside VRF id %d", + &outside_vrf_id)) + ; + else if (unformat (input, "inside VRF id %d", + &inside_vrf_id)) + ; + else if (unformat (input, "static mapping only")) + { + static_mapping_only = 1; + if (unformat (input, "connection tracking")) + static_mapping_connection_tracking = 1; + } + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + /* for show commands, etc. */ + sm->translation_buckets = translation_buckets; + sm->translation_memory_size = translation_memory_size; + sm->user_buckets = user_buckets; + sm->user_memory_size = user_memory_size; + sm->max_translations_per_user = max_translations_per_user; + sm->outside_vrf_id = outside_vrf_id; + sm->outside_fib_index = ~0; + sm->inside_vrf_id = inside_vrf_id; + sm->inside_fib_index = ~0; + sm->static_mapping_only = static_mapping_only; + sm->static_mapping_connection_tracking = static_mapping_connection_tracking; + + if (!static_mapping_only || + (static_mapping_only && static_mapping_connection_tracking)) + { + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, + user_memory_size); + + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, + user_memory_size); + + vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1); + + clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + user_memory_size); + } + clib_bihash_init_8_8 (&sm->static_mapping_by_local, + "static_mapping_by_local", static_mapping_buckets, + static_mapping_memory_size); + + clib_bihash_init_8_8 (&sm->static_mapping_by_external, + "static_mapping_by_external", static_mapping_buckets, + static_mapping_memory_size); + return 0; +} + +VLIB_CONFIG_FUNCTION (snat_config, "snat"); + +u8 * format_snat_key (u8 * s, va_list * args) +{ + snat_session_key_t * key = va_arg (*args, snat_session_key_t *); + char * protocol_string = "unknown"; + static char *protocol_strings[] = { + "UDP", + "TCP", + "ICMP", + }; + + if (key->protocol < ARRAY_LEN(protocol_strings)) + protocol_string = protocol_strings[key->protocol]; + + s = format (s, "%U proto %s port %d fib %d", + format_ip4_address, &key->addr, protocol_string, + clib_net_to_host_u16 (key->port), key->fib_index); + return s; +} + +u8 * format_snat_session (u8 * s, va_list * args) +{ + snat_main_t * sm __attribute__((unused)) = va_arg (*args, snat_main_t *); + snat_session_t * sess = va_arg (*args, snat_session_t *); + + s = format (s, " i2o %U\n", format_snat_key, &sess->in2out); + s = format (s, " o2i %U\n", format_snat_key, &sess->out2in); + s = format (s, " last heard %.2f\n", sess->last_heard); + s = format (s, " total pkts %d, total bytes %lld\n", + sess->total_pkts, sess->total_bytes); + if (snat_is_session_static (sess)) + s = format (s, " static translation\n"); + else + s = format (s, " dynamic translation\n"); + + return s; +} + +u8 * format_snat_user (u8 * s, va_list * args) +{ + snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *); + snat_user_t * u = va_arg (*args, snat_user_t *); + int verbose = va_arg (*args, int); + dlist_elt_t * head, * elt; + u32 elt_index, head_index; + u32 session_index; + snat_session_t * sess; + + s = format (s, "%U: %d dynamic translations, %d static translations\n", + format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions); + + if (verbose == 0) + return s; + + if (u->nsessions || u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (sm->list_pool, head_index); + + elt_index = head->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + + while (session_index != ~0) + { + sess = pool_elt_at_index (sm->sessions, session_index); + + s = format (s, " %U\n", format_snat_session, sm, sess); + + elt_index = elt->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + } + } + + return s; +} + +u8 * format_snat_static_mapping (u8 * s, va_list * args) +{ + snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + + if (m->addr_only) + s = format (s, "local %U external %U vrf %d", + format_ip4_address, &m->local_addr, + format_ip4_address, &m->external_addr, + m->vrf_id); + else + s = format (s, "local %U:%d external %U:%d vrf %d", + format_ip4_address, &m->local_addr, m->local_port, + format_ip4_address, &m->external_addr, m->external_port, + m->vrf_id); + + return s; +} + +static clib_error_t * +show_snat_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int verbose = 0; + snat_main_t * sm = &snat_main; + snat_user_t * u; + snat_static_mapping_t *m; + snat_interface_t *i; + snat_address_t * ap; + vnet_main_t *vnm = vnet_get_main(); + snat_main_per_thread_data_t *tsm; + u32 users_num = 0, sessions_num = 0, *worker; + uword j = 0; + + if (unformat (input, "detail")) + verbose = 1; + else if (unformat (input, "verbose")) + verbose = 2; + + if (sm->static_mapping_only) + { + if (sm->static_mapping_connection_tracking) + vlib_cli_output (vm, "SNAT mode: static mapping only connection " + "tracking"); + else + vlib_cli_output (vm, "SNAT mode: static mapping only"); + } + else + { + vlib_cli_output (vm, "SNAT mode: dynamic translations enabled"); + } + + if (verbose > 0) + { + pool_foreach (i, sm->interfaces, + ({ + vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, i->sw_if_index), + i->is_inside ? "in" : "out"); + })); + + vec_foreach (ap, sm->addresses) + { + u8 * s = format (0, ""); + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + clib_bitmap_foreach (j, ap->busy_port_bitmap, + ({ + s = format (s, " %d", j); + })); + vlib_cli_output (vm, " %d busy ports:%v", ap->busy_ports, s); + } + } + + if (sm->num_workers > 1) + { + vlib_cli_output (vm, "%d workers", vec_len (sm->workers)); + if (verbose > 0) + { + vec_foreach (worker, sm->workers) + { + vlib_worker_thread_t *w = + vlib_worker_threads + *worker + sm->first_worker_index; + vlib_cli_output (vm, " %v", w->name); + } + } + } + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + { + vlib_cli_output (vm, "%d static mappings", + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + } + } + else + { + vec_foreach (tsm, sm->per_thread_data) + { + users_num += pool_elts (tsm->users); + sessions_num += pool_elts (tsm->sessions); + } + + vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions," + " %d static mappings", + users_num, + vec_len (sm->addresses), + sessions_num, + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, + verbose - 1); + vec_foreach_index (j, sm->per_thread_data) + { + tsm = vec_elt_at_index (sm->per_thread_data, j); + + if (pool_elts (tsm->users) == 0) + continue; + + vlib_worker_thread_t *w = vlib_worker_threads + j; + vlib_cli_output (vm, "Thread %d (%v at lcore %u):", j, w->name, + w->lcore_id); + vlib_cli_output (vm, " %d list pool elements", + pool_elts (tsm->list_pool)); + + pool_foreach (u, tsm->users, + ({ + vlib_cli_output (vm, " %U", format_snat_user, tsm, u, + verbose - 1); + })); + } + + if (pool_elts (sm->static_mappings)) + { + vlib_cli_output (vm, "static mappings:"); + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + } + } + } + + return 0; +} + +VLIB_CLI_COMMAND (show_snat_command, static) = { + .path = "show snat", + .short_help = "show snat", + .function = show_snat_command_fn, +}; diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h new file mode 100644 index 00000000000..cb31dc51423 --- /dev/null +++ b/src/plugins/snat/snat.h @@ -0,0 +1,259 @@ + +/* + * snat.h - simple nat definitions + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_snat_h__ +#define __included_snat_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/api_errno.h> +#include <vppinfra/bihash_8_8.h> +#include <vppinfra/dlist.h> +#include <vppinfra/error.h> +#include <vlibapi/api.h> + +/* Key */ +typedef struct { + union + { + struct + { + ip4_address_t addr; + u16 port; + u16 protocol:3, + fib_index:13; + }; + u64 as_u64; + }; +} snat_session_key_t; + +typedef struct { + union + { + struct + { + ip4_address_t addr; + u32 fib_index; + }; + u64 as_u64; + }; +} snat_user_key_t; + +typedef struct { + union + { + struct + { + ip4_address_t addr; + u16 port; + u16 fib_index; + }; + u64 as_u64; + }; +} snat_static_mapping_key_t; + + +typedef enum { + SNAT_PROTOCOL_UDP = 0, + SNAT_PROTOCOL_TCP, + SNAT_PROTOCOL_ICMP, +} snat_protocol_t; + + +#define SNAT_SESSION_FLAG_STATIC_MAPPING 1 + +typedef CLIB_PACKED(struct { + snat_session_key_t out2in; /* 0-15 */ + + snat_session_key_t in2out; /* 16-31 */ + + u32 flags; /* 32-35 */ + + /* per-user translations */ + u32 per_user_index; /* 36-39 */ + + u32 per_user_list_head_index; /* 40-43 */ + + /* Last heard timer */ + f64 last_heard; /* 44-51 */ + + u64 total_bytes; /* 52-59 */ + + u32 total_pkts; /* 60-63 */ + + /* Outside address */ + u32 outside_address_index; /* 64-67 */ + +}) snat_session_t; + + +typedef struct { + ip4_address_t addr; + u32 sessions_per_user_list_head_index; + u32 nsessions; + u32 nstaticsessions; +} snat_user_t; + +typedef struct { + ip4_address_t addr; + u32 busy_ports; + uword * busy_port_bitmap; +} snat_address_t; + +typedef struct { + ip4_address_t local_addr; + ip4_address_t external_addr; + u16 local_port; + u16 external_port; + u8 addr_only; + u32 vrf_id; + u32 fib_index; +} snat_static_mapping_t; + +typedef struct { + u32 sw_if_index; + u8 is_inside; +} snat_interface_t; + +typedef struct { + /* User pool */ + snat_user_t * users; + + /* Session pool */ + snat_session_t * sessions; + + /* Pool of doubly-linked list elements */ + dlist_elt_t * list_pool; +} snat_main_per_thread_data_t; + +typedef struct { + /* Main lookup tables */ + clib_bihash_8_8_t out2in; + clib_bihash_8_8_t in2out; + + /* Find-a-user => src address lookup */ + clib_bihash_8_8_t user_hash; + + /* Non-translated packets worker lookup => src address + VRF */ + clib_bihash_8_8_t worker_by_in; + + /* Translated packets worker lookup => IP address + port number */ + clib_bihash_8_8_t worker_by_out; + + u32 num_workers; + u32 first_worker_index; + u32 next_worker; + u32 * workers; + + /* Per thread data */ + snat_main_per_thread_data_t * per_thread_data; + + /* Find a static mapping by local */ + clib_bihash_8_8_t static_mapping_by_local; + + /* Find a static mapping by external */ + clib_bihash_8_8_t static_mapping_by_external; + + /* Static mapping pool */ + snat_static_mapping_t * static_mappings; + + /* Interface pool */ + snat_interface_t * interfaces; + + /* Vector of outside addresses */ + snat_address_t * addresses; + + /* Randomize port allocation order */ + u32 random_seed; + + /* Worker handoff index */ + u32 fq_in2out_index; + u32 fq_out2in_index; + + /* Config parameters */ + u8 static_mapping_only; + u8 static_mapping_connection_tracking; + u32 translation_buckets; + u32 translation_memory_size; + u32 user_buckets; + u32 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 outside_fib_index; + u32 inside_vrf_id; + u32 inside_fib_index; + + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + ip4_main_t * ip4_main; + ip_lookup_main_t * ip4_lookup_main; + ethernet_main_t * ethernet_main; + api_main_t * api_main; +} snat_main_t; + +extern snat_main_t snat_main; +extern vlib_node_registration_t snat_in2out_node; +extern vlib_node_registration_t snat_out2in_node; +extern vlib_node_registration_t snat_in2out_fast_node; +extern vlib_node_registration_t snat_out2in_fast_node; +extern vlib_node_registration_t snat_in2out_worker_handoff_node; +extern vlib_node_registration_t snat_out2in_worker_handoff_node; + +void snat_free_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 address_index); + +int snat_alloc_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 * address_indexp); + +int snat_static_mapping_match (snat_main_t * sm, + snat_session_key_t match, + snat_session_key_t * mapping, + u8 by_external); + +format_function_t format_snat_user; + +typedef struct { + u32 cached_sw_if_index; + u32 cached_ip4_address; +} snat_runtime_t; + +/** \brief Check if SNAT session is created from static mapping. + @param s SNAT session + @return 1 if SNAT session is created from static mapping otherwise 0 +*/ +#define snat_is_session_static(s) s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING + +/* + * Why is this here? Because we don't need to touch this layer to + * simply reply to an icmp. We need to change id to a unique + * value to NAT an echo request/reply. + */ + +typedef struct { + u16 identifier; + u16 sequence; +} icmp_echo_header_t; + +#endif /* __included_snat_h__ */ diff --git a/src/plugins/snat/snat_all_api_h.h b/src/plugins/snat/snat_all_api_h.h new file mode 100644 index 00000000000..490177008e0 --- /dev/null +++ b/src/plugins/snat/snat_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * snat_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <snat/snat.api.h> diff --git a/src/plugins/snat/snat_msg_enum.h b/src/plugins/snat/snat_msg_enum.h new file mode 100644 index 00000000000..2c76fd51158 --- /dev/null +++ b/src/plugins/snat/snat_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * snat_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_snat_msg_enum_h +#define included_snat_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <snat/snat_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_snat_msg_enum_h */ diff --git a/src/plugins/snat/snat_test.c b/src/plugins/snat/snat_test.c new file mode 100644 index 00000000000..2a003ba60c6 --- /dev/null +++ b/src/plugins/snat/snat_test.c @@ -0,0 +1,602 @@ + +/* + * snat.c - skeleton vpp-api-test plug-in + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include <snat/snat_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <snat/snat_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <snat/snat_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <snat/snat_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <snat/snat_all_api_h.h> +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} snat_test_main_t; + +snat_test_main_t snat_test_main; + +#define foreach_standard_reply_retval_handler \ +_(snat_add_address_range_reply) \ +_(snat_interface_add_del_feature_reply) \ +_(snat_add_static_mapping_reply) \ +_(snat_set_workers_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = snat_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SNAT_ADD_ADDRESS_RANGE_REPLY, snat_add_address_range_reply) \ +_(SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY, \ + snat_interface_add_del_feature_reply) \ +_(SNAT_ADD_STATIC_MAPPING_REPLY, snat_add_static_mapping_reply) \ +_(SNAT_CONTROL_PING_REPLY, snat_control_ping_reply) \ +_(SNAT_STATIC_MAPPING_DETAILS, snat_static_mapping_details) \ +_(SNAT_SHOW_CONFIG_REPLY, snat_show_config_reply) \ +_(SNAT_ADDRESS_DETAILS, snat_address_details) \ +_(SNAT_INTERFACE_DETAILS, snat_interface_details) \ +_(SNAT_SET_WORKERS_REPLY, snat_set_workers_reply) \ +_(SNAT_WORKER_DETAILS, snat_worker_details) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int api_snat_add_address_range (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + ip4_address_t start_addr, end_addr; + u32 start_host_order, end_host_order; + vl_api_snat_add_address_range_t * mp; + u8 is_add = 1; + int count; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (i, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + errmsg ("end address less than start address\n"); + return -99; + } + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + { + errmsg ("%U - %U, %d addresses...\n", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, + count); + } + + M(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range); + + memcpy (mp->first_ip_address, &start_addr, 4); + memcpy (mp->last_ip_address, &end_addr, 4); + mp->is_ip4 = 1; + mp->is_add = is_add; + + S; W; + + /* NOTREACHED */ + return 0; +} + +static int api_snat_interface_add_del_feature (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_interface_add_del_feature_t * mp; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u8 is_inside = 1; + u8 is_add = 1; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "out")) + is_inside = 0; + else if (unformat (i, "in")) + is_inside = 1; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (sw_if_index_set == 0) + { + errmsg ("interface / sw_if_index required\n"); + return -99; + } + + M(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + mp->is_inside = is_inside; + + S; W; + /* NOTREACHED */ + return 0; +} + +static int api_snat_add_static_mapping(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_add_static_mapping_t * mp; + u8 addr_set_n = 0; + u8 is_add = 1; + u8 addr_only = 1; + ip4_address_t local_addr, external_addr; + u32 local_port = 0, external_port = 0, vrf_id = ~0; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "local_addr %U", unformat_ip4_address, &local_addr)) + addr_set_n++; + else if (unformat (i, "external_addr %U", unformat_ip4_address, + &external_addr)) + addr_set_n++; + else if (unformat (i, "local_port %u", &local_port)) + addr_only = 0; + else if (unformat (i, "external_port %u", &external_port)) + addr_only = 0; + else if (unformat (i, "vrf %u", &vrf_id)) + ; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (addr_set_n != 2) + { + errmsg ("local_addr and remote_addr required\n"); + return -99; + } + + M(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping); + mp->is_add = is_add; + mp->is_ip4 = 1; + mp->addr_only = addr_only; + mp->local_port = ntohs ((u16) local_port); + mp->external_port = ntohs ((u16) external_port); + mp->vrf_id = ntohl (vrf_id); + memcpy (mp->local_ip_address, &local_addr, 4); + memcpy (mp->external_ip_address, &external_addr, 4); + + S; W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_control_ping_reply_t_handler + (vl_api_snat_control_ping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_snat_static_mapping_details_t_handler + (vl_api_snat_static_mapping_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + if (mp->addr_only) + fformat (vam->ofp, "%15U%6s%15U%6s%11d\n", + format_ip4_address, &mp->local_ip_address, "", + format_ip4_address, &mp->external_ip_address, "", + ntohl (mp->vrf_id)); + else + fformat (vam->ofp, "%15U%6d%15U%6d%11d\n", + format_ip4_address, &mp->local_ip_address, + ntohs (mp->local_port), + format_ip4_address, &mp->external_ip_address, + ntohs (mp->external_port), + ntohl (mp->vrf_id)); + +} + +static int api_snat_static_mapping_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_static_mapping_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_static_mapping_dump"); + return -99; + } + + fformat (vam->ofp, "%21s%21s\n", "local", "external"); + fformat (vam->ofp, "%15s%6s%15s%6s%11s\n", "address", "port", "address", + "port", "vrf"); + + M(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_show_config_reply_t_handler + (vl_api_snat_show_config_reply_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + i32 retval = ntohl (mp->retval); + + if (retval >= 0) + { + fformat (vam->ofp, "translation hash buckets %d\n", + ntohl (mp->translation_buckets)); + fformat (vam->ofp, "translation hash memory %d\n", + ntohl (mp->translation_memory_size)); + fformat (vam->ofp, "user hash buckets %d\n", ntohl (mp->user_buckets)); + fformat (vam->ofp, "user hash memory %d\n", ntohl (mp->user_memory_size)); + fformat (vam->ofp, "max translations per user %d\n", + ntohl (mp->max_translations_per_user)); + fformat (vam->ofp, "outside VRF id %d\n", ntohl (mp->outside_vrf_id)); + fformat (vam->ofp, "inside VRF id %d\n", ntohl (mp->inside_vrf_id)); + if (mp->static_mapping_only) + { + fformat (vam->ofp, "static mapping only"); + if (mp->static_mapping_connection_tracking) + fformat (vam->ofp, " connection tracking"); + fformat (vam->ofp, "\n"); + } + } + vam->retval = retval; + vam->result_ready = 1; +} + +static int api_snat_show_config(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_show_config_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_show_config"); + return -99; + } + + M(SNAT_SHOW_CONFIG, snat_show_config); + S; W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_address_details_t_handler + (vl_api_snat_address_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "%U\n", format_ip4_address, &mp->ip_address); +} + +static int api_snat_address_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_address_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_ADDRESS_DUMP, snat_address_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_interface_details_t_handler + (vl_api_snat_interface_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "sw_if_index %d %s\n", ntohl (mp->sw_if_index), + mp->is_inside ? "in" : "out"); +} + +static int api_snat_interface_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_interface_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_INTERFACE_DUMP, snat_interface_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static int api_snat_set_workers (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_set_workers_t * mp; + uword *bitmap; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_bitmap_list, &bitmap)) + ; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + M(SNAT_SET_WORKERS, snat_set_workers); + mp->worker_mask = clib_host_to_net_u64 (bitmap[0]); + + S; W; + + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_worker_details_t_handler + (vl_api_snat_worker_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "worker_index %d (%s at lcore %u)\n", + ntohl (mp->worker_index), mp->name, ntohl (mp->lcore_id)); +} + +static int api_snat_worker_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_worker_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_WORKER_DUMP, snat_worker_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(snat_add_address_range, "<start-addr> [- <end-addr] [del]") \ +_(snat_interface_add_del_feature, \ + "<intfc> | sw_if_index <id> [in] [out] [del]") \ +_(snat_add_static_mapping, "local_addr <ip> external_addr <ip> " \ + "[local_port <n>] [external_port <n>] [vrf <table-id>] [del]") \ +_(snat_set_workers, "<wokrers_bitmap>") \ +_(snat_static_mapping_dump, "") \ +_(snat_show_config, "") \ +_(snat_address_dump, "") \ +_(snat_interface_dump, "") \ +_(snat_worker_dump, "") + +void vat_api_hookup (vat_main_t *vam) +{ + snat_test_main_t * sm __attribute__((unused)) = &snat_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + snat_test_main_t * sm = &snat_test_main; + u8 * name; + + sm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "snat_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} |