From 2ba92e32e0197f676dd905e5edcb4ff3e1bec241 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Mon, 21 Aug 2017 07:05:03 -0700 Subject: NAT: Rename snat plugin to nat (VPP-955) Change-Id: I30a7e3da7a4efc6038a91e27b48045d4b07e2764 Signed-off-by: Matus Fabian --- src/plugins/nat/out2in.c | 2294 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2294 insertions(+) create mode 100644 src/plugins/nat/out2in.c (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c new file mode 100644 index 00000000..67950066 --- /dev/null +++ b/src/plugins/nat/out2in.c @@ -0,0 +1,2294 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 session_index; +} snat_out2in_trace_t; + +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_out2in_worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_snat_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "NAT44_OUT2IN: sw_if_index %d, next index %d, session index %d", + t->sw_if_index, t->next_index, t->session_index); + return s; +} + +static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_worker_handoff_trace_t * t = + va_arg (*args, snat_out2in_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "NAT44_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +vlib_node_registration_t snat_out2in_node; +vlib_node_registration_t snat_out2in_fast_node; +vlib_node_registration_t snat_out2in_worker_handoff_node; +vlib_node_registration_t snat_det_out2in_node; + +#define foreach_snat_out2in_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(OUT2IN_PACKETS, "Good out2in packets processed") \ +_(BAD_ICMP_TYPE, "unsupported ICMP type") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) SNAT_OUT2IN_ERROR_##sym, + foreach_snat_out2in_error +#undef _ + SNAT_OUT2IN_N_ERROR, +} snat_out2in_error_t; + +static char * snat_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_snat_out2in_error +#undef _ +}; + +typedef enum { + SNAT_OUT2IN_NEXT_DROP, + SNAT_OUT2IN_NEXT_LOOKUP, + SNAT_OUT2IN_NEXT_ICMP_ERROR, + SNAT_OUT2IN_N_NEXT, +} snat_out2in_next_t; + +/** + * @brief Create session for static mapping. + * + * Create NAT session initiated by host from external network with static + * mapping. + * + * @param sm NAT main. + * @param b0 Vlib buffer. + * @param in2out In2out NAT44 session key. + * @param out2in Out2in NAT44 session key. + * @param node Vlib node. + * + * @returns SNAT session if successfully created otherwise 0. + */ +static inline snat_session_t * +create_session_for_static_mapping (snat_main_t *sm, + vlib_buffer_t *b0, + snat_session_key_t in2out, + snat_session_key_t out2in, + vlib_node_runtime_t * node, + u32 thread_index) +{ + snat_user_t *u; + snat_user_key_t user_key; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0, value0; + dlist_elt_t * per_user_translation_list_elt; + dlist_elt_t * per_user_list_head_elt; + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + user_key.addr = in2out.addr; + user_key.fib_index = in2out.fib_index; + kv0.key = user_key.as_u64; + + /* Ever heard of the "user" = inside ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + { + /* no, make a new one */ + pool_get (sm->per_thread_data[thread_index].users, u); + memset (u, 0, sizeof (*u)); + u->addr = in2out.addr; + u->fib_index = in2out.fib_index; + + pool_get (sm->per_thread_data[thread_index].list_pool, + per_user_list_head_elt); + + u->sessions_per_user_list_head_index = per_user_list_head_elt - + sm->per_thread_data[thread_index].list_pool; + + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, + u->sessions_per_user_list_head_index); + + kv0.value = u - sm->per_thread_data[thread_index].users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + + /* add non-traslated packets worker lookup */ + kv0.value = thread_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + { + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, + value0.value); + } + + pool_get (sm->per_thread_data[thread_index].sessions, s); + memset (s, 0, sizeof (*s)); + + s->outside_address_index = ~0; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->ext_host_addr.as_u32 = ip0->dst_address.as_u32; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (sm->per_thread_data[thread_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[thread_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[thread_index].sessions; + s->per_user_index = + per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[thread_index].list_pool); + + s->in2out = in2out; + s->out2in = out2in; + s->in2out.protocol = out2in.protocol; + + /* Add to translation hashes */ + kv0.key = s->in2out.as_u64; + kv0.value = s - sm->per_thread_data[thread_index].sessions; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + clib_warning ("in2out key add failed"); + + kv0.key = s->out2in.as_u64; + kv0.value = s - sm->per_thread_data[thread_index].sessions; + + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + clib_warning ("out2in key add failed"); + + /* log NAT event */ + snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->in2out.protocol, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + return s; +} + +static_always_inline +snat_out2in_error_t icmp_get_key(ip4_header_t *ip0, + snat_session_key_t *p_key0) +{ + icmp46_header_t *icmp0; + snat_session_key_t key0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *)(icmp0+1); + + if (!icmp_is_error_message (icmp0)) + { + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.addr = ip0->dst_address; + key0.port = echo0->identifier; + } + else + { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + key0.protocol = ip_proto_to_snat_proto (inner_ip0->protocol); + key0.addr = inner_ip0->src_address; + switch (key0.protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + key0.port = inner_echo0->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + key0.port = ((tcp_udp_header_t*)l4_header)->src_port; + break; + default: + return SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL; + } + } + *p_key0 = key0; + return -1; /* success */ +} + +/** + * Get address and port values to be used for ICMP packet translation + * and create session if needed + * + * @param[in,out] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_key_t sm0; + snat_session_t *s0 = 0; + u8 dont_translate = 0; + clib_bihash_kv_8_8_t kv0, value0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + key0.protocol = 0; + + err = icmp_get_key (ip0, &key0); + if (err != -1) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only)) + { + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0, + ip0->dst_address.as_u32))) + { + dont_translate = 1; + goto out; + } + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply && + (icmp0->type != ICMP4_echo_request || !is_addr_only))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, + node, thread_index); + + if (!s0) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + } + else + { + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply && + icmp0->type != ICMP4_echo_request && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + } + +out: + *p_proto = key0.protocol; + if (s0) + *p_value = s0->in2out; + *p_dont_translate = dont_translate; + if (d) + *(snat_session_t**)d = s0; + return next0; +} + +/** + * Get address and port values to be used for ICMP packet translation + * + * @param[in] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u32 rx_fib_index0; + snat_session_key_t key0; + snat_session_key_t sm0; + u8 dont_translate = 0; + u8 is_addr_only; + u32 next0 = ~0; + int err; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + + err = icmp_get_key (ip0, &key0); + if (err != -1) + { + b0->error = node->errors[err]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out2; + } + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only)) + { + /* Don't NAT packet aimed at the intfc address */ + if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32)) + { + dont_translate = 1; + goto out; + } + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply && + (icmp0->type != ICMP4_echo_request || !is_addr_only) && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + +out: + *p_value = sm0; +out2: + *p_proto = key0.protocol; + *p_dont_translate = dont_translate; + return next0; +} + +static inline u32 icmp_out2in (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 thread_index, + void *d, + void *e) +{ + snat_session_key_t sm0; + u8 protocol; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0 = 0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + u8 dont_translate; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + u16 checksum0; + u32 next0_tmp; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, + &protocol, &sm0, &dont_translate, d, e); + if (next0_tmp != ~0) + next0 = next0_tmp; + if (next0 == SNAT_OUT2IN_NEXT_DROP || dont_translate) + goto out; + + sum0 = ip_incremental_checksum (0, icmp0, + ntohs(ip0->length) - ip4_header_bytes (ip0)); + checksum0 = ~ip_csum_fold (sum0); + if (checksum0 != 0 && checksum0 != 0xffff) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + old_addr0 = ip0->dst_address.as_u32; + new_addr0 = ip0->dst_address.as_u32 = sm0.addr.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (!icmp_is_error_message (icmp0)) + { + new_id0 = sm0.port; + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + new_id0 = sm0.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier /* changed member */); + icmp0->checksum = ip_csum_fold (sum0); + } + } + else + { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + + if (!ip4_header_checksum_is_valid (inner_ip0)) + { + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + old_addr0 = inner_ip0->src_address.as_u32; + inner_ip0->src_address = sm0.addr; + new_addr0 = inner_ip0->src_address.as_u32; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address /* changed member */); + icmp0->checksum = ip_csum_fold (sum0); + + switch (protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + + old_id0 = inner_echo0->identifier; + new_id0 = sm0.port; + inner_echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + old_id0 = ((tcp_udp_header_t*)l4_header)->src_port; + new_id0 = sm0.port; + ((tcp_udp_header_t*)l4_header)->src_port = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, tcp_udp_header_t, + src_port); + icmp0->checksum = ip_csum_fold (sum0); + break; + default: + ASSERT(0); + } + } + +out: + return next0; +} + + +static inline u32 icmp_out2in_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, f64 now, + u32 thread_index, + snat_session_t ** p_s0) +{ + next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, thread_index, p_s0, 0); + snat_session_t * s0 = *p_s0; + if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0)) + { + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + } + return next0; +} + +static void +snat_out2in_unknown_proto (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + clib_bihash_kv_8_8_t kv, value; + clib_bihash_kv_16_8_t s_kv, s_value; + snat_static_mapping_t *m; + snat_session_key_t m_key; + u32 old_addr, new_addr; + ip_csum_t sum; + snat_unk_proto_ses_key_t key; + snat_session_t * s; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt; + + old_addr = ip->dst_address.as_u32; + + key.l_addr = ip->dst_address; + key.r_addr = ip->src_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; + } + else + { + m_key.addr = ip->dst_address; + m_key.port = 0; + m_key.protocol = 0; + m_key.fib_index = rx_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + return; + + m = pool_elt_at_index (sm->static_mappings, value.value); + + new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; + + u_key.addr = ip->src_address; + u_key.fib_index = m->fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip->src_address; + u->fib_index = rx_fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->outside_address_index = ~0; + s->out2in.addr.as_u32 = old_addr; + s->out2in.fib_index = rx_fib_index; + s->in2out.addr.as_u32 = new_addr; + s->in2out.fib_index = m->fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Add to lookup tables */ + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + clib_warning ("out2in key add failed"); + + key.l_addr = ip->dst_address; + key.fib_index = m->fib_index; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + clib_warning ("in2out key add failed"); + } + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip->checksum = ip_csum_fold (sum); + + vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); + /* Per-user LRU list maintenance */ + clib_dlist_remove (tsm->list_pool, s->per_user_index); + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); +} + +static uword +snat_out2in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, *ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + u32 new_addr1, old_addr1; + u16 new_port1, old_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + icmp46_header_t * icmp0, * icmp1; + snat_session_key_t key0, key1, sm0, sm1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t * s0 = 0, * s1 = 0; + clib_bihash_kv_8_8_t kv0, kv1, value0, value1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vnet_buffer (b0)->snat.flags = 0; + vnet_buffer (b1)->snat.flags = 0; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + { + snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm); + goto trace0; + } + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + goto trace0; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (proto0 != SNAT_PROTOCOL_UDP + || (udp0->dst_port + != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))) + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace0; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + thread_index); + if (!s0) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace0; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace0: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + if (PREDICT_FALSE(ip1->ttl == 1)) + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace1; + } + + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + if (PREDICT_FALSE (proto1 == ~0)) + { + snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1, + thread_index, now, vm); + goto trace1; + } + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = icmp_out2in_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, thread_index, &s1); + goto trace1; + } + + key1.addr = ip1->dst_address; + key1.port = udp1->dst_port; + key1.protocol = proto1; + key1.fib_index = rx_fib_index1; + + kv1.key = key1.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key1, &sm1, 1, 0)) + { + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (proto1 != SNAT_PROTOCOL_UDP + || (udp1->dst_port + != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))) + next1 = SNAT_OUT2IN_NEXT_DROP; + goto trace1; + } + + /* Create session initiated by host from external network */ + s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node, + thread_index); + if (!s1) + { + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next1 = SNAT_OUT2IN_NEXT_DROP; + goto trace1; + } + } + else + s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value1.value); + + old_addr1 = ip1->dst_address.as_u32; + ip1->dst_address = s1->in2out.addr; + new_addr1 = ip1->dst_address.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + old_port1 = tcp1->dst_port; + tcp1->dst_port = s1->in2out.port; + new_port1 = tcp1->dst_port; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->dst_port; + udp1->dst_port = s1->in2out.port; + udp1->checksum = 0; + } + + /* Accounting */ + s1->last_heard = now; + s1->total_pkts++; + s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s1)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s1->per_user_list_head_index, + s1->per_user_index); + } + trace1: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = s1 - sm->per_thread_data[thread_index].sessions; + } + + pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t * s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_buffer (b0)->snat.flags = 0; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + { + snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm); + goto trace00; + } + + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace00; + } + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, thread_index, &s0); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (proto0 != SNAT_PROTOCOL_UDP + || (udp0->dst_port + != clib_host_to_net_u16(UDP_DST_PORT_dhcp_to_client))) + + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace00; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + thread_index); + if (!s0) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace00; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[thread_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[thread_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_node) = { + .function = snat_out2in_node_fn, + .name = "nat44-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn); + +/**************************/ +/*** deterministic mode ***/ +/**************************/ +static uword +snat_det_out2in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; + u16 new_port0, old_port0, old_port1, new_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + u32 proto0, proto1; + snat_det_out_key_t key0, key1; + snat_det_map_t * dm0, * dm1; + snat_det_session_t * ses0 = 0, * ses1 = 0; + u32 rx_fib_index0, rx_fib_index1; + icmp46_header_t * icmp0, * icmp1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace0; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + icmp0 = (icmp46_header_t *) udp0; + + next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, thread_index, + &ses0, &dm0); + goto trace0; + } + + key0.ext_host_addr = ip0->src_address; + key0.ext_host_port = tcp0->src; + key0.out_port = tcp0->dst; + + dm0 = snat_det_map_by_out(sm, &ip0->dst_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip0->dst_address); + next0 = SNAT_OUT2IN_NEXT_DROP; + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + + snat_det_reverse(dm0, &ip0->dst_address, + clib_net_to_host_u16(tcp0->dst), &new_addr0); + + ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64); + if (PREDICT_FALSE(!ses0)) + { + clib_warning("no match src %U:%d dst %U:%d for user %U", + format_ip4_address, &ip0->src_address, + clib_net_to_host_u16 (tcp0->src), + format_ip4_address, &ip0->dst_address, + clib_net_to_host_u16 (tcp0->dst), + format_ip4_address, &new_addr0); + next0 = SNAT_OUT2IN_NEXT_DROP; + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + new_port0 = ses0->in_port; + + old_addr0 = ip0->dst_address; + ip0->dst_address = new_addr0; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm0, ses0); + + old_port0 = tcp0->dst; + tcp0->dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + + trace0: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + b1 = vlib_get_buffer (vm, bi1); + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + if (PREDICT_FALSE(ip1->ttl == 1)) + { + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace1; + } + + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + if (PREDICT_FALSE(proto1 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index1 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index1); + icmp1 = (icmp46_header_t *) udp1; + + next1 = icmp_out2in(sm, b1, ip1, icmp1, sw_if_index1, + rx_fib_index1, node, next1, thread_index, + &ses1, &dm1); + goto trace1; + } + + key1.ext_host_addr = ip1->src_address; + key1.ext_host_port = tcp1->src; + key1.out_port = tcp1->dst; + + dm1 = snat_det_map_by_out(sm, &ip1->dst_address); + if (PREDICT_FALSE(!dm1)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip1->dst_address); + next1 = SNAT_OUT2IN_NEXT_DROP; + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace1; + } + + snat_det_reverse(dm1, &ip1->dst_address, + clib_net_to_host_u16(tcp1->dst), &new_addr1); + + ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64); + if (PREDICT_FALSE(!ses1)) + { + clib_warning("no match src %U:%d dst %U:%d for user %U", + format_ip4_address, &ip1->src_address, + clib_net_to_host_u16 (tcp1->src), + format_ip4_address, &ip1->dst_address, + clib_net_to_host_u16 (tcp1->dst), + format_ip4_address, &new_addr1); + next1 = SNAT_OUT2IN_NEXT_DROP; + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace1; + } + new_port1 = ses1->in_port; + + old_addr1 = ip1->dst_address; + ip1->dst_address = new_addr1; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED) + ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm1, ses1); + + old_port1 = tcp1->dst; + tcp1->dst = new_port1; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->dst_port; + udp1->dst_port = new_port1; + udp1->checksum = 0; + } + + trace1: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (ses1) + t->session_index = ses1 - dm1->sessions; + } + + pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + ip4_address_t new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + u32 proto0; + snat_det_out_key_t key0; + snat_det_map_t * dm0; + snat_det_session_t * ses0 = 0; + u32 rx_fib_index0; + icmp46_header_t * icmp0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace00; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE(proto0 == SNAT_PROTOCOL_ICMP)) + { + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + icmp0 = (icmp46_header_t *) udp0; + + next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, thread_index, + &ses0, &dm0); + goto trace00; + } + + key0.ext_host_addr = ip0->src_address; + key0.ext_host_port = tcp0->src; + key0.out_port = tcp0->dst; + + dm0 = snat_det_map_by_out(sm, &ip0->dst_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip0->dst_address); + next0 = SNAT_OUT2IN_NEXT_DROP; + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + snat_det_reverse(dm0, &ip0->dst_address, + clib_net_to_host_u16(tcp0->dst), &new_addr0); + + ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64); + if (PREDICT_FALSE(!ses0)) + { + clib_warning("no match src %U:%d dst %U:%d for user %U", + format_ip4_address, &ip0->src_address, + clib_net_to_host_u16 (tcp0->src), + format_ip4_address, &ip0->dst_address, + clib_net_to_host_u16 (tcp0->dst), + format_ip4_address, &new_addr0); + next0 = SNAT_OUT2IN_NEXT_DROP; + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + new_port0 = ses0->in_port; + + old_addr0 = ip0->dst_address; + ip0->dst_address = new_addr0; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm0, ses0); + + old_port0 = tcp0->dst; + tcp0->dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_det_out2in_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_det_out2in_node) = { + .function = snat_det_out2in_node_fn, + .name = "nat44-det-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn); + +/** + * Get address and port values to be used for ICMP packet translation + * and create session if needed + * + * @param[in,out] sm NAT main + * @param[in,out] node NAT node runtime + * @param[in] thread_index thread index + * @param[in,out] b0 buffer containing packet to be translated + * @param[out] p_proto protocol used for matching + * @param[out] p_value address and port after NAT translation + * @param[out] p_dont_translate if packet should not be translated + * @param d optional parameter + * @param e optional parameter + */ +u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node, + u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + snat_session_key_t *p_value, + u8 *p_dont_translate, void *d, void *e) +{ + ip4_header_t *ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0; + u8 protocol; + snat_det_out_key_t key0; + u8 dont_translate = 0; + u32 next0 = ~0; + icmp_echo_header_t *echo0, *inner_echo0 = 0; + ip4_header_t *inner_ip0; + void *l4_header = 0; + icmp46_header_t *inner_icmp0; + snat_det_map_t * dm0 = 0; + ip4_address_t new_addr0 = {{0}}; + snat_det_session_t * ses0 = 0; + ip4_address_t out_addr; + + ip0 = vlib_buffer_get_current (b0); + icmp0 = (icmp46_header_t *) ip4_next_header (ip0); + echo0 = (icmp_echo_header_t *)(icmp0+1); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + if (!icmp_is_error_message (icmp0)) + { + protocol = SNAT_PROTOCOL_ICMP; + key0.ext_host_addr = ip0->src_address; + key0.ext_host_port = 0; + key0.out_port = echo0->identifier; + out_addr = ip0->dst_address; + } + else + { + inner_ip0 = (ip4_header_t *)(echo0+1); + l4_header = ip4_next_header (inner_ip0); + protocol = ip_proto_to_snat_proto (inner_ip0->protocol); + key0.ext_host_addr = inner_ip0->dst_address; + out_addr = inner_ip0->src_address; + switch (protocol) + { + case SNAT_PROTOCOL_ICMP: + inner_icmp0 = (icmp46_header_t*)l4_header; + inner_echo0 = (icmp_echo_header_t *)(inner_icmp0+1); + key0.ext_host_port = 0; + key0.out_port = inner_echo0->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + key0.ext_host_port = ((tcp_udp_header_t*)l4_header)->dst_port; + key0.out_port = ((tcp_udp_header_t*)l4_header)->src_port; + break; + default: + b0->error = node->errors[SNAT_OUT2IN_ERROR_UNSUPPORTED_PROTOCOL]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + } + + dm0 = snat_det_map_by_out(sm, &out_addr); + if (PREDICT_FALSE(!dm0)) + { + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0, + ip0->dst_address.as_u32))) + { + dont_translate = 1; + goto out; + } + clib_warning("unknown dst address: %U", + format_ip4_address, &ip0->dst_address); + goto out; + } + + snat_det_reverse(dm0, &ip0->dst_address, + clib_net_to_host_u16(key0.out_port), &new_addr0); + + ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64); + if (PREDICT_FALSE(!ses0)) + { + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(is_interface_addr(sm, node, sw_if_index0, + ip0->dst_address.as_u32))) + { + dont_translate = 1; + goto out; + } + clib_warning("no match src %U:%d dst %U:%d for user %U", + format_ip4_address, &key0.ext_host_addr, + clib_net_to_host_u16 (key0.ext_host_port), + format_ip4_address, &out_addr, + clib_net_to_host_u16 (key0.out_port), + format_ip4_address, &new_addr0); + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_reply && + !icmp_is_error_message (icmp0))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_BAD_ICMP_TYPE]; + next0 = SNAT_OUT2IN_NEXT_DROP; + goto out; + } + + goto out; + +out: + *p_proto = protocol; + if (ses0) + { + p_value->addr = new_addr0; + p_value->fib_index = sm->inside_fib_index; + p_value->port = ses0->in_port; + } + *p_dont_translate = dont_translate; + if (d) + *(snat_det_session_t**)d = ses0; + if (e) + *(snat_det_map_t**)e = dm0; + return next0; +} + +/**********************/ +/*** worker handoff ***/ +/**********************/ +static uword +snat_out2in_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 thread_index = vlib_get_thread_index (); + + ASSERT (vec_len (sm->workers)); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + + next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0); + + if (PREDICT_FALSE (next_worker_index != thread_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, sm->out2in_node_index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, sm->out2in_node_index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { + .function = snat_out2in_worker_handoff_fn, + .name = "nat44-out2in-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn); + +static uword +snat_out2in_fast_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_DROP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 proto0; + u32 rx_fib_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + vnet_feature_next (sw_if_index0, &next0, b0); + + if (PREDICT_FALSE(ip0->ttl == 1)) + { + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = SNAT_OUT2IN_NEXT_ICMP_ERROR; + goto trace00; + } + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next0, ~0, 0, 0); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + new_addr0 = sm0.addr.as_u32; + new_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_port0 != udp0->dst_port)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->dst_port; + tcp0->dst_port = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + tcp0->checksum = ip_csum_fold(sum0); + } + } + + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_fast_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_fast_node) = { + .function = snat_out2in_fast_node_fn, + .name = "nat44-out2in-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn); -- cgit 1.2.3-korg From 704018cf117b6667f08b09d6db5fbec105bf6d57 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Mon, 4 Sep 2017 02:17:18 -0700 Subject: NAT: Destination NAT44 with load-balancing (VPP-954) added load-balancing static mappings with unequal load support Change-Id: Ie505e41f24d46f812b94dd28bdafe3dc170a6060 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 253 ++++++++++++++++++++++++--- src/plugins/nat/nat.api | 33 ++++ src/plugins/nat/nat.c | 430 ++++++++++++++++++++++++++++++++++++++++++---- src/plugins/nat/nat.h | 23 ++- src/plugins/nat/nat_api.c | 137 ++++++++++++++- src/plugins/nat/out2in.c | 205 ++++++++++++++++++++-- test/test_nat.py | 101 +++++++++++ test/vpp_papi_provider.py | 30 ++++ 8 files changed, 1142 insertions(+), 70 deletions(-) (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index bb186393..c51d4fb4 100644 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -314,23 +314,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (snat_is_unk_proto_session (s)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; /* Remove from lookup tables */ key.l_addr = s->in2out.addr; key.r_addr = s->ext_host_addr; key.fib_index = s->in2out.fib_index; key.proto = s->in2out.port; + key.rsvd = 0; + key.l_port = 0; up_kv.key[0] = key.as_u64[0]; up_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); key.l_addr = s->out2in.addr; key.fib_index = s->out2in.fib_index; up_kv.key[0] = key.as_u64[0]; up_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning ("out2in key del failed"); } else @@ -1033,7 +1035,7 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, u32 old_addr, new_addr = 0, ti = 0; clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; snat_session_key_t m_key; snat_worker_key_t w_key; snat_static_mapping_t *m; @@ -1045,10 +1047,11 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, key.r_addr.as_u32 = ip->src_address.as_u32; key.fib_index = sm->outside_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { m_key.addr = ip->dst_address; m_key.fib_index = sm->outside_fib_index; @@ -1110,7 +1113,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; u32 elt_index, head_index, ses_index, oldest_index; snat_session_t * s; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; u32 address_index = ~0; int i; u8 is_sm = 0; @@ -1121,11 +1124,12 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.r_addr = ip->dst_address; key.fib_index = rx_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value)) + if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value)) { s = pool_elt_at_index (tsm->sessions, s_value.value); new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; @@ -1202,7 +1206,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.l_addr.as_u32 = new_addr; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) break; goto create_ses; @@ -1215,7 +1219,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.l_addr.as_u32 = sm->addresses[i].addr.as_u32; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { new_addr = ip->src_address.as_u32 = key.l_addr.as_u32; address_index = i; @@ -1259,14 +1263,14 @@ create_ses: key.proto = s->in2out.port; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0)) clib_warning ("in2out key del failed"); key.l_addr = s->out2in.addr; key.fib_index = s->out2in.fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0)) clib_warning ("out2in key del failed"); } else @@ -1333,14 +1337,14 @@ create_ses: s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) clib_warning ("in2out key add failed"); key.l_addr.as_u32 = new_addr; key.fib_index = sm->outside_fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) clib_warning ("out2in key add failed"); } @@ -1366,6 +1370,153 @@ create_ses: vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; } +static void +snat_in2out_lb (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + nat_ed_ses_key_t key; + clib_bihash_kv_16_8_t s_kv, s_value; + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + snat_session_t *s = 0; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 old_addr, new_addr; + u16 new_port, old_port; + ip_csum_t sum; + u32 proto = ip_proto_to_snat_proto (ip->protocol); + snat_session_key_t e_key, l_key; + clib_bihash_kv_8_8_t kv, value; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt; + + old_addr = ip->src_address.as_u32; + + key.l_addr = ip->src_address; + key.r_addr = ip->dst_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd = 0; + key.l_port = udp->src_port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + } + else + { + l_key.addr = ip->src_address; + l_key.port = udp->src_port; + l_key.protocol = proto; + l_key.fib_index = rx_fib_index; + if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0)) + return; + + u_key.addr = ip->src_address; + u_key.fib_index = rx_fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip->src_address; + u->fib_index = rx_fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + clib_warning ("user key add failed"); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + s->outside_address_index = ~0; + s->in2out = l_key; + s->out2in = e_key; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Add to lookup tables */ + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) + clib_warning ("in2out-ed key add failed"); + + key.l_addr = e_key.addr; + key.fib_index = e_key.fib_index; + key.l_port = e_key.port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) + clib_warning ("out2in-ed key add failed"); + } + + new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip->checksum = ip_csum_fold (sum); + + if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP)) + { + old_port = tcp->src_port; + tcp->src_port = s->out2in.port; + new_port = tcp->src_port; + + sum = tcp->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length); + tcp->checksum = ip_csum_fold(sum); + } + else + { + udp->src_port = s->out2in.port; + udp->checksum = 0; + } + + if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0) + vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); +} + static inline uword snat_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1521,8 +1672,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm); + goto trace00; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; @@ -1672,8 +1843,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value1.value); + { + if (PREDICT_FALSE (value1.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, + now, vm); + goto trace01; + } + else + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + else + { + s1 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value1.value); + } + } old_addr1 = ip1->src_address.as_u32; ip1->src_address = s1->out2in.addr; @@ -1860,8 +2051,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm); + goto trace0; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api index 7245cb07..d7a4a9ef 100644 --- a/src/plugins/nat/nat.api +++ b/src/plugins/nat/nat.api @@ -1025,6 +1025,39 @@ define nat44_user_session_details { u32 total_pkts; }; +typeonly manual_endian define nat44_lb_addr_port { + u8 addr[4]; + u16 port; + u8 probability; +}; + +autoreply manual_endian define nat44_add_del_lb_static_mapping { + u32 client_index; + u32 context; + u8 is_add; + u8 external_addr[4]; + u16 external_port; + u8 protocol; + u32 vrf_id; + u8 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + +define nat44_lb_static_mapping_dump { + u32 client_index; + u32 context; +}; + +manual_endian define nat44_lb_static_mapping_details { + u32 context; + u8 external_addr[4]; + u16 external_port; + u8 protocol; + u32 vrf_id; + u8 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + /* * Deterministic NAT (CGN) APIs */ diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index f9ecb943..fabd0bc2 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -506,15 +506,16 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (snat_is_unk_proto_session (s)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t up_key; + nat_ed_ses_key_t up_key; up_key.l_addr = s->in2out.addr; up_key.r_addr = s->ext_host_addr; up_key.fib_index = s->in2out.fib_index; up_key.proto = s->in2out.port; - up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0; + up_key.rsvd = 0; + up_key.l_port = 0; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); @@ -522,7 +523,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, up_key.fib_index = s->out2in.fib_index; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning ("out2in key del failed"); @@ -589,6 +590,243 @@ delete: return 0; } +int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + snat_protocol_t proto, u32 vrf_id, + nat44_lb_addr_port_t *locals, u8 is_add) +{ + snat_main_t * sm = &snat_main; + snat_static_mapping_t *m; + snat_session_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + u32 fib_index; + snat_address_t *a = 0; + int i; + nat44_lb_addr_port_t *local; + snat_user_key_t w_key0; + snat_worker_key_t w_key1; + u32 worker_index = 0; + + m_key.addr = e_addr; + m_key.port = e_port; + m_key.protocol = proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + if (vec_len (locals) < 2) + return VNET_API_ERROR_INVALID_VALUE; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + vrf_id); + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!sm->static_mapping_only) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + switch (proto) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \ + return VNET_API_ERROR_INVALID_VALUE; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \ + if (e_port > 1024) \ + a->busy_##n##_ports++; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + memset (m, 0, sizeof (*m)); + m->external_addr = e_addr; + m->addr_only = 0; + m->vrf_id = vrf_id; + m->fib_index = fib_index; + m->external_port = e_port; + m->proto = proto; + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.protocol = m->proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1)) + { + clib_warning ("static_mapping_by_external key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + kv.value = ~0ULL; + if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1)) + { + clib_warning ("static_mapping_by_local key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + m_key.fib_index = m->fib_index; + + /* Assign worker */ + if (sm->workers) + { + w_key0.addr = locals[0].addr; + w_key0.fib_index = fib_index; + kv.key = w_key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + else + worker_index = value.value; + + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + kv.value = worker_index; + if (clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1)) + { + clib_warning ("worker-by-out add key failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + for (i = 0; i < vec_len (locals); i++) + { + m_key.addr = locals[i].addr; + m_key.port = locals[i].port; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1); + locals[i].prefix = locals[i - 1].prefix + locals[i].probability; + vec_add1 (m->locals, locals[i]); + m_key.port = clib_host_to_net_u16 (locals[i].port); + kv.key = m_key.as_u64; + kv.value = ~0ULL; + if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1)) + { + clib_warning ("in2out key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + /* Assign worker */ + if (sm->workers) + { + w_key0.addr = locals[i].addr; + w_key0.fib_index = fib_index; + kv.key = w_key0.as_u64; + kv.value = worker_index; + if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1)) + { + clib_warning ("worker-by-in key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + } + } + else + { + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4); + + /* Free external address port */ + if (!sm->static_mapping_only) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \ + if (e_port > 1024) \ + a->busy_##n##_ports--; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + } + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.protocol = m->proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0)) + { + clib_warning ("static_mapping_by_external key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0)) + { + clib_warning ("outi2in key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + vec_foreach (local, m->locals) + { + m_key.addr = local->addr; + m_key.port = local->port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0)) + { + clib_warning ("static_mapping_by_local key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (local->port); + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0)) + { + clib_warning ("in2out key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + pool_put (sm->static_mappings, m); + } + + return 0; +} + int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) { snat_address_t *a = 0; @@ -649,15 +887,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) if (snat_is_unk_proto_session (ses)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t up_key; + nat_ed_ses_key_t up_key; up_key.l_addr = ses->in2out.addr; up_key.r_addr = ses->ext_host_addr; up_key.fib_index = ses->in2out.fib_index; up_key.proto = ses->in2out.port; - up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0; + up_key.rsvd = 0; + up_key.l_port = 0; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); @@ -665,7 +904,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) up_key.fib_index = ses->out2in.fib_index; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning ("out2in key del failed"); } @@ -1048,6 +1287,7 @@ int snat_static_mapping_match (snat_main_t * sm, snat_static_mapping_t *m; snat_session_key_t m_key; clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local; + u32 rand, lo = 0, hi, mid; if (by_external) mapping_hash = &sm->static_mapping_by_external; @@ -1073,11 +1313,29 @@ int snat_static_mapping_match (snat_main_t * sm, if (by_external) { - mapping->addr = m->local_addr; - /* Address only mapping doesn't change port */ - mapping->port = m->addr_only ? match.port - : clib_host_to_net_u16 (m->local_port); + if (vec_len (m->locals)) + { + hi = vec_len (m->locals) - 1; + rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix); + while (lo < hi) + { + mid = ((hi - 1) >> 1) + lo; + (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid); + } + if (!(m->locals[lo].prefix >= rand)) + return 1; + mapping->addr = m->locals[lo].addr; + mapping->port = clib_host_to_net_u16 (m->locals[lo].port); + } + else + { + mapping->addr = m->local_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->local_port); + } mapping->fib_index = m->fib_index; + mapping->protocol = m->proto; } else { @@ -1517,6 +1775,101 @@ VLIB_CLI_COMMAND (add_static_mapping_command, static) = { "nat44 add static mapping tcp|udp|icmp local [] external [] [vrf ] [del]", }; +static clib_error_t * +add_lb_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t * error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0; + int is_add = 1; + int rv; + snat_protocol_t proto; + u8 proto_set = 0; + nat44_lb_addr_port_t *locals = 0, local; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U:%u probability %u", + unformat_ip4_address, &l_addr, &l_port, &probability)) + { + memset (&local, 0, sizeof (local)); + local.addr = l_addr; + local.port = (u16) l_port; + local.probability = (u8) probability; + vec_add1 (locals, local); + } + else if (unformat (line_input, "external %U:%u", unformat_ip4_address, + &e_addr, &e_port)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "protocol %U", unformat_snat_protocol, + &proto)) + proto_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (locals) < 2) + { + error = clib_error_return (0, "at least two local must be set"); + goto done; + } + + if (!proto_set) + { + error = clib_error_return (0, "missing protocol"); + goto done; + } + + rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, vrf_id, + locals, is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + error = clib_error_return (0, "External addres must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + vec_free (locals); + + return error; +} + +VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = { + .path = "nat44 add load-balancing static mapping", + .function = add_lb_static_mapping_command_fn, + .short_help = + "nat44 add load-balancing static mapping protocol tcp|udp external : local : probability [vrf ] [del]", +}; + static clib_error_t * set_workers_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1839,10 +2192,10 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, user_memory_size); - clib_bihash_init_16_8 (&sm->in2out_unk_proto, "in2out-unk-proto", + clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); - clib_bihash_init_16_8 (&sm->out2in_unk_proto, "out2in-unk-proto", + clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed", translation_buckets, translation_memory_size); } else @@ -1884,18 +2237,10 @@ u8 * format_snat_session_state (u8 * s, va_list * args) u8 * format_snat_key (u8 * s, va_list * args) { snat_session_key_t * key = va_arg (*args, snat_session_key_t *); - char * protocol_string = "unknown"; - static char *protocol_strings[] = { - "UDP", - "TCP", - "ICMP", - }; - - if (key->protocol < ARRAY_LEN(protocol_strings)) - protocol_string = protocol_strings[key->protocol]; - s = format (s, "%U proto %s port %d fib %d", - format_ip4_address, &key->addr, protocol_string, + s = format (s, "%U proto %U port %d fib %d", + format_ip4_address, &key->addr, + format_snat_protocol, key->protocol, clib_net_to_host_u16 (key->port), key->fib_index); return s; } @@ -1919,6 +2264,9 @@ u8 * format_snat_session (u8 * s, va_list * args) s = format (s, " i2o %U\n", format_snat_key, &sess->in2out); s = format (s, " o2i %U\n", format_snat_key, &sess->out2in); } + if (sess->ext_host_addr.as_u32) + s = format (s, " external host %U\n", + format_ip4_address, &sess->ext_host_addr); s = format (s, " last heard %.2f\n", sess->last_heard); s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts, sess->total_bytes); @@ -1926,6 +2274,8 @@ u8 * format_snat_session (u8 * s, va_list * args) s = format (s, " static translation\n"); else s = format (s, " dynamic translation\n"); + if (sess->flags & SNAT_SESSION_FLAG_LOAD_BALANCING) + s = format (s, " load-balancing\n"); return s; } @@ -1973,6 +2323,7 @@ u8 * format_snat_user (u8 * s, va_list * args) u8 * format_snat_static_mapping (u8 * s, va_list * args) { snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + nat44_lb_addr_port_t *local; if (m->addr_only) s = format (s, "local %U external %U vrf %d", @@ -1980,12 +2331,25 @@ u8 * format_snat_static_mapping (u8 * s, va_list * args) format_ip4_address, &m->external_addr, m->vrf_id); else - s = format (s, "%U local %U:%d external %U:%d vrf %d", - format_snat_protocol, m->proto, - format_ip4_address, &m->local_addr, m->local_port, - format_ip4_address, &m->external_addr, m->external_port, - m->vrf_id); - + { + if (vec_len (m->locals)) + { + s = format (s, "%U vrf %d external %U:%d", + format_snat_protocol, m->proto, + m->vrf_id, + format_ip4_address, &m->external_addr, m->external_port); + vec_foreach (local, m->locals) + s = format (s, "\n local %U:%d probability %d\%", + format_ip4_address, &local->addr, local->port, + local->probability); + } + else + s = format (s, "%U local %U:%d external %U:%d vrf %d", + format_snat_protocol, m->proto, + format_ip4_address, &m->local_addr, m->local_port, + format_ip4_address, &m->external_addr, m->external_port, + m->vrf_id); + } return s; } @@ -2208,6 +2572,10 @@ show_snat_command_fn (vlib_main_t * vm, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, + verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 04c466dc..8935144d 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -62,12 +62,13 @@ typedef struct { ip4_address_t l_addr; ip4_address_t r_addr; u32 fib_index; + u16 l_port; u8 proto; - u8 rsvd[3]; + u8 rsvd; }; u64 as_u64[2]; }; -} snat_unk_proto_ses_key_t; +} nat_ed_ses_key_t; typedef struct { union @@ -139,6 +140,7 @@ typedef enum { #define SNAT_SESSION_FLAG_STATIC_MAPPING 1 #define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2 +#define SNAT_SESSION_FLAG_LOAD_BALANCING 4 typedef CLIB_PACKED(struct { snat_session_key_t out2in; /* 0-15 */ @@ -205,6 +207,13 @@ typedef struct { snat_det_session_t * sessions; } snat_det_map_t; +typedef struct { + ip4_address_t addr; + u16 port; + u8 probability; + u8 prefix; +} nat44_lb_addr_port_t; + typedef struct { ip4_address_t local_addr; ip4_address_t external_addr; @@ -214,6 +223,7 @@ typedef struct { u32 vrf_id; u32 fib_index; snat_protocol_t proto; + nat44_lb_addr_port_t *locals; } snat_static_mapping_t; typedef struct { @@ -264,9 +274,9 @@ typedef struct snat_main_s { clib_bihash_8_8_t out2in; clib_bihash_8_8_t in2out; - /* Unknown protocol sessions lookup tables */ - clib_bihash_16_8_t out2in_unk_proto; - clib_bihash_16_8_t in2out_unk_proto; + /* Endpoint address dependent sessions lookup tables */ + clib_bihash_16_8_t out2in_ed; + clib_bihash_16_8_t in2out_ed; /* Find-a-user => src address lookup */ clib_bihash_8_8_t user_hash; @@ -496,6 +506,9 @@ int snat_interface_add_del_output_feature(u32 sw_if_index, u8 is_inside, int snat_add_interface_address(snat_main_t *sm, u32 sw_if_index, int is_del); uword unformat_snat_protocol(unformat_input_t * input, va_list * args); u8 * format_snat_protocol(u8 * s, va_list * args); +int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + snat_protocol_t proto, u32 vrf_id, + nat44_lb_addr_port_t *locals, u8 is_add); static_always_inline u8 icmp_is_error_message (icmp46_header_t * icmp) diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 0a2141f2..fa20f2cc 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -27,6 +27,10 @@ #include #include +#define vl_api_nat44_lb_addr_port_t_endian vl_noop_handler +#define vl_api_nat44_add_del_lb_static_mapping_t_endian vl_noop_handler +#define vl_api_nat44_nat44_lb_static_mapping_details_t_endian vl_noop_handler + /* define message structures */ #define vl_typedefs #include @@ -465,7 +469,8 @@ static void /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - send_snat_static_mapping_details (m, q, mp->context); + if (!vec_len(m->locals)) + send_snat_static_mapping_details (m, q, mp->context); })); /* *INDENT-ON* */ @@ -1888,7 +1893,8 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - send_nat44_static_mapping_details (m, q, mp->context); + if (!vec_len(m->locals)) + send_nat44_static_mapping_details (m, q, mp->context); })); /* *INDENT-ON* */ @@ -2136,6 +2142,131 @@ vl_api_nat44_user_session_dump_t_print (vl_api_nat44_user_session_dump_t * mp, FINISH; } +static nat44_lb_addr_port_t * +unformat_nat44_lb_addr_port (vl_api_nat44_lb_addr_port_t * addr_port_pairs, + u8 addr_port_pair_num) +{ + u8 i; + nat44_lb_addr_port_t *lb_addr_port_pairs = 0, lb_addr_port; + vl_api_nat44_lb_addr_port_t *ap; + + for (i = 0; i < addr_port_pair_num; i++) + { + ap = &addr_port_pairs[i]; + memset (&lb_addr_port, 0, sizeof (lb_addr_port)); + clib_memcpy (&lb_addr_port.addr, ap->addr, 4); + lb_addr_port.port = clib_net_to_host_u16 (ap->port); + lb_addr_port.probability = ap->probability; + vec_add1 (lb_addr_port_pairs, lb_addr_port); + } + + return lb_addr_port_pairs; +} + +static void + vl_api_nat44_add_del_lb_static_mapping_t_handler + (vl_api_nat44_add_del_lb_static_mapping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp; + int rv = 0; + nat44_lb_addr_port_t *locals = 0; + ip4_address_t e_addr; + snat_protocol_t proto; + + locals = unformat_nat44_lb_addr_port (mp->locals, mp->local_num); + clib_memcpy (&e_addr, mp->external_addr, 4); + proto = ip_proto_to_snat_proto (mp->protocol); + + rv = + nat44_add_del_lb_static_mapping (e_addr, + clib_net_to_host_u16 (mp->external_port), + proto, clib_net_to_host_u32 (mp->vrf_id), + locals, mp->is_add); + + vec_free (locals); + + REPLY_MACRO (VL_API_NAT44_ADD_DEL_LB_STATIC_MAPPING_REPLY); +} + +static void *vl_api_nat44_add_del_lb_static_mapping_t_print + (vl_api_nat44_add_del_lb_static_mapping_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: nat44_add_del_lb_static_mapping "); + s = format (s, "is_add %d\n", mp->is_add); + + FINISH; +} + +static void +send_nat44_lb_static_mapping_details (snat_static_mapping_t * m, + unix_shared_memory_queue_t * q, + u32 context) +{ + vl_api_nat44_lb_static_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + nat44_lb_addr_port_t *ap; + vl_api_nat44_lb_addr_port_t *locals; + + rmp = + vl_msg_api_alloc (sizeof (*rmp) + + (vec_len (m->locals) * sizeof (nat44_lb_addr_port_t))); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base); + + clib_memcpy (rmp->external_addr, &(m->external_addr), 4); + rmp->external_port = ntohs (m->external_port); + rmp->protocol = snat_proto_to_ip_proto (m->proto); + rmp->vrf_id = ntohl (m->vrf_id); + rmp->context = context; + + locals = (vl_api_nat44_lb_addr_port_t *) rmp->locals; + vec_foreach (ap, m->locals) + { + clib_memcpy (locals->addr, &(ap->addr), 4); + locals->port = htons (ap->port); + locals->probability = ap->probability; + locals++; + rmp->local_num++; + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_nat44_lb_static_mapping_dump_t_handler + (vl_api_nat44_lb_static_mapping_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (m, sm->static_mappings, + ({ + if (vec_len(m->locals)) + send_nat44_lb_static_mapping_details (m, q, mp->context); + })); + /* *INDENT-ON* */ +} + +static void *vl_api_nat44_lb_static_mapping_dump_t_print + (vl_api_nat44_lb_static_mapping_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: nat44_lb_static_mapping_dump "); + + FINISH; +} + /*******************************/ /*** Deterministic NAT (CGN) ***/ /*******************************/ @@ -3159,6 +3290,8 @@ _(NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE, \ nat44_interface_add_del_output_feature) \ _(NAT44_INTERFACE_OUTPUT_FEATURE_DUMP, \ nat44_interface_output_feature_dump) \ +_(NAT44_ADD_DEL_LB_STATIC_MAPPING, nat44_add_del_lb_static_mapping) \ +_(NAT44_LB_STATIC_MAPPING_DUMP, nat44_lb_static_mapping_dump) \ _(NAT_DET_ADD_DEL_MAP, nat_det_add_del_map) \ _(NAT_DET_FORWARD, nat_det_forward) \ _(NAT_DET_REVERSE, nat_det_reverse) \ diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 67950066..55a750e4 100644 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -630,7 +630,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, snat_session_key_t m_key; u32 old_addr, new_addr; ip_csum_t sum; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; snat_session_t * s; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; snat_user_key_t u_key; @@ -643,11 +643,12 @@ snat_out2in_unknown_proto (snat_main_t *sm, key.r_addr = ip->src_address; key.fib_index = rx_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { s = pool_elt_at_index (tsm->sessions, s_value.value); new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; @@ -721,14 +722,14 @@ snat_out2in_unknown_proto (snat_main_t *sm, /* Add to lookup tables */ s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) clib_warning ("out2in key add failed"); key.l_addr = ip->dst_address; key.fib_index = m->fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) clib_warning ("in2out key add failed"); } @@ -749,6 +750,152 @@ snat_out2in_unknown_proto (snat_main_t *sm, s->per_user_index); } +static void +snat_out2in_lb (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + nat_ed_ses_key_t key; + clib_bihash_kv_16_8_t s_kv, s_value; + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + snat_session_t *s = 0; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_session_key_t e_key, l_key; + clib_bihash_kv_8_8_t kv, value; + u32 old_addr, new_addr; + u32 proto = ip_proto_to_snat_proto (ip->protocol); + u16 new_port, old_port; + ip_csum_t sum; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt; + + old_addr = ip->dst_address.as_u32; + + key.l_addr = ip->dst_address; + key.r_addr = ip->src_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd = 0; + key.l_port = udp->dst_port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + } + else + { + e_key.addr = ip->dst_address; + e_key.port = udp->dst_port; + e_key.protocol = proto; + e_key.fib_index = rx_fib_index; + if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0)) + return; + + u_key.addr = l_key.addr; + u_key.fib_index = l_key.fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = l_key.addr; + u->fib_index = l_key.fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + clib_warning ("user key add failed"); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + s->outside_address_index = ~0; + s->out2in = e_key; + s->in2out = l_key; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Add to lookup tables */ + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) + clib_warning ("out2in-ed key add failed"); + + key.l_addr = l_key.addr; + key.fib_index = l_key.fib_index; + key.l_port = l_key.port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) + clib_warning ("in2out-ed key add failed"); + } + + new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip->checksum = ip_csum_fold (sum); + + if (PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP)) + { + old_port = tcp->dst_port; + tcp->dst_port = s->in2out.port; + new_port = tcp->dst_port; + + sum = tcp->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length); + tcp->checksum = ip_csum_fold(sum); + } + else + { + udp->dst_port = s->in2out.port; + udp->checksum = 0; + } + + vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); +} + static uword snat_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -894,8 +1041,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + vm); + goto trace0; + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; @@ -1033,8 +1192,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value1.value); + { + if (PREDICT_FALSE (value1.value == ~0ULL)) + { + snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, + vm); + goto trace1; + } + else + { + s1 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value1.value); + } + } old_addr1 = ip1->dst_address.as_u32; ip1->dst_address = s1->in2out.addr; @@ -1209,8 +1380,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + vm); + goto trace00; + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; diff --git a/test/test_nat.py b/test/test_nat.py index 0d622b08..de07019f 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -15,6 +15,7 @@ from scapy.packet import bind_layers from util import ppp from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder from time import sleep +from util import ip4_range class MethodHolder(VppTestCase): @@ -633,6 +634,15 @@ class TestNAT44(MethodHolder): protocol=sm.protocol, is_add=0) + lb_static_mappings = self.vapi.nat44_lb_static_mapping_dump() + for lb_sm in lb_static_mappings: + self.vapi.nat44_add_del_lb_static_mapping( + lb_sm.external_addr, + lb_sm.external_port, + lb_sm.protocol, + lb_sm.vrf_id, + is_add=0) + adresses = self.vapi.nat44_address_dump() for addr in adresses: self.vapi.nat44_add_del_address_range(addr.ip_address, @@ -1037,6 +1047,97 @@ class TestNAT44(MethodHolder): self.pg_start() self.pg3.assert_nothing_captured() + def test_static_lb(self): + """ NAT44 local service load balancing """ + external_addr_n = socket.inet_pton(socket.AF_INET, self.nat_addr) + external_port = 80 + local_port = 8080 + server1 = self.pg0.remote_hosts[0] + server2 = self.pg0.remote_hosts[1] + + locals = [{'addr': server1.ip4n, + 'port': local_port, + 'probability': 70}, + {'addr': server2.ip4n, + 'port': local_port, + 'probability': 30}] + + self.nat44_add_address(self.nat_addr) + self.vapi.nat44_add_del_lb_static_mapping(external_addr_n, + external_port, + IP_PROTOS.tcp, + local_num=len(locals), + locals=locals) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, + is_inside=0) + + # from client to service + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=self.pg1.remote_ip4, dst=self.nat_addr) / + TCP(sport=12345, dport=external_port)) + self.pg1.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + p = capture[0] + server = None + try: + ip = p[IP] + tcp = p[TCP] + self.assertIn(ip.dst, [server1.ip4, server2.ip4]) + if ip.dst == server1.ip4: + server = server1 + else: + server = server2 + self.assertEqual(tcp.dport, local_port) + self.check_tcp_checksum(p) + self.check_ip_checksum(p) + except: + self.logger.error(ppp("Unexpected or invalid packet:", p)) + raise + + # from service back to client + p = (Ether(src=server.mac, dst=self.pg0.local_mac) / + IP(src=server.ip4, dst=self.pg1.remote_ip4) / + TCP(sport=local_port, dport=12345)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg1.get_capture(1) + p = capture[0] + try: + ip = p[IP] + tcp = p[TCP] + self.assertEqual(ip.src, self.nat_addr) + self.assertEqual(tcp.sport, external_port) + self.check_tcp_checksum(p) + self.check_ip_checksum(p) + except: + self.logger.error(ppp("Unexpected or invalid packet:", p)) + raise + + # multiple clients + server1_n = 0 + server2_n = 0 + clients = ip4_range(self.pg1.remote_ip4, 10, 20) + pkts = [] + for client in clients: + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=client, dst=self.nat_addr) / + TCP(sport=12345, dport=external_port)) + pkts.append(p) + self.pg1.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(len(pkts)) + for p in capture: + if p[IP].dst == server1.ip4: + server1_n += 1 + else: + server2_n += 1 + self.assertTrue(server1_n > server2_n) + def test_multiple_inside_interfaces(self): """ NAT44 multiple non-overlapping address space inside interfaces """ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 61db4d6b..03238b9d 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1238,6 +1238,36 @@ class VppPapiProvider(object): """ return self.api(self.papi.nat44_user_dump, {}) + def nat44_add_del_lb_static_mapping( + self, + external_addr, + external_port, + protocol, + vrf_id=0, + local_num=0, + locals=None, + is_add=1): + """Add/delete NAT44 load balancing static mapping + + :param is_add - 1 if add, 0 if delete + """ + return self.api( + self.papi.nat44_add_del_lb_static_mapping, + {'is_add': is_add, + 'external_addr': external_addr, + 'external_port': external_port, + 'protocol': protocol, + 'vrf_id': vrf_id, + 'local_num': local_num, + 'locals': locals}) + + def nat44_lb_static_mapping_dump(self): + """Dump NAT44 load balancing static mappings + + :return: Dictionary of NAT44 load balancing static mapping + """ + return self.api(self.papi.nat44_lb_static_mapping_dump, {}) + def nat_det_add_del_map( self, in_addr, -- cgit 1.2.3-korg From d5b1b85d2e776718169f930f592af5184a18100a Mon Sep 17 00:00:00 2001 From: dongjuan Date: Fri, 15 Sep 2017 10:21:07 +0800 Subject: Modify return value of snat_out2in_lb(VPP-985) in order to trace session_index Change-Id: I4433155fbe21635f8a997523e2c7900c6a7569af Signed-off-by: dongjuan --- src/plugins/nat/in2out.c | 11 ++++++----- src/plugins/nat/out2in.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) mode change 100644 => 100755 src/plugins/nat/in2out.c mode change 100644 => 100755 src/plugins/nat/out2in.c (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c old mode 100644 new mode 100755 index d0a13237..9196ccbb --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -1370,7 +1370,7 @@ create_ses: vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; } -static void +static snat_session_t * snat_in2out_lb (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, @@ -1417,7 +1417,7 @@ snat_in2out_lb (snat_main_t *sm, l_key.protocol = proto; l_key.fib_index = rx_fib_index; if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0)) - return; + return 0; u_key.addr = ip->src_address; u_key.fib_index = rx_fib_index; @@ -1515,6 +1515,7 @@ snat_in2out_lb (snat_main_t *sm, s->last_heard = now; s->total_pkts++; s->total_bytes += vlib_buffer_length_in_chain (vm, b); + return s; } static inline uword @@ -1677,7 +1678,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm); goto trace00; } @@ -1848,7 +1849,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, + s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, vm); goto trace01; } @@ -2056,7 +2057,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm); goto trace0; } diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c old mode 100644 new mode 100755 index 55a750e4..52adc8a8 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -750,7 +750,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, s->per_user_index); } -static void +static snat_session_t * snat_out2in_lb (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, @@ -797,7 +797,7 @@ snat_out2in_lb (snat_main_t *sm, e_key.protocol = proto; e_key.fib_index = rx_fib_index; if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0)) - return; + return 0; u_key.addr = l_key.addr; u_key.fib_index = l_key.fib_index; @@ -894,6 +894,7 @@ snat_out2in_lb (snat_main_t *sm, s->last_heard = now; s->total_pkts++; s->total_bytes += vlib_buffer_length_in_chain (vm, b); + return s; } static uword @@ -1044,7 +1045,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm); goto trace0; } @@ -1195,7 +1196,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value1.value == ~0ULL)) { - snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, + s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, vm); goto trace1; } @@ -1383,7 +1384,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, vm); goto trace00; } -- cgit 1.2.3-korg From 092b3cd59f17d5c3ebe167d8729273838afbe2cb Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 19 Sep 2017 05:42:38 -0700 Subject: NAT: move session and user lookup tables to per thread data (VPP-986) Change-Id: I41a51bb36e31e05c76fef0b34fe006afbee27729 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 118 ++++++++++++++++++++++++++-------------------- src/plugins/nat/nat.c | 93 +++++++++++++++++++++--------------- src/plugins/nat/nat.h | 14 +++--- src/plugins/nat/nat_api.c | 4 +- src/plugins/nat/out2in.c | 32 ++++++++----- 5 files changed, 151 insertions(+), 110 deletions(-) (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 9196ccbb..8b658302 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -191,7 +191,7 @@ snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node, static inline int snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node, u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, - u32 rx_fib_index0) + u32 rx_fib_index0, u32 thread_index) { udp_header_t * udp0 = ip4_next_header (ip0); snat_session_key_t key0, sm0; @@ -205,7 +205,8 @@ snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node, /* NAT packet aimed at external address if */ /* has active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + &value0)) { /* or is static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -256,7 +257,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, kv0.key = user_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, &value0)) { /* no, make a new one */ pool_get (sm->per_thread_data[thread_index].users, u); @@ -275,7 +277,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, 1 /* is_add */); } else { @@ -339,10 +342,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, { /* Remove in2out, out2in keys */ kv0.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, + &kv0, 0 /* is_add */)) clib_warning ("in2out key delete failed"); kv0.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, 0 /* is_add */)) clib_warning ("out2in key delete failed"); /* log NAT event */ @@ -431,13 +436,15 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + 1 /* is_add */)) clib_warning ("out2in key add failed"); /* Add to translated packets worker lookup */ @@ -554,10 +561,11 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + &value0)) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, - IP_PROTOCOL_ICMP, rx_fib_index0) && + IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) && vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)) { dont_translate = 1; @@ -841,8 +849,22 @@ snat_hairpinning (snat_main_t *sm, key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = udp0->dst_port; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + return; + else + ti = value0.value; + } + else + ti = sm->num_workers; + /* Check if destination is in active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0)) { /* or static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -855,19 +877,6 @@ snat_hairpinning (snat_main_t *sm, else { si = value0.value; - if (sm->num_workers > 1) - { - k0.addr = ip0->dst_address; - k0.port = udp0->dst_port; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - ASSERT(0); - else - ti = value0.value; - } - else - ti = sm->num_workers; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; @@ -940,8 +949,23 @@ snat_icmp_hairpinning (snat_main_t *sm, key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = icmp_id0; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + return; + else + ti = value0.value; + } + else + ti = sm->num_workers; + /* Check if destination is in active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0)) { /* or static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -953,19 +977,6 @@ snat_icmp_hairpinning (snat_main_t *sm, else { si = value0.value; - if (sm->num_workers > 1) - { - k0.addr = ip0->dst_address; - k0.port = icmp_id0; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - ASSERT(0); - else - ti = value0.value; - } - else - ti = sm->num_workers; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; @@ -1141,7 +1152,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -1158,7 +1169,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1); } else { @@ -1183,7 +1194,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, else { /* Choose same out address as for TCP/UDP session to same destination */ - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { head_index = u->sessions_per_user_list_head_index; head = pool_elt_at_index (tsm->list_pool, head_index); @@ -1288,10 +1299,12 @@ create_ses: /* Remove in2out, out2in keys */ kv.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0)) + if (clib_bihash_add_del_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv, 0)) clib_warning ("in2out key del failed"); kv.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0)) + if (clib_bihash_add_del_8_8 ( + &sm->per_thread_data[thread_index].out2in, &kv, 0)) clib_warning ("out2in key del failed"); } } @@ -1424,7 +1437,7 @@ snat_in2out_lb (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -1441,7 +1454,7 @@ snat_in2out_lb (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1)) clib_warning ("user key add failed"); } else @@ -1653,12 +1666,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv0.key = key0.as_u64; - if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0)) + if (PREDICT_FALSE (clib_bihash_search_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, - ip0, proto0, rx_fib_index0)) && !is_output_feature) + ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, @@ -1824,12 +1838,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv1.key = key1.as_u64; - if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0)) + if (PREDICT_FALSE(clib_bihash_search_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, - ip1, proto1, rx_fib_index1)) && !is_output_feature) + ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature) goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, @@ -2031,12 +2046,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, + &kv0, &value0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, - ip0, proto0, rx_fib_index0)) && !is_output_feature) + ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 36b72664..876b6aad 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -483,13 +483,13 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, u_key.addr = m->local_addr; u_key.fib_index = m->fib_index; kv.key = u_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { user_index = value.value; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); - else - tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); u = pool_elt_at_index (tsm->users, user_index); if (u->nstaticsessions) { @@ -548,10 +548,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, s->in2out.fib_index); value.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &value, 0)) + if (clib_bihash_add_del_8_8 (&tsm->in2out, &value, 0)) clib_warning ("in2out key del failed"); value.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &value, 0)) + if (clib_bihash_add_del_8_8 (&tsm->out2in, &value, 0)) clib_warning ("out2in key del failed"); delete: pool_put (tsm->sessions, s); @@ -566,7 +566,7 @@ delete: if (addr_only) { pool_put (tsm->users, u); - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 0); } } } @@ -615,6 +615,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, snat_user_key_t w_key0; snat_worker_key_t w_key1; u32 worker_index = 0; + snat_main_per_thread_data_t *tsm; m_key.addr = e_addr; m_key.port = e_port; @@ -695,16 +696,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_external key add failed"); return VNET_API_ERROR_UNSPECIFIED; } - m_key.port = clib_host_to_net_u16 (m->external_port); - kv.key = m_key.as_u64; - kv.value = ~0ULL; - if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1)) - { - clib_warning ("static_mapping_by_local key add failed"); - return VNET_API_ERROR_UNSPECIFIED; - } - - m_key.fib_index = m->fib_index; /* Assign worker */ if (sm->workers) @@ -729,8 +720,21 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("worker-by-out add key failed"); return VNET_API_ERROR_UNSPECIFIED; } + tsm = vec_elt_at_index (sm->per_thread_data, worker_index); + } + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + kv.value = ~0ULL; + if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 1)) + { + clib_warning ("static_mapping_by_local key add failed"); + return VNET_API_ERROR_UNSPECIFIED; } + m_key.fib_index = m->fib_index; for (i = 0; i < vec_len (locals); i++) { m_key.addr = locals[i].addr; @@ -744,7 +748,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, m_key.port = clib_host_to_net_u16 (locals[i].port); kv.key = m_key.as_u64; kv.value = ~0ULL; - if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1)) + if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 1)) { clib_warning ("in2out key add failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -801,6 +805,15 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } } + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + if (!clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + m_key.addr = m->external_addr; m_key.port = m->external_port; m_key.protocol = m->proto; @@ -813,7 +826,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } m_key.port = clib_host_to_net_u16 (m->external_port); kv.key = m_key.as_u64; - if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0)) + if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0)) { clib_warning ("outi2in key del failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -832,7 +845,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } m_key.port = clib_host_to_net_u16 (local->port); kv.key = m_key.as_u64; - if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0)) + if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0)) { clib_warning ("in2out key del failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -938,16 +951,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) ses->out2in.port, ses->in2out.fib_index); kv.key = ses->in2out.as_u64; - clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0); kv.key = ses->out2in.as_u64; - clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0); } vec_add1 (ses_to_be_removed, ses - tsm->sessions); clib_dlist_remove (tsm->list_pool, ses->per_user_index); user_key.addr = ses->in2out.addr; user_key.fib_index = ses->in2out.fib_index; kv.key = user_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { u = pool_elt_at_index (tsm->users, value.value); u->nsessions--; @@ -2130,6 +2143,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) u32 static_mapping_memory_size = 64<<20; u8 static_mapping_only = 0; u8 static_mapping_connection_tracking = 0; + snat_main_per_thread_data_t *tsm; sm->deterministic = 0; @@ -2204,21 +2218,24 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->icmp_match_in2out_cb = icmp_match_in2out_slow; sm->icmp_match_out2in_cb = icmp_match_out2in_slow; - clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, - user_memory_size); - - clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, - user_memory_size); + vec_foreach (tsm, sm->per_thread_data) + { + clib_bihash_init_8_8 (&tsm->in2out, "in2out", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&tsm->out2in, "out2in", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&tsm->user_hash, "users", user_buckets, + user_memory_size); + } - clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, user_memory_size); + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", + translation_buckets, translation_memory_size); + clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); @@ -2595,10 +2612,6 @@ show_snat_command_fn (vlib_main_t * vm, if (verbose > 0) { - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, - verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, - verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, @@ -2617,6 +2630,10 @@ show_snat_command_fn (vlib_main_t * vm, vlib_worker_thread_t *w = vlib_worker_threads + j; vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name, w->lcore_id); + vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->in2out, + verbose - 1); + vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->out2in, + verbose - 1); vlib_cli_output (vm, " %d list pool elements", pool_elts (tsm->list_pool)); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index d1ba5d55..f970821b 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -244,6 +244,13 @@ typedef struct { } snat_static_map_resolve_t; typedef struct { + /* Main lookup tables */ + clib_bihash_8_8_t out2in; + clib_bihash_8_8_t in2out; + + /* Find-a-user => src address lookup */ + clib_bihash_8_8_t user_hash; + /* User pool */ snat_user_t * users; @@ -271,17 +278,10 @@ typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm, typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, u32 rx_fib_index); typedef struct snat_main_s { - /* Main lookup tables */ - clib_bihash_8_8_t out2in; - clib_bihash_8_8_t in2out; - /* Endpoint address dependent sessions lookup tables */ clib_bihash_16_8_t out2in_ed; clib_bihash_16_8_t in2out_ed; - /* Find-a-user => src address lookup */ - clib_bihash_8_8_t user_hash; - /* Non-translated packets worker lookup => src address + VRF */ clib_bihash_8_8_t worker_by_in; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index fa20f2cc..50b4a9ae 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -870,7 +870,7 @@ static void tsm = vec_elt_at_index (sm->per_thread_data, value.value); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - if (clib_bihash_search_8_8 (&sm->user_hash, &key, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) return; u = pool_elt_at_index (tsm->users, value.value); if (!u->nsessions && !u->nstaticsessions) @@ -2105,7 +2105,7 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t * tsm = vec_elt_at_index (sm->per_thread_data, value.value); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - if (clib_bihash_search_8_8 (&sm->user_hash, &key, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) return; u = pool_elt_at_index (tsm->users, value.value); if (!u->nsessions && !u->nstaticsessions) diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 52adc8a8..6472e7ff 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -146,7 +146,8 @@ create_session_for_static_mapping (snat_main_t *sm, kv0.key = user_key.as_u64; /* Ever heard of the "user" = inside ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, &value0)) { /* no, make a new one */ pool_get (sm->per_thread_data[thread_index].users, u); @@ -166,7 +167,8 @@ create_session_for_static_mapping (snat_main_t *sm, kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, 1 /* is_add */); /* add non-traslated packets worker lookup */ kv0.value = thread_index; @@ -211,13 +213,15 @@ create_session_for_static_mapping (snat_main_t *sm, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + 1 /* is_add */)) clib_warning ("out2in key add failed"); /* log NAT event */ @@ -325,7 +329,8 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -672,7 +677,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -689,7 +694,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1); } else { @@ -804,7 +809,7 @@ snat_out2in_lb (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -821,7 +826,7 @@ snat_out2in_lb (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1)) clib_warning ("user key add failed"); } else @@ -1013,7 +1018,8 @@ snat_out2in_node_fn (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -1164,7 +1170,8 @@ snat_out2in_node_fn (vlib_main_t * vm, kv1.key = key1.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv1, &value1)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -1351,7 +1358,8 @@ snat_out2in_node_fn (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ -- cgit 1.2.3-korg From 41fef50d5db5e7deb3cfd901c3108abbc4406813 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Fri, 22 Sep 2017 02:43:05 -0700 Subject: NAT: session number limitation to avoid running out of memory crash (VPP-984) Change-Id: I7f18f8c4ba609d96950dc1f833feb967d4a099b7 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 69 ++++++++++++++++++++++++++++++++----------- src/plugins/nat/nat.c | 2 ++ src/plugins/nat/nat.h | 10 +++++++ src/plugins/nat/out2in.c | 77 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 120 insertions(+), 38 deletions(-) (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index dfe10303..dbbc67f9 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -101,7 +101,8 @@ _(IN2OUT_PACKETS, "Good in2out packets processed") \ _(OUT_OF_PORTS, "Out of ports") \ _(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, "No translation") +_(NO_TRANSLATION, "No translation") \ +_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") typedef enum { #define _(sym,str) SNAT_IN2OUT_ERROR_##sym, @@ -242,6 +243,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, u32 outside_fib_index; uword * p; + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return SNAT_IN2OUT_NEXT_DROP; + } + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); if (! p) { @@ -1064,14 +1071,15 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, ip->checksum = ip_csum_fold (sum); } -static void +static snat_session_t * snat_in2out_unknown_proto (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; @@ -1108,6 +1116,12 @@ snat_in2out_unknown_proto (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + u_key.addr = ip->src_address; u_key.fib_index = rx_fib_index; kv.key = u_key.as_u64; @@ -1198,7 +1212,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, goto create_ses; } } - return; + return 0; } create_ses: @@ -1342,6 +1356,8 @@ create_ses: if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0) vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + return s; } static snat_session_t * @@ -1351,7 +1367,8 @@ snat_in2out_lb (snat_main_t *sm, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t s_kv, s_value; @@ -1386,6 +1403,12 @@ snat_in2out_lb (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index))) + { + b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + l_key.addr = ip->src_address; l_key.port = udp->src_port; l_key.protocol = proto; @@ -1598,8 +1621,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto0 == ~0)) { - snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } @@ -1653,8 +1678,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, - now, vm); + s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } else @@ -1770,8 +1797,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto1 == ~0)) { - snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, - thread_index, now, vm); + s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } @@ -1825,8 +1854,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, - now, vm); + s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } else @@ -1978,8 +2009,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto0 == ~0)) { - snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } @@ -2034,8 +2067,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, - now, vm); + s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } else diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 5f3b006e..612085fc 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -2216,6 +2216,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) /* for show commands, etc. */ sm->translation_buckets = translation_buckets; sm->translation_memory_size = translation_memory_size; + /* do not exceed load factor 10 */ + sm->max_translations = 10 * translation_buckets; sm->user_buckets = user_buckets; sm->user_memory_size = user_memory_size; sm->max_translations_per_user = max_translations_per_user; diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 20e45952..d34ff07b 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -345,6 +345,7 @@ typedef struct snat_main_s { u8 deterministic; u32 translation_buckets; u32 translation_memory_size; + u32 max_translations; u32 user_buckets; u32 user_memory_size; u32 max_translations_per_user; @@ -551,4 +552,13 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0, return 0; } +always_inline u8 +maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index) +{ + if (pool_elts (sm->per_thread_data[thread_index].sessions) >= sm->max_translations) + return 1; + + return 0; +} + #endif /* __included_nat_h__ */ diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 6472e7ff..e5426c1a 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -87,7 +87,8 @@ vlib_node_registration_t snat_det_out2in_node; _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ _(OUT2IN_PACKETS, "Good out2in packets processed") \ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, "No translation") +_(NO_TRANSLATION, "No translation") \ +_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") typedef enum { #define _(sym,str) SNAT_OUT2IN_ERROR_##sym, @@ -139,6 +140,12 @@ create_session_for_static_mapping (snat_main_t *sm, dlist_elt_t * per_user_list_head_elt; ip4_header_t *ip0; + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + ip0 = vlib_buffer_get_current (b0); user_key.addr = in2out.addr; @@ -620,14 +627,15 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, return next0; } -static void +static snat_session_t * snat_out2in_unknown_proto (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; @@ -660,13 +668,22 @@ snat_out2in_unknown_proto (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + m_key.addr = ip->dst_address; m_key.port = 0; m_key.protocol = 0; m_key.fib_index = rx_fib_index; kv.key = m_key.as_u64; if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) - return; + { + b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return 0; + } m = pool_elt_at_index (sm->static_mappings, value.value); @@ -753,6 +770,8 @@ snat_out2in_unknown_proto (snat_main_t *sm, clib_dlist_remove (tsm->list_pool, s->per_user_index); clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, s->per_user_index); + + return s; } static snat_session_t * @@ -762,7 +781,8 @@ snat_out2in_lb (snat_main_t *sm, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t s_kv, s_value; @@ -797,6 +817,12 @@ snat_out2in_lb (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + e_key.addr = ip->dst_address; e_key.port = udp->dst_port; e_key.protocol = proto; @@ -998,8 +1024,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == ~0)) { - snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } @@ -1042,7 +1070,6 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s0) { - b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } @@ -1051,8 +1078,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, - vm); + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } else @@ -1150,8 +1179,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto1 == ~0)) { - snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1, - thread_index, now, vm); + s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } @@ -1194,7 +1225,6 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s1) { - b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } @@ -1203,8 +1233,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value1.value == ~0ULL)) { - s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, - vm); + s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, + now, vm, node); + if (!s1) + next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } else @@ -1328,8 +1360,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == ~0)) { - snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } @@ -1383,8 +1417,7 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s0) { - b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; - next0 = SNAT_OUT2IN_NEXT_DROP; + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } } @@ -1392,8 +1425,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, - vm); + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } else -- cgit 1.2.3-korg From 7865b5c8a4f11731d8c6d4627e505afe990989b4 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 26 Sep 2017 01:23:01 -0700 Subject: NAT: remove worker_by_in lookup hash table (VPP-992) Change-Id: I3873d3e411bf93cac82e73a0b8e3b22563aaf217 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 91 ++++++++++------------------------------------- src/plugins/nat/nat.h | 16 --------- src/plugins/nat/nat_api.c | 16 ++++++--- src/plugins/nat/out2in.c | 4 --- 4 files changed, 30 insertions(+), 97 deletions(-) (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 94416255..c2f9586c 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -390,28 +390,12 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, kv.value = m - sm->static_mappings; clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1); - /* Assign worker */ if (sm->workers) { - snat_user_key_t w_key0; - - w_key0.addr = m->local_addr; - w_key0.fib_index = m->fib_index; - kv.key = w_key0.as_u64; - - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - { - kv.value = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; - - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1); - } - else - { - kv.value = value.value; - } - - m->worker_index = kv.value; + ip4_header_t ip = { + .src_address = m->local_addr, + }; + m->worker_index = sm->worker_in2out_cb (&ip, m->fib_index); } } else @@ -478,8 +462,8 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, u_key.addr = m->local_addr; u_key.fib_index = m->fib_index; kv.key = u_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) @@ -607,7 +591,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, snat_address_t *a = 0; int i; nat44_lb_addr_port_t *local; - snat_user_key_t w_key0; u32 worker_index = 0; snat_main_per_thread_data_t *tsm; @@ -694,16 +677,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, /* Assign worker */ if (sm->workers) { - w_key0.addr = locals[0].addr; - w_key0.fib_index = fib_index; - kv.key = w_key0.as_u64; - - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; - else - worker_index = value.value; - + worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; tsm = vec_elt_at_index (sm->per_thread_data, worker_index); m->worker_index = worker_index; } @@ -730,6 +705,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, locals[i].prefix = (i == 0) ? locals[i].probability :\ (locals[i - 1].prefix + locals[i].probability); vec_add1 (m->locals, locals[i]); + m_key.port = clib_host_to_net_u16 (locals[i].port); kv.key = m_key.as_u64; kv.value = ~0ULL; @@ -738,19 +714,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("in2out key add failed"); return VNET_API_ERROR_UNSPECIFIED; } - /* Assign worker */ - if (sm->workers) - { - w_key0.addr = locals[i].addr; - w_key0.fib_index = fib_index; - kv.key = w_key0.as_u64; - kv.value = worker_index; - if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1)) - { - clib_warning ("worker-by-in key add failed"); - return VNET_API_ERROR_UNSPECIFIED; - } - } } } else @@ -801,6 +764,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_external key del failed"); return VNET_API_ERROR_UNSPECIFIED; } + m_key.port = clib_host_to_net_u16 (m->external_port); kv.key = m_key.as_u64; if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0)) @@ -820,6 +784,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_local key del failed"); return VNET_API_ERROR_UNSPECIFIED; } + m_key.port = clib_host_to_net_u16 (local->port); kv.key = m_key.as_u64; if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0)) @@ -2017,32 +1982,17 @@ static u32 snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0) { snat_main_t *sm = &snat_main; - snat_user_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u32 next_worker_index = 0; + u32 hash; - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - kv0.key = key0.as_u64; + next_worker_index = sm->first_worker_index; + hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) + + (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24); - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - - /* add non-traslated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); - } + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; else - next_worker_index = value0.value; + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; return next_worker_index; } @@ -2265,9 +2215,6 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) user_memory_size); } - clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, - user_memory_size); - clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); @@ -2648,8 +2595,6 @@ show_snat_command_fn (vlib_main_t * vm, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, - verbose - 1); vec_foreach_index (j, sm->per_thread_data) { tsm = vec_elt_at_index (sm->per_thread_data, j); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index d34ff07b..e467fde7 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -95,19 +95,6 @@ typedef struct { }; } snat_user_key_t; -typedef struct { - union - { - struct - { - ip4_address_t addr; - u16 port; - u16 fib_index; - }; - u64 as_u64; - }; -} snat_worker_key_t; - #define foreach_snat_protocol \ _(UDP, 0, udp, "udp") \ @@ -283,9 +270,6 @@ typedef struct snat_main_s { clib_bihash_16_8_t out2in_ed; clib_bihash_16_8_t in2out_ed; - /* Non-translated packets worker lookup => src address + VRF */ - clib_bihash_8_8_t worker_by_in; - snat_icmp_match_function_t * icmp_match_in2out_cb; snat_icmp_match_function_t * icmp_match_out2in_cb; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 50b4a9ae..b56b4436 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -856,6 +856,7 @@ static void snat_user_t *u; u32 session_index, head_index, elt_index; dlist_elt_t *head, *elt; + ip4_header_t ip; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -864,10 +865,13 @@ static void return; clib_memcpy (&ukey.addr, mp->ip_address, 4); + ip.src_address.as_u32 = ukey.addr.as_u32; ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id)); key.key = ukey.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &key, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, ukey.fib_index)); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) @@ -2093,16 +2097,20 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t * snat_user_t *u; u32 session_index, head_index, elt_index; dlist_elt_t *head, *elt; + ip4_header_t ip; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; clib_memcpy (&ukey.addr, mp->ip_address, 4); + ip.src_address.as_u32 = ukey.addr.as_u32; ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id)); key.key = ukey.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &key, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, ukey.fib_index)); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index e5426c1a..802c3312 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -176,10 +176,6 @@ create_session_for_static_mapping (snat_main_t *sm, /* add user */ clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, &kv0, 1 /* is_add */); - - /* add non-traslated packets worker lookup */ - kv0.value = thread_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); } else { -- cgit 1.2.3-korg From dea5881815426442f8b3137c5971ed1764bdd278 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Wed, 4 Oct 2017 23:57:58 -0700 Subject: NAT: fixed ICMP broken translation for GRE tunnel interface (VPP-1008) Change-Id: Ie3245b96c511cc30915e70e8c881f445291a38c2 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 22 +++++++--------------- src/plugins/nat/nat.h | 19 +++++++++++++------ src/plugins/nat/out2in.c | 17 +++++++---------- 3 files changed, 27 insertions(+), 31 deletions(-) (limited to 'src/plugins/nat/out2in.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index dbbc67f9..8e583313 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -523,11 +523,11 @@ snat_in2out_error_t icmp_get_key(ip4_header_t *ip0, * @param e optional parameter */ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; @@ -537,13 +537,7 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, clib_bihash_kv_8_8_t kv0, value0; u32 next0 = ~0; int err; - u32 iph_offset0 = 0; - if (PREDICT_FALSE(vnet_buffer(b0)->sw_if_index[VLIB_TX] != ~0)) - { - iph_offset0 = vnet_buffer (b0)->ip.save_rewrite_length; - } - ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); @@ -622,11 +616,11 @@ out: * @param e optional parameter */ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; @@ -637,7 +631,6 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, u32 next0 = ~0; int err; - ip0 = vlib_buffer_get_current (b0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); @@ -715,7 +708,7 @@ static inline u32 icmp_in2out (snat_main_t *sm, echo0 = (icmp_echo_header_t *)(icmp0+1); - next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, + next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0, ip0, &protocol, &sm0, &dont_translate, d, e); if (next0_tmp != ~0) next0 = next0_tmp; @@ -2919,11 +2912,11 @@ VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn); * @param e optional parameter */ u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; @@ -2942,7 +2935,6 @@ u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node, ip4_address_t in_addr; u16 in_port; - ip0 = vlib_buffer_get_current (b0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *)(icmp0+1); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index e467fde7..e53e924f 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -257,6 +257,7 @@ typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm, vlib_node_runtime_t *node, u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, @@ -454,27 +455,33 @@ typedef struct { } tcp_udp_header_t; u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); u32 icmp_match_in2out_det(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e); void increment_v4_address(ip4_address_t * a); diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 802c3312..f250136b 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -297,11 +297,11 @@ snat_out2in_error_t icmp_get_key(ip4_header_t *ip0, * @param e optional parameter */ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; @@ -314,7 +314,6 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, u32 next0 = ~0; int err; - ip0 = vlib_buffer_get_current (b0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); @@ -408,11 +407,11 @@ out: * @param e optional parameter */ u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u32 rx_fib_index0; @@ -423,7 +422,6 @@ u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node, u32 next0 = ~0; int err; - ip0 = vlib_buffer_get_current (b0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); @@ -494,7 +492,7 @@ static inline u32 icmp_out2in (snat_main_t *sm, echo0 = (icmp_echo_header_t *)(icmp0+1); - next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, + next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0, ip0, &protocol, &sm0, &dont_translate, d, e); if (next0_tmp != ~0) next0 = next0_tmp; @@ -2038,11 +2036,11 @@ VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn); * @param e optional parameter */ u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node, - u32 thread_index, vlib_buffer_t *b0, u8 *p_proto, + u32 thread_index, vlib_buffer_t *b0, + ip4_header_t *ip0, u8 *p_proto, snat_session_key_t *p_value, u8 *p_dont_translate, void *d, void *e) { - ip4_header_t *ip0; icmp46_header_t *icmp0; u32 sw_if_index0; u8 protocol; @@ -2058,7 +2056,6 @@ u32 icmp_match_out2in_det(snat_main_t *sm, vlib_node_runtime_t *node, snat_det_session_t * ses0 = 0; ip4_address_t out_addr; - ip0 = vlib_buffer_get_current (b0); icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *)(icmp0+1); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; -- cgit 1.2.3-korg