diff options
author | Matus Fabian <matfabia@cisco.com> | 2017-02-10 03:48:01 -0800 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2017-03-07 12:47:12 +0000 |
commit | 066f034b903bda6e938bec1b12f01edef65ee9c4 (patch) | |
tree | ddb4a1eb0878fe7ac58f25056f3bb7c6ad0bae3a /src/plugins/snat | |
parent | eab38d91e8db5ad271598a63781a7afa3bd8b5ea (diff) |
CGN: Deterministic NAT (VPP-623)
Inside user is statically mapped to a set of outside ports. Support endpoint
dependent mapping to deal with overloading of the outside ports.
Change-Id: I8014438744597a976f8ae459283e8b91f63b7f72
Signed-off-by: Matus Fabian <matfabia@cisco.com>
Diffstat (limited to 'src/plugins/snat')
-rw-r--r-- | src/plugins/snat/in2out.c | 538 | ||||
-rw-r--r-- | src/plugins/snat/out2in.c | 467 | ||||
-rw-r--r-- | src/plugins/snat/snat.api | 91 | ||||
-rw-r--r-- | src/plugins/snat/snat.c | 699 | ||||
-rw-r--r-- | src/plugins/snat/snat.h | 72 | ||||
-rw-r--r-- | src/plugins/snat/snat_det.c | 125 | ||||
-rw-r--r-- | src/plugins/snat/snat_det.h | 190 | ||||
-rw-r--r-- | src/plugins/snat/snat_test.c | 129 |
8 files changed, 2148 insertions, 163 deletions
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e9bc5384916..5970588b5b9 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -23,6 +23,7 @@ #include <vnet/fib/ip4_fib.h> #include <snat/snat.h> #include <snat/snat_ipfix_logging.h> +#include <snat/snat_det.h> #include <vppinfra/hash.h> #include <vppinfra/error.h> @@ -86,6 +87,7 @@ vlib_node_registration_t snat_in2out_node; vlib_node_registration_t snat_in2out_slowpath_node; vlib_node_registration_t snat_in2out_fast_node; vlib_node_registration_t snat_in2out_worker_handoff_node; +vlib_node_registration_t snat_det_in2out_node; #define foreach_snat_in2out_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -1325,6 +1327,510 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); +/**************************/ +/*** deterministic mode ***/ +/**************************/ +static uword +snat_det_in2out_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + u32 now = (u32) vlib_time_now (vm); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, lo_port0, i; + u16 old_port1, new_port1, lo_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + u32 proto0, proto1; + snat_det_out_key_t key0, key1; + snat_det_map_t * dm0, * dm1; + 
snat_det_session_t * ses0 = 0, * ses1 = 0; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + goto trace0; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i = 0; i < dm0->ports_per_host; i++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + i + + (clib_net_to_host_u16 (tcp0->src) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto trace0; + } + } + + new_port0 = ses0->out.out_port; + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = 
sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t 
*t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + dm1 = snat_det_map_by_user(sm, &ip1->src_address); + if (PREDICT_FALSE(!dm1)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + goto trace1; + } + + snat_det_forward(dm1, &ip1->src_address, &new_addr1, &lo_port1); + + + ses1 = snat_det_find_ses_by_in(dm1, &ip1->src_address, tcp1->src); + if (PREDICT_FALSE(!ses1)) + { + key1.ext_host_addr = ip1->dst_address; + key1.ext_host_port = tcp1->dst; + for (i = 0; i < dm1->ports_per_host; i++) + { + key1.out_port = clib_host_to_net_u16 (lo_port1 + i + + (clib_net_to_host_u16 (tcp1->src) % dm1->ports_per_host)); + + if (snat_det_get_ses_by_out (dm1, &ip1->src_address, key1.as_u64)) + continue; + + ses1 = snat_det_ses_create(dm1, &ip1->src_address, tcp1->src, &key1); + break; + } + if (PREDICT_FALSE(!ses1)) + { + next1 = SNAT_IN2OUT_NEXT_DROP; + goto trace1; + } + } + + new_port1 = ses1->out.out_port; + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + old_addr1.as_u32 = ip1->src_address.as_u32; + ip1->src_address.as_u32 = new_addr1.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + src_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + if (tcp1->flags & TCP_FLAG_SYN) + ses1->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_SYN_SENT) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + else if 
(tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED) + ses1->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm1, ses1); + else if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses1->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp1->flags == 0 && ses1->state == SNAT_SESSION_UNKNOWN) + ses1->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port1 = tcp1->src; + tcp1->src = new_port1; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + ses1->state = SNAT_SESSION_UDP_ACTIVE; + old_port1 = udp1->src_port; + udp1->src_port = new_port1; + udp1->checksum = 0; + } + + switch(ses1->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses1->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses1->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses1->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace1: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (ses1) + t->session_index = ses1 - dm1->sessions; + } + + pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from 
> 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + ip4_address_t new_addr0, old_addr0; + u16 old_port0, new_port0, lo_port0, i; + udp_header_t * udp0; + tcp_header_t * tcp0; + u32 proto0; + snat_det_out_key_t key0; + snat_det_map_t * dm0; + snat_det_session_t * ses0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + dm0 = snat_det_map_by_user(sm, &ip0->src_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("no match for internal host %U", + format_ip4_address, &ip0->src_address); + goto trace00; + } + + snat_det_forward(dm0, &ip0->src_address, &new_addr0, &lo_port0); + + ses0 = snat_det_find_ses_by_in(dm0, &ip0->src_address, tcp0->src); + if (PREDICT_FALSE(!ses0)) + { + key0.ext_host_addr = ip0->dst_address; + key0.ext_host_port = tcp0->dst; + for (i = 0; i < dm0->ports_per_host; i++) + { + key0.out_port = clib_host_to_net_u16 (lo_port0 + i + + (clib_net_to_host_u16 (tcp0->src) % dm0->ports_per_host)); + + if (snat_det_get_ses_by_out (dm0, &ip0->src_address, key0.as_u64)) + continue; + + ses0 = snat_det_ses_create(dm0, &ip0->src_address, tcp0->src, &key0); + break; + } + if (PREDICT_FALSE(!ses0)) + { + next0 = SNAT_IN2OUT_NEXT_DROP; + goto trace00; + } + } + + new_port0 = ses0->out.out_port; + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0.as_u32 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + src_address 
/* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_SYN) + ses0->state = SNAT_SESSION_TCP_SYN_SENT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_SYN_SENT) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_FIN_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_FIN_WAIT) + snat_det_ses_close(dm0, ses0); + else if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_CLOSE_WAIT) + ses0->state = SNAT_SESSION_TCP_LAST_ACK; + else if (tcp0->flags == 0 && ses0->state == SNAT_SESSION_UNKNOWN) + ses0->state = SNAT_SESSION_TCP_ESTABLISHED; + + old_port0 = tcp0->src; + tcp0->src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + ses0->state = SNAT_SESSION_UDP_ACTIVE; + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + + switch(ses0->state) + { + case SNAT_SESSION_UDP_ACTIVE: + ses0->expire = now + SNAT_UDP_TIMEOUT; + break; + case SNAT_SESSION_TCP_SYN_SENT: + case SNAT_SESSION_TCP_FIN_WAIT: + case SNAT_SESSION_TCP_CLOSE_WAIT: + case SNAT_SESSION_TCP_LAST_ACK: + ses0->expire = now + SNAT_TCP_TRANSITORY_TIMEOUT; + break; + case SNAT_SESSION_TCP_ESTABLISHED: + ses0->expire = now + SNAT_TCP_ESTABLISHED_TIMEOUT; + break; + } + + trace00: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = 0; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + 
t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_det_in2out_node.index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_det_in2out_node) = { + .function = snat_det_in2out_node_fn, + .name = "snat-det-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = 2, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_det_in2out_node, snat_det_in2out_node_fn); + +/**********************/ +/*** worker handoff ***/ +/**********************/ static uword snat_in2out_worker_handoff_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1365,8 +1871,6 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, u32 sw_if_index0; u32 rx_fib_index0; ip4_header_t * ip0; - snat_user_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u8 do_handoff; bi0 = from[0]; @@ -1380,28 +1884,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - kv0.key = key0.as_u64; - - /* Ever heard of of the "user" before? 
*/ - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - - /* add non-traslated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); - } - else - next_worker_index = value0.value; + next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0); if (PREDICT_FALSE (next_worker_index != cpu_index)) { @@ -1441,7 +1924,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, /* if this is 1st frame */ if (!f) { - f = vlib_get_frame_to_node (vm, snat_in2out_node.index); + f = vlib_get_frame_to_node (vm, sm->in2out_node_index); to_next = vlib_frame_vector_args (f); } @@ -1461,7 +1944,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm, } if (f) - vlib_put_frame_to_node (vm, snat_in2out_node.index, f); + vlib_put_frame_to_node (vm, sm->in2out_node_index, f); if (hf) hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; @@ -1508,6 +1991,9 @@ VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); +/********************************/ +/*** static mapping only mode ***/ +/********************************/ static inline u32 icmp_in2out_static_map (snat_main_t *sm, vlib_buffer_t * b0, ip4_header_t * ip0, diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 7905436aab2..9b4c73d779e 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -24,6 +24,7 @@ #include <vnet/fib/ip4_fib.h> #include <snat/snat.h> #include <snat/snat_ipfix_logging.h> +#include <snat/snat_det.h> #include <vppinfra/hash.h> #include <vppinfra/error.h> @@ -80,6 +81,7 @@ static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) vlib_node_registration_t snat_out2in_node; 
vlib_node_registration_t snat_out2in_fast_node; vlib_node_registration_t snat_out2in_worker_handoff_node; +vlib_node_registration_t snat_det_out2in_node; #define foreach_snat_out2in_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -996,6 +998,418 @@ VLIB_REGISTER_NODE (snat_out2in_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn); +/**************************/ +/*** deterministic mode ***/ +/**************************/ +static uword +snat_det_out2in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + ip4_address_t new_addr0, old_addr0, new_addr1, old_addr1; + u16 new_port0, old_port0, old_port1, new_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + u32 proto0, proto1; + snat_det_out_key_t key0, key1; + snat_det_map_t * dm0, * dm1; + snat_det_session_t * ses0 = 0, * ses1 = 0; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + key0.ext_host_addr = ip0->src_address; + key0.ext_host_port = tcp0->src; + key0.out_port = tcp0->dst; + + dm0 = snat_det_map_by_out(sm, &ip0->dst_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip0->dst_address); + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace0; + } + + snat_det_reverse(dm0, &ip0->dst_address, + clib_net_to_host_u16(tcp0->dst), &new_addr0); + + ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64); + if (PREDICT_FALSE(!ses0)) + { + clib_warning("no match src %U:%d dst %d for user %U", + format_ip4_address, &ip0->dst_address, + clib_net_to_host_u16 (tcp0->src), + clib_net_to_host_u16 (tcp0->dst), + format_ip4_address, &new_addr0); + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace0; + } + new_port0 = ses0->in_port; + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0 = ip0->dst_address; + ip0->dst_address = new_addr0; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & 
TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm0, ses0); + + old_port0 = tcp0->dst; + tcp0->dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + + trace0: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + b1 = vlib_get_buffer (vm, bi1); + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + + key1.ext_host_addr = ip1->src_address; + key1.ext_host_port = tcp1->src; + key1.out_port = tcp1->dst; + + dm1 = snat_det_map_by_out(sm, &ip1->dst_address); + if (PREDICT_FALSE(!dm1)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip1->dst_address); + next1 = SNAT_OUT2IN_NEXT_DROP; + goto trace1; + } + + snat_det_reverse(dm1, &ip1->dst_address, + clib_net_to_host_u16(tcp1->dst), &new_addr1); + + ses1 = snat_det_get_ses_by_out (dm1, &new_addr1, key1.as_u64); + if (PREDICT_FALSE(!ses1)) + { + clib_warning("no match src %U:%d dst %d for user %U", + format_ip4_address, &ip1->dst_address, + clib_net_to_host_u16 (tcp1->src), + clib_net_to_host_u16 (tcp1->dst), + format_ip4_address, &new_addr1); + next1 = 
SNAT_OUT2IN_NEXT_DROP; + goto trace1; + } + new_port1 = ses1->in_port; + + proto1 = ip_proto_to_snat_proto (ip1->protocol); + + old_addr1 = ip1->dst_address; + ip1->dst_address = new_addr1; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + if (tcp1->flags & TCP_FLAG_FIN && ses1->state == SNAT_SESSION_TCP_ESTABLISHED) + ses1->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp1->flags & TCP_FLAG_ACK && ses1->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm1, ses1); + + old_port1 = tcp1->dst; + tcp1->dst = new_port1; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1.as_u32, new_addr1.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->dst_port; + udp1->dst_port = new_port1; + udp1->checksum = 0; + } + + trace1: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (ses1) + t->session_index = ses1 - dm1->sessions; + } + + pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + ip4_address_t new_addr0, old_addr0; + u16 
new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + u32 proto0; + snat_det_out_key_t key0; + snat_det_map_t * dm0; + snat_det_session_t * ses0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + + key0.ext_host_addr = ip0->src_address; + key0.ext_host_port = tcp0->src; + key0.out_port = tcp0->dst; + + dm0 = snat_det_map_by_out(sm, &ip0->dst_address); + if (PREDICT_FALSE(!dm0)) + { + clib_warning("unknown dst address: %U", + format_ip4_address, &ip0->dst_address); + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace00; + } + + snat_det_reverse(dm0, &ip0->dst_address, + clib_net_to_host_u16(tcp0->dst), &new_addr0); + + ses0 = snat_det_get_ses_by_out (dm0, &new_addr0, key0.as_u64); + if (PREDICT_FALSE(!ses0)) + { + clib_warning("no match src %U:%d dst %d for user %U", + format_ip4_address, &ip0->dst_address, + clib_net_to_host_u16 (tcp0->src), + clib_net_to_host_u16 (tcp0->dst), + format_ip4_address, &new_addr0); + next0 = SNAT_OUT2IN_NEXT_DROP; + goto trace00; + } + new_port0 = ses0->in_port; + + proto0 = ip_proto_to_snat_proto (ip0->protocol); + + old_addr0 = ip0->dst_address; + ip0->dst_address = new_addr0; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm->inside_fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + if (tcp0->flags & TCP_FLAG_FIN && ses0->state == SNAT_SESSION_TCP_ESTABLISHED) + ses0->state = SNAT_SESSION_TCP_CLOSE_WAIT; + else if (tcp0->flags & TCP_FLAG_ACK && ses0->state == SNAT_SESSION_TCP_LAST_ACK) + snat_det_ses_close(dm0, 
ses0); + + old_port0 = tcp0->dst; + tcp0->dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0.as_u32, new_addr0.as_u32, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (ses0) + t->session_index = ses0 - dm0->sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_det_out2in_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_det_out2in_node) = { + .function = snat_det_out2in_node_fn, + .name = "snat-det-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = 2, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_det_out2in_node, snat_det_out2in_node_fn); + +/**********************/ +/*** worker handoff ***/ +/**********************/ static uword snat_out2in_worker_handoff_fn 
(vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1036,9 +1450,6 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, u32 sw_if_index0; u32 rx_fib_index0; ip4_header_t * ip0; - udp_header_t * udp0; - snat_worker_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u8 do_handoff; bi0 = from[0]; @@ -1051,49 +1462,8 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); ip0 = vlib_buffer_get_current (b0); - udp0 = ip4_next_header (ip0); - - key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; - key0.fib_index = rx_fib_index0; - - if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP)) - { - icmp46_header_t * icmp0 = (icmp46_header_t *) udp0; - icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1); - key0.port = echo0->identifier; - } - kv0.key = key0.as_u64; - - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - { - key0.port = 0; - kv0.key = key0.as_u64; - - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - } - else - { - /* Static mapping without port */ - next_worker_index = value0.value; - } - - /* Add to translated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); - } - else - next_worker_index = value0.value; + next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0); if (PREDICT_FALSE (next_worker_index != cpu_index)) { @@ -1133,7 +1503,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm, /* if this is 1st frame */ if (!f) { - f = vlib_get_frame_to_node (vm, snat_out2in_node.index); + f = vlib_get_frame_to_node (vm, sm->out2in_node_index); to_next = vlib_frame_vector_args (f); } @@ -1153,7 +1523,7 @@ snat_out2in_worker_handoff_fn 
(vlib_main_t * vm, } if (f) - vlib_put_frame_to_node (vm, snat_out2in_node.index, f); + vlib_put_frame_to_node (vm, sm->out2in_node_index, f); if (hf) hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; @@ -1200,6 +1570,9 @@ VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn); +/********************************/ +/*** static mapping only mode ***/ +/********************************/ static inline u32 icmp_out2in_fast (snat_main_t *sm, vlib_buffer_t * b0, ip4_header_t * ip0, diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 8b1537bff67..8ff69b25742 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -226,6 +226,7 @@ define snat_show_config @param retval - return code for the request @param static_mapping_only - if 1 dynamic translations disabled @param static_mapping_connection_tracking - if 1 create session data + @param deterministic - if 1 deterministic mapping @param translation_buckets - number of translation hash buckets @param translation_memory_size - translation hash memory size @param user_buckets - number of user hash buckets @@ -240,6 +241,7 @@ define snat_show_config_reply i32 retval; u8 static_mapping_only; u8 static_mapping_connection_tracking; + u8 deterministic; u32 translation_buckets; u32 translation_memory_size; u32 user_buckets; @@ -421,3 +423,92 @@ define snat_user_session_details { u64 total_bytes; u32 total_pkts; }; + +/** \brief Add/delete S-NAT deterministic mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 1 if add, 0 if delete + @param is_ip4 - 1 if address type is IPv4 + @param in_addr - inside IP address + @param in_plen - inside IP address prefix length + @param out_addr - outside IP address + @param out_addr - outside IP address prefix length +*/ +define snat_add_det_map { + u32 client_index; 
+ u32 context; + u8 is_add; + u8 is_ip4; + u8 addr_only; + u8 in_addr[16]; + u8 in_plen; + u8 out_addr[16]; + u8 out_plen; +}; + +/** \brief Add/delete S-NAT deterministic mapping reply + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_add_det_map_reply { + u32 context; + i32 retval; +}; + +/** \brief Get outside address and port range from inside address + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @param in_addr - inside IP address +*/ +define snat_det_forward { + u32 client_index; + u32 context; + u8 is_ip4; + u8 in_addr[16]; +}; + +/** \brief Get outside address and port range from inside address + @param context - sender context, to match reply w/ request + @param retval - return code + @param out_port_lo - outside port range start + @param out_port_hi - outside port range end + @param is_ip4 - 1 if address type is IPv4 + @param out_addr - outside IP address +*/ +define snat_det_forward_reply { + u32 context; + i32 retval; + u16 out_port_lo; + u16 out_port_hi; + u8 is_ip4; + u8 out_addr[16]; +}; + +/** \brief Get inside address from outside address and port + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param out_port - outside port + @param is_ip4 - 1 if address type is IPv4 + @param out_addr - outside IP address +*/ +define snat_det_reverse { + u32 client_index; + u32 context; + u16 out_port; + u8 is_ip4; + u8 out_addr[16]; +}; + +/** \brief Get inside address from outside address and port reply + @param context - sender context, to match reply w/ request + @param retval - return code + @param is_ip4 - 1 if address type is IPv4 + @param in_addr - inside IP address +*/ +define snat_det_reverse_reply { + u32 context; + i32 retval; + u8 is_ip4; + u8 in_addr[16]; +}; diff --git a/src/plugins/snat/snat.c 
b/src/plugins/snat/snat.c index 12d1df402f4..1cf2b0c5cee 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -22,6 +22,7 @@ #include <vlibapi/api.h> #include <snat/snat.h> #include <snat/snat_ipfix_logging.h> +#include <snat/snat_det.h> #include <vnet/fib/fib_table.h> #include <vnet/fib/ip4_fib.h> @@ -109,6 +110,16 @@ VNET_FEATURE_INIT (ip4_snat_out2in, static) = { .node_name = "snat-out2in", .runs_before = VNET_FEATURES ("ip4-lookup"), }; +VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-det-in2out", + .runs_before = VNET_FEATURES ("snat-det-out2in"), +}; +VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-det-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = { .arc_name = "ip4-unicast", .node_name = "snat-in2out-worker-handoff", @@ -206,14 +217,16 @@ bad_tx_sw_if_index: \ * proxy ARP on the outside interface. * * @param addr IPv4 address. + * @param plen address prefix length * @param sw_if_index Interface. * @param is_add If 0 delete, otherwise add. 
*/ -static void -snat_add_del_addr_to_fib (ip4_address_t * addr, u32 sw_if_index, int is_add) +void +snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add) { fib_prefix_t prefix = { - .fp_len = 32, + .fp_len = p_len, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr = { .ip4.as_u32 = addr->as_u32, @@ -270,7 +283,7 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) if (i->is_inside) continue; - snat_add_del_addr_to_fib(addr, i->sw_if_index, 1); + snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1); break; })); } @@ -627,7 +640,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (interface->is_inside) continue; - snat_add_del_addr_to_fib(&e_addr, interface->sw_if_index, is_add); + snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add); break; })); @@ -727,7 +740,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) if (interface->is_inside) continue; - snat_add_del_addr_to_fib(&addr, interface->sw_if_index, 0); + snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0); break; })); @@ -741,13 +754,16 @@ static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) const char * feature_name; snat_address_t * ap; snat_static_mapping_t * m; + snat_det_map_t * dm; if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) feature_name = is_inside ? "snat-in2out-fast" : "snat-out2in-fast"; else { - if (sm->num_workers > 1) + if (sm->num_workers > 1 && !sm->deterministic) feature_name = is_inside ? "snat-in2out-worker-handoff" : "snat-out2in-worker-handoff"; + else if (sm->deterministic) + feature_name = is_inside ? "snat-det-in2out" : "snat-det-out2in"; else feature_name = is_inside ? 
"snat-in2out" : "snat-out2in"; } @@ -755,11 +771,11 @@ static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, !is_del, 0, 0); - if (sm->fq_in2out_index == ~0) - sm->fq_in2out_index = vlib_frame_queue_main_init (snat_in2out_node.index, 0); + if (sm->fq_in2out_index == ~0 && !sm->deterministic && sm->num_workers > 1) + sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index, 0); - if (sm->fq_out2in_index == ~0) - sm->fq_out2in_index = vlib_frame_queue_main_init (snat_out2in_node.index, 0); + if (sm->fq_out2in_index == ~0 && !sm->deterministic && sm->num_workers > 1) + sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0); pool_foreach (i, sm->interfaces, ({ @@ -787,14 +803,19 @@ fib: return 0; vec_foreach (ap, sm->addresses) - snat_add_del_addr_to_fib(&ap->addr, sw_if_index, !is_del); + snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del); pool_foreach (m, sm->static_mappings, ({ if (!(m->addr_only)) continue; - snat_add_del_addr_to_fib(&m->external_addr, sw_if_index, !is_del); + snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del); + })); + + pool_foreach (dm, sm->det_maps, + ({ + snat_add_del_addr_to_fib(&dm->out_addr, dm->out_plen, sw_if_index, !is_del); })); return 0; @@ -1206,6 +1227,7 @@ vl_api_snat_show_config_t_handler rmp->static_mapping_only = sm->static_mapping_only; rmp->static_mapping_connection_tracking = sm->static_mapping_connection_tracking; + rmp->deterministic = sm->deterministic; })); } @@ -1561,6 +1583,123 @@ static void *vl_api_snat_user_session_dump_t_print FINISH; } +static void +vl_api_snat_add_det_map_t_handler +(vl_api_snat_add_det_map_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_add_det_map_reply_t * rmp; + int rv = 0; + ip4_address_t in_addr, out_addr; + + clib_memcpy(&in_addr, mp->in_addr, 4); + clib_memcpy(&out_addr, mp->out_addr, 4); + rv = snat_det_add_map(sm, 
&in_addr, mp->in_plen, &out_addr, + mp->out_plen, mp->is_add); + + REPLY_MACRO (VL_API_SNAT_ADD_DET_MAP_REPLY); +} + +static void *vl_api_snat_add_det_map_t_print +(vl_api_snat_add_det_map_t *mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_add_det_map "); + s = format (s, "inside address %U/%d outside address %U/%d\n", + format_ip4_address, mp->in_addr, mp->in_plen, + format_ip4_address, mp->out_addr, mp->out_plen); + + FINISH; +} + +static void +vl_api_snat_det_forward_t_handler +(vl_api_snat_det_forward_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_det_forward_reply_t * rmp; + int rv = 0; + u16 lo_port = 0; + snat_det_map_t * dm; + ip4_address_t in_addr, out_addr; + + out_addr.as_u32 = 0; + clib_memcpy(&in_addr, mp->in_addr, 4); + dm = snat_det_map_by_user(sm, &in_addr); + if (!dm) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto send_reply; + } + + snat_det_forward(dm, &in_addr, &out_addr, &lo_port); + +send_reply: + REPLY_MACRO2(VL_API_SNAT_DET_FORWARD_REPLY, + ({ + rmp->out_port_lo = ntohs(lo_port); + rmp->out_port_hi = ntohs(lo_port + dm->ports_per_host - 1); + rmp->is_ip4 = 1; + memset(rmp->out_addr, 0, 16); + clib_memcpy(rmp->out_addr, &out_addr, 4); + })) +} + +static void *vl_api_snat_det_forward_t_print +(vl_api_snat_det_forward_t * mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: smat_det_forward_t"); + s = format (s, "inside ip address %U\n", + format_ip4_address, mp->in_addr); + + FINISH; +} + +static void +vl_api_snat_det_reverse_t_handler +(vl_api_snat_det_reverse_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_det_reverse_reply_t * rmp; + int rv = 0; + ip4_address_t out_addr, in_addr; + snat_det_map_t * dm; + + in_addr.as_u32 = 0; + clib_memcpy(&out_addr, mp->out_addr, 4); + dm = snat_det_map_by_out(sm, &out_addr); + if (!dm) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + goto send_reply; + } + + snat_det_reverse(dm, &out_addr, htons(mp->out_port), &in_addr); + + send_reply: + 
REPLY_MACRO2(VL_API_SNAT_DET_REVERSE_REPLY, + ({ + rmp->is_ip4 = 1; + memset(rmp->in_addr, 0, 16); + clib_memcpy(rmp->in_addr, &in_addr, 4); + })) +} + +static void *vl_api_snat_det_reverse_t_print +(vl_api_snat_det_reverse_t * mp, void * handle) +{ + u8 * s; + + s = format(0, "SCRIPT: smat_det_reverse_t"); + s = format(s, "outside ip address %U outside port %d", + format_ip4_address, mp->out_addr, ntohs(mp->out_port)); + + FINISH; +} + /* List of message types that this plugin understands */ #define foreach_snat_plugin_api_msg \ _(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \ @@ -1577,7 +1716,10 @@ _(SNAT_ADD_DEL_INTERFACE_ADDR, snat_add_del_interface_addr) \ _(SNAT_INTERFACE_ADDR_DUMP, snat_interface_addr_dump) \ _(SNAT_IPFIX_ENABLE_DISABLE, snat_ipfix_enable_disable) \ _(SNAT_USER_DUMP, snat_user_dump) \ -_(SNAT_USER_SESSION_DUMP, snat_user_session_dump) +_(SNAT_USER_SESSION_DUMP, snat_user_session_dump) \ +_(SNAT_ADD_DET_MAP, snat_add_det_map) \ +_(SNAT_DET_FORWARD, snat_det_forward) \ +_(SNAT_DET_REVERSE, snat_det_reverse) /* Set up the API message handling tables */ static clib_error_t * @@ -2291,6 +2433,96 @@ VLIB_CLI_COMMAND (snat_ipfix_logging_enable_disable_command, static) = { .short_help = "snat ipfix logging [domain <domain-id>] [src-port <port>] [disable]", }; +static u32 +snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0) +{ + snat_main_t *sm = &snat_main; + snat_user_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u32 next_worker_index = 0; + + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? 
*/ + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } + + /* add non-traslated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + next_worker_index = value0.value; + + return next_worker_index; +} + +static u32 +snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0) +{ + snat_main_t *sm = &snat_main; + snat_worker_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + udp_header_t * udp0; + u32 next_worker_index = 0; + + udp0 = ip4_next_header (ip0); + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t * icmp0 = (icmp46_header_t *) udp0; + icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1); + key0.port = echo0->identifier; + } + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? 
*/ + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + key0.port = 0; + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } + } + else + { + /* Static mapping without port */ + next_worker_index = value0.value; + } + + /* Add to translated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + } + else + next_worker_index = value0.value; + + return next_worker_index; +} + static clib_error_t * snat_config (vlib_main_t * vm, unformat_input_t * input) { @@ -2308,6 +2540,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) u8 static_mapping_connection_tracking = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); + sm->deterministic = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "translation hash buckets %d", &translation_buckets)) @@ -2334,7 +2568,9 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) if (unformat (input, "connection tracking")) static_mapping_connection_tracking = 1; } - else + else if (unformat (input, "deterministic")) + sm->deterministic = 1; + else return clib_error_return (0, "unknown input '%U'", format_unformat_error, input); } @@ -2354,38 +2590,68 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->static_mapping_only = static_mapping_only; sm->static_mapping_connection_tracking = static_mapping_connection_tracking; - if (!static_mapping_only || - (static_mapping_only && static_mapping_connection_tracking)) + if (sm->deterministic) { - clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, - user_memory_size); + sm->in2out_node_index = snat_det_in2out_node.index; + sm->out2in_node_index = snat_det_out2in_node.index; + } + 
else + { + sm->worker_in2out_cb = snat_get_worker_in2out_cb; + sm->worker_out2in_cb = snat_get_worker_out2in_cb; + sm->in2out_node_index = snat_in2out_node.index; + sm->out2in_node_index = snat_out2in_node.index; + if (!static_mapping_only || + (static_mapping_only && static_mapping_connection_tracking)) + { + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, + user_memory_size); - clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, - user_memory_size); + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, + user_memory_size); - vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1); + vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1); - clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, - user_memory_size); + clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + user_memory_size); + } + clib_bihash_init_8_8 (&sm->static_mapping_by_local, + "static_mapping_by_local", static_mapping_buckets, + static_mapping_memory_size); + + clib_bihash_init_8_8 (&sm->static_mapping_by_external, + "static_mapping_by_external", static_mapping_buckets, + static_mapping_memory_size); } - clib_bihash_init_8_8 (&sm->static_mapping_by_local, - "static_mapping_by_local", static_mapping_buckets, - static_mapping_memory_size); - clib_bihash_init_8_8 (&sm->static_mapping_by_external, - "static_mapping_by_external", static_mapping_buckets, - static_mapping_memory_size); return 0; } VLIB_CONFIG_FUNCTION (snat_config, "snat"); +u8 * format_snat_session_state (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; 
+ + switch (i) + { +#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break; + foreach_snat_session_state +#undef _ + default: + t = format (t, "unknown"); + } + s = format (s, "%s", t); + return s; +} + u8 * format_snat_key (u8 * s, va_list * args) { snat_session_key_t * key = va_arg (*args, snat_session_key_t *); @@ -2504,6 +2770,35 @@ u8 * format_snat_static_map_to_resolve (u8 * s, va_list * args) return s; } +u8 * format_det_map_ses (u8 * s, va_list * args) +{ + snat_det_map_t * det_map = va_arg (*args, snat_det_map_t *); + ip4_address_t in_addr, out_addr; + u32 in_offset, out_offset; + snat_det_session_t * ses = va_arg (*args, snat_det_session_t *); + u32 * i = va_arg (*args, u32 *); + + u32 user_index = *i / SNAT_DET_SES_PER_USER; + in_addr.as_u32 = clib_host_to_net_u32 ( + clib_net_to_host_u32(det_map->in_addr.as_u32) + user_index); + in_offset = clib_net_to_host_u32(in_addr.as_u32) - + clib_net_to_host_u32(det_map->in_addr.as_u32); + out_offset = in_offset / det_map->sharing_ratio; + out_addr.as_u32 = clib_host_to_net_u32( + clib_net_to_host_u32(det_map->out_addr.as_u32) + out_offset); + s = format (s, "in %U:%d out %U:%d external host %U:%d state: %U expire: %d\n", + format_ip4_address, &in_addr, + clib_net_to_host_u16 (ses->in_port), + format_ip4_address, &out_addr, + clib_net_to_host_u16 (ses->out.out_port), + format_ip4_address, &ses->out.ext_host_addr, + clib_net_to_host_u16 (ses->out.ext_host_port), + format_snat_session_state, ses->state, + ses->expire); + + return s; +} + static clib_error_t * show_snat_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2520,6 +2815,8 @@ show_snat_command_fn (vlib_main_t * vm, u32 users_num = 0, sessions_num = 0, *worker, *sw_if_index; uword j = 0; snat_static_map_resolve_t *rp; + snat_det_map_t * dm; + snat_det_session_t * ses; if (unformat (input, "detail")) verbose = 1; @@ -2534,6 +2831,10 @@ show_snat_command_fn (vlib_main_t * vm, else vlib_cli_output (vm, "SNAT mode: static mapping only"); } 
+ else if (sm->deterministic) + { + vlib_cli_output (vm, "SNAT mode: deterministic mapping"); + } else { vlib_cli_output (vm, "SNAT mode: dynamic translations enabled"); @@ -2587,81 +2888,112 @@ show_snat_command_fn (vlib_main_t * vm, } } - if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + if (sm->deterministic) { - vlib_cli_output (vm, "%d static mappings", - pool_elts (sm->static_mappings)); - + vlib_cli_output (vm, "%d deterministic mappings", + pool_elts (sm->det_maps)); if (verbose > 0) { - pool_foreach (m, sm->static_mappings, + pool_foreach (dm, sm->det_maps, ({ - vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + vlib_cli_output (vm, "in %U/%d out %U/%d\n", + format_ip4_address, &dm->in_addr, dm->in_plen, + format_ip4_address, &dm->out_addr, dm->out_plen); + vlib_cli_output (vm, " outside address sharing ratio: %d\n", + dm->sharing_ratio); + vlib_cli_output (vm, " number of ports per inside host: %d\n", + dm->ports_per_host); + vlib_cli_output (vm, " sessions number: %d\n", dm->ses_num); + if (verbose > 1) + { + vec_foreach_index (j, dm->sessions) + { + ses = vec_elt_at_index (dm->sessions, j); + if (ses->in_port) + vlib_cli_output (vm, " %U", format_det_map_ses, dm, ses, + &j); + } + } })); } } else { - vec_foreach (tsm, sm->per_thread_data) + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) { - users_num += pool_elts (tsm->users); - sessions_num += pool_elts (tsm->sessions); - } + vlib_cli_output (vm, "%d static mappings", + pool_elts (sm->static_mappings)); - vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions," - " %d static mappings", - users_num, - vec_len (sm->addresses), - sessions_num, - pool_elts (sm->static_mappings)); - - if (verbose > 0) - { - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, - verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, - verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, - 
verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, - verbose - 1); - vec_foreach_index (j, sm->per_thread_data) + if (verbose > 0) { - tsm = vec_elt_at_index (sm->per_thread_data, j); - - if (pool_elts (tsm->users) == 0) - continue; - - vlib_worker_thread_t *w = vlib_worker_threads + j; - vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name, - w->lcore_id); - vlib_cli_output (vm, " %d list pool elements", - pool_elts (tsm->list_pool)); - - pool_foreach (u, tsm->users, + pool_foreach (m, sm->static_mappings, ({ - vlib_cli_output (vm, " %U", format_snat_user, tsm, u, - verbose - 1); + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); })); } + } + else + { + vec_foreach (tsm, sm->per_thread_data) + { + users_num += pool_elts (tsm->users); + sessions_num += pool_elts (tsm->sessions); + } - if (pool_elts (sm->static_mappings) || vec_len (sm->to_resolve)) + vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions," + " %d static mappings", + users_num, + vec_len (sm->addresses), + sessions_num, + pool_elts (sm->static_mappings)); + + if (verbose > 0) { - vlib_cli_output (vm, "static mappings:"); - pool_foreach (m, sm->static_mappings, - ({ - vlib_cli_output (vm, "%U", format_snat_static_mapping, m); - })); - for (j = 0; j < vec_len (sm->to_resolve); j++) + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, + verbose - 1); + vec_foreach_index (j, sm->per_thread_data) { - rp = sm->to_resolve + j; - vlib_cli_output (vm, "%U", format_snat_static_map_to_resolve, - rp); + tsm = vec_elt_at_index (sm->per_thread_data, j); + + if (pool_elts (tsm->users) == 0) + continue; + + vlib_worker_thread_t *w = vlib_worker_threads + j; + vlib_cli_output (vm, "Thread %d (%s at 
lcore %u):", j, w->name, + w->lcore_id); + vlib_cli_output (vm, " %d list pool elements", + pool_elts (tsm->list_pool)); + + pool_foreach (u, tsm->users, + ({ + vlib_cli_output (vm, " %U", format_snat_user, tsm, u, + verbose - 1); + })); + } + + if (pool_elts (sm->static_mappings)) + { + vlib_cli_output (vm, "static mappings:"); + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + vlib_cli_output (vm, "%U", + format_snat_static_map_to_resolve, rp); + } } } } } - return 0; } @@ -2856,3 +3188,196 @@ VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = { .short_help = "snat add interface address <interface> [del]", .function = snat_add_interface_address_command_fn, }; + +static clib_error_t * +snat_det_map_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u32 in_plen, out_plen; + int is_add = 1, rv; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U/%u", unformat_ip4_address, &in_addr, &in_plen)) + ; + else if (unformat (line_input, "out %U/%u", unformat_ip4_address, &out_addr, &out_plen)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + unformat_free (line_input); + + rv = snat_det_add_map(sm, &in_addr, (u8) in_plen, &out_addr, (u8)out_plen, + is_add); + + if (rv) + { + error = clib_error_return (0, "snat_det_add_map return %d", rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? 
+ * @cliexpar + * @cliexstart{snat deterministic add} + * Create bijective mapping of inside address to outside address and port range + * pairs, with the purpose of enabling deterministic NAT to reduce logging in + * CGN deployments. + * To create deterministic mapping between inside network 10.0.0.0/18 and + * outside network 1.1.1.0/30 use: + * # vpp# snat deterministic add in 10.0.0.0/18 out 1.1.1.0/30 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_map_command, static) = { + .path = "snat deterministic add", + .short_help = "snat deterministic add in <addr>/<plen> out <addr>/<plen> [del]", + .function = snat_det_map_command_fn, +}; + +static clib_error_t * +snat_det_forward_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u16 lo_port; + snat_det_map_t * dm; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_ip4_address, &in_addr)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + unformat_free (line_input); + + dm = snat_det_map_by_user(sm, &in_addr); + if (!dm) + vlib_cli_output (vm, "no match"); + else + { + snat_det_forward (dm, &in_addr, &out_addr, &lo_port); + vlib_cli_output (vm, "%U:<%d-%d>", format_ip4_address, &out_addr, + lo_port, lo_port + dm->ports_per_host - 1); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat deterministic forward} + * Return outside address and port range from inside address for deterministic + * NAT. 
+ * To obtain outside address and port of inside host use: + * vpp# snat deterministic forward 10.0.0.2 + * 1.1.1.0:<1054-1068> + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_forward_command, static) = { + .path = "snat deterministic forward", + .short_help = "snat deterministic forward <addr>", + .function = snat_det_forward_command_fn, +}; + +static clib_error_t * +snat_det_reverse_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u32 out_port; + snat_det_map_t * dm; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U:%d", unformat_ip4_address, &out_addr, &out_port)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + } + + unformat_free (line_input); + + if (out_port < 1024 || out_port > 65535) + { + error = clib_error_return (0, "wrong port, must be <1024-65535>"); + goto done; + } + + dm = snat_det_map_by_out(sm, &out_addr); + if (!dm) + vlib_cli_output (vm, "no match"); + else + { + snat_det_reverse (dm, &out_addr, (u16) out_port, &in_addr); + vlib_cli_output (vm, "%U", format_ip4_address, &in_addr); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat deterministic reverse} + * Return inside address from outside address and port for deterministic NAT. 
+ * To obtain inside host address from outside address and port use: + * #vpp snat deterministic reverse 1.1.1.1:1276 + * 10.0.16.16 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_reverse_command, static) = { + .path = "snat deterministic reverse", + .short_help = "snat deterministic reverse <addr>:<port>", + .function = snat_det_reverse_command_fn, +}; diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h index 1d203aa870a..9cfd6be222e 100644 --- a/src/plugins/snat/snat.h +++ b/src/plugins/snat/snat.h @@ -28,6 +28,11 @@ #include <vppinfra/error.h> #include <vlibapi/api.h> + +#define SNAT_UDP_TIMEOUT 300 +#define SNAT_TCP_TRANSITORY_TIMEOUT 240 +#define SNAT_TCP_ESTABLISHED_TIMEOUT 7440 + /* Key */ typedef struct { union @@ -48,6 +53,19 @@ typedef struct { { struct { + ip4_address_t ext_host_addr; + u16 ext_host_port; + u16 out_port; + }; + u64 as_u64; + }; +} snat_det_out_key_t; + +typedef struct { + union + { + struct + { ip4_address_t addr; u32 fib_index; }; @@ -81,6 +99,22 @@ typedef enum { } snat_protocol_t; +#define foreach_snat_session_state \ + _(0, UNKNOWN, "unknown") \ + _(1, UDP_ACTIVE, "udp-active") \ + _(2, TCP_SYN_SENT, "tcp-syn-sent") \ + _(3, TCP_ESTABLISHED, "tcp-established") \ + _(4, TCP_FIN_WAIT, "tcp-fin-wait") \ + _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \ + _(6, TCP_LAST_ACK, "tcp-last-ack") + +typedef enum { +#define _(v, N, s) SNAT_SESSION_##N = v, + foreach_snat_session_state +#undef _ +} snat_session_state_t; + + #define SNAT_SESSION_FLAG_STATIC_MAPPING 1 typedef CLIB_PACKED(struct { @@ -127,6 +161,25 @@ typedef struct { } snat_address_t; typedef struct { + u16 in_port; + snat_det_out_key_t out; + u8 state; + u32 expire; +} snat_det_session_t; + +typedef struct { + ip4_address_t in_addr; + u8 in_plen; + ip4_address_t out_addr; + u8 out_plen; + u32 sharing_ratio; + u16 ports_per_host; + u32 ses_num; + /* vector of sessions */ + snat_det_session_t * sessions; +} snat_det_map_t; + +typedef struct { ip4_address_t local_addr; 
ip4_address_t external_addr; u16 local_port; @@ -164,6 +217,8 @@ typedef struct { dlist_elt_t * list_pool; } snat_main_per_thread_data_t; +typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, u32 rx_fib_index); + typedef struct { /* Main lookup tables */ clib_bihash_8_8_t out2in; @@ -182,6 +237,8 @@ typedef struct { u32 first_worker_index; u32 next_worker; u32 * workers; + snat_get_worker_function_t * worker_in2out_cb; + snat_get_worker_function_t * worker_out2in_cb; /* Per thread data */ snat_main_per_thread_data_t * per_thread_data; @@ -214,9 +271,17 @@ typedef struct { u32 fq_in2out_index; u32 fq_out2in_index; + /* in2out and out2in node index */ + u32 in2out_node_index; + u32 out2in_node_index; + + /* Deterministic NAT */ + snat_det_map_t * det_maps; + /* Config parameters */ u8 static_mapping_only; u8 static_mapping_connection_tracking; + u8 deterministic; u32 translation_buckets; u32 translation_memory_size; u32 user_buckets; @@ -248,6 +313,8 @@ extern vlib_node_registration_t snat_in2out_fast_node; extern vlib_node_registration_t snat_out2in_fast_node; extern vlib_node_registration_t snat_in2out_worker_handoff_node; extern vlib_node_registration_t snat_out2in_worker_handoff_node; +extern vlib_node_registration_t snat_det_in2out_node; +extern vlib_node_registration_t snat_det_out2in_node; void snat_free_outside_address_and_port (snat_main_t * sm, snat_session_key_t * k, @@ -263,6 +330,11 @@ int snat_static_mapping_match (snat_main_t * sm, snat_session_key_t * mapping, u8 by_external); +void snat_add_del_addr_to_fib (ip4_address_t * addr, + u8 p_len, + u32 sw_if_index, + int is_add); + format_function_t format_snat_user; typedef struct { diff --git a/src/plugins/snat/snat_det.c b/src/plugins/snat/snat_det.c new file mode 100644 index 00000000000..d54bca66cac --- /dev/null +++ b/src/plugins/snat/snat_det.c @@ -0,0 +1,125 @@ +/* + * snat_det.c - deterministic NAT + * + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief deterministic NAT + */ + +#include <snat/snat_det.h> + + +/** + * @brief Add/delete deterministic NAT mapping. + * + * Create bijective mapping of inside address to outside address and port range + * pairs, with the purpose of enabling deterministic NAT to reduce logging in + * CGN deployments. + * + * @param sm SNAT main. + * @param in_addr Inside network address. + * @param in_plen Inside network prefix length. + * @param out_addr Outside network address. + * @param out_plen Outside network prefix length. + * @param is_add If 0 delete, otherwise add. 
+ */
+/*
+ * Returns 0 on success and -1 when:
+ *  - a prefix length is unusable (see the checks at the top of the body),
+ *  - is_add is set and an identical mapping already exists,
+ *  - is_add is clear and no identical mapping exists.
+ */
+int
+snat_det_add_map (snat_main_t * sm, ip4_address_t * in_addr, u8 in_plen,
+		  ip4_address_t * out_addr, u8 out_plen, int is_add)
+{
+  snat_det_map_t *det_map, *found = 0;
+  static snat_det_session_t empty_snat_det_session = { 0 };
+  snat_interface_t *i;
+  ip4_address_t in_net, out_net;
+  u32 in_hosts, out_hosts, sharing_ratio;
+  u64 n_sessions;
+
+  /*
+   * Prefix lengths above 32 would index past fib_masks[], a length of 0
+   * would shift by 32, and an inside network smaller than the outside one
+   * would give sharing_ratio == 0 and a division by zero below.
+   */
+  if (in_plen == 0 || in_plen > 32 || out_plen > 32 || in_plen > out_plen)
+    return -1;
+
+  in_hosts = 1U << (32 - in_plen);
+  out_hosts = 1U << (32 - out_plen);
+  sharing_ratio = in_hosts / out_hosts;
+
+  /* More inside hosts per outside address than ports: ports_per_host == 0 */
+  if (sharing_ratio > (65535 - 1023))
+    return -1;
+
+  /* The session vector is indexed with 32-bit offsets */
+  n_sessions = (u64) SNAT_DET_SES_PER_USER * in_hosts;
+  if (n_sessions > 0xffffffffULL)
+    return -1;
+
+  in_net.as_u32 = in_addr->as_u32 & ip4_main.fib_masks[in_plen];
+  out_net.as_u32 = out_addr->as_u32 & ip4_main.fib_masks[out_plen];
+
+  /* Look for an identical, already configured mapping */
+  /* *INDENT-OFF* */
+  pool_foreach (det_map, sm->det_maps,
+  ({
+    if (!found &&
+        det_map->in_addr.as_u32 == in_net.as_u32 &&
+        det_map->in_plen == in_plen &&
+        det_map->out_addr.as_u32 == out_net.as_u32 &&
+        det_map->out_plen == out_plen)
+      found = det_map;
+  }));
+  /* *INDENT-ON* */
+
+  if (is_add)
+    {
+      if (found)
+	return -1;
+
+      pool_get (sm->det_maps, det_map);
+      memset (det_map, 0, sizeof (*det_map));
+      det_map->in_addr.as_u32 = in_net.as_u32;
+      det_map->in_plen = in_plen;
+      det_map->out_addr.as_u32 = out_net.as_u32;
+      det_map->out_plen = out_plen;
+      det_map->sharing_ratio = sharing_ratio;
+      det_map->ports_per_host = (65535 - 1023) / sharing_ratio;
+
+      vec_validate_init_empty (det_map->sessions,
+			       (u32) n_sessions - 1, empty_snat_det_session);
+    }
+  else if (!found)
+    return -1;
+
+  /* Add/del external address range to FIB */
+  /* *INDENT-OFF* */
+  pool_foreach (i, sm->interfaces,
+  ({
+    if (i->is_inside)
+      continue;
+
+    snat_add_del_addr_to_fib(out_addr, out_plen, i->sw_if_index, is_add);
+    break;
+  }));
+  /* *INDENT-ON* */
+
+  /* On delete, release the session table and the pool slot */
+  if (!is_add)
+    {
+      vec_free (found->sessions);
+      pool_put (sm->det_maps, found);
+    }
+
+  return 0;
+}
+
+/**
+ * @brief The 'snat-det-expire-walk' process's main loop.
+ *
+ * Check expire time for active sessions.
+ */
+/*
+ * Runs for as long as sm->deterministic is set. Each pass waits for an
+ * event or the 10.0 clock timeout, then closes every session whose slot is
+ * in use (in_port != 0) and whose expire value is below the current time.
+ */
+static uword
+snat_det_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
+			 vlib_frame_t * f)
+{
+  snat_main_t *sm = &snat_main;
+  snat_det_map_t *map;
+  snat_det_session_t *s;
+  u32 now;
+
+  while (sm->deterministic)
+    {
+      vlib_process_wait_for_event_or_clock (vm, 10.0);
+      vlib_process_get_events (vm, NULL);
+      now = (u32) vlib_time_now (vm);
+
+      /* *INDENT-OFF* */
+      pool_foreach (map, sm->det_maps,
+      ({
+        vec_foreach (s, map->sessions)
+          {
+            /* Keep free slots and sessions that have not expired yet */
+            if (!s->in_port || !(s->expire < now))
+              continue;
+
+            snat_det_ses_close (map, s);
+          }
+      }));
+      /* *INDENT-ON* */
+    }
+
+  return 0;
+}
+
+static vlib_node_registration_t snat_det_expire_walk_node;
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (snat_det_expire_walk_node, static) = {
+  .name = "snat-det-expire-walk",
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .function = snat_det_expire_walk_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/snat/snat_det.h b/src/plugins/snat/snat_det.h
new file mode 100644
index 00000000000..42ce87608b5
--- /dev/null
+++ b/src/plugins/snat/snat_det.h
@@ -0,0 +1,190 @@
+/*
+ * snat_det.h - deterministic nat definitions
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief deterministic NAT definitions
+ */
+
+#ifndef __included_snat_det_h__
+#define __included_snat_det_h__
+
+#include <vnet/ip/ip.h>
+#include <snat/snat.h>
+
+
+/* Number of session slots reserved for each inside address of a mapping */
+#define SNAT_DET_SES_PER_USER 1000
+
+
+int snat_det_add_map (snat_main_t * sm, ip4_address_t * in_addr, u8 in_plen,
+		      ip4_address_t * out_addr, u8 out_plen, int is_add);
+
+/*
+ * Return 1 when addr, masked with the plen-bit mask from
+ * ip4_main.fib_masks[], equals net; otherwise return 0.
+ */
+always_inline int
+is_addr_in_net (ip4_address_t * addr, ip4_address_t * net, u8 plen)
+{
+  if (net->as_u32 == (addr->as_u32 & ip4_main.fib_masks[plen]))
+    return 1;
+  return 0;
+}
+
+/*
+ * Return the first mapping in sm->det_maps whose inside network contains
+ * user_addr, or 0 when there is none.
+ */
+always_inline snat_det_map_t *
+snat_det_map_by_user (snat_main_t * sm, ip4_address_t * user_addr)
+{
+  snat_det_map_t *dm;
+
+  /* *INDENT-OFF* */
+  pool_foreach (dm, sm->det_maps,
+  ({
+    if (is_addr_in_net(user_addr, &dm->in_addr, dm->in_plen))
+      return dm;
+  }));
+  /* *INDENT-ON* */
+  return 0;
+}
+
+/*
+ * Return the first mapping in sm->det_maps whose outside network contains
+ * out_addr, or 0 when there is none.
+ */
+always_inline snat_det_map_t *
+snat_det_map_by_out (snat_main_t * sm, ip4_address_t * out_addr)
+{
+  snat_det_map_t *dm;
+
+  /* *INDENT-OFF* */
+  pool_foreach (dm, sm->det_maps,
+  ({
+    if (is_addr_in_net(out_addr, &dm->out_addr, dm->out_plen))
+      return dm;
+  }));
+  /* *INDENT-ON* */
+  return 0;
+}
+
+/*
+ * Translate an inside address to its outside address and first outside port.
+ *
+ * in_offset is the distance of in_addr from the mapping's inside network
+ * address. Dividing it by sharing_ratio selects the outside address; the
+ * remainder selects a block of ports_per_host ports starting at 1024.
+ * Results are written to *out_addr (network byte order) and *lo_port.
+ */
+always_inline void
+snat_det_forward (snat_det_map_t * dm, ip4_address_t * in_addr,
+		  ip4_address_t * out_addr, u16 * lo_port)
+{
+  u32 in_offset, out_offset;
+
+  in_offset = clib_net_to_host_u32 (in_addr->as_u32) -
+    clib_net_to_host_u32 (dm->in_addr.as_u32);
+  out_offset = in_offset / dm->sharing_ratio;
+  out_addr->as_u32 =
+    clib_host_to_net_u32 (clib_net_to_host_u32 (dm->out_addr.as_u32) +
+			  out_offset);
+  *lo_port = 1024 + dm->ports_per_host * (in_offset % dm->sharing_ratio);
+}
+
+/*
+ * Translate an outside address and port back to the inside address and
+ * write it to *in_addr (network byte order).
+ *
+ * NOTE(review): nothing here checks out_port >= 1024 or
+ * dm->ports_per_host != 0; presumably callers guarantee both - confirm.
+ */
+always_inline void
+snat_det_reverse (snat_det_map_t * dm, ip4_address_t * out_addr, u16 out_port,
+		  ip4_address_t * in_addr)
+{
+  u32 in_offset1, in_offset2, out_offset;
+
+  out_offset = clib_net_to_host_u32 (out_addr->as_u32) -
+    clib_net_to_host_u32 (dm->out_addr.as_u32);
+  /* First inside offset served by this outside address */
+  in_offset1 = out_offset * dm->sharing_ratio;
+  /* Index of the port block that out_port falls into */
+  in_offset2 = (out_port -
1024) / dm->ports_per_host;
+  in_addr->as_u32 =
+    clib_host_to_net_u32 (clib_net_to_host_u32 (dm->in_addr.as_u32) +
+			  in_offset1 + in_offset2);
+}
+
+/*
+ * Return the index of the first session slot belonging to addr: the low
+ * (32 - plen) bits of the host-order address times SNAT_DET_SES_PER_USER.
+ */
+always_inline u32
+snat_det_user_ses_offset (ip4_address_t * addr, u8 plen)
+{
+  return (clib_net_to_host_u32 (addr->as_u32) & pow2_mask (32 - plen)) *
+    SNAT_DET_SES_PER_USER;
+}
+
+/*
+ * Scan the SNAT_DET_SES_PER_USER slots of in_addr for a session whose
+ * out.as_u64 equals out_key. Returns the session, or 0 when none matches.
+ */
+always_inline snat_det_session_t *
+snat_det_get_ses_by_out (snat_det_map_t * dm, ip4_address_t * in_addr,
+			 u64 out_key)
+{
+  u32 user_offset;
+  u16 i;
+
+  user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+  for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+    {
+      if (dm->sessions[i + user_offset].out.as_u64 == out_key)
+	return &dm->sessions[i + user_offset];
+    }
+
+  return 0;
+}
+
+/*
+ * Scan the SNAT_DET_SES_PER_USER slots of in_addr for a session whose
+ * in_port equals in_port. Returns the session, or 0 when none matches.
+ *
+ * NOTE(review): free slots hold in_port == 0, so in_port == 0 would match
+ * a free slot - confirm callers never pass 0.
+ */
+always_inline snat_det_session_t *
+snat_det_find_ses_by_in (snat_det_map_t * dm,
+			 ip4_address_t * in_addr, u16 in_port)
+{
+  u32 user_offset;
+  u16 i;
+
+  user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+  for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+    {
+      if (dm->sessions[i + user_offset].in_port == in_port)
+	return &dm->sessions[i + user_offset];
+    }
+
+  return 0;
+}
+
+/*
+ * Claim the first free slot (in_port == 0) of in_addr for a new session.
+ *
+ * The slot is claimed with a compare-and-swap of in_port from 0 to
+ * in_port; only the winner then fills out/state/expire and increments
+ * dm->ses_num. Returns the new session, or 0 when no slot could be claimed.
+ */
+always_inline snat_det_session_t *
+snat_det_ses_create (snat_det_map_t * dm, ip4_address_t * in_addr,
+		     u16 in_port, snat_det_out_key_t * out)
+{
+  u32 user_offset;
+  u16 i;
+
+  user_offset = snat_det_user_ses_offset (in_addr, dm->in_plen);
+
+  for (i = 0; i < SNAT_DET_SES_PER_USER; i++)
+    {
+      if (!dm->sessions[i + user_offset].in_port)
+	{
+	  if (__sync_bool_compare_and_swap
+	      (&dm->sessions[i + user_offset].in_port, 0, in_port))
+	    {
+	      dm->sessions[i + user_offset].out.as_u64 = out->as_u64;
+	      dm->sessions[i + user_offset].state = SNAT_SESSION_UNKNOWN;
+	      dm->sessions[i + user_offset].expire = 0;
+	      __sync_add_and_fetch (&dm->ses_num, 1);
+	      return &dm->sessions[i + user_offset];
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/*
+ * Release a session slot: swap in_port back to 0 and, when the swap
+ * succeeds, clear the outside key and decrement dm->ses_num.
+ */
+always_inline void
+snat_det_ses_close (snat_det_map_t * dm, snat_det_session_t * ses)
+{
+  if (__sync_bool_compare_and_swap (&ses->in_port, ses->in_port, 0))
+    {
+      ses->out.as_u64 = 0;
+      __sync_add_and_fetch (&dm->ses_num, -1);
+    }
+}
+
+#endif /* __included_snat_det_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/snat/snat_test.c b/src/plugins/snat/snat_test.c
index c97b813e24e..c60554b21d5 100644
--- a/src/plugins/snat/snat_test.c
+++ b/src/plugins/snat/snat_test.c
@@ -66,7 +66,8 @@ _(snat_interface_add_del_feature_reply) \
 _(snat_add_static_mapping_reply) \
 _(snat_set_workers_reply) \
 _(snat_add_del_interface_addr_reply) \
-_(snat_ipfix_enable_disable_reply)
+_(snat_ipfix_enable_disable_reply) \
+_(snat_add_det_map_reply)
 
 #define _(n) \
 static void vl_api_##n##_t_handler \
@@ -106,7 +107,10 @@ _(SNAT_INTERFACE_ADDR_DETAILS, snat_interface_addr_details) \
 _(SNAT_IPFIX_ENABLE_DISABLE_REPLY, \
   snat_ipfix_enable_disable_reply) \
 _(SNAT_USER_DETAILS, snat_user_details) \
-_(SNAT_USER_SESSION_DETAILS, snat_user_session_details)
+_(SNAT_USER_SESSION_DETAILS, snat_user_session_details) \
+_(SNAT_ADD_DET_MAP_REPLY, snat_add_det_map_reply) \
+_(SNAT_DET_FORWARD_REPLY, snat_det_forward_reply) \
+_(SNAT_DET_REVERSE_REPLY, snat_det_reverse_reply)
 
 static int api_snat_add_address_range (vat_main_t * vam)
 {
@@ -719,6 +723,121 @@ static int api_snat_user_dump(vat_main_t * vam)
   return ret;
 }
 
+/*
+ * Parse "in <in_addr>/<in_plen> out <out_addr>/<out_plen> [del]" and send
+ * a SNAT_ADD_DET_MAP request. Returns -99 on a parse error.
+ *
+ * The input is consumed in a loop so that "del" can follow the prefixes.
+ * The prefixes are mandatory, so uninitialised addresses are never sent.
+ */
+static int api_snat_add_det_map (vat_main_t * vam)
+{
+  unformat_input_t * i = vam->input;
+  vl_api_snat_add_det_map_t * mp;
+  ip4_address_t in_addr, out_addr;
+  u32 in_plen = 0, out_plen = 0;
+  u8 is_add = 1;
+  u8 nets_set = 0;
+  int ret;
+
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "in %U/%d out %U/%d",
+                    unformat_ip4_address, &in_addr, &in_plen,
+                    unformat_ip4_address, &out_addr, &out_plen))
+        nets_set = 1;
+      else if (unformat (i, "del"))
+        is_add = 0;
+      else
+        {
+          clib_warning("unknown input '%U'", format_unformat_error, i);
+          return -99;
+        }
+    }
+
+  if (!nets_set)
+    {
+      clib_warning("expected: in <in_addr>/<in_plen> out <out_addr>/<out_plen>");
+      return -99;
+    }
+
+  /* The message fields are single bytes; reject lengths that cannot be prefixes */
+  if (in_plen > 32 || out_plen > 32)
+    {
+      clib_warning("invalid prefix length");
+      return -99;
+    }
+
+  M(SNAT_ADD_DET_MAP, mp);
+  clib_memcpy(mp->in_addr, &in_addr, 4);
+  mp->in_plen = in_plen;
+  clib_memcpy(mp->out_addr, &out_addr, 4);
+  mp->out_plen = out_plen;
+  mp->is_add = is_add;
+
+  S(mp);
+  W (ret);
+  return ret;
+}
+
+/* Print the outside address and port range from a det_forward reply. */
+static void vl_api_snat_det_forward_reply_t_handler
+  (vl_api_snat_det_forward_reply_t *mp)
+{
+  snat_test_main_t * sm = &snat_test_main;
+  vat_main_t *vam = sm->vat_main;
+  i32 retval = ntohl(mp->retval);
+
+  if (retval >= 0)
+  {
+    fformat (vam->ofp, "outside address %U", format_ip4_address, &mp->out_addr);
+    fformat (vam->ofp, " outside port range start %d", ntohs(mp->out_port_lo));
+    fformat (vam->ofp, " outside port range end %d\n", ntohs(mp->out_port_hi));
+  }
+
+  vam->retval = retval;
+  vam->result_ready = 1;
+}
+
+/* Parse "<in_addr>" and send a SNAT_DET_FORWARD request; -99 on parse error. */
+static int api_snat_det_forward (vat_main_t * vam)
+{
+  unformat_input_t * i = vam->input;
+  vl_api_snat_det_forward_t * mp;
+  ip4_address_t in_addr;
+  int ret;
+
+  if (unformat (i, "%U", unformat_ip4_address, &in_addr))
+    ;
+  else
+    {
+      clib_warning("unknown input '%U'", format_unformat_error, i);
+      return -99;
+    }
+
+  M(SNAT_DET_FORWARD, mp);
+  clib_memcpy(mp->in_addr, &in_addr, 4);
+
+  S(mp);
+  W(ret);
+  return ret;
+}
+
+/* Print the inside address from a det_reverse reply. */
+static void vl_api_snat_det_reverse_reply_t_handler
+  (vl_api_snat_det_reverse_reply_t *mp)
+{
+  snat_test_main_t * sm = &snat_test_main;
+  vat_main_t *vam = sm->vat_main;
+  i32 retval = ntohl(mp->retval);
+
+  if (retval >= 0)
+  {
+    fformat (vam->ofp, "inside address %U\n", format_ip4_address, &mp->in_addr);
+  }
+
+  vam->retval = retval;
+  vam->result_ready = 1;
+}
+
+/*
+ * Parse "<out_addr> <out_port>" and send a SNAT_DET_REVERSE request.
+ * Returns -99 on a parse error or when the port does not fit in 16 bits.
+ */
+static int api_snat_det_reverse (vat_main_t * vam)
+{
+  unformat_input_t * i = vam->input;
+  vl_api_snat_det_reverse_t * mp;
+  ip4_address_t out_addr;
+  /* 32 bits wide: the %d conversion below stores a full 32-bit value */
+  u32 out_port;
+  int ret;
+
+  if (unformat (i, "%U %d", unformat_ip4_address, &out_addr, &out_port))
+    ;
+  else
+    {
+      clib_warning("unknown input '%U'", format_unformat_error, i);
+      return -99;
+    }
+
+  if (out_port > 65535)
+    {
+      clib_warning("invalid port %d", out_port);
+      return -99;
+    }
+
+  M(SNAT_DET_REVERSE, mp);
+  clib_memcpy(mp->out_addr, &out_addr, 4);
+  mp->out_port = htons(out_port);
+
+  S(mp);
+  W(ret);
+  return ret;
+}
+
 /*
  * List of messages that the api test plugin sends,
  * and that the data plane plugin processes
  */
@@ -743,7
+862,11 @@ _(snat_interface_addr_dump, "") \ _(snat_ipfix_enable_disable, "[domain <id>] [src_port <n>] " \ "[disable]") \ _(snat_user_dump, "") \ -_(snat_user_session_dump, "ip_address <ip> vrf_id <table-id>") +_(snat_user_session_dump, "ip_address <ip> vrf_id <table-id>") \ +_(snat_add_det_map, "in <in_addr>/<in_plen> out " \ + "<out_addr>/<out_plen> [del]") \ +_(snat_det_forward, "<in_addr>") \ +_(snat_det_reverse, "<out_addr> <out_port>") static void snat_vat_api_hookup (vat_main_t *vam) |