diff options
author | Matus Fabian <matfabia@cisco.com> | 2017-11-06 05:04:53 -0800 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2017-11-07 11:52:39 +0000 |
commit | 8ebe62536223e5a8d827b2b870cbd57aa34fd7ef (patch) | |
tree | f4ab30aa41a7a26bb6068e7f0f441930cc7774bb /src/plugins/nat/dslite_in2out.c | |
parent | 5917939256af392914d8a648de0c3287042ddbf6 (diff) |
NAT: DS-Lite (VPP-1040)
Dual-Stack Lite enables a broadband service provider to share IPv4 addresses among customers by combining two well-known technologies: IPv4-in-IPv6 and NAT.
Change-Id: I039740f8548c623cd1ac89b8ecda1a6cc4aafb9c
Signed-off-by: Matus Fabian <matfabia@cisco.com>
Diffstat (limited to 'src/plugins/nat/dslite_in2out.c')
-rw-r--r-- | src/plugins/nat/dslite_in2out.c | 490 |
1 files changed, 490 insertions, 0 deletions
diff --git a/src/plugins/nat/dslite_in2out.c b/src/plugins/nat/dslite_in2out.c new file mode 100644 index 00000000000..9a7751ce11c --- /dev/null +++ b/src/plugins/nat/dslite_in2out.c @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <nat/dslite.h> + +vlib_node_registration_t dslite_in2out_node; +vlib_node_registration_t dslite_in2out_slowpath_node; + +typedef enum +{ + DSLITE_IN2OUT_NEXT_IP4_LOOKUP, + DSLITE_IN2OUT_NEXT_IP6_LOOKUP, + DSLITE_IN2OUT_NEXT_DROP, + DSLITE_IN2OUT_NEXT_SLOWPATH, + DSLITE_IN2OUT_N_NEXT, +} dslite_in2out_next_t; + +static char *dslite_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_dslite_error +#undef _ +}; + +static u32 +slow_path (dslite_main_t * dm, dslite_session_key_t * in2out_key, + dslite_session_t ** sp, u32 next, u8 * error, u32 thread_index) +{ + dslite_b4_t *b4; + clib_bihash_kv_16_8_t b4_kv, b4_value; + clib_bihash_kv_24_8_t in2out_kv; + clib_bihash_kv_8_8_t out2in_kv; + dlist_elt_t *head_elt, *oldest_elt, *elt; + u32 oldest_index; + dslite_session_t *s; + snat_session_key_t out2in_key; + u32 address_index; + + out2in_key.protocol = in2out_key->proto; + out2in_key.fib_index = 0; + + b4_kv.key[0] = in2out_key->softwire_id.as_u64[0]; + b4_kv.key[1] = in2out_key->softwire_id.as_u64[1]; + + if (clib_bihash_search_16_8 + (&dm->per_thread_data[thread_index].b4_hash, &b4_kv, &b4_value)) + { + pool_get (dm->per_thread_data[thread_index].b4s, b4); + memset (b4, 0, sizeof (*b4)); + b4->addr.as_u64[0] = in2out_key->softwire_id.as_u64[0]; + b4->addr.as_u64[1] = in2out_key->softwire_id.as_u64[1]; + + pool_get (dm->per_thread_data[thread_index].list_pool, head_elt); + b4->sessions_per_b4_list_head_index = + head_elt - dm->per_thread_data[thread_index].list_pool; + clib_dlist_init (dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index); + + b4_kv.value = b4 - dm->per_thread_data[thread_index].b4s; + clib_bihash_add_del_16_8 (&dm->per_thread_data[thread_index].b4_hash, + &b4_kv, 1); + } + else + { + b4 = + pool_elt_at_index (dm->per_thread_data[thread_index].b4s, + b4_value.value); + } + + //TODO configurable quota + if (b4->nsessions >= 1000) + { + oldest_index = + clib_dlist_remove_head (dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index); + ASSERT (oldest_index != ~0); + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + b4->sessions_per_b4_list_head_index, oldest_index); + oldest_elt = + pool_elt_at_index (dm->per_thread_data[thread_index].list_pool, + oldest_index); + s = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + oldest_elt->value); + + in2out_kv.key[0] = s->in2out.as_u64[0]; + in2out_kv.key[1] = s->in2out.as_u64[1]; + in2out_kv.key[2] = s->in2out.as_u64[2]; + clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out, + &in2out_kv, 0); + out2in_kv.key = s->out2in.as_u64; + clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in, + &out2in_kv, 0); + snat_free_outside_address_and_port (dm->addr_pool, thread_index, + &s->out2in, + s->outside_address_index); + s->outside_address_index = ~0; + + if (snat_alloc_outside_address_and_port + (dm->addr_pool, 0, thread_index, &out2in_key, + &s->outside_address_index, 0, dm->port_per_thread, thread_index)) + ASSERT (0); + } + else + { + if (snat_alloc_outside_address_and_port + (dm->addr_pool, 0, thread_index, &out2in_key, &address_index, 0, + dm->port_per_thread, thread_index)) + { + *error = DSLITE_ERROR_OUT_OF_PORTS; + return DSLITE_IN2OUT_NEXT_DROP; + } + pool_get (dm->per_thread_data[thread_index].sessions, s); + memset (s, 0, sizeof (*s)); + s->outside_address_index = address_index; + b4->nsessions++; + + pool_get (dm->per_thread_data[thread_index].list_pool, elt); + clib_dlist_init (dm->per_thread_data[thread_index].list_pool, + elt - dm->per_thread_data[thread_index].list_pool); + elt->value = s - dm->per_thread_data[thread_index].sessions; + s->per_b4_index = elt - dm->per_thread_data[thread_index].list_pool; + s->per_b4_list_head_index = b4->sessions_per_b4_list_head_index; + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + s->per_b4_list_head_index, + elt - dm->per_thread_data[thread_index].list_pool); + } + + s->in2out = *in2out_key; + s->out2in = out2in_key; + *sp = s; + in2out_kv.key[0] = s->in2out.as_u64[0]; + in2out_kv.key[1] = s->in2out.as_u64[1]; + in2out_kv.key[2] = s->in2out.as_u64[2]; + in2out_kv.value = s - dm->per_thread_data[thread_index].sessions; + clib_bihash_add_del_24_8 (&dm->per_thread_data[thread_index].in2out, + &in2out_kv, 1); + out2in_kv.key = s->out2in.as_u64; + out2in_kv.value = s - dm->per_thread_data[thread_index].sessions; + clib_bihash_add_del_8_8 (&dm->per_thread_data[thread_index].out2in, + &out2in_kv, 1); + + return next; +} + +static inline u32 +dslite_icmp_in2out (dslite_main_t * dm, ip6_header_t * ip6, + ip4_header_t * ip4, dslite_session_t ** sp, u32 next, + u8 * error, u32 thread_index) +{ + dslite_session_t *s = 0; + icmp46_header_t *icmp = ip4_next_header (ip4); + clib_bihash_kv_24_8_t kv, value; + dslite_session_key_t key; + u32 n = next; + icmp_echo_header_t *echo; + u32 new_addr, old_addr; + u16 old_id, new_id; + ip_csum_t sum; + + if (icmp_is_error_message (icmp)) + { + n = DSLITE_IN2OUT_NEXT_DROP; + *error = DSLITE_ERROR_BAD_ICMP_TYPE; + goto done; + } + + echo = (icmp_echo_header_t *) (icmp + 1); + + key.addr = ip4->src_address; + key.port = echo->identifier; + key.proto = SNAT_PROTOCOL_ICMP; + key.softwire_id.as_u64[0] = ip6->src_address.as_u64[0]; + key.softwire_id.as_u64[1] = ip6->src_address.as_u64[1]; + key.pad = 0; + kv.key[0] = key.as_u64[0]; + kv.key[1] = key.as_u64[1]; + kv.key[2] = key.as_u64[2]; + + if (clib_bihash_search_24_8 + (&dm->per_thread_data[thread_index].in2out, &kv, &value)) + { + n = slow_path (dm, &key, &s, next, error, thread_index); + if (PREDICT_FALSE (next == DSLITE_IN2OUT_NEXT_DROP)) + goto done; + } + else + { + s = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value.value); + } + + old_addr = ip4->src_address.as_u32; + ip4->src_address = s->out2in.addr; + new_addr = ip4->src_address.as_u32; + sum = ip4->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip4->checksum = ip_csum_fold (sum); + + old_id = echo->identifier; + echo->identifier = new_id = s->out2in.port; + sum = icmp->checksum; + sum = ip_csum_update (sum, old_id, new_id, icmp_echo_header_t, identifier); + icmp->checksum = ip_csum_fold (sum); + +done: + *sp = s; + return n; +} + +static inline uword +dslite_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u8 is_slow_path) +{ + u32 n_left_from, *from, *to_next; + dslite_in2out_next_t next_index; + u32 node_index; + vlib_node_runtime_t *error_node; + u32 thread_index = vlib_get_thread_index (); + f64 now = vlib_time_now (vm); + dslite_main_t *dm = &dslite_main; + + node_index = + is_slow_path ? dslite_in2out_slowpath_node. + index : dslite_in2out_node.index; + + error_node = vlib_node_get_runtime (vm, node_index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = DSLITE_IN2OUT_NEXT_IP4_LOOKUP; + ip4_header_t *ip40; + ip6_header_t *ip60; + u8 error0 = DSLITE_ERROR_IN2OUT; + u32 proto0; + dslite_session_t *s0 = 0; + clib_bihash_kv_24_8_t kv0, value0; + dslite_session_key_t key0; + udp_header_t *udp0; + tcp_header_t *tcp0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip60 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE (ip60->protocol != IP_PROTOCOL_IP_IN_IP)) + { + error0 = DSLITE_ERROR_BAD_IP6_PROTOCOL; + next0 = DSLITE_IN2OUT_NEXT_DROP; + goto trace0; + } + + ip40 = vlib_buffer_get_current (b0) + sizeof (ip6_header_t); + proto0 = ip_proto_to_snat_proto (ip40->protocol); + + if (PREDICT_FALSE (proto0 == ~0)) + { + error0 = DSLITE_ERROR_UNSUPPORTED_PROTOCOL; + next0 = DSLITE_IN2OUT_NEXT_DROP; + goto trace0; + } + + udp0 = ip4_next_header (ip40); + tcp0 = (tcp_header_t *) udp0; + + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = + dslite_icmp_in2out (dm, ip60, ip40, &s0, next0, &error0, + thread_index); + if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP)) + goto trace0; + + goto accounting0; + } + } + else + { + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = DSLITE_IN2OUT_NEXT_SLOWPATH; + goto trace0; + } + } + + key0.addr = ip40->src_address; + key0.port = udp0->src_port; + key0.proto = proto0; + key0.softwire_id.as_u64[0] = ip60->src_address.as_u64[0]; + key0.softwire_id.as_u64[1] = ip60->src_address.as_u64[1]; + key0.pad = 0; + kv0.key[0] = key0.as_u64[0]; + kv0.key[1] = key0.as_u64[1]; + kv0.key[2] = key0.as_u64[2]; + + if (clib_bihash_search_24_8 + (&dm->per_thread_data[thread_index].in2out, &kv0, &value0)) + { + if (is_slow_path) + { + next0 = + slow_path (dm, &key0, &s0, next0, &error0, thread_index); + if (PREDICT_FALSE (next0 == DSLITE_IN2OUT_NEXT_DROP)) + goto trace0; + } + else + { + next0 = DSLITE_IN2OUT_NEXT_SLOWPATH; + goto trace0; + } + } + else + { + s0 = + pool_elt_at_index (dm->per_thread_data[thread_index].sessions, + value0.value); + } + + old_addr0 = ip40->src_address.as_u32; + ip40->src_address = s0->out2in.addr; + new_addr0 = ip40->src_address.as_u32; + sum0 = ip40->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + src_address); + ip40->checksum = ip_csum_fold (sum0); + if (PREDICT_TRUE (proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; + + sum0 = tcp0->checksum; + sum0 = + ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, + dst_address); + sum0 = + ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, + length); + tcp0->checksum = ip_csum_fold (sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + accounting0: + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-B4 LRU list maintenance */ + clib_dlist_remove (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_index); + clib_dlist_addtail (dm->per_thread_data[thread_index].list_pool, + s0->per_b4_list_head_index, s0->per_b4_index); + + ip40->tos = + (clib_net_to_host_u32 + (ip60->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> + 20; + vlib_buffer_advance (b0, sizeof (ip6_header_t)); + + trace0: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + dslite_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = + s0 - dm->per_thread_data[thread_index].sessions; + } + + b0->error = error_node->errors[error0]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +dslite_in2out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dslite_in2out_node_fn_inline (vm, node, frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dslite_in2out_node) = { + .function = dslite_in2out_node_fn, + .name = "dslite-in2out", + .vector_size = sizeof (u32), + .format_trace = format_dslite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (dslite_in2out_error_strings), + .error_strings = dslite_in2out_error_strings, + .n_next_nodes = DSLITE_IN2OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [DSLITE_IN2OUT_NEXT_DROP] = "error-drop", + [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_node, dslite_in2out_node_fn); + +static uword +dslite_in2out_slowpath_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return dslite_in2out_node_fn_inline (vm, node, frame, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = { + .function = dslite_in2out_slowpath_node_fn, + .name = "dslite-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_dslite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (dslite_in2out_error_strings), + .error_strings = dslite_in2out_error_strings, + .n_next_nodes = DSLITE_IN2OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [DSLITE_IN2OUT_NEXT_DROP] = "error-drop", + [DSLITE_IN2OUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [DSLITE_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + [DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (dslite_in2out_slowpath_node, + dslite_in2out_slowpath_node_fn); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |