diff options
author | Neale Ranns <nranns@cisco.com> | 2020-05-19 07:17:19 +0000 |
---|---|---|
committer | Andrew Yourtchenko <ayourtch@gmail.com> | 2020-08-31 09:23:32 +0000 |
commit | 29f3c7d2ecac2f9d80bb33e91bd5d1f9d434768a (patch) | |
tree | 66d7c69f2c24959ef4f6ef67b7c56dba11d8be29 /src/plugins/cnat/cnat_node.h | |
parent | 133c91c1c06e7c773ba675181901ba0dcf955ae6 (diff) |
cnat: Destination based NAT
Type: feature
Signed-off-by: Neale Ranns <nranns@cisco.com>
Change-Id: I64a99a4fbc674212944247793fd5c1fb701408cb
Diffstat (limited to 'src/plugins/cnat/cnat_node.h')
-rw-r--r-- | src/plugins/cnat/cnat_node.h | 535 |
1 files changed, 535 insertions, 0 deletions
diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h new file mode 100644 index 00000000000..58e81c12b45 --- /dev/null +++ b/src/plugins/cnat/cnat_node.h @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CNAT_NODE_H__ +#define __CNAT_NODE_H__ + +#include <vlibmemory/api.h> +#include <cnat/cnat_session.h> +#include <cnat/cnat_client.h> + +typedef uword (*cnat_node_sub_t) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * b, + cnat_node_ctx_t * ctx, int rv, + cnat_session_t * session); + +/** + * Inline translation functions + */ + +static_always_inline u8 +has_ip6_address (ip6_address_t * a) +{ + return ((0 != a->as_u64[0]) || (0 != a->as_u64[1])); +} + +static_always_inline void +cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp, + u16 * checksum, + ip4_address_t new_addr[VLIB_N_DIR], + u16 new_port[VLIB_N_DIR]) +{ + u16 old_port[VLIB_N_DIR]; + ip4_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_port[VLIB_TX] = udp->dst_port; + old_port[VLIB_RX] = udp->src_port; + old_addr[VLIB_TX] = ip4->dst_address; + old_addr[VLIB_RX] = ip4->src_address; + + sum = *checksum; + if (new_addr[VLIB_TX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32, + ip4_header_t, dst_address); + if (new_port[VLIB_TX]) + { + udp->dst_port = new_port[VLIB_TX]; + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + if (new_addr[VLIB_RX].as_u32) + sum = + ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32, + ip4_header_t, src_address); + + if (new_port[VLIB_RX]) + { + udp->src_port = new_port[VLIB_RX]; + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + *checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR]) +{ + ip4_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_addr[VLIB_TX] = ip4->dst_address; + old_addr[VLIB_RX] = ip4->src_address; + + sum = ip4->checksum; + if (new_addr[VLIB_TX].as_u32) + { + ip4->dst_address = new_addr[VLIB_TX]; + sum = + ip_csum_update (sum, old_addr[VLIB_TX].as_u32, + new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address); + } + if (new_addr[VLIB_RX].as_u32) + { + ip4->src_address = new_addr[VLIB_RX]; + sum = + ip_csum_update (sum, old_addr[VLIB_RX].as_u32, + new_addr[VLIB_RX].as_u32, ip4_header_t, src_address); + } + ip4->checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index) +{ + cnat_main_t *cm = &cnat_main; + if (PREDICT_FALSE (tcp_fin (tcp))) + { + cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT); + } + + if (PREDICT_FALSE (tcp_rst (tcp))) + { + cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT); + } + + if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp))) + { + cnat_timestamp_set_lifetime (index, cm->tcp_max_age); + } +} + +static_always_inline void +cnat_translation_ip4 (const cnat_session_t * session, + ip4_header_t * ip4, udp_header_t * udp) +{ + tcp_header_t *tcp = (tcp_header_t *) udp; + ip4_address_t new_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + + new_addr[VLIB_TX] = session->value.cs_ip[VLIB_TX].ip4; + new_addr[VLIB_RX] = session->value.cs_ip[VLIB_RX].ip4; + new_port[VLIB_TX] = session->value.cs_port[VLIB_TX]; + new_port[VLIB_RX] = session->value.cs_port[VLIB_RX]; + + if (ip4->protocol == IP_PROTOCOL_TCP) + { + if (PREDICT_FALSE (tcp->checksum)) + cnat_ip4_translate_l4 (ip4, udp, &tcp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); + } + else if (ip4->protocol == IP_PROTOCOL_UDP) + { + if (PREDICT_FALSE (udp->checksum)) + cnat_ip4_translate_l4 (ip4, udp, &udp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + } + + cnat_ip4_translate_l3 (ip4, new_addr); +} + +static_always_inline void +cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR]) +{ + if (has_ip6_address (&new_addr[VLIB_TX])) + ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]); + if (has_ip6_address (&new_addr[VLIB_RX])) + ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]); +} + +static_always_inline void +cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp, + u16 * checksum, + ip6_address_t new_addr[VLIB_N_DIR], + u16 new_port[VLIB_N_DIR]) +{ + u16 old_port[VLIB_N_DIR]; + ip6_address_t old_addr[VLIB_N_DIR]; + ip_csum_t sum; + + old_port[VLIB_TX] = udp->dst_port; + old_port[VLIB_RX] = udp->src_port; + ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address); + ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address); + + sum = *checksum; + if (has_ip6_address (&new_addr[VLIB_TX])) + { + sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]); + } + + if (new_port[VLIB_TX]) + { + udp->dst_port = new_port[VLIB_TX]; + sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + if (has_ip6_address (&new_addr[VLIB_RX])) + { + sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]); + sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]); + } + + if (new_port[VLIB_RX]) + { + udp->src_port = new_port[VLIB_RX]; + sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX], + ip4_header_t /* cheat */ , + length /* changed member */ ); + } + *checksum = ip_csum_fold (sum); +} + +static_always_inline void +cnat_translation_ip6 (const cnat_session_t * session, + ip6_header_t * ip6, udp_header_t * udp) +{ + tcp_header_t *tcp = (tcp_header_t *) udp; + ip6_address_t new_addr[VLIB_N_DIR]; + u16 new_port[VLIB_N_DIR]; + + ip6_address_copy (&new_addr[VLIB_TX], &session->value.cs_ip[VLIB_TX].ip6); + ip6_address_copy (&new_addr[VLIB_RX], &session->value.cs_ip[VLIB_RX].ip6); + new_port[VLIB_TX] = session->value.cs_port[VLIB_TX]; + new_port[VLIB_RX] = session->value.cs_port[VLIB_RX]; + + if (ip6->protocol == IP_PROTOCOL_TCP) + { + if (PREDICT_FALSE (tcp->checksum)) + cnat_ip6_translate_l4 (ip6, udp, &tcp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index); + } + else if (ip6->protocol == IP_PROTOCOL_UDP) + { + if (PREDICT_FALSE (udp->checksum)) + cnat_ip6_translate_l4 (ip6, udp, &udp->checksum, new_addr, new_port); + else + { + udp->dst_port = new_port[VLIB_TX]; + udp->src_port = new_port[VLIB_RX]; + } + } + + cnat_ip6_translate_l3 (ip6, new_addr); +} + +static_always_inline void +cnat_session_make_key (vlib_buffer_t * b, ip_address_family_t af, + clib_bihash_kv_40_48_t * bkey) +{ + udp_header_t *udp; + cnat_session_t *session = (cnat_session_t *) bkey; + if (AF_IP4 == af) + { + ip4_header_t *ip4; + ip4 = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip4 + 1); + session->key.cs_af = AF_IP4; + session->key.__cs_pad[0] = 0; + session->key.__cs_pad[1] = 0; + + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX], &ip4->dst_address); + ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX], &ip4->src_address); + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; + session->key.cs_proto = ip4->protocol; + } + else + { + ip6_header_t *ip6; + ip6 = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip6 + 1); + session->key.cs_af = AF_IP6; + session->key.__cs_pad[0] = 0; + session->key.__cs_pad[1] = 0; + + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_TX], &ip6->dst_address); + ip46_address_set_ip6 (&session->key.cs_ip[VLIB_RX], &ip6->src_address); + session->key.cs_port[VLIB_RX] = udp->src_port; + session->key.cs_port[VLIB_TX] = udp->dst_port; + session->key.cs_proto = ip6->protocol; + } +} + +/** + * Create NAT sessions + */ + +static_always_inline void +cnat_session_create (cnat_session_t * session, cnat_node_ctx_t * ctx, + u8 rsession_flags) +{ + cnat_client_t *cc; + clib_bihash_kv_40_48_t rkey; + cnat_session_t *rsession = (cnat_session_t *) & rkey; + clib_bihash_kv_40_48_t *bkey = (clib_bihash_kv_40_48_t *) session; + clib_bihash_kv_40_48_t rvalue; + int rv; + + /* create the reverse flow key */ + ip46_address_copy (&rsession->key.cs_ip[VLIB_RX], + &session->value.cs_ip[VLIB_TX]); + ip46_address_copy (&rsession->key.cs_ip[VLIB_TX], + &session->value.cs_ip[VLIB_RX]); + rsession->key.cs_proto = session->key.cs_proto; + rsession->key.__cs_pad[0] = 0; + rsession->key.__cs_pad[1] = 0; + rsession->key.cs_af = ctx->af; + rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX]; + rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX]; + + /* First search for existing reverse session */ + rv = clib_bihash_search_inline_2_40_48 (&cnat_session_db, &rkey, &rvalue); + if (!rv) + { + /* Reverse session already exists + corresponding client should also exist + we only need to refcnt the timestamp */ + cnat_session_t *found_rsession = (cnat_session_t *) & rvalue; + session->value.cs_ts_index = found_rsession->value.cs_ts_index; + cnat_timestamp_inc_refcnt (session->value.cs_ts_index); + clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1 /* is_add */ ); + goto create_rsession; + } + + session->value.cs_ts_index = cnat_timestamp_new (ctx->now); + clib_bihash_add_del_40_48 (&cnat_session_db, bkey, 1); + + /* is this the first time we've seen this source address */ + cc = (AF_IP4 == ctx->af ? + cnat_client_ip4_find (&session->value.cs_ip[VLIB_RX].ip4) : + cnat_client_ip6_find (&session->value.cs_ip[VLIB_RX].ip6)); + + if (NULL == cc) + { + u64 r0 = 17; + if (AF_IP4 == ctx->af) + r0 = (u64) session->value.cs_ip[VLIB_RX].ip4.as_u32; + else + { + r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[0]; + r0 = r0 * 31 + session->value.cs_ip[VLIB_RX].ip6.as_u64[1]; + } + + /* Rate limit */ + if (!throttle_check (&cnat_throttle, ctx->thread_index, r0, ctx->seed)) + { + cnat_learn_arg_t l; + l.addr.version = ctx->af; + ip46_address_copy (&l.addr.ip, &session->value.cs_ip[VLIB_RX]); + /* fire client create to the main thread */ + vl_api_rpc_call_main_thread (cnat_client_learn, + (u8 *) & l, sizeof (l)); + } + else + { + /* Will still need to count those for session refcnt */ + ip_address_t *addr; + clib_spinlock_lock (&cnat_client_db.throttle_pool_lock + [ctx->thread_index]); + pool_get (cnat_client_db.throttle_pool[ctx->thread_index], addr); + addr->version = ctx->af; + ip46_address_copy (&addr->ip, &session->value.cs_ip[VLIB_RX]); + clib_spinlock_unlock (&cnat_client_db.throttle_pool_lock + [ctx->thread_index]); + } + } + else + { + cnat_client_cnt_session (cc); + } + +create_rsession: + /* add the reverse flow */ + ip46_address_copy (&rsession->value.cs_ip[VLIB_RX], + &session->key.cs_ip[VLIB_TX]); + ip46_address_copy (&rsession->value.cs_ip[VLIB_TX], + &session->key.cs_ip[VLIB_RX]); + rsession->value.cs_ts_index = session->value.cs_ts_index; + rsession->value.cs_lbi = INDEX_INVALID; + rsession->value.flags = rsession_flags; + rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX]; + rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX]; + + clib_bihash_add_del_40_48 (&cnat_session_db, &rkey, 1); +} + +always_inline uword +cnat_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + cnat_node_sub_t cnat_sub, + ip_address_family_t af, u8 do_trace) +{ + u32 n_left, *from, thread_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE]; + vlib_buffer_t **b = bufs; + u16 nexts[VLIB_FRAME_SIZE], *next; + f64 now; + u64 seed; + + thread_index = vm->thread_index; + from = vlib_frame_vector_args (frame); + n_left = frame->n_vectors; + next = nexts; + vlib_get_buffers (vm, from, bufs, n_left); + now = vlib_time_now (vm); + seed = throttle_seed (&cnat_throttle, thread_index, vlib_time_now (vm)); + cnat_session_t *session[4]; + clib_bihash_kv_40_48_t bkey[4], bvalue[4]; + u64 hash[4]; + int rv[4]; + + cnat_node_ctx_t ctx = { now, seed, thread_index, af, do_trace }; + + if (n_left >= 8) + { + /* Kickstart our state */ + cnat_session_make_key (b[3], af, &bkey[3]); + cnat_session_make_key (b[2], af, &bkey[2]); + cnat_session_make_key (b[1], af, &bkey[1]); + cnat_session_make_key (b[0], af, &bkey[0]); + + hash[3] = clib_bihash_hash_40_48 (&bkey[3]); + hash[2] = clib_bihash_hash_40_48 (&bkey[2]); + hash[1] = clib_bihash_hash_40_48 (&bkey[1]); + hash[0] = clib_bihash_hash_40_48 (&bkey[0]); + } + + while (n_left >= 8) + { + if (n_left >= 12) + { + vlib_prefetch_buffer_header (b[11], LOAD); + vlib_prefetch_buffer_header (b[10], LOAD); + vlib_prefetch_buffer_header (b[9], LOAD); + vlib_prefetch_buffer_header (b[8], LOAD); + } + + rv[3] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[3], &bkey[3], + &bvalue[3]); + session[3] = (cnat_session_t *) (rv[3] ? &bkey[3] : &bvalue[3]); + next[3] = cnat_sub (vm, node, b[3], &ctx, rv[3], session[3]); + + rv[2] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[2], &bkey[2], + &bvalue[2]); + session[2] = (cnat_session_t *) (rv[2] ? &bkey[2] : &bvalue[2]); + next[2] = cnat_sub (vm, node, b[2], &ctx, rv[2], session[2]); + + rv[1] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[1], &bkey[1], + &bvalue[1]); + session[1] = (cnat_session_t *) (rv[1] ? &bkey[1] : &bvalue[1]); + next[1] = cnat_sub (vm, node, b[1], &ctx, rv[1], session[1]); + + rv[0] = + clib_bihash_search_inline_2_with_hash_40_48 (&cnat_session_db, + hash[0], &bkey[0], + &bvalue[0]); + session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]); + next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]); + + cnat_session_make_key (b[7], af, &bkey[3]); + cnat_session_make_key (b[6], af, &bkey[2]); + cnat_session_make_key (b[5], af, &bkey[1]); + cnat_session_make_key (b[4], af, &bkey[0]); + + hash[3] = clib_bihash_hash_40_48 (&bkey[3]); + hash[2] = clib_bihash_hash_40_48 (&bkey[2]); + hash[1] = clib_bihash_hash_40_48 (&bkey[1]); + hash[0] = clib_bihash_hash_40_48 (&bkey[0]); + + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[3]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[2]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[1]); + clib_bihash_prefetch_bucket_40_48 (&cnat_session_db, hash[0]); + + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[3]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[2]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[1]); + clib_bihash_prefetch_data_40_48 (&cnat_session_db, hash[0]); + + b += 4; + next += 4; + n_left -= 4; + } + + while (n_left > 0) + { + cnat_session_make_key (b[0], af, &bkey[0]); + rv[0] = clib_bihash_search_inline_2_40_48 (&cnat_session_db, + &bkey[0], &bvalue[0]); + + session[0] = (cnat_session_t *) (rv[0] ? &bkey[0] : &bvalue[0]); + next[0] = cnat_sub (vm, node, b[0], &ctx, rv[0], session[0]); + + b++; + next++; + n_left--; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + + return frame->n_vectors; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif |