diff options
author | Matus Fabian <matfabia@cisco.com> | 2016-10-19 06:17:52 -0700 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2016-11-28 11:35:22 +0000 |
commit | 475f055305cf904b1c1c0436654f2f3e1c4f3358 (patch) | |
tree | 0f6a205f837377289eb369caf1f083fa5610d6f2 /plugins/snat-plugin/snat/out2in.c | |
parent | a10f62b11e7a710fde628ae75fe5791e54caba0a (diff) |
snat: thread safe (VPP-443)
All traffic corresponding to a specific SANT user is handled by a CPU core.
in2out: Non-translated packets worker lookup by src address and VRF hash in
snat-in2out-worker-handoff node.
out2in: Translated packets worker lookup by dst address and port number hash in
snat-out2in-worker-handoff node.
Change-Id: Ia092a605689539469841d382588f3f486a29a769
Signed-off-by: Matus Fabian <matfabia@cisco.com>
Diffstat (limited to 'plugins/snat-plugin/snat/out2in.c')
-rw-r--r-- | plugins/snat-plugin/snat/out2in.c | 326 |
1 files changed, 276 insertions, 50 deletions
diff --git a/plugins/snat-plugin/snat/out2in.c b/plugins/snat-plugin/snat/out2in.c index 35edcc5f7b3..a4641d3a71e 100644 --- a/plugins/snat-plugin/snat/out2in.c +++ b/plugins/snat-plugin/snat/out2in.c @@ -16,6 +16,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> #include <vnet/pg/pg.h> +#include <vnet/handoff.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> @@ -32,6 +33,11 @@ typedef struct { u32 session_index; } snat_out2in_trace_t; +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_out2in_worker_handoff_trace_t; + /* packet trace format function */ static u8 * format_snat_out2in_trace (u8 * s, va_list * args) { @@ -55,9 +61,23 @@ static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args) return s; } +static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_worker_handoff_trace_t * t = + va_arg (*args, snat_out2in_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} vlib_node_registration_t snat_out2in_node; vlib_node_registration_t snat_out2in_fast_node; +vlib_node_registration_t snat_out2in_worker_handoff_node; #define foreach_snat_out2in_error \ _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ @@ -80,6 +100,7 @@ static char * snat_out2in_error_strings[] = { typedef enum { SNAT_OUT2IN_NEXT_DROP, + SNAT_OUT2IN_NEXT_LOOKUP, SNAT_OUT2IN_N_NEXT, } snat_out2in_next_t; @@ -102,7 +123,8 @@ create_session_for_static_mapping (snat_main_t *sm, vlib_buffer_t *b0, snat_session_key_t in2out, snat_session_key_t out2in, - vlib_node_runtime_t * node) + vlib_node_runtime_t * node, + u32 cpu_index) { snat_user_t *u; snat_user_key_t user_key; @@ -119,28 +141,35 @@ create_session_for_static_mapping (snat_main_t *sm, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->users, u); + pool_get (sm->per_thread_data[cpu_index].users, u); memset (u, 0, sizeof (*u)); u->addr = in2out.addr; - pool_get (sm->list_pool, per_user_list_head_elt); + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_list_head_elt); u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->list_pool; + sm->per_thread_data[cpu_index].list_pool; - clib_dlist_init (sm->list_pool, u->sessions_per_user_list_head_index); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); - kv0.value = u - sm->users; + kv0.value = u - sm->per_thread_data[cpu_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + + /* add non-traslated packets worker lookup */ + kv0.value = cpu_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); } else { - u = pool_elt_at_index (sm->users, value0.value); + u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + value0.value); } - pool_get (sm->sessions, s); + pool_get (sm->per_thread_data[cpu_index].sessions, s); memset (s, 0, sizeof (*s)); s->outside_address_index = ~0; @@ -148,16 +177,22 @@ create_session_for_static_mapping (snat_main_t *sm, u->nstaticsessions++; /* Create list elts */ - pool_get (sm->list_pool, per_user_translation_list_elt); - clib_dlist_init (sm->list_pool, per_user_translation_list_elt - - sm->list_pool); - - per_user_translation_list_elt->value = s - sm->sessions; - s->per_user_index = per_user_translation_list_elt - sm->list_pool; + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[cpu_index].sessions; + s->per_user_index = + per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool; s->per_user_list_head_index = u->sessions_per_user_list_head_index; - clib_dlist_addtail (sm->list_pool, s->per_user_list_head_index, - per_user_translation_list_elt - sm->list_pool); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); s->in2out = in2out; s->out2in = out2in; @@ -165,12 +200,12 @@ create_session_for_static_mapping (snat_main_t *sm, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; - kv0.value = s - sm->sessions; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; - kv0.value = s - sm->sessions; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) clib_warning ("out2in key add failed"); @@ -185,7 +220,8 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t * node, - u32 next0, f64 now) + u32 next0, f64 now, + u32 cpu_index) { snat_session_key_t key0, sm0; icmp_echo_header_t *echo0; @@ -233,12 +269,13 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, /* Create session initiated by host from external network */ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, - node); + node, cpu_index); if (!s0) return SNAT_OUT2IN_NEXT_DROP; } else - s0 = pool_elt_at_index (sm->sessions, value0.value); + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; @@ -267,8 +304,10 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->list_pool, s0->per_user_index); - clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index, + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, s0->per_user_index); } @@ -285,6 +324,7 @@ snat_out2in_node_fn (vlib_main_t * vm, u32 pkts_processed = 0; snat_main_t * sm = &snat_main; f64 now = vlib_time_now (vm); + u32 cpu_index = os_get_cpu_number (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -301,8 +341,8 @@ snat_out2in_node_fn (vlib_main_t * vm, { u32 bi0, bi1; vlib_buffer_t * b0, * b1; - u32 next0 = SNAT_OUT2IN_NEXT_DROP; - u32 next1 = SNAT_OUT2IN_NEXT_DROP; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; u32 sw_if_index0, sw_if_index1; ip4_header_t * ip0, *ip1; ip_csum_t sum0, sum1; @@ -353,7 +393,6 @@ snat_out2in_node_fn (vlib_main_t * vm, rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); - vnet_feature_next (sw_if_index0, &next0, b0); proto0 = ~0; proto0 = (ip0->protocol == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : proto0; @@ -369,7 +408,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next0 = icmp_out2in_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now); + next0, now, cpu_index); goto trace0; } @@ -391,12 +430,14 @@ snat_out2in_node_fn (vlib_main_t * vm, } /* Create session initiated by host from external network */ - s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node); + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); if (!s0) goto trace0; } else - s0 = pool_elt_at_index (sm->sessions, value0.value); + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; @@ -439,8 +480,10 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->list_pool, s0->per_user_index); - clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index, + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, s0->per_user_index); } trace0: @@ -454,7 +497,7 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->sessions; + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; } pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; @@ -469,8 +512,6 @@ snat_out2in_node_fn (vlib_main_t * vm, rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index1); - vnet_feature_next (sw_if_index1, &next1, b1); - proto1 = ~0; proto1 = (ip1->protocol == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : proto1; @@ -486,7 +527,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next1 = icmp_out2in_slow_path (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, - next1, now); + next1, now, cpu_index); goto trace1; } @@ -508,12 +549,14 @@ snat_out2in_node_fn (vlib_main_t * vm, } /* Create session initiated by host from external network */ - s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node); + s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node, + cpu_index); if (!s1) goto trace1; } else - s1 = pool_elt_at_index (sm->sessions, value1.value); + s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value1.value); old_addr1 = ip1->dst_address.as_u32; ip1->dst_address = s1->in2out.addr; @@ -556,8 +599,10 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s1)) { - clib_dlist_remove (sm->list_pool, s1->per_user_index); - clib_dlist_addtail (sm->list_pool, s1->per_user_list_head_index, + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_list_head_index, s1->per_user_index); } trace1: @@ -571,10 +616,9 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next1; t->session_index = ~0; if (s1) - t->session_index = s1 - sm->sessions; + t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; } - pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; /* verify speculative enqueues, maybe switch current next frame */ @@ -587,7 +631,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { u32 bi0; vlib_buffer_t * b0; - u32 next0 = SNAT_OUT2IN_NEXT_DROP; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; u32 sw_if_index0; ip4_header_t * ip0; ip_csum_t sum0; @@ -621,8 +665,6 @@ snat_out2in_node_fn (vlib_main_t * vm, rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, sw_if_index0); - vnet_feature_next (sw_if_index0, &next0, b0); - proto0 = ~0; proto0 = (ip0->protocol == IP_PROTOCOL_UDP) ? SNAT_PROTOCOL_UDP : proto0; @@ -638,7 +680,7 @@ snat_out2in_node_fn (vlib_main_t * vm, { next0 = icmp_out2in_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, now); + next0, now, cpu_index); goto trace00; } @@ -660,12 +702,14 @@ snat_out2in_node_fn (vlib_main_t * vm, } /* Create session initiated by host from external network */ - s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node); + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); if (!s0) goto trace00; } else - s0 = pool_elt_at_index (sm->sessions, value0.value); + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; @@ -708,8 +752,10 @@ snat_out2in_node_fn (vlib_main_t * vm, /* Per-user LRU list maintenance for dynamic translation */ if (!snat_is_session_static (s0)) { - clib_dlist_remove (sm->list_pool, s0->per_user_index); - clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index, + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, s0->per_user_index); } trace00: @@ -723,7 +769,7 @@ snat_out2in_node_fn (vlib_main_t * vm, t->next_index = next0; t->session_index = ~0; if (s0) - t->session_index = s0 - sm->sessions; + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; } pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; @@ -760,10 +806,189 @@ VLIB_REGISTER_NODE (snat_out2in_node) = { /* edit / add dispositions here */ .next_nodes = { [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", }, }; VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn); +static uword +snat_out2in_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number (); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + udp_header_t * udp0; + snat_static_mapping_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? */ + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + /* Add to translated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + } + else + next_worker_index = value0.value; + + if (PREDICT_FALSE (next_worker_index != cpu_index)) + { + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, snat_out2in_node.index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, snat_out2in_node.index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { + .function = snat_out2in_worker_handoff_fn, + .name = "snat-out2in-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn); + static inline u32 icmp_out2in_fast (snat_main_t *sm, vlib_buffer_t * b0, ip4_header_t * ip0, @@ -1014,6 +1239,7 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = { /* edit / add dispositions here */ .next_nodes = { + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", [SNAT_OUT2IN_NEXT_DROP] = "error-drop", }, }; |