summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatus Fabian <matfabia@cisco.com>2016-10-19 06:17:52 -0700
committerDamjan Marion <dmarion.lists@gmail.com>2016-11-28 11:35:22 +0000
commit475f055305cf904b1c1c0436654f2f3e1c4f3358 (patch)
tree0f6a205f837377289eb369caf1f083fa5610d6f2
parenta10f62b11e7a710fde628ae75fe5791e54caba0a (diff)
snat: thread safe (VPP-443)
All traffic corresponding to a specific SNAT user is handled by a single CPU core. in2out: the worker for non-translated packets is looked up by a hash of the source address and VRF in the snat-in2out-worker-handoff node. out2in: the worker for translated packets is looked up by a hash of the destination address and port number in the snat-out2in-worker-handoff node. Change-Id: Ia092a605689539469841d382588f3f486a29a769 Signed-off-by: Matus Fabian <matfabia@cisco.com>
-rw-r--r--plugins/snat-plugin/snat/in2out.c350
-rw-r--r--plugins/snat-plugin/snat/out2in.c326
-rw-r--r--plugins/snat-plugin/snat/snat.api43
-rw-r--r--plugins/snat-plugin/snat/snat.c387
-rw-r--r--plugins/snat-plugin/snat/snat.h40
-rw-r--r--plugins/snat-plugin/snat/snat_test.c74
-rw-r--r--vnet/etc/scripts/snat34
7 files changed, 1066 insertions, 188 deletions
diff --git a/plugins/snat-plugin/snat/in2out.c b/plugins/snat-plugin/snat/in2out.c
index 9a4aeb01..e1edbb81 100644
--- a/plugins/snat-plugin/snat/in2out.c
+++ b/plugins/snat-plugin/snat/in2out.c
@@ -16,6 +16,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
@@ -33,6 +34,11 @@ typedef struct {
u32 is_slow_path;
} snat_in2out_trace_t;
+typedef struct { /* per-packet trace record filled in by snat_in2out_worker_handoff_fn */
+ u32 next_worker_index; /* worker index selected for the packet */
+ u8 do_handoff; /* 1 when the selected worker differs from the current thread's index, 0 otherwise */
+} snat_in2out_worker_handoff_trace_t;
+
/* packet trace format function */
static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
{
@@ -61,9 +67,24 @@ static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
return s;
}
+static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args) /* trace formatter for the in2out worker handoff node; appends to s and returns it */
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); /* consumed only to advance args */
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); /* consumed only to advance args */
+ snat_in2out_worker_handoff_trace_t * t =
+ va_arg (*args, snat_in2out_worker_handoff_trace_t *); /* third argument is the trace record itself */
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker"; /* label tells whether the packet left the current thread */
+ s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
+
vlib_node_registration_t snat_in2out_node;
vlib_node_registration_t snat_in2out_slowpath_node;
vlib_node_registration_t snat_in2out_fast_node;
+vlib_node_registration_t snat_in2out_worker_handoff_node;
#define foreach_snat_in2out_error \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
@@ -93,14 +114,14 @@ typedef enum {
SNAT_IN2OUT_N_NEXT,
} snat_in2out_next_t;
-
static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
ip4_header_t * ip0,
u32 rx_fib_index0,
snat_session_key_t * key0,
snat_session_t ** sessionp,
vlib_node_runtime_t * node,
- u32 next0)
+ u32 next0,
+ u32 cpu_index)
{
snat_user_t *u;
snat_user_key_t user_key;
@@ -115,6 +136,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
u32 address_index = ~0;
u32 outside_fib_index;
uword * p;
+ snat_static_mapping_key_t worker_by_out_key;
p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
if (! p)
@@ -132,25 +154,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
{
/* no, make a new one */
- pool_get (sm->users, u);
+ pool_get (sm->per_thread_data[cpu_index].users, u);
memset (u, 0, sizeof (*u));
u->addr = ip0->src_address;
- pool_get (sm->list_pool, per_user_list_head_elt);
+ pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
u->sessions_per_user_list_head_index = per_user_list_head_elt -
- sm->list_pool;
+ sm->per_thread_data[cpu_index].list_pool;
- clib_dlist_init (sm->list_pool, u->sessions_per_user_list_head_index);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
- kv0.value = u - sm->users;
+ kv0.value = u - sm->per_thread_data[cpu_index].users;
/* add user */
clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
}
else
{
- u = pool_elt_at_index (sm->users, value0.value);
+ u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+ value0.value);
}
/* Over quota? Recycle the least recently used dynamic translation */
@@ -159,25 +183,26 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
/* Remove the oldest dynamic translation */
do {
oldest_per_user_translation_list_index =
- clib_dlist_remove_head
- (sm->list_pool, u->sessions_per_user_list_head_index);
+ clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
ASSERT (oldest_per_user_translation_list_index != ~0);
/* add it back to the end of the LRU list */
- clib_dlist_addtail (sm->list_pool,
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
u->sessions_per_user_list_head_index,
oldest_per_user_translation_list_index);
/* Get the list element */
oldest_per_user_translation_list_elt =
- pool_elt_at_index (sm->list_pool,
+ pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
oldest_per_user_translation_list_index);
/* Get the session index from the list element */
session_index = oldest_per_user_translation_list_elt->value;
/* Get the session */
- s = pool_elt_at_index (sm->sessions, session_index);
+ s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ session_index);
} while (!snat_is_session_static (s));
/* Remove in2out, out2in keys */
@@ -218,7 +243,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
}
/* Create a new session */
- pool_get (sm->sessions, s);
+ pool_get (sm->per_thread_data[cpu_index].sessions, s);
memset (s, 0, sizeof (*s));
s->outside_address_index = address_index;
@@ -234,16 +259,22 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
}
/* Create list elts */
- pool_get (sm->list_pool, per_user_translation_list_elt);
- clib_dlist_init (sm->list_pool, per_user_translation_list_elt -
- sm->list_pool);
-
- per_user_translation_list_elt->value = s - sm->sessions;
- s->per_user_index = per_user_translation_list_elt - sm->list_pool;
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[cpu_index].sessions;
+ s->per_user_index = per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool;
s->per_user_list_head_index = u->sessions_per_user_list_head_index;
- clib_dlist_addtail (sm->list_pool, s->per_user_list_head_index,
- per_user_translation_list_elt - sm->list_pool);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
}
s->in2out = *key0;
@@ -254,16 +285,23 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
/* Add to translation hashes */
kv0.key = s->in2out.as_u64;
- kv0.value = s - sm->sessions;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
clib_warning ("in2out key add failed");
kv0.key = s->out2in.as_u64;
- kv0.value = s - sm->sessions;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
clib_warning ("out2in key add failed");
+ /* Add to translated packets worker lookup */
+ worker_by_out_key.addr = s->out2in.addr;
+ worker_by_out_key.port = s->out2in.port;
+ worker_by_out_key.fib_index = s->out2in.fib_index;
+ kv0.key = worker_by_out_key.as_u64;
+ kv0.value = cpu_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
return next0;
}
@@ -275,7 +313,8 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
u32 rx_fib_index0,
vlib_node_runtime_t * node,
u32 next0,
- f64 now)
+ f64 now,
+ u32 cpu_index)
{
snat_session_key_t key0;
icmp_echo_header_t *echo0;
@@ -320,13 +359,14 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
return next0;
next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
- &s0, node, next0);
+ &s0, node, next0, cpu_index);
if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
return next0;
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->src_address.as_u32;
ip0->src_address = s0->out2in.addr;
@@ -355,8 +395,10 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
/* Per-user LRU list maintenance for dynamic translations */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
@@ -375,6 +417,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
f64 now = vlib_time_now (vm);
u32 stats_node_index;
+ u32 cpu_index = os_get_cpu_number ();
stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
snat_in2out_node.index;
@@ -445,14 +488,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
-#if 0
- /* Formally correct, but we send to slowpath, lookup or drop */
- vnet_get_config_data (&cm->config_main,
- &b0->current_config_index,
- &next0,
- 0 /* sizeof config data */);
-#endif
-
proto0 = ~0;
proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto0;
@@ -471,7 +506,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
{
next0 = icmp_in2out_slow_path
(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
- node, next0, now);
+ node, next0, now, cpu_index);
goto trace00;
}
}
@@ -512,7 +547,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
goto trace00;
next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
- &s0, node, next0);
+ &s0, node, next0, cpu_index);
if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
goto trace00;
}
@@ -523,7 +558,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
}
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->src_address.as_u32;
ip0->src_address = s0->out2in.addr;
@@ -565,8 +601,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
trace00:
@@ -581,7 +619,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
t->next_index = next0;
t->session_index = ~0;
if (s0)
- t->session_index = s0 - sm->sessions;
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
}
pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -595,13 +633,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
sw_if_index1);
-#if 0
- vnet_get_config_data (&cm->config_main,
- &b1->current_config_index,
- &next1,
- 0 /* sizeof config data */);
-#endif
-
proto1 = ~0;
proto1 = (ip1->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto1;
@@ -619,8 +650,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
{
next1 = icmp_in2out_slow_path
- (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1,
- now);
+ (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
+ next1, now, cpu_index);
goto trace01;
}
}
@@ -661,7 +692,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
goto trace01;
next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
- &s1, node, next1);
+ &s1, node, next1, cpu_index);
if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
goto trace01;
}
@@ -672,7 +703,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
}
}
else
- s1 = pool_elt_at_index (sm->sessions, value1.value);
+ s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value1.value);
old_addr1 = ip1->src_address.as_u32;
ip1->src_address = s1->out2in.addr;
@@ -714,8 +746,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s1))
{
- clib_dlist_remove (sm->list_pool, s1->per_user_index);
- clib_dlist_addtail (sm->list_pool, s1->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_list_head_index,
s1->per_user_index);
}
trace01:
@@ -729,7 +763,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
t->next_index = next1;
t->session_index = ~0;
if (s1)
- t->session_index = s1 - sm->sessions;
+ t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
}
pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
@@ -779,14 +813,6 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
sw_if_index0);
-
-#if 0
- vnet_get_config_data (&cm->config_main,
- &b0->current_config_index,
- &next0,
- 0 /* sizeof config data */);
-#endif
-
proto0 = ~0;
proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto0;
@@ -804,8 +830,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
{
next0 = icmp_in2out_slow_path
- (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0,
- now);
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, cpu_index);
goto trace0;
}
}
@@ -846,7 +872,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
goto trace0;
next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
- &s0, node, next0);
+ &s0, node, next0, cpu_index);
if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
goto trace0;
}
@@ -857,7 +883,8 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
}
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->src_address.as_u32;
ip0->src_address = s0->out2in.addr;
@@ -899,8 +926,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
@@ -915,7 +944,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
t->next_index = next0;
t->session_index = ~0;
if (s0)
- t->session_index = s0 - sm->sessions;
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
}
pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -999,6 +1028,183 @@ VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
+static uword /* node function: looks up (or assigns) a worker per source address + RX FIB index and passes each packet to that worker */
+snat_in2out_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ snat_main_t *sm = &snat_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from, *to_next = 0;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; /* thread-local vector of pending handoff queue elements, indexed by worker index */
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index /* only initialised and reset to ~0 in this function; never read here */
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0; /* handoff queue element currently being filled, if any */
+ vlib_frame_t *f = 0; /* frame for packets that stay on this thread; allocated on first use */
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0; /* worker that hf/to_next_worker currently refer to; ~0 means none */
+ u32 cpu_index = os_get_cpu_number (); /* compared with the selected worker to decide whether a handoff is needed */
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) /* first call on this thread: size the thread-local vectors */
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ sm->first_worker_index + sm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0) /* one packet per iteration */
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ ip4_header_t * ip0;
+ snat_user_key_t key0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u8 do_handoff;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ key0.addr = ip0->src_address; /* lookup key = source address ... */
+ key0.fib_index = rx_fib_index0; /* ... plus the RX FIB index */
+
+ kv0.key = key0.as_u64;
+
+ /* Ever heard of the "user" before? */
+ if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) /* a non-zero result is handled as "no entry" */
+ {
+ /* No, assign next available worker (RR) */
+ next_worker_index = sm->first_worker_index +
+ sm->workers[sm->next_worker++ % vec_len (sm->workers)]; /* NOTE(review): sm->next_worker is bumped with no synchronization visible here -- confirm concurrent callers are safe */
+
+ /* add non-translated packets worker lookup */
+ kv0.value = next_worker_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
+ }
+ else
+ next_worker_index = value0.value; /* known user: reuse the stored worker */
+
+ if (PREDICT_FALSE (next_worker_index != cpu_index))
+ {
+ do_handoff = 1;
+
+ if (next_worker_index != current_worker_index) /* target worker changed since the previous packet */
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; /* record the fill level of the element being left */
+
+ hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; /* room left in the element */
+ to_next_worker = &hf->buffer_index[hf->n_vectors]; /* continue after the entries already present */
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0) /* element is full: ship it now and clear the cached pointer */
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+ }
+ else
+ {
+ do_handoff = 0;
+ /* if this is 1st frame */
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, snat_in2out_node.index); /* packets that stay local go straight to snat_in2out_node */
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_worker_index = next_worker_index;
+ t->do_handoff = do_handoff;
+ }
+ }
+
+ if (f)
+ vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; /* finalise the count of the last element touched */
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors) /* the leading `1 ||` forces this branch; the else arm below is never taken */
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] = /* NOTE(review): i is bounded by the vector sized from tm->n_vlib_mains, but this vector is sized from sm->first_worker_index + sm->num_workers -- confirm the lengths always agree */
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors; /* reports every input packet as processed */
+}
+
+VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { /* registration of the in2out worker handoff graph node */
+ .function = snat_in2out_worker_handoff_fn,
+ .name = "snat-in2out-worker-handoff",
+ .vector_size = sizeof (u32), /* the node function reads its frame as u32 buffer indices */
+ .format_trace = format_snat_in2out_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop", /* only declared next node; the node function passes packets to snat_in2out_node or a worker queue directly */
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
+
static inline u32 icmp_in2out_static_map (snat_main_t *sm,
vlib_buffer_t * b0,
ip4_header_t * ip0,
diff --git a/plugins/snat-plugin/snat/out2in.c b/plugins/snat-plugin/snat/out2in.c
index 35edcc5f..a4641d3a 100644
--- a/plugins/snat-plugin/snat/out2in.c
+++ b/plugins/snat-plugin/snat/out2in.c
@@ -16,6 +16,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
@@ -32,6 +33,11 @@ typedef struct {
u32 session_index;
} snat_out2in_trace_t;
+typedef struct { /* trace record printed by format_snat_out2in_worker_handoff_trace */
+ u32 next_worker_index; /* worker index shown in the trace output */
+ u8 do_handoff; /* non-zero prints "next worker", zero prints "same worker" */
+} snat_out2in_worker_handoff_trace_t;
+
/* packet trace format function */
static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
{
@@ -55,9 +61,23 @@ static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
return s;
}
+static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) /* trace formatter for the out2in worker handoff node; appends to s and returns it */
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); /* consumed only to advance args */
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); /* consumed only to advance args */
+ snat_out2in_worker_handoff_trace_t * t =
+ va_arg (*args, snat_out2in_worker_handoff_trace_t *); /* third argument is the trace record itself */
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker"; /* label is chosen from the do_handoff flag */
+ s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
vlib_node_registration_t snat_out2in_node;
vlib_node_registration_t snat_out2in_fast_node;
+vlib_node_registration_t snat_out2in_worker_handoff_node;
#define foreach_snat_out2in_error \
_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
@@ -80,6 +100,7 @@ static char * snat_out2in_error_strings[] = {
typedef enum {
SNAT_OUT2IN_NEXT_DROP,
+ SNAT_OUT2IN_NEXT_LOOKUP, /* mapped to "ip4-lookup" in the snat_out2in_node registration; now the initial next index in snat_out2in_node_fn */
SNAT_OUT2IN_N_NEXT,
} snat_out2in_next_t;
@@ -102,7 +123,8 @@ create_session_for_static_mapping (snat_main_t *sm,
vlib_buffer_t *b0,
snat_session_key_t in2out,
snat_session_key_t out2in,
- vlib_node_runtime_t * node)
+ vlib_node_runtime_t * node,
+ u32 cpu_index)
{
snat_user_t *u;
snat_user_key_t user_key;
@@ -119,28 +141,35 @@ create_session_for_static_mapping (snat_main_t *sm,
if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
{
/* no, make a new one */
- pool_get (sm->users, u);
+ pool_get (sm->per_thread_data[cpu_index].users, u);
memset (u, 0, sizeof (*u));
u->addr = in2out.addr;
- pool_get (sm->list_pool, per_user_list_head_elt);
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_list_head_elt);
u->sessions_per_user_list_head_index = per_user_list_head_elt -
- sm->list_pool;
+ sm->per_thread_data[cpu_index].list_pool;
- clib_dlist_init (sm->list_pool, u->sessions_per_user_list_head_index);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
- kv0.value = u - sm->users;
+ kv0.value = u - sm->per_thread_data[cpu_index].users;
/* add user */
clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
+
+ /* add non-traslated packets worker lookup */
+ kv0.value = cpu_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
}
else
{
- u = pool_elt_at_index (sm->users, value0.value);
+ u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+ value0.value);
}
- pool_get (sm->sessions, s);
+ pool_get (sm->per_thread_data[cpu_index].sessions, s);
memset (s, 0, sizeof (*s));
s->outside_address_index = ~0;
@@ -148,16 +177,22 @@ create_session_for_static_mapping (snat_main_t *sm,
u->nstaticsessions++;
/* Create list elts */
- pool_get (sm->list_pool, per_user_translation_list_elt);
- clib_dlist_init (sm->list_pool, per_user_translation_list_elt -
- sm->list_pool);
-
- per_user_translation_list_elt->value = s - sm->sessions;
- s->per_user_index = per_user_translation_list_elt - sm->list_pool;
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[cpu_index].sessions;
+ s->per_user_index =
+ per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
s->per_user_list_head_index = u->sessions_per_user_list_head_index;
- clib_dlist_addtail (sm->list_pool, s->per_user_list_head_index,
- per_user_translation_list_elt - sm->list_pool);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
s->in2out = in2out;
s->out2in = out2in;
@@ -165,12 +200,12 @@ create_session_for_static_mapping (snat_main_t *sm,
/* Add to translation hashes */
kv0.key = s->in2out.as_u64;
- kv0.value = s - sm->sessions;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
clib_warning ("in2out key add failed");
kv0.key = s->out2in.as_u64;
- kv0.value = s - sm->sessions;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
clib_warning ("out2in key add failed");
@@ -185,7 +220,8 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
u32 sw_if_index0,
u32 rx_fib_index0,
vlib_node_runtime_t * node,
- u32 next0, f64 now)
+ u32 next0, f64 now,
+ u32 cpu_index)
{
snat_session_key_t key0, sm0;
icmp_echo_header_t *echo0;
@@ -233,12 +269,13 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
/* Create session initiated by host from external network */
s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
- node);
+ node, cpu_index);
if (!s0)
return SNAT_OUT2IN_NEXT_DROP;
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->dst_address.as_u32;
ip0->dst_address = s0->in2out.addr;
@@ -267,8 +304,10 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
@@ -285,6 +324,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
u32 pkts_processed = 0;
snat_main_t * sm = &snat_main;
f64 now = vlib_time_now (vm);
+ u32 cpu_index = os_get_cpu_number ();
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -301,8 +341,8 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
u32 bi0, bi1;
vlib_buffer_t * b0, * b1;
- u32 next0 = SNAT_OUT2IN_NEXT_DROP;
- u32 next1 = SNAT_OUT2IN_NEXT_DROP;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
u32 sw_if_index0, sw_if_index1;
ip4_header_t * ip0, *ip1;
ip_csum_t sum0, sum1;
@@ -353,7 +393,6 @@ snat_out2in_node_fn (vlib_main_t * vm,
rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
sw_if_index0);
- vnet_feature_next (sw_if_index0, &next0, b0);
proto0 = ~0;
proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto0;
@@ -369,7 +408,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
next0 = icmp_out2in_slow_path
(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
- next0, now);
+ next0, now, cpu_index);
goto trace0;
}
@@ -391,12 +430,14 @@ snat_out2in_node_fn (vlib_main_t * vm,
}
/* Create session initiated by host from external network */
- s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node);
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ cpu_index);
if (!s0)
goto trace0;
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->dst_address.as_u32;
ip0->dst_address = s0->in2out.addr;
@@ -439,8 +480,10 @@ snat_out2in_node_fn (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
trace0:
@@ -454,7 +497,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
t->next_index = next0;
t->session_index = ~0;
if (s0)
- t->session_index = s0 - sm->sessions;
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
}
pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -469,8 +512,6 @@ snat_out2in_node_fn (vlib_main_t * vm,
rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
sw_if_index1);
- vnet_feature_next (sw_if_index1, &next1, b1);
-
proto1 = ~0;
proto1 = (ip1->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto1;
@@ -486,7 +527,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
next1 = icmp_out2in_slow_path
(sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
- next1, now);
+ next1, now, cpu_index);
goto trace1;
}
@@ -508,12 +549,14 @@ snat_out2in_node_fn (vlib_main_t * vm,
}
/* Create session initiated by host from external network */
- s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node);
+ s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
+ cpu_index);
if (!s1)
goto trace1;
}
else
- s1 = pool_elt_at_index (sm->sessions, value1.value);
+ s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value1.value);
old_addr1 = ip1->dst_address.as_u32;
ip1->dst_address = s1->in2out.addr;
@@ -556,8 +599,10 @@ snat_out2in_node_fn (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s1))
{
- clib_dlist_remove (sm->list_pool, s1->per_user_index);
- clib_dlist_addtail (sm->list_pool, s1->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_list_head_index,
s1->per_user_index);
}
trace1:
@@ -571,10 +616,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
t->next_index = next1;
t->session_index = ~0;
if (s1)
- t->session_index = s1 - sm->sessions;
+ t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
}
- pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
/* verify speculative enqueues, maybe switch current next frame */
@@ -587,7 +631,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
u32 bi0;
vlib_buffer_t * b0;
- u32 next0 = SNAT_OUT2IN_NEXT_DROP;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
u32 sw_if_index0;
ip4_header_t * ip0;
ip_csum_t sum0;
@@ -621,8 +665,6 @@ snat_out2in_node_fn (vlib_main_t * vm,
rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
sw_if_index0);
- vnet_feature_next (sw_if_index0, &next0, b0);
-
proto0 = ~0;
proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
? SNAT_PROTOCOL_UDP : proto0;
@@ -638,7 +680,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
next0 = icmp_out2in_slow_path
(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
- next0, now);
+ next0, now, cpu_index);
goto trace00;
}
@@ -660,12 +702,14 @@ snat_out2in_node_fn (vlib_main_t * vm,
}
/* Create session initiated by host from external network */
- s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node);
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ cpu_index);
if (!s0)
goto trace00;
}
else
- s0 = pool_elt_at_index (sm->sessions, value0.value);
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
old_addr0 = ip0->dst_address.as_u32;
ip0->dst_address = s0->in2out.addr;
@@ -708,8 +752,10 @@ snat_out2in_node_fn (vlib_main_t * vm,
/* Per-user LRU list maintenance for dynamic translation */
if (!snat_is_session_static (s0))
{
- clib_dlist_remove (sm->list_pool, s0->per_user_index);
- clib_dlist_addtail (sm->list_pool, s0->per_user_list_head_index,
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
s0->per_user_index);
}
trace00:
@@ -723,7 +769,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
t->next_index = next0;
t->session_index = ~0;
if (s0)
- t->session_index = s0 - sm->sessions;
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
}
pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -760,10 +806,189 @@ VLIB_REGISTER_NODE (snat_out2in_node) = {
/* edit / add dispositions here */
.next_nodes = {
[SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
},
};
VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
+/*
+ * Node function for "snat-out2in-worker-handoff".
+ *
+ * For every buffer in the frame a lookup key is built from the IPv4
+ * destination address, the destination port and the RX FIB index, and the
+ * owning thread is looked up in sm->worker_by_out.  Keys not found in the
+ * hash are assigned to the next entry of sm->workers (round-robin) and the
+ * assignment is stored in the hash.  Buffers owned by another thread are
+ * placed in that thread's handoff queue element (sm->fq_out2in_index);
+ * buffers owned by the current thread are put in a frame for
+ * snat_out2in_node.
+ *
+ * Returns the number of buffers in the incoming frame.
+ */
+static uword
+snat_out2in_worker_handoff_fn (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               vlib_frame_t * frame)
+{
+  snat_main_t *sm = &snat_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  u32 n_left_from, *from, *to_next = 0;
+  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+  static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+    = 0;
+  vlib_frame_queue_elt_t *hf = 0;
+  vlib_frame_t *f = 0;
+  int i;
+  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+  u32 next_worker_index = 0;
+  u32 current_worker_index = ~0;
+  u32 cpu_index = os_get_cpu_number ();
+
+  /* First call on this thread: allocate the per-thread bookkeeping vectors */
+  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+    {
+      vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+      vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+                               sm->first_worker_index + sm->num_workers - 1,
+                               (vlib_frame_queue_t *) (~0));
+    }
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  while (n_left_from > 0)
+    {
+      u32 bi0;
+      vlib_buffer_t *b0;
+      u32 sw_if_index0;
+      u32 rx_fib_index0;
+      ip4_header_t * ip0;
+      udp_header_t * udp0;
+      snat_static_mapping_key_t key0;
+      clib_bihash_kv_8_8_t kv0, value0;
+      u8 do_handoff;
+
+      bi0 = from[0];
+      from += 1;
+      n_left_from -= 1;
+
+      b0 = vlib_get_buffer (vm, bi0);
+
+      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+      rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+      ip0 = vlib_buffer_get_current (b0);
+      udp0 = ip4_next_header (ip0);
+
+      /*
+       * NOTE(review): the port is read at the UDP header position without
+       * checking ip0->protocol - confirm this is intended for non-UDP
+       * traffic.
+       */
+      key0.addr = ip0->dst_address;
+      key0.port = udp0->dst_port;
+      key0.fib_index = rx_fib_index0;
+
+      kv0.key = key0.as_u64;
+
+      /* Ever heard of the "user" before? */
+      if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+        {
+          /* No, assign next available worker (RR) */
+          next_worker_index = sm->first_worker_index +
+            sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+
+          /* Add to translated packets worker lookup */
+          kv0.value = next_worker_index;
+          clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
+        }
+      else
+        next_worker_index = value0.value;
+
+      if (PREDICT_FALSE (next_worker_index != cpu_index))
+        {
+          /*
+           * Record the decision for the trace below; previously this
+           * variable was left uninitialized on the handoff path.
+           */
+          do_handoff = 1;
+
+          if (next_worker_index != current_worker_index)
+            {
+              /* Switching target: finalize the count of the previous element */
+              if (hf)
+                hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+              hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
+                                                      next_worker_index,
+                                                      handoff_queue_elt_by_worker_index);
+
+              n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+              to_next_worker = &hf->buffer_index[hf->n_vectors];
+              current_worker_index = next_worker_index;
+            }
+
+          /* enqueue to correct worker thread */
+          to_next_worker[0] = bi0;
+          to_next_worker++;
+          n_left_to_next_worker--;
+
+          /* Element is full: ship it now and forget about it */
+          if (n_left_to_next_worker == 0)
+            {
+              hf->n_vectors = VLIB_FRAME_SIZE;
+              vlib_put_frame_queue_elt (hf);
+              current_worker_index = ~0;
+              handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+              hf = 0;
+            }
+        }
+      else
+        {
+          do_handoff = 0;
+          /* if this is 1st frame */
+          if (!f)
+            {
+              f = vlib_get_frame_to_node (vm, snat_out2in_node.index);
+              to_next = vlib_frame_vector_args (f);
+            }
+
+          to_next[0] = bi0;
+          to_next += 1;
+          f->n_vectors++;
+        }
+
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                         && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+        {
+          snat_out2in_worker_handoff_trace_t *t =
+            vlib_add_trace (vm, node, b0, sizeof (*t));
+          t->next_worker_index = next_worker_index;
+          t->do_handoff = do_handoff;
+        }
+    }
+
+  /* Deliver the locally handled buffers, if any, to snat-out2in */
+  if (f)
+    vlib_put_frame_to_node (vm, snat_out2in_node.index, f);
+
+  if (hf)
+    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+  /* Ship frames to the worker nodes */
+  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+    {
+      if (handoff_queue_elt_by_worker_index[i])
+        {
+          hf = handoff_queue_elt_by_worker_index[i];
+          /*
+           * It works better to let the handoff node
+           * rate-adapt, always ship the handoff queue element.
+           */
+          if (1 || hf->n_vectors == hf->last_n_vectors)
+            {
+              vlib_put_frame_queue_elt (hf);
+              handoff_queue_elt_by_worker_index[i] = 0;
+            }
+          else
+            hf->last_n_vectors = hf->n_vectors;
+        }
+      congested_handoff_queue_by_worker_index[i] =
+        (vlib_frame_queue_t *) (~0);
+    }
+  hf = 0;
+  current_worker_index = ~0;
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
+ .function = snat_out2in_worker_handoff_fn,
+ .name = "snat-out2in-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ /* The node function delivers buffers with vlib_put_frame_to_node and handoff queue elements, not through the next-node slots below. */
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop", /* the only next node declared for this node */
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
+
static inline u32 icmp_out2in_fast (snat_main_t *sm,
vlib_buffer_t * b0,
ip4_header_t * ip0,
@@ -1014,6 +1239,7 @@ VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
/* edit / add dispositions here */
.next_nodes = {
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
[SNAT_OUT2IN_NEXT_DROP] = "error-drop",
},
};
diff --git a/plugins/snat-plugin/snat/snat.api b/plugins/snat-plugin/snat/snat.api
index d7d41f20..a191eed5 100644
--- a/plugins/snat-plugin/snat/snat.api
+++ b/plugins/snat-plugin/snat/snat.api
@@ -238,3 +238,46 @@ define snat_show_config_reply
u32 outside_vrf_id;
u32 inside_vrf_id;
};
+
+/** \brief Set S-NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param worker_mask - S-NAT workers mask
+*/
+define snat_set_workers {
+ u32 client_index;
+ u32 context;
+ u64 worker_mask;
+};
+
+/** \brief Set S-NAT workers reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+*/
+define snat_set_workers_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump S-NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_worker_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief S-NAT workers details response
+ @param context - sender context, to match reply w/ request
+ @param worker_index - worker index
+ @param lcore_id - lcore ID
+ @param name - worker name
+*/
+define snat_worker_details {
+ u32 context;
+ u32 worker_index;
+ u32 lcore_id;
+ u8 name[64];
+};
diff --git a/plugins/snat-plugin/snat/snat.c b/plugins/snat-plugin/snat/snat.c
index 2956e24f..70df44ea 100644
--- a/plugins/snat-plugin/snat/snat.c
+++ b/plugins/snat-plugin/snat/snat.c
@@ -103,6 +103,16 @@ VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
.node_name = "snat-out2in",
.runs_before = VNET_FEATURES ("ip4-lookup"),
};
+VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-in2out-worker-handoff",
+ .runs_before = VNET_FEATURES ("snat-out2in-worker-handoff"), /* ordered ahead of the out2in handoff feature */
+};
+VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-out2in-worker-handoff",
+ .runs_before = VNET_FEATURES ("ip4-lookup"), /* same ordering constraint as the non-handoff snat-out2in feature */
+};
VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
.arc_name = "ip4-unicast",
.node_name = "snat-in2out-fast",
@@ -229,13 +239,13 @@ static int is_snat_address_used_in_static_mapping (snat_main_t *sm,
int snat_del_address (snat_main_t *sm, ip4_address_t addr)
{
- clib_warning("%U", format_ip4_address, &addr);
snat_address_t *a = 0;
snat_session_t *ses;
u32 *ses_to_be_removed = 0, *ses_index;
clib_bihash_kv_8_8_t kv, value;
snat_user_key_t user_key;
snat_user_t *u;
+ snat_main_per_thread_data_t *tsm;
int i;
@@ -261,30 +271,33 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr)
/* Delete sessions using address */
if (a->busy_ports)
{
- pool_foreach (ses, sm->sessions, ({
- if (ses->out2in.addr.as_u32 == addr.as_u32)
- {
- vec_add1 (ses_to_be_removed, ses - sm->sessions);
- kv.key = ses->in2out.as_u64;
- clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0);
- kv.key = ses->out2in.as_u64;
- clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0);
- clib_dlist_remove (sm->list_pool, ses->per_user_index);
- user_key.addr = ses->in2out.addr;
- user_key.fib_index = ses->in2out.fib_index;
- kv.key = user_key.as_u64;
- if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ pool_foreach (ses, tsm->sessions, ({
+ if (ses->out2in.addr.as_u32 == addr.as_u32)
{
- u = pool_elt_at_index (sm->users, value.value);
- u->nsessions--;
+ vec_add1 (ses_to_be_removed, ses - tsm->sessions);
+ kv.key = ses->in2out.as_u64;
+ clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0);
+ kv.key = ses->out2in.as_u64;
+ clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0);
+ clib_dlist_remove (tsm->list_pool, ses->per_user_index);
+ user_key.addr = ses->in2out.addr;
+ user_key.fib_index = ses->in2out.fib_index;
+ kv.key = user_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ u->nsessions--;
+ }
}
- }
- }));
+ }));
- vec_foreach (ses_index, ses_to_be_removed)
- pool_put_index (sm->sessions, ses_index[0]);
+ vec_foreach (ses_index, ses_to_be_removed)
+ pool_put_index (tsm->sessions, ses_index[0]);
- vec_free (ses_to_be_removed);
+ vec_free (ses_to_be_removed);
+ }
}
vec_del1 (sm->addresses, i);
@@ -469,26 +482,35 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
snat_user_key_t u_key;
snat_user_t *u;
dlist_elt_t * head, * elt;
- u32 elt_index, head_index;
+ u32 elt_index, head_index, del_elt_index;
u32 ses_index;
+ u64 user_index;
snat_session_t * s;
+ snat_main_per_thread_data_t *tsm;
u_key.addr = m->local_addr;
u_key.fib_index = m->fib_index;
kv.key = u_key.as_u64;
if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
{
- u = pool_elt_at_index (sm->users, value.value);
+ user_index = value.value;
+ clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value);
+ tsm = vec_elt_at_index (sm->per_thread_data, value.value);
+ u = pool_elt_at_index (tsm->users, user_index);
if (u->nstaticsessions)
{
head_index = u->sessions_per_user_list_head_index;
- head = pool_elt_at_index (sm->list_pool, head_index);
+ head = pool_elt_at_index (tsm->list_pool, head_index);
elt_index = head->next;
- elt = pool_elt_at_index (sm->list_pool, elt_index);
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
ses_index = elt->value;
while (ses_index != ~0)
{
- s = pool_elt_at_index (sm->sessions, ses_index);
+ s = pool_elt_at_index (tsm->sessions, ses_index);
+ del_elt_index = elt_index;
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
if (!addr_only)
{
@@ -496,35 +518,25 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
(clib_net_to_host_u16 (s->out2in.port) != e_port))
continue;
}
+
value.key = s->in2out.as_u64;
clib_bihash_add_del_8_8 (&sm->in2out, &value, 0);
value.key = s->out2in.as_u64;
clib_bihash_add_del_8_8 (&sm->out2in, &value, 0);
- pool_put (sm->sessions, s);
+ pool_put (tsm->sessions, s);
+
+ clib_dlist_remove (tsm->list_pool, del_elt_index);
+ pool_put_index (tsm->list_pool, del_elt_index);
+ u->nstaticsessions--;
if (!addr_only)
break;
-
- elt_index = elt->next;
- elt = pool_elt_at_index (sm->list_pool, elt_index);
- ses_index = elt->value;
}
if (addr_only)
{
- while ((elt_index = clib_dlist_remove_head(sm->list_pool, head_index)) != ~0)
- pool_put_index (sm->list_pool, elt_index);
- pool_put (sm->users, u);
+ pool_put (tsm->users, u);
clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0);
}
- else
- {
- if (ses_index != ~0)
- {
- clib_dlist_remove (sm->list_pool, elt_index);
- pool_put (sm->list_pool, elt);
- u->nstaticsessions--;
- }
- }
}
}
}
@@ -545,11 +557,22 @@ static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
feature_name = is_inside ? "snat-in2out-fast" : "snat-out2in-fast";
else
- feature_name = is_inside ? "snat-in2out" : "snat-out2in";
+ {
+ if (sm->num_workers > 1)
+ feature_name = is_inside ? "snat-in2out-worker-handoff" : "snat-out2in-worker-handoff";
+ else
+ feature_name = is_inside ? "snat-in2out" : "snat-out2in";
+ }
vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
!is_del, 0, 0);
+ if (sm->fq_in2out_index == ~0)
+ sm->fq_in2out_index = vlib_frame_queue_main_init (snat_in2out_node.index, 0);
+
+ if (sm->fq_out2in_index == ~0)
+ sm->fq_out2in_index = vlib_frame_queue_main_init (snat_out2in_node.index, 0);
+
pool_foreach (i, sm->interfaces,
({
if (i->sw_if_index == sw_if_index)
@@ -573,6 +596,26 @@ static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
return 0;
}
+/*
+ * Replace the list of workers used by S-NAT (sm->workers) with the indices
+ * of the bits set in bitmap.
+ *
+ * Returns 0 on success, VNET_API_ERROR_FEATURE_DISABLED when fewer than two
+ * workers are available, or VNET_API_ERROR_INVALID_WORKER when the highest
+ * set bit is not below sm->num_workers.  On error sm->workers is left as is.
+ */
+static int snat_set_workers (uword * bitmap)
+{
+  snat_main_t *sm = &snat_main;
+  int worker;
+  int rv = 0;
+
+  if (sm->num_workers < 2)
+    rv = VNET_API_ERROR_FEATURE_DISABLED;
+  else if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
+    rv = VNET_API_ERROR_INVALID_WORKER;
+  else
+    {
+      /* Drop the old selection and rebuild it from the bitmap */
+      vec_free (sm->workers);
+      clib_bitmap_foreach (worker, bitmap,
+        ({
+          vec_add1(sm->workers, worker);
+        }));
+    }
+
+  return rv;
+}
+
static void
vl_api_snat_add_address_range_t_handler
(vl_api_snat_add_address_range_t * mp)
@@ -925,6 +968,101 @@ static void *vl_api_snat_show_config_t_print
FINISH;
}
+/*
+ * API handler for SNAT_SET_WORKERS: converts the 64-bit worker mask from the
+ * request into a bitmap, applies it with snat_set_workers() and sends the
+ * return code back in SNAT_SET_WORKERS_REPLY.
+ */
+static void
+vl_api_snat_set_workers_t_handler
+(vl_api_snat_set_workers_t * mp)
+{
+  snat_main_t * sm = &snat_main;
+  vl_api_snat_set_workers_reply_t * rmp;
+  int rv;
+
+  if (sm->num_workers >= 2)
+    {
+      u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+      uword *bitmap = 0;
+
+      bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+      rv = snat_set_workers(bitmap);
+      clib_bitmap_free (bitmap);
+    }
+  else
+    rv = VNET_API_ERROR_FEATURE_DISABLED;
+
+  REPLY_MACRO (VL_API_SNAT_SET_WORKERS_REPLY);
+}
+
+/*
+ * Pretty-printer for SNAT_SET_WORKERS: emits "SCRIPT: snat_set_workers "
+ * followed by the comma-separated indices of the bits set in the mask.
+ */
+static void *vl_api_snat_set_workers_t_print
+(vl_api_snat_set_workers_t *mp, void * handle)
+{
+  u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+  uword *bitmap = 0;
+  u8 * s = format (0, "SCRIPT: snat_set_workers ");
+  u8 first = 1;
+  int i;
+
+  bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+  clib_bitmap_foreach (i, bitmap,
+    ({
+      /* No separator in front of the first index */
+      s = format (s, first ? "%d" : ",%d", i);
+      first = 0;
+    }));
+  clib_bitmap_free (bitmap);
+  FINISH;
+}
+
+/*
+ * Build one SNAT_WORKER_DETAILS message for the given S-NAT worker index and
+ * send it to the client queue q.  The worker's lcore id and name are taken
+ * from the vlib_worker_threads entry at sm->first_worker_index + worker_index.
+ */
+static void
+send_snat_worker_details
+(u32 worker_index, unix_shared_memory_queue_t * q, u32 context)
+{
+  snat_main_t * sm = &snat_main;
+  vlib_worker_thread_t *w =
+    vlib_worker_threads + sm->first_worker_index + worker_index;
+  vl_api_snat_worker_details_t *rmp = vl_msg_api_alloc (sizeof (*rmp));
+
+  memset (rmp, 0, sizeof (*rmp));
+  rmp->_vl_msg_id = ntohs (VL_API_SNAT_WORKER_DETAILS+sm->msg_id_base);
+  rmp->context = context;
+  rmp->lcore_id = htonl (w->lcore_id);
+  rmp->worker_index = htonl (worker_index);
+  /* The buffer was zeroed above, so copying len - 1 bytes keeps it terminated */
+  strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1);
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * API handler for SNAT_WORKER_DUMP: sends one SNAT_WORKER_DETAILS message
+ * per entry of sm->workers to the requesting client.  Does nothing when the
+ * client's input queue cannot be found.
+ */
+static void
+vl_api_snat_worker_dump_t_handler
+(vl_api_snat_worker_dump_t * mp)
+{
+  snat_main_t * sm = &snat_main;
+  unix_shared_memory_queue_t *q =
+    vl_api_client_index_to_input_queue (mp->client_index);
+  u32 n;
+
+  if (!q)
+    return;
+
+  for (n = 0; n < vec_len (sm->workers); n++)
+    send_snat_worker_details(sm->workers[n], q, mp->context);
+}
+
+/* Pretty-printer for SNAT_WORKER_DUMP; the request carries no arguments. */
+static void *vl_api_snat_worker_dump_t_print
+(vl_api_snat_worker_dump_t *mp, void * handle)
+{
+  u8 *s = format (0, "SCRIPT: snat_worker_dump ");
+
+  FINISH;
+}
+
/* List of message types that this plugin understands */
#define foreach_snat_plugin_api_msg \
_(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \
@@ -934,7 +1072,9 @@ _(SNAT_CONTROL_PING, snat_control_ping) \
_(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump) \
_(SNAT_SHOW_CONFIG, snat_show_config) \
_(SNAT_ADDRESS_DUMP, snat_address_dump) \
-_(SNAT_INTERFACE_DUMP, snat_interface_dump)
+_(SNAT_INTERFACE_DUMP, snat_interface_dump) \
+_(SNAT_SET_WORKERS, snat_set_workers) \
+_(SNAT_WORKER_DUMP, snat_worker_dump)
/* Set up the API message handling tables */
static clib_error_t *
@@ -984,6 +1124,11 @@ static clib_error_t * snat_init (vlib_main_t * vm)
ip4_main_t * im = &ip4_main;
ip_lookup_main_t * lm = &im->lookup_main;
u8 * name;
+ uword *p;
+ vlib_thread_registration_t *tr;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ uword *bitmap = 0;
+ u32 i;
name = format (0, "snat_%08x%c", api_version, 0);
@@ -996,6 +1141,32 @@ static clib_error_t * snat_init (vlib_main_t * vm)
sm->ip4_main = im;
sm->ip4_lookup_main = lm;
sm->api_main = &api_main;
+ sm->first_worker_index = 0;
+ sm->next_worker = 0;
+ sm->num_workers = 0;
+ sm->workers = 0;
+ sm->fq_in2out_index = ~0;
+ sm->fq_out2in_index = ~0;
+
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ if (p)
+ {
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr)
+ {
+ sm->num_workers = tr->count;
+ sm->first_worker_index = tr->first_index;
+ }
+ }
+
+ /* Use all available workers by default */
+ if (sm->num_workers > 1)
+ {
+ for (i=0; i < sm->num_workers; i++)
+ bitmap = clib_bitmap_set (bitmap, i, 1);
+ snat_set_workers(bitmap);
+ clib_bitmap_free (bitmap);
+ }
error = snat_plugin_api_hookup (vm);
@@ -1363,6 +1534,66 @@ VLIB_CLI_COMMAND (add_static_mapping_command, static) = {
};
static clib_error_t *
+/*
+ * CLI handler for "set snat workers <workers-list>".
+ *
+ * Parses a bitmap list from the command line and applies it with
+ * snat_set_workers().  Returns 0 on success (or when no line of input is
+ * available) and a clib error for unknown input, a missing list, or an
+ * error code returned by snat_set_workers().
+ *
+ * All exits after the line input has been acquired go through "done" so the
+ * line input and the parsed bitmap are always released.
+ */
+set_workers_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  uword *bitmap = 0;
+  int rv = 0;
+  clib_error_t *error = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap))
+        ;
+      else
+        {
+          /* Report the position in the line being parsed, not the outer input */
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (bitmap == 0)
+    {
+      error = clib_error_return (0, "List of workers must be specified.");
+      goto done;
+    }
+
+  rv = snat_set_workers(bitmap);
+
+  switch (rv)
+    {
+    case VNET_API_ERROR_INVALID_WORKER:
+      error = clib_error_return (0, "Invalid worker(s).");
+      break;
+    case VNET_API_ERROR_FEATURE_DISABLED:
+      error = clib_error_return (0,
+        "Supported only if 2 or more workers available.");
+      break;
+    default:
+      break;
+    }
+
+done:
+  clib_bitmap_free (bitmap);
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{set snat workers}
+ * Set SNAT workers if 2 or more workers available, use:
+ * vpp# set snat workers 0-2,5
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_workers_command, static) = {
+ .path = "set snat workers",
+ .function = set_workers_command_fn, /* parses the worker list and applies it via snat_set_workers() */
+ .short_help =
+ "set snat workers <workers-list>",
+};
+
+static clib_error_t *
snat_config (vlib_main_t * vm, unformat_input_t * input)
{
snat_main_t * sm = &snat_main;
@@ -1377,6 +1608,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
u32 static_mapping_memory_size = 64<<20;
u8 static_mapping_only = 0;
u8 static_mapping_connection_tracking = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -1425,6 +1657,14 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
if (!static_mapping_only ||
(static_mapping_only && static_mapping_connection_tracking))
{
+ clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets,
+ user_memory_size);
+
+ clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets,
+ user_memory_size);
+
+ vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
+
clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets,
translation_memory_size);
@@ -1485,7 +1725,7 @@ u8 * format_snat_session (u8 * s, va_list * args)
u8 * format_snat_user (u8 * s, va_list * args)
{
- snat_main_t * sm = va_arg (*args, snat_main_t *);
+ snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *);
snat_user_t * u = va_arg (*args, snat_user_t *);
int verbose = va_arg (*args, int);
dlist_elt_t * head, * elt;
@@ -1552,6 +1792,9 @@ show_snat_command_fn (vlib_main_t * vm,
snat_static_mapping_t *m;
snat_interface_t *i;
vnet_main_t *vnm = vnet_get_main();
+ snat_main_per_thread_data_t *tsm;
+ u32 users_num = 0, sessions_num = 0, *worker;
+ uword j = 0;
if (unformat (input, "detail"))
verbose = 1;
@@ -1581,6 +1824,20 @@ show_snat_command_fn (vlib_main_t * vm,
}));
}
+ if (sm->num_workers > 1)
+ {
+ vlib_cli_output (vm, "%d workers", vec_len (sm->workers));
+ if (verbose > 0)
+ {
+ vec_foreach (worker, sm->workers)
+ {
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + *worker + sm->first_worker_index;
+ vlib_cli_output (vm, " %s", w->name);
+ }
+ }
+ }
+
if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
{
vlib_cli_output (vm, "%d static mappings",
@@ -1596,11 +1853,17 @@ show_snat_command_fn (vlib_main_t * vm,
}
else
{
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ users_num += pool_elts (tsm->users);
+ sessions_num += pool_elts (tsm->sessions);
+ }
+
vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions,"
" %d static mappings",
- pool_elts (sm->users),
+ users_num,
vec_len (sm->addresses),
- pool_elts (sm->sessions),
+ sessions_num,
pool_elts (sm->static_mappings));
if (verbose > 0)
@@ -1609,13 +1872,29 @@ show_snat_command_fn (vlib_main_t * vm,
verbose - 1);
vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in,
verbose - 1);
- vlib_cli_output (vm, "%d list pool elements",
- pool_elts (sm->list_pool));
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in,
+ verbose - 1);
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out,
+ verbose - 1);
+ vec_foreach_index (j, sm->per_thread_data)
+ {
+ tsm = vec_elt_at_index (sm->per_thread_data, j);
- pool_foreach (u, sm->users,
- ({
- vlib_cli_output (vm, "%U", format_snat_user, sm, u, verbose - 1);
- }));
+ if (pool_elts (tsm->users) == 0)
+ continue;
+
+ vlib_worker_thread_t *w = vlib_worker_threads + j;
+ vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name,
+ w->lcore_id);
+ vlib_cli_output (vm, " %d list pool elements",
+ pool_elts (tsm->list_pool));
+
+ pool_foreach (u, tsm->users,
+ ({
+ vlib_cli_output (vm, " %U", format_snat_user, tsm, u,
+ verbose - 1);
+ }));
+ }
if (pool_elts (sm->static_mappings))
{
diff --git a/plugins/snat-plugin/snat/snat.h b/plugins/snat-plugin/snat/snat.h
index ca10b76a..cb31dc51 100644
--- a/plugins/snat-plugin/snat/snat.h
+++ b/plugins/snat-plugin/snat/snat.h
@@ -132,6 +132,17 @@ typedef struct {
} snat_interface_t;
typedef struct {
+ /* User pool */
+ snat_user_t * users;
+
+ /* Session pool */
+ snat_session_t * sessions;
+
+ /* Pool of doubly-linked list elements */
+ dlist_elt_t * list_pool;
+} snat_main_per_thread_data_t;
+
+typedef struct {
/* Main lookup tables */
clib_bihash_8_8_t out2in;
clib_bihash_8_8_t in2out;
@@ -139,18 +150,26 @@ typedef struct {
/* Find-a-user => src address lookup */
clib_bihash_8_8_t user_hash;
+ /* Non-translated packets worker lookup => src address + VRF */
+ clib_bihash_8_8_t worker_by_in;
+
+ /* Translated packets worker lookup => IP address + port number */
+ clib_bihash_8_8_t worker_by_out;
+
+ u32 num_workers;
+ u32 first_worker_index;
+ u32 next_worker;
+ u32 * workers;
+
+ /* Per thread data */
+ snat_main_per_thread_data_t * per_thread_data;
+
/* Find a static mapping by local */
clib_bihash_8_8_t static_mapping_by_local;
/* Find a static mapping by external */
clib_bihash_8_8_t static_mapping_by_external;
- /* User pool */
- snat_user_t * users;
-
- /* Session pool */
- snat_session_t * sessions;
-
/* Static mapping pool */
snat_static_mapping_t * static_mappings;
@@ -160,12 +179,13 @@ typedef struct {
/* Vector of outside addresses */
snat_address_t * addresses;
- /* Pool of doubly-linked list elements */
- dlist_elt_t * list_pool;
-
/* Randomize port allocation order */
u32 random_seed;
+ /* Worker handoff index */
+ u32 fq_in2out_index;
+ u32 fq_out2in_index;
+
/* Config parameters */
u8 static_mapping_only;
u8 static_mapping_connection_tracking;
@@ -196,6 +216,8 @@ extern vlib_node_registration_t snat_in2out_node;
extern vlib_node_registration_t snat_out2in_node;
extern vlib_node_registration_t snat_in2out_fast_node;
extern vlib_node_registration_t snat_out2in_fast_node;
+extern vlib_node_registration_t snat_in2out_worker_handoff_node;
+extern vlib_node_registration_t snat_out2in_worker_handoff_node;
void snat_free_outside_address_and_port (snat_main_t * sm,
snat_session_key_t * k,
diff --git a/plugins/snat-plugin/snat/snat_test.c b/plugins/snat-plugin/snat/snat_test.c
index e03c9eec..2a003ba6 100644
--- a/plugins/snat-plugin/snat/snat_test.c
+++ b/plugins/snat-plugin/snat/snat_test.c
@@ -60,7 +60,8 @@ snat_test_main_t snat_test_main;
#define foreach_standard_reply_retval_handler \
_(snat_add_address_range_reply) \
_(snat_interface_add_del_feature_reply) \
-_(snat_add_static_mapping_reply)
+_(snat_add_static_mapping_reply) \
+_(snat_set_workers_reply)
#define _(n) \
static void vl_api_##n##_t_handler \
@@ -91,7 +92,9 @@ _(SNAT_CONTROL_PING_REPLY, snat_control_ping_reply) \
_(SNAT_STATIC_MAPPING_DETAILS, snat_static_mapping_details) \
_(SNAT_SHOW_CONFIG_REPLY, snat_show_config_reply) \
_(SNAT_ADDRESS_DETAILS, snat_address_details) \
-_(SNAT_INTERFACE_DETAILS, snat_interface_details)
+_(SNAT_INTERFACE_DETAILS, snat_interface_details) \
+_(SNAT_SET_WORKERS_REPLY, snat_set_workers_reply) \
+_(SNAT_WORKER_DETAILS, snat_worker_details)
/* M: construct, but don't yet send a message */
#define M(T,t) \
@@ -473,6 +476,69 @@ static int api_snat_interface_dump(vat_main_t * vam)
return 0;
}
+/*
+ * VAT command "snat_set_workers <workers_bitmap>".
+ *
+ * Parses a bitmap list from the VAT input and sends its first word as the
+ * worker mask in a SNAT_SET_WORKERS request, then waits for the reply.
+ * Returns -99 on unknown input or when no bitmap was given; otherwise the
+ * W macro supplies the return value.
+ */
+static int api_snat_set_workers (vat_main_t * vam)
+{
+  snat_test_main_t * sm = &snat_test_main;
+  unformat_input_t * i = vam->input;
+  f64 timeout;
+  vl_api_snat_set_workers_t * mp;
+  /* Start from an empty bitmap so the checks below are well defined */
+  uword *bitmap = 0;
+
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "%U", unformat_bitmap_list, &bitmap))
+        ;
+      else
+        {
+          clib_warning("unknown input '%U'", format_unformat_error, i);
+          clib_bitmap_free (bitmap);
+          return -99;
+        }
+    }
+
+  /* Without any argument there is no bitmap word to read */
+  if (bitmap == 0)
+    {
+      clib_warning("workers bitmap must be specified");
+      return -99;
+    }
+
+  M(SNAT_SET_WORKERS, snat_set_workers);
+  mp->worker_mask = clib_host_to_net_u64 (bitmap[0]);
+
+  /* The mask has been copied into the message; release the bitmap now */
+  clib_bitmap_free (bitmap);
+
+  S; W;
+
+  /* NOTREACHED */
+  return 0;
+}
+
+/*
+ * Reply handler for SNAT_WORKER_DETAILS: prints the worker index, name and
+ * lcore id carried in the message to the VAT output stream.
+ */
+static void vl_api_snat_worker_details_t_handler
+  (vl_api_snat_worker_details_t *mp)
+{
+  vat_main_t *vam = snat_test_main.vat_main;
+
+  fformat (vam->ofp,
+           "worker_index %d (%s at lcore %u)\n",
+           ntohl (mp->worker_index),
+           mp->name,
+           ntohl (mp->lcore_id));
+}
+
+/*
+ * VAT command "snat_worker_dump".
+ *
+ * Sends a SNAT_WORKER_DUMP request followed by a control ping used for
+ * synchronization, then waits for the reply.  Returns -99 when JSON output
+ * is requested, since it is not supported here.
+ */
+static int api_snat_worker_dump(vat_main_t * vam)
+{
+  snat_test_main_t * sm = &snat_test_main;
+  f64 timeout;
+  vl_api_snat_worker_dump_t * mp;
+
+  if (vam->json_output)
+    {
+      /* Name this command in the warning (it used to say snat_address_dump) */
+      clib_warning ("JSON output not supported for snat_worker_dump");
+      return -99;
+    }
+
+  M(SNAT_WORKER_DUMP, snat_worker_dump);
+  S;
+  /* Use a control ping for synchronization */
+  {
+    vl_api_snat_control_ping_t *mp;
+    M (SNAT_CONTROL_PING, snat_control_ping);
+    S;
+  }
+  W;
+  /* NOTREACHED */
+  return 0;
+}
+
/*
* List of messages that the api test plugin sends,
* and that the data plane plugin processes
@@ -483,10 +549,12 @@ _(snat_interface_add_del_feature, \
"<intfc> | sw_if_index <id> [in] [out] [del]") \
_(snat_add_static_mapping, "local_addr <ip> external_addr <ip> " \
"[local_port <n>] [external_port <n>] [vrf <table-id>] [del]") \
+_(snat_set_workers, "<wokrers_bitmap>") \
_(snat_static_mapping_dump, "") \
_(snat_show_config, "") \
_(snat_address_dump, "") \
-_(snat_interface_dump, "")
+_(snat_interface_dump, "") \
+_(snat_worker_dump, "")
void vat_api_hookup (vat_main_t *vam)
{
diff --git a/vnet/etc/scripts/snat b/vnet/etc/scripts/snat
new file mode 100644
index 00000000..87fd699e
--- /dev/null
+++ b/vnet/etc/scripts/snat
@@ -0,0 +1,34 @@
+packet-generator new {
+ name f1
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 0
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3000 -> 3001
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+
+packet-generator new {
+ name f2
+ limit 1000000
+ node ip4-input
+ size 64-64
+ no-recycle
+ worker 1
+ data {
+ UDP: 10.0.0.3 -> 172.16.1.2
+ UDP: 3005 -> 3006
+ length 128 checksum 0 incrementing 1
+ }
+}
+
+snat add address 172.16.1.3
+ip route 172.16.1.2/32 via drop
+set int ip address pg0 10.0.0.1/24
+set int snat in pg0
+trace add pg-input 10