aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/nat
diff options
context:
space:
mode:
author Matus Fabian <matfabia@cisco.com> 2018-09-03 05:02:23 -0700
committer Damjan Marion <dmarion@me.com> 2018-09-03 14:48:54 +0000
commit ea5b5be4eeb0f4cd80cb466bd6e31cad33c57960 (patch)
tree b40903f9c38e53e34fd0cd86613046838aca9505 /src/plugins/nat
parent 7ff0a265a8b5692268c0f44385712c7be8e32775 (diff)
NAT44: client-IP based session affinity for load-balancing (VPP-1297)
Enable client-IP based session affinity per LB NAT rule with specific timeout. Change-Id: I9aade152e330218d21dfda99cc5e984d769ab806 Signed-off-by: Matus Fabian <matfabia@cisco.com>
Diffstat (limited to 'src/plugins/nat')
-rw-r--r--src/plugins/nat/CMakeLists.txt1
-rwxr-xr-xsrc/plugins/nat/in2out.c20
-rw-r--r--src/plugins/nat/nat.api6
-rwxr-xr-xsrc/plugins/nat/nat.c47
-rw-r--r--src/plugins/nat/nat.h20
-rw-r--r--src/plugins/nat/nat44_cli.c15
-rw-r--r--src/plugins/nat/nat_affinity.c269
-rw-r--r--src/plugins/nat/nat_affinity.h142
-rw-r--r--src/plugins/nat/nat_api.c3
-rwxr-xr-xsrc/plugins/nat/out2in.c41
10 files changed, 524 insertions, 40 deletions
diff --git a/src/plugins/nat/CMakeLists.txt b/src/plugins/nat/CMakeLists.txt
index ef82213412b..20cf0e70a9e 100644
--- a/src/plugins/nat/CMakeLists.txt
+++ b/src/plugins/nat/CMakeLists.txt
@@ -38,6 +38,7 @@ add_vpp_plugin(nat
nat66_cli.c
nat66_in2out.c
nat66_out2in.c
+ nat_affinity.c
API_FILES
nat.api
diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c
index c900393a3a1..0fe36338397 100755
--- a/src/plugins/nat/in2out.c
+++ b/src/plugins/nat/in2out.c
@@ -258,7 +258,7 @@ snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
&value0))
{
/* or is static mappings */
- if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
return 0;
}
else
@@ -387,7 +387,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
key1.protocol = key0->protocol;
/* First try to match static mapping by local address and port */
- if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0))
+ if (snat_static_mapping_match (sm, *key0, &key1, 0, 0, 0, 0, 0))
{
/* Try to create dynamic translation */
if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index0,
@@ -674,7 +674,7 @@ u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
}
key0.fib_index = rx_fib_index0;
- if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 0, &is_addr_only, 0, 0, 0))
{
if (PREDICT_FALSE(snat_not_translate_fast(sm, node, sw_if_index0, ip0,
IP_PROTOCOL_ICMP, rx_fib_index0)))
@@ -875,7 +875,7 @@ snat_hairpinning (snat_main_t *sm,
kv0.key = key0.as_u64;
/* Check if destination is static mappings */
- if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
new_dst_addr0 = sm0.addr.as_u32;
new_dst_port0 = sm0.port;
@@ -1006,7 +1006,7 @@ snat_icmp_hairpinning (snat_main_t *sm,
if (rv)
{
/* or static mappings */
- if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
new_dst_addr0 = sm0.addr.as_u32;
vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
@@ -2031,7 +2031,7 @@ nat44_reass_hairpinning (snat_main_t *sm,
udp0 = ip4_next_header (ip0);
/* Check if destination is static mappings */
- if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
new_dst_addr0 = sm0.addr.as_u32;
new_dst_port0 = sm0.port;
@@ -2535,7 +2535,7 @@ slow_path_ed (snat_main_t *sm,
snat_session_t *s;
snat_user_t *u;
snat_session_key_t key0, key1;
- u8 lb = 0, is_sm = 0;
+ lb_nat_type_t lb = 0, is_sm = 0;
u32 address_index = ~0;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
nat_ed_ses_key_t *key = (nat_ed_ses_key_t *) kv->key;
@@ -2565,7 +2565,7 @@ slow_path_ed (snat_main_t *sm,
key0.fib_index = rx_fib_index;
key1.fib_index = sm->outside_fib_index;
/* First try to match static mapping by local address and port */
- if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb))
+ if (snat_static_mapping_match (sm, key0, &key1, 0, 0, 0, &lb, 0))
{
/* Try to create dynamic translation */
if (snat_alloc_outside_address_and_port (sm->addresses, rx_fib_index,
@@ -2691,7 +2691,7 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t *node,
key0.protocol = proto;
key0.fib_index = sm->outside_fib_index;
/* or is static mappings */
- if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0))
+ if (!snat_static_mapping_match(sm, key0, &key1, 1, 0, 0, 0, 0))
return 0;
}
else
@@ -5321,7 +5321,7 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm,
key0.port = udp0->src_port;
key0.fib_index = rx_fib_index0;
- if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 0, 0, 0, 0, 0))
{
b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
next0= SNAT_IN2OUT_NEXT_DROP;
diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api
index 8e37567a9e8..f1c95b2d65b 100644
--- a/src/plugins/nat/nat.api
+++ b/src/plugins/nat/nat.api
@@ -668,6 +668,8 @@ typeonly manual_endian define nat44_lb_addr_port {
local address of internal host
@param out2in_only - if 1 rule match only out2in direction
@param tag - opaque string tag
+ @param affinity - if 0 disabled, otherwise client IP affinity sticky time
+ in seconds
@param local_num - number of local network nodes
@param locals - local network nodes
*/
@@ -682,6 +684,7 @@ autoreply manual_endian define nat44_add_del_lb_static_mapping {
u8 self_twice_nat;
u8 out2in_only;
u8 tag[64];
+ u32 affinity;
u8 local_num;
vl_api_nat44_lb_addr_port_t locals[local_num];
};
@@ -707,6 +710,8 @@ define nat44_lb_static_mapping_dump {
local address of internal host
@param out2in_only - if 1 rule match only out2in direction
@param tag - opaque string tag
+ @param affinity - if 0 disabled, otherwise client IP affinity sticky time
+ in seconds
@param local_num - number of local network nodes
@param locals - local network nodes
*/
@@ -719,6 +724,7 @@ manual_endian define nat44_lb_static_mapping_details {
u8 self_twice_nat;
u8 out2in_only;
u8 tag[64];
+ u32 affinity;
u8 local_num;
vl_api_nat44_lb_addr_port_t locals[local_num];
};
diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c
index 364d5f5ce94..0ce1a60c976 100755
--- a/src/plugins/nat/nat.c
+++ b/src/plugins/nat/nat.c
@@ -28,6 +28,7 @@
#include <nat/dslite.h>
#include <nat/nat_reass.h>
#include <nat/nat_inlines.h>
+#include <nat/nat_affinity.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/ip4_fib.h>
@@ -211,6 +212,9 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
/* session lookup tables */
if (is_ed_session (s))
{
+ if (is_affinity_sessions (s))
+ nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
+ s->in2out.protocol, s->out2in.port);
ed_key.l_addr = s->out2in.addr;
ed_key.r_addr = s->ext_host_addr;
ed_key.fib_index = s->out2in.fib_index;
@@ -230,7 +234,6 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index)
ed_kv.key[1] = ed_key.as_u64[1];
if (clib_bihash_add_del_16_8 (&tsm->out2in_ed, &ed_kv, 0))
nat_log_warn ("out2in_ed key del failed");
-
ed_key.l_addr = s->in2out.addr;
ed_key.fib_index = s->in2out.fib_index;
if (!snat_is_unk_proto_session (s))
@@ -1259,7 +1262,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
snat_protocol_t proto,
nat44_lb_addr_port_t *locals, u8 is_add,
twice_nat_type_t twice_nat, u8 out2in_only,
- u8 *tag)
+ u8 *tag, u32 affinity)
{
snat_main_t * sm = &snat_main;
snat_static_mapping_t *m;
@@ -1343,6 +1346,13 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
m->proto = proto;
m->twice_nat = twice_nat;
m->out2in_only = out2in_only;
+ m->affinity = affinity;
+
+ if (affinity)
+ m->affinity_per_service_list_head_index =
+ nat_affinity_get_per_service_list_head_index();
+ else
+ m->affinity_per_service_list_head_index = ~0;
m_key.addr = m->external_addr;
m_key.port = m->external_port;
@@ -1499,6 +1509,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
}
}
}
+ if (m->affinity)
+ nat_affinity_flush_service (m->affinity_per_service_list_head_index);
vec_free(m->locals);
vec_free(m->tag);
vec_free(m->workers);
@@ -2173,13 +2185,15 @@ int snat_static_mapping_match (snat_main_t * sm,
u8 by_external,
u8 *is_addr_only,
twice_nat_type_t *twice_nat,
- u8 *lb)
+ lb_nat_type_t *lb,
+ ip4_address_t * ext_host_addr)
{
clib_bihash_kv_8_8_t kv, value;
snat_static_mapping_t *m;
snat_session_key_t m_key;
clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
u32 rand, lo = 0, hi, mid;
+ u8 backend_index;
m_key.fib_index = match.fib_index;
if (by_external)
@@ -2210,6 +2224,19 @@ int snat_static_mapping_match (snat_main_t * sm,
{
if (vec_len (m->locals))
{
+ if (PREDICT_FALSE(lb != 0))
+ *lb = m->affinity ? AFFINITY_LB_NAT : LB_NAT;
+ if (m->affinity)
+ {
+ if (nat_affinity_find_and_lock (ext_host_addr[0], match.addr,
+ match.protocol, match.port, &backend_index))
+ goto get_local;
+
+ mapping->addr = m->locals[backend_index].addr;
+ mapping->port = clib_host_to_net_u16 (m->locals[backend_index].port);
+ mapping->fib_index = m->locals[backend_index].fib_index;
+ goto end;
+ }
get_local:
hi = vec_len (m->locals) - 1;
rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
@@ -2231,9 +2258,18 @@ get_local:
mapping->addr = m->locals[lo].addr;
mapping->port = clib_host_to_net_u16 (m->locals[lo].port);
mapping->fib_index = m->locals[lo].fib_index;
+ if (m->affinity)
+ {
+ if (nat_affinity_create_and_lock (ext_host_addr[0], match.addr,
+ match.protocol, match.port, lo, m->affinity,
+ m->affinity_per_service_list_head_index))
+ nat_log_info ("create affinity record failed");
+ }
}
else
{
+ if (PREDICT_FALSE(lb != 0))
+ *lb = NO_LB_NAT;
mapping->fib_index = m->fib_index;
mapping->addr = m->local_addr;
/* Address only mapping doesn't change port */
@@ -2251,15 +2287,13 @@ get_local:
mapping->fib_index = sm->outside_fib_index;
}
+end:
if (PREDICT_FALSE(is_addr_only != 0))
*is_addr_only = m->addr_only;
if (PREDICT_FALSE(twice_nat != 0))
*twice_nat = m->twice_nat;
- if (PREDICT_FALSE(lb != 0))
- *lb = vec_len (m->locals) > 0;
-
return 0;
}
@@ -2904,6 +2938,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input)
sm->out2in_node_index = nat44_ed_out2in_node.index;
sm->icmp_match_in2out_cb = icmp_match_in2out_ed;
sm->icmp_match_out2in_cb = icmp_match_out2in_ed;
+ nat_affinity_init (vm);
}
else
{
diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h
index 76f57542d16..660fb4c2965 100644
--- a/src/plugins/nat/nat.h
+++ b/src/plugins/nat/nat.h
@@ -142,6 +142,7 @@ typedef enum {
#define SNAT_SESSION_FLAG_TWICE_NAT 8
#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT 16
#define SNAT_SESSION_FLAG_FWD_BYPASS 32
+#define SNAT_SESSION_FLAG_AFFINITY 64
#define NAT_INTERFACE_FLAG_IS_INSIDE 1
#define NAT_INTERFACE_FLAG_IS_OUTSIDE 2
@@ -241,6 +242,12 @@ typedef enum {
TWICE_NAT_SELF,
} twice_nat_type_t;
+/* Load-balancing kind reported by snat_static_mapping_match() through its
+ * "lb" output argument. */
+typedef enum {
+ /* mapping has no list of local backends */
+ NO_LB_NAT,
+ /* mapping has local backends, affinity timeout is zero */
+ LB_NAT,
+ /* mapping has local backends and a non-zero affinity timeout */
+ AFFINITY_LB_NAT,
+} lb_nat_type_t;
+
typedef struct {
ip4_address_t local_addr;
ip4_address_t external_addr;
@@ -252,9 +259,11 @@ typedef struct {
u32 vrf_id;
u32 fib_index;
snat_protocol_t proto;
+ u32 affinity;
u32 *workers;
u8 *tag;
nat44_lb_addr_port_t *locals;
+ u32 affinity_per_service_list_head_index;
} snat_static_mapping_t;
typedef struct {
@@ -472,7 +481,8 @@ int snat_static_mapping_match (snat_main_t * sm,
u8 by_external,
u8 *is_addr_only,
twice_nat_type_t *twice_nat,
- u8 *lb);
+ lb_nat_type_t *lb,
+ ip4_address_t * ext_host_addr);
void snat_add_del_addr_to_fib (ip4_address_t * addr,
u8 p_len,
@@ -526,6 +536,12 @@ typedef struct {
*/
#define is_ed_session(s) (s->flags & SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT)
+/** \brief Check if NAT session has affinity record.
+    @param s NAT session
+    @return non-zero (the SNAT_SESSION_FLAG_AFFINITY bit) if NAT session has
+            affinity record, 0 otherwise
+*/
+#define is_affinity_sessions(s) ((s)->flags & SNAT_SESSION_FLAG_AFFINITY)
+
#define nat_interface_is_inside(i) i->flags & NAT_INTERFACE_FLAG_IS_INSIDE
#define nat_interface_is_outside(i) i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE
@@ -619,7 +635,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
snat_protocol_t proto,
nat44_lb_addr_port_t *locals, u8 is_add,
twice_nat_type_t twice_nat, u8 out2in_only,
- u8 *tag);
+ u8 *tag, u32 affinity);
int nat44_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
snat_protocol_t proto, u32 vrf_id, int is_in);
int nat44_del_ed_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
diff --git a/src/plugins/nat/nat44_cli.c b/src/plugins/nat/nat44_cli.c
index e51f6d68511..3847502ca7e 100644
--- a/src/plugins/nat/nat44_cli.c
+++ b/src/plugins/nat/nat44_cli.c
@@ -22,6 +22,7 @@
#include <nat/nat_det.h>
#include <nat/nat64.h>
#include <nat/nat_inlines.h>
+#include <nat/nat_affinity.h>
#include <vnet/fib/fib_table.h>
#define UNSUPPORTED_IN_DET_MODE_STR \
@@ -165,6 +166,7 @@ nat44_show_hash_commnad_fn (vlib_main_t * vm, unformat_input_t * input,
{
snat_main_t *sm = &snat_main;
snat_main_per_thread_data_t *tsm;
+ nat_affinity_main_t *nam = &nat_affinity_main;
int i;
int verbose = 0;
@@ -198,6 +200,9 @@ nat44_show_hash_commnad_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U", format_bihash_8_8, &tsm->user_hash, verbose);
}
+ if (sm->endpoint_dependent)
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash,
+ verbose);
return 0;
}
@@ -741,7 +746,7 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
snat_main_t *sm = &snat_main;
clib_error_t *error = 0;
ip4_address_t l_addr, e_addr;
- u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
+ u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0, affinity = 0;
int is_add = 1;
int rv;
snat_protocol_t proto;
@@ -793,6 +798,8 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
out2in_only = 1;
else if (unformat (line_input, "del"))
is_add = 0;
+ else if (unformat (line_input, "affinity %u", &affinity))
+ ;
else
{
error = clib_error_return (0, "unknown input: '%U'",
@@ -814,7 +821,8 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
}
rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, locals,
- is_add, twice_nat, out2in_only, 0);
+ is_add, twice_nat, out2in_only, 0,
+ affinity);
switch (rv)
{
@@ -1788,7 +1796,8 @@ VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = {
.short_help =
"nat44 add load-balancing static mapping protocol tcp|udp "
"external <addr>:<port> local <addr>:<port> [vrf <table-id>] "
- "probability <n> [twice-nat|self-twice-nat] [out2in-only] [del]",
+ "probability <n> [twice-nat|self-twice-nat] [out2in-only] "
+ "[affinity <timeout-seconds>] [del]",
};
/*?
diff --git a/src/plugins/nat/nat_affinity.c b/src/plugins/nat/nat_affinity.c
new file mode 100644
index 00000000000..28c25aecda5
--- /dev/null
+++ b/src/plugins/nat/nat_affinity.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin client-IP based session affinity for load-balancing
+ */
+
+#include <nat/nat_affinity.h>
+#include <nat/nat.h>
+
+/* Global affinity state used by every function in this file */
+nat_affinity_main_t nat_affinity_main;
+
+/* Bucket count passed to clib_bihash_init_16_8 for the affinity hash */
+#define AFFINITY_HASH_BUCKETS 65536
+/* Memory size passed to clib_bihash_init_16_8: 2 << 25 = 67108864
+ * (presumably bytes, i.e. 64 MiB - confirm against bihash API) */
+#define AFFINITY_HASH_MEMORY (2 << 25)
+
+/**
+ * @brief Format one affinity hash key/value pair.
+ *
+ * Registered on the affinity hash with clib_bihash_set_kvp_format_fn_16_8
+ * so that bihash dumps print readable entries.
+ *
+ * @param s    Vector the formatted text is appended to.
+ * @param args va_list holding a single clib_bihash_kv_16_8_t pointer.
+ *
+ * @return The extended vector.
+ */
+u8 *
+format_affinity_kvp (u8 * s, va_list * args)
+{
+  clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
+  nat_affinity_key_t k;
+
+  k.as_u64[0] = v->key[0];
+  k.as_u64[1] = v->key[1];
+
+  /* Every conversion needs its argument: "index %llu" prints the stored
+     value (the affinity pool index), so v->value must be passed. */
+  s = format (s, "client %U backend %U:%d proto %U index %llu",
+              format_ip4_address, &k.client_addr,
+              format_ip4_address, &k.service_addr,
+              clib_net_to_host_u16 (k.service_port),
+              format_snat_protocol, k.proto, v->value);
+
+  return s;
+}
+
+/**
+ * Prepare the global affinity state: optional spinlock, the 16_8 bihash
+ * with its entry formatter, and the vlib main used for time lookups.
+ * Always returns 0 (no error).
+ */
+clib_error_t *
+nat_affinity_init (vlib_main_t * vm)
+{
+  nat_affinity_main_t *am = &nat_affinity_main;
+
+  /* The lock is only created when more than one vlib main is running */
+  if (vlib_get_thread_main ()->n_vlib_mains > 1)
+    clib_spinlock_init (&am->affinity_lock);
+
+  clib_bihash_init_16_8 (&am->affinity_hash, "nat-affinity",
+                         AFFINITY_HASH_BUCKETS, AFFINITY_HASH_MEMORY);
+  clib_bihash_set_kvp_format_fn_16_8 (&am->affinity_hash,
+                                      format_affinity_kvp);
+  am->vlib_main = vm;
+
+  return 0;
+}
+
+/**
+ * Fill a bihash key/value pair for a client/service tuple.
+ * The four key fields cover all 16 key bytes; the value is preset to ~0.
+ */
+static_always_inline void
+make_affinity_kv (clib_bihash_kv_16_8_t * kv, ip4_address_t client_addr,
+                  ip4_address_t service_addr, u8 proto, u16 service_port)
+{
+  nat_affinity_key_t k;
+
+  /* Build the key in a local, then copy both 64-bit halves */
+  k.service_addr = service_addr;
+  k.client_addr = client_addr;
+  k.proto = proto;
+  k.service_port = service_port;
+
+  kv->key[0] = k.as_u64[0];
+  kv->key[1] = k.as_u64[1];
+  kv->value = ~0ULL;
+}
+
+/**
+ * @brief Allocate and initialize a new per-service list head.
+ *
+ * Takes one element from the list pool and initializes it as an empty
+ * list. The index is later released by nat_affinity_flush_service().
+ *
+ * @return Index of the new list head in the list pool.
+ */
+u32
+nat_affinity_get_per_service_list_head_index (void)
+{
+  nat_affinity_main_t *nam = &nat_affinity_main;
+  dlist_elt_t *head_elt;
+  u32 head_index;
+
+  clib_spinlock_lock_if_init (&nam->affinity_lock);
+
+  pool_get (nam->list_pool, head_elt);
+  /* Compute the index while the lock is still held: once it is released
+     another thread may grow list_pool and change the pool base. */
+  head_index = head_elt - nam->list_pool;
+  clib_dlist_init (nam->list_pool, head_index);
+
+  clib_spinlock_unlock_if_init (&nam->affinity_lock);
+
+  return head_index;
+}
+
+/**
+ * Remove every affinity record linked on one per-service list (pool entry,
+ * hash entry and list element), then release the list head itself.
+ */
+void
+nat_affinity_flush_service (u32 affinity_per_service_list_head_index)
+{
+  nat_affinity_main_t *am = &nat_affinity_main;
+  clib_bihash_kv_16_8_t del_kv;
+  dlist_elt_t *node;
+  nat_affinity_t *rec;
+  u32 node_index;
+
+  clib_spinlock_lock_if_init (&am->affinity_lock);
+
+  for (;;)
+    {
+      node_index =
+        clib_dlist_remove_head (am->list_pool,
+                                affinity_per_service_list_head_index);
+      /* ~0 means the list is empty */
+      if (node_index == ~0)
+        break;
+
+      node = pool_elt_at_index (am->list_pool, node_index);
+      rec = pool_elt_at_index (am->affinity_pool, node->value);
+      /* Save the key before the record is returned to the pool */
+      del_kv.key[0] = rec->key.as_u64[0];
+      del_kv.key[1] = rec->key.as_u64[1];
+      pool_put_index (am->affinity_pool, node->value);
+      if (clib_bihash_add_del_16_8 (&am->affinity_hash, &del_kv, 0))
+        nat_log_warn ("affinity key del failed");
+      pool_put_index (am->list_pool, node_index);
+    }
+
+  pool_put_index (am->list_pool, affinity_per_service_list_head_index);
+
+  clib_spinlock_unlock_if_init (&am->affinity_lock);
+}
+
+/**
+ * Look up the affinity record of a client/service tuple. On a hit the
+ * reference count is incremented and the backend index is written out.
+ * An unreferenced record whose expiry time has passed is deleted and
+ * reported as a miss.
+ * Returns 0 on a hit, 1 otherwise.
+ */
+int
+nat_affinity_find_and_lock (ip4_address_t client_addr,
+                            ip4_address_t service_addr, u8 proto,
+                            u16 service_port, u8 * backend_index)
+{
+  nat_affinity_main_t *am = &nat_affinity_main;
+  clib_bihash_kv_16_8_t kv, found;
+  nat_affinity_t *rec;
+  int rv = 1;
+
+  make_affinity_kv (&kv, client_addr, service_addr, proto, service_port);
+  clib_spinlock_lock_if_init (&am->affinity_lock);
+
+  if (!clib_bihash_search_16_8 (&am->affinity_hash, &kv, &found))
+    {
+      rec = pool_elt_at_index (am->affinity_pool, found.value);
+      if (rec->ref_cnt == 0 && rec->expire < vlib_time_now (am->vlib_main))
+        {
+          /* Nobody holds the record and it has expired: drop it */
+          clib_dlist_remove (am->list_pool, rec->per_service_index);
+          pool_put_index (am->list_pool, rec->per_service_index);
+          pool_put_index (am->affinity_pool, found.value);
+          if (clib_bihash_add_del_16_8 (&am->affinity_hash, &kv, 0))
+            nat_log_warn ("affinity key del failed");
+        }
+      else
+        {
+          rec->ref_cnt++;
+          *backend_index = rec->backend_index;
+          rv = 0;
+        }
+    }
+
+  clib_spinlock_unlock_if_init (&am->affinity_lock);
+  return rv;
+}
+
+/**
+ * Staleness callback handed to clib_bihash_add_or_overwrite_stale_16_8.
+ * An entry counts as stale when its record has no references and its
+ * expiry time has passed; such a record is released here.
+ * Returns 1 for a stale entry, 0 otherwise.
+ */
+static int
+affinity_is_expired_cb (clib_bihash_kv_16_8_t * kv, void *arg)
+{
+  nat_affinity_main_t *am = &nat_affinity_main;
+  nat_affinity_t *rec = pool_elt_at_index (am->affinity_pool, kv->value);
+
+  /* Still referenced: never stale */
+  if (rec->ref_cnt != 0)
+    return 0;
+
+  /* Unreferenced but not yet past its expiry time */
+  if (!(rec->expire < vlib_time_now (am->vlib_main)))
+    return 0;
+
+  clib_dlist_remove (am->list_pool, rec->per_service_index);
+  pool_put_index (am->list_pool, rec->per_service_index);
+  pool_put_index (am->affinity_pool, kv->value);
+  if (clib_bihash_add_del_16_8 (&am->affinity_hash, kv, 0))
+    nat_log_warn ("affinity key del failed");
+
+  return 1;
+}
+
+/*
+ * Create an affinity record for a client/service tuple with an initial
+ * reference count of 1 and link it on the given per-service list.
+ *
+ * Returns 0 on success. Returns 1 when a record with the same key already
+ * exists, or the bihash return value when the hash insertion fails.
+ */
+int
+nat_affinity_create_and_lock (ip4_address_t client_addr,
+ ip4_address_t service_addr, u8 proto,
+ u16 service_port, u8 backend_index,
+ u32 sticky_time,
+ u32 affinity_per_service_list_head_index)
+{
+ nat_affinity_main_t *nam = &nat_affinity_main;
+ clib_bihash_kv_16_8_t kv, value;
+ nat_affinity_t *a;
+ dlist_elt_t *list_elt;
+ int rv = 0;
+
+ make_affinity_kv (&kv, client_addr, service_addr, proto, service_port);
+ clib_spinlock_lock_if_init (&nam->affinity_lock);
+ /* search returns 0 on a hit: refuse to create a duplicate record */
+ if (!clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value))
+ {
+ rv = 1;
+ nat_log_notice ("affinity key already exist");
+ goto unlock;
+ }
+
+ /* allocate the record first so its pool index can be stored as the
+ hash value */
+ pool_get (nam->affinity_pool, a);
+ kv.value = a - nam->affinity_pool;
+ /* affinity_is_expired_cb decides which existing entries are stale */
+ rv =
+ clib_bihash_add_or_overwrite_stale_16_8 (&nam->affinity_hash, &kv,
+ affinity_is_expired_cb, NULL);
+ if (rv)
+ {
+ nat_log_notice ("affinity key add failed");
+ /* give the unused record back to the pool */
+ pool_put (nam->affinity_pool, a);
+ goto unlock;
+ }
+
+ /* the list element and the record reference each other by pool index */
+ pool_get (nam->list_pool, list_elt);
+ clib_dlist_init (nam->list_pool, list_elt - nam->list_pool);
+ list_elt->value = a - nam->affinity_pool;
+ a->per_service_index = list_elt - nam->list_pool;
+ a->backend_index = backend_index;
+ /* the caller holds the first reference */
+ a->ref_cnt = 1;
+ a->sticky_time = sticky_time;
+ /* keep a copy of the key in the record; nat_affinity_flush_service
+ uses it to delete the hash entry */
+ a->key.as_u64[0] = kv.key[0];
+ a->key.as_u64[1] = kv.key[1];
+ clib_dlist_addtail (nam->list_pool, affinity_per_service_list_head_index,
+ a->per_service_index);
+
+unlock:
+ clib_spinlock_unlock_if_init (&nam->affinity_lock);
+ return rv;
+}
+
+/**
+ * @brief Drop one reference on the affinity record of a client/service
+ *        tuple.
+ *
+ * When the last reference is dropped, the expiry time is set to the
+ * current time plus the record's sticky time. Nothing happens when no
+ * record exists for the tuple or when the record is already unreferenced.
+ *
+ * @param client_addr  Client IP address.
+ * @param service_addr Service IP address.
+ * @param proto        IP protocol number.
+ * @param service_port Service L4 port number.
+ */
+void
+nat_affinity_unlock (ip4_address_t client_addr, ip4_address_t service_addr,
+                     u8 proto, u16 service_port)
+{
+  nat_affinity_main_t *nam = &nat_affinity_main;
+  clib_bihash_kv_16_8_t kv, value;
+  nat_affinity_t *a;
+
+  make_affinity_kv (&kv, client_addr, service_addr, proto, service_port);
+  clib_spinlock_lock_if_init (&nam->affinity_lock);
+  if (clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value))
+    goto unlock;
+
+  a = pool_elt_at_index (nam->affinity_pool, value.value);
+  /* An unbalanced unlock must not wrap the unsigned counter: a wrapped
+     ref_cnt would keep the record from ever being treated as expired. */
+  if (a->ref_cnt == 0)
+    goto unlock;
+
+  a->ref_cnt--;
+  if (a->ref_cnt == 0)
+    a->expire = (f64) a->sticky_time + vlib_time_now (nam->vlib_main);
+
+unlock:
+  clib_spinlock_unlock_if_init (&nam->affinity_lock);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_affinity.h b/src/plugins/nat/nat_affinity.h
new file mode 100644
index 00000000000..358e682eb49
--- /dev/null
+++ b/src/plugins/nat/nat_affinity.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT plugin client-IP based session affinity for load-balancing
+ */
+
+#ifndef __included_nat_affinity_h__
+#define __included_nat_affinity_h__
+
+#include <vnet/ip/ip.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/dlist.h>
+
+/* Affinity hash key: four 32-bit fields overlaid on two u64 words so the
+ * key can be copied to and from a clib_bihash_kv_16_8_t key. */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ip4_address_t service_addr;
+ ip4_address_t client_addr;
+ /* stored in 4 octets for alignment even though it is a 1-octet field */
+ u32 proto;
+ /* stored in 4 octets for alignment even though it is a 2-octet field */
+ u32 service_port;
+ };
+ /* the same 16 bytes viewed as the two bihash key words */
+ u64 as_u64[2];
+ };
+} nat_affinity_key_t;
+
+/* *INDENT-OFF* */
+/* One affinity record: binds a client/service tuple to a backend. */
+typedef CLIB_PACKED(struct
+{
+ /* copy of the hash key, used to delete the hash entry on flush */
+ nat_affinity_key_t key;
+ /* added to the current time when the last reference is dropped */
+ u32 sticky_time;
+ /* set to 1 on create, incremented by find_and_lock, decremented by unlock */
+ u32 ref_cnt;
+ /* index of this record's element in the list pool */
+ u32 per_service_index;
+ /* backend index handed back by nat_affinity_find_and_lock */
+ u8 backend_index;
+ /* expiry time; only written when ref_cnt drops to zero */
+ f64 expire;
+}) nat_affinity_t;
+/* *INDENT-ON* */
+
+/* Global state of the client-IP affinity feature. */
+typedef struct
+{
+ /* pool of affinity records; hash values are indices into this pool */
+ nat_affinity_t *affinity_pool;
+ /* maps nat_affinity_key_t to an affinity_pool index */
+ clib_bihash_16_8_t affinity_hash;
+ /* taken around pool/hash/list updates; only initialized when more than
+ one vlib main is running */
+ clib_spinlock_t affinity_lock;
+ /* pool of dlist elements: per-service list heads and per-record elements */
+ dlist_elt_t *list_pool;
+ /* vlib main used for vlib_time_now () */
+ vlib_main_t *vlib_main;
+} nat_affinity_main_t;
+
+extern nat_affinity_main_t nat_affinity_main;
+
+/**
+ * @brief Get new affinity per service list head index.
+ *
+ * Allocates one element from the list pool and initializes it as a list
+ * head. The index is released by nat_affinity_flush_service().
+ *
+ * @returns new affinity per service list head index.
+ */
+u32 nat_affinity_get_per_service_list_head_index (void);
+
+/**
+ * @brief Flush all service affinity data.
+ *
+ * Deletes every affinity record linked on the list and then releases the
+ * list head itself, so the index must not be used afterwards.
+ *
+ * @param affinity_per_service_list_head_index Per service list head index.
+ */
+void nat_affinity_flush_service (u32 affinity_per_service_list_head_index);
+
+/**
+ * @brief Initialize NAT client-IP based affinity.
+ *
+ * Initializes the affinity hash and, when more than one vlib main is
+ * running, the affinity spinlock.
+ *
+ * @param vm vlib main.
+ *
+ * @return error code (the current implementation always returns 0).
+ */
+clib_error_t *nat_affinity_init (vlib_main_t * vm);
+
+/**
+ * @brief Find service backend index for client-IP and take a reference
+ * counting lock.
+ *
+ * An unreferenced record whose expiry time has passed is deleted and
+ * reported as not found.
+ *
+ * @param client_addr Client IP address.
+ * @param service_addr Service IP address.
+ * @param proto IP protocol number.
+ * @param service_port Service L4 port number.
+ * @param backend_index Service backend index for client-IP if found
+ * (written only on success).
+ *
+ * @return 0 on success, non-zero value otherwise.
+ */
+int nat_affinity_find_and_lock (ip4_address_t client_addr,
+ ip4_address_t service_addr, u8 proto,
+ u16 service_port, u8 * backend_index);
+
+/**
+ * @brief Create affinity record and take reference counting lock.
+ *
+ * Fails when a record for the same client/service tuple already exists or
+ * when the hash insertion fails.
+ *
+ * @param client_addr Client IP address.
+ * @param service_addr Service IP address.
+ * @param proto IP protocol number.
+ * @param service_port Service L4 port number.
+ * @param backend_index Service backend index for client-IP.
+ * @param sticky_time Affinity sticky time in seconds.
+ * @param affinity_per_service_list_head_index Per service list head index.
+ *
+ * @return 0 on success, non-zero value otherwise.
+ */
+int nat_affinity_create_and_lock (ip4_address_t client_addr,
+ ip4_address_t service_addr, u8 proto,
+ u16 service_port, u8 backend_index,
+ u32 sticky_time,
+ u32 affinity_per_service_list_head_index);
+/**
+ * @brief Release a reference counting lock for affinity.
+ *
+ * When the last reference is released the record's expiry time is set to
+ * the current time plus its sticky time. Does nothing when no record
+ * exists for the given tuple.
+ *
+ * @param client_addr Client IP address.
+ * @param service_addr Service IP address.
+ * @param proto IP protocol number.
+ * @param service_port Service L4 port number.
+ */
+void nat_affinity_unlock (ip4_address_t client_addr,
+ ip4_address_t service_addr, u8 proto,
+ u16 service_port);
+
+#endif /* __included_nat_affinity_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c
index 8055259616f..17009c93561 100644
--- a/src/plugins/nat/nat_api.c
+++ b/src/plugins/nat/nat_api.c
@@ -1463,7 +1463,8 @@ static void
nat44_add_del_lb_static_mapping (e_addr,
clib_net_to_host_u16 (mp->external_port),
proto, locals, mp->is_add, twice_nat,
- mp->out2in_only, tag);
+ mp->out2in_only, tag,
+ clib_net_to_host_u32 (mp->affinity));
vec_free (locals);
vec_free (tag);
diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c
index 46a8a1ed078..5029300dcdb 100755
--- a/src/plugins/nat/out2in.c
+++ b/src/plugins/nat/out2in.c
@@ -367,7 +367,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
{
/* Try to match static mapping by external address and port,
destination address and port in packet */
- if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
{
if (!sm->forwarding_enabled)
{
@@ -475,7 +475,7 @@ u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
}
key0.fib_index = rx_fib_index0;
- if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, &is_addr_only, 0, 0, 0))
{
/* Don't NAT packet aimed at the intfc address */
if (is_interface_addr(sm, node, sw_if_index0, ip0->dst_address.as_u32))
@@ -821,7 +821,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
/* Try to match static mapping by external address and port,
destination address and port in packet */
- if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -972,7 +972,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
/* Try to match static mapping by external address and port,
destination address and port in packet */
- if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key1, &sm1, 1, 0, 0, 0, 0))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -1159,7 +1159,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
{
/* Try to match static mapping by external address and port,
destination address and port in packet */
- if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -1384,7 +1384,7 @@ nat44_out2in_reass_node_fn (vlib_main_t * vm,
{
/* Try to match static mapping by external address and port,
destination address and port in packet */
- if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -1719,7 +1719,7 @@ create_session_for_static_mapping_ed (snat_main_t * sm,
vlib_node_runtime_t * node,
u32 thread_index,
twice_nat_type_t twice_nat,
- u8 is_lb,
+ lb_nat_type_t lb_nat,
f64 now)
{
snat_session_t *s;
@@ -1760,8 +1760,10 @@ create_session_for_static_mapping_ed (snat_main_t * sm,
s->ext_host_addr.as_u32 = ip->src_address.as_u32;
s->ext_host_port = e_key.protocol == SNAT_PROTOCOL_ICMP ? 0 : udp->src_port;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
- if (is_lb)
+ if (lb_nat)
s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
+ if (lb_nat == AFFINITY_LB_NAT)
+ s->flags |= SNAT_SESSION_FLAG_AFFINITY;
s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->outside_address_index = ~0;
s->out2in = e_key;
@@ -2005,7 +2007,7 @@ icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node,
e_key.port = key.l_port;
e_key.protocol = ip_proto_to_snat_proto (key.proto);
e_key.fib_index = rx_fib_index;
- if (snat_static_mapping_match(sm, e_key, &l_key, 1, &is_addr_only, 0, 0))
+ if (snat_static_mapping_match(sm, e_key, &l_key, 1, &is_addr_only, 0, 0, 0))
{
if (!sm->forwarding_enabled)
{
@@ -2221,7 +2223,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
clib_bihash_kv_16_8_t kv0, value0, kv1, value1;
ip_csum_t sum0, sum1;
snat_session_key_t e_key0, l_key0, e_key1, l_key1;
- u8 is_lb0, is_lb1;
+ lb_nat_type_t lb_nat0, lb_nat1;
twice_nat_type_t twice_nat0, twice_nat1;
/* Prefetch next iteration. */
@@ -2324,7 +2326,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
e_key0.protocol = proto0;
e_key0.fib_index = rx_fib_index0;
if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
- &twice_nat0, &is_lb0))
+ &twice_nat0, &lb_nat0, &ip0->src_address))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -2362,7 +2364,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
e_key0, node,
thread_index,
- twice_nat0, is_lb0,
+ twice_nat0,
+ lb_nat0,
now);
if (!s0)
@@ -2526,7 +2529,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
e_key1.protocol = proto1;
e_key1.fib_index = rx_fib_index1;
if (snat_static_mapping_match(sm, e_key1, &l_key1, 1, 0,
- &twice_nat1, &is_lb1))
+ &twice_nat1, &lb_nat1, &ip1->src_address))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -2564,7 +2567,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
s1 = create_session_for_static_mapping_ed(sm, b1, l_key1,
e_key1, node,
thread_index,
- twice_nat1, is_lb1,
+ twice_nat1,
+ lb_nat1,
now);
if (!s1)
@@ -2673,7 +2677,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
clib_bihash_kv_16_8_t kv0, value0;
ip_csum_t sum0;
snat_session_key_t e_key0, l_key0;
- u8 is_lb0;
+ lb_nat_type_t lb_nat0;
twice_nat_type_t twice_nat0;
/* speculatively enqueue b0 to the current next frame */
@@ -2760,7 +2764,7 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
e_key0.protocol = proto0;
e_key0.fib_index = rx_fib_index0;
if (snat_static_mapping_match(sm, e_key0, &l_key0, 1, 0,
- &twice_nat0, &is_lb0))
+ &twice_nat0, &lb_nat0, &ip0->src_address))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
@@ -2798,7 +2802,8 @@ nat44_ed_out2in_node_fn_inline (vlib_main_t * vm,
s0 = create_session_for_static_mapping_ed(sm, b0, l_key0,
e_key0, node,
thread_index,
- twice_nat0, is_lb0,
+ twice_nat0,
+ lb_nat0,
now);
if (!s0)
@@ -3874,7 +3879,7 @@ snat_out2in_fast_node_fn (vlib_main_t * vm,
key0.port = udp0->dst_port;
key0.fib_index = rx_fib_index0;
- if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0))
+ if (snat_static_mapping_match(sm, key0, &sm0, 1, 0, 0, 0, 0))
{
b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
goto trace00;