summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Yourtchenko <ayourtch@gmail.com>2018-05-24 16:53:27 +0200
committerDamjan Marion <dmarion.lists@gmail.com>2018-05-26 16:56:02 +0000
commita34c08c8c5a505e55178a9a8ef5391224d5460a5 (patch)
tree961461e2a4261dcea81b21e2eddfb026c3d01b8e
parentc6f186b23d00685b3e9f132ba79a5cb44f0a44c0 (diff)
acl-plugin: create forward and return sessions in lieu of making a special per-packet session key
Using a separate session key has proven to be tricky for the following reasons: - it's a lot of storage to have what looks to be nearly identical to 5tuple, just maybe with some fields swapped - shuffling the fields from 5tuple adds to memory pressure - the fact that the fields do not coincide with the packet memory means for any staged processing we need to use up a lot of memory Thus, just add two entries into the bihash table pointing to the same session entry, so we could match the packets from either direction. With this we have the key layout of L3 info (which takes up the majority of space for IPv6 case) the same as in the packet, thus, opening up the possibility for other optimizations. Not having to create and store a separate session key should also give us a small performance win in itself. Also, add the routine to show the session bihash in a better way than a bunch of numbers. Alas, the memory usage in the bihash obviously doubles. Change-Id: I8fd2ed4714ad7fc447c4fa224d209bc0b736b371 Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
-rw-r--r--src/plugins/acl/dataplane_node.c45
-rw-r--r--src/plugins/acl/fa_node.h12
-rw-r--r--src/plugins/acl/public_inlines.h24
-rw-r--r--src/plugins/acl/sess_mgmt_node.c22
-rw-r--r--src/plugins/acl/session_inlines.h174
5 files changed, 156 insertions, 121 deletions
diff --git a/src/plugins/acl/dataplane_node.c b/src/plugins/acl/dataplane_node.c
index 98e966189d8..58db6f7c8a6 100644
--- a/src/plugins/acl/dataplane_node.c
+++ b/src/plugins/acl/dataplane_node.c
@@ -82,7 +82,7 @@ acl_fa_node_fn (vlib_main_t * vm,
u32 pkts_restart_session_timer = 0;
u32 trace_bitmap = 0;
acl_main_t *am = &acl_main;
- fa_5tuple_t fa_5tuple, kv_sess;
+ fa_5tuple_t fa_5tuple;
clib_bihash_kv_40_8_t value_sess;
vlib_node_runtime_t *error_node;
u64 now = clib_cpu_time_now ();
@@ -114,7 +114,6 @@ acl_fa_node_fn (vlib_main_t * vm,
u32 match_acl_pos = ~0;
u32 match_rule_index = ~0;
u8 error0 = 0;
- u32 valid_new_sess;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
@@ -144,26 +143,16 @@ acl_fa_node_fn (vlib_main_t * vm,
sw_if_index0)
: (is_input * FA_POLICY_EPOCH_IS_INPUT);
/*
- * Extract the L3/L4 matching info into a 5-tuple structure,
- * then create a session key whose layout is independent on forward or reverse
- * direction of the packet.
+ * Extract the L3/L4 matching info into a 5-tuple structure.
*/
acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input,
is_l2_path,
(fa_5tuple_opaque_t *) & fa_5tuple);
fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
- valid_new_sess =
- acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0,
- &fa_5tuple, &kv_sess);
- // XXDEL fa_5tuple.pkt.is_input = is_input;
fa_5tuple.pkt.mask_type_index_lsb = ~0;
#ifdef FA_NODE_VERBOSE_DEBUG
clib_warning
- ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
- kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
- kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
- clib_warning
("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx",
fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
@@ -174,7 +163,7 @@ acl_fa_node_fn (vlib_main_t * vm,
if (acl_fa_ifc_has_sessions (am, sw_if_index0))
{
if (acl_fa_find_session
- (am, sw_if_index0, &kv_sess, &value_sess))
+ (am, sw_if_index0, &fa_5tuple, &value_sess))
{
trace_bitmap |= 0x80000000;
error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
@@ -272,26 +261,14 @@ acl_fa_node_fn (vlib_main_t * vm,
if (acl_fa_can_add_session (am, is_input, sw_if_index0))
{
- if (PREDICT_TRUE (valid_new_sess))
- {
- fa_session_t *sess =
- acl_fa_add_session (am, is_input,
- sw_if_index0,
- now, &kv_sess,
- current_policy_epoch);
- acl_fa_track_session (am, is_input, sw_if_index0,
- now, sess, &fa_5tuple);
- pkts_new_session += 1;
- }
- else
- {
- /*
- * ICMP packets with non-icmp_valid_new type will be
- * forwared without being dropped.
- */
- action = 1;
- pkts_acl_permit += 1;
- }
+ fa_session_t *sess =
+ acl_fa_add_session (am, is_input, is_ip6,
+ sw_if_index0,
+ now, &fa_5tuple,
+ current_policy_epoch);
+ acl_fa_track_session (am, is_input, sw_if_index0,
+ now, sess, &fa_5tuple);
+ pkts_new_session += 1;
}
else
{
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index 263cf1431e6..8d79e42e738 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -39,8 +39,16 @@ typedef union {
u64 as_u64;
struct {
u16 port[2];
- u16 proto;
- u16 lsb_of_sw_if_index;
+ union {
+ struct {
+ u8 proto;
+ u8 is_input: 1;
+ u8 is_slowpath: 1;
+ u8 reserved0: 6;
+ u16 lsb_of_sw_if_index;
+ };
+ u32 non_port_l4_data;
+ };
};
} fa_session_l4_key_t;
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index a2b8fc96d3c..3e6c95ad6d9 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -192,7 +192,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
int l3_offset;
int l4_offset;
u16 ports[2];
- u16 proto;
+ u8 proto;
if (is_l2_path)
{
@@ -307,6 +307,8 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
}
p5tuple_pkt->l4.proto = proto;
+ p5tuple_pkt->l4.is_input = is_input;
+
if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
{
p5tuple_pkt->pkt.l4_valid = 1;
@@ -322,6 +324,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
*(u8 *) get_ptr_to_offset (b0,
l4_offset + offsetof (icmp46_header_t,
code));
+ p5tuple_pkt->l4.is_slowpath = 1;
}
else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto))
{
@@ -338,21 +341,12 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
l4_offset + offsetof (tcp_header_t,
flags));
p5tuple_pkt->pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP);
+ p5tuple_pkt->l4.is_slowpath = 0;
}
- /*
- * FIXME: rather than the above conditional, here could
- * be a nice generic mechanism to extract two L4 values:
- *
- * have a per-protocol array of 4 elements like this:
- * u8 offset; to take the byte from, off L4 header
- * u8 mask; to mask it with, before storing
- *
- * this way we can describe UDP, TCP and ICMP[46] semantics,
- * and add a sort of FPM-type behavior for other protocols.
- *
- * Of course, is it faster ? and is it needed ?
- *
- */
+ else
+ {
+ p5tuple_pkt->l4.is_slowpath = 1;
+ }
}
}
diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c
index b4faf554612..103db35f7dc 100644
--- a/src/plugins/acl/sess_mgmt_node.c
+++ b/src/plugins/acl/sess_mgmt_node.c
@@ -48,6 +48,26 @@ fa_session_get_shortest_timeout (acl_main_t * am)
return timeout;
}
+static u8 *
+format_session_bihash_5tuple (u8 * s, va_list * args)
+{
+ fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *);
+ fa_full_session_id_t *sess = (void *) &p5t->pkt;
+
+ return format (s, "l3 %U -> %U"
+ " l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_reserved0 %d port %d -> %d | sess id %d thread id %d epoch %04x",
+ format_ip46_address, &p5t->addr[0],
+ IP46_TYPE_ANY,
+ format_ip46_address, &p5t->addr[1],
+ IP46_TYPE_ANY,
+ p5t->l4.lsb_of_sw_if_index,
+ p5t->l4.proto, p5t->l4.is_input, p5t->l4.is_slowpath,
+ p5t->l4.reserved0, p5t->l4.port[0], p5t->l4.port[1],
+ sess->session_index, sess->thread_index,
+ sess->intf_policy_epoch);
+}
+
+
static void
acl_fa_verify_init_sessions (acl_main_t * am)
{
@@ -73,6 +93,8 @@ acl_fa_verify_init_sessions (acl_main_t * am)
"ACL plugin FA session bihash",
am->fa_conn_table_hash_num_buckets,
am->fa_conn_table_hash_memory_size);
+ clib_bihash_set_kvp_format_fn_40_8 (&am->fa_sessions_hash,
+ format_session_bihash_5tuple);
am->fa_sessions_hash_is_initialized = 1;
}
}
diff --git a/src/plugins/acl/session_inlines.h b/src/plugins/acl/session_inlines.h
index e75582b647b..d43e550bef9 100644
--- a/src/plugins/acl/session_inlines.h
+++ b/src/plugins/acl/session_inlines.h
@@ -67,72 +67,6 @@ acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0)
return it_has;
}
-/* Session keys match the packets received, and mirror the packets sent */
-always_inline u32
-acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6,
- u32 sw_if_index, fa_5tuple_t * p5tuple_pkt,
- fa_5tuple_t * p5tuple_sess)
-{
- int src_index = is_input ? 0 : 1;
- int dst_index = is_input ? 1 : 0;
- u32 valid_new_sess = 1;
- p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0];
- p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1];
- p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64;
-
- if (PREDICT_TRUE (p5tuple_pkt->l4.proto != icmp_protos[is_ip6]))
- {
- p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0];
- p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1];
- }
- else
- {
- static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
- static const u8 *icmp_valid_new[] =
- { icmp4_valid_new, icmp6_valid_new };
- static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
- sizeof (icmp6_invmap)
- };
- static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
- sizeof (icmp6_valid_new)
- };
- int type =
- is_ip6 ? p5tuple_pkt->l4.port[0] - 128 : p5tuple_pkt->l4.port[0];
-
- p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0];
- p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1];
-
- /*
- * Invert ICMP type for valid icmp_invmap messages:
- * 1) input node with outbound ACL interface
- * 2) output node with inbound ACL interface
- *
- */
- if ((is_input && acl_fa_ifc_has_out_acl (am, sw_if_index)) ||
- (!is_input && acl_fa_ifc_has_in_acl (am, sw_if_index)))
- {
- if (type >= 0 &&
- type <= icmp_invmap_size[is_ip6] && icmp_invmap[is_ip6][type])
- {
- p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1;
- }
- }
-
- /*
- * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
- * are allowed to create stateful ACL.
- * The other messages will be forwarded without creating a reflexive ACL.
- */
- if (type < 0 ||
- type > icmp_valid_new_size[is_ip6] || !icmp_valid_new[is_ip6][type])
- {
- valid_new_sess = 0;
- }
- }
-
- return valid_new_sess;
-}
-
always_inline int
fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess)
{
@@ -316,6 +250,100 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
return 3;
}
+always_inline u64
+reverse_l4_u64_fastpath (u64 l4, int is_ip6)
+{
+ fa_session_l4_key_t l4i = {.as_u64 = l4 };
+ fa_session_l4_key_t l4o;
+
+ l4o.port[1] = l4i.port[0];
+ l4o.port[0] = l4i.port[1];
+
+ l4o.non_port_l4_data = l4i.non_port_l4_data;
+ l4o.is_input = 1 - l4i.is_input;
+ return l4o.as_u64;
+}
+
+always_inline u64
+reverse_l4_u64_slowpath (u64 l4, int is_ip6)
+{
+ fa_session_l4_key_t l4i = {.as_u64 = l4 };
+ fa_session_l4_key_t l4o;
+
+ if (l4i.proto == icmp_protos[is_ip6])
+ {
+ static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap };
+ static const u8 *icmp_valid_new[] =
+ { icmp4_valid_new, icmp6_valid_new };
+ static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap),
+ sizeof (icmp6_invmap)
+ };
+ static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new),
+ sizeof (icmp6_valid_new)
+ };
+ int type = is_ip6 ? l4i.port[0] - 128 : l4i.port[0];
+
+ l4o.non_port_l4_data = l4i.non_port_l4_data;
+ l4o.port[0] = l4i.port[0];
+ l4o.port[1] = l4i.port[1];
+
+
+ /*
+ * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table
+ * are allowed to create stateful ACL.
+ * The other messages will be forwarded without creating a reverse session.
+ */
+
+ if (type >= 0 && (type <= icmp_valid_new_size[is_ip6])
+ && (icmp_valid_new[is_ip6][type])
+ && (type <= icmp_invmap_size[is_ip6]) && icmp_invmap[is_ip6][type])
+ {
+ /*
+ * we set the inverse direction and correct the port,
+ * if it is okay to add the reverse session.
+ * If not, then the same session will be added twice
+ * to bihash, which is the same as adding just one session.
+ */
+ l4o.is_input = 1 - l4i.is_input;
+ l4o.port[0] = icmp_invmap[is_ip6][type] - 1;
+ }
+
+ return l4o.as_u64;
+ }
+ else
+ return reverse_l4_u64_fastpath (l4, is_ip6);
+}
+
+always_inline u64
+reverse_l4_u64 (u64 l4, int is_ip6)
+{
+ fa_session_l4_key_t l4i = {.as_u64 = l4 };
+
+ if (PREDICT_FALSE (l4i.is_slowpath))
+ {
+ return reverse_l4_u64_slowpath (l4, is_ip6);
+ }
+ else
+ {
+ return reverse_l4_u64_fastpath (l4, is_ip6);
+ }
+}
+
+always_inline void
+reverse_session_add_del (acl_main_t * am, const int is_ip6,
+ clib_bihash_kv_40_8_t * pkv, int is_add)
+{
+ clib_bihash_kv_40_8_t kv2;
+ /* the first 4xu64 is two addresses, so just swap them */
+ kv2.key[0] = pkv->key[2];
+ kv2.key[1] = pkv->key[3];
+ kv2.key[2] = pkv->key[0];
+ kv2.key[3] = pkv->key[1];
+ /* the last u64 needs special treatment (ports, etc.) */
+ kv2.key[4] = reverse_l4_u64 (pkv->key[4], is_ip6);
+ kv2.value = pkv->value;
+ clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv2, is_add);
+}
always_inline void
acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
@@ -326,6 +354,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index,
get_session_ptr (am, sess_id.thread_index, sess_id.session_index);
ASSERT (sess->thread_index == os_get_thread_index ());
clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0);
+
+ reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0);
+
acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
/* Deleting from timer structures not needed,
@@ -362,9 +393,11 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
}
}
+
always_inline fa_session_t *
-acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
- fa_5tuple_t * p5tuple, u16 current_policy_epoch)
+acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6,
+ u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple,
+ u16 current_policy_epoch)
{
clib_bihash_kv_40_8_t *pkv = &p5tuple->kv;
clib_bihash_kv_40_8_t kv;
@@ -396,10 +429,11 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
sess->link_prev_idx = ~0;
sess->link_next_idx = ~0;
-
-
ASSERT (am->fa_sessions_hash_is_initialized == 1);
clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1);
+
+ reverse_session_add_del (am, is_ip6, &kv, 1);
+
acl_fa_conn_list_add_session (am, f_sess_id, now);
vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index);