diff options
author | Andrew Yourtchenko <ayourtch@gmail.com> | 2018-05-24 16:53:27 +0200 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2018-05-26 16:56:02 +0000 |
commit | a34c08c8c5a505e55178a9a8ef5391224d5460a5 (patch) | |
tree | 961461e2a4261dcea81b21e2eddfb026c3d01b8e | |
parent | c6f186b23d00685b3e9f132ba79a5cb44f0a44c0 (diff) |
acl-plugin: create forward and return sessions in lieu of making a special per-packet session key
Using a separate session key has proven to be tricky for the following reasons:
- it's a lot of storage to have what looks to be nearly identical to 5tuple,
just maybe with some fields swapped
- shuffling the fields from 5tuple adds to memory pressure
- the fact that the fields do not coincide with the packet memory
means for any staged processing we need to use up a lot of memory
Thus, just add two entries into the bihash table pointing to
the same session entry, so we could match the packets from either
direction.
With this we have the key layout of L3 info (which takes up
the majority of space for IPv6 case) the same as in the packet,
thus, opening up the possibility for other optimizations.
Not having to create and store a separate session key
should also give us a small performance win in itself.
Also, add the routine to show the session bihash in a better
way than a bunch of numbers.
Alas, the memory usage in the bihash obviously doubles.
Change-Id: I8fd2ed4714ad7fc447c4fa224d209bc0b736b371
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
-rw-r--r-- | src/plugins/acl/dataplane_node.c | 45 | ||||
-rw-r--r-- | src/plugins/acl/fa_node.h | 12 | ||||
-rw-r--r-- | src/plugins/acl/public_inlines.h | 24 | ||||
-rw-r--r-- | src/plugins/acl/sess_mgmt_node.c | 22 | ||||
-rw-r--r-- | src/plugins/acl/session_inlines.h | 174 |
5 files changed, 156 insertions, 121 deletions
diff --git a/src/plugins/acl/dataplane_node.c b/src/plugins/acl/dataplane_node.c index 98e966189d8..58db6f7c8a6 100644 --- a/src/plugins/acl/dataplane_node.c +++ b/src/plugins/acl/dataplane_node.c @@ -82,7 +82,7 @@ acl_fa_node_fn (vlib_main_t * vm, u32 pkts_restart_session_timer = 0; u32 trace_bitmap = 0; acl_main_t *am = &acl_main; - fa_5tuple_t fa_5tuple, kv_sess; + fa_5tuple_t fa_5tuple; clib_bihash_kv_40_8_t value_sess; vlib_node_runtime_t *error_node; u64 now = clib_cpu_time_now (); @@ -114,7 +114,6 @@ acl_fa_node_fn (vlib_main_t * vm, u32 match_acl_pos = ~0; u32 match_rule_index = ~0; u8 error0 = 0; - u32 valid_new_sess; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -144,26 +143,16 @@ acl_fa_node_fn (vlib_main_t * vm, sw_if_index0) : (is_input * FA_POLICY_EPOCH_IS_INPUT); /* - * Extract the L3/L4 matching info into a 5-tuple structure, - * then create a session key whose layout is independent on forward or reverse - * direction of the packet. + * Extract the L3/L4 matching info into a 5-tuple structure. */ acl_plugin_fill_5tuple_inline (lc_index0, b0, is_ip6, is_input, is_l2_path, (fa_5tuple_opaque_t *) & fa_5tuple); fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff; - valid_new_sess = - acl_make_5tuple_session_key (am, is_input, is_ip6, sw_if_index0, - &fa_5tuple, &kv_sess); - // XXDEL fa_5tuple.pkt.is_input = is_input; fa_5tuple.pkt.mask_type_index_lsb = ~0; #ifdef FA_NODE_VERBOSE_DEBUG clib_warning - ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx", - kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2], - kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value); - clib_warning ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx %016llx", fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2], fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value); @@ -174,7 +163,7 @@ acl_fa_node_fn (vlib_main_t * vm, if (acl_fa_ifc_has_sessions (am, sw_if_index0)) { if (acl_fa_find_session - (am, sw_if_index0, &kv_sess, &value_sess)) + (am, sw_if_index0, &fa_5tuple, &value_sess)) { trace_bitmap |= 0x80000000; error0 = ACL_FA_ERROR_ACL_EXIST_SESSION; @@ -272,26 +261,14 @@ acl_fa_node_fn (vlib_main_t * vm, if (acl_fa_can_add_session (am, is_input, sw_if_index0)) { - if (PREDICT_TRUE (valid_new_sess)) - { - fa_session_t *sess = - acl_fa_add_session (am, is_input, - sw_if_index0, - now, &kv_sess, - current_policy_epoch); - acl_fa_track_session (am, is_input, sw_if_index0, - now, sess, &fa_5tuple); - pkts_new_session += 1; - } - else - { - /* - * ICMP packets with non-icmp_valid_new type will be - * forwared without being dropped. - */ - action = 1; - pkts_acl_permit += 1; - } + fa_session_t *sess = + acl_fa_add_session (am, is_input, is_ip6, + sw_if_index0, + now, &fa_5tuple, + current_policy_epoch); + acl_fa_track_session (am, is_input, sw_if_index0, + now, sess, &fa_5tuple); + pkts_new_session += 1; } else { diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 263cf1431e6..8d79e42e738 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -39,8 +39,16 @@ typedef union { u64 as_u64; struct { u16 port[2]; - u16 proto; - u16 lsb_of_sw_if_index; + union { + struct { + u8 proto; + u8 is_input: 1; + u8 is_slowpath: 1; + u8 reserved0: 6; + u16 lsb_of_sw_if_index; + }; + u32 non_port_l4_data; + }; }; } fa_session_l4_key_t; diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h index a2b8fc96d3c..3e6c95ad6d9 100644 --- a/src/plugins/acl/public_inlines.h +++ b/src/plugins/acl/public_inlines.h @@ -192,7 +192,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, int l3_offset; int l4_offset; u16 ports[2]; - u16 proto; + u8 proto; if (is_l2_path) { @@ -307,6 +307,8 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, } p5tuple_pkt->l4.proto = proto; + p5tuple_pkt->l4.is_input = is_input; + if (PREDICT_TRUE (offset_within_packet (b0, l4_offset))) { p5tuple_pkt->pkt.l4_valid = 1; @@ -322,6 +324,7 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, *(u8 *) get_ptr_to_offset (b0, l4_offset + offsetof (icmp46_header_t, code)); + p5tuple_pkt->l4.is_slowpath = 1; } else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto)) { @@ -338,21 +341,12 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, l4_offset + offsetof (tcp_header_t, flags)); p5tuple_pkt->pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP); + p5tuple_pkt->l4.is_slowpath = 0; } - /* - * FIXME: rather than the above conditional, here could - * be a nice generic mechanism to extract two L4 values: - * - * have a per-protocol array of 4 elements like this: - * u8 offset; to take the byte from, off L4 header - * u8 mask; to mask it with, before storing - * - * this way we can describe UDP, TCP and ICMP[46] semantics, - * and add a sort of FPM-type behavior for other protocols. - * - * Of course, is it faster ? and is it needed ? - * - */ + else + { + p5tuple_pkt->l4.is_slowpath = 1; + } } } diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c index b4faf554612..103db35f7dc 100644 --- a/src/plugins/acl/sess_mgmt_node.c +++ b/src/plugins/acl/sess_mgmt_node.c @@ -48,6 +48,26 @@ fa_session_get_shortest_timeout (acl_main_t * am) return timeout; } +static u8 * +format_session_bihash_5tuple (u8 * s, va_list * args) +{ + fa_5tuple_t *p5t = va_arg (*args, fa_5tuple_t *); + fa_full_session_id_t *sess = (void *) &p5t->pkt; + + return format (s, "l3 %U -> %U" + " l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_reserved0 %d port %d -> %d | sess id %d thread id %d epoch %04x", + format_ip46_address, &p5t->addr[0], + IP46_TYPE_ANY, + format_ip46_address, &p5t->addr[1], + IP46_TYPE_ANY, + p5t->l4.lsb_of_sw_if_index, + p5t->l4.proto, p5t->l4.is_input, p5t->l4.is_slowpath, + p5t->l4.reserved0, p5t->l4.port[0], p5t->l4.port[1], + sess->session_index, sess->thread_index, + sess->intf_policy_epoch); +} + + static void acl_fa_verify_init_sessions (acl_main_t * am) { @@ -73,6 +93,8 @@ acl_fa_verify_init_sessions (acl_main_t * am) "ACL plugin FA session bihash", am->fa_conn_table_hash_num_buckets, am->fa_conn_table_hash_memory_size); + clib_bihash_set_kvp_format_fn_40_8 (&am->fa_sessions_hash, + format_session_bihash_5tuple); am->fa_sessions_hash_is_initialized = 1; } } diff --git a/src/plugins/acl/session_inlines.h b/src/plugins/acl/session_inlines.h index e75582b647b..d43e550bef9 100644 --- a/src/plugins/acl/session_inlines.h +++ b/src/plugins/acl/session_inlines.h @@ -67,72 +67,6 @@ acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0) return it_has; } -/* Session keys match the packets received, and mirror the packets sent */ -always_inline u32 -acl_make_5tuple_session_key (acl_main_t * am, int is_input, int is_ip6, - u32 sw_if_index, fa_5tuple_t * p5tuple_pkt, - fa_5tuple_t * p5tuple_sess) -{ - int src_index = is_input ? 0 : 1; - int dst_index = is_input ? 1 : 0; - u32 valid_new_sess = 1; - p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0]; - p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1]; - p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64; - - if (PREDICT_TRUE (p5tuple_pkt->l4.proto != icmp_protos[is_ip6])) - { - p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0]; - p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1]; - } - else - { - static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap }; - static const u8 *icmp_valid_new[] = - { icmp4_valid_new, icmp6_valid_new }; - static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap), - sizeof (icmp6_invmap) - }; - static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new), - sizeof (icmp6_valid_new) - }; - int type = - is_ip6 ? p5tuple_pkt->l4.port[0] - 128 : p5tuple_pkt->l4.port[0]; - - p5tuple_sess->l4.port[0] = p5tuple_pkt->l4.port[0]; - p5tuple_sess->l4.port[1] = p5tuple_pkt->l4.port[1]; - - /* - * Invert ICMP type for valid icmp_invmap messages: - * 1) input node with outbound ACL interface - * 2) output node with inbound ACL interface - * - */ - if ((is_input && acl_fa_ifc_has_out_acl (am, sw_if_index)) || - (!is_input && acl_fa_ifc_has_in_acl (am, sw_if_index))) - { - if (type >= 0 && - type <= icmp_invmap_size[is_ip6] && icmp_invmap[is_ip6][type]) - { - p5tuple_sess->l4.port[0] = icmp_invmap[is_ip6][type] - 1; - } - } - - /* - * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table - * are allowed to create stateful ACL. - * The other messages will be forwarded without creating a reflexive ACL. - */ - if (type < 0 || - type > icmp_valid_new_size[is_ip6] || !icmp_valid_new[is_ip6][type]) - { - valid_new_sess = 0; - } - } - - return valid_new_sess; -} - always_inline int fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess) { @@ -316,6 +250,100 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, return 3; } +always_inline u64 +reverse_l4_u64_fastpath (u64 l4, int is_ip6) +{ + fa_session_l4_key_t l4i = {.as_u64 = l4 }; + fa_session_l4_key_t l4o; + + l4o.port[1] = l4i.port[0]; + l4o.port[0] = l4i.port[1]; + + l4o.non_port_l4_data = l4i.non_port_l4_data; + l4o.is_input = 1 - l4i.is_input; + return l4o.as_u64; +} + +always_inline u64 +reverse_l4_u64_slowpath (u64 l4, int is_ip6) +{ + fa_session_l4_key_t l4i = {.as_u64 = l4 }; + fa_session_l4_key_t l4o; + + if (l4i.proto == icmp_protos[is_ip6]) + { + static const u8 *icmp_invmap[] = { icmp4_invmap, icmp6_invmap }; + static const u8 *icmp_valid_new[] = + { icmp4_valid_new, icmp6_valid_new }; + static const u8 icmp_invmap_size[] = { sizeof (icmp4_invmap), + sizeof (icmp6_invmap) + }; + static const u8 icmp_valid_new_size[] = { sizeof (icmp4_valid_new), + sizeof (icmp6_valid_new) + }; + int type = is_ip6 ? l4i.port[0] - 128 : l4i.port[0]; + + l4o.non_port_l4_data = l4i.non_port_l4_data; + l4o.port[0] = l4i.port[0]; + l4o.port[1] = l4i.port[1]; + + + /* + * ONLY ICMP messages defined in icmp4_valid_new/icmp6_valid_new table + * are allowed to create stateful ACL. + * The other messages will be forwarded without creating a reverse session. + */ + + if (type >= 0 && (type <= icmp_valid_new_size[is_ip6]) + && (icmp_valid_new[is_ip6][type]) + && (type <= icmp_invmap_size[is_ip6]) && icmp_invmap[is_ip6][type]) + { + /* + * we set the inverse direction and correct the port, + * if it is okay to add the reverse session. + * If not, then the same session will be added twice + * to bihash, which is the same as adding just one session. + */ + l4o.is_input = 1 - l4i.is_input; + l4o.port[0] = icmp_invmap[is_ip6][type] - 1; + } + + return l4o.as_u64; + } + else + return reverse_l4_u64_fastpath (l4, is_ip6); +} + +always_inline u64 +reverse_l4_u64 (u64 l4, int is_ip6) +{ + fa_session_l4_key_t l4i = {.as_u64 = l4 }; + + if (PREDICT_FALSE (l4i.is_slowpath)) + { + return reverse_l4_u64_slowpath (l4, is_ip6); + } + else + { + return reverse_l4_u64_fastpath (l4, is_ip6); + } +} + +always_inline void +reverse_session_add_del (acl_main_t * am, const int is_ip6, + clib_bihash_kv_40_8_t * pkv, int is_add) +{ + clib_bihash_kv_40_8_t kv2; + /* the first 4xu64 is two addresses, so just swap them */ + kv2.key[0] = pkv->key[2]; + kv2.key[1] = pkv->key[3]; + kv2.key[2] = pkv->key[0]; + kv2.key[3] = pkv->key[1]; + /* the last u64 needs special treatment (ports, etc.) */ + kv2.key[4] = reverse_l4_u64 (pkv->key[4], is_ip6); + kv2.value = pkv->value; + clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv2, is_add); +} always_inline void acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, @@ -326,6 +354,9 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, get_session_ptr (am, sess_id.thread_index, sess_id.session_index); ASSERT (sess->thread_index == os_get_thread_index ()); clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &sess->info.kv, 0); + + reverse_session_add_del (am, sess->info.pkt.is_ip6, &sess->info.kv, 0); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index]; pool_put_index (pw->fa_sessions_pool, sess_id.session_index); /* Deleting from timer structures not needed, @@ -362,9 +393,11 @@ acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, } } + always_inline fa_session_t * -acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, - fa_5tuple_t * p5tuple, u16 current_policy_epoch) +acl_fa_add_session (acl_main_t * am, int is_input, int is_ip6, + u32 sw_if_index, u64 now, fa_5tuple_t * p5tuple, + u16 current_policy_epoch) { clib_bihash_kv_40_8_t *pkv = &p5tuple->kv; clib_bihash_kv_40_8_t kv; @@ -396,10 +429,11 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, sess->link_prev_idx = ~0; sess->link_next_idx = ~0; - - ASSERT (am->fa_sessions_hash_is_initialized == 1); clib_bihash_add_del_40_8 (&am->fa_sessions_hash, &kv, 1); + + reverse_session_add_del (am, is_ip6, &kv, 1); + acl_fa_conn_list_add_session (am, f_sess_id, now); vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index); |