aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Yourtchenko <ayourtch@gmail.com>2018-10-25 19:01:49 +0200
committerFlorin Coras <florin.coras@gmail.com>2018-11-05 21:53:43 +0000
commit87ee947d0b053b33571c5e33617b138236bada59 (patch)
tree5f4fb61107d8ffa2faba1743c4b62c583724745b
parentb0073e276d9e12f02f8f9874fd09ae532a0baa47 (diff)
acl-plugin: 5-tuple parse: get rid of memcpy and move to flags vs. bitfields
Using bitfield struct for 5tuple proved to be fragile from the performance standpoint - the zeroizing of the entire structure and then setting the separate pieces of it triggers increased memory latency. So, move to using flags byte. Also, use the direct object copies rather than memcpy. Change-Id: Iad8faf9de050ff1256e40c950dee212cbd3e5267 Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
-rw-r--r--src/plugins/acl/fa_node.h24
-rw-r--r--src/plugins/acl/public_inlines.h125
-rw-r--r--src/plugins/acl/session_inlines.h8
3 files changed, 77 insertions, 80 deletions
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index 57e102703d5..903ef874fb7 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -38,6 +38,11 @@ typedef union {
};
} fa_packet_info_t;
+typedef enum {
+ FA_SK_L4_FLAG_IS_INPUT = (1 << 0),
+ FA_SK_L4_FLAG_IS_SLOWPATH = (1 << 1),
+} fa_session_l4_key_l4_flags_t;
+
typedef union {
u64 as_u64;
struct {
@@ -45,9 +50,7 @@ typedef union {
union {
struct {
u8 proto;
- u8 is_input: 1;
- u8 is_slowpath: 1;
- u8 reserved0: 6;
+ u8 l4_flags;
u16 lsb_of_sw_if_index;
};
u32 non_port_l4_data;
@@ -55,6 +58,13 @@ typedef union {
};
} fa_session_l4_key_t;
+
+static_always_inline
+int is_session_l4_key_u64_slowpath(u64 l4key) {
+ fa_session_l4_key_t k = { .as_u64 = l4key };
+ return (k.l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
+}
+
typedef union {
struct {
union {
@@ -83,11 +93,13 @@ static_always_inline u8 *
format_fa_session_l4_key(u8 * s, va_list * args)
{
fa_session_l4_key_t *l4 = va_arg (*args, fa_session_l4_key_t *);
+ int is_input = (l4->l4_flags & FA_SK_L4_FLAG_IS_INPUT) ? 1 : 0;
+ int is_slowpath = (l4->l4_flags & FA_SK_L4_FLAG_IS_SLOWPATH) ? 1 : 0;
- return (format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d reserved0 0x%02x port %d -> %d",
+ return (format (s, "l4 lsb_of_sw_if_index %d proto %d l4_is_input %d l4_slow_path %d l4_flags 0x%02x port %d -> %d",
l4->lsb_of_sw_if_index,
- l4->proto, l4->is_input, l4->is_slowpath,
- l4->reserved0, l4->port[0], l4->port[1]));
+ l4->proto, is_input, is_slowpath,
+ l4->l4_flags, l4->port[0], l4->port[1]));
}
typedef struct {
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index 850babfa872..ba174c9b2a6 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -58,6 +58,13 @@ offset_within_packet (vlib_buffer_t * b0, int offset)
return (offset <= (b0->current_length - 8));
}
+always_inline int
+offset_beyond_packet (vlib_buffer_t * b0, int offset)
+{
+ /* For the purposes of this code, "within" means we have at least 8 bytes after it */
+ return (offset > (b0->current_length - 8));
+}
+
always_inline void
acl_fill_5tuple_l3_data (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
@@ -65,20 +72,19 @@ acl_fill_5tuple_l3_data (acl_main_t * am, vlib_buffer_t * b0, int is_ip6,
{
if (is_ip6)
{
- clib_memcpy (&p5tuple_pkt->ip6_addr,
- get_ptr_to_offset (b0,
- offsetof (ip6_header_t,
- src_address) + l3_offset),
- sizeof (p5tuple_pkt->ip6_addr));
+ ip6_header_t *ip6 = vlib_buffer_get_current (b0) + l3_offset;
+ p5tuple_pkt->ip6_addr[0] = ip6->src_address;
+ p5tuple_pkt->ip6_addr[1] = ip6->dst_address;
}
else
{
- clib_memset(p5tuple_pkt->l3_zero_pad, 0, sizeof(p5tuple_pkt->l3_zero_pad));
- clib_memcpy (&p5tuple_pkt->ip4_addr,
- get_ptr_to_offset (b0,
- offsetof (ip4_header_t,
- src_address) + l3_offset),
- sizeof (p5tuple_pkt->ip4_addr));
+ int ii;
+ for(ii=0; ii<6; ii++) {
+ p5tuple_pkt->l3_zero_pad[ii] = 0;
+ }
+ ip4_header_t *ip4 = vlib_buffer_get_current (b0) + l3_offset;
+ p5tuple_pkt->ip4_addr[0] = ip4->src_address;
+ p5tuple_pkt->ip4_addr[1] = ip4->dst_address;
}
}
@@ -90,23 +96,19 @@ acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_
static u8 icmp_protos_v4v6[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 };
int l4_offset;
- u16 ports[2];
+ u16 ports[2] = { 0 };
u8 proto;
- fa_session_l4_key_t tmp_l4 = { .lsb_of_sw_if_index = sw_if_index0 & 0xffff };
+ u8 tmp_l4_flags = 0;
fa_packet_info_t tmp_pkt = { .is_ip6 = is_ip6, .mask_type_index_lsb = ~0 };
if (is_ip6)
{
- proto =
- *(u8 *) get_ptr_to_offset (b0,
- offsetof (ip6_header_t,
- protocol) + l3_offset);
+ ip6_header_t *ip6 = vlib_buffer_get_current (b0) + l3_offset;
+ proto = ip6->protocol;
+
l4_offset = l3_offset + sizeof (ip6_header_t);
-#ifdef FA_NODE_VERBOSE_DEBUG
- clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto,
- l4_offset);
-#endif
+
/* IP6 EH handling is here, increment l4_offset if needs to, update the proto */
int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
if (PREDICT_FALSE (need_skip_eh))
@@ -117,8 +119,7 @@ acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_
if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto))
{
proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
- u16 frag_offset;
- clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset));
+ u16 frag_offset = *(u16 *) get_ptr_to_offset (b0, 2 + l4_offset);
frag_offset = clib_net_to_host_u16(frag_offset) >> 3;
if (frag_offset)
{
@@ -138,10 +139,6 @@ acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_
proto = *(u8 *) get_ptr_to_offset (b0, l4_offset);
l4_offset += 8 * (1 + (u16) nwords);
}
-#ifdef FA_NODE_VERBOSE_DEBUG
- clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d",
- proto, l4_offset);
-#endif
need_skip_eh =
clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto);
}
@@ -149,21 +146,12 @@ acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_
}
else
{
- proto =
- *(u8 *) get_ptr_to_offset (b0,
- offsetof (ip4_header_t,
- protocol) + l3_offset);
- l4_offset = l3_offset + sizeof (ip4_header_t);
- u16 flags_and_fragment_offset;
- clib_memcpy (&flags_and_fragment_offset,
- get_ptr_to_offset (b0,
- offsetof (ip4_header_t,
- flags_and_fragment_offset)) + l3_offset,
- sizeof(flags_and_fragment_offset));
- flags_and_fragment_offset = clib_net_to_host_u16 (flags_and_fragment_offset);
+ ip4_header_t *ip4 = vlib_buffer_get_current (b0) + l3_offset;
+ proto = ip4->protocol;
+ l4_offset = l3_offset + ip4_header_bytes(ip4);
/* non-initial fragments have non-zero offset */
- if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset)))
+ if (PREDICT_FALSE(ip4_get_fragment_offset(ip4)))
{
tmp_pkt.is_nonfirst_fragment = 1;
/* invalidate L4 offset so we don't try to find L4 info */
@@ -171,50 +159,47 @@ acl_fill_5tuple_l4_and_pkt_data (acl_main_t * am, u32 sw_if_index0, vlib_buffer_
}
}
- tmp_l4.proto = proto;
- tmp_l4.is_input = is_input;
+ tmp_l4_flags |= is_input ? FA_SK_L4_FLAG_IS_INPUT : 0;
if (PREDICT_TRUE (offset_within_packet (b0, l4_offset)))
{
+ tcp_header_t *tcph = vlib_buffer_get_current (b0) + l4_offset;
+ udp_header_t *udph = vlib_buffer_get_current (b0) + l4_offset;
tmp_pkt.l4_valid = 1;
- if (icmp_protos_v4v6[is_ip6] == proto)
+
+ if (PREDICT_FALSE(icmp_protos_v4v6[is_ip6] == proto))
{
- /* type */
- tmp_l4.port[0] =
- *(u8 *) get_ptr_to_offset (b0,
- l4_offset + offsetof (icmp46_header_t,
- type));
- /* code */
- tmp_l4.port[1] =
- *(u8 *) get_ptr_to_offset (b0,
- l4_offset + offsetof (icmp46_header_t,
- code));
- tmp_l4.is_slowpath = 1;
+ icmp46_header_t *icmph = vlib_buffer_get_current (b0) + l4_offset;
+ ports[0] = icmph->type;
+ ports[1] = icmph->code;
+ /* ICMP needs special handling */
+ tmp_l4_flags |= FA_SK_L4_FLAG_IS_SLOWPATH;
}
- else if ((IP_PROTOCOL_TCP == proto) || (IP_PROTOCOL_UDP == proto))
+ else if (IP_PROTOCOL_TCP == proto)
{
- clib_memcpy (&ports,
- get_ptr_to_offset (b0,
- l4_offset + offsetof (tcp_header_t,
- src_port)),
- sizeof (ports));
- tmp_l4.port[0] = clib_net_to_host_u16 (ports[0]);
- tmp_l4.port[1] = clib_net_to_host_u16 (ports[1]);
-
- tmp_pkt.tcp_flags =
- *(u8 *) get_ptr_to_offset (b0,
- l4_offset + offsetof (tcp_header_t,
- flags));
- tmp_pkt.tcp_flags_valid = (proto == IP_PROTOCOL_TCP);
- tmp_l4.is_slowpath = 0;
+ ports[0] = clib_net_to_host_u16(tcph->src_port);
+ ports[1] = clib_net_to_host_u16(tcph->dst_port);
+ tmp_pkt.tcp_flags = tcph->flags;
+ tmp_pkt.tcp_flags_valid = 1;
}
+ else if (IP_PROTOCOL_UDP == proto)
+ {
+ ports[0] = clib_net_to_host_u16(udph->src_port);
+ ports[1] = clib_net_to_host_u16(udph->dst_port);
+ }
else
{
- tmp_l4.is_slowpath = 1;
+ tmp_l4_flags |= FA_SK_L4_FLAG_IS_SLOWPATH;
}
}
p5tuple_pkt->as_u64 = tmp_pkt.as_u64;
+
+ fa_session_l4_key_t tmp_l4 = { .port = { ports[0], ports[1] },
+ .proto = proto,
+ .l4_flags = tmp_l4_flags,
+ .lsb_of_sw_if_index = sw_if_index0 & 0xffff };
+
p5tuple_l4->as_u64 = tmp_l4.as_u64;
}
diff --git a/src/plugins/acl/session_inlines.h b/src/plugins/acl/session_inlines.h
index cd23f390b28..18d5dc8122b 100644
--- a/src/plugins/acl/session_inlines.h
+++ b/src/plugins/acl/session_inlines.h
@@ -290,7 +290,7 @@ reverse_l4_u64_fastpath (u64 l4, int is_ip6)
l4o.port[0] = l4i.port[1];
l4o.non_port_l4_data = l4i.non_port_l4_data;
- l4o.is_input = 1 - l4i.is_input;
+ l4o.l4_flags = l4i.l4_flags ^ FA_SK_L4_FLAG_IS_INPUT;
return l4o.as_u64;
}
@@ -331,7 +331,7 @@ reverse_l4_u64_slowpath_valid (u64 l4, int is_ip6, u64 * out)
&& icmp_invmap[is_ip6][type]);
if (valid_reverse_sess)
{
- l4o.is_input = 1 - l4i.is_input;
+ l4o.l4_flags = l4i.l4_flags ^ FA_SK_L4_FLAG_IS_INPUT;
l4o.port[0] = icmp_invmap[is_ip6][type] - 1;
}
@@ -355,7 +355,7 @@ reverse_session_add_del_ip6 (acl_main_t * am,
kv2.key[3] = pkv->key[1];
/* the last u64 needs special treatment (ports, etc.) so we do it last */
kv2.value = pkv->value;
- if (PREDICT_FALSE (((fa_session_l4_key_t) pkv->key[4]).is_slowpath))
+ if (PREDICT_FALSE (is_session_l4_key_u64_slowpath (pkv->key[4])))
{
if (reverse_l4_u64_slowpath_valid (pkv->key[4], 1, &kv2.key[4]))
clib_bihash_add_del_40_8 (&am->fa_ip6_sessions_hash, &kv2, is_add);
@@ -376,7 +376,7 @@ reverse_session_add_del_ip4 (acl_main_t * am,
((pkv->key[0] & 0xffffffff) << 32) | ((pkv->key[0] >> 32) & 0xffffffff);
/* the last u64 needs special treatment (ports, etc.) so we do it last */
kv2.value = pkv->value;
- if (PREDICT_FALSE (((fa_session_l4_key_t) pkv->key[1]).is_slowpath))
+ if (PREDICT_FALSE (is_session_l4_key_u64_slowpath (pkv->key[1])))
{
if (reverse_l4_u64_slowpath_valid (pkv->key[1], 0, &kv2.key[1]))
clib_bihash_add_del_16_8 (&am->fa_ip4_sessions_hash, &kv2, is_add);