From 0d88301a576191a0e330e539cf1dcb3837ee1bf6 Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 11 May 2018 11:06:23 -0700 Subject: bond: performance harvesting - hash is great. But it is a bit too slow for the DP. Use direct array indexing to quickly retrieve the slave interface. - the algorithm used by flow hash is great. But it is a bit too slow for the DP. Use l2_hash_hash() extracted from lb_hash.h which ECMP is using. It makes use of intrinsic crc32 instruction set. - shortcut modulo arithmetic when the operand is 2**x (where x up to 4) to avoid division instruction. - special case for link count == 1 in bond_tx_fn() - use clib_mem_unaligned to access data for the packet to avoid alignment error - Fix some typos for packet tracing. Change-Id: I8eae3ad497061c5473aa675ba894ee0211120d25 Signed-off-by: Steven --- src/plugins/lacp/lacp_doc.md | 14 +-- src/plugins/lb/lbhash.h | 22 +--- src/vnet/bonding/cli.c | 15 ++- src/vnet/bonding/device.c | 255 +++++++++++++++++++++++++------------------ src/vnet/bonding/node.c | 28 ++--- src/vnet/bonding/node.h | 18 +-- src/vppinfra.am | 1 + src/vppinfra/crc32.h | 2 + src/vppinfra/lb_hash_hash.h | 69 ++++++++++++ test/test_bond.py | 14 +-- 10 files changed, 266 insertions(+), 172 deletions(-) create mode 100644 src/vppinfra/lb_hash_hash.h diff --git a/src/plugins/lacp/lacp_doc.md b/src/plugins/lacp/lacp_doc.md index 1f7fe4a4493..fb2bede278c 100644 --- a/src/plugins/lacp/lacp_doc.md +++ b/src/plugins/lacp/lacp_doc.md @@ -18,27 +18,27 @@ a high bandwidth transmission medium and create a fault-tolerant link. create bond mode lacp [hw-addr ] [load-balance { l2 | l23 | l34 }] 2. Enslave the physical interface to the bond -enslave interface to [passive] [long-timeout]" +bond add [passive] [long-timeout]" 3. Delete the bond interface delete bond { | sw_if_index } 4. 
Detach the slave interface from the bond -detach interface +bond del ### Configuration example ``` create bond mode lacp set interface state BondEthernet0 up -enslave interface TenGigabitEthernet7/0/0 to BondEthernet1 -enslave interface TenGigabitEthernet7/0/1 to BondEthernet1 -enslave interface TenGigabitEthernet5/0/0 to BondEthernet1 -enslave interface TenGigabitEthernet5/0/1 to BondEthernet1 +bond add BondEthernet0 TenGigabitEthernet7/0/0 +bond add BondEthernet0 TenGigabitEthernet7/0/1 +bond add BondEthernet0 TenGigabitEthernet5/0/0 +bond add BondEthernet0 TenGigabitEthernet5/0/1 ``` ``` -detach interface TenGigabitEthernet5/0/1 +bond del TenGigabitEthernet5/0/1 ``` ``` diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h index 10d3beab13b..5d2ff24f044 100644 --- a/src/plugins/lb/lbhash.h +++ b/src/plugins/lb/lbhash.h @@ -30,6 +30,7 @@ #define LB_PLUGIN_LB_LBHASH_H_ #include +#include #if defined (__SSE4_2__) #include @@ -101,27 +102,6 @@ void lb_hash_free(lb_hash_t *h) vec_free(mem); } -#if defined(clib_crc32c_uses_intrinsics) && !defined (__i386__) -static_always_inline -u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) -{ - u64 val = 0; - val = crc32_u64(val, k0); - val = crc32_u64(val, k1); - val = crc32_u64(val, k2); - val = crc32_u64(val, k3); - val = crc32_u64(val, k4); - return (u32) val; -} -#else -static_always_inline -u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) -{ - u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4; - return (u32)clib_xxhash (tmp); -} -#endif - static_always_inline void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash) { diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index b66c4af337d..2799bb88b99 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -142,7 +142,7 @@ bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) bif->port_number_bitmap = clib_bitmap_set (bif->port_number_bitmap, ntohs (sif->actor_admin.port_number) - 1, 0); - hash_unset (bm->neighbor_by_sw_if_index, 
sif->sw_if_index); + bm->slave_by_sw_if_index[sif->sw_if_index] = 0; vec_free (sif->last_marker_pkt); vec_free (sif->last_rx_pkt); vec_foreach_index (i, bif->slaves) @@ -451,8 +451,15 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) else sif->ttl_in_seconds = LACP_SHORT_TIMOUT_TIME; - hash_set (bm->neighbor_by_sw_if_index, sif->sw_if_index, - sif - bm->neighbors); + vec_validate_aligned (bm->slave_by_sw_if_index, sif->sw_if_index, + CLIB_CACHE_LINE_BYTES); + /* + * sif - bm->neighbors may be 0 + * Left shift it by 1 bit to distinguish the valid entry that we actually + * store from the null entries + */ + bm->slave_by_sw_if_index[sif->sw_if_index] = + (uword) (((sif - bm->neighbors) << 1) | 1); vec_add1 (bif->slaves, sif->sw_if_index); sif_hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); @@ -726,7 +733,7 @@ bond_cli_init (vlib_main_t * vm) bm->vlib_main = vm; bm->vnet_main = vnet_get_main (); - bm->neighbor_by_sw_if_index = hash_create (0, sizeof (uword)); + vec_validate_aligned (bm->slave_by_sw_if_index, 1, CLIB_CACHE_LINE_BYTES); return 0; } diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index 9e1bb9517e4..8ddec80850a 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -22,6 +22,7 @@ #include #include #include +#include #define foreach_bond_tx_error \ _(NONE, "no error") \ @@ -126,7 +127,7 @@ bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) @@ -160,29 +161,26 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current 
(b0); - u32 a = 0, b = 0, c = 0, t1, t2; - u16 t11, t22; + u32 c; + u64 *dst = (u64 *) & eth->dst_address[0]; + u64 a = clib_mem_unaligned (dst, u64); + u32 *src = (u32 *) & eth->src_address[2]; + u32 b = clib_mem_unaligned (src, u32); - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, &eth->src_address[4], sizeof (t11)); - a = t1 ^ t11; + c = lb_hash_hash_2_tuples (a, b); - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, &eth->dst_address[4], sizeof (t22)); - b = t2 ^ t22; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } -static inline u16 * +static_always_inline u16 * bond_locate_ethertype (ethernet_header_t * eth) { u16 *ethertype_p; @@ -205,7 +203,7 @@ bond_locate_ethertype (ethernet_header_t * eth) return ethertype_p; } -static inline u32 +static_always_inline u32 bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { @@ -213,9 +211,10 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, u8 ip_version; ip4_header_t *ip4; u16 ethertype, *ethertype_p; + u32 *mac1, *mac2, *mac3; ethertype_p = bond_locate_ethertype (eth); - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -226,55 +225,63 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u16 t11, t22; - u32 a = 0, b = 0, c = 0, t1, t2; - - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, &eth->src_address[4], sizeof (t11)); - a = t1 ^ t11; - - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, &eth->dst_address[4], sizeof (t22)); - b = t2 ^ t22; - - c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - 
return c % slave_count; + u32 a, c; + + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 a, b, c; - u64 t1 = 0, t2 = 0; + u64 a; + u32 c; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); - memcpy (&t1, eth->src_address, sizeof (eth->src_address)); - memcpy (&t2, eth->dst_address, sizeof (eth->dst_address)); - a = t1 ^ t2; - - b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - hash_mix64 (a, b, c); - return c % slave_count; + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); u8 ip_version; - uword is_tcp_udp = 0; + uword is_tcp_udp; ip4_header_t *ip4; u16 ethertype, *ethertype_p; ethertype_p = bond_locate_ethertype (eth); - 
ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -285,29 +292,30 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u32 a = 0, b = 0, c = 0, t1, t2; + u32 a, c, t1, t2; tcp_header_t *tcp = (void *) (ip4 + 1); + is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) || (ip4->protocol == IP_PROTOCOL_UDP); - - a = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - b = t1 + (t2 << 16); - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 a, b, c; - u64 t1, t2; + u64 a; + u32 c, t1, t2; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); tcp_header_t *tcp = (void *) (ip6 + 1); + is_tcp_udp = 0; if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) || (ip6->protocol == IP_PROTOCOL_UDP))) { @@ -325,33 +333,43 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); } } - a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - c = (t2 << 16) | t1; - hash_mix64 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? 
clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_round_robin (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { bif->lb_rr_last_index++; - bif->lb_rr_last_index %= slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + bif->lb_rr_last_index &= slave_count - 1; + else + bif->lb_rr_last_index %= slave_count; return bif->lb_rr_last_index; } -static inline u32 +static_always_inline u32 bond_load_balance_active_backup (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, @@ -379,8 +397,7 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 *from = vlib_frame_vector_args (frame); u32 n_left_from; ethernet_header_t *eth; - u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; - u32 port, port1, port2, port3; + u32 port; u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; bond_packet_trace_t *t0; uword n_trace = vlib_get_trace_count (vm, node); @@ -435,6 +452,9 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { while (n_left_from >= 4) { + u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; + u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0; + // Prefetch next iteration if (n_left_from >= 8) { @@ -445,10 +465,10 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, p6 = vlib_get_buffer (vm, from[6]); p7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (p4, STORE); - vlib_prefetch_buffer_header (p5, STORE); - vlib_prefetch_buffer_header 
(p6, STORE); - vlib_prefetch_buffer_header (p7, STORE); + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD); @@ -476,20 +496,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX]; sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - port1 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b1, slave_count); - port2 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b2, slave_count); - port3 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b3, slave_count); - - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + { + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b0, + slave_count); + port1 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b1, + slave_count); + port2 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b2, + slave_count); + port3 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b3, + slave_count); + } + + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1); sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2); sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3); @@ -499,23 +526,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2; vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE 
((bif->per_thread_info[thread_index].frame[port0] + == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - if (bif->per_thread_info[thread_index].frame[port1] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port1] + == 0))) bif->per_thread_info[thread_index].frame[port1] = vnet_get_frame_to_sw_interface (vnm, sif_if_index1); - if (bif->per_thread_info[thread_index].frame[port2] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port2] + == 0))) bif->per_thread_info[thread_index].frame[port2] = vnet_get_frame_to_sw_interface (vnm, sif_if_index2); - if (bif->per_thread_info[thread_index].frame[port3] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port3] + == 0))) bif->per_thread_info[thread_index].frame[port3] = vnet_get_frame_to_sw_interface (vnm, sif_if_index3); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); @@ -597,13 +628,16 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left_from > 0) { + u32 next0 = 0; + u32 port0 = 0; + // Prefetch next iteration if (n_left_from > 1) { vlib_buffer_t *p2; p2 = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p2, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); } @@ -614,15 +648,18 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, + b0, + slave_count); + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); 
vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE + ((bif->per_thread_info[thread_index].frame[port0] == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 65d3ba10470..ec251f550cf 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -90,20 +90,21 @@ bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (sif != 0)) { - bif = bond_get_master_by_sw_if_index (sif->group); + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); if (PREDICT_TRUE (bif != 0)) { if (PREDICT_TRUE (vec_len (bif->slaves) >= 1)) { if (PREDICT_TRUE (bif->admin_up == 1)) { - if (!ethernet_frame_is_tagged (ntohs (eth->type))) + ethertype = clib_mem_unaligned (&eth->type, u16); + if (!ethernet_frame_is_tagged (ntohs (ethertype))) { // Let some layer2 packets pass through. 
- if (PREDICT_TRUE ((eth->type != + if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) && !packet_is_cdp (eth) - && (eth->type != + && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP)))) { @@ -128,12 +129,13 @@ bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, { vlan = (void *) (eth + 1); ethertype_p = &vlan->type; - if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN)) + ethertype = clib_mem_unaligned (ethertype_p, u16); + if (ethertype == ntohs (ETHERNET_TYPE_VLAN)) { vlan++; ethertype_p = &vlan->type; } - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) && (ethertype != @@ -226,10 +228,10 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, b6 = vlib_get_buffer (vm, from[6]); b7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (b4, STORE); - vlib_prefetch_buffer_header (b5, STORE); - vlib_prefetch_buffer_header (b6, STORE); - vlib_prefetch_buffer_header (b7, STORE); + vlib_prefetch_buffer_header (b4, LOAD); + vlib_prefetch_buffer_header (b5, LOAD); + vlib_prefetch_buffer_header (b6, LOAD); + vlib_prefetch_buffer_header (b7, LOAD); CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD); @@ -314,7 +316,7 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (n_trace > 0)) { - vlib_trace_buffer (vm, node, next1, b2, + vlib_trace_buffer (vm, node, next2, b2, 0 /* follow_chain */ ); vlib_set_trace_count (vm, node, --n_trace); t0 = vlib_add_trace (vm, node, b2, sizeof (*t0)); @@ -325,7 +327,7 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (n_trace > 0)) { - vlib_trace_buffer (vm, node, next1, b2, + vlib_trace_buffer (vm, node, next3, b3, 0 /* follow_chain */ ); vlib_set_trace_count (vm, node, --n_trace); t0 = vlib_add_trace (vm, node, b3, sizeof (*t0)); @@ -358,7 +360,7 @@ bond_input_fn (vlib_main_t 
* vm, vlib_node_runtime_t * node, vlib_buffer_t *p2; p2 = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p2, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); } diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 3a01abe2226..5c6ff32ac4a 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -31,6 +31,9 @@ #define MIN(x,y) (((x)<(y))?(x):(y)) #endif +#define BOND_MODULO_SHORTCUT(a) \ + (((a) == 2) || ((a) == 4) || ((a) == 8) || ((a) == 16)) + #define foreach_bond_mode \ _ (1, ROUND_ROBIN, "round-robin") \ _ (2, ACTIVE_BACKUP, "active-backup") \ @@ -289,9 +292,6 @@ typedef struct /* pool of lacp neighbors */ slave_if_t *neighbors; - /* rapidly find a neighbor by vlib software interface index */ - uword *neighbor_by_sw_if_index; - /* rapidly find a bond by vlib software interface index */ uword *bond_by_sw_if_index; @@ -303,6 +303,8 @@ typedef struct u8 lacp_plugin_loaded; lacp_enable_disable_func lacp_enable_disable; + + uword *slave_by_sw_if_index; } bond_main_t; /* bond packet trace capture */ @@ -439,13 +441,15 @@ bond_get_slave_by_sw_if_index (u32 sw_if_index) { bond_main_t *bm = &bond_main; slave_if_t *sif = 0; - uword *p; + uword p; - p = hash_get (bm->neighbor_by_sw_if_index, sw_if_index); - if (p) + if (sw_if_index < vec_len (bm->slave_by_sw_if_index)) { - sif = pool_elt_at_index (bm->neighbors, p[0]); + p = bm->slave_by_sw_if_index[sw_if_index]; + if (p) + sif = pool_elt_at_index (bm->neighbors, p >> 1); } + return sif; } diff --git a/src/vppinfra.am b/src/vppinfra.am index 6555528317d..ec271e6dc63 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -201,6 +201,7 @@ nobase_include_HEADERS = \ vppinfra/clib_error.h \ vppinfra/cpu.h \ vppinfra/crc32.h \ + vppinfra/lb_hash_hash.h \ vppinfra/dlist.h \ vppinfra/elf.h \ vppinfra/elf_clib.h \ diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h index 340b539558e..7361129ed55 100644 --- a/src/vppinfra/crc32.h 
+++ b/src/vppinfra/crc32.h @@ -23,6 +23,7 @@ #include #define crc32_u64 _mm_crc32_u64 +#define crc32_u32 _mm_crc32_u32 static_always_inline u32 clib_crc32c (u8 * s, int len) @@ -56,6 +57,7 @@ clib_crc32c (u8 * s, int len) #define crc32_u64 __crc32cd +#define crc32_u32 __crc32cw static_always_inline u32 clib_crc32c (u8 * s, int len) diff --git a/src/vppinfra/lb_hash_hash.h b/src/vppinfra/lb_hash_hash.h new file mode 100644 index 00000000000..fb251591eeb --- /dev/null +++ b/src/vppinfra/lb_hash_hash.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __included_lb_hash_hash_h__ +#define __included_lb_hash_hash_h__ + +#include +#include + +#if defined(clib_crc32c_uses_intrinsics) && !defined (__i386__) +static_always_inline u32 +lb_hash_hash (u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) +{ + u64 val = 0; + val = crc32_u64 (val, k0); + val = crc32_u64 (val, k1); + val = crc32_u64 (val, k2); + val = crc32_u64 (val, k3); + val = crc32_u64 (val, k4); + return (u32) val; +} + +/* Note: k0 is u64 and k1 is u32 */ +static_always_inline u32 +lb_hash_hash_2_tuples (u64 k0, u32 k1) +{ + u64 val = 0; + val = crc32_u64 (val, k0); + val = crc32_u32 (val, k1); + return (u32) val; +} +#else +static_always_inline u32 +lb_hash_hash (u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) +{ + u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4; + return (u32) clib_xxhash (tmp); +} + +/* Note: k0 is u64 and k1 is u32 */ +static_always_inline u32 +lb_hash_hash_2_tuples (u64 k0, u32 k1) +{ + u64 tmp = k0 ^ k1; + return (u32) clib_xxhash (tmp); +} +#endif + +#endif /* __included_lb_hash_hash_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/test/test_bond.py b/test/test_bond.py index b54a1f1deb5..e354601ca91 100644 --- a/test/test_bond.py +++ b/test/test_bond.py @@ -136,19 +136,11 @@ class TestBondInterface(VppTestCase): found = 1 self.assertEqual(found, 1) - # pg0 tx bytes = 142 - intfs = self.vapi.cli("show interface pg0").split("\n") - found = 0 - for intf in intfs: - if "tx bytes" in intf and "142" in intf: - found = 1 - self.assertEqual(found, 1) - - # pg0 tx bytes = 142 - intfs = self.vapi.cli("show interface pg1").split("\n") + # BondEthernet0 tx bytes = 284 + intfs = self.vapi.cli("show interface BondEthernet0").split("\n") found = 0 for intf in intfs: - if "tx bytes" in intf and "142" in intf: + if "tx bytes" in intf and "284" in intf: found = 1 self.assertEqual(found, 1) -- cgit 1.2.3-korg