From 0d88301a576191a0e330e539cf1dcb3837ee1bf6 Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 11 May 2018 11:06:23 -0700 Subject: bond: performance harvesting - hash is great. But it is a bit too slow for the DP. Use direct array indexing to quickly retrieve the slave interface. - the algorithm used by flow hash is great. But it is a bit too slow for the DP. Use lb_hash_hash() extracted from lb_hash.h which ECMP is using. It makes use of the intrinsic crc32 instruction set. - shortcut modulo arithmetic when the operand is 2**x (where x is 1 to 4) to avoid the division instruction. - special case for link count == 1 in bond_tx_fn() - use clib_mem_unaligned to access data for the packet to avoid alignment errors - Fix some typos for packet tracing. Change-Id: I8eae3ad497061c5473aa675ba894ee0211120d25 Signed-off-by: Steven --- src/vnet/bonding/cli.c | 15 ++- src/vnet/bonding/device.c | 255 ++++++++++++++++++++++++++-------------------- src/vnet/bonding/node.c | 28 ++--- src/vnet/bonding/node.h | 18 ++-- 4 files changed, 183 insertions(+), 133 deletions(-) (limited to 'src/vnet/bonding') diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c index b66c4af337d..2799bb88b99 100644 --- a/src/vnet/bonding/cli.c +++ b/src/vnet/bonding/cli.c @@ -142,7 +142,7 @@ bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, slave_if_t * sif) bif->port_number_bitmap = clib_bitmap_set (bif->port_number_bitmap, ntohs (sif->actor_admin.port_number) - 1, 0); - hash_unset (bm->neighbor_by_sw_if_index, sif->sw_if_index); + bm->slave_by_sw_if_index[sif->sw_if_index] = 0; vec_free (sif->last_marker_pkt); vec_free (sif->last_rx_pkt); vec_foreach_index (i, bif->slaves) @@ -451,8 +451,15 @@ bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args) else sif->ttl_in_seconds = LACP_SHORT_TIMOUT_TIME; - hash_set (bm->neighbor_by_sw_if_index, sif->sw_if_index, - sif - bm->neighbors); + vec_validate_aligned (bm->slave_by_sw_if_index, sif->sw_if_index, + CLIB_CACHE_LINE_BYTES); + /* + * sif - 
bm->neighbors may be 0 + * Left shift it by 1 bit to distinguish the valid entry that we actually + * store from the null entries + */ + bm->slave_by_sw_if_index[sif->sw_if_index] = + (uword) (((sif - bm->neighbors) << 1) | 1); vec_add1 (bif->slaves, sif->sw_if_index); sif_hw = vnet_get_sup_hw_interface (vnm, sif->sw_if_index); @@ -726,7 +733,7 @@ bond_cli_init (vlib_main_t * vm) bm->vlib_main = vm; bm->vnet_main = vnet_get_main (); - bm->neighbor_by_sw_if_index = hash_create (0, sizeof (uword)); + vec_validate_aligned (bm->slave_by_sw_if_index, 1, CLIB_CACHE_LINE_BYTES); return 0; } diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c index 9e1bb9517e4..8ddec80850a 100644 --- a/src/vnet/bonding/device.c +++ b/src/vnet/bonding/device.c @@ -22,6 +22,7 @@ #include #include #include +#include #define foreach_bond_tx_error \ _(NONE, "no error") \ @@ -126,7 +127,7 @@ bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) @@ -160,29 +161,26 @@ bond_load_balance_broadcast (vlib_main_t * vm, vlib_node_runtime_t * node, return 0; } -static inline u32 +static_always_inline u32 bond_load_balance_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); - u32 a = 0, b = 0, c = 0, t1, t2; - u16 t11, t22; + u32 c; + u64 *dst = (u64 *) & eth->dst_address[0]; + u64 a = clib_mem_unaligned (dst, u64); + u32 *src = (u32 *) & eth->src_address[2]; + u32 b = clib_mem_unaligned (src, u32); - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, &eth->src_address[4], sizeof (t11)); - a = t1 ^ t11; + c = lb_hash_hash_2_tuples (a, b); - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, &eth->dst_address[4], sizeof (t22)); - b = t2 ^ 
t22; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } -static inline u16 * +static_always_inline u16 * bond_locate_ethertype (ethernet_header_t * eth) { u16 *ethertype_p; @@ -205,7 +203,7 @@ bond_locate_ethertype (ethernet_header_t * eth) return ethertype_p; } -static inline u32 +static_always_inline u32 bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { @@ -213,9 +211,10 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, u8 ip_version; ip4_header_t *ip4; u16 ethertype, *ethertype_p; + u32 *mac1, *mac2, *mac3; ethertype_p = bond_locate_ethertype (eth); - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -226,55 +225,63 @@ bond_load_balance_l23 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u16 t11, t22; - u32 a = 0, b = 0, c = 0, t1, t2; - - memcpy (&t1, eth->src_address, sizeof (t1)); - memcpy (&t11, &eth->src_address[4], sizeof (t11)); - a = t1 ^ t11; - - memcpy (&t2, eth->dst_address, sizeof (t2)); - memcpy (&t22, &eth->dst_address[4], sizeof (t22)); - b = t2 ^ t22; - - c = ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + u32 a, c; + + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 
a, b, c; - u64 t1 = 0, t2 = 0; + u64 a; + u32 c; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); - memcpy (&t1, eth->src_address, sizeof (eth->src_address)); - memcpy (&t2, eth->dst_address, sizeof (eth->dst_address)); - a = t1 ^ t2; - - b = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - c = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - hash_mix64 (a, b, c); - return c % slave_count; + mac1 = (u32 *) & eth->dst_address[0]; + mac2 = (u32 *) & eth->dst_address[4]; + mac3 = (u32 *) & eth->src_address[2]; + + a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^ + clib_mem_unaligned (mac3, u32); + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { ethernet_header_t *eth = (ethernet_header_t *) vlib_buffer_get_current (b0); u8 ip_version; - uword is_tcp_udp = 0; + uword is_tcp_udp; ip4_header_t *ip4; u16 ethertype, *ethertype_p; ethertype_p = bond_locate_ethertype (eth); - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if ((ethertype != htons (ETHERNET_TYPE_IP4)) && (ethertype != htons (ETHERNET_TYPE_IP6))) @@ -285,29 +292,30 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, if (ip_version == 0x4) { - u32 a = 0, b = 0, c = 0, t1, t2; + u32 a, c, t1, t2; tcp_header_t *tcp = (void *) (ip4 + 1); + is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) || (ip4->protocol == IP_PROTOCOL_UDP); - - a = 
ip4->src_address.data_u32 ^ ip4->dst_address.data_u32; - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - b = t1 + (t2 << 16); - - hash_v3_mix32 (a, b, c); - hash_v3_finalize32 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64), + a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } else if (ip_version == 0x6) { - u64 a, b, c; - u64 t1, t2; + u64 a; + u32 c, t1, t2; ip6_header_t *ip6 = (ip6_header_t *) (eth + 1); tcp_header_t *tcp = (void *) (ip6 + 1); + is_tcp_udp = 0; if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) || (ip6->protocol == IP_PROTOCOL_UDP))) { @@ -325,33 +333,43 @@ bond_load_balance_l34 (vlib_main_t * vm, vlib_node_runtime_t * node, tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); } } - a = (ip6->src_address.as_u64[0] ^ ip6->src_address.as_u64[1]); - b = (ip6->dst_address.as_u64[0] ^ ip6->dst_address.as_u64[1]); - - t1 = is_tcp_udp ? tcp->src : 0; - t2 = is_tcp_udp ? tcp->dst : 0; - c = (t2 << 16) | t1; - hash_mix64 (a, b, c); - - return c % slave_count; + t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0; + t2 = is_tcp_udp ? 
clib_mem_unaligned (&tcp->dst, u16) : 0; + a = t1 ^ t2; + c = + lb_hash_hash (clib_mem_unaligned + (&ip6->src_address.as_uword[0], uword), + clib_mem_unaligned (&ip6->src_address.as_uword[1], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[0], + uword), + clib_mem_unaligned (&ip6->dst_address.as_uword[1], + uword), a); + if (BOND_MODULO_SHORTCUT (slave_count)) + return (c & (slave_count - 1)); + else + return c % slave_count; } return (bond_load_balance_l2 (vm, node, bif, b0, slave_count)); } -static inline u32 +static_always_inline u32 bond_load_balance_round_robin (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, uword slave_count) { bif->lb_rr_last_index++; - bif->lb_rr_last_index %= slave_count; + if (BOND_MODULO_SHORTCUT (slave_count)) + bif->lb_rr_last_index &= slave_count - 1; + else + bif->lb_rr_last_index %= slave_count; return bif->lb_rr_last_index; } -static inline u32 +static_always_inline u32 bond_load_balance_active_backup (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif, vlib_buffer_t * b0, @@ -379,8 +397,7 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 *from = vlib_frame_vector_args (frame); u32 n_left_from; ethernet_header_t *eth; - u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; - u32 port, port1, port2, port3; + u32 port; u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3; bond_packet_trace_t *t0; uword n_trace = vlib_get_trace_count (vm, node); @@ -435,6 +452,9 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { while (n_left_from >= 4) { + u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0; + u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0; + // Prefetch next iteration if (n_left_from >= 8) { @@ -445,10 +465,10 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, p6 = vlib_get_buffer (vm, from[6]); p7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (p4, STORE); - vlib_prefetch_buffer_header (p5, STORE); - vlib_prefetch_buffer_header 
(p6, STORE); - vlib_prefetch_buffer_header (p7, STORE); + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, LOAD); @@ -476,20 +496,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX]; sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - port1 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b1, slave_count); - port2 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b2, slave_count); - port3 = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b3, slave_count); - - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + { + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b0, + slave_count); + port1 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b1, + slave_count); + port2 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b2, + slave_count); + port3 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, + bif, b3, + slave_count); + } + + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); sif_if_index1 = *vec_elt_at_index (bif->active_slaves, port1); sif_if_index2 = *vec_elt_at_index (bif->active_slaves, port2); sif_if_index3 = *vec_elt_at_index (bif->active_slaves, port3); @@ -499,23 +526,27 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b2)->sw_if_index[VLIB_TX] = sif_if_index2; vnet_buffer (b3)->sw_if_index[VLIB_TX] = sif_if_index3; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE 
((bif->per_thread_info[thread_index].frame[port0] + == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - if (bif->per_thread_info[thread_index].frame[port1] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port1] + == 0))) bif->per_thread_info[thread_index].frame[port1] = vnet_get_frame_to_sw_interface (vnm, sif_if_index1); - if (bif->per_thread_info[thread_index].frame[port2] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port2] + == 0))) bif->per_thread_info[thread_index].frame[port2] = vnet_get_frame_to_sw_interface (vnm, sif_if_index2); - if (bif->per_thread_info[thread_index].frame[port3] == 0) + if (PREDICT_FALSE ((bif->per_thread_info[thread_index].frame[port3] + == 0))) bif->per_thread_info[thread_index].frame[port3] = vnet_get_frame_to_sw_interface (vnm, sif_if_index3); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); @@ -597,13 +628,16 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left_from > 0) { + u32 next0 = 0; + u32 port0 = 0; + // Prefetch next iteration if (n_left_from > 1) { vlib_buffer_t *p2; p2 = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p2, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); } @@ -614,15 +648,18 @@ bond_tx_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - port = - (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, - b0, slave_count); - sif_if_index = *vec_elt_at_index (bif->active_slaves, port); + if (PREDICT_TRUE (slave_count != 1)) + port0 = + (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif, + b0, + slave_count); + sif_if_index = *vec_elt_at_index (bif->active_slaves, port0); 
vnet_buffer (b0)->sw_if_index[VLIB_TX] = sif_if_index; - if (bif->per_thread_info[thread_index].frame[port] == 0) - bif->per_thread_info[thread_index].frame[port] = + if (PREDICT_FALSE + ((bif->per_thread_info[thread_index].frame[port0] == 0))) + bif->per_thread_info[thread_index].frame[port0] = vnet_get_frame_to_sw_interface (vnm, sif_if_index); - f = bif->per_thread_info[thread_index].frame[port]; + f = bif->per_thread_info[thread_index].frame[port0]; to_next = vlib_frame_vector_args (f); to_next += f->n_vectors; to_next[0] = vlib_get_buffer_index (vm, b0); diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c index 65d3ba10470..ec251f550cf 100644 --- a/src/vnet/bonding/node.c +++ b/src/vnet/bonding/node.c @@ -90,20 +90,21 @@ bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (sif != 0)) { - bif = bond_get_master_by_sw_if_index (sif->group); + bif = bond_get_master_by_dev_instance (sif->bif_dev_instance); if (PREDICT_TRUE (bif != 0)) { if (PREDICT_TRUE (vec_len (bif->slaves) >= 1)) { if (PREDICT_TRUE (bif->admin_up == 1)) { - if (!ethernet_frame_is_tagged (ntohs (eth->type))) + ethertype = clib_mem_unaligned (&eth->type, u16); + if (!ethernet_frame_is_tagged (ntohs (ethertype))) { // Let some layer2 packets pass through. 
- if (PREDICT_TRUE ((eth->type != + if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) && !packet_is_cdp (eth) - && (eth->type != + && (ethertype != htons (ETHERNET_TYPE_802_1_LLDP)))) { @@ -128,12 +129,13 @@ bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, { vlan = (void *) (eth + 1); ethertype_p = &vlan->type; - if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN)) + ethertype = clib_mem_unaligned (ethertype_p, u16); + if (ethertype == ntohs (ETHERNET_TYPE_VLAN)) { vlan++; ethertype_p = &vlan->type; } - ethertype = *ethertype_p; + ethertype = clib_mem_unaligned (ethertype_p, u16); if (PREDICT_TRUE ((ethertype != htons (ETHERNET_TYPE_SLOW_PROTOCOLS)) && (ethertype != @@ -226,10 +228,10 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, b6 = vlib_get_buffer (vm, from[6]); b7 = vlib_get_buffer (vm, from[7]); - vlib_prefetch_buffer_header (b4, STORE); - vlib_prefetch_buffer_header (b5, STORE); - vlib_prefetch_buffer_header (b6, STORE); - vlib_prefetch_buffer_header (b7, STORE); + vlib_prefetch_buffer_header (b4, LOAD); + vlib_prefetch_buffer_header (b5, LOAD); + vlib_prefetch_buffer_header (b6, LOAD); + vlib_prefetch_buffer_header (b7, LOAD); CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD); @@ -314,7 +316,7 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (n_trace > 0)) { - vlib_trace_buffer (vm, node, next1, b2, + vlib_trace_buffer (vm, node, next2, b2, 0 /* follow_chain */ ); vlib_set_trace_count (vm, node, --n_trace); t0 = vlib_add_trace (vm, node, b2, sizeof (*t0)); @@ -325,7 +327,7 @@ bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (n_trace > 0)) { - vlib_trace_buffer (vm, node, next1, b2, + vlib_trace_buffer (vm, node, next3, b3, 0 /* follow_chain */ ); vlib_set_trace_count (vm, node, --n_trace); t0 = vlib_add_trace (vm, node, b3, sizeof (*t0)); @@ -358,7 +360,7 @@ bond_input_fn (vlib_main_t 
* vm, vlib_node_runtime_t * node, vlib_buffer_t *p2; p2 = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p2, LOAD); CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); } diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h index 3a01abe2226..5c6ff32ac4a 100644 --- a/src/vnet/bonding/node.h +++ b/src/vnet/bonding/node.h @@ -31,6 +31,9 @@ #define MIN(x,y) (((x)<(y))?(x):(y)) #endif +#define BOND_MODULO_SHORTCUT(a) \ + (((a) == 2) || ((a) == 4) || ((a) == 8) || ((a) == 16)) + #define foreach_bond_mode \ _ (1, ROUND_ROBIN, "round-robin") \ _ (2, ACTIVE_BACKUP, "active-backup") \ @@ -289,9 +292,6 @@ typedef struct /* pool of lacp neighbors */ slave_if_t *neighbors; - /* rapidly find a neighbor by vlib software interface index */ - uword *neighbor_by_sw_if_index; - /* rapidly find a bond by vlib software interface index */ uword *bond_by_sw_if_index; @@ -303,6 +303,8 @@ typedef struct u8 lacp_plugin_loaded; lacp_enable_disable_func lacp_enable_disable; + + uword *slave_by_sw_if_index; } bond_main_t; /* bond packet trace capture */ @@ -439,13 +441,15 @@ bond_get_slave_by_sw_if_index (u32 sw_if_index) { bond_main_t *bm = &bond_main; slave_if_t *sif = 0; - uword *p; + uword p; - p = hash_get (bm->neighbor_by_sw_if_index, sw_if_index); - if (p) + if (sw_if_index < vec_len (bm->slave_by_sw_if_index)) { - sif = pool_elt_at_index (bm->neighbors, p[0]); + p = bm->slave_by_sw_if_index[sw_if_index]; + if (p) + sif = pool_elt_at_index (bm->neighbors, p >> 1); } + return sif; } -- cgit 1.2.3-korg