aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
authorVladislav Grishenko <themiron@yandex-team.ru>2024-01-24 16:17:23 +0500
committerNeale Ranns <neale@graphiant.com>2024-04-09 04:47:02 +0000
commit302db471a01b5c21b80e9bc769315cef4da0ed80 (patch)
treebe284a966c720d6dc601128759e42a62553db2a7 /src/vnet
parent2a7bc81ae302b02e8e0d4eec7db82c1ca15b7f7a (diff)
mpls: fix default mpls lb hash config
In case of multiple path within tunnel, mpls lookup node computes lb hash with mpls_compute_flow_hash config value 0, so only mpls label and l4 ports gets accounted, not 5-tuple. This leads to flow traffic polarization and disbalance over mpls paths. Use mpls hash config from lb instead, usually it'll be MPLS_FLOw_HASH_DEFAULT with 5-tuple plus flowlabel. As optimization, fix flow hash reuse from the previous lookup node if present, like ip_lookup does. Previously mpls lookup always calcs the hash. Test lb distribution for both cases. Also, use the same flow hash hex format in ip4/ip6 and mpls traces for easier reading, most code changes is due fixstyle formatting. Type: fix Signed-off-by: Vladislav Grishenko <themiron@yandex-team.ru> Change-Id: Ib89e1ab3edec14269866fe825a3e887d6c817b7c
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/ip/ip4_forward.c8
-rw-r--r--src/vnet/ip/ip6_forward.c3
-rw-r--r--src/vnet/mpls/mpls_lookup.c235
3 files changed, 125 insertions, 121 deletions
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index e85c888f669..ff74b52eb18 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -1190,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%U%U",
- format_white_space, indent,
- format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
+ t->packet_data, sizeof (t->packet_data));
return s;
}
#endif
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 06c473b1495..48fb633fd32 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -948,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args)
ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ufib:%d adj:%d flow:%d",
- format_white_space, indent,
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
t->fib_index, t->adj_index, t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index db423392c03..a5ac56534a5 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -44,13 +44,13 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
- s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
- "label %d eos %d",
- t->next_index, t->lfib_index, t->lb_index, t->hash,
- vnet_mpls_uc_get_label(
- clib_net_to_host_u32(t->label_net_byte_order)),
- vnet_mpls_uc_get_s(
- clib_net_to_host_u32(t->label_net_byte_order)));
+ s = format (
+ s,
+ "MPLS: next [%d], lookup fib index %d, LB index %d hash 0x%08x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label (clib_net_to_host_u32 (t->label_net_byte_order)),
+ vnet_mpls_uc_get_s (clib_net_to_host_u32 (t->label_net_byte_order)));
return s;
}
@@ -482,8 +482,8 @@ format_mpls_load_balance_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
- s = format (s, "MPLS: next [%d], LB index %d hash %d",
- t->next_index, t->lb_index, t->hash);
+ s = format (s, "MPLS: next [%d], LB index %d hash 0x%08x", t->next_index,
+ t->lb_index, t->hash);
return s;
}
@@ -553,75 +553,77 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
* We don't want to use the same hash value at each level in the recursion
* graph as that would lead to polarisation
*/
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
-
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
- if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
- }
- else
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
- }
- dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
- }
- else
- {
- dpo1 = load_balance_get_bucket_i (lb1, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- next1 = dpo1->dpoi_next_node;
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
- vlib_increment_combined_counter
- (cm, thread_index, lbi1, 1,
- vlib_buffer_length_in_chain (vm, p1));
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
- if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p1, sizeof (*tr));
- tr->next_index = next1;
- tr->lb_index = lbi1;
- tr->hash = hc1;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket (
+ lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->hash = hc1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (
+ vm, node, next, to_next, n_left_to_next, pi0, pi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -646,44 +648,45 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
lb0 = load_balance_get(lbi0);
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next,
+ n_left_to_next, pi0, next0);
+ }
vlib_put_next_frame (vm, node, next, n_left_to_next);
}