summary | refs | log | tree | commit | diff | stats
path: root/plugins/lb-plugin/lb/node.c
diff options
context:
space:
mode:
author Pierre Pfister <ppfister@cisco.com> 2016-10-05 09:38:21 +0100
committer Damjan Marion <dmarion.lists@gmail.com> 2016-10-05 09:33:07 +0000
commit d4bc9af55fc9a7bb5133da8c863569497cb92cd0 (patch)
tree 6a64ccf779e28162e6c6bd260b0602011aadc9d6 /plugins/lb-plugin/lb/node.c
parent 4c20e7197707aa95b144b289704e9e97335db17d (diff)
Load Balancer: Use FIB 2.0
This patch fixes the load balancer, which had not been working since FIB 2.0.

Two FIB DPO types are defined:
- One for IPv4 GRE
- One for IPv6 GRE

When an AS is created, the plugin automatically uses the result from the FIB in order to transmit the packet. Therefore, the packet does not need to visit ip-lookup twice.

The 'bypass' command was removed, as it is now done automatically using this process.

Change-Id: Ib505ba31bfc67897eaff752821087821c360360a
Signed-off-by: Pierre Pfister <ppfister@cisco.com>
Diffstat (limited to 'plugins/lb-plugin/lb/node.c')
-rw-r--r-- plugins/lb-plugin/lb/node.c 367
1 files changed, 156 insertions, 211 deletions
diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c
index c51a2108a0f..77beaac9bb2 100644
--- a/plugins/lb-plugin/lb/node.c
+++ b/plugins/lb-plugin/lb/node.c
@@ -36,28 +36,11 @@ static char *lb_error_strings[] = {
#undef _
};
-typedef enum {
- LB_NEXT_LOOKUP,
- LB_NEXT_REWRITE,
- LB_NEXT_DROP,
- LB_N_NEXT,
-} lb_next_t;
-
typedef struct { /* Per-packet trace record written by lb_node_fn for buffers flagged VLIB_BUFFER_IS_TRACED. */
u32 vip_index; /* Position of the packet's VIP within lb_main.vips (vip0 - lbm->vips). */
u32 as_index; /* Position of the selected AS within lb_main.ass (as0 - lbm->ass). */
} lb_trace_t;
-/* u8 *lb_format_adjacency(u8 * s, va_list * va) */
-/* { */
-/* lb_main_t *lbm = &lb_main; */
-/* __attribute((unused)) ip_lookup_main_t *lm = va_arg (*va, ip_lookup_main_t *); */
-/* ip_adjacency_t *adj = va_arg (*va, ip_adjacency_t *); */
-/* lb_adj_data_t *ad = (lb_adj_data_t *) &adj->opaque; */
-/* __attribute__((unused)) lb_vip_t *vip = pool_elt_at_index (lbm->vips, ad->vip_index); */
-/* return format(s, "vip idx:%d", ad->vip_index); */
-/* } */
-
u8 *
format_lb_trace (u8 * s, va_list * args)
{
@@ -108,169 +91,162 @@ lb_node_fn (vlib_main_t * vm,
u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
{
- /* ip_lookup_main_t *lm = (is_input_v4)?&ip4_main.lookup_main:&ip6_main.lookup_main; */
- /* lb_main_t *lbm = &lb_main; */
- /* vlib_node_runtime_t *error_node = node; */
- /* u32 n_left_from, *from, next_index, *to_next, n_left_to_next; */
- /* u32 cpu_index = os_get_cpu_number(); */
- /* u32 lb_time = lb_hash_time_now(vm); */
-
- /* lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); */
- /* from = vlib_frame_vector_args (frame); */
- /* n_left_from = frame->n_vectors; */
- /* next_index = node->cached_next_index; */
-
- /* while (n_left_from > 0) */
- /* { */
- /* vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); */
- /* while (n_left_from > 0 && n_left_to_next > 0) */
- /* { */
- /* u32 pi0; */
- /* vlib_buffer_t *p0; */
- /* ip_adjacency_t *adj0; */
- /* lb_adj_data_t *ad0; */
- /* lb_vip_t *vip0; */
- /* lb_as_t *as0; */
- /* gre_header_t *gre0; */
- /* u16 len0; */
- /* u32 value0, available_index0, hash0; */
- /* u64 key0[5]; */
- /* lb_error_t error0 = LB_ERROR_NONE; */
- /* lb_next_t next0 = LB_NEXT_LOOKUP; */
-
- /* if (PREDICT_TRUE(n_left_from > 1)) */
- /* { */
- /* vlib_buffer_t *p2; */
- /* p2 = vlib_get_buffer(vm, from[1]); */
- /* vlib_prefetch_buffer_header(p2, STORE); */
- /* /\* IPv4 + 8 = 28. possibly plus -40 *\/ */
- /* CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); */
- /* } */
-
- /* pi0 = to_next[0] = from[0]; */
- /* from += 1; */
- /* n_left_from -= 1; */
- /* to_next += 1; */
- /* n_left_to_next -= 1; */
-
- /* p0 = vlib_get_buffer (vm, pi0); */
- /* adj0 = ip_get_adjacency (lm, vnet_buffer (p0)->ip.adj_index[VLIB_TX]); */
- /* ad0 = (lb_adj_data_t *) &adj0->opaque; */
- /* vip0 = pool_elt_at_index (lbm->vips, ad0->vip_index); */
-
- /* if (is_input_v4) { */
- /* ip4_header_t *ip40; */
- /* ip40 = vlib_buffer_get_current (p0); */
- /* len0 = clib_net_to_host_u16(ip40->length); */
- /* key0[0] = (u64) ip40->src_address.as_u32; */
- /* key0[1] = (u64) ip40->dst_address.as_u32; */
- /* key0[2] = 0; */
- /* key0[3] = 0; */
- /* key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | */
- /* ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); */
-
- /* hash0 = lb_hash_hash(key0); */
- /* } else { */
- /* ip6_header_t *ip60; */
- /* ip60 = vlib_buffer_get_current (p0); */
- /* len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); */
- /* key0[0] = ip60->src_address.as_u64[0]; */
- /* key0[1] = ip60->src_address.as_u64[1]; */
- /* key0[2] = ip60->dst_address.as_u64[0]; */
- /* key0[3] = ip60->dst_address.as_u64[1]; */
- /* key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | */
- /* ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); */
-
- /* hash0 = lb_hash_hash(key0); */
- /* } */
-
- /* //NOTE: This is an ugly trick to not include the VIP index in the hash calculation */
- /* //but actually use it in the key determination. */
- /* key0[4] |= ((vip0 - lbm->vips)); */
-
- /* lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); */
- /* if (PREDICT_TRUE(value0 != ~0)) { */
- /* //Found an existing entry */
- /* as0 = &lbm->ass[value0]; */
- /* } else if (PREDICT_TRUE(available_index0 != ~0)) { */
- /* //There is an available slot for a new flow */
- /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
- /* if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element */
- /* error0 = LB_ERROR_NO_SERVER; */
- /* next0 = LB_NEXT_DROP; */
- /* } else { */
- /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], */
- /* cpu_index, vip0 - lbm->vips, 1); */
- /* } */
-
- /* //TODO: There are race conditions with as0 and vip0 manipulation. */
- /* //Configuration may be changed, vectors resized, etc... */
-
- /* //Dereference previously used */
- /* vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); */
- /* vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); */
-
- /* //Add sticky entry */
- /* //Note that when there is no AS configured, an entry is configured anyway. */
- /* //But no configured AS is not something that should happen */
- /* lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); */
- /* } else { */
- /* //Could not store new entry in the table */
- /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
- /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], */
- /* cpu_index, vip0 - lbm->vips, 1); */
- /* } */
-
- /* //Now let's encap */
- /* if (is_encap_v4) { */
- /* ip4_header_t *ip40; */
- /* vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); */
- /* ip40 = vlib_buffer_get_current(p0); */
- /* gre0 = (gre_header_t *)(ip40 + 1); */
- /* ip40->src_address = lbm->ip4_src_address; */
- /* ip40->dst_address = as0->address.ip4; */
- /* ip40->ip_version_and_header_length = 0x45; */
- /* ip40->ttl = 128; */
- /* ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); */
- /* ip40->protocol = IP_PROTOCOL_GRE; */
- /* ip40->checksum = ip4_header_checksum (ip40); */
- /* } else { */
- /* ip6_header_t *ip60; */
- /* vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); */
- /* ip60 = vlib_buffer_get_current(p0); */
- /* gre0 = (gre_header_t *)(ip60 + 1); */
- /* ip60->dst_address = as0->address.ip6; */
- /* ip60->src_address = lbm->ip6_src_address; */
- /* ip60->hop_limit = 128; */
- /* ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); */
- /* ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); */
- /* ip60->protocol = IP_PROTOCOL_GRE; */
- /* } */
-
- /* gre0->flags_and_version = 0; */
- /* gre0->protocol = (is_input_v4)? */
- /* clib_host_to_net_u16(0x0800): */
- /* clib_host_to_net_u16(0x86DD); */
-
- /* vnet_buffer(p0)->ip.adj_index[VLIB_TX] = as0->adj_index; */
- /* next0 = (as0->adj_index != ~0)?LB_NEXT_REWRITE:next0; */
-
- /* if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) */
- /* { */
- /* lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); */
- /* tr->as_index = as0 - lbm->ass; */
- /* tr->vip_index = ad0->vip_index; */
- /* } */
-
- /* p0->error = error_node->errors[error0]; */
- /* vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, */
- /* n_left_to_next, pi0, next0); */
- /* } */
- /* vlib_put_next_frame (vm, node, next_index, n_left_to_next); */
- /* } */
-
- /* return frame->n_vectors; */
- return 0;
+ lb_main_t *lbm = &lb_main;
+ vlib_node_runtime_t *error_node = node;
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ u32 cpu_index = os_get_cpu_number();
+ u32 lb_time = lb_hash_time_now(vm);
+
+ lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ lb_vip_t *vip0;
+ lb_as_t *as0;
+ gre_header_t *gre0;
+ u16 len0;
+ u32 value0, available_index0, hash0;
+ u64 key0[5];
+ lb_error_t error0 = LB_ERROR_NONE;
+
+ if (PREDICT_TRUE(n_left_from > 1))
+ {
+ vlib_buffer_t *p2;
+ p2 = vlib_get_buffer(vm, from[1]);
+ vlib_prefetch_buffer_header(p2, STORE);
+ /* IPv4 + 8 = 28. possibly plus -40 */
+ CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vip0 = pool_elt_at_index (lbm->vips,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ if (is_input_v4) {
+ ip4_header_t *ip40;
+ ip40 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip40->length);
+ key0[0] = (u64) ip40->src_address.as_u32;
+ key0[1] = (u64) ip40->dst_address.as_u32;
+ key0[2] = 0;
+ key0[3] = 0;
+ key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) |
+ ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16);
+
+ hash0 = lb_hash_hash(key0);
+ } else {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+ key0[0] = ip60->src_address.as_u64[0];
+ key0[1] = ip60->src_address.as_u64[1];
+ key0[2] = ip60->dst_address.as_u64[0];
+ key0[3] = ip60->dst_address.as_u64[1];
+ key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) |
+ ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16);
+
+ hash0 = lb_hash_hash(key0);
+ }
+
+ //NOTE: This is an ugly trick to not include the VIP index in the hash calculation
+ //but actually use it in the key determination.
+ key0[4] |= ((vip0 - lbm->vips));
+
+ lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0);
+ if (PREDICT_TRUE(value0 != ~0)) {
+ //Found an existing entry
+ as0 = &lbm->ass[value0];
+ } else if (PREDICT_TRUE(available_index0 != ~0)) {
+ //There is an available slot for a new flow
+ as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+ if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element
+ error0 = LB_ERROR_NO_SERVER;
+ } else {
+ vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION],
+ cpu_index, vip0 - lbm->vips, 1);
+ }
+
+ //TODO: There are race conditions with as0 and vip0 manipulation.
+ //Configuration may be changed, vectors resized, etc...
+
+ //Dereference previously used
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1);
+
+ //Add sticky entry
+ //Note that when there is no AS configured, an entry is configured anyway.
+ //But no configured AS is not something that should happen
+ lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time);
+ } else {
+ //Could not store new entry in the table
+ as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+ vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET],
+ cpu_index, vip0 - lbm->vips, 1);
+ }
+
+ //Now let's encap
+ if (is_encap_v4) {
+ ip4_header_t *ip40;
+ vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+ ip40 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip40 + 1);
+ ip40->src_address = lbm->ip4_src_address;
+ ip40->dst_address = as0->address.ip4;
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->ttl = 128;
+ ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+ ip40->protocol = IP_PROTOCOL_GRE;
+ ip40->checksum = ip4_header_checksum (ip40);
+ } else {
+ ip6_header_t *ip60;
+ vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip60 + 1);
+ ip60->dst_address = as0->address.ip6;
+ ip60->src_address = lbm->ip6_src_address;
+ ip60->hop_limit = 128;
+ ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+ ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+ ip60->protocol = IP_PROTOCOL_GRE;
+ }
+
+ gre0->flags_and_version = 0;
+ gre0->protocol = (is_input_v4)?
+ clib_host_to_net_u16(0x0800):
+ clib_host_to_net_u16(0x86DD);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->as_index = as0 - lbm->ass;
+ tr->vip_index = vip0 - lbm->vips;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ as0->dpo.dpoi_next_node);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
}
static uword
@@ -314,18 +290,10 @@ VLIB_REGISTER_NODE (lb6_gre6_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip6-lookup",
- [LB_NEXT_REWRITE] = "ip6-rewrite",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre6) = { */
-/* .node_name = "lb6-gre6", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE6] */
-/* }; */
-
VLIB_REGISTER_NODE (lb6_gre4_node) =
{
.function = lb6_gre4_node_fn,
@@ -339,18 +307,10 @@ VLIB_REGISTER_NODE (lb6_gre4_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip4-lookup",
- [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre4) = { */
-/* .node_name = "lb6-gre4", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE4] */
-/* }; */
-
VLIB_REGISTER_NODE (lb4_gre6_node) =
{
.function = lb4_gre6_node_fn,
@@ -364,18 +324,10 @@ VLIB_REGISTER_NODE (lb4_gre6_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip6-lookup",
- [LB_NEXT_REWRITE] = "ip6-rewrite",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre6) = { */
-/* .node_name = "lb4-gre6", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE6] */
-/* }; */
-
VLIB_REGISTER_NODE (lb4_gre4_node) =
{
.function = lb4_gre4_node_fn,
@@ -389,14 +341,7 @@ VLIB_REGISTER_NODE (lb4_gre4_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip4-lookup",
- [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre4) = { */
-/* .node_name = "lb4-gre4", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE4] */
-/* }; */