diff options
Diffstat (limited to 'plugins/lb-plugin/lb/node.c')
-rw-r--r-- | plugins/lb-plugin/lb/node.c | 367 |
1 files changed, 156 insertions, 211 deletions
diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c index c51a2108a0f..77beaac9bb2 100644 --- a/plugins/lb-plugin/lb/node.c +++ b/plugins/lb-plugin/lb/node.c @@ -36,28 +36,11 @@ static char *lb_error_strings[] = { #undef _ }; -typedef enum { - LB_NEXT_LOOKUP, - LB_NEXT_REWRITE, - LB_NEXT_DROP, - LB_N_NEXT, -} lb_next_t; - typedef struct { u32 vip_index; u32 as_index; } lb_trace_t; -/* u8 *lb_format_adjacency(u8 * s, va_list * va) */ -/* { */ -/* lb_main_t *lbm = &lb_main; */ -/* __attribute((unused)) ip_lookup_main_t *lm = va_arg (*va, ip_lookup_main_t *); */ -/* ip_adjacency_t *adj = va_arg (*va, ip_adjacency_t *); */ -/* lb_adj_data_t *ad = (lb_adj_data_t *) &adj->opaque; */ -/* __attribute__((unused)) lb_vip_t *vip = pool_elt_at_index (lbm->vips, ad->vip_index); */ -/* return format(s, "vip idx:%d", ad->vip_index); */ -/* } */ - u8 * format_lb_trace (u8 * s, va_list * args) { @@ -108,169 +91,162 @@ lb_node_fn (vlib_main_t * vm, u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6) u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6) { - /* ip_lookup_main_t *lm = (is_input_v4)?&ip4_main.lookup_main:&ip6_main.lookup_main; */ - /* lb_main_t *lbm = &lb_main; */ - /* vlib_node_runtime_t *error_node = node; */ - /* u32 n_left_from, *from, next_index, *to_next, n_left_to_next; */ - /* u32 cpu_index = os_get_cpu_number(); */ - /* u32 lb_time = lb_hash_time_now(vm); */ - - /* lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); */ - /* from = vlib_frame_vector_args (frame); */ - /* n_left_from = frame->n_vectors; */ - /* next_index = node->cached_next_index; */ - - /* while (n_left_from > 0) */ - /* { */ - /* vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); */ - /* while (n_left_from > 0 && n_left_to_next > 0) */ - /* { */ - /* u32 pi0; */ - /* vlib_buffer_t *p0; */ - /* ip_adjacency_t *adj0; */ - /* lb_adj_data_t *ad0; */ - /* lb_vip_t *vip0; */ - /* lb_as_t *as0; */ - /* gre_header_t *gre0; */ - /* u16 len0; */ - /* u32 value0, available_index0, hash0; */ - /* u64 key0[5]; */ - /* lb_error_t error0 = LB_ERROR_NONE; */ - /* lb_next_t next0 = LB_NEXT_LOOKUP; */ - - /* if (PREDICT_TRUE(n_left_from > 1)) */ - /* { */ - /* vlib_buffer_t *p2; */ - /* p2 = vlib_get_buffer(vm, from[1]); */ - /* vlib_prefetch_buffer_header(p2, STORE); */ - /* /\* IPv4 + 8 = 28. possibly plus -40 *\/ */ - /* CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); */ - /* } */ - - /* pi0 = to_next[0] = from[0]; */ - /* from += 1; */ - /* n_left_from -= 1; */ - /* to_next += 1; */ - /* n_left_to_next -= 1; */ - - /* p0 = vlib_get_buffer (vm, pi0); */ - /* adj0 = ip_get_adjacency (lm, vnet_buffer (p0)->ip.adj_index[VLIB_TX]); */ - /* ad0 = (lb_adj_data_t *) &adj0->opaque; */ - /* vip0 = pool_elt_at_index (lbm->vips, ad0->vip_index); */ - - /* if (is_input_v4) { */ - /* ip4_header_t *ip40; */ - /* ip40 = vlib_buffer_get_current (p0); */ - /* len0 = clib_net_to_host_u16(ip40->length); */ - /* key0[0] = (u64) ip40->src_address.as_u32; */ - /* key0[1] = (u64) ip40->dst_address.as_u32; */ - /* key0[2] = 0; */ - /* key0[3] = 0; */ - /* key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | */ - /* ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); */ - - /* hash0 = lb_hash_hash(key0); */ - /* } else { */ - /* ip6_header_t *ip60; */ - /* ip60 = vlib_buffer_get_current (p0); */ - /* len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); */ - /* key0[0] = ip60->src_address.as_u64[0]; */ - /* key0[1] = ip60->src_address.as_u64[1]; */ - /* key0[2] = ip60->dst_address.as_u64[0]; */ - /* key0[3] = ip60->dst_address.as_u64[1]; */ - /* key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | */ - /* ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); */ - - /* hash0 = lb_hash_hash(key0); */ - /* } */ - - /* //NOTE: This is an ugly trick to not include the VIP index in the hash calculation */ - /* //but actually use it in the key determination. */ - /* key0[4] |= ((vip0 - lbm->vips)); */ - - /* lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); */ - /* if (PREDICT_TRUE(value0 != ~0)) { */ - /* //Found an existing entry */ - /* as0 = &lbm->ass[value0]; */ - /* } else if (PREDICT_TRUE(available_index0 != ~0)) { */ - /* //There is an available slot for a new flow */ - /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */ - /* if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element */ - /* error0 = LB_ERROR_NO_SERVER; */ - /* next0 = LB_NEXT_DROP; */ - /* } else { */ - /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], */ - /* cpu_index, vip0 - lbm->vips, 1); */ - /* } */ - - /* //TODO: There are race conditions with as0 and vip0 manipulation. */ - /* //Configuration may be changed, vectors resized, etc... */ - - /* //Dereference previously used */ - /* vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); */ - /* vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); */ - - /* //Add sticky entry */ - /* //Note that when there is no AS configured, an entry is configured anyway. */ - /* //But no configured AS is not something that should happen */ - /* lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); */ - /* } else { */ - /* //Could not store new entry in the table */ - /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */ - /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], */ - /* cpu_index, vip0 - lbm->vips, 1); */ - /* } */ - - /* //Now let's encap */ - /* if (is_encap_v4) { */ - /* ip4_header_t *ip40; */ - /* vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); */ - /* ip40 = vlib_buffer_get_current(p0); */ - /* gre0 = (gre_header_t *)(ip40 + 1); */ - /* ip40->src_address = lbm->ip4_src_address; */ - /* ip40->dst_address = as0->address.ip4; */ - /* ip40->ip_version_and_header_length = 0x45; */ - /* ip40->ttl = 128; */ - /* ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); */ - /* ip40->protocol = IP_PROTOCOL_GRE; */ - /* ip40->checksum = ip4_header_checksum (ip40); */ - /* } else { */ - /* ip6_header_t *ip60; */ - /* vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); */ - /* ip60 = vlib_buffer_get_current(p0); */ - /* gre0 = (gre_header_t *)(ip60 + 1); */ - /* ip60->dst_address = as0->address.ip6; */ - /* ip60->src_address = lbm->ip6_src_address; */ - /* ip60->hop_limit = 128; */ - /* ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); */ - /* ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); */ - /* ip60->protocol = IP_PROTOCOL_GRE; */ - /* } */ - - /* gre0->flags_and_version = 0; */ - /* gre0->protocol = (is_input_v4)? */ - /* clib_host_to_net_u16(0x0800): */ - /* clib_host_to_net_u16(0x86DD); */ - - /* vnet_buffer(p0)->ip.adj_index[VLIB_TX] = as0->adj_index; */ - /* next0 = (as0->adj_index != ~0)?LB_NEXT_REWRITE:next0; */ - - /* if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) */ - /* { */ - /* lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); */ - /* tr->as_index = as0 - lbm->ass; */ - /* tr->vip_index = ad0->vip_index; */ - /* } */ - - /* p0->error = error_node->errors[error0]; */ - /* vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, */ - /* n_left_to_next, pi0, next0); */ - /* } */ - /* vlib_put_next_frame (vm, node, next_index, n_left_to_next); */ - /* } */ - - /* return frame->n_vectors; */ - return 0; + lb_main_t *lbm = &lb_main; + vlib_node_runtime_t *error_node = node; + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + u32 cpu_index = os_get_cpu_number(); + u32 lb_time = lb_hash_time_now(vm); + + lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + lb_vip_t *vip0; + lb_as_t *as0; + gre_header_t *gre0; + u16 len0; + u32 value0, available_index0, hash0; + u64 key0[5]; + lb_error_t error0 = LB_ERROR_NONE; + + if (PREDICT_TRUE(n_left_from > 1)) + { + vlib_buffer_t *p2; + p2 = vlib_get_buffer(vm, from[1]); + vlib_prefetch_buffer_header(p2, STORE); + /* IPv4 + 8 = 28. possibly plus -40 */ + CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); + } + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + vip0 = pool_elt_at_index (lbm->vips, + vnet_buffer (p0)->ip.adj_index[VLIB_TX]); + + if (is_input_v4) { + ip4_header_t *ip40; + ip40 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip40->length); + key0[0] = (u64) ip40->src_address.as_u32; + key0[1] = (u64) ip40->dst_address.as_u32; + key0[2] = 0; + key0[3] = 0; + key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | + ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); + + hash0 = lb_hash_hash(key0); + } else { + ip6_header_t *ip60; + ip60 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); + key0[0] = ip60->src_address.as_u64[0]; + key0[1] = ip60->src_address.as_u64[1]; + key0[2] = ip60->dst_address.as_u64[0]; + key0[3] = ip60->dst_address.as_u64[1]; + key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | + ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); + + hash0 = lb_hash_hash(key0); + } + + //NOTE: This is an ugly trick to not include the VIP index in the hash calculation + //but actually use it in the key determination. + key0[4] |= ((vip0 - lbm->vips)); + + lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); + if (PREDICT_TRUE(value0 != ~0)) { + //Found an existing entry + as0 = &lbm->ass[value0]; + } else if (PREDICT_TRUE(available_index0 != ~0)) { + //There is an available slot for a new flow + as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; + if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element + error0 = LB_ERROR_NO_SERVER; + } else { + vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], + cpu_index, vip0 - lbm->vips, 1); + } + + //TODO: There are race conditions with as0 and vip0 manipulation. + //Configuration may be changed, vectors resized, etc... + + //Dereference previously used + vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); + vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); + + //Add sticky entry + //Note that when there is no AS configured, an entry is configured anyway. + //But no configured AS is not something that should happen + lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); + } else { + //Could not store new entry in the table + as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; + vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], + cpu_index, vip0 - lbm->vips, 1); + } + + //Now let's encap + if (is_encap_v4) { + ip4_header_t *ip40; + vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); + ip40 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip40 + 1); + ip40->src_address = lbm->ip4_src_address; + ip40->dst_address = as0->address.ip4; + ip40->ip_version_and_header_length = 0x45; + ip40->ttl = 128; + ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); + ip40->protocol = IP_PROTOCOL_GRE; + ip40->checksum = ip4_header_checksum (ip40); + } else { + ip6_header_t *ip60; + vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); + ip60 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip60 + 1); + ip60->dst_address = as0->address.ip6; + ip60->src_address = lbm->ip6_src_address; + ip60->hop_limit = 128; + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); + ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); + ip60->protocol = IP_PROTOCOL_GRE; + } + + gre0->flags_and_version = 0; + gre0->protocol = (is_input_v4)? + clib_host_to_net_u16(0x0800): + clib_host_to_net_u16(0x86DD); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index; + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->as_index = as0 - lbm->ass; + tr->vip_index = vip0 - lbm->vips; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, + as0->dpo.dpoi_next_node); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; } static uword @@ -314,18 +290,10 @@ VLIB_REGISTER_NODE (lb6_gre6_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip6-lookup", - [LB_NEXT_REWRITE] = "ip6-rewrite", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre6) = { */ -/* .node_name = "lb6-gre6", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE6] */ -/* }; */ - VLIB_REGISTER_NODE (lb6_gre4_node) = { .function = lb6_gre4_node_fn, @@ -339,18 +307,10 @@ VLIB_REGISTER_NODE (lb6_gre4_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip4-lookup", - [LB_NEXT_REWRITE]= "ip4-rewrite-transit", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre4) = { */ -/* .node_name = "lb6-gre4", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE4] */ -/* }; */ - VLIB_REGISTER_NODE (lb4_gre6_node) = { .function = lb4_gre6_node_fn, @@ -364,18 +324,10 @@ VLIB_REGISTER_NODE (lb4_gre6_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip6-lookup", - [LB_NEXT_REWRITE] = "ip6-rewrite", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre6) = { */ -/* .node_name = "lb4-gre6", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE6] */ -/* }; */ - VLIB_REGISTER_NODE (lb4_gre4_node) = { .function = lb4_gre4_node_fn, @@ -389,14 +341,7 @@ VLIB_REGISTER_NODE (lb4_gre4_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip4-lookup", - [LB_NEXT_REWRITE]= "ip4-rewrite-transit", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre4) = { */ -/* .node_name = "lb4-gre4", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE4] */ -/* }; */ |