diff options
Diffstat (limited to 'plugins/lb-plugin/lb')
-rw-r--r-- | plugins/lb-plugin/lb/cli.c | 41 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb.c | 286 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb.h | 66 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/node.c | 367 |
4 files changed, 358 insertions, 402 deletions
diff --git a/plugins/lb-plugin/lb/cli.c b/plugins/lb-plugin/lb/cli.c index 398572ce396..b59c6426241 100644 --- a/plugins/lb-plugin/lb/cli.c +++ b/plugins/lb-plugin/lb/cli.c @@ -17,47 +17,6 @@ #include <lb/util.h> static clib_error_t * -lb_bypass_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip46_address_t vip_prefix, as_addr; - u8 vip_plen; - u32 vip_index; - u8 disable = 0; - int ret; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) - return clib_error_return (0, "invalid vip prefix: '%U'", - format_unformat_error, line_input); - - if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); - - if (!unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) - return clib_error_return (0, "invalid as address: '%U'", - format_unformat_error, line_input); - - if (unformat(line_input, "disable")) - disable = 1; - - if ((ret = lb_as_lookup_bypass(vip_index, &as_addr, disable))) - return clib_error_return (0, "lb_as_lookup_bypass error %d", ret); - - return 0; -} - -VLIB_CLI_COMMAND (lb_bypass_command, static) = -{ - .path = "lb bypass", - .short_help = "lb bypass <prefix> <address> [disable]", - .function = lb_bypass_command_fn, -}; - -static clib_error_t * lb_vip_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { diff --git a/plugins/lb-plugin/lb/lb.c b/plugins/lb-plugin/lb/lb.c index 140c221a438..6af4697e37c 100644 --- a/plugins/lb-plugin/lb/lb.c +++ b/plugins/lb-plugin/lb/lb.c @@ -28,6 +28,25 @@ lb_main_t lb_main; #define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1)) #define lb_put_writer_lock() lb_main.writer_lock[0] = 0 +static void lb_as_stack (lb_as_t *as); + + +const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL }; +const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL }; +const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] = + { + [DPO_PROTO_IP4] = lb_dpo_gre4_ip4, + [DPO_PROTO_IP6] = lb_dpo_gre4_ip6, + }; + +const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL }; +const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL }; +const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] = + { + [DPO_PROTO_IP4] = lb_dpo_gre6_ip4, + [DPO_PROTO_IP6] = lb_dpo_gre6_ip6, + }; + u32 lb_hash_time_now(vlib_main_t * vm) { return (u32) (vlib_time_now(vm) + 10000); @@ -143,12 +162,12 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args) u32 *as_index; pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; - s = format(s, "%U %U %d buckets %d flows adj:%u %s\n", + s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n", format_white_space, indent, format_ip46_address, &as->address, IP46_TYPE_ANY, count[as - lbm->ass], vlib_refcount_get(&lbm->as_refcount, as - lbm->ass), - as->adj_index, + as->dpo.dpoi_index, (as->flags & LB_AS_FLAGS_USED)?"used":" removed"); }); @@ -164,7 +183,6 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args) return s; } - typedef struct { u32 as_index; u32 last; @@ -195,11 +213,18 @@ static void lb_vip_garbage_collection(lb_vip_t *vip) pool_foreach(as_index, vip->as_indexes, { as = &lbm->ass[*as_index]; if (!(as->flags & LB_AS_FLAGS_USED) && //Not used - clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used - (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) { //Not referenced - pool_put(vip->as_indexes, as_index); - pool_put(lbm->ass, as); - } + clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used + (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) + { //Not referenced + fib_entry_child_remove(as->next_hop_fib_entry_index, + as->next_hop_child_index); + fib_table_entry_delete_index(as->next_hop_fib_entry_index, + FIB_SOURCE_RR); + as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID; + + pool_put(vip->as_indexes, as_index); + pool_put(lbm->ass, as); + } }); } @@ -449,7 +474,6 @@ next: //Update reused ASs vec_foreach(ip, to_be_updated) { lbm->ass[*ip].flags = LB_AS_FLAGS_USED; - lbm->ass[*ip].adj_index = ~0; } vec_free(to_be_updated); @@ -461,9 +485,36 @@ next: as->address = addresses[*ip]; as->flags = LB_AS_FLAGS_USED; as->vip_index = vip_index; - as->adj_index = ~0; pool_get(vip->as_indexes, as_index); *as_index = as - lbm->ass; + + /* + * become a child of the FIB entry + * so we are informed when its forwarding changes + */ + fib_prefix_t nh = {}; + if (lb_vip_is_gre4(vip)) { + nh.fp_addr.ip4 = as->address.ip4; + nh.fp_len = 32; + nh.fp_proto = FIB_PROTOCOL_IP4; + } else { + nh.fp_addr.ip6 = as->address.ip6; + nh.fp_len = 128; + nh.fp_proto = FIB_PROTOCOL_IP6; + } + + as->next_hop_fib_entry_index = + fib_table_entry_special_add(0, + &nh, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + as->next_hop_child_index = + fib_entry_child_add(as->next_hop_fib_entry_index, + lbm->fib_node_type, + as - lbm->ass); + + lb_as_stack(as); } vec_free(to_be_added); @@ -535,100 +586,33 @@ int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n) return ret; } -int lb_as_lookup_bypass(u32 vip_index, ip46_address_t *address, u8 is_disable) -{ - /* lb_get_writer_lock(); */ - /* lb_main_t *lbm = &lb_main; */ - /* u32 as_index; */ - /* lb_as_t *as; */ - /* lb_vip_t *vip; */ - - /* if (!(vip = lb_vip_get_by_index(vip_index)) || */ - /* lb_as_find_index_vip(vip, address, &as_index)) { */ - /* lb_put_writer_lock(); */ - /* return VNET_API_ERROR_NO_SUCH_ENTRY; */ - /* } */ - - /* as = &lbm->ass[as_index]; */ - - /* if (is_disable) { */ - /* as->adj_index = ~0; */ - /* } else if (lb_vip_is_gre4(vip)) { */ - /* uword *p = ip4_get_route (&ip4_main, 0, 0, as->address.ip4.as_u8, 32); */ - /* if (p == 0) { */ - /* lb_put_writer_lock(); */ - /* return VNET_API_ERROR_NO_SUCH_ENTRY; */ - /* } */ - /* u32 ai = (u32)p[0]; */ - /* ip_lookup_main_t *lm4 = &ip4_main.lookup_main; */ - /* ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai); */ - /* if (adj4->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) { */ - /* lb_put_writer_lock(); */ - /* return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; */ - /* } */ - - /* as->adj_index = ai; */ - /* } else { */ - /* u32 ai = ip6_get_route (&ip6_main, 0, 0, &as->address.ip6, 128); */ - /* if (ai == 0) { */ - /* lb_put_writer_lock(); */ - /* return VNET_API_ERROR_NO_SUCH_ENTRY; */ - /* } */ - - /* ip_lookup_main_t *lm6 = &ip6_main.lookup_main; */ - /* ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); */ - /* if (adj6->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) { */ - /* lb_put_writer_lock(); */ - /* return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; */ - /* } */ - - /* as->adj_index = ai; */ - /* } */ - /* lb_put_writer_lock(); */ - return 0; -} - - /** * Add the VIP adjacency to the ip4 or ip6 fib */ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip) { - /* ip_adjacency_t adj; */ - /* //Adjacency */ - /* memset (&adj, 0, sizeof (adj)); */ - /* adj.explicit_fib_index = ~0; */ - /* lb_adj_data_t *ad = (lb_adj_data_t *) &adj.opaque; */ - /* ad->vip_index = vip - lbm->vips; */ - - /* ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned */ - /* u32 lookup_next_index = lbm->ip_lookup_next_index[vip->type]; */ - - /* if (lb_vip_is_ip4(vip)) { */ - /* adj.lookup_next_index = lookup_next_index; */ - /* ip4_add_del_route_args_t route_args = {}; */ - /* ip4_main_t *im4 = &ip4_main; */ - /* route_args.table_index_or_table_id = 0; */ - /* route_args.flags = IP4_ROUTE_FLAG_ADD; */ - /* route_args.dst_address = vip->prefix.ip4; */ - /* route_args.dst_address_length = vip->plen - 96; */ - /* route_args.adj_index = ~0; */ - /* route_args.add_adj = &adj; */ - /* route_args.n_add_adj = 1; */ - /* ip4_add_del_route (im4, &route_args); */ - /* } else { */ - /* adj.lookup_next_index = lookup_next_index; */ - /* ip6_add_del_route_args_t route_args = {}; */ - /* ip6_main_t *im6 = &ip6_main; */ - /* route_args.table_index_or_table_id = 0; */ - /* route_args.flags = IP6_ROUTE_FLAG_ADD; */ - /* route_args.dst_address = vip->prefix.ip6; */ - /* route_args.dst_address_length = vip->plen; */ - /* route_args.adj_index = ~0; */ - /* route_args.add_adj = &adj; */ - /* route_args.n_add_adj = 1; */ - /* ip6_add_del_route (im6, &route_args); */ - /* } */ + dpo_proto_t proto = 0; + dpo_id_t dpo = DPO_NULL; + fib_prefix_t pfx = {}; + if (lb_vip_is_ip4(vip)) { + pfx.fp_addr.ip4 = vip->prefix.ip4; + pfx.fp_len = vip->plen - 96; + pfx.fp_proto = FIB_PROTOCOL_IP4; + proto = DPO_PROTO_IP4; + } else { + pfx.fp_addr.ip6 = vip->prefix.ip6; + pfx.fp_len = vip->plen; + pfx.fp_proto = FIB_PROTOCOL_IP6; + proto = DPO_PROTO_IP6; + } + dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, + proto, vip - lbm->vips); + fib_table_entry_special_dpo_add(0, + &pfx, + FIB_SOURCE_PLUGIN_HI, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); } /** @@ -636,30 +620,17 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip) */ static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip) { - /* ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned */ - /* if (lb_vip_is_ip4(vip)) { */ - /* ip4_main_t *im4 = &ip4_main; */ - /* ip4_add_del_route_args_t route_args = {}; */ - /* route_args.table_index_or_table_id = 0; */ - /* route_args.flags = IP4_ROUTE_FLAG_DEL; */ - /* route_args.dst_address = vip->prefix.ip4; */ - /* route_args.dst_address_length = vip->plen - 96; */ - /* route_args.adj_index = ~0; */ - /* route_args.add_adj = NULL; */ - /* route_args.n_add_adj = 0; */ - /* ip4_add_del_route (im4, &route_args); */ - /* } else { */ - /* ip6_main_t *im6 = &ip6_main; */ - /* ip6_add_del_route_args_t route_args = {}; */ - /* route_args.table_index_or_table_id = 0; */ - /* route_args.flags = IP6_ROUTE_FLAG_DEL; */ - /* route_args.dst_address = vip->prefix.ip6; */ - /* route_args.dst_address_length = vip->plen; */ - /* route_args.adj_index = ~0; */ - /* route_args.add_adj = NULL; */ - /* route_args.n_add_adj = 0; */ - /* ip6_add_del_route (im6, &route_args); */ - /* } */ + fib_prefix_t pfx = {}; + if (lb_vip_is_ip4(vip)) { + pfx.fp_addr.ip4 = vip->prefix.ip4; + pfx.fp_len = vip->plen - 96; + pfx.fp_proto = FIB_PROTOCOL_IP4; + } else { + pfx.fp_addr.ip6 = vip->prefix.ip6; + pfx.fp_len = vip->plen; + pfx.fp_proto = FIB_PROTOCOL_IP6; + } + fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI); } int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index) @@ -766,12 +737,76 @@ vlib_plugin_register (vlib_main_t * vm, return error; } + +u8 *format_lb_dpo (u8 * s, va_list * va) +{ + index_t index = va_arg (*va, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*va, u32); + lb_main_t *lbm = &lb_main; + lb_vip_t *vip = pool_elt_at_index (lbm->vips, index); + return format (s, "%U", format_lb_vip, vip); +} + +static void lb_dpo_lock (dpo_id_t *dpo) {} +static void lb_dpo_unlock (dpo_id_t *dpo) {} + +static fib_node_t * +lb_fib_node_get_node (fib_node_index_t index) +{ + lb_main_t *lbm = &lb_main; + lb_as_t *as = pool_elt_at_index (lbm->ass, index); + return (&as->fib_node); +} + +static void +lb_fib_node_last_lock_gone (fib_node_t *node) +{ +} + +static lb_as_t * +lb_as_from_fib_node (fib_node_t *node) +{ + return ((lb_as_t*)(((char*)node) - + STRUCT_OFFSET_OF(lb_as_t, fib_node))); +} + +static void +lb_as_stack (lb_as_t *as) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip = &lbm->vips[as->vip_index]; + dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, + lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6, + &as->dpo, + fib_entry_contribute_ip_forwarding( + as->next_hop_fib_entry_index)); +} + +static fib_node_back_walk_rc_t +lb_fib_node_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + lb_as_stack(lb_as_from_fib_node(node)); + return (FIB_NODE_BACK_WALK_CONTINUE); +} + clib_error_t * lb_init (vlib_main_t * vm) { vlib_thread_main_t *tm = vlib_get_thread_main (); lb_main_t *lbm = &lb_main; lb_as_t *default_as; + fib_node_vft_t lb_fib_node_vft = { + .fnv_get = lb_fib_node_get_node, + .fnv_last_lock = lb_fib_node_last_lock_gone, + .fnv_back_walk = lb_fib_node_back_walk_notify, + }; + dpo_vft_t lb_vft = { + .dv_lock = lb_dpo_lock, + .dv_unlock = lb_dpo_unlock, + .dv_format = format_lb_dpo, + }; + lbm->vips = 0; lbm->per_cpu = 0; vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1); @@ -782,6 +817,9 @@ lb_init (vlib_main_t * vm) lbm->ip4_src_address.as_u32 = 0xffffffff; lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL; lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL; + lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes); + lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes); + lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft); //Init AS reference counters vlib_refcount_init(&lbm->as_refcount); @@ -790,7 +828,7 @@ lb_init (vlib_main_t * vm) lbm->ass = 0; pool_get(lbm->ass, default_as); default_as->flags = 0; - default_as->adj_index = ~0; + default_as->dpo.dpoi_next_node = LB_NEXT_DROP; default_as->vip_index = ~0; default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL; default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL; diff --git a/plugins/lb-plugin/lb/lb.h b/plugins/lb-plugin/lb/lb.h index 14a4d8a39e7..09cfde3e378 100644 --- a/plugins/lb-plugin/lb/lb.h +++ b/plugins/lb-plugin/lb/lb.h @@ -35,18 +35,30 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/dpo/dpo.h> +#include <vnet/fib/fib_table.h> #include <lb/lbhash.h> #define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10 #define LB_DEFAULT_FLOW_TIMEOUT 40 +typedef enum { + LB_NEXT_DROP, + LB_N_NEXT, +} lb_next_t; + /** * Each VIP is configured with a set of * application server. */ typedef struct { /** + * Registration to FIB event. + */ + fib_node_t fib_node; + + /** * Destination address used to tunnel traffic towards * that application server. * The address is also used as ID and pseudo-random @@ -55,13 +67,6 @@ typedef struct { ip46_address_t address; /** - * Second ip lookup can be avoided by sending directly the packet - * to ip-rewrite with a configured adjacency. - * When set to ~0, the packets are sent to ip6-lookup. - */ - u32 adj_index; - - /** * ASs are indexed by address and VIP Index. * Which means there will be duplicated if the same server * address is used for multiple VIPs. @@ -86,6 +91,22 @@ typedef struct { * may happen. */ u32 last_used; + + /** + * The FIB entry index for the next-hop + */ + fib_node_index_t next_hop_fib_entry_index; + + /** + * The child index on the FIB entry + */ + u32 next_hop_child_index; + + /** + * The next DPO in the graph to follow. + */ + dpo_id_t dpo; + } lb_as_t; format_function_t format_lb_as; @@ -180,15 +201,13 @@ typedef struct { * in the adjacency index. */ u8 flags; +#define LB_VIP_FLAGS_USED 0x1 /** * Pool of AS indexes used for this VIP. * This also includes ASs that have been removed (but are still referenced). */ u32 *as_indexes; - -#define LB_VIP_FLAGS_USED 0x1 - } lb_vip_t; #define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4) @@ -261,6 +280,17 @@ typedef struct { vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS]; /** + * DPO used to send packet from IP4/6 lookup to LB node. + */ + dpo_type_t dpo_gre4_type; + dpo_type_t dpo_gre6_type; + + /** + * Node type for registering to fib changes. + */ + fib_node_type_t fib_node_type; + + /** * API dynamically registered base ID. */ u16 msg_id_base; @@ -268,16 +298,6 @@ typedef struct { volatile u32 *writer_lock; } lb_main_t; -/** - * struct stored in adj->opaque data. - */ -typedef struct { - /** - * Index of the VIP associated with that IP adjacency. - */ - u32 vip_index; -} lb_adj_data_t; - extern lb_main_t lb_main; extern vlib_node_registration_t lb6_node; extern vlib_node_registration_t lb4_node; @@ -302,12 +322,6 @@ int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index); int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n); int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n); -/** - * Updates the adjacency index stored in the AS such that the second - * IP lookup (after encap) can be bypassed. - */ -int lb_as_lookup_bypass(u32 vip_index, ip46_address_t *address, u8 is_disable); - u32 lb_hash_time_now(vlib_main_t * vm); void lb_garbage_collection(); diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c index c51a2108a0f..77beaac9bb2 100644 --- a/plugins/lb-plugin/lb/node.c +++ b/plugins/lb-plugin/lb/node.c @@ -36,28 +36,11 @@ static char *lb_error_strings[] = { #undef _ }; -typedef enum { - LB_NEXT_LOOKUP, - LB_NEXT_REWRITE, - LB_NEXT_DROP, - LB_N_NEXT, -} lb_next_t; - typedef struct { u32 vip_index; u32 as_index; } lb_trace_t; -/* u8 *lb_format_adjacency(u8 * s, va_list * va) */ -/* { */ -/* lb_main_t *lbm = &lb_main; */ -/* __attribute((unused)) ip_lookup_main_t *lm = va_arg (*va, ip_lookup_main_t *); */ -/* ip_adjacency_t *adj = va_arg (*va, ip_adjacency_t *); */ -/* lb_adj_data_t *ad = (lb_adj_data_t *) &adj->opaque; */ -/* __attribute__((unused)) lb_vip_t *vip = pool_elt_at_index (lbm->vips, ad->vip_index); */ -/* return format(s, "vip idx:%d", ad->vip_index); */ -/* } */ - u8 * format_lb_trace (u8 * s, va_list * args) { @@ -108,169 +91,162 @@ lb_node_fn (vlib_main_t * vm, u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6) u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6) { - /* ip_lookup_main_t *lm = (is_input_v4)?&ip4_main.lookup_main:&ip6_main.lookup_main; */ - /* lb_main_t *lbm = &lb_main; */ - /* vlib_node_runtime_t *error_node = node; */ - /* u32 n_left_from, *from, next_index, *to_next, n_left_to_next; */ - /* u32 cpu_index = os_get_cpu_number(); */ - /* u32 lb_time = lb_hash_time_now(vm); */ - - /* lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); */ - /* from = vlib_frame_vector_args (frame); */ - /* n_left_from = frame->n_vectors; */ - /* next_index = node->cached_next_index; */ - - /* while (n_left_from > 0) */ - /* { */ - /* vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); */ - /* while (n_left_from > 0 && n_left_to_next > 0) */ - /* { */ - /* u32 pi0; */ - /* vlib_buffer_t *p0; */ - /* ip_adjacency_t *adj0; */ - /* lb_adj_data_t *ad0; */ - /* lb_vip_t *vip0; */ - /* lb_as_t *as0; */ - /* gre_header_t *gre0; */ - /* u16 len0; */ - /* u32 value0, available_index0, hash0; */ - /* u64 key0[5]; */ - /* lb_error_t error0 = LB_ERROR_NONE; */ - /* lb_next_t next0 = LB_NEXT_LOOKUP; */ - - /* if (PREDICT_TRUE(n_left_from > 1)) */ - /* { */ - /* vlib_buffer_t *p2; */ - /* p2 = vlib_get_buffer(vm, from[1]); */ - /* vlib_prefetch_buffer_header(p2, STORE); */ - /* /\* IPv4 + 8 = 28. possibly plus -40 *\/ */ - /* CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); */ - /* } */ - - /* pi0 = to_next[0] = from[0]; */ - /* from += 1; */ - /* n_left_from -= 1; */ - /* to_next += 1; */ - /* n_left_to_next -= 1; */ - - /* p0 = vlib_get_buffer (vm, pi0); */ - /* adj0 = ip_get_adjacency (lm, vnet_buffer (p0)->ip.adj_index[VLIB_TX]); */ - /* ad0 = (lb_adj_data_t *) &adj0->opaque; */ - /* vip0 = pool_elt_at_index (lbm->vips, ad0->vip_index); */ - - /* if (is_input_v4) { */ - /* ip4_header_t *ip40; */ - /* ip40 = vlib_buffer_get_current (p0); */ - /* len0 = clib_net_to_host_u16(ip40->length); */ - /* key0[0] = (u64) ip40->src_address.as_u32; */ - /* key0[1] = (u64) ip40->dst_address.as_u32; */ - /* key0[2] = 0; */ - /* key0[3] = 0; */ - /* key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | */ - /* ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); */ - - /* hash0 = lb_hash_hash(key0); */ - /* } else { */ - /* ip6_header_t *ip60; */ - /* ip60 = vlib_buffer_get_current (p0); */ - /* len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); */ - /* key0[0] = ip60->src_address.as_u64[0]; */ - /* key0[1] = ip60->src_address.as_u64[1]; */ - /* key0[2] = ip60->dst_address.as_u64[0]; */ - /* key0[3] = ip60->dst_address.as_u64[1]; */ - /* key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | */ - /* ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); */ - - /* hash0 = lb_hash_hash(key0); */ - /* } */ - - /* //NOTE: This is an ugly trick to not include the VIP index in the hash calculation */ - /* //but actually use it in the key determination. */ - /* key0[4] |= ((vip0 - lbm->vips)); */ - - /* lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); */ - /* if (PREDICT_TRUE(value0 != ~0)) { */ - /* //Found an existing entry */ - /* as0 = &lbm->ass[value0]; */ - /* } else if (PREDICT_TRUE(available_index0 != ~0)) { */ - /* //There is an available slot for a new flow */ - /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */ - /* if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element */ - /* error0 = LB_ERROR_NO_SERVER; */ - /* next0 = LB_NEXT_DROP; */ - /* } else { */ - /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], */ - /* cpu_index, vip0 - lbm->vips, 1); */ - /* } */ - - /* //TODO: There are race conditions with as0 and vip0 manipulation. */ - /* //Configuration may be changed, vectors resized, etc... */ - - /* //Dereference previously used */ - /* vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); */ - /* vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); */ - - /* //Add sticky entry */ - /* //Note that when there is no AS configured, an entry is configured anyway. */ - /* //But no configured AS is not something that should happen */ - /* lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); */ - /* } else { */ - /* //Could not store new entry in the table */ - /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */ - /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], */ - /* cpu_index, vip0 - lbm->vips, 1); */ - /* } */ - - /* //Now let's encap */ - /* if (is_encap_v4) { */ - /* ip4_header_t *ip40; */ - /* vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); */ - /* ip40 = vlib_buffer_get_current(p0); */ - /* gre0 = (gre_header_t *)(ip40 + 1); */ - /* ip40->src_address = lbm->ip4_src_address; */ - /* ip40->dst_address = as0->address.ip4; */ - /* ip40->ip_version_and_header_length = 0x45; */ - /* ip40->ttl = 128; */ - /* ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); */ - /* ip40->protocol = IP_PROTOCOL_GRE; */ - /* ip40->checksum = ip4_header_checksum (ip40); */ - /* } else { */ - /* ip6_header_t *ip60; */ - /* vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); */ - /* ip60 = vlib_buffer_get_current(p0); */ - /* gre0 = (gre_header_t *)(ip60 + 1); */ - /* ip60->dst_address = as0->address.ip6; */ - /* ip60->src_address = lbm->ip6_src_address; */ - /* ip60->hop_limit = 128; */ - /* ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); */ - /* ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); */ - /* ip60->protocol = IP_PROTOCOL_GRE; */ - /* } */ - - /* gre0->flags_and_version = 0; */ - /* gre0->protocol = (is_input_v4)? */ - /* clib_host_to_net_u16(0x0800): */ - /* clib_host_to_net_u16(0x86DD); */ - - /* vnet_buffer(p0)->ip.adj_index[VLIB_TX] = as0->adj_index; */ - /* next0 = (as0->adj_index != ~0)?LB_NEXT_REWRITE:next0; */ - - /* if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) */ - /* { */ - /* lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); */ - /* tr->as_index = as0 - lbm->ass; */ - /* tr->vip_index = ad0->vip_index; */ - /* } */ - - /* p0->error = error_node->errors[error0]; */ - /* vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, */ - /* n_left_to_next, pi0, next0); */ - /* } */ - /* vlib_put_next_frame (vm, node, next_index, n_left_to_next); */ - /* } */ - - /* return frame->n_vectors; */ - return 0; + lb_main_t *lbm = &lb_main; + vlib_node_runtime_t *error_node = node; + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + u32 cpu_index = os_get_cpu_number(); + u32 lb_time = lb_hash_time_now(vm); + + lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + lb_vip_t *vip0; + lb_as_t *as0; + gre_header_t *gre0; + u16 len0; + u32 value0, available_index0, hash0; + u64 key0[5]; + lb_error_t error0 = LB_ERROR_NONE; + + if (PREDICT_TRUE(n_left_from > 1)) + { + vlib_buffer_t *p2; + p2 = vlib_get_buffer(vm, from[1]); + vlib_prefetch_buffer_header(p2, STORE); + /* IPv4 + 8 = 28. possibly plus -40 */ + CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); + } + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + vip0 = pool_elt_at_index (lbm->vips, + vnet_buffer (p0)->ip.adj_index[VLIB_TX]); + + if (is_input_v4) { + ip4_header_t *ip40; + ip40 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip40->length); + key0[0] = (u64) ip40->src_address.as_u32; + key0[1] = (u64) ip40->dst_address.as_u32; + key0[2] = 0; + key0[3] = 0; + key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | + ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); + + hash0 = lb_hash_hash(key0); + } else { + ip6_header_t *ip60; + ip60 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); + key0[0] = ip60->src_address.as_u64[0]; + key0[1] = ip60->src_address.as_u64[1]; + key0[2] = ip60->dst_address.as_u64[0]; + key0[3] = ip60->dst_address.as_u64[1]; + key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | + ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); + + hash0 = lb_hash_hash(key0); + } + + //NOTE: This is an ugly trick to not include the VIP index in the hash calculation + //but actually use it in the key determination. + key0[4] |= ((vip0 - lbm->vips)); + + lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); + if (PREDICT_TRUE(value0 != ~0)) { + //Found an existing entry + as0 = &lbm->ass[value0]; + } else if (PREDICT_TRUE(available_index0 != ~0)) { + //There is an available slot for a new flow + as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; + if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element + error0 = LB_ERROR_NO_SERVER; + } else { + vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], + cpu_index, vip0 - lbm->vips, 1); + } + + //TODO: There are race conditions with as0 and vip0 manipulation. + //Configuration may be changed, vectors resized, etc... + + //Dereference previously used + vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); + vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); + + //Add sticky entry + //Note that when there is no AS configured, an entry is configured anyway. + //But no configured AS is not something that should happen + lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); + } else { + //Could not store new entry in the table + as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; + vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], + cpu_index, vip0 - lbm->vips, 1); + } + + //Now let's encap + if (is_encap_v4) { + ip4_header_t *ip40; + vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); + ip40 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip40 + 1); + ip40->src_address = lbm->ip4_src_address; + ip40->dst_address = as0->address.ip4; + ip40->ip_version_and_header_length = 0x45; + ip40->ttl = 128; + ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); + ip40->protocol = IP_PROTOCOL_GRE; + ip40->checksum = ip4_header_checksum (ip40); + } else { + ip6_header_t *ip60; + vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); + ip60 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip60 + 1); + ip60->dst_address = as0->address.ip6; + ip60->src_address = lbm->ip6_src_address; + ip60->hop_limit = 128; + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); + ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); + ip60->protocol = IP_PROTOCOL_GRE; + } + + gre0->flags_and_version = 0; + gre0->protocol = (is_input_v4)? + clib_host_to_net_u16(0x0800): + clib_host_to_net_u16(0x86DD); + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index; + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->as_index = as0 - lbm->ass; + tr->vip_index = vip0 - lbm->vips; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, + as0->dpo.dpoi_next_node); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; } static uword @@ -314,18 +290,10 @@ VLIB_REGISTER_NODE (lb6_gre6_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip6-lookup", - [LB_NEXT_REWRITE] = "ip6-rewrite", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre6) = { */ -/* .node_name = "lb6-gre6", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE6] */ -/* }; */ - VLIB_REGISTER_NODE (lb6_gre4_node) = { .function = lb6_gre4_node_fn, @@ -339,18 +307,10 @@ VLIB_REGISTER_NODE (lb6_gre4_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip4-lookup", - [LB_NEXT_REWRITE]= "ip4-rewrite-transit", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre4) = { */ -/* .node_name = "lb6-gre4", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE4] */ -/* }; */ - VLIB_REGISTER_NODE (lb4_gre6_node) = { .function = lb4_gre6_node_fn, @@ -364,18 +324,10 @@ VLIB_REGISTER_NODE (lb4_gre6_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip6-lookup", - [LB_NEXT_REWRITE] = "ip6-rewrite", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre6) = { */ -/* .node_name = "lb4-gre6", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE6] */ -/* }; */ - VLIB_REGISTER_NODE (lb4_gre4_node) = { .function = lb4_gre4_node_fn, @@ -389,14 +341,7 @@ VLIB_REGISTER_NODE (lb4_gre4_node) = .n_next_nodes = LB_N_NEXT, .next_nodes = { - [LB_NEXT_LOOKUP] = "ip4-lookup", - [LB_NEXT_REWRITE]= "ip4-rewrite-transit", [LB_NEXT_DROP] = "error-drop" }, }; -/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre4) = { */ -/* .node_name = "lb4-gre4", */ -/* .fn = lb_format_adjacency, */ -/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE4] */ -/* }; */ |