diff options
-rw-r--r-- | src/plugins/lb/api.c | 74 | ||||
-rw-r--r-- | src/plugins/lb/cli.c | 7 | ||||
-rw-r--r-- | src/plugins/lb/lb.api | 35 | ||||
-rw-r--r-- | src/plugins/lb/lb.c | 129 | ||||
-rw-r--r-- | src/plugins/lb/lb.h | 85 | ||||
-rw-r--r-- | src/plugins/lb/lb_test.c | 99 | ||||
-rw-r--r-- | src/plugins/lb/node.c | 279 | ||||
-rw-r--r-- | test/test_lb.py | 44 |
8 files changed, 716 insertions, 36 deletions
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
index bf4a50ddb04..9ccd7b5d634 100644
--- a/src/plugins/lb/api.c
+++ b/src/plugins/lb/api.c
@@ -128,6 +128,89 @@ vl_api_lb_add_del_vip_t_handler
 }
 
+/*
+ * Handle lb_add_del_vip_v2. When mp->is_del is set, look up the VIP by
+ * (prefix, protocol, port) and delete it; otherwise translate the request
+ * into lb_vip_add_args_t and call lb_vip_add (). A port of 0 denotes an
+ * all-port VIP, in which case mp->protocol is overwritten with ~0.
+ * rv is reported through REPLY_MACRO (VL_API_LB_ADD_DEL_VIP_V2_REPLY).
+ */
 static void
+vl_api_lb_add_del_vip_v2_t_handler (vl_api_lb_add_del_vip_v2_t *mp)
+{
+  lb_main_t *lbm = &lb_main;
+  vl_api_lb_conf_reply_t *rmp;
+  int rv = 0;
+  lb_vip_add_args_t args;
+
+  /* if port == 0, it means all-port VIP */
+  if (mp->port == 0)
+    {
+      mp->protocol = ~0;
+    }
+
+  ip_address_decode (&mp->pfx.address, &(args.prefix));
+
+  if (mp->is_del)
+    {
+      u32 vip_index;
+      if (!(rv = lb_vip_find_index (&(args.prefix), mp->pfx.len, mp->protocol,
+                                    ntohs (mp->port), &vip_index)))
+        rv = lb_vip_del (vip_index);
+    }
+  else
+    {
+      u32 vip_index;
+      lb_vip_type_t type = 0;
+
+      if (ip46_prefix_is_ip4 (&(args.prefix), mp->pfx.len))
+        {
+          if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+            type = LB_VIP_TYPE_IP4_GRE4;
+          else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+            type = LB_VIP_TYPE_IP4_GRE6;
+          else if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+            type = LB_VIP_TYPE_IP4_L3DSR;
+          else if (mp->encap == LB_API_ENCAP_TYPE_NAT4)
+            type = LB_VIP_TYPE_IP4_NAT4;
+        }
+      else
+        {
+          if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+            type = LB_VIP_TYPE_IP6_GRE4;
+          else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+            type = LB_VIP_TYPE_IP6_GRE6;
+          else if (mp->encap == LB_API_ENCAP_TYPE_NAT6)
+            type = LB_VIP_TYPE_IP6_NAT6;
+        }
+
+      args.plen = mp->pfx.len;
+      args.protocol = mp->protocol;
+      args.port = ntohs (mp->port);
+      args.type = type;
+      args.new_length = ntohl (mp->new_flows_table_length);
+
+      /* Set unconditionally: args is an uninitialised local, so skipping
+       * the store when the flag is clear would pass an indeterminate
+       * value on to lb_vip_add (). */
+      args.src_ip_sticky = mp->src_ip_sticky ? 1 : 0;
+
+      if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+        {
+          args.encap_args.dscp = (u8) (mp->dscp & 0x3F);
+        }
+      else if ((mp->encap == LB_API_ENCAP_TYPE_NAT4) ||
+               (mp->encap == LB_API_ENCAP_TYPE_NAT6))
+        {
+          args.encap_args.srv_type = mp->type;
+          args.encap_args.target_port = ntohs (mp->target_port);
+        }
+
+      rv = lb_vip_add (args, &vip_index);
+    }
+  REPLY_MACRO
(VL_API_LB_ADD_DEL_VIP_V2_REPLY); +} + +static void vl_api_lb_add_del_as_t_handler (vl_api_lb_add_del_as_t * mp) { diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c index 7b5dc5c8549..31152cd01f4 100644 --- a/src/plugins/lb/cli.c +++ b/src/plugins/lb/cli.c @@ -32,6 +32,7 @@ lb_vip_command_fn (vlib_main_t * vm, clib_error_t *error = 0; args.new_length = 1024; + args.src_ip_sticky = 0; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -49,6 +50,8 @@ lb_vip_command_fn (vlib_main_t * vm, ; else if (unformat(line_input, "del")) del = 1; + else if (unformat (line_input, "src_ip_sticky")) + args.src_ip_sticky = 1; else if (unformat(line_input, "protocol tcp")) { args.protocol = (u8)IP_PROTOCOL_TCP; @@ -177,6 +180,7 @@ done: return error; } +/* clang-format off */ VLIB_CLI_COMMAND (lb_vip_command, static) = { .path = "lb vip", @@ -185,9 +189,10 @@ VLIB_CLI_COMMAND (lb_vip_command, static) = "[encap (gre6|gre4|l3dsr|nat4|nat6)] " "[dscp <n>] " "[type (nodeport|clusterip) target_port <n>] " - "[new_len <n>] [del]", + "[new_len <n>] [src_ip_sticky] [del]", .function = lb_vip_command_fn, }; +/* clang-format on */ static clib_error_t * lb_as_command_fn (vlib_main_t * vm, diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api index 4bf30e76b59..96f047ddbc2 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -1,4 +1,4 @@ -option version = "1.0.0"; +option version = "1.1.0"; import "plugins/lb/lb_types.api"; import "vnet/interface_types.api"; @@ -54,6 +54,39 @@ autoreply define lb_add_del_vip { option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [del]"; }; +/** \brief Add a virtual address (or prefix) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param pfx - ip prefix and length + @param protocol - tcp or udp. + @param port - destination port. 
(0) means 'all-port VIP' + @param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4). + @param dscp - DSCP bit corresponding to VIP(applicable in L3DSR mode only). + @param type - service type(applicable in NAT4/NAT6 mode only). + @param target_port - Pod's port corresponding to specific service(applicable in NAT4/NAT6 mode only). + @param node_port - Node's port(applicable in NAT4/NAT6 mode only). + @param new_flows_table_length - Size of the new connections flow table used + for this VIP (must be power of 2). + @param src_ip_sticky - source ip based sticky session. + @param is_del - The VIP should be removed. +*/ +autoreply define lb_add_del_vip_v2 { + u32 client_index; + u32 context; + vl_api_address_with_prefix_t pfx; + u8 protocol [default=255]; + u16 port; + vl_api_lb_encap_type_t encap; + u8 dscp; + vl_api_lb_srv_type_t type ; /* LB_API_SRV_TYPE_CLUSTERIP */ + u16 target_port; + u16 node_port; + u32 new_flows_table_length [default=1024]; + bool src_ip_sticky; + bool is_del; + option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [src_ip_sticky] [del]"; +}; + /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index dfdc5066a3d..782833495c1 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -93,6 +93,78 @@ const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] = [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port, }; +const static char *const lb_dpo_gre4_ip4_sticky[] = { "lb4-gre4-sticky", + NULL }; +const static char *const lb_dpo_gre4_ip6_sticky[] = { "lb6-gre4-sticky", + NULL }; +const static char *const *const lb_dpo_gre4_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_sticky, + [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_sticky, 
+}; + +const static char *const lb_dpo_gre6_ip4_sticky[] = { "lb4-gre6-sticky", + NULL }; +const static char *const lb_dpo_gre6_ip6_sticky[] = { "lb6-gre6-sticky", + NULL }; +const static char *const *const lb_dpo_gre6_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_sticky, + [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_sticky, +}; + +const static char *const lb_dpo_gre4_ip4_port_sticky[] = { + "lb4-gre4-port-sticky", NULL +}; +const static char *const lb_dpo_gre4_ip6_port_sticky[] = { + "lb6-gre4-port-sticky", NULL +}; +const static char *const + *const lb_dpo_gre4_port_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port_sticky, + [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port_sticky, + }; + +const static char *const lb_dpo_gre6_ip4_port_sticky[] = { + "lb4-gre6-port-sticky", NULL +}; +const static char *const lb_dpo_gre6_ip6_port_sticky[] = { + "lb6-gre6-port-sticky", NULL +}; +const static char *const + *const lb_dpo_gre6_port_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port_sticky, + [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port_sticky, + }; + +const static char *const lb_dpo_l3dsr_ip4_sticky[] = { "lb4-l3dsr-sticky", + NULL }; +const static char *const *const lb_dpo_l3dsr_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_sticky, +}; + +const static char *const lb_dpo_l3dsr_ip4_port_sticky[] = { + "lb4-l3dsr-port-sticky", NULL +}; +const static char *const + *const lb_dpo_l3dsr_port_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port_sticky, + }; + +const static char *const lb_dpo_nat4_ip4_port_sticky[] = { + "lb4-nat4-port-sticky", NULL +}; +const static char *const + *const lb_dpo_nat4_port_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port_sticky, + }; + +const static char *const lb_dpo_nat6_ip6_port_sticky[] = { + "lb6-nat6-port-sticky", NULL +}; +const static char *const + *const lb_dpo_nat6_port_sticky_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = 
lb_dpo_nat6_ip6_port_sticky, + }; + u32 lb_hash_time_now(vlib_main_t * vm) { return (u32) (vlib_time_now(vm) + 10000); @@ -198,15 +270,18 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args) lb_vip_t *vip = va_arg (*args, lb_vip_t *); u32 indent = format_get_indent (s); - s = format(s, "%U %U [%lu] %U%s\n" + /* clang-format off */ + s = format(s, "%U %U [%lu] %U%s%s\n" "%U new_size:%u\n", format_white_space, indent, format_lb_vip_type, vip->type, vip - lbm->vips, format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY, + lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "", (vip->flags & LB_VIP_FLAGS_USED)?"":" removed", format_white_space, indent, vip->new_flow_table_mask + 1); + /* clang-format on */ if (vip->port != 0) { @@ -949,6 +1024,22 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip, dpo_type = lbm->dpo_nat4_port_type; else if (lb_vip_is_nat6_port(vip)) dpo_type = lbm->dpo_nat6_port_type; + else if (lb_vip_is_gre4_sticky (vip)) + dpo_type = lbm->dpo_gre4_sticky_type; + else if (lb_vip_is_gre6_sticky (vip)) + dpo_type = lbm->dpo_gre6_sticky_type; + else if (lb_vip_is_gre4_port_sticky (vip)) + dpo_type = lbm->dpo_gre4_port_sticky_type; + else if (lb_vip_is_gre6_port_sticky (vip)) + dpo_type = lbm->dpo_gre6_port_sticky_type; + else if (lb_vip_is_l3dsr_sticky (vip)) + dpo_type = lbm->dpo_l3dsr_sticky_type; + else if (lb_vip_is_l3dsr_port_sticky (vip)) + dpo_type = lbm->dpo_l3dsr_port_sticky_type; + else if (lb_vip_is_nat4_port_sticky (vip)) + dpo_type = lbm->dpo_nat4_port_sticky_type; + else if (lb_vip_is_nat6_port_sticky (vip)) + dpo_type = lbm->dpo_nat6_port_sticky_type; dpo_set(&dpo, dpo_type, proto, *vip_prefix_index); fib_table_entry_special_dpo_add(0, @@ -1147,6 +1238,10 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index) } vip->flags = LB_VIP_FLAGS_USED; + if (args.src_ip_sticky) + { + vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY; + } vip->as_indexes = 0; //Validate counters @@ -1311,6 +1406,22 @@ lb_as_stack (lb_as_t *as) 
dpo_type = lbm->dpo_nat4_port_type; else if (lb_vip_is_nat6_port(vip)) dpo_type = lbm->dpo_nat6_port_type; + else if (lb_vip_is_gre4_sticky (vip)) + dpo_type = lbm->dpo_gre4_sticky_type; + else if (lb_vip_is_gre6_sticky (vip)) + dpo_type = lbm->dpo_gre6_sticky_type; + else if (lb_vip_is_gre4_port_sticky (vip)) + dpo_type = lbm->dpo_gre4_port_sticky_type; + else if (lb_vip_is_gre6_port_sticky (vip)) + dpo_type = lbm->dpo_gre6_port_sticky_type; + else if (lb_vip_is_l3dsr_sticky (vip)) + dpo_type = lbm->dpo_l3dsr_sticky_type; + else if (lb_vip_is_l3dsr_port_sticky (vip)) + dpo_type = lbm->dpo_l3dsr_port_sticky_type; + else if (lb_vip_is_nat4_port_sticky (vip)) + dpo_type = lbm->dpo_nat4_port_sticky_type; + else if (lb_vip_is_nat6_port_sticky (vip)) + dpo_type = lbm->dpo_nat6_port_sticky_type; dpo_stack(dpo_type, lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6, @@ -1412,6 +1523,22 @@ lb_init (vlib_main_t * vm) lb_dpo_nat4_port_nodes); lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft, lb_dpo_nat6_port_nodes); + lbm->dpo_gre4_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_gre4_sticky_nodes); + lbm->dpo_gre6_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_gre6_sticky_nodes); + lbm->dpo_gre4_port_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_gre4_port_sticky_nodes); + lbm->dpo_gre6_port_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_gre6_port_sticky_nodes); + lbm->dpo_l3dsr_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_sticky_nodes); + lbm->dpo_l3dsr_port_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_port_sticky_nodes); + lbm->dpo_nat4_port_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_nat4_port_sticky_nodes); + lbm->dpo_nat6_port_sticky_type = + dpo_register_new_type (&lb_vft, lb_dpo_nat6_port_sticky_nodes); lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft); //Init AS reference counters diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h index 
4618015071e..fa1cfaadc25 100644 --- a/src/plugins/lb/lb.h +++ b/src/plugins/lb/lb.h @@ -324,6 +324,7 @@ typedef struct { */ u8 flags; #define LB_VIP_FLAGS_USED 0x1 +#define LB_VIP_FLAGS_SRC_IP_STICKY 0x2 /** * Pool of AS indexes used for this VIP. @@ -346,43 +347,100 @@ typedef struct { || (vip)->type == LB_VIP_TYPE_IP4_L3DSR \ || (vip)->type == LB_VIP_TYPE_IP4_NAT4 ) +#define lb_vip_is_src_ip_sticky(vip) \ + (((vip)->flags & LB_VIP_FLAGS_SRC_IP_STICKY) != 0) + +/* clang-format off */ #define lb_vip_is_gre4(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE4) \ - && ((vip)->port == 0)) - + && ((vip)->port == 0) \ + && !lb_vip_is_src_ip_sticky (vip)) #define lb_vip_is_gre6(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE6) \ - && ((vip)->port == 0)) + && ((vip)->port == 0) \ + && !lb_vip_is_src_ip_sticky (vip)) #define lb_vip_is_gre4_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE4) \ - && ((vip)->port != 0)) + && ((vip)->port != 0) \ + && !lb_vip_is_src_ip_sticky (vip)) #define lb_vip_is_gre6_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \ || (vip)->type == LB_VIP_TYPE_IP4_GRE6) \ - && ((vip)->port != 0)) + && ((vip)->port != 0) \ + && !lb_vip_is_src_ip_sticky (vip)) +/* clang-format on */ + +#define lb_vip_is_gre4_sticky(vip) \ + (((vip)->type == LB_VIP_TYPE_IP6_GRE4 || \ + (vip)->type == LB_VIP_TYPE_IP4_GRE4) && \ + ((vip)->port == 0) && lb_vip_is_src_ip_sticky (vip)) + +#define lb_vip_is_gre6_sticky(vip) \ + (((vip)->type == LB_VIP_TYPE_IP6_GRE6 || \ + (vip)->type == LB_VIP_TYPE_IP4_GRE6) && \ + ((vip)->port == 0) && lb_vip_is_src_ip_sticky (vip)) + +#define lb_vip_is_gre4_port_sticky(vip) \ + (((vip)->type == LB_VIP_TYPE_IP6_GRE4 || \ + (vip)->type == LB_VIP_TYPE_IP4_GRE4) && \ + ((vip)->port != 0) && lb_vip_is_src_ip_sticky (vip)) + +#define lb_vip_is_gre6_port_sticky(vip) \ + (((vip)->type == LB_VIP_TYPE_IP6_GRE6 || \ + (vip)->type == 
LB_VIP_TYPE_IP4_GRE6) && \ + ((vip)->port != 0) && lb_vip_is_src_ip_sticky (vip)) always_inline bool lb_vip_is_l3dsr(const lb_vip_t *vip) { - return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port ==0); + return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0 && + !lb_vip_is_src_ip_sticky (vip)); } always_inline bool lb_vip_is_l3dsr_port(const lb_vip_t *vip) { - return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port !=0); + return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0 && + !lb_vip_is_src_ip_sticky (vip)); } always_inline bool lb_vip_is_nat4_port(const lb_vip_t *vip) { - return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port !=0); + return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0 && + !lb_vip_is_src_ip_sticky (vip)); } always_inline bool lb_vip_is_nat6_port(const lb_vip_t *vip) { - return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port !=0); + return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0 && + !lb_vip_is_src_ip_sticky (vip)); +} + +always_inline bool +lb_vip_is_l3dsr_sticky (const lb_vip_t *vip) +{ + return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0 && + lb_vip_is_src_ip_sticky (vip)); +} +always_inline bool +lb_vip_is_l3dsr_port_sticky (const lb_vip_t *vip) +{ + return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0 && + lb_vip_is_src_ip_sticky (vip)); +} +always_inline bool +lb_vip_is_nat4_port_sticky (const lb_vip_t *vip) +{ + return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0 && + lb_vip_is_src_ip_sticky (vip)); +} +always_inline bool +lb_vip_is_nat6_port_sticky (const lb_vip_t *vip) +{ + return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0 && + lb_vip_is_src_ip_sticky (vip)); } format_function_t format_lb_vip; @@ -542,6 +600,14 @@ typedef struct { dpo_type_t dpo_l3dsr_port_type; dpo_type_t dpo_nat4_port_type; dpo_type_t dpo_nat6_port_type; + dpo_type_t dpo_gre4_sticky_type; + dpo_type_t dpo_gre6_sticky_type; + dpo_type_t dpo_gre4_port_sticky_type; + dpo_type_t 
dpo_gre6_port_sticky_type;
+  dpo_type_t dpo_l3dsr_sticky_type;
+  dpo_type_t dpo_l3dsr_port_sticky_type;
+  dpo_type_t dpo_nat4_port_sticky_type;
+  dpo_type_t dpo_nat6_port_sticky_type;
   /**
    * Node type for registering to fib changes.
    */
@@ -575,6 +641,7 @@ typedef struct {
   u8 plen;
   u8 protocol;
   u16 port;
+  u8 src_ip_sticky;
   lb_vip_type_t type;
   u32 new_length;
   lb_vip_encap_args_t encap_args;
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
index 80fc38e2746..f64bdd220b5 100644
--- a/src/plugins/lb/lb_test.c
+++ b/src/plugins/lb/lb_test.c
@@ -207,6 +207,117 @@ static int api_lb_add_del_vip (vat_main_t * vam)
   return ret;
 }
 
+/*
+ * VAT command for lb_add_del_vip_v2.
+ * Parses "<prefix> [protocol (tcp|udp) port <n>] [encap ...] [dscp <n>]
+ * [type ...] [target_port <n>] [new_len <n>] [src_ip_sticky] [del]" from
+ * vam->input, fills a vl_api_lb_add_del_vip_v2_t and sends it.
+ * Returns -99 on a parse/validation error, otherwise the value set by W ().
+ */
+static int
+api_lb_add_del_vip_v2 (vat_main_t *vam)
+{
+  unformat_input_t *line_input = vam->input;
+  vl_api_lb_add_del_vip_v2_t *mp;
+  int ret;
+  ip46_address_t ip_prefix;
+  u8 prefix_length = 0;
+  u8 protocol = 0;
+  u32 port = 0;
+  u32 encap = 0;
+  u32 dscp = ~0;
+  u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
+  u32 target_port = 0;
+  u32 new_length = 1024;
+  u8 src_ip_sticky = 0;
+  int is_del = 0;
+
+  if (!unformat (line_input, "%U", unformat_ip46_prefix, &ip_prefix,
+                 &prefix_length, IP46_TYPE_ANY, &prefix_length))
+    {
+      errmsg ("lb_add_del_vip: invalid vip prefix\n");
+      return -99;
+    }
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "new_len %d", &new_length))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "src_ip_sticky"))
+        src_ip_sticky = 1;
+      else if (unformat (line_input, "protocol tcp"))
+        {
+          protocol = IP_PROTOCOL_TCP;
+        }
+      else if (unformat (line_input, "protocol udp"))
+        {
+          protocol = IP_PROTOCOL_UDP;
+        }
+      else if (unformat (line_input, "port %d", &port))
+        ;
+      else if (unformat (line_input, "encap gre4"))
+        encap = LB_ENCAP_TYPE_GRE4;
+      else if (unformat (line_input, "encap gre6"))
+        encap = LB_ENCAP_TYPE_GRE6;
+      else if (unformat (line_input, "encap l3dsr"))
+        encap = LB_ENCAP_TYPE_L3DSR;
+      else if (unformat (line_input, "encap nat4"))
+        encap = LB_ENCAP_TYPE_NAT4;
+      else if (unformat (line_input, "encap nat6"))
+        encap = LB_ENCAP_TYPE_NAT6;
+      else if (unformat (line_input, "dscp %d", &dscp))
+        ;
+      else if (unformat (line_input, "type clusterip"))
+        srv_type = LB_SRV_TYPE_CLUSTERIP;
+      else if (unformat (line_input, "type nodeport"))
+        srv_type = LB_SRV_TYPE_NODEPORT;
+      else if (unformat (line_input, "target_port %d", &target_port))
+        ;
+      else
+        {
+          errmsg ("invalid arguments\n");
+          return -99;
+        }
+    }
+
+  if ((encap != LB_ENCAP_TYPE_L3DSR) && (dscp != ~0))
+    {
+      errmsg ("lb_vip_add error: should not configure dscp for none L3DSR.");
+      return -99;
+    }
+
+  if ((encap == LB_ENCAP_TYPE_L3DSR) && (dscp >= 64))
+    {
+      errmsg ("lb_vip_add error: dscp for L3DSR should be less than 64.");
+      return -99;
+    }
+
+  /* Use the v2 message id so it matches the vl_api_lb_add_del_vip_v2_t
+   * that mp points to; with the v1 id (LB_ADD_DEL_VIP) the request would
+   * presumably be dispatched to the v1 handler. */
+  M (LB_ADD_DEL_VIP_V2, mp);
+  ip_address_encode (&ip_prefix, IP46_TYPE_ANY, &mp->pfx.address);
+  mp->pfx.len = prefix_length;
+  mp->protocol = (u8) protocol;
+  mp->port = htons ((u16) port);
+  mp->encap = (u8) encap;
+  mp->dscp = (u8) dscp;
+  mp->type = (u8) srv_type;
+  mp->target_port = htons ((u16) target_port);
+  /* NOTE(review): node_port is filled from target_port; no separate
+   * node_port option is parsed above -- confirm this is intended. */
+  mp->node_port = htons ((u16) target_port);
+  mp->new_flows_table_length = htonl (new_length);
+  mp->is_del = is_del;
+  mp->src_ip_sticky = src_ip_sticky;
+
+  S (mp);
+  W (ret);
+  return ret;
+}
+
 static int api_lb_add_del_as (vat_main_t * vam)
 {
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index b5e9da71376..f823ea9ce3c 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -174,8 +174,8 @@ lb_node_get_other_ports6 (ip6_header_t *ip60)
 }
 
 static_always_inline void
-lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
-                  u32 *hash, u32 *vip_idx, u8 per_port_vip)
+lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, u32 *hash,
+                  u32 *vip_idx, u8 per_port_vip, u8 src_ip_sticky)
 {
   vip_port_key_t key;
   clib_bihash_kv_8_8_t kv, value;
@@ -203,7 +203,15 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
       else
         ports
= lb_node_get_other_ports4 (ip40); - *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0); + if (src_ip_sticky) + { + *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), 0, 0, 0, 0); + } + else + { + *hash = + lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0); + } if (per_port_vip) { @@ -225,10 +233,18 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, else ports = lb_node_get_other_ports6 (ip60); - *hash = lb_hash_hash (ip60->src_address.as_u64[0], - ip60->src_address.as_u64[1], - ip60->dst_address.as_u64[0], - ip60->dst_address.as_u64[1], ports); + if (src_ip_sticky) + { + *hash = lb_hash_hash ( + ip60->src_address.as_u64[0], ip60->src_address.as_u64[1], + ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], 0); + } + else + { + *hash = lb_hash_hash ( + ip60->src_address.as_u64[0], ip60->src_address.as_u64[1], + ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], ports); + } if (per_port_vip) { @@ -251,13 +267,15 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, } } +/* clang-format off */ static_always_inline uword lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6) lb_encap_type_t encap_type, //Compile-time parameter is GRE4/GRE6/L3DSR/NAT4/NAT6 - u8 per_port_vip) //Compile-time parameter stating that is per_port_vip or not + u8 per_port_vip, //Compile-time parameter stating that is per_port_vip or not + u8 src_ip_sticky) //Compile-time parameter stating that is source ip based sticky or not { lb_main_t *lbm = &lb_main; u32 n_left_from, *from, next_index, *to_next, n_left_to_next; @@ -275,7 +293,7 @@ lb_node_fn (vlib_main_t * vm, { vlib_buffer_t *p0 = vlib_get_buffer (vm, from[0]); lb_node_get_hash (lbm, p0, is_input_v4, &nexthash0, - &next_vip_idx0, per_port_vip); + &next_vip_idx0, per_port_vip, src_ip_sticky); } while (n_left_from > 0) @@ -300,7 +318,7 @@ lb_node_fn 
(vlib_main_t * vm, //Compute next hash and prefetch bucket lb_node_get_hash (lbm, p1, is_input_v4, &nexthash0, &next_vip_idx0, - per_port_vip); + per_port_vip, src_ip_sticky); lb_hash_prefetch_bucket (sticky_ht, nexthash0); //Prefetch for encap, next CLIB_PREFETCH(vlib_buffer_get_current (p1) - 64, 64, STORE); @@ -565,6 +583,7 @@ lb_node_fn (vlib_main_t * vm, return frame->n_vectors; } +/* clang-format on */ u8 * format_nodeport_lb_trace (u8 * s, va_list * args) @@ -947,84 +966,168 @@ static uword lb6_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0); + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0, 0); } static uword lb6_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0); + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0, 0); } static uword lb4_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0, 0); } static uword lb4_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0, 0); } static uword lb6_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1); + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1, 0); } static uword lb6_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1); + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1, 0); } static uword lb4_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, 
vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1, 0); } static uword lb4_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1, 0); } static uword lb4_l3dsr_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0, 0); } static uword lb4_l3dsr_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1, 0); } static uword lb6_nat6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1); + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1, 0); } static uword lb4_nat4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1); + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1, 0); +} + +static uword +lb6_gre6_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0, 1); +} + +static uword +lb6_gre4_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0, 1); +} + +static uword +lb4_gre6_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0, 1); +} + +static uword +lb4_gre4_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t 
*node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0, 1); +} + +static uword +lb6_gre6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1, 1); +} + +static uword +lb6_gre4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1, 1); +} + +static uword +lb4_gre6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1, 1); +} + +static uword +lb4_gre4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1, 1); +} + +static uword +lb4_l3dsr_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0, 1); +} + +static uword +lb4_l3dsr_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1, 1); +} + +static uword +lb6_nat6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1, 1); +} + +static uword +lb4_nat4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1, 1); } static uword @@ -1197,6 +1300,138 @@ VLIB_REGISTER_NODE (lb4_nat4_port_node) = { [LB_NEXT_DROP] = "error-drop" }, }; +VLIB_REGISTER_NODE (lb6_gre6_sticky_node) = { + .function = lb6_gre6_sticky_node_fn, + .name = "lb6-gre6-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + 
.next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb6_gre4_sticky_node) = { + .function = lb6_gre4_sticky_node_fn, + .name = "lb6-gre4-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_gre6_sticky_node) = { + .function = lb4_gre6_sticky_node_fn, + .name = "lb4-gre6-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_gre4_sticky_node) = { + .function = lb4_gre4_sticky_node_fn, + .name = "lb4-gre4-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb6_gre6_port_sticky_node) = { + .function = lb6_gre6_port_sticky_node_fn, + .name = "lb6-gre6-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb6_gre4_port_sticky_node) = { + .function = lb6_gre4_port_sticky_node_fn, + .name = "lb6-gre4-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_gre6_port_sticky_node) = { + .function = lb4_gre6_port_sticky_node_fn, + .name = "lb4-gre6-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + 
.n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_gre4_port_sticky_node) = { + .function = lb4_gre4_port_sticky_node_fn, + .name = "lb4-gre4-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_l3dsr_port_sticky_node) = { + .function = lb4_l3dsr_port_sticky_node_fn, + .name = "lb4-l3dsr-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_l3dsr_sticky_node) = { + .function = lb4_l3dsr_sticky_node_fn, + .name = "lb4-l3dsr-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb6_nat6_port_sticky_node) = { + .function = lb6_nat6_port_sticky_node_fn, + .name = "lb6-nat6-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + +VLIB_REGISTER_NODE (lb4_nat4_port_sticky_node) = { + .function = lb4_nat4_port_sticky_node_fn, + .name = "lb4-nat4-port-sticky", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + .n_next_nodes = LB_N_NEXT, + .next_nodes = { [LB_NEXT_DROP] = "error-drop" }, +}; + static uword lb4_nodeport_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) diff --git a/test/test_lb.py b/test/test_lb.py index dca9ea3f7ef..6e8d82dfe40 100644 --- 
a/test/test_lb.py +++ b/test/test_lb.py @@ -21,6 +21,7 @@ from vpp_ip import INVALID_INDEX - IP6 to GRE6 encap on per-port vip case - IP4 to L3DSR encap on vip case - IP4 to L3DSR encap on per-port vip case + - IP4 to L3DSR encap on per-port vip with src_ip_sticky case - IP4 to NAT4 encap on per-port vip case - IP6 to NAT6 encap on per-port vip case @@ -39,7 +40,7 @@ class TestLB(VppTestCase): super(TestLB, cls).setUpClass() cls.ass = range(5) - cls.packets = range(1) + cls.packets = range(100) try: cls.create_pg_interfaces(range(2)) @@ -123,11 +124,12 @@ class TestLB(VppTestCase): scapy.compat.raw(inner), scapy.compat.raw(self.info.data[IPver]) ) - def checkCapture(self, encap, isv4): + def checkCapture(self, encap, isv4, src_ip_sticky=False): self.pg0.assert_nothing_captured() out = self.pg1.get_capture(len(self.packets)) load = [0] * len(self.ass) + sticky_as = {} self.info = None for p in out: try: @@ -201,6 +203,13 @@ class TestLB(VppTestCase): udp = UDP(scapy.compat.raw(p[IPv6].payload)) self.assertEqual(udp.dport, 3307) load[asid] += 1 + + # In case of source ip sticky, check that packets with same + # src_ip are routed to same as. 
+ if src_ip_sticky and sticky_as.get(ip.src, asid) != asid: + raise Exception("Packets with same src_ip are routed to another as") + sticky_as[ip.src] = asid + except: self.logger.error(ppp("Unexpected or invalid packet:", p)) raise @@ -420,6 +429,37 @@ class TestLB(VppTestCase): ) self.vapi.cli("test lb flowtable flush") + def test_lb_ip4_l3dsr_port_src_ip_sticky(self): + """Load Balancer IP4 L3DSR on per-port-vip with src_ip_sticky case""" + try: + self.vapi.cli( + "lb vip 90.0.0.0/8 protocol udp port 20000 encap l3dsr dscp 7 src_ip_sticky" + ) + for asid in self.ass: + self.vapi.cli( + "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u" % (asid) + ) + + # Generate duplicated packets + pkts = self.generatePackets(self.pg0, isv4=True) + pkts = pkts[: len(pkts) // 2] + pkts = pkts + pkts + + self.pg0.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + self.checkCapture(encap="l3dsr", isv4=True, src_ip_sticky=True) + + finally: + for asid in self.ass: + self.vapi.cli( + "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u del" % (asid) + ) + self.vapi.cli( + "lb vip 90.0.0.0/8 protocol udp port 20000 encap l3dsr dscp 7 src_ip_sticky del" + ) + self.vapi.cli("test lb flowtable flush") + def test_lb_ip4_nat4_port(self): """Load Balancer IP4 NAT4 on per-port-vip case""" try: |