summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorNobuhiro MIKI <nmiki@yahoo-corp.jp>2022-09-28 15:53:17 +0900
committerBeno�t Ganne <bganne@cisco.com>2023-01-18 10:53:23 +0000
commit613e6dc0bf928def5d337312d522e1a15df87b00 (patch)
tree04e592643029774fadcbfc8f8e82df778b867df9 /src/plugins
parent893a0c3130e1d868c939db6dcde258da1277cf41 (diff)
lb: add source ip based sticky load balancing
This patch adds source ip based sticky session, which is already implemented in many hardware LBs and software LBs. Note that sticky sessions may be reset if the hash is recalculated as ASs are added or deleted. Since this feature is unrelated to the other existing options, the lb_add_del_vip API version has been upgraded to v2 and a new option "src_ip_sticky" has been added. Type: feature Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp> Change-Id: I3eb3680a28defbc701f28c873933ec2fb54544ab
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/lb/api.c74
-rw-r--r--src/plugins/lb/cli.c7
-rw-r--r--src/plugins/lb/lb.api35
-rw-r--r--src/plugins/lb/lb.c129
-rw-r--r--src/plugins/lb/lb.h85
-rw-r--r--src/plugins/lb/lb_test.c99
-rw-r--r--src/plugins/lb/node.c279
7 files changed, 674 insertions, 34 deletions
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
index bf4a50ddb04..9ccd7b5d634 100644
--- a/src/plugins/lb/api.c
+++ b/src/plugins/lb/api.c
@@ -128,6 +128,80 @@ vl_api_lb_add_del_vip_t_handler
}
static void
+vl_api_lb_add_del_vip_v2_t_handler (vl_api_lb_add_del_vip_v2_t *mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t *rmp;
+ int rv = 0;
+ lb_vip_add_args_t args;
+
+ /* if port == 0, it means all-port VIP */
+ if (mp->port == 0)
+ {
+ mp->protocol = ~0;
+ }
+
+ ip_address_decode (&mp->pfx.address, &(args.prefix));
+
+ if (mp->is_del)
+ {
+ u32 vip_index;
+ if (!(rv = lb_vip_find_index (&(args.prefix), mp->pfx.len, mp->protocol,
+ ntohs (mp->port), &vip_index)))
+ rv = lb_vip_del (vip_index);
+ }
+ else
+ {
+ u32 vip_index;
+ lb_vip_type_t type = 0;
+
+ if (ip46_prefix_is_ip4 (&(args.prefix), mp->pfx.len))
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP4_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP4_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ type = LB_VIP_TYPE_IP4_L3DSR;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT4)
+ type = LB_VIP_TYPE_IP4_NAT4;
+ }
+ else
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP6_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP6_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT6)
+ type = LB_VIP_TYPE_IP6_NAT6;
+ }
+
+ args.plen = mp->pfx.len;
+ args.protocol = mp->protocol;
+ args.port = ntohs (mp->port);
+ args.type = type;
+ args.new_length = ntohl (mp->new_flows_table_length);
+
+ if (mp->src_ip_sticky)
+ args.src_ip_sticky = 1;
+
+ if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ {
+ args.encap_args.dscp = (u8) (mp->dscp & 0x3F);
+ }
+ else if ((mp->encap == LB_API_ENCAP_TYPE_NAT4) ||
+ (mp->encap == LB_API_ENCAP_TYPE_NAT6))
+ {
+ args.encap_args.srv_type = mp->type;
+ args.encap_args.target_port = ntohs (mp->target_port);
+ }
+
+ rv = lb_vip_add (args, &vip_index);
+ }
+ REPLY_MACRO (VL_API_LB_ADD_DEL_VIP_V2_REPLY);
+}
+
+static void
vl_api_lb_add_del_as_t_handler
(vl_api_lb_add_del_as_t * mp)
{
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
index 7b5dc5c8549..31152cd01f4 100644
--- a/src/plugins/lb/cli.c
+++ b/src/plugins/lb/cli.c
@@ -32,6 +32,7 @@ lb_vip_command_fn (vlib_main_t * vm,
clib_error_t *error = 0;
args.new_length = 1024;
+ args.src_ip_sticky = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -49,6 +50,8 @@ lb_vip_command_fn (vlib_main_t * vm,
;
else if (unformat(line_input, "del"))
del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ args.src_ip_sticky = 1;
else if (unformat(line_input, "protocol tcp"))
{
args.protocol = (u8)IP_PROTOCOL_TCP;
@@ -177,6 +180,7 @@ done:
return error;
}
+/* clang-format off */
VLIB_CLI_COMMAND (lb_vip_command, static) =
{
.path = "lb vip",
@@ -185,9 +189,10 @@ VLIB_CLI_COMMAND (lb_vip_command, static) =
"[encap (gre6|gre4|l3dsr|nat4|nat6)] "
"[dscp <n>] "
"[type (nodeport|clusterip) target_port <n>] "
- "[new_len <n>] [del]",
+ "[new_len <n>] [src_ip_sticky] [del]",
.function = lb_vip_command_fn,
};
+/* clang-format on */
static clib_error_t *
lb_as_command_fn (vlib_main_t * vm,
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
index 4bf30e76b59..96f047ddbc2 100644
--- a/src/plugins/lb/lb.api
+++ b/src/plugins/lb/lb.api
@@ -1,4 +1,4 @@
-option version = "1.0.0";
+option version = "1.1.0";
import "plugins/lb/lb_types.api";
import "vnet/interface_types.api";
@@ -54,6 +54,39 @@ autoreply define lb_add_del_vip {
option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [del]";
};
+/** \brief Add a virtual address (or prefix)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param pfx - ip prefix and length
+ @param protocol - tcp or udp.
+ @param port - destination port. (0) means 'all-port VIP'
+ @param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4).
+ @param dscp - DSCP bit corresponding to VIP(applicable in L3DSR mode only).
+ @param type - service type(applicable in NAT4/NAT6 mode only).
+ @param target_port - Pod's port corresponding to specific service(applicable in NAT4/NAT6 mode only).
+ @param node_port - Node's port(applicable in NAT4/NAT6 mode only).
+ @param new_flows_table_length - Size of the new connections flow table used
+ for this VIP (must be power of 2).
+ @param src_ip_sticky - source ip based sticky session.
+ @param is_del - The VIP should be removed.
+*/
+autoreply define lb_add_del_vip_v2 {
+ u32 client_index;
+ u32 context;
+ vl_api_address_with_prefix_t pfx;
+ u8 protocol [default=255];
+ u16 port;
+ vl_api_lb_encap_type_t encap;
+ u8 dscp;
+ vl_api_lb_srv_type_t type ; /* LB_API_SRV_TYPE_CLUSTERIP */
+ u16 target_port;
+ u16 node_port;
+ u32 new_flows_table_length [default=1024];
+ bool src_ip_sticky;
+ bool is_del;
+ option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [src_ip_sticky] [del]";
+};
+
/** \brief Add an application server for a given VIP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index dfdc5066a3d..782833495c1 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -93,6 +93,78 @@ const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
[DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
};
+const static char *const lb_dpo_gre4_ip4_sticky[] = { "lb4-gre4-sticky",
+ NULL };
+const static char *const lb_dpo_gre4_ip6_sticky[] = { "lb6-gre4-sticky",
+ NULL };
+const static char *const *const lb_dpo_gre4_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_sticky,
+};
+
+const static char *const lb_dpo_gre6_ip4_sticky[] = { "lb4-gre6-sticky",
+ NULL };
+const static char *const lb_dpo_gre6_ip6_sticky[] = { "lb6-gre6-sticky",
+ NULL };
+const static char *const *const lb_dpo_gre6_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_sticky,
+};
+
+const static char *const lb_dpo_gre4_ip4_port_sticky[] = {
+ "lb4-gre4-port-sticky", NULL
+};
+const static char *const lb_dpo_gre4_ip6_port_sticky[] = {
+ "lb6-gre4-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_gre4_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port_sticky,
+ };
+
+const static char *const lb_dpo_gre6_ip4_port_sticky[] = {
+ "lb4-gre6-port-sticky", NULL
+};
+const static char *const lb_dpo_gre6_ip6_port_sticky[] = {
+ "lb6-gre6-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_gre6_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port_sticky,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port_sticky,
+ };
+
+const static char *const lb_dpo_l3dsr_ip4_sticky[] = { "lb4-l3dsr-sticky",
+ NULL };
+const static char *const *const lb_dpo_l3dsr_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_sticky,
+};
+
+const static char *const lb_dpo_l3dsr_ip4_port_sticky[] = {
+ "lb4-l3dsr-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_l3dsr_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port_sticky,
+ };
+
+const static char *const lb_dpo_nat4_ip4_port_sticky[] = {
+ "lb4-nat4-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_nat4_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port_sticky,
+ };
+
+const static char *const lb_dpo_nat6_ip6_port_sticky[] = {
+ "lb6-nat6-port-sticky", NULL
+};
+const static char *const
+ *const lb_dpo_nat6_port_sticky_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port_sticky,
+ };
+
u32 lb_hash_time_now(vlib_main_t * vm)
{
return (u32) (vlib_time_now(vm) + 10000);
@@ -198,15 +270,18 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
lb_vip_t *vip = va_arg (*args, lb_vip_t *);
u32 indent = format_get_indent (s);
- s = format(s, "%U %U [%lu] %U%s\n"
+ /* clang-format off */
+ s = format(s, "%U %U [%lu] %U%s%s\n"
"%U new_size:%u\n",
format_white_space, indent,
format_lb_vip_type, vip->type,
vip - lbm->vips,
format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
+ lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
(vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
format_white_space, indent,
vip->new_flow_table_mask + 1);
+ /* clang-format on */
if (vip->port != 0)
{
@@ -949,6 +1024,22 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
dpo_type = lbm->dpo_nat4_port_type;
else if (lb_vip_is_nat6_port(vip))
dpo_type = lbm->dpo_nat6_port_type;
+ else if (lb_vip_is_gre4_sticky (vip))
+ dpo_type = lbm->dpo_gre4_sticky_type;
+ else if (lb_vip_is_gre6_sticky (vip))
+ dpo_type = lbm->dpo_gre6_sticky_type;
+ else if (lb_vip_is_gre4_port_sticky (vip))
+ dpo_type = lbm->dpo_gre4_port_sticky_type;
+ else if (lb_vip_is_gre6_port_sticky (vip))
+ dpo_type = lbm->dpo_gre6_port_sticky_type;
+ else if (lb_vip_is_l3dsr_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_sticky_type;
+ else if (lb_vip_is_l3dsr_port_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_port_sticky_type;
+ else if (lb_vip_is_nat4_port_sticky (vip))
+ dpo_type = lbm->dpo_nat4_port_sticky_type;
+ else if (lb_vip_is_nat6_port_sticky (vip))
+ dpo_type = lbm->dpo_nat6_port_sticky_type;
dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
fib_table_entry_special_dpo_add(0,
@@ -1147,6 +1238,10 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
}
vip->flags = LB_VIP_FLAGS_USED;
+ if (args.src_ip_sticky)
+ {
+ vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
+ }
vip->as_indexes = 0;
//Validate counters
@@ -1311,6 +1406,22 @@ lb_as_stack (lb_as_t *as)
dpo_type = lbm->dpo_nat4_port_type;
else if (lb_vip_is_nat6_port(vip))
dpo_type = lbm->dpo_nat6_port_type;
+ else if (lb_vip_is_gre4_sticky (vip))
+ dpo_type = lbm->dpo_gre4_sticky_type;
+ else if (lb_vip_is_gre6_sticky (vip))
+ dpo_type = lbm->dpo_gre6_sticky_type;
+ else if (lb_vip_is_gre4_port_sticky (vip))
+ dpo_type = lbm->dpo_gre4_port_sticky_type;
+ else if (lb_vip_is_gre6_port_sticky (vip))
+ dpo_type = lbm->dpo_gre6_port_sticky_type;
+ else if (lb_vip_is_l3dsr_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_sticky_type;
+ else if (lb_vip_is_l3dsr_port_sticky (vip))
+ dpo_type = lbm->dpo_l3dsr_port_sticky_type;
+ else if (lb_vip_is_nat4_port_sticky (vip))
+ dpo_type = lbm->dpo_nat4_port_sticky_type;
+ else if (lb_vip_is_nat6_port_sticky (vip))
+ dpo_type = lbm->dpo_nat6_port_sticky_type;
dpo_stack(dpo_type,
lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
@@ -1412,6 +1523,22 @@ lb_init (vlib_main_t * vm)
lb_dpo_nat4_port_nodes);
lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
lb_dpo_nat6_port_nodes);
+ lbm->dpo_gre4_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre4_sticky_nodes);
+ lbm->dpo_gre6_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre6_sticky_nodes);
+ lbm->dpo_gre4_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre4_port_sticky_nodes);
+ lbm->dpo_gre6_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_gre6_port_sticky_nodes);
+ lbm->dpo_l3dsr_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_sticky_nodes);
+ lbm->dpo_l3dsr_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_l3dsr_port_sticky_nodes);
+ lbm->dpo_nat4_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_nat4_port_sticky_nodes);
+ lbm->dpo_nat6_port_sticky_type =
+ dpo_register_new_type (&lb_vft, lb_dpo_nat6_port_sticky_nodes);
lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);
//Init AS reference counters
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
index 4618015071e..fa1cfaadc25 100644
--- a/src/plugins/lb/lb.h
+++ b/src/plugins/lb/lb.h
@@ -324,6 +324,7 @@ typedef struct {
*/
u8 flags;
#define LB_VIP_FLAGS_USED 0x1
+#define LB_VIP_FLAGS_SRC_IP_STICKY 0x2
/**
* Pool of AS indexes used for this VIP.
@@ -346,43 +347,100 @@ typedef struct {
|| (vip)->type == LB_VIP_TYPE_IP4_L3DSR \
|| (vip)->type == LB_VIP_TYPE_IP4_NAT4 )
+#define lb_vip_is_src_ip_sticky(vip) \
+ (((vip)->flags & LB_VIP_FLAGS_SRC_IP_STICKY) != 0)
+
+/* clang-format off */
#define lb_vip_is_gre4(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
- && ((vip)->port == 0))
-
+ && ((vip)->port == 0) \
+ && !lb_vip_is_src_ip_sticky (vip))
#define lb_vip_is_gre6(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
- && ((vip)->port == 0))
+ && ((vip)->port == 0) \
+ && !lb_vip_is_src_ip_sticky (vip))
#define lb_vip_is_gre4_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
- && ((vip)->port != 0))
+ && ((vip)->port != 0) \
+ && !lb_vip_is_src_ip_sticky (vip))
#define lb_vip_is_gre6_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
- && ((vip)->port != 0))
+ && ((vip)->port != 0) \
+ && !lb_vip_is_src_ip_sticky (vip))
+/* clang-format on */
+
+#define lb_vip_is_gre4_sticky(vip) \
+ (((vip)->type == LB_VIP_TYPE_IP6_GRE4 || \
+ (vip)->type == LB_VIP_TYPE_IP4_GRE4) && \
+ ((vip)->port == 0) && lb_vip_is_src_ip_sticky (vip))
+
+#define lb_vip_is_gre6_sticky(vip) \
+ (((vip)->type == LB_VIP_TYPE_IP6_GRE6 || \
+ (vip)->type == LB_VIP_TYPE_IP4_GRE6) && \
+ ((vip)->port == 0) && lb_vip_is_src_ip_sticky (vip))
+
+#define lb_vip_is_gre4_port_sticky(vip) \
+ (((vip)->type == LB_VIP_TYPE_IP6_GRE4 || \
+ (vip)->type == LB_VIP_TYPE_IP4_GRE4) && \
+ ((vip)->port != 0) && lb_vip_is_src_ip_sticky (vip))
+
+#define lb_vip_is_gre6_port_sticky(vip) \
+ (((vip)->type == LB_VIP_TYPE_IP6_GRE6 || \
+ (vip)->type == LB_VIP_TYPE_IP4_GRE6) && \
+ ((vip)->port != 0) && lb_vip_is_src_ip_sticky (vip))
always_inline bool
lb_vip_is_l3dsr(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port ==0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0 &&
+ !lb_vip_is_src_ip_sticky (vip));
}
always_inline bool
lb_vip_is_l3dsr_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0 &&
+ !lb_vip_is_src_ip_sticky (vip));
}
always_inline bool
lb_vip_is_nat4_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0 &&
+ !lb_vip_is_src_ip_sticky (vip));
}
always_inline bool
lb_vip_is_nat6_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0 &&
+ !lb_vip_is_src_ip_sticky (vip));
+}
+
+always_inline bool
+lb_vip_is_l3dsr_sticky (const lb_vip_t *vip)
+{
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0 &&
+ lb_vip_is_src_ip_sticky (vip));
+}
+always_inline bool
+lb_vip_is_l3dsr_port_sticky (const lb_vip_t *vip)
+{
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0 &&
+ lb_vip_is_src_ip_sticky (vip));
+}
+always_inline bool
+lb_vip_is_nat4_port_sticky (const lb_vip_t *vip)
+{
+ return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0 &&
+ lb_vip_is_src_ip_sticky (vip));
+}
+always_inline bool
+lb_vip_is_nat6_port_sticky (const lb_vip_t *vip)
+{
+ return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0 &&
+ lb_vip_is_src_ip_sticky (vip));
}
format_function_t format_lb_vip;
@@ -542,6 +600,14 @@ typedef struct {
dpo_type_t dpo_l3dsr_port_type;
dpo_type_t dpo_nat4_port_type;
dpo_type_t dpo_nat6_port_type;
+ dpo_type_t dpo_gre4_sticky_type;
+ dpo_type_t dpo_gre6_sticky_type;
+ dpo_type_t dpo_gre4_port_sticky_type;
+ dpo_type_t dpo_gre6_port_sticky_type;
+ dpo_type_t dpo_l3dsr_sticky_type;
+ dpo_type_t dpo_l3dsr_port_sticky_type;
+ dpo_type_t dpo_nat4_port_sticky_type;
+ dpo_type_t dpo_nat6_port_sticky_type;
/**
* Node type for registering to fib changes.
*/
@@ -575,6 +641,7 @@ typedef struct {
u8 plen;
u8 protocol;
u16 port;
+ u8 src_ip_sticky;
lb_vip_type_t type;
u32 new_length;
lb_vip_encap_args_t encap_args;
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
index 80fc38e2746..f64bdd220b5 100644
--- a/src/plugins/lb/lb_test.c
+++ b/src/plugins/lb/lb_test.c
@@ -207,6 +207,105 @@ static int api_lb_add_del_vip (vat_main_t * vam)
return ret;
}
+static int
+api_lb_add_del_vip_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_lb_add_del_vip_v2_t *mp;
+ int ret;
+ ip46_address_t ip_prefix;
+ u8 prefix_length = 0;
+ u8 protocol = 0;
+ u32 port = 0;
+ u32 encap = 0;
+ u32 dscp = ~0;
+ u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
+ u32 target_port = 0;
+ u32 new_length = 1024;
+ u8 src_ip_sticky = 0;
+ int is_del = 0;
+
+ if (!unformat (line_input, "%U", unformat_ip46_prefix, &ip_prefix,
+ &prefix_length, IP46_TYPE_ANY, &prefix_length))
+ {
+ errmsg ("lb_add_del_vip: invalid vip prefix\n");
+ return -99;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "new_len %d", &new_length))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ src_ip_sticky = 1;
+ else if (unformat (line_input, "protocol tcp"))
+ {
+ protocol = IP_PROTOCOL_TCP;
+ }
+ else if (unformat (line_input, "protocol udp"))
+ {
+ protocol = IP_PROTOCOL_UDP;
+ }
+ else if (unformat (line_input, "port %d", &port))
+ ;
+ else if (unformat (line_input, "encap gre4"))
+ encap = LB_ENCAP_TYPE_GRE4;
+ else if (unformat (line_input, "encap gre6"))
+ encap = LB_ENCAP_TYPE_GRE6;
+ else if (unformat (line_input, "encap l3dsr"))
+ encap = LB_ENCAP_TYPE_L3DSR;
+ else if (unformat (line_input, "encap nat4"))
+ encap = LB_ENCAP_TYPE_NAT4;
+ else if (unformat (line_input, "encap nat6"))
+ encap = LB_ENCAP_TYPE_NAT6;
+ else if (unformat (line_input, "dscp %d", &dscp))
+ ;
+ else if (unformat (line_input, "type clusterip"))
+ srv_type = LB_SRV_TYPE_CLUSTERIP;
+ else if (unformat (line_input, "type nodeport"))
+ srv_type = LB_SRV_TYPE_NODEPORT;
+ else if (unformat (line_input, "target_port %d", &target_port))
+ ;
+ else
+ {
+ errmsg ("invalid arguments\n");
+ return -99;
+ }
+ }
+
+ if ((encap != LB_ENCAP_TYPE_L3DSR) && (dscp != ~0))
+ {
+ errmsg ("lb_vip_add error: should not configure dscp for none L3DSR.");
+ return -99;
+ }
+
+ if ((encap == LB_ENCAP_TYPE_L3DSR) && (dscp >= 64))
+ {
+ errmsg ("lb_vip_add error: dscp for L3DSR should be less than 64.");
+ return -99;
+ }
+
+ M (LB_ADD_DEL_VIP, mp);
+ ip_address_encode (&ip_prefix, IP46_TYPE_ANY, &mp->pfx.address);
+ mp->pfx.len = prefix_length;
+ mp->protocol = (u8) protocol;
+ mp->port = htons ((u16) port);
+ mp->encap = (u8) encap;
+ mp->dscp = (u8) dscp;
+ mp->type = (u8) srv_type;
+ mp->target_port = htons ((u16) target_port);
+ mp->node_port = htons ((u16) target_port);
+ mp->new_flows_table_length = htonl (new_length);
+ mp->is_del = is_del;
+ mp->src_ip_sticky = src_ip_sticky;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
static int api_lb_add_del_as (vat_main_t * vam)
{
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index b5e9da71376..f823ea9ce3c 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -174,8 +174,8 @@ lb_node_get_other_ports6 (ip6_header_t *ip60)
}
static_always_inline void
-lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
- u32 *hash, u32 *vip_idx, u8 per_port_vip)
+lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, u32 *hash,
+ u32 *vip_idx, u8 per_port_vip, u8 src_ip_sticky)
{
vip_port_key_t key;
clib_bihash_kv_8_8_t kv, value;
@@ -203,7 +203,15 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
else
ports = lb_node_get_other_ports4 (ip40);
- *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+ if (src_ip_sticky)
+ {
+ *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), 0, 0, 0, 0);
+ }
+ else
+ {
+ *hash =
+ lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+ }
if (per_port_vip)
{
@@ -225,10 +233,18 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
else
ports = lb_node_get_other_ports6 (ip60);
- *hash = lb_hash_hash (ip60->src_address.as_u64[0],
- ip60->src_address.as_u64[1],
- ip60->dst_address.as_u64[0],
- ip60->dst_address.as_u64[1], ports);
+ if (src_ip_sticky)
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], 0);
+ }
+ else
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], ports);
+ }
if (per_port_vip)
{
@@ -251,13 +267,15 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
}
}
+/* clang-format off */
static_always_inline uword
lb_node_fn (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame,
u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
lb_encap_type_t encap_type, //Compile-time parameter is GRE4/GRE6/L3DSR/NAT4/NAT6
- u8 per_port_vip) //Compile-time parameter stating that is per_port_vip or not
+ u8 per_port_vip, //Compile-time parameter stating that is per_port_vip or not
+ u8 src_ip_sticky) //Compile-time parameter stating that is source ip based sticky or not
{
lb_main_t *lbm = &lb_main;
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
@@ -275,7 +293,7 @@ lb_node_fn (vlib_main_t * vm,
{
vlib_buffer_t *p0 = vlib_get_buffer (vm, from[0]);
lb_node_get_hash (lbm, p0, is_input_v4, &nexthash0,
- &next_vip_idx0, per_port_vip);
+ &next_vip_idx0, per_port_vip, src_ip_sticky);
}
while (n_left_from > 0)
@@ -300,7 +318,7 @@ lb_node_fn (vlib_main_t * vm,
//Compute next hash and prefetch bucket
lb_node_get_hash (lbm, p1, is_input_v4,
&nexthash0, &next_vip_idx0,
- per_port_vip);
+ per_port_vip, src_ip_sticky);
lb_hash_prefetch_bucket (sticky_ht, nexthash0);
//Prefetch for encap, next
CLIB_PREFETCH(vlib_buffer_get_current (p1) - 64, 64, STORE);
@@ -565,6 +583,7 @@ lb_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
+/* clang-format on */
u8 *
format_nodeport_lb_trace (u8 * s, va_list * args)
@@ -947,84 +966,168 @@ static uword
lb6_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0, 0);
}
static uword
lb6_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0, 0);
}
static uword
lb4_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0, 0);
}
static uword
lb4_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0, 0);
}
static uword
lb6_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1, 0);
}
static uword
lb6_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1, 0);
}
static uword
lb4_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1, 0);
}
static uword
lb4_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1, 0);
}
static uword
lb4_l3dsr_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0, 0);
}
static uword
lb4_l3dsr_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1, 0);
}
static uword
lb6_nat6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1, 0);
}
static uword
lb4_nat4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1, 0);
+}
+
+static uword
+lb6_gre6_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0, 1);
+}
+
+static uword
+lb6_gre4_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0, 1);
+}
+
+static uword
+lb4_gre6_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0, 1);
+}
+
+static uword
+lb4_gre4_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0, 1);
+}
+
+static uword
+lb6_gre6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1, 1);
+}
+
+static uword
+lb6_gre4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1, 1);
+}
+
+static uword
+lb4_gre6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1, 1);
+}
+
+static uword
+lb4_gre4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1, 1);
+}
+
+static uword
+lb4_l3dsr_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0, 1);
+}
+
+static uword
+lb4_l3dsr_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1, 1);
+}
+
+static uword
+lb6_nat6_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1, 1);
+}
+
+static uword
+lb4_nat4_port_sticky_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1, 1);
}
static uword
@@ -1197,6 +1300,138 @@ VLIB_REGISTER_NODE (lb4_nat4_port_node) =
{ [LB_NEXT_DROP] = "error-drop" },
};
+VLIB_REGISTER_NODE (lb6_gre6_sticky_node) = {
+ .function = lb6_gre6_sticky_node_fn,
+ .name = "lb6-gre6-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb6_gre4_sticky_node) = {
+ .function = lb6_gre4_sticky_node_fn,
+ .name = "lb6-gre4-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_gre6_sticky_node) = {
+ .function = lb4_gre6_sticky_node_fn,
+ .name = "lb4-gre6-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_gre4_sticky_node) = {
+ .function = lb4_gre4_sticky_node_fn,
+ .name = "lb4-gre4-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb6_gre6_port_sticky_node) = {
+ .function = lb6_gre6_port_sticky_node_fn,
+ .name = "lb6-gre6-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb6_gre4_port_sticky_node) = {
+ .function = lb6_gre4_port_sticky_node_fn,
+ .name = "lb6-gre4-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_gre6_port_sticky_node) = {
+ .function = lb4_gre6_port_sticky_node_fn,
+ .name = "lb4-gre6-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_gre4_port_sticky_node) = {
+ .function = lb4_gre4_port_sticky_node_fn,
+ .name = "lb4-gre4-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_l3dsr_port_sticky_node) = {
+ .function = lb4_l3dsr_port_sticky_node_fn,
+ .name = "lb4-l3dsr-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_l3dsr_sticky_node) = {
+ .function = lb4_l3dsr_sticky_node_fn,
+ .name = "lb4-l3dsr-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb6_nat6_port_sticky_node) = {
+ .function = lb6_nat6_port_sticky_node_fn,
+ .name = "lb6-nat6-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
+VLIB_REGISTER_NODE (lb4_nat4_port_sticky_node) = {
+ .function = lb4_nat4_port_sticky_node_fn,
+ .name = "lb4-nat4-port-sticky",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes = { [LB_NEXT_DROP] = "error-drop" },
+};
+
static uword
lb4_nodeport_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)