aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHongjun Ni <hongjun.ni@intel.com>2018-06-28 20:14:19 +0800
committerDamjan Marion <dmarion@me.com>2018-08-31 16:00:17 +0000
commit219cc90cb435cfcfb5154c6c0e49bd4d60aae769 (patch)
treefdb5a73307ee5d27073781228a18f9b38b0c6080
parentd69f4396bfe6b41ca04b7fc1496475f0a635b0da (diff)
Support lb on both vip and per-port-vip case
Previously, a service was identified by its VIP alone. This patch extends the plugin so that a service can be identified either by VIP only (all-port VIP) or by VIP plus protocol and port (per-port VIP). Change-Id: Icbfd1f972c6bafde7d85c6abb498576bd9ba250d Signed-off-by: Hongjun Ni <hongjun.ni@intel.com>
-rw-r--r--src/plugins/lb/api.c16
-rw-r--r--src/plugins/lb/cli.c91
-rw-r--r--src/plugins/lb/lb.api13
-rw-r--r--src/plugins/lb/lb.c454
-rw-r--r--src/plugins/lb/lb.h114
-rw-r--r--src/plugins/lb/lb_test.c2
-rw-r--r--src/plugins/lb/node.c246
-rw-r--r--test/test_lb.py273
8 files changed, 953 insertions, 256 deletions
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
index afaf61c459a..18aae43b11d 100644
--- a/src/plugins/lb/api.c
+++ b/src/plugins/lb/api.c
@@ -109,11 +109,19 @@ vl_api_lb_add_del_vip_t_handler
int rv = 0;
lb_vip_add_args_t args;
+ if((mp->protocol != IP_PROTOCOL_TCP)
+ && (mp->protocol != IP_PROTOCOL_UDP))
+ {
+ mp->protocol = ~0;
+ mp->port = 0;
+ }
+
memcpy (&(args.prefix.ip6), mp->ip_prefix, sizeof(args.prefix.ip6));
if (mp->is_del) {
u32 vip_index;
- if (!(rv = lb_vip_find_index(&(args.prefix), mp->prefix_length, &vip_index)))
+ if (!(rv = lb_vip_find_index(&(args.prefix), mp->prefix_length,
+ mp->protocol, mp->port, &vip_index)))
rv = lb_vip_del(vip_index);
} else {
u32 vip_index;
@@ -147,9 +155,7 @@ vl_api_lb_add_del_vip_t_handler
else if ((mp->encap == LB_ENCAP_TYPE_NAT4)
||(mp->encap == LB_ENCAP_TYPE_NAT6)) {
args.encap_args.srv_type = mp->type;
- args.encap_args.port = ntohs(mp->port);
args.encap_args.target_port = ntohs(mp->target_port);
- args.encap_args.node_port = ntohs(mp->node_port);
}
rv = lb_vip_add(args, &vip_index);
@@ -182,7 +188,6 @@ static void *vl_api_lb_add_del_vip_t_print
s = format (s, "type %u ", mp->type);
s = format (s, "port %u ", mp->port);
s = format (s, "target_port %u ", mp->target_port);
- s = format (s, "node_port %u ", mp->node_port);
}
s = format (s, "%u ", mp->new_flows_table_length);
@@ -208,7 +213,8 @@ vl_api_lb_add_del_as_t_handler
memcpy(&as_address.ip6, mp->as_address,
sizeof(as_address.ip6));
- if ((rv = lb_vip_find_index(&vip_ip_prefix, mp->vip_prefix_length, &vip_index)))
+ if ((rv = lb_vip_find_index(&vip_ip_prefix, mp->vip_prefix_length,
+ mp->protocol, mp->port, &vip_index)))
goto done;
if (mp->is_del)
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
index 76f05a586de..cfe8f381b13 100644
--- a/src/plugins/lb/cli.c
+++ b/src/plugins/lb/cli.c
@@ -24,12 +24,11 @@ lb_vip_command_fn (vlib_main_t * vm,
lb_vip_add_args_t args;
u8 del = 0;
int ret;
+ u32 port = 0;
u32 encap = 0;
u32 dscp = ~0;
u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
- u32 port = 0;
u32 target_port = 0;
- u32 node_port = 0;
clib_error_t *error = 0;
args.new_length = 1024;
@@ -50,6 +49,16 @@ lb_vip_command_fn (vlib_main_t * vm,
;
else if (unformat(line_input, "del"))
del = 1;
+ else if (unformat(line_input, "protocol tcp"))
+ {
+ args.protocol = (u8)IP_PROTOCOL_TCP;
+ }
+ else if (unformat(line_input, "protocol udp"))
+ {
+ args.protocol = (u8)IP_PROTOCOL_UDP;
+ }
+ else if (unformat(line_input, "port %d", &port))
+ ;
else if (unformat(line_input, "encap gre4"))
encap = LB_ENCAP_TYPE_GRE4;
else if (unformat(line_input, "encap gre6"))
@@ -66,12 +75,8 @@ lb_vip_command_fn (vlib_main_t * vm,
srv_type = LB_SRV_TYPE_CLUSTERIP;
else if (unformat(line_input, "type nodeport"))
srv_type = LB_SRV_TYPE_NODEPORT;
- else if (unformat(line_input, "port %d", &port))
- ;
else if (unformat(line_input, "target_port %d", &target_port))
;
- else if (unformat(line_input, "node_port %d", &node_port))
- ;
else {
error = clib_error_return (0, "parse error: '%U'",
format_unformat_error, line_input);
@@ -79,6 +84,17 @@ lb_vip_command_fn (vlib_main_t * vm,
}
}
+ /* if port == 0, it means all-port VIP */
+ if (port == 0)
+ {
+ args.protocol = ~0;
+ args.port = 0;
+ }
+ else
+ {
+ args.port = (u16)port;
+ }
+
if ((encap != LB_ENCAP_TYPE_L3DSR) && (dscp != ~0))
{
error = clib_error_return(0, "lb_vip_add error: "
@@ -135,9 +151,7 @@ lb_vip_command_fn (vlib_main_t * vm,
|| (encap == LB_ENCAP_TYPE_NAT6))
{
args.encap_args.srv_type = (u8) srv_type;
- args.encap_args.port = (u16) port;
args.encap_args.target_port = (u16) target_port;
- args.encap_args.node_port = (u16) node_port;
}
if ((ret = lb_vip_add(args, &index))) {
@@ -147,7 +161,8 @@ lb_vip_command_fn (vlib_main_t * vm,
vlib_cli_output(vm, "lb_vip_add ok %d", index);
}
} else {
- if ((ret = lb_vip_find_index(&(args.prefix), args.plen, &index))) {
+ if ((ret = lb_vip_find_index(&(args.prefix), args.plen,
+ args.protocol, args.port, &index))) {
error = clib_error_return (0, "lb_vip_find_index error %d", ret);
goto done;
} else if ((ret = lb_vip_del(index))) {
@@ -165,9 +180,11 @@ done:
VLIB_CLI_COMMAND (lb_vip_command, static) =
{
.path = "lb vip",
- .short_help = "lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] "
+ .short_help = "lb vip <prefix> "
+ "[protocol (tcp|udp) port <n>] "
+ "[encap (gre6|gre4|l3dsr|nat4|nat6)] "
"[dscp <n>] "
- "[type (nodeport|clusterip) port <n> target_port <n> node_port <n>] "
+ "[type (nodeport|clusterip) target_port <n>] "
"[new_len <n>] [del]",
.function = lb_vip_command_fn,
};
@@ -181,6 +198,8 @@ lb_as_command_fn (vlib_main_t * vm,
u8 vip_plen;
ip46_address_t *as_array = 0;
u32 vip_index;
+ u32 port = 0;
+ u8 protocol = 0;
u8 del = 0;
int ret;
clib_error_t *error = 0;
@@ -188,30 +207,52 @@ lb_as_command_fn (vlib_main_t * vm,
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
- if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) {
+ if (!unformat(line_input, "%U", unformat_ip46_prefix,
+ &vip_prefix, &vip_plen, IP46_TYPE_ANY))
+ {
error = clib_error_return (0, "invalid as address: '%U'",
format_unformat_error, line_input);
goto done;
}
- if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) {
- error = clib_error_return (0, "lb_vip_find_index error %d", ret);
- goto done;
- }
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) {
+ if (unformat(line_input, "%U", unformat_ip46_address,
+ &as_addr, IP46_TYPE_ANY))
+ {
vec_add1(as_array, as_addr);
} else if (unformat(line_input, "del")) {
del = 1;
- } else {
+ }
+ else if (unformat(line_input, "protocol tcp"))
+ {
+ protocol = (u8)IP_PROTOCOL_TCP;
+ }
+ else if (unformat(line_input, "protocol udp"))
+ {
+ protocol = (u8)IP_PROTOCOL_UDP;
+ }
+ else if (unformat(line_input, "port %d", &port))
+ ;
+ else {
error = clib_error_return (0, "parse error: '%U'",
format_unformat_error, line_input);
goto done;
}
}
+ /* If port == 0, it means all-port VIP */
+ if (port == 0)
+ {
+ protocol = ~0;
+ }
+
+ if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, protocol,
+ (u16)port, &vip_index))){
+ error = clib_error_return (0, "lb_vip_find_index error %d", ret);
+ goto done;
+ }
+
if (!vec_len(as_array)) {
error = clib_error_return (0, "No AS address provided");
goto done;
@@ -221,12 +262,14 @@ lb_as_command_fn (vlib_main_t * vm,
clib_warning("vip index is %d", vip_index);
if (del) {
- if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) {
+ if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array))))
+ {
error = clib_error_return (0, "lb_vip_del_ass error %d", ret);
goto done;
}
} else {
- if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) {
+ if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array))))
+ {
error = clib_error_return (0, "lb_vip_add_ass error %d", ret);
goto done;
}
@@ -242,7 +285,8 @@ done:
VLIB_CLI_COMMAND (lb_as_command, static) =
{
.path = "lb as",
- .short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]",
+ .short_help = "lb as <vip-prefix> [protocol (tcp|udp) port <n>]"
+ " [<address> [<address> [...]]] [del]",
.function = lb_as_command_fn,
};
@@ -335,8 +379,11 @@ lb_show_vips_command_fn (vlib_main_t * vm,
if (unformat(&line_input, "verbose"))
verbose = 1;
+ /* Hide dummy VIP */
pool_foreach(vip, lbm->vips, {
+ if (vip != lbm->vips) {
vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip);
+ }
});
unformat_free (&line_input);
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
index a9f05f253c5..de3d0c1e5e7 100644
--- a/src/plugins/lb/lb.api
+++ b/src/plugins/lb/lb.api
@@ -25,10 +25,11 @@ autoreply define lb_conf
@param context - sender context, to match reply w/ request
@param ip_prefix - IP address (IPv4 in lower order 32 bits).
@param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param protocol - tcp or udp.
+ @param port - destination port.
@param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4).
@param dscp - DSCP bit corresponding to VIP(applicable in L3DSR mode only).
@param type - service type(applicable in NAT4/NAT6 mode only).
- @param port - service port(applicable in NAT4/NAT6 mode only).
@param target_port - Pod's port corresponding to specific service(applicable in NAT4/NAT6 mode only).
@param node_port - Node's port(applicable in NAT4/NAT6 mode only).
@param new_flows_table_length - Size of the new connections flow table used
@@ -40,10 +41,11 @@ autoreply define lb_add_del_vip {
u32 context;
u8 ip_prefix[16];
u8 prefix_length;
+ u8 protocol;
+ u16 port;
u8 encap;
u8 dscp;
u8 type;
- u16 port;
u16 target_port;
u16 node_port;
u32 new_flows_table_length;
@@ -54,7 +56,9 @@ autoreply define lb_add_del_vip {
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits).
- @param vip_ip_prefix - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param vip_prefix_length - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param protocol - tcp or udp.
+ @param port - destination port.
@param as_address - The application server address (IPv4 in lower order 32 bits).
@param is_del - The AS should be removed.
*/
@@ -63,6 +67,9 @@ autoreply define lb_add_del_as {
u32 context;
u8 vip_ip_prefix[16];
u8 vip_prefix_length;
+ u8 protocol;
+ u16 port;
u8 as_address[16];
u8 is_del;
};
+
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index d5dc3054fb4..fb62c217e7f 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -49,22 +49,44 @@ const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
[DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
};
-const static char * const lb_dpo_l3dsr_ip4[] = { "lb4-l3dsr" , NULL };
+const static char * const lb_dpo_gre4_ip4_port[] = { "lb4-gre4-port" , NULL };
+const static char * const lb_dpo_gre4_ip6_port[] = { "lb6-gre4-port" , NULL };
+const static char* const * const lb_dpo_gre4_port_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4_port,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6_port,
+ };
+
+const static char * const lb_dpo_gre6_ip4_port[] = { "lb4-gre6-port" , NULL };
+const static char * const lb_dpo_gre6_ip6_port[] = { "lb6-gre6-port" , NULL };
+const static char* const * const lb_dpo_gre6_port_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4_port,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6_port,
+ };
+
+const static char * const lb_dpo_l3dsr_ip4[] = {"lb4-l3dsr" , NULL};
const static char* const * const lb_dpo_l3dsr_nodes[DPO_PROTO_NUM] =
{
[DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4,
};
-const static char * const lb_dpo_nat4_ip4[] = { "lb4-nat4" , NULL };
-const static char* const * const lb_dpo_nat4_nodes[DPO_PROTO_NUM] =
+const static char * const lb_dpo_l3dsr_ip4_port[] = {"lb4-l3dsr-port" , NULL};
+const static char* const * const lb_dpo_l3dsr_port_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_l3dsr_ip4_port,
+ };
+
+const static char * const lb_dpo_nat4_ip4_port[] = { "lb4-nat4-port" , NULL };
+const static char* const * const lb_dpo_nat4_port_nodes[DPO_PROTO_NUM] =
{
- [DPO_PROTO_IP4] = lb_dpo_nat4_ip4,
+ [DPO_PROTO_IP4] = lb_dpo_nat4_ip4_port,
};
-const static char * const lb_dpo_nat6_ip6[] = { "lb6-nat6" , NULL };
-const static char* const * const lb_dpo_nat6_nodes[DPO_PROTO_NUM] =
+const static char * const lb_dpo_nat6_ip6_port[] = { "lb6-nat6-port" , NULL };
+const static char* const * const lb_dpo_nat6_port_nodes[DPO_PROTO_NUM] =
{
- [DPO_PROTO_IP6] = lb_dpo_nat6_ip6,
+ [DPO_PROTO_IP6] = lb_dpo_nat6_ip6_port,
};
u32 lb_hash_time_now(vlib_main_t * vm)
@@ -137,6 +159,11 @@ u8 *format_lb_vip (u8 * s, va_list * args)
pool_elts(vip->as_indexes),
(vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
+ if (vip->port != 0)
+ {
+ s = format(s, " protocol:%u port:%u ", vip->protocol, vip->port);
+ }
+
if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
{
s = format(s, " dscp:%u", vip->encap_args.dscp);
@@ -144,14 +171,10 @@ u8 *format_lb_vip (u8 * s, va_list * args)
else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
|| (vip->type == LB_VIP_TYPE_IP6_NAT6))
{
- if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
- s = format (s, " type:clusterip port:%u target_port:%u",
- ntohs (vip->encap_args.port),
- ntohs (vip->encap_args.target_port));
- else
- s = format (s, " type:nodeport node_port:%u target_port:%u",
- ntohs (vip->encap_args.node_port),
- ntohs (vip->encap_args.target_port));
+ s = format (s, " type:%s port:%u target_port:%u",
+ (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
+ "nodeport",
+ ntohs(vip->port), ntohs(vip->encap_args.target_port));
}
return s;
@@ -181,6 +204,13 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
format_white_space, indent,
vip->new_flow_table_mask + 1);
+ if (vip->port != 0)
+ {
+ s = format(s, "%U protocol:%u port:%u\n",
+ format_white_space, indent,
+ vip->protocol, vip->port);
+ }
+
if (vip->type == LB_VIP_TYPE_IP4_L3DSR)
{
s = format(s, "%U dscp:%u\n",
@@ -190,15 +220,11 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
else if ((vip->type == LB_VIP_TYPE_IP4_NAT4)
|| (vip->type == LB_VIP_TYPE_IP6_NAT6))
{
- if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
- s = format (s, "%U type:clusterip port:%u target_port:%u",
- format_white_space, indent, ntohs (vip->encap_args.port),
- ntohs (vip->encap_args.target_port));
- else
- s = format (s, "%U type:nodeport node_port:%u target_port:%u",
- format_white_space, indent,
- ntohs (vip->encap_args.node_port),
- ntohs (vip->encap_args.target_port));
+ s = format (s, "%U type:%s port:%u target_port:%u",
+ format_white_space, indent,
+ (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)?"clusterip":
+ "nodeport",
+ ntohs(vip->port), ntohs(vip->encap_args.target_port));
}
//Print counters
@@ -237,14 +263,6 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
});
vec_free(count);
-
- /*
- s = format(s, "%U new flows table:\n", format_white_space, indent);
- lb_new_flow_entry_t *nfe;
- vec_foreach(nfe, vip->new_flow_table) {
- s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index);
- }
- */
return s;
}
@@ -283,11 +301,11 @@ static void lb_vip_garbage_collection(lb_vip_t *vip)
pool_foreach(as_index, vip->as_indexes, {
as = &lbm->ass[*as_index];
if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
- clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
+ clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) &&
(vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
{ //Not referenced
- if (lb_vip_is_nat4(vip)) {
+ if (lb_vip_is_nat4_port(vip)) {
m_key4.addr = as->address.ip4;
m_key4.port = vip->encap_args.target_port;
m_key4.protocol = 0;
@@ -300,8 +318,8 @@ static void lb_vip_garbage_collection(lb_vip_t *vip)
kv4.value = m - lbm->snat_mappings;
clib_bihash_add_del_8_8(&lbm->mapping_by_as4, &kv4, 0);
- pool_put (lbm->snat_mappings, m);
- } else if (lb_vip_is_nat6(vip)) {
+ pool_put (lbm->snat_mappings, m);
+ } else if (lb_vip_is_nat6_port(vip)) {
m_key6.addr.as_u64[0] = as->address.ip6.as_u64[0];
m_key6.addr.as_u64[1] = as->address.ip6.as_u64[1];
m_key6.port = vip->encap_args.target_port;
@@ -478,8 +496,13 @@ int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
return 0;
}
+
+
static
-int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+int lb_vip_port_find_index(ip46_address_t *prefix, u8 plen,
+ u8 protocol, u16 port,
+ lb_lkp_type_t lkp_type,
+ u32 *vip_index)
{
lb_main_t *lbm = &lb_main;
lb_vip_t *vip;
@@ -489,19 +512,57 @@ int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
if ((vip->flags & LB_AS_FLAGS_USED) &&
vip->plen == plen &&
vip->prefix.as_u64[0] == prefix->as_u64[0] &&
- vip->prefix.as_u64[1] == prefix->as_u64[1]) {
- *vip_index = vip - lbm->vips;
- return 0;
- }
+ vip->prefix.as_u64[1] == prefix->as_u64[1])
+ {
+ if((lkp_type == LB_LKP_SAME_IP_PORT &&
+ vip->protocol == protocol &&
+ vip->port == port) ||
+ (lkp_type == LB_LKP_ALL_PORT_IP &&
+ vip->port == 0) ||
+ (lkp_type == LB_LKP_DIFF_IP_PORT &&
+ (vip->protocol != protocol ||
+ vip->port != port) ) )
+ {
+ *vip_index = vip - lbm->vips;
+ return 0;
+ }
+ }
});
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
-int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+static
+int lb_vip_port_find_index_with_lock(ip46_address_t *prefix, u8 plen,
+ u8 protocol, u16 port, u32 *vip_index)
+{
+ return lb_vip_port_find_index(prefix, plen, protocol, port,
+ LB_LKP_SAME_IP_PORT, vip_index);
+}
+
+static
+int lb_vip_port_find_all_port_vip(ip46_address_t *prefix, u8 plen,
+ u32 *vip_index)
+{
+ return lb_vip_port_find_index(prefix, plen, ~0, 0,
+ LB_LKP_ALL_PORT_IP, vip_index);
+}
+
+/* Find out per-port-vip entry with different protocol and port */
+static
+int lb_vip_port_find_diff_port(ip46_address_t *prefix, u8 plen,
+ u8 protocol, u16 port, u32 *vip_index)
+{
+ return lb_vip_port_find_index(prefix, plen, protocol, port,
+ LB_LKP_DIFF_IP_PORT, vip_index);
+}
+
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
+ u16 port, u32 *vip_index)
{
int ret;
lb_get_writer_lock();
- ret = lb_vip_find_index_with_lock(prefix, plen, vip_index);
+ ret = lb_vip_port_find_index_with_lock(prefix, plen,
+ protocol, port, vip_index);
lb_put_writer_lock();
return ret;
}
@@ -516,7 +577,8 @@ static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_
as = &lbm->ass[*asi];
if (as->vip_index == (vip - lbm->vips) &&
as->address.as_u64[0] == address->as_u64[0] &&
- as->address.as_u64[1] == address->as_u64[1]) {
+ as->address.as_u64[1] == address->as_u64[1])
+ {
*as_index = as - lbm->ass;
return 0;
}
@@ -609,23 +671,23 @@ next:
}
as->next_hop_fib_entry_index =
- fib_table_entry_special_add(0,
+ fib_table_entry_special_add(0,
&nh,
FIB_SOURCE_RR,
FIB_ENTRY_FLAG_NONE);
as->next_hop_child_index =
- fib_entry_child_add(as->next_hop_fib_entry_index,
+ fib_entry_child_add(as->next_hop_fib_entry_index,
lbm->fib_node_type,
as - lbm->ass);
lb_as_stack(as);
- if ( lb_vip_is_nat4(vip) || lb_vip_is_nat6(vip) )
+ if ( lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip) )
{
/* Add SNAT static mapping */
pool_get (lbm->snat_mappings, m);
memset (m, 0, sizeof (*m));
- if (lb_vip_is_nat4(vip)) {
+ if (lb_vip_is_nat4_port(vip)) {
lb_snat4_key_t m_key4;
clib_bihash_kv_8_8_t kv4;
m_key4.addr = as->address.ip4;
@@ -636,16 +698,15 @@ next:
if (vip->encap_args.srv_type == LB_SRV_TYPE_CLUSTERIP)
{
m->src_ip.ip4 = vip->prefix.ip4;
- m->src_port = vip->encap_args.port;
}
else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
{
m->src_ip.ip4 = lbm->ip4_src_address;
- m->src_port = vip->encap_args.node_port;
}
m->src_ip_is_ipv6 = 0;
m->as_ip.ip4 = as->address.ip4;
- m->as_ip_is_ipv6 = 0;;
+ m->as_ip_is_ipv6 = 0;
+ m->src_port = vip->port;
m->target_port = vip->encap_args.target_port;
m->vrf_id = 0;
m->fib_index = 0;
@@ -666,18 +727,17 @@ next:
{
m->src_ip.ip6.as_u64[0] = vip->prefix.ip6.as_u64[0];
m->src_ip.ip6.as_u64[1] = vip->prefix.ip6.as_u64[1];
- m->src_port = vip->encap_args.port;
}
else if (vip->encap_args.srv_type == LB_SRV_TYPE_NODEPORT)
{
m->src_ip.ip6.as_u64[0] = lbm->ip6_src_address.as_u64[0];
m->src_ip.ip6.as_u64[1] = lbm->ip6_src_address.as_u64[1];
- m->src_port = vip->encap_args.node_port;
}
m->src_ip_is_ipv6 = 1;
m->as_ip.ip6.as_u64[0] = as->address.ip6.as_u64[0];
m->as_ip.ip6.as_u64[1] = as->address.ip6.as_u64[1];
m->as_ip_is_ipv6 = 1;
+ m->src_port = vip->port;
m->target_port = vip->encap_args.target_port;
m->vrf_id = 0;
m->fib_index = 0;
@@ -707,6 +767,7 @@ int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
lb_main_t *lbm = &lb_main;
u32 now = (u32) vlib_time_now(vlib_get_main());
u32 *ip = 0;
+ u32 as_index = 0;
lb_vip_t *vip;
if (!(vip = lb_vip_get_by_index(vip_index))) {
@@ -715,8 +776,7 @@ int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
u32 *indexes = NULL;
while (n--) {
- u32 i;
- if (lb_as_find_index_vip(vip, &addresses[n], &i)) {
+ if (lb_as_find_index_vip(vip, &addresses[n], &as_index)) {
vec_free(indexes);
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
@@ -730,7 +790,7 @@ int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
}
}
- vec_add1(indexes, i);
+ vec_add1(indexes, as_index);
next:
continue;
}
@@ -757,20 +817,71 @@ int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
lb_get_writer_lock();
int ret = lb_vip_del_ass_withlock(vip_index, addresses, n);
lb_put_writer_lock();
+
return ret;
}
+static int
+lb_vip_prefix_index_alloc (lb_main_t *lbm)
+{
+ /*
+ * Check for dynamically allocated instance number.
+ */
+ u32 bit;
+
+ bit = clib_bitmap_first_clear (lbm->vip_prefix_indexes);
+
+ lbm->vip_prefix_indexes = clib_bitmap_set(lbm->vip_prefix_indexes, bit, 1);
+
+ return bit;
+}
+
+static int
+lb_vip_prefix_index_free (lb_main_t *lbm, u32 instance)
+{
+
+ if (clib_bitmap_get (lbm->vip_prefix_indexes, instance) == 0)
+ {
+ return -1;
+ }
+
+ lbm->vip_prefix_indexes = clib_bitmap_set (lbm->vip_prefix_indexes,
+ instance, 0);
+
+ return 0;
+}
+
/**
* Add the VIP adjacency to the ip4 or ip6 fib
*/
-static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
+static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip,
+ u32 *vip_prefix_index)
{
dpo_proto_t proto = 0;
dpo_type_t dpo_type = 0;
+ u32 vip_idx = 0;
+
+ if (vip->port != 0)
+ {
+ /* for per-port vip, if VIP adjacency has been added,
+ * no need to add adjacency. */
+ if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
+ vip->protocol, vip->port, &vip_idx))
+ {
+ return;
+ }
+
+ /* Allocate an index for per-port vip */
+ *vip_prefix_index = lb_vip_prefix_index_alloc(lbm);
+ }
+ else
+ {
+ *vip_prefix_index = vip - lbm->vips;
+ }
dpo_id_t dpo = DPO_INVALID;
fib_prefix_t pfx = {};
- if (lb_vip_is_ip4(vip)) {
+ if (lb_vip_is_ip4(vip->type)) {
pfx.fp_addr.ip4 = vip->prefix.ip4;
pfx.fp_len = vip->plen - 96;
pfx.fp_proto = FIB_PROTOCOL_IP4;
@@ -786,14 +897,20 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
dpo_type = lbm->dpo_gre4_type;
else if (lb_vip_is_gre6(vip))
dpo_type = lbm->dpo_gre6_type;
+ else if (lb_vip_is_gre4_port(vip))
+ dpo_type = lbm->dpo_gre4_port_type;
+ else if (lb_vip_is_gre6_port(vip))
+ dpo_type = lbm->dpo_gre6_port_type;
else if (lb_vip_is_l3dsr(vip))
dpo_type = lbm->dpo_l3dsr_type;
- else if(lb_vip_is_nat4(vip))
- dpo_type = lbm->dpo_nat4_type;
- else if (lb_vip_is_nat6(vip))
- dpo_type = lbm->dpo_nat6_type;
-
- dpo_set(&dpo, dpo_type, proto, vip - lbm->vips);
+ else if (lb_vip_is_l3dsr_port(vip))
+ dpo_type = lbm->dpo_l3dsr_port_type;
+ else if(lb_vip_is_nat4_port(vip))
+ dpo_type = lbm->dpo_nat4_port_type;
+ else if (lb_vip_is_nat6_port(vip))
+ dpo_type = lbm->dpo_nat6_port_type;
+
+ dpo_set(&dpo, dpo_type, proto, *vip_prefix_index);
fib_table_entry_special_dpo_add(0,
&pfx,
FIB_SOURCE_PLUGIN_HI,
@@ -803,12 +920,75 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
}
/**
+ * Add the VIP filter entry
+ */
+static int lb_vip_add_port_filter(lb_main_t *lbm, lb_vip_t *vip,
+ u32 vip_prefix_index, u32 vip_idx)
+{
+ vip_port_key_t key;
+ clib_bihash_kv_8_8_t kv;
+
+ key.vip_prefix_index = vip_prefix_index;
+ key.protocol = vip->protocol;
+ key.port = clib_host_to_net_u16(vip->port);
+ key.rsv = 0;
+
+ kv.key = key.as_u64;
+ kv.value = vip_idx;
+ clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 1);
+
+ return 0;
+}
+
+/**
+ * Delete the per-port VIP filter entry for a VIP.
+ *
+ * Rebuilds the (vip_prefix_index, protocol, port) key the same way
+ * lb_vip_add_port_filter() does and removes it from vip_index_per_port.
+ *
+ * @param lbm load-balancer main structure owning vip_index_per_port.
+ * @param vip per-port VIP whose filter entry is removed.
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if no entry matches.
+ */
+static int lb_vip_del_port_filter(lb_main_t *lbm, lb_vip_t *vip)
+{
+ vip_port_key_t key;
+ clib_bihash_kv_8_8_t kv, value;
+
+ key.vip_prefix_index = vip->vip_prefix_index;
+ key.protocol = vip->protocol;
+ key.port = clib_host_to_net_u16(vip->port);
+ /* Must match the add path, which zeroes rsv: the whole as_u64 is the key. */
+ key.rsv = 0;
+
+ kv.key = key.as_u64;
+ /* Nothing to delete if the entry was never installed. */
+ if (clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) != 0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ kv.value = value.value;
+ clib_bihash_add_del_8_8(&lbm->vip_index_per_port, &kv, 0);
+
+ return 0;
+}
+
+/**
* Deletes the adjacency associated with the VIP
*/
static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
fib_prefix_t pfx = {};
- if (lb_vip_is_ip4(vip)) {
+ u32 vip_idx = 0;
+
+ if (vip->port != 0)
+ {
+ /* If this vip adjacency is used by other per-port vip,
+ * no need to del this adjacency. */
+ if (!lb_vip_port_find_diff_port(&(vip->prefix), vip->plen,
+ vip->protocol, vip->port, &vip_idx))
+ {
+ lb_put_writer_lock();
+ return;
+ }
+
+ /* Return vip_prefix_index for per-port vip */
+ lb_vip_prefix_index_free(lbm, vip->vip_prefix_index);
+
+ }
+
+ if (lb_vip_is_ip4(vip->type)) {
pfx.fp_addr.ip4 = vip->prefix.ip4;
pfx.fp_len = vip->plen - 96;
pfx.fp_proto = FIB_PROTOCOL_IP4;
@@ -826,15 +1006,47 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
vlib_main_t *vm = vlib_get_main();
lb_vip_t *vip;
lb_vip_type_t type = args.type;
- u16 node_port = args.encap_args.node_port;
+ u32 vip_prefix_index = 0;
lb_get_writer_lock();
ip46_prefix_normalize(&(args.prefix), args.plen);
- if (!lb_vip_find_index_with_lock(&(args.prefix), args.plen, vip_index)) {
- lb_put_writer_lock();
- return VNET_API_ERROR_VALUE_EXIST;
- }
+ if (!lb_vip_port_find_index_with_lock(&(args.prefix), args.plen,
+ args.protocol, args.port,
+ vip_index))
+ {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ /* Make sure we can't add a per-port VIP entry
+ * when there already is an all-port VIP for the same prefix. */
+ if ((args.port != 0) &&
+ !lb_vip_port_find_all_port_vip(&(args.prefix), args.plen, vip_index))
+ {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ /* Make sure we can't add an all-port VIP entry
+ * when there already is a per-port VIP for the same prefix. */
+ if ((args.port == 0) &&
+ !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
+ args.protocol, args.port, vip_index))
+ {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ /* Make sure all VIP for a given prefix (using different ports) have the same type. */
+ if ((args.port != 0) &&
+ !lb_vip_port_find_diff_port(&(args.prefix), args.plen,
+ args.protocol, args.port, vip_index)
+ && (args.type != lbm->vips[*vip_index].type))
+ {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
if (!is_pow2(args.new_length)) {
lb_put_writer_lock();
@@ -842,23 +1054,19 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
}
if (ip46_prefix_is_ip4(&(args.prefix), args.plen) &&
- (type != LB_VIP_TYPE_IP4_GRE4) &&
- (type != LB_VIP_TYPE_IP4_GRE6) &&
- (type != LB_VIP_TYPE_IP4_L3DSR) &&
- (type != LB_VIP_TYPE_IP4_NAT4)) {
+ !lb_vip_is_ip4(type)) {
lb_put_writer_lock();
return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
}
if ((!ip46_prefix_is_ip4(&(args.prefix), args.plen)) &&
- (type != LB_VIP_TYPE_IP6_GRE4) &&
- (type != LB_VIP_TYPE_IP6_GRE6) &&
- (type != LB_VIP_TYPE_IP6_NAT6)) {
+ !lb_vip_is_ip6(type)) {
lb_put_writer_lock();
return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
}
- if ((type == LB_VIP_TYPE_IP4_L3DSR) && (args.encap_args.dscp >= 64 ) )
+ if ((type == LB_VIP_TYPE_IP4_L3DSR) &&
+ (args.encap_args.dscp >= 64) )
{
lb_put_writer_lock();
return VNET_API_ERROR_VALUE_EXIST;
@@ -870,6 +1078,16 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
//Init
memcpy (&(vip->prefix), &(args.prefix), sizeof(args.prefix));
vip->plen = args.plen;
+ if (args.port != 0)
+ {
+ vip->protocol = args.protocol;
+ vip->port = args.port;
+ }
+ else
+ {
+ vip->protocol = (u8)~0;
+ vip->port = 0;
+ }
vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
vip->type = args.type;
@@ -877,12 +1095,10 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
vip->encap_args.dscp = args.encap_args.dscp;
}
else if ((args.type == LB_VIP_TYPE_IP4_NAT4)
- ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
+ ||(args.type == LB_VIP_TYPE_IP6_NAT6)) {
vip->encap_args.srv_type = args.encap_args.srv_type;
- vip->encap_args.port = clib_host_to_net_u16(args.encap_args.port);
vip->encap_args.target_port =
clib_host_to_net_u16(args.encap_args.target_port);
- vip->encap_args.node_port = clib_host_to_net_u16(node_port);
}
vip->flags = LB_VIP_FLAGS_USED;
@@ -899,20 +1115,20 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
vip->new_flow_table_mask = args.new_length - 1;
vip->new_flow_table = 0;
- //Create a new flow hash table full of the default entry
+ //Update flow hash table
lb_vip_update_new_flow_table(vip);
//Create adjacency to direct traffic
- lb_vip_add_adjacency(lbm, vip);
+ lb_vip_add_adjacency(lbm, vip, &vip_prefix_index);
- if ( (lb_vip_is_nat4(vip) || lb_vip_is_nat6(vip))
+ if ( (lb_vip_is_nat4_port(vip) || lb_vip_is_nat6_port(vip))
&& (args.encap_args.srv_type == LB_SRV_TYPE_NODEPORT) )
{
u32 key;
uword * entry;
//Create maping from nodeport to vip_index
- key = clib_host_to_net_u16(node_port);
+ key = clib_host_to_net_u16(args.port);
entry = hash_get_mem (lbm->vip_index_by_nodeport, &key);
if (entry) {
lb_put_writer_lock();
@@ -922,12 +1138,17 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
hash_set_mem (lbm->vip_index_by_nodeport, &key, vip - lbm->vips);
/* receive packets destined to NodeIP:NodePort */
- udp_register_dst_port (vm, node_port, lb4_nodeport_node.index, 1);
- udp_register_dst_port (vm, node_port, lb6_nodeport_node.index, 0);
+ udp_register_dst_port (vm, args.port, lb4_nodeport_node.index, 1);
+ udp_register_dst_port (vm, args.port, lb6_nodeport_node.index, 0);
}
- //Return result
*vip_index = vip - lbm->vips;
+ //Create per-port vip filtering table
+ if (args.port != 0)
+ {
+ lb_vip_add_port_filter(lbm, vip, vip_prefix_index, *vip_index);
+ vip->vip_prefix_index = vip_prefix_index;
+ }
lb_put_writer_lock();
return 0;
@@ -937,6 +1158,11 @@ int lb_vip_del(u32 vip_index)
{
lb_main_t *lbm = &lb_main;
lb_vip_t *vip;
+
+ /* Does not remove default vip, i.e. vip_index = 0 */
+ if (vip_index == 0)
+ return 0;
+
lb_get_writer_lock();
if (!(vip = lb_vip_get_by_index(vip_index))) {
lb_put_writer_lock();
@@ -963,6 +1189,12 @@ int lb_vip_del(u32 vip_index)
//Delete adjacency
lb_vip_del_adjacency(lbm, vip);
+ //Delete per-port vip filtering entry
+ if (vip->port != 0)
+ {
+ lb_vip_del_port_filter(lbm, vip);
+ }
+
//Set the VIP as unused
vip->flags &= ~LB_VIP_FLAGS_USED;
@@ -1020,15 +1252,21 @@ lb_as_stack (lb_as_t *as)
dpo_type = lbm->dpo_gre4_type;
else if (lb_vip_is_gre6(vip))
dpo_type = lbm->dpo_gre6_type;
+ else if (lb_vip_is_gre4_port(vip))
+ dpo_type = lbm->dpo_gre4_port_type;
+ else if (lb_vip_is_gre6_port(vip))
+ dpo_type = lbm->dpo_gre6_port_type;
else if (lb_vip_is_l3dsr(vip))
dpo_type = lbm->dpo_l3dsr_type;
- else if(lb_vip_is_nat4(vip))
- dpo_type = lbm->dpo_nat4_type;
- else if (lb_vip_is_nat6(vip))
- dpo_type = lbm->dpo_nat6_type;
+ else if (lb_vip_is_l3dsr_port(vip))
+ dpo_type = lbm->dpo_l3dsr_port_type;
+ else if(lb_vip_is_nat4_port(vip))
+ dpo_type = lbm->dpo_nat4_port_type;
+ else if (lb_vip_is_nat6_port(vip))
+ dpo_type = lbm->dpo_nat6_port_type;
dpo_stack(dpo_type,
- lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6,
+ lb_vip_is_ip4(vip->type)?DPO_PROTO_IP4:DPO_PROTO_IP6,
&as->dpo,
fib_entry_contribute_ip_forwarding(
as->next_hop_fib_entry_index));
@@ -1036,7 +1274,7 @@ lb_as_stack (lb_as_t *as)
static fib_node_back_walk_rc_t
lb_fib_node_back_walk_notify (fib_node_t *node,
- fib_node_back_walk_ctx_t *ctx)
+ fib_node_back_walk_ctx_t *ctx)
{
lb_as_stack(lb_as_from_fib_node(node));
return (FIB_NODE_BACK_WALK_CONTINUE);
@@ -1082,6 +1320,7 @@ lb_init (vlib_main_t * vm)
lbm->vnet_main = vnet_get_main ();
lbm->vlib_main = vm;
+ lb_vip_t *default_vip;
lb_as_t *default_as;
fib_node_vft_t lb_fib_node_vft = {
.fnv_get = lb_fib_node_get_node,
@@ -1094,7 +1333,15 @@ lb_init (vlib_main_t * vm)
.dv_format = format_lb_dpo,
};
+ //Allocate and init default VIP.
lbm->vips = 0;
+ pool_get(lbm->vips, default_vip);
+ default_vip->prefix.ip6.as_u64[0] = 0xffffffffffffffffL;
+ default_vip->prefix.ip6.as_u64[1] = 0xffffffffffffffffL;
+ default_vip->protocol = ~0;
+ default_vip->port = 0;
+ default_vip->flags = LB_VIP_FLAGS_USED;
+
lbm->per_cpu = 0;
vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
@@ -1106,9 +1353,18 @@ lb_init (vlib_main_t * vm)
lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
- lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft, lb_dpo_l3dsr_nodes);
- lbm->dpo_nat4_type = dpo_register_new_type(&lb_vft, lb_dpo_nat4_nodes);
- lbm->dpo_nat6_type = dpo_register_new_type(&lb_vft, lb_dpo_nat6_nodes);
+ lbm->dpo_gre4_port_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_gre4_port_nodes);
+ lbm->dpo_gre6_port_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_gre6_port_nodes);
+ lbm->dpo_l3dsr_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_l3dsr_nodes);
+ lbm->dpo_l3dsr_port_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_l3dsr_port_nodes);
+ lbm->dpo_nat4_port_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_nat4_port_nodes);
+ lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
+ lb_dpo_nat6_port_nodes);
lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
//Init AS reference counters
@@ -1126,6 +1382,10 @@ lb_init (vlib_main_t * vm)
lbm->vip_index_by_nodeport
= hash_create_mem (0, sizeof(u16), sizeof (uword));
+ clib_bihash_init_8_8 (&lbm->vip_index_per_port,
+ "vip_index_per_port", LB_VIP_PER_PORT_BUCKETS,
+ LB_VIP_PER_PORT_MEMORY_SIZE);
+
clib_bihash_init_8_8 (&lbm->mapping_by_as4,
"mapping_by_as4", LB_MAPPING_BUCKETS,
LB_MAPPING_MEMORY_SIZE);
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
index c6773a4108f..3177f892ae5 100644
--- a/src/plugins/lb/lb.h
+++ b/src/plugins/lb/lb.h
@@ -47,6 +47,9 @@
#define LB_MAPPING_BUCKETS 1024
#define LB_MAPPING_MEMORY_SIZE 64<<20
+#define LB_VIP_PER_PORT_BUCKETS 1024
+#define LB_VIP_PER_PORT_MEMORY_SIZE 64<<20
+
typedef enum {
LB_NEXT_DROP,
LB_N_NEXT,
@@ -187,6 +190,17 @@ typedef enum {
} lb_encap_type_t;
/**
+ * Lookup type
+ */
+
+typedef enum {
+ LB_LKP_SAME_IP_PORT,
+ LB_LKP_DIFF_IP_PORT,
+ LB_LKP_ALL_PORT_IP,
+ LB_LKP_N_TYPES,
+} lb_lkp_type_t;
+
+/**
* The load balancer supports IPv4 and IPv6 traffic
* and GRE4, GRE6, L3DSR and NAT4, NAT6 encap.
*/
@@ -214,14 +228,8 @@ typedef struct {
/* Service type. clusterip or nodeport */
u8 srv_type;
- /* Service port. network byte order */
- u16 port;
-
/* Pod's port corresponding to specific service. network byte order */
u16 target_port;
-
- /* Node's port, can access service via NodeIP:node_port. network byte order */
- u16 node_port;
};
/* DSCP bits for L3DSR */
u8 dscp;
@@ -229,8 +237,21 @@ typedef struct {
};
} lb_vip_encap_args_t;
+typedef struct {
+ /* all fields in NET byte order */
+ union {
+ struct {
+ u32 vip_prefix_index;
+ u16 port;
+ u8 protocol;
+ u8 rsv;
+ };
+ u64 as_u64;
+ };
+} vip_port_key_t;
+
/**
- * Load balancing service is provided per VIP.
+ * Load balancing service is provided per VIP+protocol+port.
* In this data model, a VIP can be a whole prefix.
* But load balancing only
* occurs on a per-source-address/port basis. Meaning that if a given source
@@ -275,6 +296,15 @@ typedef struct {
*/
u8 plen;
+ /* tcp or udp. If not per-port vip, set to ~0 */
+ u8 protocol;
+
+ /* tcp port or udp port. If not per-port vip, set to 0 */
+ u16 port;
+
+ /* Valid for per-port vip */
+ u32 vip_prefix_index;
+
/**
* The type of traffic for this.
* LB_TYPE_UNDEFINED if unknown.
@@ -301,36 +331,57 @@ typedef struct {
u32 *as_indexes;
} lb_vip_t;
-#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 \
- || (vip)->type == LB_VIP_TYPE_IP4_GRE4 \
- || (vip)->type == LB_VIP_TYPE_IP4_L3DSR \
- || (vip)->type == LB_VIP_TYPE_IP4_NAT4 )
-
-#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
- || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
+#define lb_vip_is_ip4(type) (type == LB_VIP_TYPE_IP4_GRE6 \
+ || type == LB_VIP_TYPE_IP4_GRE4 \
+ || type == LB_VIP_TYPE_IP4_L3DSR \
+ || type == LB_VIP_TYPE_IP4_NAT4 )
-#define lb_vip_is_gre6(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
- || (vip)->type == LB_VIP_TYPE_IP4_GRE6)
+#define lb_vip_is_ip6(type) (type == LB_VIP_TYPE_IP6_GRE6 \
+ || type == LB_VIP_TYPE_IP6_GRE4 \
+ || type == LB_VIP_TYPE_IP6_NAT6 )
#define lb_encap_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_L3DSR \
|| (vip)->type == LB_VIP_TYPE_IP4_NAT4 )
+#define lb_vip_is_gre4(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
+ || (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
+ && ((vip)->port == 0))
+
+
+#define lb_vip_is_gre6(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
+ || (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
+ && ((vip)->port == 0))
+
+#define lb_vip_is_gre4_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
+ || (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
+ && ((vip)->port != 0))
+
+#define lb_vip_is_gre6_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
+ || (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
+ && ((vip)->port != 0))
+
always_inline bool
lb_vip_is_l3dsr(const lb_vip_t *vip)
{
- return vip->type == LB_VIP_TYPE_IP4_L3DSR;
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port ==0);
+}
+
+always_inline bool
+lb_vip_is_l3dsr_port(const lb_vip_t *vip)
+{
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port !=0);
}
always_inline bool
-lb_vip_is_nat4(const lb_vip_t *vip)
+lb_vip_is_nat4_port(const lb_vip_t *vip)
{
- return vip->type == LB_VIP_TYPE_IP4_NAT4;
+ return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port !=0);
}
always_inline bool
-lb_vip_is_nat6(const lb_vip_t *vip)
+lb_vip_is_nat6_port(const lb_vip_t *vip)
{
- return vip->type == LB_VIP_TYPE_IP6_NAT6;
+ return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port !=0);
}
format_function_t format_lb_vip;
@@ -422,6 +473,11 @@ typedef struct {
lb_vip_t *vips;
/**
+ * bitmap for vip prefix to support per-port vip
+ */
+ uword *vip_prefix_indexes;
+
+ /**
* Pool of ASs.
* ASs are referenced by address and vip index.
* The first element (index 0) is special and used only to fill
@@ -479,15 +535,20 @@ typedef struct {
*/
dpo_type_t dpo_gre4_type;
dpo_type_t dpo_gre6_type;
+ dpo_type_t dpo_gre4_port_type;
+ dpo_type_t dpo_gre6_port_type;
dpo_type_t dpo_l3dsr_type;
- dpo_type_t dpo_nat4_type;
- dpo_type_t dpo_nat6_type;
-
+ dpo_type_t dpo_l3dsr_port_type;
+ dpo_type_t dpo_nat4_port_type;
+ dpo_type_t dpo_nat6_port_type;
/**
* Node type for registering to fib changes.
*/
fib_node_type_t fib_node_type;
+ /* lookup per_port vip by key */
+ clib_bihash_8_8_t vip_index_per_port;
+
/* Find a static mapping by AS IP : target_port */
clib_bihash_8_8_t mapping_by_as4;
clib_bihash_24_8_t mapping_by_as6;
@@ -511,6 +572,8 @@ typedef struct {
typedef struct {
ip46_address_t prefix;
u8 plen;
+ u8 protocol;
+ u16 port;
lb_vip_type_t type;
u32 new_length;
lb_vip_encap_args_t encap_args;
@@ -537,7 +600,8 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index);
int lb_vip_del(u32 vip_index);
-int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u8 protocol,
+ u16 port, u32 *vip_index);
#define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index))
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
index fc498706222..83766272fd7 100644
--- a/src/plugins/lb/lb_test.c
+++ b/src/plugins/lb/lb_test.c
@@ -226,7 +226,7 @@ static int api_lb_add_del_as (vat_main_t * vam)
#define foreach_vpe_api_msg \
_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \
_(lb_add_del_vip, "<ip-prefix> [gre4|gre6|l3dsr|nat4|nat6] " \
- "<dscp> <port> <target_port> <node_port> " \
+ "<dscp> <port> <target_port> " \
"<new_table_len> [del]") \
_(lb_add_del_as, "<vip-ip-prefix> <address> [del]")
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index b33ea22b5c1..8163e35a06e 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -173,14 +173,27 @@ lb_node_get_other_ports6 (ip6_header_t *ip60)
return 0;
}
-static_always_inline u32
-lb_node_get_hash (vlib_buffer_t *p, u8 is_input_v4)
+static_always_inline void
+lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
+ u32 *hash, u32 *vip_idx, u8 per_port_vip)
{
- u32 hash;
+ vip_port_key_t key;
+ clib_bihash_kv_8_8_t kv, value;
+
+ /* For vip case, retrieve vip index from ip lookup */
+ *vip_idx = vnet_buffer (p)->ip.adj_index[VLIB_TX];
+
+ if (per_port_vip)
+ {
+ /* For per-port-vip case, ip lookup stores dummy index */
+ key.vip_prefix_index = *vip_idx;
+ }
+
if (is_input_v4)
{
ip4_header_t *ip40;
u64 ports;
+
ip40 = vlib_buffer_get_current (p);
if (PREDICT_TRUE(
ip40->protocol == IP_PROTOCOL_TCP
@@ -190,13 +203,20 @@ lb_node_get_hash (vlib_buffer_t *p, u8 is_input_v4)
else
ports = lb_node_get_other_ports4 (ip40);
- hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+ *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+
+ if (per_port_vip)
+ {
+ key.protocol = ip40->protocol;
+ key.port = (u16)(ports & 0xFFFF);
+ }
}
else
{
ip6_header_t *ip60;
ip60 = vlib_buffer_get_current (p);
u64 ports;
+
if (PREDICT_TRUE(
ip60->protocol == IP_PROTOCOL_TCP
|| ip60->protocol == IP_PROTOCOL_UDP))
@@ -205,18 +225,39 @@ lb_node_get_hash (vlib_buffer_t *p, u8 is_input_v4)
else
ports = lb_node_get_other_ports6 (ip60);
- hash = lb_hash_hash (ip60->src_address.as_u64[0],
+ *hash = lb_hash_hash (ip60->src_address.as_u64[0],
ip60->src_address.as_u64[1],
ip60->dst_address.as_u64[0],
ip60->dst_address.as_u64[1], ports);
+
+ if (per_port_vip)
+ {
+ key.protocol = ip60->protocol;
+ key.port = (u16)(ports & 0xFFFF);
+ }
+ }
+
+ /* For per-port-vip case, retrieve vip index from the vip_index_per_port table */
+ if (per_port_vip)
+ {
+ kv.key = key.as_u64;
+ if (clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) < 0)
+ {
+ /* return default vip */
+ *vip_idx = 0;
+ return;
+ }
+ *vip_idx = value.value;
}
- return hash;
}
static_always_inline uword
-lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
+lb_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
- lb_encap_type_t encap_type) //Compile-time parameter is GRE4/GRE6/L3DSR/NAT4/NAT6
+ lb_encap_type_t encap_type, //Compile-time parameter is GRE4/GRE6/L3DSR/NAT4/NAT6
+ u8 per_port_vip) //Compile-time parameter stating that is per_port_vip or not
{
lb_main_t *lbm = &lb_main;
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
@@ -229,8 +270,13 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
next_index = node->cached_next_index;
u32 nexthash0 = 0;
+ u32 next_vip_idx0 = ~0;
if (PREDICT_TRUE(n_left_from > 0))
- nexthash0 = lb_node_get_hash (vlib_get_buffer (vm, from[0]), is_input_v4);
+ {
+ vlib_buffer_t *p0 = vlib_get_buffer (vm, from[0]);
+ lb_node_get_hash (lbm, p0, is_input_v4, &nexthash0,
+ &next_vip_idx0, per_port_vip);
+ }
while (n_left_from > 0)
{
@@ -240,17 +286,21 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
u32 pi0;
vlib_buffer_t *p0;
lb_vip_t *vip0;
- u32 asindex0;
+ u32 asindex0 = 0;
u16 len0;
u32 available_index0;
u8 counter = 0;
u32 hash0 = nexthash0;
+ u32 vip_index0 = next_vip_idx0;
+ u32 next0;
if (PREDICT_TRUE(n_left_from > 1))
{
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
//Compute next hash and prefetch bucket
- nexthash0 = lb_node_get_hash (p1, is_input_v4);
+ lb_node_get_hash (lbm, p1, is_input_v4,
+ &nexthash0, &next_vip_idx0,
+ per_port_vip);
lb_hash_prefetch_bucket (sticky_ht, nexthash0);
//Prefetch for encap, next
CLIB_PREFETCH(vlib_buffer_get_current (p1) - 64, 64, STORE);
@@ -272,8 +322,8 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
n_left_to_next -= 1;
p0 = vlib_get_buffer (vm, pi0);
- vip0 = pool_elt_at_index(lbm->vips,
- vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ vip0 = pool_elt_at_index(lbm->vips, vip_index0);
if (is_input_v4)
{
@@ -290,7 +340,7 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
}
lb_hash_get (sticky_ht, hash0,
- vnet_buffer (p0)->ip.adj_index[VLIB_TX], lb_time,
+ vip_index0, lb_time,
&available_index0, &asindex0);
if (PREDICT_TRUE(asindex0 != ~0))
@@ -320,7 +370,7 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
//Note that when there is no AS configured, an entry is configured anyway.
//But no configured AS is not something that should happen
lb_hash_put (sticky_ht, hash0, asindex0,
- vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ vip_index0,
available_index0, lb_time);
}
else
@@ -333,7 +383,7 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
vlib_increment_simple_counter (
&lbm->vip_counters[counter], thread_index,
- vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ vip_index0,
1);
//Now let's encap
@@ -436,8 +486,7 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
csum, lbm->ass[asindex0].address.ip4.as_u32);
ip40->checksum = ip_csum_fold (csum);
- if ((ip40->protocol == IP_PROTOCOL_UDP)
- || (uh->dst_port == vip0->encap_args.port))
+ if (ip40->protocol == IP_PROTOCOL_UDP)
{
uh->dst_port = vip0->encap_args.target_port;
csum = uh->checksum;
@@ -448,7 +497,7 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
}
else
{
- next_index = LB_NEXT_DROP;
+ asindex0 = 0;
}
}
else if ((is_input_v4 == 0) && (encap_type == LB_ENCAP_TYPE_NAT6))
@@ -481,25 +530,25 @@ lb_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
}
else
{
- next_index = LB_NEXT_DROP;
+ asindex0 = 0;
}
}
}
+ next0 = lbm->ass[asindex0].dpo.dpoi_next_node;
+ //Note that this is going to error if asindex0 == 0
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ lbm->ass[asindex0].dpo.dpoi_index;
if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
{
lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof(*tr));
tr->as_index = asindex0;
- tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ tr->vip_index = vip_index0;
}
//Enqueue to next
- //Note that this is going to error if asindex0 == 0
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
- lbm->ass[asindex0].dpo.dpoi_index;
vlib_validate_buffer_enqueue_x1(
- vm, node, next_index, to_next, n_left_to_next, pi0,
- lbm->ass[asindex0].dpo.dpoi_next_node);
+ vm, node, next_index, to_next, n_left_to_next, pi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -887,49 +936,84 @@ static uword
lb6_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 0);
}
static uword
lb6_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4);
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 0);
}
static uword
lb4_gre6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 0);
}
static uword
lb4_gre4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 0);
+}
+
+static uword
+lb6_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE6, 1);
+}
+
+static uword
+lb6_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_GRE4, 1);
+}
+
+static uword
+lb4_gre6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE6, 1);
+}
+
+static uword
+lb4_gre4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_GRE4, 1);
}
static uword
lb4_l3dsr_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 0);
}
static uword
-lb6_nat6_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+lb4_l3dsr_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_L3DSR, 1);
}
static uword
-lb4_nat4_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+lb6_nat6_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return lb_node_fn (vm, node, frame, 0, LB_ENCAP_TYPE_NAT6, 1);
+}
+
+static uword
+lb4_nat4_port_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4);
+ return lb_node_fn (vm, node, frame, 1, LB_ENCAP_TYPE_NAT4, 1);
}
static uword
@@ -952,7 +1036,8 @@ VLIB_REGISTER_NODE (lb6_gre6_node) =
.name = "lb6-gre6",
.vector_size = sizeof(u32),
.format_trace = format_lb_trace,
- .n_errors = LB_N_ERROR, .error_strings = lb_error_strings,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{ [LB_NEXT_DROP] = "error-drop" },
@@ -992,7 +1077,72 @@ VLIB_REGISTER_NODE (lb4_gre4_node) =
.format_trace = format_lb_trace,
.n_errors = LB_N_ERROR,
.error_strings = lb_error_strings,
- .n_next_nodes = LB_N_NEXT,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ { [LB_NEXT_DROP] = "error-drop" },
+ };
+
+VLIB_REGISTER_NODE (lb6_gre6_port_node) =
+ {
+ .function = lb6_gre6_port_node_fn,
+ .name = "lb6-gre6-port",
+ .vector_size = sizeof(u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ { [LB_NEXT_DROP] = "error-drop" },
+ };
+
+VLIB_REGISTER_NODE (lb6_gre4_port_node) =
+ {
+ .function = lb6_gre4_port_node_fn,
+ .name = "lb6-gre4-port",
+ .vector_size = sizeof(u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ { [LB_NEXT_DROP] = "error-drop" },
+ };
+
+VLIB_REGISTER_NODE (lb4_gre6_port_node) =
+ {
+ .function = lb4_gre6_port_node_fn,
+ .name = "lb4-gre6-port",
+ .vector_size = sizeof(u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ { [LB_NEXT_DROP] = "error-drop" },
+ };
+
+VLIB_REGISTER_NODE (lb4_gre4_port_node) =
+ {
+ .function = lb4_gre4_port_node_fn,
+ .name = "lb4-gre4-port",
+ .vector_size = sizeof(u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ { [LB_NEXT_DROP] = "error-drop" },
+ };
+
+VLIB_REGISTER_NODE (lb4_l3dsr_port_node) =
+ {
+ .function = lb4_l3dsr_port_node_fn,
+ .name = "lb4-l3dsr-port",
+ .vector_size = sizeof(u32),
+ .format_trace = format_lb_trace,
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+ .n_next_nodes = LB_N_NEXT,
.next_nodes =
{ [LB_NEXT_DROP] = "error-drop" },
};
@@ -1010,10 +1160,10 @@ VLIB_REGISTER_NODE (lb4_l3dsr_node) =
{ [LB_NEXT_DROP] = "error-drop" },
};
-VLIB_REGISTER_NODE (lb6_nat6_node) =
+VLIB_REGISTER_NODE (lb6_nat6_port_node) =
{
- .function = lb6_nat6_node_fn,
- .name = "lb6-nat6",
+ .function = lb6_nat6_port_node_fn,
+ .name = "lb6-nat6-port",
.vector_size = sizeof(u32),
.format_trace = format_lb_trace,
.n_errors = LB_N_ERROR,
@@ -1023,10 +1173,10 @@ VLIB_REGISTER_NODE (lb6_nat6_node) =
{ [LB_NEXT_DROP] = "error-drop" },
};
-VLIB_REGISTER_NODE (lb4_nat4_node) =
+VLIB_REGISTER_NODE (lb4_nat4_port_node) =
{
- .function = lb4_nat4_node_fn,
- .name = "lb4-nat4",
+ .function = lb4_nat4_port_node_fn,
+ .name = "lb4-nat4-port",
.vector_size = sizeof(u32),
.format_trace = format_lb_trace,
.n_errors = LB_N_ERROR,
@@ -1061,7 +1211,7 @@ VLIB_REGISTER_NODE (lb4_nodeport_node) =
.n_next_nodes = LB4_NODEPORT_N_NEXT,
.next_nodes =
{
- [LB4_NODEPORT_NEXT_IP4_NAT4] = "lb4-nat4",
+ [LB4_NODEPORT_NEXT_IP4_NAT4] = "lb4-nat4-port",
[LB4_NODEPORT_NEXT_DROP] = "error-drop",
},
};
@@ -1077,7 +1227,7 @@ VLIB_REGISTER_NODE (lb6_nodeport_node) =
.n_next_nodes = LB6_NODEPORT_N_NEXT,
.next_nodes =
{
- [LB6_NODEPORT_NEXT_IP6_NAT6] = "lb6-nat6",
+ [LB6_NODEPORT_NEXT_IP6_NAT6] = "lb6-nat6-port",
[LB6_NODEPORT_NEXT_DROP] = "error-drop",
},
};
diff --git a/test/test_lb.py b/test/test_lb.py
index d2e7185cf3f..45dab79fd42 100644
--- a/test/test_lb.py
+++ b/test/test_lb.py
@@ -12,13 +12,14 @@ from util import ppp
""" TestLB is a subclass of VPPTestCase classes.
TestLB class defines Load Balancer test cases for:
- - IP4 to GRE4 encap
- - IP4 to GRE6 encap
- - IP6 to GRE4 encap
- - IP6 to GRE6 encap
- - IP4 to L3DSR encap
- - IP4 to NAT4 encap
- - IP6 to NAT6 encap
+ - IP4 to GRE4 encap on per-port vip case
+ - IP4 to GRE6 encap on per-port vip case
+ - IP6 to GRE4 encap on per-port vip case
+ - IP6 to GRE6 encap on per-port vip case
+ - IP4 to L3DSR encap on vip case
+ - IP4 to L3DSR encap on per-port vip case
+ - IP4 to NAT4 encap on per-port vip case
+ - IP6 to NAT6 encap on per-port vip case
As stated in comments below, GRE has issues with IPv6.
All test cases involving IPv6 are executed, but
@@ -35,7 +36,7 @@ class TestLB(VppTestCase):
super(TestLB, cls).setUpClass()
cls.ass = range(5)
- cls.packets = range(100)
+ cls.packets = range(1)
try:
cls.create_pg_interfaces(range(2))
@@ -66,11 +67,11 @@ class TestLB(VppTestCase):
def getIPv4Flow(self, id):
return (IP(dst="90.0.%u.%u" % (id / 255, id % 255),
src="40.0.%u.%u" % (id / 255, id % 255)) /
- UDP(sport=10000 + id, dport=20000 + id))
+ UDP(sport=10000 + id, dport=20000))
def getIPv6Flow(self, id):
return (IPv6(dst="2001::%u" % (id), src="fd00:f00d:ffff::%u" % (id)) /
- UDP(sport=10000 + id, dport=20000 + id))
+ UDP(sport=10000 + id, dport=20000))
def generatePackets(self, src_if, isv4):
self.reset_packet_infos()
@@ -117,7 +118,6 @@ class TestLB(VppTestCase):
self.assertEqual(ip.dst, "10.0.0.%u" % asid)
self.assertEqual(ip.proto, 47)
self.assertEqual(len(ip.options), 0)
- self.assertGreaterEqual(ip.ttl, 64)
gre = p[GRE]
self.checkInner(gre, isv4)
elif (encap == 'gre6'):
@@ -134,7 +134,6 @@ class TestLB(VppTestCase):
socket.inet_pton(socket.AF_INET6, "2002::%u" % asid)
)
self.assertEqual(ip.nh, 47)
- self.assertGreaterEqual(ip.hlim, 64)
# self.assertEqual(len(ip.options), 0)
gre = GRE(str(p[IPv6].payload))
self.checkInner(gre, isv4)
@@ -159,7 +158,6 @@ class TestLB(VppTestCase):
self.assertEqual(ip.dst, "10.0.0.%u" % asid)
self.assertEqual(ip.proto, 17)
self.assertEqual(len(ip.options), 0)
- self.assertGreaterEqual(ip.ttl, 63)
udp = p[UDP]
self.assertEqual(udp.dport, 3307)
elif (encap == 'nat6'):
@@ -183,7 +181,7 @@ class TestLB(VppTestCase):
self.logger.error(ppp("Unexpected or invalid packet:", p))
raise
- # This is just to roughly check that the balancing algorithm
+ # This is just to roughly check that the balancing algorithm
# is not completely biased.
for asid in self.ass:
if load[asid] < len(self.packets) / (len(self.ass) * 2):
@@ -192,11 +190,14 @@ class TestLB(VppTestCase):
raise Exception("Load Balancer algorithm is biased")
def test_lb_ip4_gre4(self):
- """ Load Balancer IP4 GRE4 """
+ """ Load Balancer IP4 GRE4 on vip case """
try:
- self.vapi.cli("lb vip 90.0.0.0/8 encap gre4")
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap gre4")
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u" % (asid))
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 10.0.0.%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
self.pg_enable_capture(self.pg_interfaces)
@@ -205,17 +206,23 @@ class TestLB(VppTestCase):
finally:
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u del" % (asid))
- self.vapi.cli("lb vip 90.0.0.0/8 encap gre4 del")
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap gre4 del")
self.vapi.cli("test lb flowtable flush")
def test_lb_ip6_gre4(self):
- """ Load Balancer IP6 GRE4 """
+ """ Load Balancer IP6 GRE4 on vip case """
try:
- self.vapi.cli("lb vip 2001::/16 encap gre4")
+ self.vapi.cli(
+ "lb vip 2001::/16 encap gre4")
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 10.0.0.%u" % (asid))
+ self.vapi.cli(
+ "lb as 2001::/16 10.0.0.%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
self.pg_enable_capture(self.pg_interfaces)
@@ -224,16 +231,22 @@ class TestLB(VppTestCase):
self.checkCapture(encap='gre4', isv4=False)
finally:
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 10.0.0.%u del" % (asid))
- self.vapi.cli("lb vip 2001::/16 encap gre4 del")
+ self.vapi.cli(
+ "lb as 2001::/16 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 2001::/16 encap gre4 del")
self.vapi.cli("test lb flowtable flush")
def test_lb_ip4_gre6(self):
- """ Load Balancer IP4 GRE6 """
+ """ Load Balancer IP4 GRE6 on vip case """
try:
- self.vapi.cli("lb vip 90.0.0.0/8 encap gre6")
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap gre6")
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 2002::%u" % (asid))
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 2002::%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
self.pg_enable_capture(self.pg_interfaces)
@@ -242,16 +255,22 @@ class TestLB(VppTestCase):
self.checkCapture(encap='gre6', isv4=True)
finally:
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 2002::%u del" % (asid))
- self.vapi.cli("lb vip 90.0.0.0/8 encap gre6 del")
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 2002::%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap gre6 del")
self.vapi.cli("test lb flowtable flush")
def test_lb_ip6_gre6(self):
- """ Load Balancer IP6 GRE6 """
+ """ Load Balancer IP6 GRE6 on vip case """
try:
- self.vapi.cli("lb vip 2001::/16 encap gre6")
+ self.vapi.cli(
+ "lb vip 2001::/16 encap gre6")
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 2002::%u" % (asid))
+ self.vapi.cli(
+ "lb as 2001::/16 2002::%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
self.pg_enable_capture(self.pg_interfaces)
@@ -260,16 +279,144 @@ class TestLB(VppTestCase):
self.checkCapture(encap='gre6', isv4=False)
finally:
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 2002::%u del" % (asid))
- self.vapi.cli("lb vip 2001::/16 encap gre6 del")
+ self.vapi.cli(
+ "lb as 2001::/16 2002::%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 2001::/16 encap gre6 del")
+ self.vapi.cli("test lb flowtable flush")
+
+ def test_lb_ip4_gre4_port(self):
+ """ Load Balancer IP4 GRE4 on per-port-vip case """
+ try:
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap gre4")
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u"
+ % (asid))
+
+ self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
+ self.pg_enable_capture(self.pg_interfaces)
+ self.pg_start()
+ self.checkCapture(encap='gre4', isv4=True)
+
+ finally:
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap gre4 del")
+ self.vapi.cli("test lb flowtable flush")
+
+ def test_lb_ip6_gre4_port(self):
+ """ Load Balancer IP6 GRE4 on per-port-vip case """
+
+ try:
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap gre4")
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 10.0.0.%u"
+ % (asid))
+
+ self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
+ self.pg_enable_capture(self.pg_interfaces)
+ self.pg_start()
+
+ self.checkCapture(encap='gre4', isv4=False)
+ finally:
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap gre4 del")
+ self.vapi.cli("test lb flowtable flush")
+
+ def test_lb_ip4_gre6_port(self):
+ """ Load Balancer IP4 GRE6 on per-port-vip case """
+ try:
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap gre6")
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 2002::%u"
+ % (asid))
+
+ self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
+ self.pg_enable_capture(self.pg_interfaces)
+ self.pg_start()
+
+ self.checkCapture(encap='gre6', isv4=True)
+ finally:
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 2002::%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap gre6 del")
+ self.vapi.cli("test lb flowtable flush")
+
+ def test_lb_ip6_gre6_port(self):
+ """ Load Balancer IP6 GRE6 on per-port-vip case """
+ try:
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap gre6")
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 2002::%u"
+ % (asid))
+
+ self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
+ self.pg_enable_capture(self.pg_interfaces)
+ self.pg_start()
+
+ self.checkCapture(encap='gre6', isv4=False)
+ finally:
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 2002::%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap gre6 del")
self.vapi.cli("test lb flowtable flush")
def test_lb_ip4_l3dsr(self):
- """ Load Balancer IP4 L3DSR """
+ """ Load Balancer IP4 L3DSR on vip case """
+ try:
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap l3dsr dscp 7")
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 10.0.0.%u"
+ % (asid))
+
+ self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
+ self.pg_enable_capture(self.pg_interfaces)
+ self.pg_start()
+ self.checkCapture(encap='l3dsr', isv4=True)
+
+ finally:
+ for asid in self.ass:
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 encap l3dsr"
+ " dscp 7 del")
+ self.vapi.cli("test lb flowtable flush")
+
+ def test_lb_ip4_l3dsr_port(self):
+ """ Load Balancer IP4 L3DSR on per-port-vip case """
try:
- self.vapi.cli("lb vip 90.0.0.0/8 encap l3dsr dscp 7")
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap l3dsr dscp 7")
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u" % (asid))
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
self.pg_enable_capture(self.pg_interfaces)
@@ -278,17 +425,24 @@ class TestLB(VppTestCase):
finally:
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u del" % (asid))
- self.vapi.cli("lb vip 90.0.0.0/8 encap l3dsr dscp 7 del")
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap l3dsr"
+ " dscp 7 del")
self.vapi.cli("test lb flowtable flush")
- def test_lb_ip4_nat4(self):
- """ Load Balancer IP4 NAT4 """
+ def test_lb_ip4_nat4_port(self):
+ """ Load Balancer IP4 NAT4 on per-port-vip case """
try:
- self.vapi.cli("lb vip 90.0.0.0/8 encap nat4"
- " type clusterip port 3306 target_port 3307")
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap nat4"
+ " type clusterip target_port 3307")
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u" % (asid))
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=True))
self.pg_enable_capture(self.pg_interfaces)
@@ -297,18 +451,24 @@ class TestLB(VppTestCase):
finally:
for asid in self.ass:
- self.vapi.cli("lb as 90.0.0.0/8 10.0.0.%u del" % (asid))
- self.vapi.cli("lb vip 90.0.0.0/8 encap nat4"
- " type clusterip port 3306 target_port 3307 del")
+ self.vapi.cli(
+ "lb as 90.0.0.0/8 protocol udp port 20000 10.0.0.%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 90.0.0.0/8 protocol udp port 20000 encap nat4"
+ " type clusterip target_port 3307 del")
self.vapi.cli("test lb flowtable flush")
- def test_lb_ip6_nat6(self):
- """ Load Balancer IP6 NAT6 """
+ def test_lb_ip6_nat6_port(self):
+ """ Load Balancer IP6 NAT6 on per-port-vip case """
try:
- self.vapi.cli("lb vip 2001::/16 encap nat6"
- " type clusterip port 3306 target_port 3307")
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap nat6"
+ " type clusterip target_port 3307")
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 2002::%u" % (asid))
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 2002::%u"
+ % (asid))
self.pg0.add_stream(self.generatePackets(self.pg0, isv4=False))
self.pg_enable_capture(self.pg_interfaces)
@@ -317,7 +477,10 @@ class TestLB(VppTestCase):
finally:
for asid in self.ass:
- self.vapi.cli("lb as 2001::/16 2002::%u del" % (asid))
- self.vapi.cli("lb vip 2001::/16 encap nat6"
- " type clusterip port 3306 target_port 3307 del")
+ self.vapi.cli(
+ "lb as 2001::/16 protocol udp port 20000 2002::%u del"
+ % (asid))
+ self.vapi.cli(
+ "lb vip 2001::/16 protocol udp port 20000 encap nat6"
+ " type clusterip target_port 3307 del")
self.vapi.cli("test lb flowtable flush")