aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--plugins/lb-plugin/lb/cli.c41
-rw-r--r--plugins/lb-plugin/lb/lb.c286
-rw-r--r--plugins/lb-plugin/lb/lb.h66
-rw-r--r--plugins/lb-plugin/lb/node.c367
4 files changed, 358 insertions, 402 deletions
diff --git a/plugins/lb-plugin/lb/cli.c b/plugins/lb-plugin/lb/cli.c
index 398572ce396..b59c6426241 100644
--- a/plugins/lb-plugin/lb/cli.c
+++ b/plugins/lb-plugin/lb/cli.c
@@ -17,47 +17,6 @@
#include <lb/util.h>
static clib_error_t *
-lb_bypass_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t vip_prefix, as_addr;
- u8 vip_plen;
- u32 vip_index;
- u8 disable = 0;
- int ret;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY))
- return clib_error_return (0, "invalid vip prefix: '%U'",
- format_unformat_error, line_input);
-
- if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index)))
- return clib_error_return (0, "lb_vip_find_index error %d", ret);
-
- if (!unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY))
- return clib_error_return (0, "invalid as address: '%U'",
- format_unformat_error, line_input);
-
- if (unformat(line_input, "disable"))
- disable = 1;
-
- if ((ret = lb_as_lookup_bypass(vip_index, &as_addr, disable)))
- return clib_error_return (0, "lb_as_lookup_bypass error %d", ret);
-
- return 0;
-}
-
-VLIB_CLI_COMMAND (lb_bypass_command, static) =
-{
- .path = "lb bypass",
- .short_help = "lb bypass <prefix> <address> [disable]",
- .function = lb_bypass_command_fn,
-};
-
-static clib_error_t *
lb_vip_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
diff --git a/plugins/lb-plugin/lb/lb.c b/plugins/lb-plugin/lb/lb.c
index 140c221a438..6af4697e37c 100644
--- a/plugins/lb-plugin/lb/lb.c
+++ b/plugins/lb-plugin/lb/lb.c
@@ -28,6 +28,25 @@ lb_main_t lb_main;
#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
#define lb_put_writer_lock() lb_main.writer_lock[0] = 0
+static void lb_as_stack (lb_as_t *as);
+
+
+/*
+ * Per-protocol, NULL-terminated lists of graph node names for the LB DPO
+ * type used with GRE4 encapsulation. The table is handed to
+ * dpo_register_new_type() in lb_init().
+ * 'static' is written first: placing a storage-class specifier after a
+ * qualifier is obsolescent in C11 and warned about by -Wold-style-declaration.
+ */
+static const char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
+static const char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
+static const char * const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
+ };
+
+/*
+ * Same layout as above, for the LB DPO type used with GRE6 encapsulation.
+ */
+static const char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
+static const char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
+static const char * const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
+ };
+
u32 lb_hash_time_now(vlib_main_t * vm)
{
return (u32) (vlib_time_now(vm) + 10000);
@@ -143,12 +162,12 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
u32 *as_index;
pool_foreach(as_index, vip->as_indexes, {
as = &lbm->ass[*as_index];
- s = format(s, "%U %U %d buckets %d flows adj:%u %s\n",
+ s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n",
format_white_space, indent,
format_ip46_address, &as->address, IP46_TYPE_ANY,
count[as - lbm->ass],
vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
- as->adj_index,
+ as->dpo.dpoi_index,
(as->flags & LB_AS_FLAGS_USED)?"used":" removed");
});
@@ -164,7 +183,6 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
return s;
}
-
typedef struct {
u32 as_index;
u32 last;
@@ -195,11 +213,18 @@ static void lb_vip_garbage_collection(lb_vip_t *vip)
pool_foreach(as_index, vip->as_indexes, {
as = &lbm->ass[*as_index];
if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
- clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
- (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) { //Not referenced
- pool_put(vip->as_indexes, as_index);
- pool_put(lbm->ass, as);
- }
+ clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
+ (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
+ { //Not referenced
+ fib_entry_child_remove(as->next_hop_fib_entry_index,
+ as->next_hop_child_index);
+ fib_table_entry_delete_index(as->next_hop_fib_entry_index,
+ FIB_SOURCE_RR);
+ as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
+
+ pool_put(vip->as_indexes, as_index);
+ pool_put(lbm->ass, as);
+ }
});
}
@@ -449,7 +474,6 @@ next:
//Update reused ASs
vec_foreach(ip, to_be_updated) {
lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
- lbm->ass[*ip].adj_index = ~0;
}
vec_free(to_be_updated);
@@ -461,9 +485,36 @@ next:
as->address = addresses[*ip];
as->flags = LB_AS_FLAGS_USED;
as->vip_index = vip_index;
- as->adj_index = ~0;
pool_get(vip->as_indexes, as_index);
*as_index = as - lbm->ass;
+
+ /*
+ * become a child of the FIB entry
+ * so we are informed when its forwarding changes
+ */
+ fib_prefix_t nh = {};
+ if (lb_vip_is_gre4(vip)) {
+ nh.fp_addr.ip4 = as->address.ip4;
+ nh.fp_len = 32;
+ nh.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ nh.fp_addr.ip6 = as->address.ip6;
+ nh.fp_len = 128;
+ nh.fp_proto = FIB_PROTOCOL_IP6;
+ }
+
+ as->next_hop_fib_entry_index =
+ fib_table_entry_special_add(0,
+ &nh,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+ as->next_hop_child_index =
+ fib_entry_child_add(as->next_hop_fib_entry_index,
+ lbm->fib_node_type,
+ as - lbm->ass);
+
+ lb_as_stack(as);
}
vec_free(to_be_added);
@@ -535,100 +586,33 @@ int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
return ret;
}
-int lb_as_lookup_bypass(u32 vip_index, ip46_address_t *address, u8 is_disable)
-{
- /* lb_get_writer_lock(); */
- /* lb_main_t *lbm = &lb_main; */
- /* u32 as_index; */
- /* lb_as_t *as; */
- /* lb_vip_t *vip; */
-
- /* if (!(vip = lb_vip_get_by_index(vip_index)) || */
- /* lb_as_find_index_vip(vip, address, &as_index)) { */
- /* lb_put_writer_lock(); */
- /* return VNET_API_ERROR_NO_SUCH_ENTRY; */
- /* } */
-
- /* as = &lbm->ass[as_index]; */
-
- /* if (is_disable) { */
- /* as->adj_index = ~0; */
- /* } else if (lb_vip_is_gre4(vip)) { */
- /* uword *p = ip4_get_route (&ip4_main, 0, 0, as->address.ip4.as_u8, 32); */
- /* if (p == 0) { */
- /* lb_put_writer_lock(); */
- /* return VNET_API_ERROR_NO_SUCH_ENTRY; */
- /* } */
- /* u32 ai = (u32)p[0]; */
- /* ip_lookup_main_t *lm4 = &ip4_main.lookup_main; */
- /* ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai); */
- /* if (adj4->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) { */
- /* lb_put_writer_lock(); */
- /* return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; */
- /* } */
-
- /* as->adj_index = ai; */
- /* } else { */
- /* u32 ai = ip6_get_route (&ip6_main, 0, 0, &as->address.ip6, 128); */
- /* if (ai == 0) { */
- /* lb_put_writer_lock(); */
- /* return VNET_API_ERROR_NO_SUCH_ENTRY; */
- /* } */
-
- /* ip_lookup_main_t *lm6 = &ip6_main.lookup_main; */
- /* ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); */
- /* if (adj6->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) { */
- /* lb_put_writer_lock(); */
- /* return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; */
- /* } */
-
- /* as->adj_index = ai; */
- /* } */
- /* lb_put_writer_lock(); */
- return 0;
-}
-
-
/**
* Add the VIP adjacency to the ip4 or ip6 fib
*/
static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
- /* ip_adjacency_t adj; */
- /* //Adjacency */
- /* memset (&adj, 0, sizeof (adj)); */
- /* adj.explicit_fib_index = ~0; */
- /* lb_adj_data_t *ad = (lb_adj_data_t *) &adj.opaque; */
- /* ad->vip_index = vip - lbm->vips; */
-
- /* ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned */
- /* u32 lookup_next_index = lbm->ip_lookup_next_index[vip->type]; */
-
- /* if (lb_vip_is_ip4(vip)) { */
- /* adj.lookup_next_index = lookup_next_index; */
- /* ip4_add_del_route_args_t route_args = {}; */
- /* ip4_main_t *im4 = &ip4_main; */
- /* route_args.table_index_or_table_id = 0; */
- /* route_args.flags = IP4_ROUTE_FLAG_ADD; */
- /* route_args.dst_address = vip->prefix.ip4; */
- /* route_args.dst_address_length = vip->plen - 96; */
- /* route_args.adj_index = ~0; */
- /* route_args.add_adj = &adj; */
- /* route_args.n_add_adj = 1; */
- /* ip4_add_del_route (im4, &route_args); */
- /* } else { */
- /* adj.lookup_next_index = lookup_next_index; */
- /* ip6_add_del_route_args_t route_args = {}; */
- /* ip6_main_t *im6 = &ip6_main; */
- /* route_args.table_index_or_table_id = 0; */
- /* route_args.flags = IP6_ROUTE_FLAG_ADD; */
- /* route_args.dst_address = vip->prefix.ip6; */
- /* route_args.dst_address_length = vip->plen; */
- /* route_args.adj_index = ~0; */
- /* route_args.add_adj = &adj; */
- /* route_args.n_add_adj = 1; */
- /* ip6_add_del_route (im6, &route_args); */
- /* } */
+ dpo_proto_t proto = 0;
+ dpo_id_t dpo = DPO_NULL;
+ fib_prefix_t pfx = {}; /* prefix of the VIP, filled in per address family below */
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96; /* presumably plen is kept in 128-bit (ip46) form -- TODO confirm */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ proto = DPO_PROTO_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ proto = DPO_PROTO_IP6;
+ }
+ dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, /* DPO type follows the VIP's encap (gre4/gre6) */
+ proto, vip - lbm->vips); /* DPO index is this VIP's index in lbm->vips */
+ fib_table_entry_special_dpo_add(0, /* first argument 0: presumably the default FIB table -- confirm */
+ &pfx,
+ FIB_SOURCE_PLUGIN_HI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo); /* the local dpo is reset once the add call has returned */
}
/**
@@ -636,30 +620,17 @@ static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
*/
static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
{
- /* ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned */
- /* if (lb_vip_is_ip4(vip)) { */
- /* ip4_main_t *im4 = &ip4_main; */
- /* ip4_add_del_route_args_t route_args = {}; */
- /* route_args.table_index_or_table_id = 0; */
- /* route_args.flags = IP4_ROUTE_FLAG_DEL; */
- /* route_args.dst_address = vip->prefix.ip4; */
- /* route_args.dst_address_length = vip->plen - 96; */
- /* route_args.adj_index = ~0; */
- /* route_args.add_adj = NULL; */
- /* route_args.n_add_adj = 0; */
- /* ip4_add_del_route (im4, &route_args); */
- /* } else { */
- /* ip6_main_t *im6 = &ip6_main; */
- /* ip6_add_del_route_args_t route_args = {}; */
- /* route_args.table_index_or_table_id = 0; */
- /* route_args.flags = IP6_ROUTE_FLAG_DEL; */
- /* route_args.dst_address = vip->prefix.ip6; */
- /* route_args.dst_address_length = vip->plen; */
- /* route_args.adj_index = ~0; */
- /* route_args.add_adj = NULL; */
- /* route_args.n_add_adj = 0; */
- /* ip6_add_del_route (im6, &route_args); */
- /* } */
+ fib_prefix_t pfx = {}; /* rebuilt from the VIP the same way lb_vip_add_adjacency builds it */
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96; /* presumably plen is kept in 128-bit (ip46) form -- TODO confirm */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ }
+ fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI); /* same first argument (0) and source as the add in lb_vip_add_adjacency */
}
int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index)
@@ -766,12 +737,76 @@ vlib_plugin_register (vlib_main_t * vm,
return error;
}
+/**
+ * Format callback for the LB DPO types (installed as dv_format in lb_init).
+ *
+ * Consumes from the va_list: an index_t, then a u32 indent that is read
+ * but not used. The index is looked up in lb_main.vips and the resulting
+ * VIP is printed with format_lb_vip.
+ *
+ * @param s  passed through to format().
+ * @param va variadic arguments as described above.
+ * @return   the value returned by format().
+ */
+u8 *format_lb_dpo (u8 * s, va_list * va)
+{
+ index_t index = va_arg (*va, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
+ return format (s, "%U", format_lb_vip, vip);
+}
+/* dv_lock / dv_unlock callbacks of the LB DPO vft (see lb_init): both bodies are empty. */
+static void lb_dpo_lock (dpo_id_t *dpo) {}
+static void lb_dpo_unlock (dpo_id_t *dpo) {}
+/**
+ * fnv_get callback of the LB fib node vft (see lb_init).
+ *
+ * @param index index into the lb_main.ass pool; presumably the value given
+ *              to fib_entry_child_add() when the AS was created -- confirm.
+ * @return      pointer to the fib_node embedded in that AS.
+ */
+static fib_node_t *
+lb_fib_node_get_node (fib_node_index_t index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_as_t *as = pool_elt_at_index (lbm->ass, index);
+ return (&as->fib_node);
+}
+/* fnv_last_lock callback of the LB fib node vft (see lb_init): the body is empty. */
+static void
+lb_fib_node_last_lock_gone (fib_node_t *node)
+{
+}
+/**
+ * Recover the lb_as_t that contains the given fib_node.
+ *
+ * Subtracts the offset of the fib_node member within lb_as_t from the
+ * node pointer, so node must point at the fib_node field of an lb_as_t.
+ */
+static lb_as_t *
+lb_as_from_fib_node (fib_node_t *node)
+{
+ return ((lb_as_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(lb_as_t, fib_node)));
+}
+/**
+ * Stack the AS's dpo on the IP forwarding contributed by its next-hop
+ * FIB entry (as->next_hop_fib_entry_index).
+ *
+ * The DPO type passed to dpo_stack() is chosen from the owning VIP's encap
+ * (gre4 or gre6) and the protocol from the VIP's address family (ip4 or ip6).
+ * Called when an AS is added and again from the fib back-walk callback.
+ */
+static void
+lb_as_stack (lb_as_t *as)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = &lbm->vips[as->vip_index];
+ dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type,
+ lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6,
+ &as->dpo,
+ fib_entry_contribute_ip_forwarding(
+ as->next_hop_fib_entry_index));
+}
+/**
+ * fnv_back_walk callback of the LB fib node vft (see lb_init).
+ *
+ * Re-stacks the AS that embeds the given node; ctx is not used.
+ *
+ * @return always FIB_NODE_BACK_WALK_CONTINUE.
+ */
+static fib_node_back_walk_rc_t
+lb_fib_node_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ lb_as_stack(lb_as_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
clib_error_t *
lb_init (vlib_main_t * vm)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
lb_main_t *lbm = &lb_main;
lb_as_t *default_as;
+ fib_node_vft_t lb_fib_node_vft = {
+ .fnv_get = lb_fib_node_get_node,
+ .fnv_last_lock = lb_fib_node_last_lock_gone,
+ .fnv_back_walk = lb_fib_node_back_walk_notify,
+ };
+ dpo_vft_t lb_vft = {
+ .dv_lock = lb_dpo_lock,
+ .dv_unlock = lb_dpo_unlock,
+ .dv_format = format_lb_dpo,
+ };
+
lbm->vips = 0;
lbm->per_cpu = 0;
vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
@@ -782,6 +817,9 @@ lb_init (vlib_main_t * vm)
lbm->ip4_src_address.as_u32 = 0xffffffff;
lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
+ lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
+ lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
+ lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
//Init AS reference counters
vlib_refcount_init(&lbm->as_refcount);
@@ -790,7 +828,7 @@ lb_init (vlib_main_t * vm)
lbm->ass = 0;
pool_get(lbm->ass, default_as);
default_as->flags = 0;
- default_as->adj_index = ~0;
+ default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
default_as->vip_index = ~0;
default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
diff --git a/plugins/lb-plugin/lb/lb.h b/plugins/lb-plugin/lb/lb.h
index 14a4d8a39e7..09cfde3e378 100644
--- a/plugins/lb-plugin/lb/lb.h
+++ b/plugins/lb-plugin/lb/lb.h
@@ -35,18 +35,30 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_table.h>
#include <lb/lbhash.h>
#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10
#define LB_DEFAULT_FLOW_TIMEOUT 40
+typedef enum {
+ LB_NEXT_DROP, /* mapped to "error-drop" in each LB node's next_nodes; also the default AS's dpoi_next_node */
+ LB_N_NEXT, /* number of entries; used as n_next_nodes in the LB node registrations */
+} lb_next_t;
+
/**
* Each VIP is configured with a set of
* application server.
*/
typedef struct {
/**
+ * Registration to FIB event.
+ */
+ fib_node_t fib_node;
+
+ /**
* Destination address used to tunnel traffic towards
* that application server.
* The address is also used as ID and pseudo-random
@@ -55,13 +67,6 @@ typedef struct {
ip46_address_t address;
/**
- * Second ip lookup can be avoided by sending directly the packet
- * to ip-rewrite with a configured adjacency.
- * When set to ~0, the packets are sent to ip6-lookup.
- */
- u32 adj_index;
-
- /**
* ASs are indexed by address and VIP Index.
* Which means there will be duplicated if the same server
* address is used for multiple VIPs.
@@ -86,6 +91,22 @@ typedef struct {
* may happen.
*/
u32 last_used;
+
+ /**
+ * The FIB entry index for the next-hop
+ */
+ fib_node_index_t next_hop_fib_entry_index;
+
+ /**
+ * The child index on the FIB entry
+ */
+ u32 next_hop_child_index;
+
+ /**
+ * The next DPO in the graph to follow.
+ */
+ dpo_id_t dpo;
+
} lb_as_t;
format_function_t format_lb_as;
@@ -180,15 +201,13 @@ typedef struct {
* in the adjacency index.
*/
u8 flags;
+#define LB_VIP_FLAGS_USED 0x1
/**
* Pool of AS indexes used for this VIP.
* This also includes ASs that have been removed (but are still referenced).
*/
u32 *as_indexes;
-
-#define LB_VIP_FLAGS_USED 0x1
-
} lb_vip_t;
#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
@@ -261,6 +280,17 @@ typedef struct {
vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS];
/**
+ * DPO used to send packet from IP4/6 lookup to LB node.
+ */
+ dpo_type_t dpo_gre4_type;
+ dpo_type_t dpo_gre6_type;
+
+ /**
+ * Node type for registering to fib changes.
+ */
+ fib_node_type_t fib_node_type;
+
+ /**
* API dynamically registered base ID.
*/
u16 msg_id_base;
@@ -268,16 +298,6 @@ typedef struct {
volatile u32 *writer_lock;
} lb_main_t;
-/**
- * struct stored in adj->opaque data.
- */
-typedef struct {
- /**
- * Index of the VIP associated with that IP adjacency.
- */
- u32 vip_index;
-} lb_adj_data_t;
-
extern lb_main_t lb_main;
extern vlib_node_registration_t lb6_node;
extern vlib_node_registration_t lb4_node;
@@ -302,12 +322,6 @@ int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
-/**
- * Updates the adjacency index stored in the AS such that the second
- * IP lookup (after encap) can be bypassed.
- */
-int lb_as_lookup_bypass(u32 vip_index, ip46_address_t *address, u8 is_disable);
-
u32 lb_hash_time_now(vlib_main_t * vm);
void lb_garbage_collection();
diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c
index c51a2108a0f..77beaac9bb2 100644
--- a/plugins/lb-plugin/lb/node.c
+++ b/plugins/lb-plugin/lb/node.c
@@ -36,28 +36,11 @@ static char *lb_error_strings[] = {
#undef _
};
-typedef enum {
- LB_NEXT_LOOKUP,
- LB_NEXT_REWRITE,
- LB_NEXT_DROP,
- LB_N_NEXT,
-} lb_next_t;
-
typedef struct {
u32 vip_index;
u32 as_index;
} lb_trace_t;
-/* u8 *lb_format_adjacency(u8 * s, va_list * va) */
-/* { */
-/* lb_main_t *lbm = &lb_main; */
-/* __attribute((unused)) ip_lookup_main_t *lm = va_arg (*va, ip_lookup_main_t *); */
-/* ip_adjacency_t *adj = va_arg (*va, ip_adjacency_t *); */
-/* lb_adj_data_t *ad = (lb_adj_data_t *) &adj->opaque; */
-/* __attribute__((unused)) lb_vip_t *vip = pool_elt_at_index (lbm->vips, ad->vip_index); */
-/* return format(s, "vip idx:%d", ad->vip_index); */
-/* } */
-
u8 *
format_lb_trace (u8 * s, va_list * args)
{
@@ -108,169 +91,162 @@ lb_node_fn (vlib_main_t * vm,
u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
{
- /* ip_lookup_main_t *lm = (is_input_v4)?&ip4_main.lookup_main:&ip6_main.lookup_main; */
- /* lb_main_t *lbm = &lb_main; */
- /* vlib_node_runtime_t *error_node = node; */
- /* u32 n_left_from, *from, next_index, *to_next, n_left_to_next; */
- /* u32 cpu_index = os_get_cpu_number(); */
- /* u32 lb_time = lb_hash_time_now(vm); */
-
- /* lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); */
- /* from = vlib_frame_vector_args (frame); */
- /* n_left_from = frame->n_vectors; */
- /* next_index = node->cached_next_index; */
-
- /* while (n_left_from > 0) */
- /* { */
- /* vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); */
- /* while (n_left_from > 0 && n_left_to_next > 0) */
- /* { */
- /* u32 pi0; */
- /* vlib_buffer_t *p0; */
- /* ip_adjacency_t *adj0; */
- /* lb_adj_data_t *ad0; */
- /* lb_vip_t *vip0; */
- /* lb_as_t *as0; */
- /* gre_header_t *gre0; */
- /* u16 len0; */
- /* u32 value0, available_index0, hash0; */
- /* u64 key0[5]; */
- /* lb_error_t error0 = LB_ERROR_NONE; */
- /* lb_next_t next0 = LB_NEXT_LOOKUP; */
-
- /* if (PREDICT_TRUE(n_left_from > 1)) */
- /* { */
- /* vlib_buffer_t *p2; */
- /* p2 = vlib_get_buffer(vm, from[1]); */
- /* vlib_prefetch_buffer_header(p2, STORE); */
- /* /\* IPv4 + 8 = 28. possibly plus -40 *\/ */
- /* CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE); */
- /* } */
-
- /* pi0 = to_next[0] = from[0]; */
- /* from += 1; */
- /* n_left_from -= 1; */
- /* to_next += 1; */
- /* n_left_to_next -= 1; */
-
- /* p0 = vlib_get_buffer (vm, pi0); */
- /* adj0 = ip_get_adjacency (lm, vnet_buffer (p0)->ip.adj_index[VLIB_TX]); */
- /* ad0 = (lb_adj_data_t *) &adj0->opaque; */
- /* vip0 = pool_elt_at_index (lbm->vips, ad0->vip_index); */
-
- /* if (is_input_v4) { */
- /* ip4_header_t *ip40; */
- /* ip40 = vlib_buffer_get_current (p0); */
- /* len0 = clib_net_to_host_u16(ip40->length); */
- /* key0[0] = (u64) ip40->src_address.as_u32; */
- /* key0[1] = (u64) ip40->dst_address.as_u32; */
- /* key0[2] = 0; */
- /* key0[3] = 0; */
- /* key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) | */
- /* ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16); */
-
- /* hash0 = lb_hash_hash(key0); */
- /* } else { */
- /* ip6_header_t *ip60; */
- /* ip60 = vlib_buffer_get_current (p0); */
- /* len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); */
- /* key0[0] = ip60->src_address.as_u64[0]; */
- /* key0[1] = ip60->src_address.as_u64[1]; */
- /* key0[2] = ip60->dst_address.as_u64[0]; */
- /* key0[3] = ip60->dst_address.as_u64[1]; */
- /* key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) | */
- /* ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16); */
-
- /* hash0 = lb_hash_hash(key0); */
- /* } */
-
- /* //NOTE: This is an ugly trick to not include the VIP index in the hash calculation */
- /* //but actually use it in the key determination. */
- /* key0[4] |= ((vip0 - lbm->vips)); */
-
- /* lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0); */
- /* if (PREDICT_TRUE(value0 != ~0)) { */
- /* //Found an existing entry */
- /* as0 = &lbm->ass[value0]; */
- /* } else if (PREDICT_TRUE(available_index0 != ~0)) { */
- /* //There is an available slot for a new flow */
- /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
- /* if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element */
- /* error0 = LB_ERROR_NO_SERVER; */
- /* next0 = LB_NEXT_DROP; */
- /* } else { */
- /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION], */
- /* cpu_index, vip0 - lbm->vips, 1); */
- /* } */
-
- /* //TODO: There are race conditions with as0 and vip0 manipulation. */
- /* //Configuration may be changed, vectors resized, etc... */
-
- /* //Dereference previously used */
- /* vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1); */
- /* vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1); */
-
- /* //Add sticky entry */
- /* //Note that when there is no AS configured, an entry is configured anyway. */
- /* //But no configured AS is not something that should happen */
- /* lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time); */
- /* } else { */
- /* //Could not store new entry in the table */
- /* as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index]; */
- /* vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET], */
- /* cpu_index, vip0 - lbm->vips, 1); */
- /* } */
-
- /* //Now let's encap */
- /* if (is_encap_v4) { */
- /* ip4_header_t *ip40; */
- /* vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); */
- /* ip40 = vlib_buffer_get_current(p0); */
- /* gre0 = (gre_header_t *)(ip40 + 1); */
- /* ip40->src_address = lbm->ip4_src_address; */
- /* ip40->dst_address = as0->address.ip4; */
- /* ip40->ip_version_and_header_length = 0x45; */
- /* ip40->ttl = 128; */
- /* ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); */
- /* ip40->protocol = IP_PROTOCOL_GRE; */
- /* ip40->checksum = ip4_header_checksum (ip40); */
- /* } else { */
- /* ip6_header_t *ip60; */
- /* vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); */
- /* ip60 = vlib_buffer_get_current(p0); */
- /* gre0 = (gre_header_t *)(ip60 + 1); */
- /* ip60->dst_address = as0->address.ip6; */
- /* ip60->src_address = lbm->ip6_src_address; */
- /* ip60->hop_limit = 128; */
- /* ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); */
- /* ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); */
- /* ip60->protocol = IP_PROTOCOL_GRE; */
- /* } */
-
- /* gre0->flags_and_version = 0; */
- /* gre0->protocol = (is_input_v4)? */
- /* clib_host_to_net_u16(0x0800): */
- /* clib_host_to_net_u16(0x86DD); */
-
- /* vnet_buffer(p0)->ip.adj_index[VLIB_TX] = as0->adj_index; */
- /* next0 = (as0->adj_index != ~0)?LB_NEXT_REWRITE:next0; */
-
- /* if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) */
- /* { */
- /* lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); */
- /* tr->as_index = as0 - lbm->ass; */
- /* tr->vip_index = ad0->vip_index; */
- /* } */
-
- /* p0->error = error_node->errors[error0]; */
- /* vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, */
- /* n_left_to_next, pi0, next0); */
- /* } */
- /* vlib_put_next_frame (vm, node, next_index, n_left_to_next); */
- /* } */
-
- /* return frame->n_vectors; */
- return 0;
+ lb_main_t *lbm = &lb_main;
+ vlib_node_runtime_t *error_node = node;
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ u32 cpu_index = os_get_cpu_number();
+ u32 lb_time = lb_hash_time_now(vm);
+
+ lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ lb_vip_t *vip0;
+ lb_as_t *as0;
+ gre_header_t *gre0;
+ u16 len0;
+ u32 value0, available_index0, hash0;
+ u64 key0[5];
+ lb_error_t error0 = LB_ERROR_NONE;
+
+ if (PREDICT_TRUE(n_left_from > 1))
+ {
+ vlib_buffer_t *p2;
+ p2 = vlib_get_buffer(vm, from[1]);
+ vlib_prefetch_buffer_header(p2, STORE);
+ /* IPv4 + 8 = 28. possibly plus -40 */
+ CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vip0 = pool_elt_at_index (lbm->vips,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ if (is_input_v4) {
+ ip4_header_t *ip40;
+ ip40 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip40->length);
+ key0[0] = (u64) ip40->src_address.as_u32;
+ key0[1] = (u64) ip40->dst_address.as_u32;
+ key0[2] = 0;
+ key0[3] = 0;
+ key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) |
+ ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16);
+
+ hash0 = lb_hash_hash(key0);
+ } else {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+ key0[0] = ip60->src_address.as_u64[0];
+ key0[1] = ip60->src_address.as_u64[1];
+ key0[2] = ip60->dst_address.as_u64[0];
+ key0[3] = ip60->dst_address.as_u64[1];
+ key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) |
+ ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16);
+
+ hash0 = lb_hash_hash(key0);
+ }
+
+ //NOTE: This is an ugly trick to not include the VIP index in the hash calculation
+ //but actually use it in the key determination.
+ key0[4] |= ((vip0 - lbm->vips));
+
+ lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0);
+ if (PREDICT_TRUE(value0 != ~0)) {
+ //Found an existing entry
+ as0 = &lbm->ass[value0];
+ } else if (PREDICT_TRUE(available_index0 != ~0)) {
+ //There is an available slot for a new flow
+ as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+ if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element
+ error0 = LB_ERROR_NO_SERVER;
+ } else {
+ vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION],
+ cpu_index, vip0 - lbm->vips, 1);
+ }
+
+ //TODO: There are race conditions with as0 and vip0 manipulation.
+ //Configuration may be changed, vectors resized, etc...
+
+ //Dereference previously used
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1);
+
+ //Add sticky entry
+ //Note that when there is no AS configured, an entry is configured anyway.
+ //But no configured AS is not something that should happen
+ lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time);
+ } else {
+ //Could not store new entry in the table
+ as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
+ vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET],
+ cpu_index, vip0 - lbm->vips, 1);
+ }
+
+ //Now let's encap
+ if (is_encap_v4) {
+ ip4_header_t *ip40;
+ vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+ ip40 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip40 + 1);
+ ip40->src_address = lbm->ip4_src_address;
+ ip40->dst_address = as0->address.ip4;
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->ttl = 128;
+ ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+ ip40->protocol = IP_PROTOCOL_GRE;
+ ip40->checksum = ip4_header_checksum (ip40);
+ } else {
+ ip6_header_t *ip60;
+ vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip60 + 1);
+ ip60->dst_address = as0->address.ip6;
+ ip60->src_address = lbm->ip6_src_address;
+ ip60->hop_limit = 128;
+ ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+ ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+ ip60->protocol = IP_PROTOCOL_GRE;
+ }
+
+ gre0->flags_and_version = 0;
+ gre0->protocol = (is_input_v4)?
+ clib_host_to_net_u16(0x0800):
+ clib_host_to_net_u16(0x86DD);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->as_index = as0 - lbm->ass;
+ tr->vip_index = vip0 - lbm->vips;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ as0->dpo.dpoi_next_node);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
}
static uword
@@ -314,18 +290,10 @@ VLIB_REGISTER_NODE (lb6_gre6_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip6-lookup",
- [LB_NEXT_REWRITE] = "ip6-rewrite",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre6) = { */
-/* .node_name = "lb6-gre6", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE6] */
-/* }; */
-
VLIB_REGISTER_NODE (lb6_gre4_node) =
{
.function = lb6_gre4_node_fn,
@@ -339,18 +307,10 @@ VLIB_REGISTER_NODE (lb6_gre4_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip4-lookup",
- [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP6_REGISTER_ADJACENCY(lb6_gre4) = { */
-/* .node_name = "lb6-gre4", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP6_GRE4] */
-/* }; */
-
VLIB_REGISTER_NODE (lb4_gre6_node) =
{
.function = lb4_gre6_node_fn,
@@ -364,18 +324,10 @@ VLIB_REGISTER_NODE (lb4_gre6_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip6-lookup",
- [LB_NEXT_REWRITE] = "ip6-rewrite",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre6) = { */
-/* .node_name = "lb4-gre6", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE6] */
-/* }; */
-
VLIB_REGISTER_NODE (lb4_gre4_node) =
{
.function = lb4_gre4_node_fn,
@@ -389,14 +341,7 @@ VLIB_REGISTER_NODE (lb4_gre4_node) =
.n_next_nodes = LB_N_NEXT,
.next_nodes =
{
- [LB_NEXT_LOOKUP] = "ip4-lookup",
- [LB_NEXT_REWRITE]= "ip4-rewrite-transit",
[LB_NEXT_DROP] = "error-drop"
},
};
-/* VNET_IP4_REGISTER_ADJACENCY(lb4_gre4) = { */
-/* .node_name = "lb4-gre4", */
-/* .fn = lb_format_adjacency, */
-/* .next_index = &lb_main.ip_lookup_next_index[LB_VIP_TYPE_IP4_GRE4] */
-/* }; */