diff options
Diffstat (limited to 'plugins/lb-plugin')
-rw-r--r-- | plugins/lb-plugin/Makefile.am | 55 | ||||
-rw-r--r-- | plugins/lb-plugin/configure.ac | 9 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/api.c | 228 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/cli.c | 250 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb.api | 71 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb.c | 844 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb.h | 333 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lb_test.c | 293 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/lbhash.h | 216 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/node.c | 419 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/refcount.c | 41 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/refcount.h | 67 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/util.c | 72 | ||||
-rw-r--r-- | plugins/lb-plugin/lb/util.h | 40 | ||||
-rw-r--r-- | plugins/lb-plugin/lb_plugin_doc.md | 141 |
15 files changed, 0 insertions, 3079 deletions
diff --git a/plugins/lb-plugin/Makefile.am b/plugins/lb-plugin/Makefile.am deleted file mode 100644 index 8e36027949e..00000000000 --- a/plugins/lb-plugin/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2016 Cisco Systems, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -AUTOMAKE_OPTIONS = foreign subdir-objects - -AM_CFLAGS = -Wall -AM_LDFLAGS = -module -shared -avoid-version - -vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins -vpppluginsdir = ${libdir}/vpp_plugins - -vppapitestplugins_LTLIBRARIES = lb_test_plugin.la -vppplugins_LTLIBRARIES = lb_plugin.la - -lb_plugin_la_SOURCES = lb/lb.c lb/node.c lb/cli.c lb/util.c lb/refcount.c lb/api.c - -BUILT_SOURCES = \ - lb/lb.api.h \ - lb/lb.api.json - -SUFFIXES = .api.h .api .api.json - -%.api.h: %.api - mkdir -p `dirname $@` ; \ - $(CC) $(CPPFLAGS) -E -P -C -x c $^ \ - | vppapigen --input - --output $@ --show-name $@ - -%.api.json: %.api - @echo " JSON APIGEN " $@ ; \ - mkdir -p `dirname $@` ; \ - $(CC) $(CPPFLAGS) -E -P -C -x c $^ \ - | vppapigen --input - --json $@ - -apidir = $(prefix)/lb/ -api_DATA = lb/lb.api.json - -noinst_HEADERS = lb/lb.h lb/util.h lb/refcount.h lb/lbhash.h lb/lb.api.h - -lb_test_plugin_la_SOURCES = \ - lb/lb_test.c lb/lb_plugin.api.h - -# Remove *.la files -install-data-hook: - @(cd $(vpppluginsdir) && $(RM) $(vppplugins_LTLIBRARIES)) - @(cd $(vppapitestpluginsdir) && $(RM) $(vppapitestplugins_LTLIBRARIES)) diff --git a/plugins/lb-plugin/configure.ac 
b/plugins/lb-plugin/configure.ac deleted file mode 100644 index 1b02e54f5d1..00000000000 --- a/plugins/lb-plugin/configure.ac +++ /dev/null @@ -1,9 +0,0 @@ -AC_INIT(lb_plugin, 1.0) -AM_INIT_AUTOMAKE -AM_SILENT_RULES([yes]) -AC_PREFIX_DEFAULT([/usr]) - -AC_PROG_LIBTOOL -AC_PROG_CC - -AC_OUTPUT([Makefile]) diff --git a/plugins/lb-plugin/lb/api.c b/plugins/lb-plugin/lb/api.c deleted file mode 100644 index 06c53fa1005..00000000000 --- a/plugins/lb-plugin/lb/api.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <lb/lb.h> - -#include <vppinfra/byte_order.h> -#include <vlibapi/api.h> -#include <vlibapi/api.h> -#include <vlibmemory/api.h> -#include <vlibsocket/api.h> - -#define vl_msg_id(n,h) n, -typedef enum { -#include <lb/lb.api.h> - /* We'll want to know how many messages IDs we need... */ - VL_MSG_FIRST_AVAILABLE, -} vl_msg_id_t; -#undef vl_msg_id - - -/* define message structures */ -#define vl_typedefs -#include <lb/lb.api.h> -#undef vl_typedefs - -/* define generated endian-swappers */ -#define vl_endianfun -#include <lb/lb.api.h> -#undef vl_endianfun - -#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) - -/* Get the API version number */ -#define vl_api_version(n,v) static u32 api_version=(v); -#include <lb/lb.api.h> -#undef vl_api_version - -#define vl_msg_name_crc_list -#include <lb/lb.api.h> -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (lb_main_t * lbm, api_main_t * am) -{ -#define _(id,n,crc) \ - vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lbm->msg_id_base); - foreach_vl_msg_name_crc_lb; -#undef _ -} - -/* Macro to finish up custom dump fns */ -#define FINISH \ - vec_add1 (s, 0); \ - vl_print (handle, (char *)s); \ - vec_free (s); \ - return handle; - -/* - * A handy macro to set up a message reply. - * Assumes that the following variables are available: - * mp - pointer to request message - * rmp - pointer to reply message type - * rv - return value - */ - -#define REPLY_MACRO(t) \ -do { \ - unix_shared_memory_queue_t * q = \ - vl_api_client_index_to_input_queue (mp->client_index); \ - if (!q) \ - return; \ - \ - rmp = vl_msg_api_alloc (sizeof (*rmp)); \ - rmp->_vl_msg_id = ntohs((t)+lbm->msg_id_base); \ - rmp->context = mp->context; \ - rmp->retval = ntohl(rv); \ - \ - vl_msg_api_send_shmem (q, (u8 *)&rmp); \ -} while(0); - -static void -vl_api_lb_conf_t_handler -(vl_api_lb_conf_t * mp) -{ - lb_main_t *lbm = &lb_main; - vl_api_lb_conf_reply_t * rmp; - int rv = 0; - - rv = lb_conf((ip4_address_t *)&mp->ip4_src_address, - (ip6_address_t *)mp->ip6_src_address, - mp->sticky_buckets_per_core, - mp->flow_timeout); - - REPLY_MACRO (VL_API_LB_CONF_REPLY); -} - -static void *vl_api_lb_conf_t_print -(vl_api_lb_conf_t *mp, void * handle) -{ - u8 * s; - s = format (0, "SCRIPT: lb_conf "); - s = format (s, "%U ", format_ip4_address, (ip4_address_t *)&mp->ip4_src_address); - s = format (s, "%U ", format_ip6_address, (ip6_address_t *)mp->ip6_src_address); - s = format (s, "%u ", mp->sticky_buckets_per_core); - s = format (s, "%u ", mp->flow_timeout); - FINISH; -} - - -static void 
-vl_api_lb_add_del_vip_t_handler -(vl_api_lb_add_del_vip_t * mp) -{ - lb_main_t *lbm = &lb_main; - vl_api_lb_conf_reply_t * rmp; - int rv = 0; - ip46_address_t prefix; - memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6)); - - if (mp->is_del) { - u32 vip_index; - if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index))) - rv = lb_vip_del(vip_index); - } else { - u32 vip_index; - lb_vip_type_t type; - if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) { - type = mp->is_gre4?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; - } else { - type = mp->is_gre4?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6; - } - - rv = lb_vip_add(&prefix, mp->prefix_length, type, - mp->new_flows_table_length, &vip_index); - } - REPLY_MACRO (VL_API_LB_CONF_REPLY); -} - -static void *vl_api_lb_add_del_vip_t_print -(vl_api_lb_add_del_vip_t *mp, void * handle) -{ - u8 * s; - s = format (0, "SCRIPT: lb_add_del_vip "); - s = format (s, "%U ", format_ip46_prefix, - (ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY); - s = format (s, "%s ", mp->is_gre4?"gre4":"gre6"); - s = format (s, "%u ", mp->new_flows_table_length); - s = format (s, "%s ", mp->is_del?"del":"add"); - FINISH; -} - -static void -vl_api_lb_add_del_as_t_handler -(vl_api_lb_add_del_as_t * mp) -{ - lb_main_t *lbm = &lb_main; - vl_api_lb_conf_reply_t * rmp; - int rv = 0; - u32 vip_index; - if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix, - mp->vip_prefix_length, &vip_index))) - goto done; - - if (mp->is_del) - rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1); - else - rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1); - -done: - REPLY_MACRO (VL_API_LB_CONF_REPLY); -} - -static void *vl_api_lb_add_del_as_t_print -(vl_api_lb_add_del_as_t *mp, void * handle) -{ - u8 * s; - s = format (0, "SCRIPT: lb_add_del_as "); - s = format (s, "%U ", format_ip46_prefix, - (ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY); - s = format (s, "%U ", 
format_ip46_address, - (ip46_address_t *)mp->as_address, IP46_TYPE_ANY); - s = format (s, "%s ", mp->is_del?"del":"add"); - FINISH; -} - -/* List of message types that this plugin understands */ -#define foreach_lb_plugin_api_msg \ -_(LB_CONF, lb_conf) \ -_(LB_ADD_DEL_VIP, lb_add_del_vip) \ -_(LB_ADD_DEL_AS, lb_add_del_as) - -static clib_error_t * lb_api_init (vlib_main_t * vm) -{ - lb_main_t *lbm = &lb_main; - u8 *name = format (0, "lb_%08x%c", api_version, 0); - lbm->msg_id_base = vl_msg_api_get_msg_ids - ((char *) name, VL_MSG_FIRST_AVAILABLE); - -#define _(N,n) \ - vl_msg_api_set_handlers((VL_API_##N + lbm->msg_id_base), \ - #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_lb_plugin_api_msg; -#undef _ - - /* Add our API messages to the global name_crc hash table */ - setup_message_id_table (lbm, &api_main); - - return 0; -} - -VLIB_INIT_FUNCTION (lb_api_init); diff --git a/plugins/lb-plugin/lb/cli.c b/plugins/lb-plugin/lb/cli.c deleted file mode 100644 index b59c6426241..00000000000 --- a/plugins/lb-plugin/lb/cli.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <lb/lb.h> -#include <lb/util.h> - -static clib_error_t * -lb_vip_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip46_address_t prefix; - u8 plen; - u32 new_len = 1024; - u8 del = 0; - int ret; - u32 gre4 = 0; - lb_vip_type_t type; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) - return clib_error_return (0, "invalid vip prefix: '%U'", - format_unformat_error, line_input); - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat(line_input, "new_len %d", &new_len)) - ; - else if (unformat(line_input, "del")) - del = 1; - else if (unformat(line_input, "encap gre4")) - gre4 = 1; - else if (unformat(line_input, "encap gre6")) - gre4 = 0; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - } - - unformat_free (line_input); - - - if (ip46_prefix_is_ip4(&prefix, plen)) { - type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; - } else { - type = (gre4)?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6; - } - - lb_garbage_collection(); - - u32 index; - if (!del) { - if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) { - return clib_error_return (0, "lb_vip_add error %d", ret); - } else { - vlib_cli_output(vm, "lb_vip_add ok %d", index); - } - } else { - if ((ret = lb_vip_find_index(&prefix, plen, &index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); - else if ((ret = lb_vip_del(index))) - return clib_error_return (0, "lb_vip_del error %d", ret); - } - return NULL; -} - -VLIB_CLI_COMMAND (lb_vip_command, static) = -{ - .path = "lb vip", - .short_help = "lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]", - .function = lb_vip_command_fn, -}; - -static clib_error_t * -lb_as_command_fn (vlib_main_t * vm, - unformat_input_t 
* input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - ip46_address_t vip_prefix, as_addr; - u8 vip_plen; - ip46_address_t *as_array = 0; - u32 vip_index; - u8 del = 0; - int ret; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) - return clib_error_return (0, "invalid as address: '%U'", - format_unformat_error, line_input); - - if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) - return clib_error_return (0, "lb_vip_find_index error %d", ret); - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) { - vec_add1(as_array, as_addr); - } else if (unformat(line_input, "del")) { - del = 1; - } else { - vec_free(as_array); - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - } - } - - if (!vec_len(as_array)) { - vec_free(as_array); - return clib_error_return (0, "No AS address provided"); - } - - lb_garbage_collection(); - clib_warning("vip index is %d", vip_index); - - if (del) { - if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_del_ass error %d", ret); - } - } else { - if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) { - vec_free(as_array); - return clib_error_return (0, "lb_vip_add_ass error %d", ret); - } - } - - vec_free(as_array); - return 0; -} - -VLIB_CLI_COMMAND (lb_as_command, static) = -{ - .path = "lb as", - .short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]", - .function = lb_as_command_fn, -}; - -static clib_error_t * -lb_conf_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - lb_main_t *lbm = &lb_main; - unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t ip4 
= lbm->ip4_src_address; - ip6_address_t ip6 = lbm->ip6_src_address; - u32 per_cpu_sticky_buckets = lbm->per_cpu_sticky_buckets; - u32 per_cpu_sticky_buckets_log2 = 0; - u32 flow_timeout = lbm->flow_timeout; - int ret; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat(line_input, "ip4-src-address %U", unformat_ip4_address, &ip4)) - ; - else if (unformat(line_input, "ip6-src-address %U", unformat_ip6_address, &ip6)) - ; - else if (unformat(line_input, "buckets %d", &per_cpu_sticky_buckets)) - ; - else if (unformat(line_input, "buckets-log2 %d", &per_cpu_sticky_buckets_log2)) { - if (per_cpu_sticky_buckets_log2 >= 32) - return clib_error_return (0, "buckets-log2 value is too high"); - per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2; - } else if (unformat(line_input, "timeout %d", &flow_timeout)) - ; - else - return clib_error_return (0, "parse error: '%U'", - format_unformat_error, line_input); - } - - unformat_free (line_input); - - lb_garbage_collection(); - - if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) - return clib_error_return (0, "lb_conf error %d", ret); - - return NULL; -} - -VLIB_CLI_COMMAND (lb_conf_command, static) = -{ - .path = "lb conf", - .short_help = "lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] [buckets <n>] [timeout <s>]", - .function = lb_conf_command_fn, -}; - -static clib_error_t * -lb_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vlib_cli_output(vm, "%U", format_lb_main); - return NULL; -} - - -VLIB_CLI_COMMAND (lb_show_command, static) = -{ - .path = "show lb", - .short_help = "show lb", - .function = lb_show_command_fn, -}; - -static clib_error_t * -lb_show_vips_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t line_input; - lb_main_t *lbm = &lb_main; - lb_vip_t *vip; - u8 
verbose = 0; - - if (!unformat_user (input, unformat_line_input, &line_input)) - return 0; - - if (unformat(&line_input, "verbose")) - verbose = 1; - - pool_foreach(vip, lbm->vips, { - vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip); - }); - - unformat_free (&line_input); - return NULL; -} - -VLIB_CLI_COMMAND (lb_show_vips_command, static) = -{ - .path = "show lb vips", - .short_help = "show lb vips [verbose]", - .function = lb_show_vips_command_fn, -}; diff --git a/plugins/lb-plugin/lb/lb.api b/plugins/lb-plugin/lb/lb.api deleted file mode 100644 index 39ee3c8f98b..00000000000 --- a/plugins/lb-plugin/lb/lb.api +++ /dev/null @@ -1,71 +0,0 @@ -/** \brief Configure Load-Balancer global parameters - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic. - @param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic. - @param n_sticky_buckets - Number of buckets *per worker thread* in the - established flow table (must be power of 2). - @param flow_timeout - Time in seconds after which, if no packet is received - for a given flow, the flow is removed from the established flow table. -*/ -define lb_conf -{ - u32 client_index; - u32 context; - u32 ip4_src_address; - u8 ip6_src_address[16]; - u32 sticky_buckets_per_core; - u32 flow_timeout; -}; - -define lb_conf_reply { - u32 context; - i32 retval; -}; - -/** \brief Add a virtual address (or prefix) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param ip_prefix - IP address (IPv4 in lower order 32 bits). - @param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4). - @param is_gre4 - Encap is ip4 GRE (ip6 GRE otherwise). - @param new_flows_table_length - Size of the new connections flow table used - for this VIP (must be power of 2). 
- @param is_del - The VIP should be removed. -*/ -define lb_add_del_vip { - u32 client_index; - u32 context; - u8 ip_prefix[16]; - u8 prefix_length; - u8 is_gre4; - u32 new_flows_table_length; - u8 is_del; -}; - -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - -/** \brief Add an application server for a given VIP - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits). - @param vip_ip_prefix - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4). - @param as_address - The application server address (IPv4 in lower order 32 bits). - @param is_del - The AS should be removed. -*/ -define lb_add_del_as { - u32 client_index; - u32 context; - u8 vip_ip_prefix[16]; - u8 vip_prefix_length; - u8 as_address[16]; - u8 is_del; -}; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/plugins/lb-plugin/lb/lb.c b/plugins/lb-plugin/lb/lb.c deleted file mode 100644 index 1d9b987095b..00000000000 --- a/plugins/lb-plugin/lb/lb.c +++ /dev/null @@ -1,844 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <lb/lb.h> -#include <vnet/plugin/plugin.h> -#include <vnet/api_errno.h> - -//GC runs at most once every so many seconds -#define LB_GARBAGE_RUN 60 - -//After so many seconds. 
It is assumed that inter-core race condition will not occur. -#define LB_CONCURRENCY_TIMEOUT 10 - -lb_main_t lb_main; - -#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1)) -#define lb_put_writer_lock() lb_main.writer_lock[0] = 0 - -static void lb_as_stack (lb_as_t *as); - - -const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL }; -const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL }; -const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] = - { - [DPO_PROTO_IP4] = lb_dpo_gre4_ip4, - [DPO_PROTO_IP6] = lb_dpo_gre4_ip6, - }; - -const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL }; -const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL }; -const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] = - { - [DPO_PROTO_IP4] = lb_dpo_gre6_ip4, - [DPO_PROTO_IP6] = lb_dpo_gre6_ip6, - }; - -u32 lb_hash_time_now(vlib_main_t * vm) -{ - return (u32) (vlib_time_now(vm) + 10000); -} - -u8 *format_lb_main (u8 * s, va_list * args) -{ - vlib_thread_main_t *tm = vlib_get_thread_main(); - lb_main_t *lbm = &lb_main; - s = format(s, "lb_main"); - s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address); - s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address); - s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); - s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); - - u32 cpu_index; - for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) { - lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht; - if (h) { - s = format(s, "core %d\n", cpu_index); - s = format(s, " timeout: %ds\n", h->timeout); - s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h)); - } - } - - return s; -} - -static char *lb_vip_type_strings[] = { - [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6", - [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4", - [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6", - [LB_VIP_TYPE_IP4_GRE4] = 
"ip4-gre4", -}; - -u8 *format_lb_vip_type (u8 * s, va_list * args) -{ - lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t); - u32 i; - for (i=0; i<LB_VIP_N_TYPES; i++) - if (vipt == i) - return format(s, lb_vip_type_strings[i]); - return format(s, "_WRONG_TYPE_"); -} - -uword unformat_lb_vip_type (unformat_input_t * input, va_list * args) -{ - lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *); - u32 i; - for (i=0; i<LB_VIP_N_TYPES; i++) - if (unformat(input, lb_vip_type_strings[i])) { - *vipt = i; - return 1; - } - return 0; -} - -u8 *format_lb_vip (u8 * s, va_list * args) -{ - lb_vip_t *vip = va_arg (*args, lb_vip_t *); - return format(s, "%U %U new_size:%u #as:%u%s", - format_lb_vip_type, vip->type, - format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY, - vip->new_flow_table_mask + 1, - pool_elts(vip->as_indexes), - (vip->flags & LB_VIP_FLAGS_USED)?"":" removed"); -} - -u8 *format_lb_as (u8 * s, va_list * args) -{ - lb_as_t *as = va_arg (*args, lb_as_t *); - return format(s, "%U %s", format_ip46_address, - &as->address, IP46_TYPE_ANY, - (as->flags & LB_AS_FLAGS_USED)?"used":"removed"); -} - -u8 *format_lb_vip_detailed (u8 * s, va_list * args) -{ - lb_main_t *lbm = &lb_main; - lb_vip_t *vip = va_arg (*args, lb_vip_t *); - uword indent = format_get_indent (s); - - s = format(s, "%U %U [%u] %U%s\n" - "%U new_size:%u\n", - format_white_space, indent, - format_lb_vip_type, vip->type, - vip - lbm->vips, format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY, - (vip->flags & LB_VIP_FLAGS_USED)?"":" removed", - format_white_space, indent, - vip->new_flow_table_mask + 1); - - //Print counters - s = format(s, "%U counters:\n", - format_white_space, indent); - u32 i; - for (i=0; i<LB_N_VIP_COUNTERS; i++) - s = format(s, "%U %s: %d\n", - format_white_space, indent, - lbm->vip_counters[i].name, - vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips)); - - - s = format(s, "%U #as:%u\n", - format_white_space, indent, - pool_elts(vip->as_indexes)); - - 
//Let's count the buckets for each AS - u32 *count = 0; - vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much... - lb_new_flow_entry_t *nfe; - vec_foreach(nfe, vip->new_flow_table) - count[nfe->as_index]++; - - lb_as_t *as; - u32 *as_index; - pool_foreach(as_index, vip->as_indexes, { - as = &lbm->ass[*as_index]; - s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n", - format_white_space, indent, - format_ip46_address, &as->address, IP46_TYPE_ANY, - count[as - lbm->ass], - vlib_refcount_get(&lbm->as_refcount, as - lbm->ass), - as->dpo.dpoi_index, - (as->flags & LB_AS_FLAGS_USED)?"used":" removed"); - }); - - vec_free(count); - - /* - s = format(s, "%U new flows table:\n", format_white_space, indent); - lb_new_flow_entry_t *nfe; - vec_foreach(nfe, vip->new_flow_table) { - s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index); - } - */ - return s; -} - -typedef struct { - u32 as_index; - u32 last; - u32 skip; -} lb_pseudorand_t; - -static int lb_pseudorand_compare(void *a, void *b) -{ - lb_as_t *asa, *asb; - lb_main_t *lbm = &lb_main; - asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index]; - asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index]; - return memcmp(&asa->address, &asb->address, sizeof(asb->address)); -} - -static void lb_vip_garbage_collection(lb_vip_t *vip) -{ - lb_main_t *lbm = &lb_main; - ASSERT (lbm->writer_lock[0]); - - u32 now = (u32) vlib_time_now(vlib_get_main()); - if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN)) - return; - - vip->last_garbage_collection = now; - lb_as_t *as; - u32 *as_index; - pool_foreach(as_index, vip->as_indexes, { - as = &lbm->ass[*as_index]; - if (!(as->flags & LB_AS_FLAGS_USED) && //Not used - clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used - (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) - { //Not referenced - fib_entry_child_remove(as->next_hop_fib_entry_index, - 
as->next_hop_child_index); - fib_table_entry_delete_index(as->next_hop_fib_entry_index, - FIB_SOURCE_RR); - as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID; - - pool_put(vip->as_indexes, as_index); - pool_put(lbm->ass, as); - } - }); -} - -void lb_garbage_collection() -{ - lb_main_t *lbm = &lb_main; - lb_get_writer_lock(); - lb_vip_t *vip; - u32 *to_be_removed_vips = 0, *i; - pool_foreach(vip, lbm->vips, { - lb_vip_garbage_collection(vip); - - if (!(vip->flags & LB_VIP_FLAGS_USED) && - (pool_elts(vip->as_indexes) == 0)) { - vec_add1(to_be_removed_vips, vip - lbm->vips); - } - }); - - vec_foreach(i, to_be_removed_vips) { - vip = &lbm->vips[*i]; - pool_put(lbm->vips, vip); - pool_free(vip->as_indexes); - } - - vec_free(to_be_removed_vips); - lb_put_writer_lock(); -} - -static void lb_vip_update_new_flow_table(lb_vip_t *vip) -{ - lb_main_t *lbm = &lb_main; - lb_new_flow_entry_t *old_table; - u32 i, *as_index; - lb_new_flow_entry_t *new_flow_table = 0; - lb_as_t *as; - lb_pseudorand_t *pr, *sort_arr = 0; - u32 count; - - ASSERT (lbm->writer_lock[0]); //We must have the lock - - //Check if some AS is configured or not - i = 0; - pool_foreach(as_index, vip->as_indexes, { - as = &lbm->ass[*as_index]; - if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore - i = 1; - goto out; //Not sure 'break' works in this macro-loop - } - }); - -out: - if (i == 0) { - //Only the default. i.e. 
no AS - vec_validate(new_flow_table, vip->new_flow_table_mask); - for (i=0; i<vec_len(new_flow_table); i++) - new_flow_table[i].as_index = 0; - - goto finished; - } - - //First, let's sort the ASs - sort_arr = 0; - vec_alloc(sort_arr, pool_elts(vip->as_indexes)); - - i = 0; - pool_foreach(as_index, vip->as_indexes, { - as = &lbm->ass[*as_index]; - if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore - continue; - - sort_arr[i].as_index = as - lbm->ass; - i++; - }); - _vec_len(sort_arr) = i; - - vec_sort_with_function(sort_arr, lb_pseudorand_compare); - - //Now let's pseudo-randomly generate permutations - vec_foreach(pr, sort_arr) { - lb_as_t *as = &lbm->ass[pr->as_index]; - - u64 seed = clib_xxhash(as->address.as_u64[0] ^ - as->address.as_u64[1]); - /* We have 2^n buckets. - * skip must be prime with 2^n. - * So skip must be odd. - * MagLev actually state that M should be prime, - * but this has a big computation cost (% operation). - * Using 2^n is more better (& operation). - */ - pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask; - pr->last = (seed >> 32) & vip->new_flow_table_mask; - } - - //Let's create a new flow table - vec_validate(new_flow_table, vip->new_flow_table_mask); - for (i=0; i<vec_len(new_flow_table); i++) - new_flow_table[i].as_index = ~0; - - u32 done = 0; - while (1) { - vec_foreach(pr, sort_arr) { - while (1) { - u32 last = pr->last; - pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask; - if (new_flow_table[last].as_index == ~0) { - new_flow_table[last].as_index = pr->as_index; - break; - } - } - done++; - if (done == vec_len(new_flow_table)) - goto finished; - } - } - - vec_free(sort_arr); - -finished: - -//Count number of changed entries - count = 0; - for (i=0; i<vec_len(new_flow_table); i++) - if (vip->new_flow_table == 0 || - new_flow_table[i].as_index != vip->new_flow_table[i].as_index) - count++; - - old_table = vip->new_flow_table; - vip->new_flow_table = new_flow_table; - vec_free(old_table); -} - -int 
lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, - u32 per_cpu_sticky_buckets, u32 flow_timeout) -{ - lb_main_t *lbm = &lb_main; - - if (!is_pow2(per_cpu_sticky_buckets)) - return VNET_API_ERROR_INVALID_MEMORY_SIZE; - - lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self - lbm->ip4_src_address = *ip4_address; - lbm->ip6_src_address = *ip6_address; - lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets; - lbm->flow_timeout = flow_timeout; - lb_put_writer_lock(); - return 0; -} - -static -int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index) -{ - lb_main_t *lbm = &lb_main; - lb_vip_t *vip; - ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned - ip46_prefix_normalize(prefix, plen); - pool_foreach(vip, lbm->vips, { - if ((vip->flags & LB_AS_FLAGS_USED) && - vip->plen == plen && - vip->prefix.as_u64[0] == prefix->as_u64[0] && - vip->prefix.as_u64[1] == prefix->as_u64[1]) { - *vip_index = vip - lbm->vips; - return 0; - } - }); - return VNET_API_ERROR_NO_SUCH_ENTRY; -} - -int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index) -{ - int ret; - lb_get_writer_lock(); - ret = lb_vip_find_index_with_lock(prefix, plen, vip_index); - lb_put_writer_lock(); - return ret; -} - -static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index) -{ - lb_main_t *lbm = &lb_main; - ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned - lb_as_t *as; - u32 *asi; - pool_foreach(asi, vip->as_indexes, { - as = &lbm->ass[*asi]; - if (as->vip_index == (vip - lbm->vips) && - as->address.as_u64[0] == address->as_u64[0] && - as->address.as_u64[1] == address->as_u64[1]) { - *as_index = as - lbm->ass; - return 0; - } - }); - return -1; -} - -int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n) -{ - lb_main_t *lbm = &lb_main; - lb_get_writer_lock(); - lb_vip_t *vip; - if (!(vip = lb_vip_get_by_index(vip_index))) { - 
lb_put_writer_lock(); - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - ip46_type_t type = lb_vip_is_gre4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6; - u32 *to_be_added = 0; - u32 *to_be_updated = 0; - u32 i; - u32 *ip; - - //Sanity check - while (n--) { - - if (!lb_as_find_index_vip(vip, &addresses[n], &i)) { - if (lbm->ass[i].flags & LB_AS_FLAGS_USED) { - vec_free(to_be_added); - vec_free(to_be_updated); - lb_put_writer_lock(); - return VNET_API_ERROR_VALUE_EXIST; - } - vec_add1(to_be_updated, i); - goto next; - } - - if (ip46_address_type(&addresses[n]) != type) { - vec_free(to_be_added); - vec_free(to_be_updated); - lb_put_writer_lock(); - return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; - } - - if (n) { - u32 n2 = n; - while(n2--) //Check for duplicates - if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && - addresses[n2].as_u64[1] == addresses[n].as_u64[1]) - goto next; - } - - vec_add1(to_be_added, n); - -next: - continue; - } - - //Update reused ASs - vec_foreach(ip, to_be_updated) { - lbm->ass[*ip].flags = LB_AS_FLAGS_USED; - } - vec_free(to_be_updated); - - //Create those who have to be created - vec_foreach(ip, to_be_added) { - lb_as_t *as; - u32 *as_index; - pool_get(lbm->ass, as); - as->address = addresses[*ip]; - as->flags = LB_AS_FLAGS_USED; - as->vip_index = vip_index; - pool_get(vip->as_indexes, as_index); - *as_index = as - lbm->ass; - - /* - * become a child of the FIB entry - * so we are informed when its forwarding changes - */ - fib_prefix_t nh = {}; - if (lb_vip_is_gre4(vip)) { - nh.fp_addr.ip4 = as->address.ip4; - nh.fp_len = 32; - nh.fp_proto = FIB_PROTOCOL_IP4; - } else { - nh.fp_addr.ip6 = as->address.ip6; - nh.fp_len = 128; - nh.fp_proto = FIB_PROTOCOL_IP6; - } - - as->next_hop_fib_entry_index = - fib_table_entry_special_add(0, - &nh, - FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, - ADJ_INDEX_INVALID); - as->next_hop_child_index = - fib_entry_child_add(as->next_hop_fib_entry_index, - lbm->fib_node_type, - as - lbm->ass); - - lb_as_stack(as); - } - 
vec_free(to_be_added); - - //Recompute flows - lb_vip_update_new_flow_table(vip); - - //Garbage collection maybe - lb_vip_garbage_collection(vip); - - lb_put_writer_lock(); - return 0; -} - -int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n) -{ - lb_main_t *lbm = &lb_main; - u32 now = (u32) vlib_time_now(vlib_get_main()); - u32 *ip = 0; - - lb_vip_t *vip; - if (!(vip = lb_vip_get_by_index(vip_index))) { - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - u32 *indexes = NULL; - while (n--) { - u32 i; - if (lb_as_find_index_vip(vip, &addresses[n], &i)) { - vec_free(indexes); - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - if (n) { //Check for duplicates - u32 n2 = n - 1; - while(n2--) { - if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && - addresses[n2].as_u64[1] == addresses[n].as_u64[1]) - goto next; - } - } - - vec_add1(indexes, i); -next: - continue; - } - - //Garbage collection maybe - lb_vip_garbage_collection(vip); - - if (indexes != NULL) { - vec_foreach(ip, indexes) { - lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED; - lbm->ass[*ip].last_used = now; - } - - //Recompute flows - lb_vip_update_new_flow_table(vip); - } - - vec_free(indexes); - return 0; -} - -int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n) -{ - lb_get_writer_lock(); - int ret = lb_vip_del_ass_withlock(vip_index, addresses, n); - lb_put_writer_lock(); - return ret; -} - -/** - * Add the VIP adjacency to the ip4 or ip6 fib - */ -static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip) -{ - dpo_proto_t proto = 0; - dpo_id_t dpo = DPO_INVALID; - fib_prefix_t pfx = {}; - if (lb_vip_is_ip4(vip)) { - pfx.fp_addr.ip4 = vip->prefix.ip4; - pfx.fp_len = vip->plen - 96; - pfx.fp_proto = FIB_PROTOCOL_IP4; - proto = DPO_PROTO_IP4; - } else { - pfx.fp_addr.ip6 = vip->prefix.ip6; - pfx.fp_len = vip->plen; - pfx.fp_proto = FIB_PROTOCOL_IP6; - proto = DPO_PROTO_IP6; - } - dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, - proto, vip - 
lbm->vips); - fib_table_entry_special_dpo_add(0, - &pfx, - FIB_SOURCE_PLUGIN_HI, - FIB_ENTRY_FLAG_EXCLUSIVE, - &dpo); - dpo_reset(&dpo); -} - -/** - * Deletes the adjacency associated with the VIP - */ -static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip) -{ - fib_prefix_t pfx = {}; - if (lb_vip_is_ip4(vip)) { - pfx.fp_addr.ip4 = vip->prefix.ip4; - pfx.fp_len = vip->plen - 96; - pfx.fp_proto = FIB_PROTOCOL_IP4; - } else { - pfx.fp_addr.ip6 = vip->prefix.ip6; - pfx.fp_len = vip->plen; - pfx.fp_proto = FIB_PROTOCOL_IP6; - } - fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI); -} - -int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index) -{ - lb_main_t *lbm = &lb_main; - lb_vip_t *vip; - lb_get_writer_lock(); - ip46_prefix_normalize(prefix, plen); - - if (!lb_vip_find_index_with_lock(prefix, plen, vip_index)) { - lb_put_writer_lock(); - return VNET_API_ERROR_VALUE_EXIST; - } - - if (!is_pow2(new_length)) { - lb_put_writer_lock(); - return VNET_API_ERROR_INVALID_MEMORY_SIZE; - } - - if (ip46_prefix_is_ip4(prefix, plen) && - (type != LB_VIP_TYPE_IP4_GRE4) && - (type != LB_VIP_TYPE_IP4_GRE6)) - return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; - - - //Allocate - pool_get(lbm->vips, vip); - - //Init - vip->prefix = *prefix; - vip->plen = plen; - vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main()); - vip->type = type; - vip->flags = LB_VIP_FLAGS_USED; - vip->as_indexes = 0; - - //Validate counters - u32 i; - for (i = 0; i < LB_N_VIP_COUNTERS; i++) { - vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); - vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); - } - - //Configure new flow table - vip->new_flow_table_mask = new_length - 1; - vip->new_flow_table = 0; - - //Create a new flow hash table full of the default entry - lb_vip_update_new_flow_table(vip); - - //Create adjacency to direct traffic - lb_vip_add_adjacency(lbm, vip); - - //Return result - *vip_index = 
vip - lbm->vips; - - lb_put_writer_lock(); - return 0; -} - -int lb_vip_del(u32 vip_index) -{ - lb_main_t *lbm = &lb_main; - lb_vip_t *vip; - lb_get_writer_lock(); - if (!(vip = lb_vip_get_by_index(vip_index))) { - lb_put_writer_lock(); - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - //FIXME: This operation is actually not working - //We will need to remove state before performing this. - - { - //Remove all ASs - ip46_address_t *ass = 0; - lb_as_t *as; - u32 *as_index; - pool_foreach(as_index, vip->as_indexes, { - as = &lbm->ass[*as_index]; - vec_add1(ass, as->address); - }); - if (vec_len(ass)) - lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass)); - vec_free(ass); - } - - //Delete adjacency - lb_vip_del_adjacency(lbm, vip); - - //Set the VIP as unused - vip->flags &= ~LB_VIP_FLAGS_USED; - - lb_put_writer_lock(); - return 0; -} - -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, - vnet_plugin_handoff_t * h, - int from_early_init) -{ - clib_error_t *error = 0; - return error; -} - - -u8 *format_lb_dpo (u8 * s, va_list * va) -{ - index_t index = va_arg (*va, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*va, u32); - lb_main_t *lbm = &lb_main; - lb_vip_t *vip = pool_elt_at_index (lbm->vips, index); - return format (s, "%U", format_lb_vip, vip); -} - -static void lb_dpo_lock (dpo_id_t *dpo) {} -static void lb_dpo_unlock (dpo_id_t *dpo) {} - -static fib_node_t * -lb_fib_node_get_node (fib_node_index_t index) -{ - lb_main_t *lbm = &lb_main; - lb_as_t *as = pool_elt_at_index (lbm->ass, index); - return (&as->fib_node); -} - -static void -lb_fib_node_last_lock_gone (fib_node_t *node) -{ -} - -static lb_as_t * -lb_as_from_fib_node (fib_node_t *node) -{ - return ((lb_as_t*)(((char*)node) - - STRUCT_OFFSET_OF(lb_as_t, fib_node))); -} - -static void -lb_as_stack (lb_as_t *as) -{ - lb_main_t *lbm = &lb_main; - lb_vip_t *vip = &lbm->vips[as->vip_index]; - dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, - 
lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6, - &as->dpo, - fib_entry_contribute_ip_forwarding( - as->next_hop_fib_entry_index)); -} - -static fib_node_back_walk_rc_t -lb_fib_node_back_walk_notify (fib_node_t *node, - fib_node_back_walk_ctx_t *ctx) -{ - lb_as_stack(lb_as_from_fib_node(node)); - return (FIB_NODE_BACK_WALK_CONTINUE); -} - -clib_error_t * -lb_init (vlib_main_t * vm) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - lb_main_t *lbm = &lb_main; - lb_as_t *default_as; - fib_node_vft_t lb_fib_node_vft = { - .fnv_get = lb_fib_node_get_node, - .fnv_last_lock = lb_fib_node_last_lock_gone, - .fnv_back_walk = lb_fib_node_back_walk_notify, - }; - dpo_vft_t lb_vft = { - .dv_lock = lb_dpo_lock, - .dv_unlock = lb_dpo_unlock, - .dv_format = format_lb_dpo, - }; - - lbm->vips = 0; - lbm->per_cpu = 0; - vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1); - lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); - lbm->writer_lock[0] = 0; - lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS; - lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT; - lbm->ip4_src_address.as_u32 = 0xffffffff; - lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL; - lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL; - lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes); - lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes); - lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft); - - //Init AS reference counters - vlib_refcount_init(&lbm->as_refcount); - - //Allocate and init default AS. 
- lbm->ass = 0; - pool_get(lbm->ass, default_as); - default_as->flags = 0; - default_as->dpo.dpoi_next_node = LB_NEXT_DROP; - default_as->vip_index = ~0; - default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL; - default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL; - -#define _(a,b,c) lbm->vip_counters[c].name = b; - lb_foreach_vip_counter -#undef _ - return NULL; -} - -VLIB_INIT_FUNCTION (lb_init); diff --git a/plugins/lb-plugin/lb/lb.h b/plugins/lb-plugin/lb/lb.h deleted file mode 100644 index 882b9b30f7e..00000000000 --- a/plugins/lb-plugin/lb/lb.h +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * lb-plugin implements a MagLev-like load balancer. - * http://research.google.com/pubs/pub44824.html - * - * It hasn't been tested for interoperability with the original MagLev - * but intends to provide similar functionality. - * The load-balancer receives traffic destined to VIP (Virtual IP) - * addresses from one or multiple(ECMP) routers. - * The load-balancer tunnels the traffic toward many application servers - * ensuring session stickyness (i.e. that a single sessions is tunneled - * towards a single application server). 
- * - */ - -#ifndef LB_PLUGIN_LB_LB_H_ -#define LB_PLUGIN_LB_LB_H_ - -#include <lb/util.h> -#include <lb/refcount.h> - -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> -#include <vnet/dpo/dpo.h> -#include <vnet/fib/fib_table.h> - -#include <lb/lbhash.h> - -#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10 -#define LB_DEFAULT_FLOW_TIMEOUT 40 - -typedef enum { - LB_NEXT_DROP, - LB_N_NEXT, -} lb_next_t; - -/** - * Each VIP is configured with a set of - * application server. - */ -typedef struct { - /** - * Registration to FIB event. - */ - fib_node_t fib_node; - - /** - * Destination address used to tunnel traffic towards - * that application server. - * The address is also used as ID and pseudo-random - * seed for the load-balancing process. - */ - ip46_address_t address; - - /** - * ASs are indexed by address and VIP Index. - * Which means there will be duplicated if the same server - * address is used for multiple VIPs. - */ - u32 vip_index; - - /** - * Some per-AS flags. - * For now only LB_AS_FLAGS_USED is defined. - */ - u8 flags; - -#define LB_AS_FLAGS_USED 0x1 - - /** - * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set. - * - * AS removal is based on garbage collection and reference counting. - * When an AS is removed, there is a race between configuration core - * and worker cores which may still add a reference while it should not - * be used. This timestamp is used to not remove the AS while a race condition - * may happen. - */ - u32 last_used; - - /** - * The FIB entry index for the next-hop - */ - fib_node_index_t next_hop_fib_entry_index; - - /** - * The child index on the FIB entry - */ - u32 next_hop_child_index; - - /** - * The next DPO in the graph to follow. 
- */ - dpo_id_t dpo; - -} lb_as_t; - -format_function_t format_lb_as; - -typedef struct { - u32 as_index; -} lb_new_flow_entry_t; - -#define lb_foreach_vip_counter \ - _(NEXT_PACKET, "packet from existing sessions", 0) \ - _(FIRST_PACKET, "first session packet", 1) \ - _(UNTRACKED_PACKET, "untracked packet", 2) \ - _(NO_SERVER, "no server configured", 3) - -typedef enum { -#define _(a,b,c) LB_VIP_COUNTER_##a = c, - lb_foreach_vip_counter -#undef _ - LB_N_VIP_COUNTERS -} lb_vip_counter_t; - -/** - * The load balancer supports IPv4 and IPv6 traffic - * and GRE4 and GRE6 encap. - */ -typedef enum { - LB_VIP_TYPE_IP6_GRE6, - LB_VIP_TYPE_IP6_GRE4, - LB_VIP_TYPE_IP4_GRE6, - LB_VIP_TYPE_IP4_GRE4, - LB_VIP_N_TYPES, -} lb_vip_type_t; - -format_function_t format_lb_vip_type; -unformat_function_t unformat_lb_vip_type; - -/** - * Load balancing service is provided per VIP. - * In this data model, a VIP can be a whole prefix. - * But load balancing only - * occurs on a per-source-address/port basis. Meaning that if a given source - * reuses the same port for multiple destinations within the same VIP, - * they will be considered as a single flow. - */ -typedef struct { - - //Runtime - - /** - * Vector mapping (flow-hash & new_connect_table_mask) to AS index. - * This is used for new flows. - */ - lb_new_flow_entry_t *new_flow_table; - - /** - * New flows table length - 1 - * (length MUST be a power of 2) - */ - u32 new_flow_table_mask; - - /** - * Last time garbage collection was run to free the ASs. - */ - u32 last_garbage_collection; - - //Not runtime - - /** - * A Virtual IP represents a given service delivered - * by a set of application servers. It can be a single - * address or a prefix. - * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address - * (i.e. ::/96 prefix). - */ - ip46_address_t prefix; - - /** - * The VIP prefix length. - * In case of IPv4, plen = 96 + ip4_plen. - */ - u8 plen; - - /** - * The type of traffic for this. - * LB_TYPE_UNDEFINED if unknown. 
- */ - lb_vip_type_t type; - - /** - * Flags related to this VIP. - * LB_VIP_FLAGS_USED means the VIP is active. - * When it is not set, the VIP in the process of being removed. - * We cannot immediately remove a VIP because the VIP index still may be stored - * in the adjacency index. - */ - u8 flags; -#define LB_VIP_FLAGS_USED 0x1 - - /** - * Pool of AS indexes used for this VIP. - * This also includes ASs that have been removed (but are still referenced). - */ - u32 *as_indexes; -} lb_vip_t; - -#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4) -#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 || (vip)->type == LB_VIP_TYPE_IP4_GRE4) -format_function_t format_lb_vip; -format_function_t format_lb_vip_detailed; - -typedef struct { - /** - * Each CPU has its own sticky flow hash table. - * One single table is used for all VIPs. - */ - lb_hash_t *sticky_ht; -} lb_per_cpu_t; - -typedef struct { - /** - * Pool of all Virtual IPs - */ - lb_vip_t *vips; - - /** - * Pool of ASs. - * ASs are referenced by address and vip index. - * The first element (index 0) is special and used only to fill - * new_flow_tables when no AS has been configured. - */ - lb_as_t *ass; - - /** - * Each AS has an associated reference counter. - * As ass[0] has a special meaning, its associated counter - * starts at 0 and is decremented instead. i.e. do not use it. - */ - vlib_refcount_t as_refcount; - - /** - * Some global data is per-cpu - */ - lb_per_cpu_t *per_cpu; - - /** - * Node next index for IP adjacencies, for each of the traffic types. - */ - u32 ip_lookup_next_index[LB_VIP_N_TYPES]; - - /** - * Source address used in IPv6 encapsulated traffic - */ - ip6_address_t ip6_src_address; - - /** - * Source address used for IPv4 encapsulated traffic - */ - ip4_address_t ip4_src_address; - - /** - * Number of buckets in the per-cpu sticky hash table. - */ - u32 per_cpu_sticky_buckets; - - /** - * Flow timeout in seconds. 
- */ - u32 flow_timeout; - - /** - * Per VIP counter - */ - vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS]; - - /** - * DPO used to send packet from IP4/6 lookup to LB node. - */ - dpo_type_t dpo_gre4_type; - dpo_type_t dpo_gre6_type; - - /** - * Node type for registering to fib changes. - */ - fib_node_type_t fib_node_type; - - /** - * API dynamically registered base ID. - */ - u16 msg_id_base; - - volatile u32 *writer_lock; -} lb_main_t; - -extern lb_main_t lb_main; -extern vlib_node_registration_t lb6_node; -extern vlib_node_registration_t lb4_node; - -/** - * Fix global load-balancer parameters. - * @param ip4_address IPv4 source address used for encapsulated traffic - * @param ip6_address IPv6 source address used for encapsulated traffic - * @return 0 on success. VNET_LB_ERR_XXX on error - */ -int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, - u32 sticky_buckets, u32 flow_timeout); - -int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, - u32 new_length, u32 *vip_index); -int lb_vip_del(u32 vip_index); - -int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index); - -#define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index)) - -int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n); -int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n); - -u32 lb_hash_time_now(vlib_main_t * vm); - -void lb_garbage_collection(); - -format_function_t format_lb_main; - -#endif /* LB_PLUGIN_LB_LB_H_ */ diff --git a/plugins/lb-plugin/lb/lb_test.c b/plugins/lb-plugin/lb/lb_test.c deleted file mode 100644 index 8c2eaa91ce9..00000000000 --- a/plugins/lb-plugin/lb/lb_test.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vat/vat.h> -#include <vlibapi/api.h> -#include <vlibmemory/api.h> -#include <vlibsocket/api.h> -#include <vppinfra/error.h> -#include <lb/lb.h> - -//TODO: Move that to vat/plugin_api.c -////////////////////////// -uword unformat_ip46_address (unformat_input_t * input, va_list * args) -{ - ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); - ip46_type_t type = va_arg (*args, ip46_type_t); - if ((type != IP46_TYPE_IP6) && - unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) { - ip46_address_mask_ip4(ip46); - return 1; - } else if ((type != IP46_TYPE_IP4) && - unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) { - return 1; - } - return 0; -} -uword unformat_ip46_prefix (unformat_input_t * input, va_list * args) -{ - ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); - u8 *len = va_arg (*args, u8 *); - ip46_type_t type = va_arg (*args, ip46_type_t); - - u32 l; - if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) { - if (l > 32) - return 0; - *len = l + 96; - ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0; - } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) { - if (l > 128) - return 0; - *len = l; - } else { - return 0; - } - return 1; -} -///////////////////////// - -#define vl_msg_id(n,h) n, -typedef enum { -#include <lb/lb.api.h> - /* We'll want to know how many messages IDs we need... 
*/ - VL_MSG_FIRST_AVAILABLE, -} vl_msg_id_t; -#undef vl_msg_id - -/* define message structures */ -#define vl_typedefs -#include <lb/lb.api.h> -#undef vl_typedefs - -/* declare message handlers for each api */ - -#define vl_endianfun /* define message structures */ -#include <lb/lb.api.h> -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) -#define vl_printfun -#include <lb/lb.api.h> -#undef vl_printfun - -/* Get the API version number. */ -#define vl_api_version(n,v) static u32 api_version=(v); -#include <lb/lb.api.h> -#undef vl_api_version - -typedef struct { - /* API message ID base */ - u16 msg_id_base; - vat_main_t *vat_main; -} lb_test_main_t; - -lb_test_main_t lb_test_main; - -#define foreach_standard_reply_retval_handler \ -_(lb_conf_reply) \ -_(lb_add_del_vip_reply) \ -_(lb_add_del_as_reply) - -#define _(n) \ - static void vl_api_##n##_t_handler \ - (vl_api_##n##_t * mp) \ - { \ - vat_main_t * vam = lb_test_main.vat_main; \ - i32 retval = ntohl(mp->retval); \ - if (vam->async_mode) { \ - vam->async_errors += (retval < 0); \ - } else { \ - vam->retval = retval; \ - vam->result_ready = 1; \ - } \ - } -foreach_standard_reply_retval_handler; -#undef _ - -/* - * Table of message reply handlers, must include boilerplate handlers - * we just generated - */ -#define foreach_vpe_api_reply_msg \ - _(LB_CONF_REPLY, lb_conf_reply) \ - _(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \ - _(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply) - -/* M: construct, but don't yet send a message */ -#define M(T,t) \ -do { \ - vam->result_ready = 0; \ - mp = vl_msg_api_alloc(sizeof(*mp)); \ - memcpy (mp, &mps, sizeof (*mp)); \ - mp->_vl_msg_id = ntohs (VL_API_##T + lbtm->msg_id_base); \ - mp->client_index = vam->my_client_index; \ -} while(0); - -/* S: send a message */ -#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) - -/* W: wait for results, with timeout */ -#define W \ -do { \ - timeout = vat_time_now (vam) + 
1.0; \ - \ - while (vat_time_now (vam) < timeout) { \ - if (vam->result_ready == 1) { \ - return (vam->retval); \ - } \ - } \ - return -99; \ -} while(0); - -static int api_lb_conf (vat_main_t * vam) -{ - lb_test_main_t *lbtm = &lb_test_main; - unformat_input_t *i = vam->input; - f64 timeout; - vl_api_lb_conf_t mps, *mp; - - if (!unformat(i, "%U %U %u %u", - unformat_ip4_address, &mps.ip4_src_address, - unformat_ip6_address, mps.ip6_src_address, - &mps.sticky_buckets_per_core, - &mps.flow_timeout)) { - errmsg ("invalid arguments\n"); - return -99; - } - - M(LB_CONF, lb_conf); S; W; - - /* NOTREACHED */ - return 0; -} - -static int api_lb_add_del_vip (vat_main_t * vam) -{ - lb_test_main_t *lbtm = &lb_test_main; - unformat_input_t * i = vam->input; - f64 timeout; - vl_api_lb_add_del_vip_t mps, *mp; - mps.is_del = 0; - mps.is_gre4 = 0; - - if (!unformat(i, "%U", - unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) { - errmsg ("invalid prefix\n"); - return -99; - } - - if (unformat(i, "gre4")) { - mps.is_gre4 = 1; - } else if (unformat(i, "gre6")) { - mps.is_gre4 = 0; - } else { - errmsg ("no encap\n"); - return -99; - } - - if (!unformat(i, "%d", &mps.new_flows_table_length)) { - errmsg ("no table lentgh\n"); - return -99; - } - - if (unformat(i, "del")) { - mps.is_del = 1; - } - - M(LB_ADD_DEL_VIP, lb_add_del_vip); S; W; - /* NOTREACHED */ - return 0; -} - -static int api_lb_add_del_as (vat_main_t * vam) -{ - lb_test_main_t *lbtm = &lb_test_main; - unformat_input_t * i = vam->input; - f64 timeout; - vl_api_lb_add_del_as_t mps, *mp; - mps.is_del = 0; - - if (!unformat(i, "%U %U", - unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY, - unformat_ip46_address, mps.as_address)) { - errmsg ("invalid prefix or address\n"); - return -99; - } - - if (unformat(i, "del")) { - mps.is_del = 1; - } - - M(LB_ADD_DEL_AS, lb_add_del_as); S; W; - /* NOTREACHED */ - return 0; -} - -/* - * List of messages that the api test plugin sends, 
- * and that the data plane plugin processes - */ -#define foreach_vpe_api_msg \ -_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \ -_(lb_add_del_vip, "<ip-prefix> [gre4|gre6] <new_table_len> [del]") \ -_(lb_add_del_as, "<vip-ip-prefix> <address> [del]") - -void vat_api_hookup (vat_main_t *vam) -{ - lb_test_main_t * lbtm = &lb_test_main; - /* Hook up handlers for replies from the data plane plug-in */ -#define _(N,n) \ - vl_msg_api_set_handlers((VL_API_##N + lbtm->msg_id_base), \ - #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_reply_msg; -#undef _ - - /* API messages we can send */ -#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); - foreach_vpe_api_msg; -#undef _ - - /* Help strings */ -#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); - foreach_vpe_api_msg; -#undef _ -} - -clib_error_t * vat_plugin_register (vat_main_t *vam) -{ - lb_test_main_t * lbtm = &lb_test_main; - - u8 * name; - - lbtm->vat_main = vam; - - /* Ask the vpp engine for the first assigned message-id */ - name = format (0, "lb_%08x%c", api_version, 0); - lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); - - if (lbtm->msg_id_base != (u16) ~0) - vat_api_hookup (vam); - - vec_free(name); - - return 0; -} diff --git a/plugins/lb-plugin/lb/lbhash.h b/plugins/lb-plugin/lb/lbhash.h deleted file mode 100644 index ca3cc143dc2..00000000000 --- a/plugins/lb-plugin/lb/lbhash.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * vppinfra already includes tons of different hash tables. - * MagLev flow table is a bit different. It has to be very efficient - * for both writing and reading operations. But it does not need to - * be 100% reliable (write can fail). It also needs to recycle - * old entries in a lazy way. - * - * This hash table is the most dummy hash table you can do. - * Fixed total size, fixed bucket size. - * Advantage is that it could be very efficient (maybe). - * - */ - -#ifndef LB_PLUGIN_LB_LBHASH_H_ -#define LB_PLUGIN_LB_LBHASH_H_ - -#include <vnet/vnet.h> - -#if defined (__SSE4_2__) -#include <immintrin.h> -#endif - -/* - * @brief Number of entries per bucket. - */ -#define LBHASH_ENTRY_PER_BUCKET 4 - -#define LB_HASH_DO_NOT_USE_SSE_BUCKETS 0 - -/* - * @brief One bucket contains 4 entries. - * Each bucket takes one 64B cache line in memory. 
- */ -typedef struct { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - u32 hash[LBHASH_ENTRY_PER_BUCKET]; - u32 timeout[LBHASH_ENTRY_PER_BUCKET]; - u32 vip[LBHASH_ENTRY_PER_BUCKET]; - u32 value[LBHASH_ENTRY_PER_BUCKET]; -} lb_hash_bucket_t; - -typedef struct { - u32 buckets_mask; - u32 timeout; - lb_hash_bucket_t buckets[]; -} lb_hash_t; - -#define lb_hash_nbuckets(h) (((h)->buckets_mask) + 1) -#define lb_hash_size(h) ((h)->buckets_mask + LBHASH_ENTRY_PER_BUCKET) - -#define lb_hash_foreach_bucket(h, bucket) \ - for (bucket = (h)->buckets; \ - bucket < (h)->buckets + lb_hash_nbuckets(h); \ - bucket++) - -#define lb_hash_foreach_entry(h, bucket, i) \ - lb_hash_foreach_bucket(h, bucket) \ - for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) - -#define lb_hash_foreach_valid_entry(h, bucket, i, now) \ - lb_hash_foreach_entry(h, bucket, i) \ - if (!clib_u32_loop_gt((now), bucket->timeout[i])) - -static_always_inline -lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout) -{ - if (!is_pow2(buckets)) - return NULL; - - // Allocate 1 more bucket for prefetch - u32 size = ((u64)&((lb_hash_t *)(0))->buckets[0]) + - sizeof(lb_hash_bucket_t) * (buckets + 1); - u8 *mem = 0; - lb_hash_t *h; - vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES); - h = (lb_hash_t *)mem; - h->buckets_mask = (buckets - 1); - h->timeout = timeout; - return h; -} - -static_always_inline -void lb_hash_free(lb_hash_t *h) -{ - u8 *mem = (u8 *)h; - vec_free(mem); -} - -#if __SSE4_2__ -static_always_inline -u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) -{ - u64 val = 0; - val = _mm_crc32_u64(val, k0); - val = _mm_crc32_u64(val, k1); - val = _mm_crc32_u64(val, k2); - val = _mm_crc32_u64(val, k3); - val = _mm_crc32_u64(val, k4); - return (u32) val; -} -#else -static_always_inline -u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) -{ - u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4; - return (u32)clib_xxhash (tmp); -} -#endif - -static_always_inline -void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash) -{ - 
lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask]; - CLIB_PREFETCH(bucket, sizeof(*bucket), READ); -} - -static_always_inline -void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now, - u32 *available_index, u32 *found_value) -{ - lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask]; - *found_value = ~0; - *available_index = ~0; -#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0 - u32 bitmask, found_index; - __m128i mask; - - // mask[*] = timeout[*] > now - mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout), - _mm_set1_epi32 (time_now)); - // bitmask[*] = now <= timeout[*/4] - bitmask = (~_mm_movemask_epi8(mask)) & 0xffff; - // Get first index with now <= timeout[*], if any. - *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index; - - // mask[*] = (timeout[*] > now) && (hash[*] == hash) - mask = _mm_and_si128(mask, - _mm_cmpeq_epi32( - _mm_loadu_si128 ((__m128i *) bucket->hash), - _mm_set1_epi32 (hash))); - - // Load the array of vip values - // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip) - mask = _mm_and_si128(mask, - _mm_cmpeq_epi32( - _mm_loadu_si128 ((__m128i *) bucket->vip), - _mm_set1_epi32 (vip))); - - // mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip) - bitmask = _mm_movemask_epi8(mask); - // Get first index, if any - found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0; - ASSERT(found_index < 4); - *found_value = (bitmask)?bucket->value[found_index]:*found_value; - bucket->timeout[found_index] = - (bitmask)?time_now + ht->timeout:bucket->timeout[found_index]; -#else - u32 i; - for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) { - u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip); - u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]); - *found_value = (cmp || timeouted)?*found_value:bucket->value[i]; - bucket->timeout[i] = (cmp || timeouted)?time_now + ht->timeout:bucket->timeout[i]; - *available_index = (timeouted && 
(*available_index == ~0))?i:*available_index; - - if (!cmp) - return; - } -#endif -} - -static_always_inline -u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index) -{ - return h->buckets[hash & h->buckets_mask].value[available_index]; -} - -static_always_inline -void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip, - u32 available_index, u32 time_now) -{ - lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask]; - bucket->hash[available_index] = hash; - bucket->value[available_index] = value; - bucket->timeout[available_index] = time_now + h->timeout; - bucket->vip[available_index] = vip; -} - -static_always_inline -u32 lb_hash_elts(lb_hash_t *h, u32 time_now) -{ - u32 tot = 0; - lb_hash_bucket_t *bucket; - u32 i; - lb_hash_foreach_valid_entry(h, bucket, i, time_now) { - tot++; - } - return tot; -} - -#endif /* LB_PLUGIN_LB_LBHASH_H_ */ diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c deleted file mode 100644 index 8b763c537d5..00000000000 --- a/plugins/lb-plugin/lb/node.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <lb/lb.h> - -#include <vnet/gre/packet.h> -#include <lb/lbhash.h> - -#define foreach_lb_error \ - _(NONE, "no error") \ - _(PROTO_NOT_SUPPORTED, "protocol not supported") - -typedef enum { -#define _(sym,str) LB_ERROR_##sym, - foreach_lb_error -#undef _ - LB_N_ERROR, -} lb_error_t; - -static char *lb_error_strings[] = { -#define _(sym,string) string, - foreach_lb_error -#undef _ -}; - -typedef struct { - u32 vip_index; - u32 as_index; -} lb_trace_t; - -u8 * -format_lb_trace (u8 * s, va_list * args) -{ - lb_main_t *lbm = &lb_main; - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - lb_trace_t *t = va_arg (*args, lb_trace_t *); - if (pool_is_free_index(lbm->vips, t->vip_index)) { - s = format(s, "lb vip[%d]: This VIP was freed since capture\n"); - } else { - s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]); - } - if (pool_is_free_index(lbm->ass, t->as_index)) { - s = format(s, "lb as[%d]: This AS was freed since capture\n"); - } else { - s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]); - } - return s; -} - -lb_hash_t *lb_get_sticky_table(u32 cpu_index) -{ - lb_main_t *lbm = &lb_main; - lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; - //Check if size changed - if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) - { - //Dereference everything in there - lb_hash_bucket_t *b; - u32 i; - lb_hash_foreach_entry(sticky_ht, b, i) { - vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1); - } - - lb_hash_free(sticky_ht); - sticky_ht = NULL; - } - - //Create if necessary - if (PREDICT_FALSE(sticky_ht == NULL)) { - lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); - sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; - 
clib_warning("Regenerated sticky table %p", sticky_ht); - } - - ASSERT(sticky_ht); - - //Update timeout - sticky_ht->timeout = lbm->flow_timeout; - return sticky_ht; -} - -u64 -lb_node_get_other_ports4(ip4_header_t *ip40) -{ - return 0; -} - -u64 -lb_node_get_other_ports6(ip6_header_t *ip60) -{ - return 0; -} - -static_always_inline u32 -lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4) -{ - u32 hash; - if (is_input_v4) - { - ip4_header_t *ip40; - u64 ports; - ip40 = vlib_buffer_get_current (p); - if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP || - ip40->protocol == IP_PROTOCOL_UDP)) - ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) | - ((u64)((udp_header_t *)(ip40 + 1))->dst_port); - else - ports = lb_node_get_other_ports4(ip40); - - hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports, - 0, 0, 0); - } - else - { - ip6_header_t *ip60; - ip60 = vlib_buffer_get_current (p); - u64 ports; - if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP || - ip60->protocol == IP_PROTOCOL_UDP)) - ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) | - ((u64)((udp_header_t *)(ip60 + 1))->dst_port); - else - ports = lb_node_get_other_ports6(ip60); - - hash = lb_hash_hash(ip60->src_address.as_u64[0], - ip60->src_address.as_u64[1], - ip60->dst_address.as_u64[0], - ip60->dst_address.as_u64[1], - ports); - } - return hash; -} - -static_always_inline uword -lb_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame, - u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6) - u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6) -{ - lb_main_t *lbm = &lb_main; - u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - u32 cpu_index = os_get_cpu_number(); - u32 lb_time = lb_hash_time_now(vm); - - lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - u32 nexthash0 = 
0; - if (PREDICT_TRUE(n_left_from > 0)) - nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0; - vlib_buffer_t *p0; - lb_vip_t *vip0; - u32 asindex0; - u16 len0; - u32 available_index0; - u8 counter = 0; - u32 hash0 = nexthash0; - - if (PREDICT_TRUE(n_left_from > 1)) - { - vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]); - //Compute next hash and prefetch bucket - nexthash0 = lb_node_get_hash(p1, is_input_v4); - lb_hash_prefetch_bucket(sticky_ht, nexthash0); - //Prefetch for encap, next - CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE); - } - - if (PREDICT_TRUE(n_left_from > 2)) - { - vlib_buffer_t *p2; - p2 = vlib_get_buffer(vm, from[2]); - /* prefetch packet header and data */ - vlib_prefetch_buffer_header(p2, STORE); - CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE); - } - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer (vm, pi0); - vip0 = pool_elt_at_index (lbm->vips, - vnet_buffer (p0)->ip.adj_index[VLIB_TX]); - - if (is_input_v4) - { - ip4_header_t *ip40; - ip40 = vlib_buffer_get_current (p0); - len0 = clib_net_to_host_u16(ip40->length); - } - else - { - ip6_header_t *ip60; - ip60 = vlib_buffer_get_current (p0); - len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); - } - - lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX], - lb_time, &available_index0, &asindex0); - - if (PREDICT_TRUE(asindex0 != ~0)) - { - //Found an existing entry - counter = LB_VIP_COUNTER_NEXT_PACKET; - } - else if (PREDICT_TRUE(available_index0 != ~0)) - { - //There is an available slot for a new flow - asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index; - counter = LB_VIP_COUNTER_FIRST_PACKET; - counter = (asindex0 == 
0)?LB_VIP_COUNTER_NO_SERVER:counter; - - //TODO: There are race conditions with as0 and vip0 manipulation. - //Configuration may be changed, vectors resized, etc... - - //Dereference previously used - vlib_refcount_add(&lbm->as_refcount, cpu_index, - lb_hash_available_value(sticky_ht, hash0, available_index0), -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, - asindex0, 1); - - //Add sticky entry - //Note that when there is no AS configured, an entry is configured anyway. - //But no configured AS is not something that should happen - lb_hash_put(sticky_ht, hash0, asindex0, - vnet_buffer (p0)->ip.adj_index[VLIB_TX], - available_index0, lb_time); - } - else - { - //Could not store new entry in the table - asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index; - counter = LB_VIP_COUNTER_UNTRACKED_PACKET; - } - - vlib_increment_simple_counter(&lbm->vip_counters[counter], - cpu_index, - vnet_buffer (p0)->ip.adj_index[VLIB_TX], - 1); - - //Now let's encap - { - gre_header_t *gre0; - if (is_encap_v4) - { - ip4_header_t *ip40; - vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); - ip40 = vlib_buffer_get_current(p0); - gre0 = (gre_header_t *)(ip40 + 1); - ip40->src_address = lbm->ip4_src_address; - ip40->dst_address = lbm->ass[asindex0].address.ip4; - ip40->ip_version_and_header_length = 0x45; - ip40->ttl = 128; - ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); - ip40->protocol = IP_PROTOCOL_GRE; - ip40->checksum = ip4_header_checksum (ip40); - } - else - { - ip6_header_t *ip60; - vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); - ip60 = vlib_buffer_get_current(p0); - gre0 = (gre_header_t *)(ip60 + 1); - ip60->dst_address = lbm->ass[asindex0].address.ip6; - ip60->src_address = lbm->ip6_src_address; - ip60->hop_limit = 128; - ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); - ip60->payload_length = clib_host_to_net_u16(len0 + 
sizeof(gre_header_t)); - ip60->protocol = IP_PROTOCOL_GRE; - } - - gre0->flags_and_version = 0; - gre0->protocol = (is_input_v4)? - clib_host_to_net_u16(0x0800): - clib_host_to_net_u16(0x86DD); - } - - if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) - { - lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); - tr->as_index = asindex0; - tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - } - - //Enqueue to next - //Note that this is going to error if asindex0 == 0 - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index; - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, - lbm->ass[asindex0].dpo.dpoi_next_node); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; -} - -static uword -lb6_gre6_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return lb_node_fn(vm, node, frame, 0, 0); -} - -static uword -lb6_gre4_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return lb_node_fn(vm, node, frame, 0, 1); -} - -static uword -lb4_gre6_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return lb_node_fn(vm, node, frame, 1, 0); -} - -static uword -lb4_gre4_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return lb_node_fn(vm, node, frame, 1, 1); -} - -VLIB_REGISTER_NODE (lb6_gre6_node) = -{ - .function = lb6_gre6_node_fn, - .name = "lb6-gre6", - .vector_size = sizeof (u32), - .format_trace = format_lb_trace, - - .n_errors = LB_N_ERROR, - .error_strings = lb_error_strings, - - .n_next_nodes = LB_N_NEXT, - .next_nodes = - { - [LB_NEXT_DROP] = "error-drop" - }, -}; - -VLIB_REGISTER_NODE (lb6_gre4_node) = -{ - .function = lb6_gre4_node_fn, - .name = "lb6-gre4", - .vector_size = sizeof (u32), - .format_trace = format_lb_trace, - - .n_errors = LB_N_ERROR, - .error_strings = lb_error_strings, - - .n_next_nodes = 
LB_N_NEXT, - .next_nodes = - { - [LB_NEXT_DROP] = "error-drop" - }, -}; - -VLIB_REGISTER_NODE (lb4_gre6_node) = -{ - .function = lb4_gre6_node_fn, - .name = "lb4-gre6", - .vector_size = sizeof (u32), - .format_trace = format_lb_trace, - - .n_errors = LB_N_ERROR, - .error_strings = lb_error_strings, - - .n_next_nodes = LB_N_NEXT, - .next_nodes = - { - [LB_NEXT_DROP] = "error-drop" - }, -}; - -VLIB_REGISTER_NODE (lb4_gre4_node) = -{ - .function = lb4_gre4_node_fn, - .name = "lb4-gre4", - .vector_size = sizeof (u32), - .format_trace = format_lb_trace, - - .n_errors = LB_N_ERROR, - .error_strings = lb_error_strings, - - .n_next_nodes = LB_N_NEXT, - .next_nodes = - { - [LB_NEXT_DROP] = "error-drop" - }, -}; - diff --git a/plugins/lb-plugin/lb/refcount.c b/plugins/lb-plugin/lb/refcount.c deleted file mode 100644 index 22415c8889e..00000000000 --- a/plugins/lb-plugin/lb/refcount.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <lb/refcount.h> - -/* - * Replace the per-cpu counter vector with a larger one, preserving the - * existing counts. per_cpu->length is an element count (it is set from - * vec_len below), so the memcpy size is scaled by the element size. - * The new vector is published before the length is updated, with a memory - * barrier in between; the old vector is freed afterwards. - */ -void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size) -{ - u32 *new_counter = 0, *old_counter; - vec_validate(new_counter, size); - memcpy(new_counter, per_cpu->counters, per_cpu->length * sizeof(u32)); - old_counter = per_cpu->counters; - per_cpu->counters = new_counter; - CLIB_MEMORY_BARRIER(); - per_cpu->length = vec_len(new_counter); - vec_free(old_counter); -} - -u64 vlib_refcount_get(vlib_refcount_t *r, u32 index) -{ - u64 count = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 cpu_index; - for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) { - if (r->per_cpu[cpu_index].length > index) - count += r->per_cpu[cpu_index].counters[index]; - } - return count; -} - diff --git a/plugins/lb-plugin/lb/refcount.h b/plugins/lb-plugin/lb/refcount.h deleted file mode 100644 index 8c26e7be76f..00000000000 --- a/plugins/lb-plugin/lb/refcount.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * vlib provides lock-free counters but those - * - Have 16bits per-CPU counter, which may overflow. - * - Would only increment. - * - * This is very similar to vlib counters, but may be used to count reference. - * Such a counter includes an arbitrary number of counters. Each counter - * is identified by its index. This is used to aggregate per-cpu memory.
- * - * Warning: - * This reference counter is lock-free but is not race-condition free. - * The counting result is approximate and another mechanism needs to be used - * in order to ensure that an object may be freed. - * - */ - -#include <vnet/vnet.h> - -typedef struct { - u32 *counters; - u32 length; - u32 *reader_lengths; - CLIB_CACHE_LINE_ALIGN_MARK(o); -} vlib_refcount_per_cpu_t; - -typedef struct { - vlib_refcount_per_cpu_t *per_cpu; -} vlib_refcount_t; - -void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size); - -static_always_inline -void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v) -{ - vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index]; - if (PREDICT_FALSE(counter_index >= per_cpu->length)) - __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2)); - - per_cpu->counters[counter_index] += v; -} - -u64 vlib_refcount_get(vlib_refcount_t *r, u32 index); - -static_always_inline -void vlib_refcount_init(vlib_refcount_t *r) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - r->per_cpu = 0; - vec_validate (r->per_cpu, tm->n_vlib_mains - 1); -} - - diff --git a/plugins/lb-plugin/lb/util.c b/plugins/lb-plugin/lb/util.c deleted file mode 100644 index d969d168dce..00000000000 --- a/plugins/lb-plugin/lb/util.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include <lb/util.h> - -void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen) -{ - if (plen == 0) { - prefix->as_u64[0] = 0; - prefix->as_u64[1] = 0; - } else if (plen <= 64) { - prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen)); - prefix->as_u64[1] = 0; - } else { - prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen)); - } - -} - -uword unformat_ip46_prefix (unformat_input_t * input, va_list * args) -{ - ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); - u8 *len = va_arg (*args, u8 *); - ip46_type_t type = va_arg (*args, ip46_type_t); - - u32 l; - if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) { - if (l > 32) - return 0; - *len = l + 96; - ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0; - } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) { - if (l > 128) - return 0; - *len = l; - } else { - return 0; - } - return 1; -} - -u8 *format_ip46_prefix (u8 * s, va_list * args) -{ - ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); - u32 len = va_arg (*args, u32); //va_arg cannot use u8 or u16 - ip46_type_t type = va_arg (*args, ip46_type_t); - - int is_ip4 = 0; - if (type == IP46_TYPE_IP4) - is_ip4 = 1; - else if (type == IP46_TYPE_IP6) - is_ip4 = 0; - else - is_ip4 = (len >= 96) && ip46_address_is_ip4(ip46); - - return is_ip4 ? - format(s, "%U/%d", format_ip4_address, &ip46->ip4, len - 96): - format(s, "%U/%d", format_ip6_address, &ip46->ip6, len); -} - diff --git a/plugins/lb-plugin/lb/util.h b/plugins/lb-plugin/lb/util.h deleted file mode 100644 index 3f082310b69..00000000000 --- a/plugins/lb-plugin/lb/util.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Non-LB specific stuff comes here - */ - -#ifndef LB_PLUGIN_LB_UTIL_H_ -#define LB_PLUGIN_LB_UTIL_H_ - -#include <vnet/vnet.h> -#include <vnet/ip/ip.h> - -#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6) -#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46)) -#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6) - -void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen); -uword unformat_ip46_prefix (unformat_input_t * input, va_list * args); -u8 *format_ip46_prefix (u8 * s, va_list * args); - -/** - * 32 bits integer comparison for running values. - * 1 > 0 is true. But 1 > 0xffffffff also is. - */ -#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff) - -#endif /* LB_PLUGIN_LB_UTIL_H_ */ diff --git a/plugins/lb-plugin/lb_plugin_doc.md b/plugins/lb-plugin/lb_plugin_doc.md deleted file mode 100644 index c7885ffb837..00000000000 --- a/plugins/lb-plugin/lb_plugin_doc.md +++ /dev/null @@ -1,141 +0,0 @@ -# Load Balancer plugin for VPP {#lb_plugin_doc} - -## Version - -The load balancer plugin is currently in *beta* version. -Both CLIs and APIs are subject to *heavy* changes. -Which also means feedback is really welcome regarding features, apis, etc...
- -## Overview - -This plugin provides load balancing for VPP in a way that is largely inspired -by Google's MagLev: http://research.google.com/pubs/pub44824.html - -The load balancer is configured with a set of Virtual IPs (VIP, which can be -prefixes), and for each VIP, with a set of Application Server addresses (ASs). - -Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards -the different ASs in a way that (tries to) ensure that a given session will -always be tunneled to the same AS. - -Both VIPs and ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using -the same encap. type (i.e. IPv4+GRE or IPv6+GRE). Meaning that for a given VIP, -all AS addresses must be of the same family. - -## Performances - -The load balancer has been tested up to 1 million flows and still forwards more -than 3Mpps per core in such circumstances. -Although 3Mpps seems already good, it is likely that performances will be improved -in next versions. - -## Configuration - -### Global LB parameters - -The load balancer needs to be configured with some parameters: - - lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] - [buckets <n>] [timeout <s>] - -ip4-src-address: the source address used to send encap. packets using IPv4. - -ip6-src-address: the source address used to send encap. packets using IPv6. - -buckets: the *per-thread* established-connections-table number of buckets. - -timeout: the number of seconds a connection will remain in the - established-connections-table while no packet for this flow - is received. - - -### Configure the VIPs - - lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del] - -new_len is the size of the new-connection-table. It should be 1 or 2 orders of -magnitude bigger than the number of ASs for the VIP in order to ensure a good -load balancing.
- -Examples: - - lb vip 2002::/16 encap gre6 new_len 1024 - lb vip 2003::/16 encap gre4 new_len 2048 - lb vip 80.0.0.0/8 encap gre6 new_len 16 - lb vip 90.0.0.0/8 encap gre4 new_len 1024 - -### Configure the ASs (for each VIP) - - lb as <vip-prefix> [<address> [<address> [...]]] [del] - -You can add (or delete) as many ASs as needed at a time (for a single VIP). -Note that the AS address family must correspond to the VIP encap. IP family. - -Examples: - - lb as 2002::/16 2001::2 2001::3 2001::4 - lb as 2003::/16 10.0.0.1 10.0.0.2 - lb as 80.0.0.0/8 2001::2 - lb as 90.0.0.0/8 10.0.0.1 - - - -## Monitoring - -The plugin provides quite a bunch of counters and information. -These are still subject to quite significant changes. - - show lb - show lb vip - show lb vip verbose - - show node counters - - -## Design notes - -### Multi-Threading - -MagLev is a distributed system which pseudo-randomly generates a -new-connections-table based on AS names such that each server configured with -the same set of ASs ends up with the same table. Connection stickiness is then -ensured with an established-connections-table. Using ECMP, it is assumed (but -not relied on) that servers will mostly receive traffic for different flows. - -This implementation pushes the parallelism a little bit further by using -one established-connections table per thread. This is equivalent to assuming -that RSS will do a job similar to ECMP, and is pretty useful as threads don't -need to get a lock in order to write in the table. - -### Hash Table - -A load balancer requires an efficient read and write hash table. The hash table -used by ip6-forward is very read-efficient, but not so much for writing. In -addition, it is not a big deal if writing into the hash table fails (again, -MagLev uses a flow table but does not heavily rely on it). - -The plugin therefore uses a very specific (and stupid) hash table.
- - Fixed (and power of 2) number of buckets (configured at runtime) - - Fixed (and power of 2) elements per bucket (configured at compilation time) - -### Reference counting - -When an AS is removed, there are two possible ways to react. - - Keep using the AS for established connections - - Change AS for established connections (likely to cause error for TCP) - -In the first case, although an AS is removed from the configuration, its -associated state needs to stay around as long as it is used by at least one -thread. - -In order to avoid locks, a specific reference counter is used. The design is quite -similar to clib counters but: - - It is possible to decrease the value - - Summing will not zero the per-thread counters - - Only the thread can reallocate its own counters vector (to avoid concurrency issues) - -This reference counter is lock free, but reading a count of 0 does not mean -the value can be freed unless it is ensured by *other* means that no other thread -is concurrently referencing the object. In the case of this plugin, it is assumed -that no concurrent event will take place after a few seconds. - |