From cb034b9b374927c7552e36dcbc306d8456b2a0cb Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 28 Dec 2016 18:38:59 +0100 Subject: Move java,lua api and remaining plugins to src/ Change-Id: I1c3b87e886603678368428ae56a6bd3327cbc90d Signed-off-by: Damjan Marion --- src/plugins/acl/acl.h | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 src/plugins/acl/acl.h (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h new file mode 100644 index 00000000..afc9b289 --- /dev/null +++ b/src/plugins/acl/acl.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_acl_h +#define included_acl_h + +#include +#include +#include +#include + + +#include +#include +#include + +#define ACL_PLUGIN_VERSION_MAJOR 1 +#define ACL_PLUGIN_VERSION_MINOR 1 + +extern vlib_node_registration_t acl_in_node; +extern vlib_node_registration_t acl_out_node; + +void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); +void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); + +enum address_e { IP4, IP6 }; +typedef struct +{ + enum address_e type; + union { + ip6_address_t ip6; + ip4_address_t ip4; + } addr; +} address_t; + +/* + * ACL rules + */ +typedef struct +{ + u8 is_permit; + u8 is_ipv6; + ip46_address_t src; + u8 src_prefixlen; + ip46_address_t dst; + u8 dst_prefixlen; + u8 proto; + u16 src_port_or_type_first; + u16 src_port_or_type_last; + u16 dst_port_or_code_first; + u16 dst_port_or_code_last; + u8 tcp_flags_value; + u8 tcp_flags_mask; +} acl_rule_t; + +typedef struct +{ + u8 is_permit; + u8 is_ipv6; + u8 src_mac[6]; + u8 src_mac_mask[6]; + ip46_address_t src_ip_addr; + u8 src_prefixlen; +} macip_acl_rule_t; + +/* + * ACL + */ +typedef struct +{ + u8 tag[64]; + u32 count; + acl_rule_t *rules; +} acl_list_t; + +typedef struct +{ + u8 tag[64]; + u32 count; + macip_acl_rule_t *rules; + /* References to the classifier tables that will enforce the rules */ + u32 ip4_table_index; + u32 ip6_table_index; + u32 l2_table_index; +} macip_acl_list_t; + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + + acl_list_t *acls; /* Pool of ACLs */ + macip_acl_list_t *macip_acls; /* Pool of MAC-IP ACLs */ + + /* ACLs associated with interfaces */ + u32 **input_acl_vec_by_sw_if_index; + u32 **output_acl_vec_by_sw_if_index; + + /* + * Classify tables used to grab the packets for the ACL check, + * and serving as the 5-tuple session tables at the same time + */ + u32 
*acl_ip4_input_classify_table_by_sw_if_index; + u32 *acl_ip6_input_classify_table_by_sw_if_index; + u32 *acl_ip4_output_classify_table_by_sw_if_index; + u32 *acl_ip6_output_classify_table_by_sw_if_index; + + /* MACIP (input) ACLs associated with the interfaces */ + u32 *macip_acl_by_sw_if_index; + + /* next indices for our nodes in the l2-classify tables */ + u32 l2_input_classify_next_acl; + u32 l2_output_classify_next_acl; + + /* next node indices for feature bitmap */ + u32 acl_in_node_input_next_node_index[32]; + /* the respective thing for the output feature */ + l2_output_next_nodes_st acl_out_output_next_nodes; + + /* ACL match actions (must be coherent across in/out ACLs to next indices (can differ) */ + + u32 acl_in_ip4_match_next[256]; + u32 acl_in_ip6_match_next[256]; + u32 acl_out_ip4_match_next[256]; + u32 acl_out_ip6_match_next[256]; + u32 n_match_actions; + + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + ethernet_main_t * ethernet_main; +} acl_main_t; + +extern acl_main_t acl_main; + + +#endif -- cgit 1.2.3-korg From 3b46cba8f4e909bc363403c6c92215159abb2f11 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 23 Jan 2017 21:13:45 +0100 Subject: Plugin infrastructure improvements This patch replaces requirement for vlib_plugin_register function in the plugin so file and introduces new macro: VLIB_PLUGIN_REGISTER () = { .version = "version string", .version_required = "requred version", .default_disabled = 1, .early_init = "early_init_function_name", }; Plugin will nor be loaded if .default_disabled is set to 1 unless explicitely enabled in startup.conf. If .verstion_required is set, plugin will not be loaded if there is version mismatch between plugin and vpp. This can be bypassed by setting "skip-version-check" for specific plugin. If .early-init string is present, plugin loader will try to resolve this specific symbol in the plugin namespace and make a function call. 
Following startup.conf configuration is added: plugins { path /path/to/plugin/directory plugin ila_plugin.so { enable skip-version-check } plugin acl_plugin.so { disable } } Change-Id: I706c691dd34d94ffe9e02b59831af8859a95f061 Signed-off-by: Damjan Marion --- src/plugins/acl/acl.c | 29 +-- src/plugins/acl/acl.h | 2 - src/plugins/acl/l2sess.c | 13 -- src/plugins/acl/l2sess.h | 1 - src/plugins/flowperpkt/flowperpkt.c | 32 +-- src/plugins/ila/ila.c | 14 +- src/plugins/ioam/encap/ip6_ioam_trace.c | 18 +- src/plugins/lb/lb.c | 15 +- src/plugins/sixrd/sixrd.c | 23 +-- src/plugins/snat/snat.c | 27 +-- src/plugins/snat/snat.h | 1 - src/vlib/unix/main.c | 8 + src/vlib/unix/plugin.c | 339 ++++++++++++++++++++++++++------ src/vlib/unix/plugin.h | 31 ++- src/vnet/plugin/plugin.h | 9 +- src/vpp/vnet/main.c | 15 +- 16 files changed, 367 insertions(+), 210 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index f4db2013..85c9113b 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -128,29 +129,11 @@ _(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del) \ _(MACIP_ACL_DUMP, macip_acl_dump) \ _(MACIP_ACL_INTERFACE_GET, macip_acl_interface_get) -/* - * This routine exists to convince the vlib plugin framework that - * we haven't accidentally copied a random .dll into the plugin directory. 
- * - * Also collects global variable pointers passed from the vpp engine - */ - -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - acl_main_t *am = &acl_main; - clib_error_t *error = 0; - - am->vlib_main = vm; - am->vnet_main = h->vnet_main; - am->ethernet_main = h->ethernet_main; - - l2sess_vlib_plugin_register(vm, h, from_early_init); - - return error; -} - +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ static void vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp) diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index afc9b289..62046788 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -17,7 +17,6 @@ #include #include -#include #include @@ -139,7 +138,6 @@ typedef struct { /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; - ethernet_main_t * ethernet_main; } acl_main_t; extern acl_main_t acl_main; diff --git a/src/plugins/acl/l2sess.c b/src/plugins/acl/l2sess.c index cc9bde44..b0385be1 100644 --- a/src/plugins/acl/l2sess.c +++ b/src/plugins/acl/l2sess.c @@ -30,19 +30,6 @@ #include #include -void -l2sess_vlib_plugin_register (vlib_main_t * vm, void* hh, - int from_early_init) -{ - l2sess_main_t *sm = &l2sess_main; - vnet_plugin_handoff_t * h = hh; - memset (sm, 0, sizeof (*sm)); - - sm->vlib_main = vm; - sm->vnet_main = h->vnet_main; - sm->ethernet_main = h->ethernet_main; -} - void l2sess_init_next_features_input (vlib_main_t * vm, l2sess_main_t * sm) { diff --git a/src/plugins/acl/l2sess.h b/src/plugins/acl/l2sess.h index db899917..888b5301 100644 --- a/src/plugins/acl/l2sess.h +++ b/src/plugins/acl/l2sess.h @@ -132,7 +132,6 @@ foreach_l2sess_node /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; - ethernet_main_t * ethernet_main; /* Counter(s) */ u64 counter_attempted_delete_free_session; diff --git a/src/plugins/flowperpkt/flowperpkt.c 
b/src/plugins/flowperpkt/flowperpkt.c index cc351599..6b292eef 100644 --- a/src/plugins/flowperpkt/flowperpkt.c +++ b/src/plugins/flowperpkt/flowperpkt.c @@ -24,6 +24,7 @@ */ #include +#include #include #include @@ -479,30 +480,11 @@ static void *vl_api_flowperpkt_tx_interface_add_del_t_print #define foreach_flowperpkt_plugin_api_msg \ _(FLOWPERPKT_TX_INTERFACE_ADD_DEL, flowperpkt_tx_interface_add_del) -/** - * @brief plugin-api required function - * @param vm vlib_main_t * vlib main data structure pointer - * @param h vlib_plugin_handoff_t * handoff structure - * @param from_early_init int notused - * - * Notes: - * This routine exists to convince the vlib plugin framework that - * we haven't accidentally copied a random .dll into the plugin directory. - * - * Also collects global variable pointers passed from the vpp engine - */ -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - flowperpkt_main_t *fm = &flowperpkt_main; - clib_error_t *error = 0; - - fm->vlib_main = vm; - fm->vnet_main = h->vnet_main; - - return error; -} +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ static clib_error_t * flowperpkt_tx_interface_add_del_feature_command_fn (vlib_main_t * vm, @@ -627,6 +609,8 @@ flowperpkt_init (vlib_main_t * vm) u32 num_threads; u8 *name; + fm->vnet_main = vnet_get_main (); + /* Construct the API name */ name = format (0, "flowperpkt_%08x%c", api_version, 0); diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c index 336f4cf5..e0f3907f 100644 --- a/src/plugins/ila/ila.c +++ b/src/plugins/ila/ila.c @@ -18,6 +18,7 @@ #include #include #include +#include static ila_main_t ila_main; @@ -821,14 +822,11 @@ ila_interface (u32 sw_if_index, u8 disable) return 0; } -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - clib_error_t *error = 0; - - return error; -} +/* *INDENT-OFF* */ 
+VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ u8 *format_ila_dpo (u8 * s, va_list * va) { diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c index 3a6758cd..a836dfe8 100644 --- a/src/plugins/ioam/encap/ip6_ioam_trace.c +++ b/src/plugins/ioam/encap/ip6_ioam_trace.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -349,18 +350,11 @@ VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = { }; /* *INDENT-ON* */ -/* - * This routine exists to convince the vlib plugin framework that - * we haven't accidentally copied a random .dll into the plugin directory. - * - * Also collects global variable pointers passed from the vpp engine - */ -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - return 0; -} +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ static clib_error_t * ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm) diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index 1d9b9870..dc3f5be1 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -15,6 +15,7 @@ #include #include +#include #include //GC runs at most once every so many seconds @@ -730,15 +731,11 @@ int lb_vip_del(u32 vip_index) return 0; } -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, - vnet_plugin_handoff_t * h, - int from_early_init) -{ - clib_error_t *error = 0; - return error; -} - +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ u8 *format_lb_dpo (u8 * s, va_list * va) { diff --git a/src/plugins/sixrd/sixrd.c b/src/plugins/sixrd/sixrd.c index 66e631a2..71fc181f 100644 --- a/src/plugins/sixrd/sixrd.c +++ b/src/plugins/sixrd/sixrd.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * This code supports the following sixrd modes: @@ -340,27 +341,19 @@ VLIB_CLI_COMMAND(show_sixrd_stats_command, static) = { .function 
= show_sixrd_stats_command_fn, }; -/* - * This routine exists to convince the vlib plugin framework that - * we haven't accidentally copied a random .dll into the plugin directory. - * - * Also collects global variable pointers passed from the vpp engine - */ -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ + +static clib_error_t * sixrd_init (vlib_main_t * vm) { - clib_error_t * error = 0; sixrd_main_t *mm = &sixrd_main; mm->vnet_main = vnet_get_main(); mm->vlib_main = vm; - return error; -} - -static clib_error_t * sixrd_init (vlib_main_t * vm) -{ sixrd_dpo_module_init (); return (NULL); diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c index 0fcd6ce8..750cc925 100644 --- a/src/plugins/snat/snat.c +++ b/src/plugins/snat/snat.c @@ -28,6 +28,7 @@ #include #include #include +#include snat_main_t snat_main; @@ -129,27 +130,11 @@ VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { .runs_before = VNET_FEATURES ("ip4-lookup"), }; - -/* - * This routine exists to convince the vlib plugin framework that - * we haven't accidentally copied a random .dll into the plugin directory. 
- * - * Also collects global variable pointers passed from the vpp engine - */ - -clib_error_t * -vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, - int from_early_init) -{ - snat_main_t * sm = &snat_main; - clib_error_t * error = 0; - - sm->vlib_main = vm; - sm->vnet_main = h->vnet_main; - sm->ethernet_main = h->ethernet_main; - - return error; -} +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, +}; +/* *INDENT-ON* */ /*$$$$$ move to an installed header file */ #if (1 || CLIB_DEBUG > 0) /* "trust, but verify" */ diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h index 32dc9f9e..39cbd3f8 100644 --- a/src/plugins/snat/snat.h +++ b/src/plugins/snat/snat.h @@ -233,7 +233,6 @@ typedef struct { vnet_main_t * vnet_main; ip4_main_t * ip4_main; ip_lookup_main_t * ip4_lookup_main; - ethernet_main_t * ethernet_main; api_main_t * api_main; } snat_main_t; diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 562778e0..a04d9f9c 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -503,6 +503,14 @@ vlib_unix_main (int argc, char *argv[]) vm->heap_base = clib_mem_get_heap (); ASSERT (vm->heap_base); + unformat_init_command_line (&input, (char **) vm->argv); + if ((e = vlib_plugin_config (vm, &input))) + { + clib_error_report (e); + return 1; + } + unformat_free (&input); + i = vlib_plugin_early_init (vm); if (i) return i; diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index 40399090..4aba95c8 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -16,29 +16,16 @@ */ #include +#include #include #include plugin_main_t vlib_plugin_main; -void -vlib_set_get_handoff_structure_cb (void *cb) -{ - plugin_main_t *pm = &vlib_plugin_main; - pm->handoff_structure_get_cb = cb; -} - -static void * -vnet_get_handoff_structure (void) -{ - void *(*fp) (void); - - fp = vlib_plugin_main.handoff_structure_get_cb; - if (fp == 0) - return 0; - else - return (*fp) (); -} +char *vlib_plugin_path 
__attribute__ ((weak)); +char *vlib_plugin_path = ""; +char *vlib_plugin_app_version __attribute__ ((weak)); +char *vlib_plugin_app_version = ""; void * vlib_get_plugin_symbol (char *plugin_name, char *symbol_name) @@ -54,13 +41,99 @@ vlib_get_plugin_symbol (char *plugin_name, char *symbol_name) return dlsym (pi->handle, symbol_name); } +static char * +str_array_to_vec (char *array, int len) +{ + char c, *r = 0; + int n = 0; + + do + { + c = array[n]; + vec_add1 (r, c); + } + while (c && ++n < len); + + if (c) + vec_add1 (r, 0); + + return r; +} + static int load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init) { - void *handle, *register_handle; - clib_error_t *(*fp) (vlib_main_t *, void *, int); + void *handle; clib_error_t *error; - void *handoff_structure; + elf_main_t em = { 0 }; + elf_section_t *section; + u8 *data; + char *version_required; + vlib_plugin_registration_t *reg; + plugin_config_t *pc = 0; + uword *p; + + if (elf_read_file (&em, (char *) pi->filename)) + return -1; + + error = elf_get_section_by_name (&em, ".vlib_plugin_registration", + §ion); + if (error) + { + clib_warning ("Not a plugin: %s\n", (char *) pi->name); + return -1; + } + + data = elf_get_section_contents (&em, section->index, 1); + reg = (vlib_plugin_registration_t *) data; + + if (vec_len (data) != sizeof (*reg)) + { + clib_warning ("vlib_plugin_registration size mismatch in plugin %s\n", + (char *) pi->name); + goto error; + } + + p = hash_get_mem (pm->config_index_by_name, pi->name); + if (p) + { + pc = vec_elt_at_index (pm->configs, p[0]); + if (pc->is_disabled) + { + clib_warning ("Plugin disabled: %s", pi->name); + goto error; + } + if (reg->default_disabled && pc->is_enabled == 0) + { + clib_warning ("Plugin disabled: %s (default)", pi->name); + goto error; + } + } + else if (reg->default_disabled) + { + clib_warning ("Plugin disabled: %s (default)", pi->name); + goto error; + } + + version_required = str_array_to_vec ((char *) ®->version_required, + 
sizeof (reg->version_required)); + + if ((strlen (version_required) > 0) && + (strncmp (vlib_plugin_app_version, version_required, + strlen (version_required)))) + { + clib_warning ("Plugin %s version mismatch: %s != %s", + pi->name, vlib_plugin_app_version, reg->version_required); + if (!(pc && pc->skip_version_check == 1)) + { + vec_free (version_required); + goto error; + } + } + + vec_free (version_required); + vec_free (data); + elf_main_free (&em); handle = dlopen ((char *) pi->filename, RTLD_LAZY); @@ -77,35 +150,47 @@ load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init) pi->handle = handle; + reg = dlsym (pi->handle, "vlib_plugin_registration"); - register_handle = dlsym (pi->handle, "vlib_plugin_register"); - if (register_handle == 0) + if (reg == 0) { - dlclose (handle); - clib_warning ("Plugin missing vlib_plugin_register: %s\n", - (char *) pi->name); - return 1; + /* This should never happen unless somebody chagnes registration macro */ + clib_warning ("Missing plugin registration in plugin '%s'", pi->name); + os_exit (1); } - fp = register_handle; - - handoff_structure = vnet_get_handoff_structure (); + pi->reg = reg; + pi->version = str_array_to_vec ((char *) ®->version, + sizeof (reg->version)); - if (handoff_structure == 0) - error = clib_error_return (0, "handoff structure callback returned 0"); - else - error = (*fp) (pm->vlib_main, handoff_structure, from_early_init); - - if (error) + if (reg && reg->early_init) { - clib_error_report (error); - dlclose (handle); - return 1; + clib_error_t *(*ei) (vlib_main_t *); + void *h; + + h = dlsym (pi->handle, reg->early_init); + if (h) + { + ei = h; + error = (*ei) (pm->vlib_main); + if (error) + { + clib_error_report (error); + os_exit (1); + } + } + else + clib_warning ("Plugin %s: early init function %s set but not found", + (char *) pi->name, reg->early_init); } clib_warning ("Loaded plugin: %s", pi->name); return 0; +error: + vec_free (data); + elf_main_free (&em); + return 
-1; } static u8 ** @@ -215,23 +300,16 @@ vlib_load_new_plugins (plugin_main_t * pm, int from_early_init) return 0; } -char *vlib_plugin_path __attribute__ ((weak)); -char *vlib_plugin_path = ""; -char *vlib_plugin_name_filter __attribute__ ((weak)); -char *vlib_plugin_name_filter = 0; - int vlib_plugin_early_init (vlib_main_t * vm) { plugin_main_t *pm = &vlib_plugin_main; - pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0); + if (pm->plugin_path == 0) + pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0); clib_warning ("plugin path %s", pm->plugin_path); - if (vlib_plugin_name_filter) - pm->plugin_name_filter = format (0, "%s%c", vlib_plugin_name_filter, 0); - pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword)); pm->vlib_main = vm; @@ -245,19 +323,24 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm, plugin_main_t *pm = &vlib_plugin_main; u8 *s = 0; u8 *key = 0; - uword *value = 0; + uword value = 0; int index = 1; + plugin_info_t *pi; - s = format (s, " Plugin path is: %s\n", pm->plugin_path); - if (vlib_plugin_name_filter) - s = format (s, " Plugin filter: %s\n", vlib_plugin_name_filter); + s = format (s, " Plugin path is: %s\n\n", pm->plugin_path); + s = format (s, " %-41s%s\n", "Plugin", "Version"); - s = format (s, " Plugins loaded: \n"); + /* *INDENT-OFF* */ hash_foreach_mem (key, value, pm->plugin_by_name_hash, - { - if (key != 0) - s = format (s, " %d.%s\n", index, key); index++;} - ); + { + if (key != 0) + { + pi = vec_elt_at_index (pm->plugin_info, value); + s = format (s, "%3d. 
%-40s %s\n", index, key, pi->version); + index++; + } + }); + /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_free (s); @@ -273,6 +356,148 @@ VLIB_CLI_COMMAND (plugins_show_cmd, static) = }; /* *INDENT-ON* */ +static clib_error_t * +config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input) +{ + plugin_main_t *pm = &vlib_plugin_main; + plugin_config_t *pc; + clib_error_t *error = 0; + uword *p; + int is_enable = 0; + int is_disable = 0; + int skip_version_check = 0; + + if (pm->config_index_by_name == 0) + pm->config_index_by_name = hash_create_string (0, sizeof (uword)); + + p = hash_get_mem (pm->config_index_by_name, name); + + if (p) + { + error = clib_error_return (0, "plugin '%s' already configured", name); + goto done; + } + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "enable")) + is_enable = 1; + else if (unformat (input, "disable")) + is_disable = 1; + else if (unformat (input, "skip-version-check")) + skip_version_check = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + goto done; + } + } + + if (is_enable && is_disable) + { + error = clib_error_return (0, "please specify either enable or disable" + " for plugin '%s'", name); + goto done; + } + + vec_add2 (pm->configs, pc, 1); + hash_set_mem (pm->config_index_by_name, name, pc - pm->configs); + pc->is_enabled = is_enable; + pc->is_disabled = is_disable; + pc->skip_version_check = skip_version_check; + pc->name = name; + +done: + return error; +} + +clib_error_t * +vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input) +{ + plugin_main_t *pm = &vlib_plugin_main; + clib_error_t *error = 0; + unformat_input_t in; + + unformat_init (&in, 0, 0); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u8 *s, *v; + if (unformat (input, "%s %v", &s, &v)) + { + if (strncmp ((const char *) s, "plugins", 8) == 0) + { + if (vec_len (in.buffer) > 0) + vec_add1 (in.buffer, ' 
'); + vec_add (in.buffer, v, vec_len (v)); + } + } + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + goto done; + } + + vec_free (v); + vec_free (s); + } +done: + input = ∈ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + unformat_input_t sub_input; + u8 *s = 0; + if (unformat (input, "path %s", &s)) + pm->plugin_path = s; + else if (unformat (input, "plugin %s %U", &s, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = config_one_plugin (vm, (char *) s, &sub_input); + unformat_free (&sub_input); + if (error) + goto done2; + } + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + { + vec_free (s); + goto done2; + } + } + } + +done2: + unformat_free (&in); + return error; +} + +/* discard whole 'plugins' section, as it is already consumed prior to + plugin load */ +static clib_error_t * +plugins_config (vlib_main_t * vm, unformat_input_t * input) +{ + u8 *junk; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%s", &junk)) + { + vec_free (junk); + return 0; + } + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (plugins_config, "plugins"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/unix/plugin.h b/src/vlib/unix/plugin.h index 1c74cdd2..01fec356 100644 --- a/src/vlib/unix/plugin.h +++ b/src/vlib/unix/plugin.h @@ -56,7 +56,14 @@ * vlib_load_new_plugins(). 
*/ - +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct { + u8 default_disabled; + const char version[32]; + const char version_required[32]; + const char *early_init; +}) vlib_plugin_registration_t; +/* *INDENT-ON* */ typedef struct { @@ -64,8 +71,20 @@ typedef struct u8 *filename; struct stat file_info; void *handle; + + /* plugin registration */ + vlib_plugin_registration_t *reg; + char *version; } plugin_info_t; +typedef struct +{ + char *name; + u8 is_disabled; + u8 is_enabled; + u8 skip_version_check; +} plugin_config_t; + typedef struct { /* loaded plugin info */ @@ -76,8 +95,9 @@ typedef struct u8 *plugin_path; u8 *plugin_name_filter; - /* handoff structure get callback */ - void *handoff_structure_get_cb; + /* plugin configs and hash by name */ + plugin_config_t *configs; + uword *config_index_by_name; /* usual */ vlib_main_t *vlib_main; @@ -85,10 +105,15 @@ typedef struct extern plugin_main_t vlib_plugin_main; +clib_error_t *vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input); int vlib_plugin_early_init (vlib_main_t * vm); int vlib_load_new_plugins (plugin_main_t * pm, int from_early_init); void *vlib_get_plugin_symbol (char *plugin_name, char *symbol_name); +#define VLIB_PLUGIN_REGISTER() \ + vlib_plugin_registration_t vlib_plugin_registration \ + __attribute__((__section__(".vlib_plugin_registration"))) + #endif /* __included_plugin_h__ */ /* diff --git a/src/vnet/plugin/plugin.h b/src/vnet/plugin/plugin.h index a14a5932..6e1a3264 100644 --- a/src/vnet/plugin/plugin.h +++ b/src/vnet/plugin/plugin.h @@ -20,13 +20,6 @@ #include #include #include - -/* Pointers to Genuine Vnet data structures handed to plugin .dll's */ -typedef struct { - vnet_main_t * vnet_main; - ethernet_main_t * ethernet_main; -} vnet_plugin_handoff_t; - -void * vnet_get_handoff_structure (void); +#include #endif /* included_vnet_plugin_h */ diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index a252b846..4a96ca94 100644 --- a/src/vpp/vnet/main.c +++ 
b/src/vpp/vnet/main.c @@ -18,7 +18,7 @@ #include #include #include - +#include #include @@ -39,16 +39,7 @@ vpe_main_init (vlib_main_t * vm) * Load plugins from /usr/lib/vpp_plugins by default */ char *vlib_plugin_path = "/usr/lib/vpp_plugins"; - -void * -vnet_get_handoff_structure (void) -{ - static vnet_plugin_handoff_t _rv, *rv = &_rv; - - rv->vnet_main = vnet_get_main (); - rv->ethernet_main = ðernet_main; - return (void *) rv; -} +char *vlib_plugin_app_version = VPP_BUILD_VER; int main (int argc, char *argv[]) @@ -59,7 +50,6 @@ main (int argc, char *argv[]) uword main_heap_size = (1ULL << 30); u8 *sizep; u32 size; - void vlib_set_get_handoff_structure_cb (void *cb); #if __x86_64__ CLIB_UNUSED (const char *msg) @@ -206,7 +196,6 @@ defaulted: #if DPDK == 0 unix_physmem_init (vm, 0 /* fail_if_physical_memory_not_present */ ); #endif - vlib_set_get_handoff_structure_cb (&vnet_get_handoff_structure); return vlib_unix_main (argc, argv); } else -- cgit 1.2.3-korg From cc40565b6eb3136d42fd67b57e4f0e01a5970a72 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Fri, 3 Mar 2017 13:11:30 +0000 Subject: VPP-651: Ensure sw_if_index to node mapping for L2 output path is only done via l2output_main.next_nodes Before this commit, several output features that happen to be the last in the list of features to be executed, send the packets directly to -output. To do this, they use l2_output_dispatch, which builds a list of sw_if_index to next index mappings. When interfaces are deleted and the new interfaces are created, these mappings become stale, and cause the packets being sent to wrong interface output nodes. This patch (thanks John Lo for the brilliant idea!) adds a feature node "output", whose sole purpose is dispatching the packets to the correct interface output nodes. To do that, it uses the l2output_main.next_nodes, which is already taken care of for the case of the sw_if_index reuse, so this makes the dependent features all work correctly. 
Since this changes the packet path, for the features that were always the last ones it has triggered a side problem of the output feat_next_node_index not being properly initalized. These two users are l2-output-classify node and the output nodes belonging to the acl-plugin. For the first one the less invasive fix is just to initialize that field. For the acl-plugin nodes, rewrite the affected part of the code to use feat_bitmap_get_next_node_index since this is essentially what the conditional in l2_output_dispatch does, and fix the compiler warnings generated. This fix was first made in stable/1701 under commit e7dcee4027854b0ad076101471afdfff67eb9011. Change-Id: I32e876ab1e1d498cf0854c19c6318dcf59a93805 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.c | 6 +++- src/plugins/acl/acl.h | 5 ++- src/plugins/acl/l2sess.c | 16 ++++++--- src/plugins/acl/l2sess.h | 3 +- src/plugins/acl/l2sess_node.c | 73 ++++++---------------------------------- src/plugins/acl/node_in.c | 2 +- src/plugins/acl/node_out.c | 16 +++------ src/vnet/l2/l2_input.c | 3 +- src/vnet/l2/l2_output.h | 15 ++++++++- src/vnet/l2/l2_output_classify.c | 2 +- 10 files changed, 53 insertions(+), 88 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 85c9113b..3fe084b4 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -1883,7 +1883,11 @@ acl_setup_nodes (void) feat_bitmap_init_next_nodes (vm, acl_in_node.index, L2INPUT_N_FEAT, l2input_get_feat_names (), - am->acl_in_node_input_next_node_index); + am->acl_in_node_feat_next_node_index); + + feat_bitmap_init_next_nodes (vm, acl_out_node.index, L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + am->acl_out_node_feat_next_node_index); memset (&am->acl_in_ip4_match_next[0], 0, sizeof (am->acl_in_ip4_match_next)); diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 62046788..0252ff38 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -122,9 +122,8 @@ 
typedef struct { u32 l2_output_classify_next_acl; /* next node indices for feature bitmap */ - u32 acl_in_node_input_next_node_index[32]; - /* the respective thing for the output feature */ - l2_output_next_nodes_st acl_out_output_next_nodes; + u32 acl_in_node_feat_next_node_index[32]; + u32 acl_out_node_feat_next_node_index[32]; /* ACL match actions (must be coherent across in/out ACLs to next indices (can differ) */ diff --git a/src/plugins/acl/l2sess.c b/src/plugins/acl/l2sess.c index b0385be1..7a1567fb 100644 --- a/src/plugins/acl/l2sess.c +++ b/src/plugins/acl/l2sess.c @@ -31,10 +31,18 @@ #include void -l2sess_init_next_features_input (vlib_main_t * vm, l2sess_main_t * sm) +l2sess_init_next_features (vlib_main_t * vm, l2sess_main_t * sm) { -#define _(node_name, node_var, is_out, is_ip6, is_track) \ - if (!is_out) feat_bitmap_init_next_nodes(vm, node_var.index, L2INPUT_N_FEAT, l2input_get_feat_names (), sm->node_var ## _input_next_node_index); +#define _(node_name, node_var, is_out, is_ip6, is_track) \ + if (is_out) \ + feat_bitmap_init_next_nodes(vm, node_var.index, L2OUTPUT_N_FEAT, \ + l2output_get_feat_names (), \ + sm->node_var ## _feat_next_node_index); \ + else \ + feat_bitmap_init_next_nodes(vm, node_var.index, L2INPUT_N_FEAT, \ + l2input_get_feat_names (), \ + sm->node_var ## _feat_next_node_index); + foreach_l2sess_node #undef _ } @@ -62,7 +70,7 @@ l2sess_setup_nodes (void) vlib_main_t *vm = vlib_get_main (); l2sess_main_t *sm = &l2sess_main; - l2sess_init_next_features_input (vm, sm); + l2sess_init_next_features (vm, sm); l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-input-classify", 0); l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-output-classify", 1); diff --git a/src/plugins/acl/l2sess.h b/src/plugins/acl/l2sess.h index 888b5301..961c08c8 100644 --- a/src/plugins/acl/l2sess.h +++ b/src/plugins/acl/l2sess.h @@ -95,8 +95,7 @@ typedef struct { * on whether the node is an input or output one. 
*/ #define _(node_name, node_var, is_out, is_ip6, is_track) \ - u32 node_var ## _input_next_node_index[32]; \ - l2_output_next_nodes_st node_var ## _next_nodes; + u32 node_var ## _feat_next_node_index[32]; foreach_l2sess_node #undef _ l2_output_next_nodes_st output_next_nodes; diff --git a/src/plugins/acl/l2sess_node.c b/src/plugins/acl/l2sess_node.c index 520e5929..689d216d 100644 --- a/src/plugins/acl/l2sess_node.c +++ b/src/plugins/acl/l2sess_node.c @@ -526,13 +526,13 @@ check_idle_sessions (l2sess_main_t * sm, u32 sw_if_index, u64 now) static uword l2sess_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) + vlib_node_runtime_t * node, vlib_frame_t * frame, + int node_is_out, int node_is_ip6, int node_is_track, + u32 *feat_next_node_index) { u32 n_left_from, *from, *to_next; l2sess_next_t next_index; u32 pkts_swapped = 0; - u32 cached_sw_if_index = (u32) ~ 0; - u32 cached_next_index = (u32) ~ 0; u32 feature_bitmap0; u32 trace_flags0; @@ -570,45 +570,19 @@ l2sess_node_fn (vlib_main_t * vm, //en0 = vlib_buffer_get_current (b0); /* - * The non-boilerplate is in the block below. - * Note first a magic macro block that sets up the behavior qualifiers: * node_is_out : 1 = is output, 0 = is input * node_is_ip6 : 1 = is ip6, 0 = is ip4 * node_is_track : 1 = is a state tracking node, 0 - is a session addition node * - * Subsequently the code adjusts its behavior depending on these variables. - * It's most probably not great performance wise but much easier to work with. - * + * The below code adjust the behavior according to these parameters. */ { - int node_is_out = -1; - CLIB_UNUSED (int node_is_ip6) = -1; - CLIB_UNUSED (int node_is_track) = -1; - u32 node_index = 0; u32 session_tables[2] = { ~0, ~0 }; u32 session_nexts[2] = { ~0, ~0 }; - l2_output_next_nodes_st *next_nodes = 0; - u32 *input_feat_next_node_index; u8 l4_proto; u64 now = clib_cpu_time_now (); -/* - * Set the variables according to which of the 8 nodes we are. 
- * Hopefully the compiler is smart enough to eliminate the extraneous. - */ -#define _(node_name, node_var, is_out, is_ip6, is_track) \ -if(node_var.index == node->node_index) \ - { \ - node_is_out = is_out; \ - node_is_ip6 = is_ip6; \ - node_is_track = is_track; \ - node_index = node_var.index; \ - next_nodes = &sm->node_var ## _next_nodes; \ - input_feat_next_node_index = sm->node_var ## _input_next_node_index; \ - } - foreach_l2sess_node -#undef _ - trace_flags0 = 0; + trace_flags0 = 0; if (node_is_out) { sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; @@ -715,38 +689,8 @@ if(node_var.index == node->node_index) \ check_idle_sessions (sm, sw_if_index0, now); } - if (node_is_out) - { - if (feature_bitmap0) - { - trace_flags0 |= 0x10; - } - if (sw_if_index0 == cached_sw_if_index) - { - trace_flags0 |= 0x20; - } - l2_output_dispatch (sm->vlib_main, - sm->vnet_main, - node, - node_index, - &cached_sw_if_index, - &cached_next_index, - next_nodes, - b0, sw_if_index0, feature_bitmap0, - &next0); - trace_flags0 |= 2; - - } - else - { - next0 = - feat_bitmap_get_next_node_index (input_feat_next_node_index, + next0 = feat_bitmap_get_next_node_index (feat_next_node_index, feature_bitmap0); - trace_flags0 |= 4; - - } - - if (next0 >= node->n_next_nodes) { @@ -795,7 +739,10 @@ node_var ## node_fn (vlib_main_t * vm, \ vlib_node_runtime_t * node, \ vlib_frame_t * frame) \ { \ - return l2sess_node_fn(vm, node, frame); \ + l2sess_main_t *sm = &l2sess_main; \ + return l2sess_node_fn(vm, node, frame, \ + is_out, is_ip6, is_track, \ + sm->node_var ## _feat_next_node_index); \ } \ VLIB_REGISTER_NODE (node_var) = { \ .function = node_var ## node_fn, \ diff --git a/src/plugins/acl/node_in.c b/src/plugins/acl/node_in.c index 2a5199a9..95802df5 100644 --- a/src/plugins/acl/node_in.c +++ b/src/plugins/acl/node_in.c @@ -73,7 +73,7 @@ acl_in_node_fn (vlib_main_t * vm, u32 feature_bitmap0; u32 trace_bitmap = 0; u32 *input_feat_next_node_index = - 
acl_main.acl_in_node_input_next_node_index; + acl_main.acl_in_node_feat_next_node_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/plugins/acl/node_out.c b/src/plugins/acl/node_out.c index 50af3679..cbec3b9a 100644 --- a/src/plugins/acl/node_out.c +++ b/src/plugins/acl/node_out.c @@ -68,13 +68,12 @@ acl_out_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { acl_main_t *am = &acl_main; - l2_output_next_nodes_st *next_nodes = &am->acl_out_output_next_nodes; + u32 *output_feat_next_node_index = + am->acl_out_node_feat_next_node_index; u32 n_left_from, *from, *to_next; acl_out_next_t next_index; u32 pkts_acl_checked = 0; u32 feature_bitmap0; - u32 cached_sw_if_index = (u32) ~ 0; - u32 cached_next_index = (u32) ~ 0; u32 match_acl_index = ~0; u32 match_rule_index = ~0; u32 trace_bitmap = 0; @@ -119,14 +118,9 @@ acl_out_node_fn (vlib_main_t * vm, } if (next0 == ~0) { - l2_output_dispatch (vm, - am->vnet_main, - node, - acl_out_node.index, - &cached_sw_if_index, - &cached_next_index, - next_nodes, - b0, sw_if_index0, feature_bitmap0, &next0); + next0 = + feat_bitmap_get_next_node_index (output_feat_next_node_index, + feature_bitmap0); } diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index fbd75f22..3aa5331b 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -703,10 +703,11 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ shg = 0; /* not used in xconnect */ } - /* set up split-horizon group */ + /* set up split-horizon group and set output feature bit */ config->shg = shg; out_config = l2output_intf_config (sw_if_index); out_config->shg = shg; + out_config->feature_bitmap |= L2OUTPUT_FEAT_OUTPUT; /* * Test: remove this when non-IP features can be configured. 
diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h index c683b1ad..9597205c 100644 --- a/src/vnet/l2/l2_output.h +++ b/src/vnet/l2/l2_output.h @@ -94,6 +94,7 @@ l2output_main_t l2output_main; /* Mappings from feature ID to graph node name */ #define foreach_l2output_feat \ + _(OUTPUT, "interface-output") \ _(SPAN, "feature-bitmap-drop") \ _(CFM, "feature-bitmap-drop") \ _(QOS, "feature-bitmap-drop") \ @@ -217,9 +218,21 @@ l2_output_dispatch (vlib_main_t * vlib_main, vlib_buffer_t * b0, u32 sw_if_index, u32 feature_bitmap, u32 * next0) { - if (feature_bitmap) + /* + * The output feature bitmap always have at least the output feature bit set + * for a normal L2 interface (or all 0's if the interface is changed from L2 + * to L3 mode). So if next_nodes specified is that from the l2-output node and + * the bitmap is all clear except output feature bit, we know there is no more + * feature and will fall through to output packet. If next_nodes is from a L2 + * output feature node (and not l2-output), we always want to get the node for + * the next L2 output feature, including the last feature being interface- + * output node to output packet. 
+ */ + if ((next_nodes != &l2output_main.next_nodes) + || ((feature_bitmap & ~L2OUTPUT_FEAT_OUTPUT) != 0)) { /* There are some features to execute */ + ASSERT (feature_bitmap != 0); /* Save bitmap for the next feature graph nodes */ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c index 27d5eb39..832be1a1 100644 --- a/src/vnet/l2/l2_output_classify.c +++ b/src/vnet/l2/l2_output_classify.c @@ -493,7 +493,7 @@ l2_output_classify_init (vlib_main_t * vm) l2_output_classify_node.index, L2OUTPUT_N_FEAT, l2output_get_feat_names (), - cm->feat_next_node_index); + cm->next_nodes.feat_next_node_index); rt->l2cm = cm; rt->vcm = cm->vnet_classify_main; -- cgit 1.2.3-korg From d2a59bed1e6b368a46608fd8ff631b770af8805f Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 21 Mar 2017 10:31:55 +0100 Subject: ACL plugin 1.2 L3 path support, L2+L3 unified processing node, skip IPv6 EH support. Change-Id: Iac37a466ba1c035e5c2997b03c0743bfec5c9a08 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl.am | 1 + src/plugins/acl/acl.c | 351 +++++++++- src/plugins/acl/acl.h | 122 +++- src/plugins/acl/bihash_40_8.h | 89 +++ src/plugins/acl/fa_node.c | 1444 +++++++++++++++++++++++++++++++++++++++++ src/plugins/acl/fa_node.h | 99 +++ test/test_acl_plugin_l2l3.py | 722 +++++++++++++++++++++ 7 files changed, 2809 insertions(+), 19 deletions(-) create mode 100644 src/plugins/acl/bihash_40_8.h create mode 100644 src/plugins/acl/fa_node.c create mode 100644 src/plugins/acl/fa_node.h create mode 100644 test/test_acl_plugin_l2l3.py (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl.am b/src/plugins/acl.am index efed31c2..03328f6d 100644 --- a/src/plugins/acl.am +++ b/src/plugins/acl.am @@ -18,6 +18,7 @@ acl_plugin_la_SOURCES = \ acl/acl.c \ acl/node_in.c \ acl/node_out.c \ + acl/fa_node.c \ acl/l2sess.c \ acl/l2sess_node.c \ acl/l2sess.h \ diff --git a/src/plugins/acl/acl.c 
b/src/plugins/acl/acl.c index 0d06531d..476fbc33 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -54,6 +54,7 @@ #include "node_in.h" #include "node_out.h" +#include "fa_node.h" acl_main_t acl_main; @@ -382,7 +383,7 @@ acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index) am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = ~0; acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip4, &ip4_table_index, 0); } if (am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] != ~0) @@ -392,7 +393,7 @@ acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index) am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = ~0; acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip6, &ip6_table_index, 0); } @@ -420,7 +421,7 @@ acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index) am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = ~0; acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip4, &ip4_table_index, 0); } if (am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] != ~0) @@ -430,7 +431,7 @@ acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index) am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = ~0; acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip6, &ip6_table_index, 0); } @@ -450,20 +451,20 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) rv = acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip4, &ip4_table_index, 1); if (rv) return rv; rv = 
acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip6, &ip6_table_index, 1); if (rv) { acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip4, &ip4_table_index, 0); return rv; } @@ -477,11 +478,11 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) { acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip6, &ip6_table_index, 0); acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_input_classify_next_acl, + am->l2_input_classify_next_acl_ip4, &ip4_table_index, 0); return rv; } @@ -508,20 +509,20 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) rv = acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip4, &ip4_table_index, 1); if (rv) return rv; rv = acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip6, &ip6_table_index, 1); if (rv) { acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip4, &ip4_table_index, 0); return rv; } @@ -535,11 +536,11 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) { acl_classify_add_del_table_big (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip6, &ip6_table_index, 0); acl_classify_add_del_table_big (cm, ip4_5tuple_mask, sizeof (ip4_5tuple_mask) - 1, ~0, - am->l2_output_classify_next_acl, + am->l2_output_classify_next_acl_ip4, &ip4_table_index, 0); return rv; } 
@@ -554,6 +555,7 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) } + int acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index, int enable_disable) @@ -565,6 +567,8 @@ acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index, sw_if_index)) return VNET_API_ERROR_INVALID_SW_IF_INDEX; + acl_fa_enable_disable(sw_if_index, 1, enable_disable); + if (enable_disable) { rv = acl_hook_l2_input_classify (am, sw_if_index); @@ -588,6 +592,8 @@ acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index, sw_if_index)) return VNET_API_ERROR_INVALID_SW_IF_INDEX; + acl_fa_enable_disable(sw_if_index, 0, enable_disable); + if (enable_disable) { rv = acl_hook_l2_output_classify (am, sw_if_index); @@ -1820,10 +1826,10 @@ acl_setup_nodes (void) vlib_node_t *n; n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify"); - am->l2_input_classify_next_acl = + am->l2_input_classify_next_acl_old = vlib_node_add_next_with_slot (vm, n->index, acl_in_node.index, ~0); n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify"); - am->l2_output_classify_next_acl = + am->l2_output_classify_next_acl_old = vlib_node_add_next_with_slot (vm, n->index, acl_out_node.index, ~0); feat_bitmap_init_next_nodes (vm, acl_in_node.index, L2INPUT_N_FEAT, @@ -1844,11 +1850,299 @@ acl_setup_nodes (void) sizeof (am->acl_out_ip6_match_next)); am->n_match_actions = 0; + am->l2_input_classify_next_acl_ip4 = am->l2_input_classify_next_acl_old; + am->l2_input_classify_next_acl_ip6 = am->l2_input_classify_next_acl_old; + am->l2_output_classify_next_acl_ip4 = am->l2_output_classify_next_acl_old; + am->l2_output_classify_next_acl_ip6 = am->l2_output_classify_next_acl_old; + register_match_action_nexts (0, 0, 0, 0); /* drop */ register_match_action_nexts (~0, ~0, ~0, ~0); /* permit */ register_match_action_nexts (ACL_IN_L2S_INPUT_IP4_ADD, ACL_IN_L2S_INPUT_IP6_ADD, ACL_OUT_L2S_OUTPUT_IP4_ADD, ACL_OUT_L2S_OUTPUT_IP6_ADD); /* permit + create session */ } +void +acl_setup_fa_nodes 
(void) +{ + vlib_main_t *vm = vlib_get_main (); + acl_main_t *am = &acl_main; + vlib_node_t *n, *n4, *n6; + + n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify"); + n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip4-l2"); + n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip6-l2"); + + + am->fa_l2_input_classify_next_acl_ip4 = + vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); + am->fa_l2_input_classify_next_acl_ip6 = + vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); + + feat_bitmap_init_next_nodes (vm, n4->index, L2INPUT_N_FEAT, + l2input_get_feat_names (), + am->fa_acl_in_ip4_l2_node_feat_next_node_index); + + feat_bitmap_init_next_nodes (vm, n6->index, L2INPUT_N_FEAT, + l2input_get_feat_names (), + am->fa_acl_in_ip6_l2_node_feat_next_node_index); + + + n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify"); + n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip4-l2"); + n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip6-l2"); + + am->fa_l2_output_classify_next_acl_ip4 = + vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); + am->fa_l2_output_classify_next_acl_ip6 = + vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); + + feat_bitmap_init_next_nodes (vm, n4->index, L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + am->fa_acl_out_ip4_l2_node_feat_next_node_index); + + feat_bitmap_init_next_nodes (vm, n6->index, L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + am->fa_acl_out_ip6_l2_node_feat_next_node_index); + + am->l2_input_classify_next_acl_ip4 = am->fa_l2_input_classify_next_acl_ip4; + am->l2_input_classify_next_acl_ip6 = am->fa_l2_input_classify_next_acl_ip6; + am->l2_output_classify_next_acl_ip4 = am->fa_l2_output_classify_next_acl_ip4; + am->l2_output_classify_next_acl_ip6 = am->fa_l2_output_classify_next_acl_ip6; + +} + +void +acl_set_timeout_sec(int timeout_type, u32 value) +{ + acl_main_t *am = &acl_main; + l2sess_main_t *sm = &l2sess_main; + clib_time_t *ct = 
&am->vlib_main->clib_time; + + if (timeout_type < ACL_N_TIMEOUTS) { + am->session_timeout_sec[timeout_type] = value; + } else { + clib_warning("Unknown timeout type %d", timeout_type); + return; + } + + switch(timeout_type) { + case ACL_TIMEOUT_UDP_IDLE: + sm->udp_session_idle_timeout = (u64)(((f64)value)/ct->seconds_per_clock); + break; + case ACL_TIMEOUT_TCP_IDLE: + sm->tcp_session_idle_timeout = (u64)(((f64)value)/ct->seconds_per_clock); + break; + case ACL_TIMEOUT_TCP_TRANSIENT: + sm->tcp_session_transient_timeout = (u64)(((f64)value)/ct->seconds_per_clock); + break; + default: + clib_warning("Unknown timeout type %d", timeout_type); + } +} + +void +acl_set_session_max_entries(u32 value) +{ + acl_main_t *am = &acl_main; + am->fa_conn_table_max_entries = value; +} + +int +acl_set_skip_ipv6_eh(u32 eh, u32 value) +{ + acl_main_t *am = &acl_main; + if ((eh < 256) && (value < 2)) + { + am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, eh, value); + return 1; + } + else + return 0; +} + + +static clib_error_t * +acl_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) +{ + acl_main_t *am = &acl_main; + if (0 == is_add) { + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index); + } + return 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (acl_sw_interface_add_del); + +static clib_error_t * +acl_set_aclplugin_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + u32 timeout = 0; + u32 val = 0; + u32 eh_val = 0; + uword memory_size = 0; + acl_main_t *am = &acl_main; + + /* The new datapath is the default. 
This command exists out of precaution and for comparing the two */ + if (unformat (input, "l2-datapath")) { + if (unformat(input, "old")) { + am->l2_input_classify_next_acl_ip4 = am->l2_input_classify_next_acl_old; + am->l2_input_classify_next_acl_ip6 = am->l2_input_classify_next_acl_old; + am->l2_output_classify_next_acl_ip4 = am->l2_output_classify_next_acl_old; + am->l2_output_classify_next_acl_ip6 = am->l2_output_classify_next_acl_old; + goto done; + } + if (unformat(input, "new")) { + am->l2_input_classify_next_acl_ip4 = am->fa_l2_input_classify_next_acl_ip4; + am->l2_input_classify_next_acl_ip6 = am->fa_l2_input_classify_next_acl_ip6; + am->l2_output_classify_next_acl_ip4 = am->fa_l2_output_classify_next_acl_ip4; + am->l2_output_classify_next_acl_ip6 = am->fa_l2_output_classify_next_acl_ip6; + goto done; + } + goto done; + } + if (unformat (input, "skip-ipv6-extension-header %u %u", &eh_val, &val)) { + if(!acl_set_skip_ipv6_eh(eh_val, val)) { + error = clib_error_return(0, "expecting eh=0..255, value=0..1"); + } + goto done; + } + if (unformat (input, "session")) { + if (unformat (input, "clear")) { + acl_main_t *am = &acl_main; + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, ~0); + goto done; + } + if (unformat (input, "table")) { + /* The commands here are for tuning/testing. 
No user-serviceable parts inside */ + if (unformat (input, "max-entries")) { + if (!unformat(input, "%u", &val)) { + error = clib_error_return(0, + "expecting maximum number of entries, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_session_max_entries(val); + goto done; + } + } + if (unformat (input, "hash-table-buckets")) { + if (!unformat(input, "%u", &val)) { + error = clib_error_return(0, + "expecting maximum number of hash table buckets, got `%U`", + format_unformat_error, input); + goto done; + } else { + am->fa_conn_table_hash_num_buckets = val; + goto done; + } + } + if (unformat (input, "hash-table-memory")) { + if (!unformat(input, "%U", unformat_memory_size, &memory_size)) { + error = clib_error_return(0, + "expecting maximum amount of hash table memory, got `%U`", + format_unformat_error, input); + goto done; + } else { + am->fa_conn_table_hash_memory_size = memory_size; + goto done; + } + } + goto done; + } + if (unformat (input, "timeout")) { + if (unformat(input, "udp")) { + if(unformat(input, "idle")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_UDP_IDLE, timeout); + goto done; + } + } + } + if (unformat(input, "tcp")) { + if(unformat(input, "idle")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_TCP_IDLE, timeout); + goto done; + } + } + if(unformat(input, "transient")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_TCP_TRANSIENT, timeout); + goto done; + } + } + } + goto done; + } + } +done: + return error; +} + 
+static clib_error_t * +acl_show_aclplugin_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + acl_main_t *am = &acl_main; + vnet_interface_main_t *im = &am->vnet_main->interface_main; + + vnet_sw_interface_t *swif; + + if (unformat (input, "sessions")) + { + u8 * out0 = 0; + pool_foreach (swif, im->sw_interfaces, + ({ + u32 sw_if_index = swif->sw_if_index; + u64 n_adds = sw_if_index < vec_len(am->fa_session_adds_by_sw_if_index) ? am->fa_session_adds_by_sw_if_index[sw_if_index] : 0; + u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? am->fa_session_dels_by_sw_if_index[sw_if_index] : 0; + out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); + })); + vlib_cli_output(vm, "\n\n%s\n\n", out0); + vlib_cli_output(vm, "Sessions per interval: min %lu max %lu increment: %f ms current: %f ms", + am->fa_min_deleted_sessions_per_interval, am->fa_max_deleted_sessions_per_interval, + am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second); + vec_free(out0); + } + return error; +} + + + /* *INDENT-OFF* */ +VLIB_CLI_COMMAND (aclplugin_set_command, static) = { + .path = "set acl-plugin", + .short_help = "set acl-plugin session timeout {{udp idle}|tcp {idle|transient}} ", + .function = acl_set_aclplugin_fn, +}; + +VLIB_CLI_COMMAND (aclplugin_show_command, static) = { + .path = "show acl-plugin", + .short_help = "show acl-plugin sessions", + .function = acl_show_aclplugin_fn, +}; +/* *INDENT-ON* */ + static clib_error_t * @@ -1874,6 +2168,31 @@ acl_init (vlib_main_t * vm) vec_free (name); + acl_setup_fa_nodes(); + am->session_timeout_sec[ACL_TIMEOUT_TCP_TRANSIENT] = TCP_SESSION_TRANSIENT_TIMEOUT_SEC; + am->session_timeout_sec[ACL_TIMEOUT_TCP_IDLE] = TCP_SESSION_IDLE_TIMEOUT_SEC; + am->session_timeout_sec[ACL_TIMEOUT_UDP_IDLE] = UDP_SESSION_IDLE_TIMEOUT_SEC; + + 
am->fa_conn_table_hash_num_buckets = ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS; + am->fa_conn_table_hash_memory_size = ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE; + am->fa_conn_table_max_entries = ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES; + + { + u8 tt; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + am->fa_conn_list_head[tt] = ~0; + am->fa_conn_list_tail[tt] = ~0; + } + } + + am->fa_min_deleted_sessions_per_interval = ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL; + am->fa_max_deleted_sessions_per_interval = ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL; + am->fa_cleaner_wait_time_increment = ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT; + +#define _(N, v, s) am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, v, 1); + foreach_acl_eh +#undef _ + return error; } diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 0252ff38..47523636 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -22,10 +22,13 @@ #include #include +#include #include +#include "bihash_40_8.h" +#include "fa_node.h" #define ACL_PLUGIN_VERSION_MAJOR 1 -#define ACL_PLUGIN_VERSION_MINOR 1 +#define ACL_PLUGIN_VERSION_MINOR 2 extern vlib_node_registration_t acl_in_node; extern vlib_node_registration_t acl_out_node; @@ -33,6 +36,14 @@ extern vlib_node_registration_t acl_out_node; void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); +enum acl_timeout_e { + ACL_TIMEOUT_UDP_IDLE = 0, + ACL_TIMEOUT_TCP_IDLE, + ACL_TIMEOUT_TCP_TRANSIENT, + ACL_N_TIMEOUTS +}; + + enum address_e { IP4, IP6 }; typedef struct { @@ -118,8 +129,8 @@ typedef struct { u32 *macip_acl_by_sw_if_index; /* next indices for our nodes in the l2-classify tables */ - u32 l2_input_classify_next_acl; - u32 l2_output_classify_next_acl; + u32 l2_input_classify_next_acl_old; + u32 
l2_output_classify_next_acl_old; /* next node indices for feature bitmap */ u32 acl_in_node_feat_next_node_index[32]; @@ -133,12 +144,117 @@ typedef struct { u32 acl_out_ip6_match_next[256]; u32 n_match_actions; + /* bitmaps when set the processing is enabled on the interface */ + uword *fa_in_acl_on_sw_if_index; + uword *fa_out_acl_on_sw_if_index; + /* bitmap, when set the hash is initialized */ + uword *fa_sessions_on_sw_if_index; + clib_bihash_40_8_t *fa_sessions_by_sw_if_index; + /* pool for FA session data. See fa_node.h */ + fa_session_t *fa_sessions_pool; + /* The process node which is responsible to deleting the sessions */ + u32 fa_cleaner_node_index; + /* FA session timeouts, in seconds */ + u32 session_timeout_sec[ACL_N_TIMEOUTS]; + /* session add/delete counters */ + u64 *fa_session_adds_by_sw_if_index; + u64 *fa_session_dels_by_sw_if_index; + + /* L2 datapath glue */ + + /* active next indices within L2 classifiers - switch old/new path */ + u32 l2_input_classify_next_acl_ip4; + u32 l2_input_classify_next_acl_ip6; + u32 l2_output_classify_next_acl_ip4; + u32 l2_output_classify_next_acl_ip6; + /* saved next indices within L2 classifiers for ip4/ip6 fa L2 nodes */ + u32 fa_l2_input_classify_next_acl_ip4; + u32 fa_l2_input_classify_next_acl_ip6; + u32 fa_l2_output_classify_next_acl_ip4; + u32 fa_l2_output_classify_next_acl_ip6; + /* next node indices for L2 dispatch */ + u32 fa_acl_in_ip4_l2_node_feat_next_node_index[32]; + u32 fa_acl_in_ip6_l2_node_feat_next_node_index[32]; + u32 fa_acl_out_ip4_l2_node_feat_next_node_index[32]; + u32 fa_acl_out_ip6_l2_node_feat_next_node_index[32]; + + /* EH values that we can skip over */ + uword *fa_ipv6_known_eh_bitmap; + + /* conn table per-interface conn table parameters */ + u32 fa_conn_table_hash_num_buckets; + uword fa_conn_table_hash_memory_size; + u64 fa_conn_table_max_entries; + + /* + * If the cleaner has to delete more than this number + * of connections, it halves the sleep time. 
+ */ + +#define ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL 100 + u64 fa_max_deleted_sessions_per_interval; + + /* + * If the cleaner deletes less than these connections, + * it increases the wait time by the "increment" + */ + +#define ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL 1 + u64 fa_min_deleted_sessions_per_interval; + +#define ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT 0.1 + f64 fa_cleaner_wait_time_increment; + + u64 fa_current_cleaner_timer_wait_interval; + u32 fa_conn_list_head[ACL_N_TIMEOUTS]; + u32 fa_conn_list_tail[ACL_N_TIMEOUTS]; + /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; } acl_main_t; +#define foreach_acl_eh \ + _(HOPBYHOP , 0 , "IPv6ExtHdrHopByHop") \ + _(ROUTING , 43 , "IPv6ExtHdrRouting") \ + _(DESTOPT , 60 , "IPv6ExtHdrDestOpt") \ + _(MOBILITY , 135, "Mobility Header") \ + _(HIP , 139, "Experimental use Host Identity Protocol") \ + _(SHIM6 , 140, "Shim6 Protocol") \ + _(EXP1 , 253, "Use for experimentation and testing") \ + _(EXP2 , 254, "Use for experimentation and testing") + +/* + + "No Next Header" is not a header. + Also, Fragment header needs special processing. + + _(NONEXT , 59 , "NoNextHdr") \ + _(FRAGMENT , 44 , "IPv6ExtHdrFragment") \ + + +ESP is hiding its internal format, so no point in trying to go past it. + + _(ESP , 50 , "EncapsulatingSecurityPayload") \ + + +AH has a special treatment of its length, it is in 32-bit words, not 64-bit words like the rest. + + _(AUTH , 51 , "Authentication Header") \ + + +*/ + + + typedef enum { + #define _(N, v, s) ACL_EH_##N = v, + foreach_acl_eh + #undef _ + } acl_eh_t; + + + extern acl_main_t acl_main; diff --git a/src/plugins/acl/bihash_40_8.h b/src/plugins/acl/bihash_40_8.h new file mode 100644 index 00000000..ba3dfbea --- /dev/null +++ b/src/plugins/acl/bihash_40_8.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#undef BIHASH_TYPE + +#define BIHASH_TYPE _40_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_40_8_h__ +#define __included_bihash_40_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[5]; + u64 value; +} clib_bihash_kv_40_8_t; + +static inline int +clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... */ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +static inline u64 +clib_bihash_hash_40_8 (const clib_bihash_kv_40_8_t * v) +{ +#if __SSE4_2__ + u32 value = 0; + value = _mm_crc32_u64 (value, v->key[0]); + value = _mm_crc32_u64 (value, v->key[1]); + value = _mm_crc32_u64 (value, v->key[2]); + value = _mm_crc32_u64 (value, v->key[3]); + value = _mm_crc32_u64 (value, v->key[4]); + return value; +#else + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]; + return clib_xxhash (tmp); +#endif +} + +static inline u8 * +format_bihash_kvp_40_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_40_8_t *v = va_arg (*args, clib_bihash_kv_40_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu value %llu", + v->key[0], v->key[1], v->key[2], v->key[3], v->key[4], + v->value); + return s; +} + +static inline int +clib_bihash_key_compare_40_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | + (a[4] ^ b[4])) == 0; +} + 
+#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_40_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c new file mode 100644 index 00000000..ac619a72 --- /dev/null +++ b/src/plugins/acl/fa_node.c @@ -0,0 +1,1444 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +#include +#include +#include +#include +#include +#include "bihash_40_8.h" + +#include +#include + +#include "fa_node.h" + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 match_acl_in_index; + u32 match_rule_index; + u64 packet_info[6]; + u32 trace_bitmap; + u8 action; +} acl_fa_trace_t; + +/* packet trace format function */ +static u8 * +format_acl_fa_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + acl_fa_trace_t *t = va_arg (*args, acl_fa_trace_t *); + + s = + format (s, + "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n" + " pkt info %016llx %016llx %016llx %016llx %016llx %016llx", + t->sw_if_index, t->next_index, t->action, t->match_acl_in_index, + t->match_rule_index, t->trace_bitmap, + t->packet_info[0], t->packet_info[1], t->packet_info[2], + t->packet_info[3], t->packet_info[4], t->packet_info[5]); + return s; +} + +/* *INDENT-OFF* */ +#define foreach_acl_fa_error \ +_(ACL_DROP, "ACL deny packets") \ +_(ACL_PERMIT, "ACL permit packets") \ +_(ACL_NEW_SESSION, "new sessions added") \ +_(ACL_EXIST_SESSION, "existing session packets") \ +_(ACL_CHECK, "checked packets") \ +_(ACL_RESTART_SESSION_TIMER, "restart session timer") \ +_(ACL_TOO_MANY_SESSIONS, "too many sessions to add new") \ +/* end of errors */ + +typedef enum +{ +#define _(sym,str) ACL_FA_ERROR_##sym, + foreach_acl_fa_error +#undef _ + ACL_FA_N_ERROR, +} acl_fa_error_t; + +static char *acl_fa_error_strings[] = { +#define _(sym,string) string, + foreach_acl_fa_error +#undef _ +}; +/* *INDENT-ON* */ + +static void * +get_ptr_to_offset (vlib_buffer_t * b0, int offset) +{ + u8 *p = vlib_buffer_get_current (b0) + offset; + return p; +} + + +static int +fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2, + int prefixlen, int is_ip6) +{ + if (prefixlen == 0) + { + /* match any 
always succeeds */ + return 1; + } + if (is_ip6) + { + if (memcmp (addr1, addr2, prefixlen / 8)) + { + /* If the starting full bytes do not match, no point in bittwidling the thumbs further */ + return 0; + } + if (prefixlen % 8) + { + u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); + u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); + u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); + return (b1 & mask0) == b2; + } + else + { + /* The prefix fits into integer number of bytes, so nothing left to do */ + return 1; + } + } + else + { + uint32_t a1 = ntohl (addr1->ip4.as_u32); + uint32_t a2 = ntohl (addr2->ip4.as_u32); + uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1); + return (a1 & mask0) == a2; + } +} + +static int +fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6) +{ + return ((port >= port_first) && (port <= port_last)); +} + +int +acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, + int is_ip6, u8 * r_action, u32 * r_acl_match_p, + u32 * r_rule_match_p, u32 * trace_bitmap) +{ + int i; + acl_list_t *a; + acl_rule_t *r; + + if (pool_is_free_index (am->acls, acl_index)) + { + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = -1; + /* the ACL does not exist but is used for policy. Block traffic. 
*/ + return 0; + } + a = am->acls + acl_index; + for (i = 0; i < a->count; i++) + { + r = a->rules + i; + if (is_ip6 != r->is_ipv6) + { + continue; + } + if (!fa_acl_match_addr + (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d", + acl_index, i, format_ip46_address, &pkt_5tuple->addr[1], + IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY, + r->dst_prefixlen); +#endif + + if (!fa_acl_match_addr + (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d", + acl_index, i, format_ip46_address, &pkt_5tuple->addr[0], + IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY, + r->src_prefixlen); + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d", + acl_index, i, pkt_5tuple->l4.proto, r->proto); +#endif + if (r->proto) + { + if (pkt_5tuple->l4.proto != r->proto) + continue; + /* A sanity check just to ensure what we jave just matched was a valid L4 extracted from the packet */ + if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d", + acl_index, i, pkt_5tuple->l4.proto, r->proto); +#endif + + if (!fa_acl_match_port + (pkt_5tuple->l4.port[0], r->src_port_or_type_first, + r->src_port_or_type_last, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]", + acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first, + r->src_port_or_type_last); +#endif + + if (!fa_acl_match_port + (pkt_5tuple->l4.port[1], r->dst_port_or_code_first, + r->dst_port_or_code_last, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d 
rule %d pkt dport %d match rule [%d..%d]", + acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first, + r->dst_port_or_code_last); +#endif + if (pkt_5tuple->pkt.tcp_flags_valid + && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) != + r->tcp_flags_value)) + continue; + } + /* everything matches! */ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d", + acl_index, i, r->is_permit); +#endif + *r_action = r->is_permit; + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = i; + return 1; + } + return 0; +} + +static u8 +full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + int i; + u32 *acl_vector; + u8 action = 0; + + if (is_input) + { + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index]; + } + else + { + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index]; + } + for (i = 0; i < vec_len (acl_vector); i++) + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d", + acl_vector[i]); +#endif + if (acl_match_5tuple + (am, acl_vector[i], pkt_5tuple, is_ip6, &action, + acl_match_p, rule_match_p, trace_bitmap)) + { + return action; + } + } + if (vec_len (acl_vector) > 0) + { + /* If there are ACLs and none matched, deny by default */ + return 0; + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index); +#endif + /* Deny by default. If there are no ACLs defined we should not be here. 
*/ + return 0; +} + +static int +offset_within_packet (vlib_buffer_t * b0, int offset) +{ + /* For the purposes of this code, "within" means we have at least 8 bytes after it */ + return (offset < (b0->current_length - 8)); +} + +static void +acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, + int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt) +{ + int l3_offset = 14; + int l4_offset; + u16 ports[2]; + u16 proto; + /* IP4 and IP6 protocol numbers of ICMP */ + static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 }; + + if (is_input && !(is_l2_path)) + { + l3_offset = 0; + } + + + if (is_ip6) + { + clib_memcpy (&p5tuple_pkt->addr, + get_ptr_to_offset (b0, + offsetof (ip6_header_t, + src_address) + l3_offset), + sizeof (p5tuple_pkt->addr)); + proto = + *(u8 *) get_ptr_to_offset (b0, + offsetof (ip6_header_t, + protocol) + l3_offset); + l4_offset = l3_offset + sizeof (ip6_header_t); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto, + l4_offset); +#endif + /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */ + int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); + if (PREDICT_FALSE (need_skip_eh)) + { + /* FIXME: add fragment header special handling. Currently causes treated as unknown header. 
*/ + while (need_skip_eh && offset_within_packet (b0, l4_offset)) + { + u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset); + proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); + l4_offset += 8 * (1 + (u16) nwords); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d", + proto, l4_offset); +#endif + need_skip_eh = + clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); + } + } + } + else + { + p5tuple_pkt->kv.key[0] = 0; + p5tuple_pkt->kv.key[1] = 0; + p5tuple_pkt->kv.key[2] = 0; + p5tuple_pkt->kv.key[3] = 0; + clib_memcpy (&p5tuple_pkt->addr[0].ip4, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + src_address) + l3_offset), + sizeof (p5tuple_pkt->addr[0].ip4)); + clib_memcpy (&p5tuple_pkt->addr[1].ip4, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + dst_address) + l3_offset), + sizeof (p5tuple_pkt->addr[1].ip4)); + proto = + *(u8 *) get_ptr_to_offset (b0, + offsetof (ip4_header_t, + protocol) + l3_offset); + l4_offset = l3_offset + sizeof (ip4_header_t); + } + /* Remainder of the key and per-packet non-key data */ + p5tuple_pkt->kv.key[4] = 0; + p5tuple_pkt->kv.value = 0; + if (PREDICT_TRUE (offset_within_packet (b0, l4_offset))) + { + p5tuple_pkt->l4.proto = proto; + p5tuple_pkt->pkt.l4_valid = 1; + if (icmp_protos[is_ip6] == proto) + { + /* type */ + p5tuple_pkt->l4.port[0] = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (icmp46_header_t, + type)); + /* code */ + p5tuple_pkt->l4.port[1] = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (icmp46_header_t, + code)); + } + else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto)) + { + clib_memcpy (&ports, + get_ptr_to_offset (b0, + l4_offset + offsetof (tcp_header_t, + src_port)), + sizeof (ports)); + p5tuple_pkt->l4.port[0] = ntohs (ports[0]); + p5tuple_pkt->l4.port[1] = ntohs (ports[1]); + + p5tuple_pkt->pkt.tcp_flags = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (tcp_header_t, + flags)); + p5tuple_pkt->pkt.tcp_flags_valid = 
(proto == IPPROTO_TCP); + } + /* + * FIXME: rather than the above conditional, here could + * be a nice generic mechanism to extract two L4 values: + * + * have a per-protocol array of 4 elements like this: + * u8 offset; to take the byte from, off L4 header + * u8 mask; to mask it with, before storing + * + * this way we can describe UDP, TCP and ICMP[46] semantics, + * and add a sort of FPM-type behavior for other protocols. + * + * Of course, is it faster ? and is it needed ? + * + */ + } +} + + +/* Session keys match the packets received, and mirror the packets sent */ +static void +acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt, + fa_5tuple_t * p5tuple_sess) +{ + int src_index = is_input ? 0 : 1; + int dst_index = is_input ? 1 : 0; + p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0]; + p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1]; + p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64; + p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0]; + p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1]; +} + + +static int +acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0) +{ + int has_sessions = + clib_bitmap_get (am->fa_sessions_on_sw_if_index, sw_if_index0); + return has_sessions; +} + +static int +acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0) +{ + int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0); + return it_has; +} + +static int +acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0) +{ + int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0); + return it_has; +} + + +static int +fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess) +{ + /* seen both SYNs and ACKs but not FINs means we are in establshed state */ + u16 masked_flags = + sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) + + TCP_FLAGS_RSTFINACKSYN); + switch (sess->info.l4.proto) + { + case IPPROTO_TCP: + if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == 
masked_flags) + { + return ACL_TIMEOUT_TCP_IDLE; + } + else + { + return ACL_TIMEOUT_TCP_TRANSIENT; + } + break; + case IPPROTO_UDP: + return ACL_TIMEOUT_UDP_IDLE; + break; + default: + return ACL_TIMEOUT_UDP_IDLE; + } +} + + +static u64 +fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) +{ + u64 timeout = am->vlib_main->clib_time.clocks_per_second; + int timeout_type = fa_session_get_timeout_type (am, sess); + timeout *= am->session_timeout_sec[timeout_type]; + return timeout; +} + +static void +acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) +{ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu", + sw_if_index0, am->fa_conn_table_hash_num_buckets, + am->fa_conn_table_hash_memory_size); +#endif + vec_validate (am->fa_sessions_by_sw_if_index, sw_if_index0); + BV (clib_bihash_init) (&am->fa_sessions_by_sw_if_index + [sw_if_index0], "ACL plugin FA session bihash", + am->fa_conn_table_hash_num_buckets, + am->fa_conn_table_hash_memory_size); + am->fa_sessions_on_sw_if_index = + clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1); +} + +static void +acl_fa_conn_list_add_session (acl_main_t * am, u32 sess_id) +{ + fa_session_t *sess = am->fa_sessions_pool + sess_id; + u8 list_id = fa_session_get_timeout_type(am, sess); + sess->link_list_id = list_id; + sess->link_next_idx = ~0; + sess->link_prev_idx = am->fa_conn_list_tail[list_id]; + if (~0 != am->fa_conn_list_tail[list_id]) { + fa_session_t *prev_sess = am->fa_sessions_pool + am->fa_conn_list_tail[list_id]; + prev_sess->link_next_idx = sess_id; + } + am->fa_conn_list_tail[list_id] = sess_id; + + if (~0 == am->fa_conn_list_head[list_id]) { + am->fa_conn_list_head[list_id] = sess_id; + } +} + +static void +acl_fa_conn_list_delete_session (acl_main_t *am, u32 sess_id) +{ + fa_session_t *sess = am->fa_sessions_pool + sess_id; + if (~0 != sess->link_prev_idx) { + fa_session_t *prev_sess = am->fa_sessions_pool + 
sess->link_prev_idx; + prev_sess->link_next_idx = sess->link_next_idx; + if (prev_sess->link_list_id != sess->link_list_id) + clib_warning("(prev_sess->link_list_id != sess->link_list_id)"); + } + if (~0 != sess->link_next_idx) { + fa_session_t *next_sess = am->fa_sessions_pool + sess->link_next_idx; + next_sess->link_prev_idx = sess->link_prev_idx; + if (next_sess->link_list_id != sess->link_list_id) + clib_warning("(next_sess->link_list_id != sess->link_list_id)"); + } + if (am->fa_conn_list_head[sess->link_list_id] == sess_id) { + am->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx; + } + if (am->fa_conn_list_tail[sess->link_list_id] == sess_id) { + am->fa_conn_list_tail[sess->link_list_id] = sess->link_next_idx; + } +} + + +int +acl_fa_session_is_dead (acl_main_t * am, u32 sw_if_index, u64 now, + u32 sess_id) +{ + return 0; +} + +static void +acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, u32 sess_id) +{ + // fa_session_t *sess = am->fa_sessions_pool + sess_id; + acl_fa_conn_list_delete_session(am, sess_id); + acl_fa_conn_list_add_session(am, sess_id); +} + + +static u8 +acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, + fa_session_t * sess, fa_5tuple_t * pkt_5tuple) +{ + sess->last_active_time = now; + if (pkt_5tuple->pkt.tcp_flags_valid) + { + sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags; + } + return 3; +} + + +static void +acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, u32 sess_id) +{ + fa_session_t *sess = (fa_session_t *) am->fa_sessions_pool + sess_id; + BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], + &sess->info.kv, 0); + pool_put_index (am->fa_sessions_pool, sess_id); + /* Deleting from timer wheel not needed, as the cleaner deals with the timers. 
*/ + vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); + am->fa_session_dels_by_sw_if_index[sw_if_index]++; +} + +static int +acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) +{ + u64 curr_sess; + vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); + vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); + curr_sess = + am->fa_session_adds_by_sw_if_index[sw_if_index] - + am->fa_session_dels_by_sw_if_index[sw_if_index]; + return (curr_sess < am->fa_conn_table_max_entries); +} + +always_inline void +acl_fa_try_recycle_session (acl_main_t * am, int is_input, u32 sw_if_index) +{ + /* try to recycle a TCP transient session */ + u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT; + u32 sess_id = am->fa_conn_list_head[timeout_type]; + if (~0 != sess_id) { + acl_fa_conn_list_delete_session(am, sess_id); + acl_fa_delete_session(am, sw_if_index, sess_id); + } +} + +static void +acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, + fa_5tuple_t * p5tuple) +{ + clib_bihash_kv_40_8_t *pkv = &p5tuple->kv; + clib_bihash_kv_40_8_t kv; + u32 sess_id; + fa_session_t *sess; + + pool_get (am->fa_sessions_pool, sess); + sess_id = sess - am->fa_sessions_pool; + + + kv.key[0] = pkv->key[0]; + kv.key[1] = pkv->key[1]; + kv.key[2] = pkv->key[2]; + kv.key[3] = pkv->key[3]; + kv.key[4] = pkv->key[4]; + kv.value = sess_id; + + memcpy (sess, pkv, sizeof (pkv->key)); + sess->last_active_time = now; + sess->sw_if_index = sw_if_index; + sess->tcp_flags_seen.as_u16 = 0; + sess->reserved1 = 0; + sess->link_list_id = ~0; + sess->link_prev_idx = ~0; + sess->link_next_idx = ~0; + + + + if (!acl_fa_ifc_has_sessions (am, sw_if_index)) + { + acl_fa_ifc_init_sessions (am, sw_if_index); + } + + BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], + &kv, 1); + acl_fa_conn_list_add_session(am, sess_id); + + vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); + 
am->fa_session_adds_by_sw_if_index[sw_if_index]++; +} + +static int +acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple, + clib_bihash_kv_40_8_t * pvalue_sess) +{ + return (BV (clib_bihash_search) + (&am->fa_sessions_by_sw_if_index[sw_if_index0], &p5tuple->kv, + pvalue_sess) == 0); +} + + +always_inline uword +acl_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6, + int is_input, int is_l2_path, u32 * l2_feat_next_node_index, + vlib_node_registration_t * acl_fa_node) +{ + u32 n_left_from, *from, *to_next; + acl_fa_next_t next_index; + u32 pkts_acl_checked = 0; + u32 pkts_new_session = 0; + u32 pkts_exist_session = 0; + u32 pkts_acl_permit = 0; + u32 pkts_restart_session_timer = 0; + u32 trace_bitmap = 0; + u32 feature_bitmap0; + acl_main_t *am = &acl_main; + fa_5tuple_t fa_5tuple, kv_sess; + clib_bihash_kv_40_8_t value_sess; + vlib_node_runtime_t *error_node; + u64 now = clib_cpu_time_now (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + error_node = vlib_node_get_runtime (vm, acl_fa_node->index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = 0; + u8 action = 0; + u32 sw_if_index0; + int acl_check_needed = 1; + u32 match_acl_in_index = ~0; + u32 match_rule_index = ~0; + u8 error0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_input) + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + else + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + if (is_l2_path) + feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; + + /* + * Extract the L3/L4 matching info into a 
/*
 * Shared worker for all eight ACL fast-path nodes.  The compile-time flags
 * (is_ip6 / is_input / is_l2_path) select the variant; each thin wrapper
 * below passes constants so the compiler can specialize the body.
 * For each packet: extract the 5-tuple, try the per-interface session
 * table first, otherwise run the full ACL match; action 0 drops,
 * non-zero forwards, 2 additionally creates a session.
 */
always_inline uword
acl_fa_node_fn (vlib_main_t * vm,
		vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6,
		int is_input, int is_l2_path, u32 * l2_feat_next_node_index,
		vlib_node_registration_t * acl_fa_node)
{
  u32 n_left_from, *from, *to_next;
  acl_fa_next_t next_index;
  u32 pkts_acl_checked = 0;
  u32 pkts_new_session = 0;
  u32 pkts_exist_session = 0;
  u32 pkts_acl_permit = 0;
  u32 pkts_restart_session_timer = 0;
  u32 trace_bitmap = 0;
  u32 feature_bitmap0;		/* only assigned/used on the L2 path */
  acl_main_t *am = &acl_main;
  fa_5tuple_t fa_5tuple, kv_sess;
  clib_bihash_kv_40_8_t value_sess;
  vlib_node_runtime_t *error_node;
  u64 now = clib_cpu_time_now ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  error_node = vlib_node_get_runtime (vm, acl_fa_node->index);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0 = 0;
	  u8 action = 0;
	  u32 sw_if_index0;
	  int acl_check_needed = 1;
	  u32 match_acl_in_index = ~0;
	  u32 match_rule_index = ~0;
	  u8 error0 = 0;

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);

	  if (is_input)
	    sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  else
	    sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
	  if (is_l2_path)
	    feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;

	  /*
	   * Extract the L3/L4 matching info into a 5-tuple structure,
	   * then create a session key whose layout is independent on forward or reverse
	   * direction of the packet.
	   */

	  acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
	  acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
#ifdef FA_NODE_VERBOSE_DEBUG
	  clib_warning
	    ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
	     kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2],
	     kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value);
	  clib_warning
	    ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx",
	     fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2],
	     fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value);
#endif

	  /* Try to match an existing session first */

	  if (acl_fa_ifc_has_sessions (am, sw_if_index0))
	    {
	      if (acl_fa_find_session
		  (am, sw_if_index0, &kv_sess, &value_sess))
		{
		  trace_bitmap |= 0x80000000;
		  error0 = ACL_FA_ERROR_ACL_EXIST_SESSION;
		  // FIXME assert(value_sess.value == (0xffffffff & value_sess.value));
		  u32 sess_id = value_sess.value;
		  fa_session_t *sess = am->fa_sessions_pool + sess_id;
		  int old_timeout_type =
		    fa_session_get_timeout_type (am, sess);
		  action =
		    acl_fa_track_session (am, is_input, sw_if_index0, now,
					  sess, &fa_5tuple);
		  /* expose the session id to the tracer */
		  match_rule_index = sess_id;
		  int new_timeout_type =
		    fa_session_get_timeout_type (am, sess);
		  acl_check_needed = 0;
		  pkts_exist_session += 1;
		  /* Tracking might have changed the session timeout type, e.g. from transient to established */
		  if (PREDICT_FALSE (old_timeout_type != new_timeout_type))
		    {
		      acl_fa_restart_timer_for_session (am, now, sess_id);
		      pkts_restart_session_timer++;
		      trace_bitmap |=
			0x00010000 + ((0xff & old_timeout_type) << 8) +
			(0xff & new_timeout_type);
		    }
		}
	    }

	  if (acl_check_needed)
	    {
	      action =
		full_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path,
				       is_ip6, is_input, &match_acl_in_index,
				       &match_rule_index, &trace_bitmap);
	      /* the action value doubles as the error-counter index:
	         0 = drop, 1 = permit, 2 = permit + create session */
	      error0 = action;
	      if (1 == action)
		pkts_acl_permit += 1;
	      if (2 == action)
		{
		  if (!acl_fa_can_add_session (am, is_input, sw_if_index0))
		    acl_fa_try_recycle_session (am, is_input, sw_if_index0);

		  if (acl_fa_can_add_session (am, is_input, sw_if_index0))
		    {
		      acl_fa_add_session (am, is_input, sw_if_index0, now,
					  &kv_sess);
		      pkts_new_session += 1;
		    }
		  else
		    {
		      action = 0;
		      error0 = ACL_FA_ERROR_ACL_TOO_MANY_SESSIONS;
		    }
		}
	    }



	  if (action > 0)
	    {
	      /* forward: pick the next feature in the L2 or L3 feature arc */
	      if (is_l2_path)
		next0 =
		  feat_bitmap_get_next_node_index (l2_feat_next_node_index,
						   feature_bitmap0);
	      else
		vnet_feature_next (sw_if_index0, &next0, b0);
	    }

	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = sw_if_index0;
	      t->next_index = next0;
	      t->match_acl_in_index = match_acl_in_index;
	      t->match_rule_index = match_rule_index;
	      t->packet_info[0] = fa_5tuple.kv.key[0];
	      t->packet_info[1] = fa_5tuple.kv.key[1];
	      t->packet_info[2] = fa_5tuple.kv.key[2];
	      t->packet_info[3] = fa_5tuple.kv.key[3];
	      t->packet_info[4] = fa_5tuple.kv.key[4];
	      t->packet_info[5] = fa_5tuple.kv.value;
	      t->action = action;
	      t->trace_bitmap = trace_bitmap;
	    }

	  /* next index 0 is the drop node; record the error there */
	  next0 = next0 < node->n_next_nodes ? next0 : 0;
	  if (0 == next0)
	    b0->error = error_node->errors[error0];

	  pkts_acl_checked += 1;

	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next, bi0,
					   next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, acl_fa_node->index,
			       ACL_FA_ERROR_ACL_CHECK, pkts_acl_checked);
  vlib_node_increment_counter (vm, acl_fa_node->index,
			       ACL_FA_ERROR_ACL_PERMIT, pkts_acl_permit);
  vlib_node_increment_counter (vm, acl_fa_node->index,
			       ACL_FA_ERROR_ACL_NEW_SESSION,
			       pkts_new_session);
  vlib_node_increment_counter (vm, acl_fa_node->index,
			       ACL_FA_ERROR_ACL_EXIST_SESSION,
			       pkts_exist_session);
  vlib_node_increment_counter (vm, acl_fa_node->index,
			       ACL_FA_ERROR_ACL_RESTART_SESSION_TIMER,
			       pkts_restart_session_timer);
  return frame->n_vectors;
}


/**** L2 processing path nodes: is_l2_path = 1, with per-arc feature tables ****/

vlib_node_registration_t acl_in_l2_ip6_node;
static uword
acl_in_ip6_l2_node_fn (vlib_main_t * vm,
		       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  acl_main_t *am = &acl_main;
  return acl_fa_node_fn (vm, node, frame, 1, 1, 1,
			 am->fa_acl_in_ip6_l2_node_feat_next_node_index,
			 &acl_in_l2_ip6_node);
}

vlib_node_registration_t acl_in_l2_ip4_node;
static uword
acl_in_ip4_l2_node_fn (vlib_main_t * vm,
		       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  acl_main_t *am = &acl_main;
  return acl_fa_node_fn (vm, node, frame, 0, 1, 1,
			 am->fa_acl_in_ip4_l2_node_feat_next_node_index,
			 &acl_in_l2_ip4_node);
}

vlib_node_registration_t acl_out_l2_ip6_node;
static uword
acl_out_ip6_l2_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  acl_main_t *am = &acl_main;
  return acl_fa_node_fn (vm, node, frame, 1, 0, 1,
			 am->fa_acl_out_ip6_l2_node_feat_next_node_index,
			 &acl_out_l2_ip6_node);
}

vlib_node_registration_t acl_out_l2_ip4_node;
static uword
acl_out_ip4_l2_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  acl_main_t *am = &acl_main;
  return acl_fa_node_fn (vm, node, frame, 0, 0, 1,
			 am->fa_acl_out_ip4_l2_node_feat_next_node_index,
			 &acl_out_l2_ip4_node);
}


/**** L3 processing path nodes ****/


vlib_node_registration_t acl_in_fa_ip6_node;
static uword
acl_in_ip6_fa_node_fn (vlib_main_t * vm,
		       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return acl_fa_node_fn (vm, node, frame, 1, 1, 0, 0, &acl_in_fa_ip6_node);
}

vlib_node_registration_t acl_in_fa_ip4_node;
static uword
acl_in_ip4_fa_node_fn (vlib_main_t * vm,
		       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return acl_fa_node_fn (vm, node, frame, 0, 1, 0, 0, &acl_in_fa_ip4_node);
}

vlib_node_registration_t acl_out_fa_ip6_node;
static uword
acl_out_ip6_fa_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return acl_fa_node_fn (vm, node, frame, 1, 0, 0, 0, &acl_out_fa_ip6_node);
}

vlib_node_registration_t acl_out_fa_ip4_node;
static uword
acl_out_ip4_fa_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return acl_fa_node_fn (vm, node, frame, 0, 0, 0, 0, &acl_out_fa_ip4_node);
}

/*
 * This process performs all the connection clean up - both for idle connections,
 * as well as receiving the signals to clean up the connections in case of sw_if_index deletion,
 * or (maybe in the future) the connection deletion due to policy reasons.
 *
 * The previous iteration (l2sess) attempted to clean up the connections in small increments,
 * in-band, but the problem it tried to preemptively address (process starvation) is yet to be seen.
 *
 * The approach with a single thread deleting the connections is simpler, thus we use it until
 * there is a real starvation problem to solve.
+ * + */ + + +/* *INDENT-OFF* */ +#define foreach_acl_fa_cleaner_error \ +_(EVENT_CYCLE, "event processing cycle") \ +_(TIMER_RESTARTED, "restarted session timers") \ +_(DELETED_SESSIONS, "deleted sessions") \ +_(ALREADY_DELETED, "timer event for already deleted session") \ +_(DELETE_BY_SW_IF_INDEX, "delete by sw_if_index event") \ +_(DELETE_BY_SW_IF_INDEX_OK, "delete by sw_if_index completed ok") \ +_(WAIT_WITHOUT_TIMEOUT, "process waits without timeout") \ +_(WAIT_WITH_TIMEOUT, "process waits with timeout") \ +_(UNKNOWN_EVENT, "unknown event received") \ +/* end of errors */ + +typedef enum +{ +#define _(sym,str) ACL_FA_CLEANER_ERROR_##sym, + foreach_acl_fa_cleaner_error +#undef _ + ACL_FA_CLEANER_N_ERROR, +} acl_fa_cleaner_error_t; + +static char *acl_fa_cleaner_error_strings[] = { +#define _(sym,string) string, + foreach_acl_fa_cleaner_error +#undef _ +}; + +static int +acl_fa_clean_sessions_by_sw_if_index (acl_main_t *am, u32 sw_if_index, u32 *count) +{ + + int undeleted = 0; + fa_session_t *sess; + uword *dv = NULL; + uword *ii; + + pool_foreach(sess, am->fa_sessions_pool, ({ + if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) ) + vec_add1(dv, sess-am->fa_sessions_pool); + })); + vec_foreach(ii, dv) + { + sess = pool_elt_at_index(am->fa_sessions_pool, *ii); + acl_fa_delete_session(am, sess->sw_if_index, *ii); + (*count)++; + } + + pool_foreach(sess, am->fa_sessions_pool, ({ + if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) ) + undeleted++; + })); + if (undeleted == 0) + { + if (~0 == sw_if_index) + { + /* FIXME: clean-up tables ? */ + } + else + { + /* FIXME: clean-up tables ? 
*/ + } + } + return (undeleted == 0); +} +/* *INDENT-ON* */ + +static vlib_node_registration_t acl_fa_session_cleaner_process_node; + +static int +acl_fa_conn_has_timed_out (acl_main_t *am, u64 now, u32 session_index) +{ + fa_session_t *sess = am->fa_sessions_pool + session_index; + u64 sess_timeout_time = + sess->last_active_time + fa_session_get_timeout (am, sess); + return (sess_timeout_time < now); +} + + +static uword +acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + acl_main_t *am = &acl_main; + u64 now = clib_cpu_time_now (); + f64 cpu_cps = vm->clib_time.clocks_per_second; + u64 next_expire; + /* We should call timer wheel at least twice a second */ + u64 max_timer_wait_interval = cpu_cps / 2; + am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; + + u32 *expired = NULL; + uword event_type, *event_data = 0; + + am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index; + + while (1) + { + u32 count_deleted_sessions = 0; + u32 count_already_deleted = 0; + u32 count_timer_restarted = 0; + now = clib_cpu_time_now (); + next_expire = now + am->fa_current_cleaner_timer_wait_interval; + + { + f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps; + if (timeout <= 0) + { + /* skip waiting altogether */ + event_type = ~0; + } + else + { + /* Timing wheel code is happier if it is called regularly */ + if (timeout > 0.5) + timeout = 0.5; + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_WAIT_WITH_TIMEOUT, + 1); + (void) vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + } + } + + now = clib_cpu_time_now (); + switch (event_type) + { + case ~0: + /* nothing to do */ + break; + case ACL_FA_CLEANER_RESCHEDULE: + /* Nothing to do. 
*/ + break; + case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: + { + uword *sw_if_index0; + vec_foreach (sw_if_index0, event_data) + { + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_DELETE_BY_SW_IF_INDEX, + 1); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", + *sw_if_index0); +#endif + u32 count = 0; + int result = + acl_fa_clean_sessions_by_sw_if_index (am, *sw_if_index0, + &count); + count_deleted_sessions += count; + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_DELETE_BY_SW_IF_INDEX_OK, + result); + } + } + break; + default: +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL plugin connection cleaner: unknown event %u", + event_type); +#endif + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1); + break; + } + + { + u8 tt = 0; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval) && (~0 != am->fa_conn_list_head[tt]) && (acl_fa_conn_has_timed_out(am, now, am->fa_conn_list_head[tt]))) { + u32 sess_id = am->fa_conn_list_head[tt]; + vec_add1(expired, sess_id); + acl_fa_conn_list_delete_session(am, sess_id); + } + } + } + + + u32 *psid = NULL; + vec_foreach (psid, expired) + { + u32 session_index = *psid; + if (!pool_is_free_index (am->fa_sessions_pool, session_index)) + { + fa_session_t *sess = am->fa_sessions_pool + session_index; + u32 sw_if_index = sess->sw_if_index; + u64 sess_timeout_time = + sess->last_active_time + fa_session_get_timeout (am, sess); + if (now < sess_timeout_time) + { + /* clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d", + (int) session_index); */ + + /* Pretend we did this in the past, at last_active moment */ + count_timer_restarted++; + } + else + { + /* clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", + (int) 
session_index); */ + acl_fa_delete_session (am, sw_if_index, session_index); + count_deleted_sessions++; + } + } + else + { + count_already_deleted++; + } + } + if (expired) + _vec_len (expired) = 0; + if (event_data) + _vec_len (event_data) = 0; + + if (count_deleted_sessions > am->fa_max_deleted_sessions_per_interval) { + /* if there was too many sessions to delete, do less waiting around next time */ + am->fa_current_cleaner_timer_wait_interval /= 2; + } else if (count_deleted_sessions < am->fa_min_deleted_sessions_per_interval) { + /* Too few deleted sessions, slowly increase the amount of sleep up to a limit */ + if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) + am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; + } + + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node.index, + ACL_FA_CLEANER_ERROR_EVENT_CYCLE, 1); + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node.index, + ACL_FA_CLEANER_ERROR_TIMER_RESTARTED, + count_timer_restarted); + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node.index, + ACL_FA_CLEANER_ERROR_DELETED_SESSIONS, + count_deleted_sessions); + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node.index, + ACL_FA_CLEANER_ERROR_ALREADY_DELETED, + count_already_deleted); + } + /* NOT REACHED */ + return 0; +} + + +void +acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) +{ + acl_main_t *am = &acl_main; + if (is_input) + { + vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa", + sw_if_index, enable_disable, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa", + sw_if_index, enable_disable, 0, 0); + am->fa_in_acl_on_sw_if_index = + clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index, + enable_disable); + } + else + { + vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa", + sw_if_index, enable_disable, 0, 0); + 
vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa", + sw_if_index, enable_disable, 0, 0); + am->fa_out_acl_on_sw_if_index = + clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index, + enable_disable); + } + if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index)) + && (!acl_fa_ifc_has_out_acl (am, sw_if_index))) + { + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, + sw_if_index); + } +} + + + +/* *INDENT-OFF* */ + + +VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = { + .function = acl_fa_session_cleaner_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "acl-plugin-fa-cleaner-process", + .n_errors = ARRAY_LEN (acl_fa_cleaner_error_strings), + .error_strings = acl_fa_cleaner_error_strings, + .n_next_nodes = 0, + .next_nodes = {}, +}; + + +VLIB_REGISTER_NODE (acl_in_l2_ip6_node) = +{ + .function = acl_in_ip6_l2_node_fn, + .name = "acl-plugin-in-ip6-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_in_l2_ip4_node) = +{ + .function = acl_in_ip4_l2_node_fn, + .name = "acl-plugin-in-ip4-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_out_l2_ip6_node) = +{ + .function = acl_out_ip6_l2_node_fn, + .name = "acl-plugin-out-ip6-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, 
+ .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_out_l2_ip4_node) = +{ + .function = acl_out_ip4_l2_node_fn, + .name = "acl-plugin-out-ip4-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + + +VLIB_REGISTER_NODE (acl_in_fa_ip6_node) = +{ + .function = acl_in_ip6_fa_node_fn, + .name = "acl-plugin-in-ip6-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_in_ip6_fa_feature, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "acl-plugin-in-ip6-fa", + .runs_before = VNET_FEATURES ("ip6-flow-classify"), +}; + +VLIB_REGISTER_NODE (acl_in_fa_ip4_node) = +{ + .function = acl_in_ip4_fa_node_fn, + .name = "acl-plugin-in-ip4-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_in_ip4_fa_feature, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "acl-plugin-in-ip4-fa", + .runs_before = VNET_FEATURES ("ip4-flow-classify"), +}; + + +VLIB_REGISTER_NODE (acl_out_fa_ip6_node) = +{ + .function = acl_out_ip6_fa_node_fn, + .name = "acl-plugin-out-ip6-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + 
.error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_out_ip6_fa_feature, static) = +{ + .arc_name = "ip6-output", + .node_name = "acl-plugin-out-ip6-fa", + .runs_before = VNET_FEATURES ("interface-output"), +}; + +VLIB_REGISTER_NODE (acl_out_fa_ip4_node) = +{ + .function = acl_out_ip4_fa_node_fn, + .name = "acl-plugin-out-ip4-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) = +{ + .arc_name = "ip4-output", + .node_name = "acl-plugin-out-ip4-fa", + .runs_before = VNET_FEATURES ("interface-output"), +}; + + +/* *INDENT-ON* */ diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h new file mode 100644 index 00000000..76a40a38 --- /dev/null +++ b/src/plugins/acl/fa_node.h @@ -0,0 +1,99 @@ +#ifndef _FA_NODE_H_ +#define _FA_NODE_H_ + +#include +#include "bihash_40_8.h" + +#define TCP_FLAG_FIN 0x01 +#define TCP_FLAG_SYN 0x02 +#define TCP_FLAG_RST 0x04 +#define TCP_FLAG_PUSH 0x08 +#define TCP_FLAG_ACK 0x10 +#define TCP_FLAG_URG 0x20 +#define TCP_FLAG_ECE 0x40 +#define TCP_FLAG_CWR 0x80 +#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK) +#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK) + +#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1<<30) +#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 1000000 + +typedef union { + u64 as_u64; + struct { + u8 tcp_flags_valid; + u8 tcp_flags; + u8 is_input; + u8 l4_valid; + }; +} fa_packet_info_t; + +typedef union { + u64 as_u64; + struct { + u16 port[2]; + u16 proto; + 
u16 rsvd; + }; +} fa_session_l4_key_t; + +typedef union { + struct { + ip46_address_t addr[2]; + fa_session_l4_key_t l4; + /* This field should align with u64 value in bihash_40_8 keyvalue struct */ + fa_packet_info_t pkt; + }; + clib_bihash_kv_40_8_t kv; +} fa_5tuple_t; + + +typedef struct { + fa_5tuple_t info; /* (5+1)*8 = 48 bytes */ + u64 last_active_time; /* +8 bytes = 56 */ + u32 sw_if_index; /* +4 bytes = 60 */ + union { + u8 as_u8[2]; + u16 as_u16; + } tcp_flags_seen; ; /* +2 bytes = 62 */ + u8 link_list_id; /* +1 bytes = 63 */ + u8 reserved1; /* +1 bytes = 64 */ + u32 link_prev_idx; + u32 link_next_idx; + u64 reserved2[7]; +} fa_session_t; + + +/* + * A few compile-time constraints on the size and the layout of the union, to ensure + * it makes sense both for bihash and for us. + */ + +#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1] +CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value)); +CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64)); +CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64)); +CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t)); + +/* Let's try to fit within the cacheline */ +CT_ASSERT_EQUAL(fa_session_t_size_is_64, sizeof(fa_session_t), 128); +#undef CT_ASSERT_EQUAL + + +typedef enum { + ACL_FA_ERROR_DROP, + ACL_FA_N_NEXT, +} acl_fa_next_t; + + +enum +{ + ACL_FA_CLEANER_RESCHEDULE = 1, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, +} acl_fa_cleaner_process_event_e; + +void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable); + + +#endif diff --git a/test/test_acl_plugin_l2l3.py b/test/test_acl_plugin_l2l3.py new file mode 100644 index 00000000..346825fc --- /dev/null +++ b/test/test_acl_plugin_l2l3.py @@ -0,0 +1,722 @@ +#!/usr/bin/env python +"""ACL IRB Test Case HLD: + +**config** + - L2 MAC learning enabled in l2bd + - 2 
routed interfaces untagged, bvi (Bridge Virtual Interface) + - 2 bridged interfaces in l2bd with bvi + +**test** + - sending ip4 eth pkts between routed interfaces + - 2 routed interfaces + - 2 bridged interfaces + + - 64B, 512B, 1518B, 9200B (ether_size) + + - burst of pkts per interface + - 257pkts per burst + - routed pkts hitting different FIB entries + - bridged pkts hitting different MAC entries + +**verify** + - all packets received correctly + +""" + +import unittest +from socket import inet_pton, AF_INET, AF_INET6 +from random import choice +from pprint import pprint + +from scapy.packet import Raw +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP, ICMP, TCP +from scapy.layers.inet6 import IPv6, ICMPv6Unknown, ICMPv6EchoRequest +from scapy.layers.inet6 import ICMPv6EchoReply, IPv6ExtHdrRouting + +from framework import VppTestCase, VppTestRunner +import time + + +class TestIpIrb(VppTestCase): + """IRB Test Case""" + + @classmethod + def setUpClass(cls): + """ + #. Create BD with MAC learning enabled and put interfaces to this BD. + #. Configure IPv4 addresses on loopback interface and routed interface. + #. Configure MAC address binding to IPv4 neighbors on loop0. + #. Configure MAC address on pg2. + #. Loopback BVI interface has remote hosts, one half of hosts are + behind pg0 second behind pg1. 
+ """ + super(TestIpIrb, cls).setUpClass() + + cls.pg_if_packet_sizes = [64, 512, 1518, 9018] # packet sizes + cls.bd_id = 10 + cls.remote_hosts_count = 250 + + # create 3 pg interfaces, 1 loopback interface + cls.create_pg_interfaces(range(3)) + cls.create_loopback_interfaces(range(1)) + + cls.interfaces = list(cls.pg_interfaces) + cls.interfaces.extend(cls.lo_interfaces) + + for i in cls.interfaces: + i.admin_up() + + # Create BD with MAC learning enabled and put interfaces to this BD + cls.vapi.sw_interface_set_l2_bridge( + cls.loop0.sw_if_index, bd_id=cls.bd_id, bvi=1) + cls.vapi.sw_interface_set_l2_bridge( + cls.pg0.sw_if_index, bd_id=cls.bd_id) + cls.vapi.sw_interface_set_l2_bridge( + cls.pg1.sw_if_index, bd_id=cls.bd_id) + + # Configure IPv4 addresses on loopback interface and routed interface + cls.loop0.config_ip4() + cls.loop0.config_ip6() + cls.pg2.config_ip4() + cls.pg2.config_ip6() + + # Configure MAC address binding to IPv4 neighbors on loop0 + cls.loop0.generate_remote_hosts(cls.remote_hosts_count) + cls.loop0.configure_ipv4_neighbors() + cls.loop0.configure_ipv6_neighbors() + # configure MAC address on pg2 + cls.pg2.resolve_arp() + cls.pg2.resolve_ndp() + + cls.WITHOUT_EH = False + cls.WITH_EH = True + + # Loopback BVI interface has remote hosts, one half of hosts are behind + # pg0 second behind pg1 + half = cls.remote_hosts_count // 2 + cls.pg0.remote_hosts = cls.loop0.remote_hosts[:half] + cls.pg1.remote_hosts = cls.loop0.remote_hosts[half:] + + def tearDown(self): + """Run standard test teardown and log ``show l2patch``, + ``show l2fib verbose``,``show bridge-domain detail``, + ``show ip arp``. 
+ """ + super(TestIpIrb, self).tearDown() + if not self.vpp_dead: + self.logger.info(self.vapi.cli("show l2patch")) + self.logger.info(self.vapi.cli("show classify tables")) + self.logger.info(self.vapi.cli("show vlib graph")) + self.logger.info(self.vapi.cli("show l2fib verbose")) + self.logger.info(self.vapi.cli("show bridge-domain %s detail" % + self.bd_id)) + self.logger.info(self.vapi.cli("show ip arp")) + self.logger.info(self.vapi.cli("show ip6 neighbors")) + self.logger.info(self.vapi.cli("show acl-plugin sessions")) + + def api_acl_add_replace(self, acl_index, r, count, tag="", + expected_retval=0): + """Add/replace an ACL + + :param int acl_index: ACL index to replace, 4294967295 to create new. + :param acl_rule r: ACL rules array. + :param str tag: symbolic tag (description) for this ACL. + :param int count: number of rules. + """ + return self.vapi.api(self.vapi.papi.acl_add_replace, + {'acl_index': acl_index, + 'r': r, + 'count': count, + 'tag': tag + }, expected_retval=expected_retval) + + def api_acl_interface_set_acl_list(self, sw_if_index, count, n_input, acls, + expected_retval=0): + return self.vapi.api(self.vapi.papi.acl_interface_set_acl_list, + {'sw_if_index': sw_if_index, + 'count': count, + 'n_input': n_input, + 'acls': acls + }, expected_retval=expected_retval) + + def api_acl_dump(self, acl_index, expected_retval=0): + return self.vapi.api(self.vapi.papi.acl_dump, + {'acl_index': acl_index}, + expected_retval=expected_retval) + + def create_stream(self, src_ip_if, dst_ip_if, reverse, packet_sizes, + is_ip6, expect_blocked, expect_established, + add_extension_header): + pkts = [] + rules = [] + permit_rules = [] + permit_and_reflect_rules = [] + total_packet_count = 8 + for i in range(0, total_packet_count): + modulo = (i//2) % 2 + can_reflect_this_packet = (modulo == 0) + is_permit = i % 2 + remote_dst_index = i % len(dst_ip_if.remote_hosts) + remote_dst_host = dst_ip_if.remote_hosts[remote_dst_index] + if is_permit == 1: + info = 
self.create_packet_info(src_ip_if, dst_ip_if) + payload = self.info_to_payload(info) + else: + to_be_blocked = False + if (expect_blocked and not expect_established): + to_be_blocked = True + if (not can_reflect_this_packet): + to_be_blocked = True + if to_be_blocked: + payload = "to be blocked" + else: + info = self.create_packet_info(src_ip_if, dst_ip_if) + payload = self.info_to_payload(info) + if reverse: + dst_mac = 'de:ad:00:00:00:00' + src_mac = remote_dst_host._mac + dst_ip6 = src_ip_if.remote_ip6 + src_ip6 = remote_dst_host.ip6 + dst_ip4 = src_ip_if.remote_ip4 + src_ip4 = remote_dst_host.ip4 + dst_l4 = 1234 + i + src_l4 = 4321 + i + else: + dst_mac = src_ip_if.local_mac + src_mac = src_ip_if.remote_mac + src_ip6 = src_ip_if.remote_ip6 + dst_ip6 = remote_dst_host.ip6 + src_ip4 = src_ip_if.remote_ip4 + dst_ip4 = remote_dst_host.ip4 + src_l4 = 1234 + i + dst_l4 = 4321 + i + + # default ULP should be something we do not use in tests + ulp_l4 = TCP(sport=src_l4, dport=dst_l4) + # potentially a chain of protocols leading to ULP + ulp = ulp_l4 + + if can_reflect_this_packet: + if is_ip6: + ulp_l4 = UDP(sport=src_l4, dport=dst_l4) + if add_extension_header: + # prepend some extension headers + ulp = (IPv6ExtHdrRouting() / IPv6ExtHdrRouting() / + IPv6ExtHdrRouting() / ulp_l4) + # uncomment below to test invalid ones + # ulp = IPv6ExtHdrRouting(len = 200) / ulp_l4 + else: + ulp = ulp_l4 + p = (Ether(dst=dst_mac, src=src_mac) / + IPv6(src=src_ip6, dst=dst_ip6) / + ulp / + Raw(payload)) + else: + ulp_l4 = UDP(sport=src_l4, dport=dst_l4) + # IPv4 does not allow extension headers + ulp = ulp_l4 + p = (Ether(dst=dst_mac, src=src_mac) / + IP(src=src_ip4, dst=dst_ip4) / + ulp / + Raw(payload)) + elif modulo == 1: + if is_ip6: + ulp_l4 = ICMPv6Unknown(type=128 + (i % 2), code=i % 2) + ulp = ulp_l4 + p = (Ether(dst=dst_mac, src=src_mac) / + IPv6(src=src_ip6, dst=dst_ip6) / + ulp / + Raw(payload)) + else: + ulp_l4 = ICMP(type=8 + (i % 2), code=i % 2) + ulp = ulp_l4 + p = 
(Ether(dst=dst_mac, src=src_mac) / + IP(src=src_ip4, dst=dst_ip4) / + ulp / + Raw(payload)) + + if i % 2 == 1: + info.data = p.copy() + size = packet_sizes[(i // 2) % len(packet_sizes)] + self.extend_packet(p, size) + pkts.append(p) + + rule_family = AF_INET6 if p.haslayer(IPv6) else AF_INET + rule_prefix_len = 128 if p.haslayer(IPv6) else 32 + rule_l3_layer = IPv6 if p.haslayer(IPv6) else IP + + if p.haslayer(UDP): + rule_l4_sport = p[UDP].sport + rule_l4_dport = p[UDP].dport + else: + if p.haslayer(ICMP): + rule_l4_sport = p[ICMP].type + rule_l4_dport = p[ICMP].code + else: + rule_l4_sport = p[ICMPv6Unknown].type + rule_l4_dport = p[ICMPv6Unknown].code + if p.haslayer(IPv6): + rule_l4_proto = ulp_l4.overload_fields[IPv6]['nh'] + else: + rule_l4_proto = p[IP].proto + + new_rule = { + 'is_permit': is_permit, + 'is_ipv6': p.haslayer(IPv6), + 'src_ip_addr': inet_pton(rule_family, + p[rule_l3_layer].src), + 'src_ip_prefix_len': rule_prefix_len, + 'dst_ip_addr': inet_pton(rule_family, + p[rule_l3_layer].dst), + 'dst_ip_prefix_len': rule_prefix_len, + 'srcport_or_icmptype_first': rule_l4_sport, + 'srcport_or_icmptype_last': rule_l4_sport, + 'dstport_or_icmpcode_first': rule_l4_dport, + 'dstport_or_icmpcode_last': rule_l4_dport, + 'proto': rule_l4_proto, + } + rules.append(new_rule) + new_rule_permit = new_rule.copy() + new_rule_permit['is_permit'] = 1 + permit_rules.append(new_rule_permit) + + new_rule_permit_and_reflect = new_rule.copy() + if can_reflect_this_packet: + new_rule_permit_and_reflect['is_permit'] = 2 + else: + new_rule_permit_and_reflect['is_permit'] = is_permit + permit_and_reflect_rules.append(new_rule_permit_and_reflect) + + return {'stream': pkts, + 'rules': rules, + 'permit_rules': permit_rules, + 'permit_and_reflect_rules': permit_and_reflect_rules} + + def verify_capture(self, dst_ip_if, src_ip_if, capture, reverse): + last_info = dict() + for i in self.interfaces: + last_info[i.sw_if_index] = None + + dst_ip_sw_if_index = dst_ip_if.sw_if_index + 
return + + for packet in capture: + l3 = IP if packet.haslayer(IP) else IPv6 + ip = packet[l3] + if packet.haslayer(UDP): + l4 = UDP + else: + if packet.haslayer(ICMP): + l4 = ICMP + else: + l4 = ICMPv6Unknown + + # Scapy IPv6 stuff is too smart for its own good. + # So we do this and coerce the ICMP into unknown type + if packet.haslayer(UDP): + data = str(packet[UDP][Raw]) + else: + if l3 == IP: + data = str(ICMP(str(packet[l3].payload))[Raw]) + else: + data = str(ICMPv6Unknown(str(packet[l3].payload)).msgbody) + udp_or_icmp = packet[l3].payload + payload_info = self.payload_to_info(data) + packet_index = payload_info.index + + self.assertEqual(payload_info.dst, dst_ip_sw_if_index) + + next_info = self.get_next_packet_info_for_interface2( + payload_info.src, dst_ip_sw_if_index, + last_info[payload_info.src]) + last_info[payload_info.src] = next_info + self.assertTrue(next_info is not None) + self.assertEqual(packet_index, next_info.index) + saved_packet = next_info.data + self.assertTrue(next_info is not None) + + # MAC: src, dst + if not reverse: + self.assertEqual(packet.src, dst_ip_if.local_mac) + host = dst_ip_if.host_by_mac(packet.dst) + + # IP: src, dst + # self.assertEqual(ip.src, src_ip_if.remote_ip4) + if saved_packet is not None: + self.assertEqual(ip.src, saved_packet[l3].src) + self.assertEqual(ip.dst, saved_packet[l3].dst) + if l4 == UDP: + self.assertEqual(udp_or_icmp.sport, saved_packet[l4].sport) + self.assertEqual(udp_or_icmp.dport, saved_packet[l4].dport) + else: + print("Saved packet is none") + # self.assertEqual(ip.dst, host.ip4) + + # UDP: + + def create_acls_for_a_stream(self, stream_dict, + test_l2_action, is_reflect): + r = stream_dict['rules'] + r_permit = stream_dict['permit_rules'] + r_permit_reflect = stream_dict['permit_and_reflect_rules'] + r_action = r_permit_reflect if is_reflect else r + reply = self.api_acl_add_replace(acl_index=4294967295, r=r_action, + count=len(r_action), tag="action acl") + action_acl_index = reply.acl_index 
+ reply = self.api_acl_add_replace(acl_index=4294967295, r=r_permit, + count=len(r_permit), tag="permit acl") + permit_acl_index = reply.acl_index + return {'L2': action_acl_index if test_l2_action else permit_acl_index, + 'L3': permit_acl_index if test_l2_action else action_acl_index, + 'permit': permit_acl_index, 'action': action_acl_index} + + def apply_acl_ip46_x_to_y(self, bridged_to_routed, test_l2_deny, + is_ip6, is_reflect, add_eh): + """ Apply the ACLs + """ + self.reset_packet_infos() + stream_dict = self.create_stream( + self.pg2, self.loop0, + bridged_to_routed, + self.pg_if_packet_sizes, is_ip6, + not is_reflect, False, add_eh) + stream = stream_dict['stream'] + acl_idx = self.create_acls_for_a_stream(stream_dict, test_l2_deny, + is_reflect) + n_input_l3 = 0 if bridged_to_routed else 1 + n_input_l2 = 1 if bridged_to_routed else 0 + self.api_acl_interface_set_acl_list(sw_if_index=self.pg2.sw_if_index, + count=1, + n_input=n_input_l3, + acls=[acl_idx['L3']]) + self.api_acl_interface_set_acl_list(sw_if_index=self.pg0.sw_if_index, + count=1, + n_input=n_input_l2, + acls=[acl_idx['L2']]) + self.api_acl_interface_set_acl_list(sw_if_index=self.pg1.sw_if_index, + count=1, + n_input=n_input_l2, + acls=[acl_idx['L2']]) + + def apply_acl_ip46_both_directions_reflect(self, + primary_is_bridged_to_routed, + reflect_on_l2, is_ip6, add_eh): + primary_is_routed_to_bridged = not primary_is_bridged_to_routed + self.reset_packet_infos() + stream_dict_fwd = self.create_stream(self.pg2, self.loop0, + primary_is_bridged_to_routed, + self.pg_if_packet_sizes, is_ip6, + False, False, add_eh) + acl_idx_fwd = self.create_acls_for_a_stream(stream_dict_fwd, + reflect_on_l2, True) + + stream_dict_rev = self.create_stream(self.pg2, self.loop0, + not primary_is_bridged_to_routed, + self.pg_if_packet_sizes, is_ip6, + True, True, add_eh) + # We want the primary action to be "deny" rather than reflect + acl_idx_rev = self.create_acls_for_a_stream(stream_dict_rev, + reflect_on_l2, False) 
+ + if primary_is_bridged_to_routed: + inbound_l2_acl = acl_idx_fwd['L2'] + else: + inbound_l2_acl = acl_idx_rev['L2'] + + if primary_is_routed_to_bridged: + outbound_l2_acl = acl_idx_fwd['L2'] + else: + outbound_l2_acl = acl_idx_rev['L2'] + + if primary_is_routed_to_bridged: + inbound_l3_acl = acl_idx_fwd['L3'] + else: + inbound_l3_acl = acl_idx_rev['L3'] + + if primary_is_bridged_to_routed: + outbound_l3_acl = acl_idx_fwd['L3'] + else: + outbound_l3_acl = acl_idx_rev['L3'] + + self.api_acl_interface_set_acl_list(sw_if_index=self.pg2.sw_if_index, + count=2, + n_input=1, + acls=[inbound_l3_acl, + outbound_l3_acl]) + self.api_acl_interface_set_acl_list(sw_if_index=self.pg0.sw_if_index, + count=2, + n_input=1, + acls=[inbound_l2_acl, + outbound_l2_acl]) + self.api_acl_interface_set_acl_list(sw_if_index=self.pg1.sw_if_index, + count=2, + n_input=1, + acls=[inbound_l2_acl, + outbound_l2_acl]) + + def apply_acl_ip46_routed_to_bridged(self, test_l2_deny, is_ip6, + is_reflect, add_eh): + self.apply_acl_ip46_x_to_y(False, test_l2_deny, is_ip6, + is_reflect, add_eh) + + def apply_acl_ip46_bridged_to_routed(self, test_l2_deny, is_ip6, + is_reflect, add_eh): + self.apply_acl_ip46_x_to_y(True, test_l2_deny, is_ip6, + is_reflect, add_eh) + + def run_traffic_ip46_x_to_y(self, bridged_to_routed, + test_l2_deny, is_ip6, + is_reflect, is_established, add_eh): + self.reset_packet_infos() + stream_dict = self.create_stream(self.pg2, self.loop0, + bridged_to_routed, + self.pg_if_packet_sizes, is_ip6, + not is_reflect, is_established, + add_eh) + stream = stream_dict['stream'] + + tx_if = self.pg0 if bridged_to_routed else self.pg2 + rx_if = self.pg2 if bridged_to_routed else self.pg0 + + tx_if.add_stream(stream) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + packet_count = self.get_packet_count_for_if_idx(self.loop0.sw_if_index) + rcvd1 = rx_if.get_capture(packet_count) + self.verify_capture(self.loop0, self.pg2, rcvd1, bridged_to_routed) + + def 
run_traffic_ip46_routed_to_bridged(self, test_l2_deny, is_ip6, + is_reflect, is_established, add_eh): + self.run_traffic_ip46_x_to_y(False, test_l2_deny, is_ip6, + is_reflect, is_established, add_eh) + + def run_traffic_ip46_bridged_to_routed(self, test_l2_deny, is_ip6, + is_reflect, is_established, add_eh): + self.run_traffic_ip46_x_to_y(True, test_l2_deny, is_ip6, + is_reflect, is_established, add_eh) + + def run_test_ip46_routed_to_bridged(self, test_l2_deny, + is_ip6, is_reflect, add_eh): + self.apply_acl_ip46_routed_to_bridged(test_l2_deny, + is_ip6, is_reflect, add_eh) + self.run_traffic_ip46_routed_to_bridged(test_l2_deny, is_ip6, + is_reflect, False, add_eh) + + def run_test_ip46_bridged_to_routed(self, test_l2_deny, + is_ip6, is_reflect, add_eh): + self.apply_acl_ip46_bridged_to_routed(test_l2_deny, + is_ip6, is_reflect, add_eh) + self.run_traffic_ip46_bridged_to_routed(test_l2_deny, is_ip6, + is_reflect, False, add_eh) + + def run_test_ip46_routed_to_bridged_and_back(self, test_l2_action, + is_ip6, add_eh): + self.apply_acl_ip46_both_directions_reflect(False, test_l2_action, + is_ip6, add_eh) + self.run_traffic_ip46_routed_to_bridged(test_l2_action, is_ip6, + True, False, add_eh) + self.run_traffic_ip46_bridged_to_routed(test_l2_action, is_ip6, + False, True, add_eh) + + def run_test_ip46_bridged_to_routed_and_back(self, test_l2_action, + is_ip6, add_eh): + self.apply_acl_ip46_both_directions_reflect(True, test_l2_action, + is_ip6, add_eh) + self.run_traffic_ip46_bridged_to_routed(test_l2_action, is_ip6, + True, False, add_eh) + self.run_traffic_ip46_routed_to_bridged(test_l2_action, is_ip6, + False, True, add_eh) + + def test_0000_ip6_irb_1(self): + """ ACL plugin prepare""" + if not self.vpp_dead: + cmd = "set acl-plugin session timeout udp idle 2000" + self.logger.info(self.vapi.ppcli(cmd)) + # uncomment to not skip past the routing header + # and watch the EH tests fail + # self.logger.info(self.vapi.ppcli( + # "set acl-plugin 
skip-ipv6-extension-header 43 0")) + # uncomment to test the session limit (stateful tests will fail) + # self.logger.info(self.vapi.ppcli( + # "set acl-plugin session table max-entries 1")) + # new datapath is the default, but just in case + # self.logger.info(self.vapi.ppcli( + # "set acl-plugin l2-datapath new")) + # If you want to see some tests fail, uncomment the next line + # self.logger.info(self.vapi.ppcli( + # "set acl-plugin l2-datapath old")) + + def test_0001_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, True, False, + self.WITHOUT_EH) + + def test_0002_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, True, False, + self.WITHOUT_EH) + + def test_0003_ip4_irb_1(self): + """ ACL IPv4 routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, False, False, + self.WITHOUT_EH) + + def test_0004_ip4_irb_1(self): + """ ACL IPv4 routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, False, False, + self.WITHOUT_EH) + + def test_0005_ip6_irb_1(self): + """ ACL IPv6 bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, True, False, + self.WITHOUT_EH) + + def test_0006_ip6_irb_1(self): + """ ACL IPv6 bridged -> routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, True, False, + self.WITHOUT_EH) + + def test_0007_ip6_irb_1(self): + """ ACL IPv4 bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, False, False, + self.WITHOUT_EH) + + def test_0008_ip6_irb_1(self): + """ ACL IPv4 bridged -> routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, False, False, + self.WITHOUT_EH) + + # Stateful ACL tests + def test_0101_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L2 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(True, True, + self.WITHOUT_EH) + + def test_0102_ip6_irb_1(self): + """ ACL 
IPv6 bridged -> routed, L2 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(True, True, + self.WITHOUT_EH) + + def test_0103_ip6_irb_1(self): + """ ACL IPv4 routed -> bridged, L2 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(True, False, + self.WITHOUT_EH) + + def test_0104_ip6_irb_1(self): + """ ACL IPv4 bridged -> routed, L2 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(True, False, + self.WITHOUT_EH) + + def test_0111_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L3 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(False, True, + self.WITHOUT_EH) + + def test_0112_ip6_irb_1(self): + """ ACL IPv6 bridged -> routed, L3 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(False, True, + self.WITHOUT_EH) + + def test_0113_ip6_irb_1(self): + """ ACL IPv4 routed -> bridged, L3 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(False, False, + self.WITHOUT_EH) + + def test_0114_ip6_irb_1(self): + """ ACL IPv4 bridged -> routed, L3 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(False, False, + self.WITHOUT_EH) + + # A block of tests with extension headers + + def test_1001_ip6_irb_1(self): + """ ACL IPv6+EH routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, True, False, + self.WITH_EH) + + def test_1002_ip6_irb_1(self): + """ ACL IPv6+EH routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, True, False, + self.WITH_EH) + + def test_1005_ip6_irb_1(self): + """ ACL IPv6+EH bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, True, False, + self.WITH_EH) + + def test_1006_ip6_irb_1(self): + """ ACL IPv6+EH bridged -> routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, True, False, + self.WITH_EH) + + def test_1101_ip6_irb_1(self): + """ ACL IPv6+EH routed -> bridged, L2 ACL permit+reflect""" + 
self.run_test_ip46_routed_to_bridged_and_back(True, True, + self.WITH_EH) + + def test_1102_ip6_irb_1(self): + """ ACL IPv6+EH bridged -> routed, L2 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(True, True, + self.WITH_EH) + + def test_1111_ip6_irb_1(self): + """ ACL IPv6+EH routed -> bridged, L3 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(False, True, + self.WITH_EH) + + def test_1112_ip6_irb_1(self): + """ ACL IPv6+EH bridged -> routed, L3 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(False, True, + self.WITH_EH) + + # Old datapath group + def test_8900_ip6_irb_1(self): + """ ACL plugin set old L2 datapath""" + if not self.vpp_dead: + cmd = "set acl-plugin l2-datapath old" + self.logger.info(self.vapi.ppcli(cmd)) + + def test_8901_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, True, False, + self.WITHOUT_EH) + + def test_8902_ip6_irb_1(self): + """ ACL IPv6 routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, True, False, + self.WITHOUT_EH) + + def test_8903_ip4_irb_1(self): + """ ACL IPv4 routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, False, False, + self.WITHOUT_EH) + + def test_8904_ip4_irb_1(self): + """ ACL IPv4 routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, False, False, + self.WITHOUT_EH) + + def test_8905_ip6_irb_1(self): + """ ACL IPv6 bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, True, False, + self.WITHOUT_EH) + + def test_8906_ip6_irb_1(self): + """ ACL IPv6 bridged -> routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, True, False, + self.WITHOUT_EH) + + def test_8907_ip6_irb_1(self): + """ ACL IPv4 bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, False, False, + self.WITHOUT_EH) + + def test_8908_ip6_irb_1(self): + """ ACL IPv4 bridged -> 
routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, False, False, + self.WITHOUT_EH) + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg From ca2cbc974615b4b0406ecbcada1c27266b3aabc7 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 29 Mar 2017 16:47:10 +0200 Subject: acl-plugin: cleaner node bugfixes (VPP-675) - use the counters in a private struct rather than node error counters - ensure the timer for the non-idle connections is restarted - fix the deletion of conn at the current tail the list Change-Id: I632f63574d2ced95fb75c5e7fb588c78fb3cce1c Signed-off-by: Andrew Yourtchenko (cherry picked from commit 097051a3bd1f63a177c0728f15375afd84a68918) --- src/plugins/acl/acl.c | 13 +++++++ src/plugins/acl/acl.h | 16 ++++++++ src/plugins/acl/fa_node.c | 97 ++++++++++++++++++++++------------------------- 3 files changed, 75 insertions(+), 51 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 4a4dd434..5d0b6c25 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -2120,10 +2120,15 @@ acl_show_aclplugin_fn (vlib_main_t * vm, u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? 
am->fa_session_dels_by_sw_if_index[sw_if_index] : 0; out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); })); + out0 = format(out0, "\n\nConn cleaner thread counters:\n"); +#define _(cnt, desc) out0 = format(out0, " %20lu: %s\n", am->cnt, desc); + foreach_fa_cleaner_counter; +#undef _ vlib_cli_output(vm, "\n\n%s\n\n", out0); vlib_cli_output(vm, "Sessions per interval: min %lu max %lu increment: %f ms current: %f ms", am->fa_min_deleted_sessions_per_interval, am->fa_max_deleted_sessions_per_interval, am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second); + vec_free(out0); } return error; @@ -2190,6 +2195,14 @@ acl_init (vlib_main_t * vm) am->fa_max_deleted_sessions_per_interval = ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL; am->fa_cleaner_wait_time_increment = ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT; + am->fa_cleaner_cnt_delete_by_sw_index = 0; + am->fa_cleaner_cnt_delete_by_sw_index_ok = 0; + am->fa_cleaner_cnt_unknown_event = 0; + am->fa_cleaner_cnt_deleted_sessions = 0; + am->fa_cleaner_cnt_timer_restarted = 0; + am->fa_cleaner_cnt_wait_with_timeout = 0; + + #define _(N, v, s) am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, v, 1); foreach_acl_eh #undef _ diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 47523636..f5a1fe0f 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -209,6 +209,22 @@ typedef struct { u32 fa_conn_list_head[ACL_N_TIMEOUTS]; u32 fa_conn_list_tail[ACL_N_TIMEOUTS]; + /* Counters for the cleaner thread */ + +#define foreach_fa_cleaner_counter \ + _(fa_cleaner_cnt_delete_by_sw_index, "delete_by_sw_index events") \ + _(fa_cleaner_cnt_delete_by_sw_index_ok, "delete_by_sw_index handled ok") \ + _(fa_cleaner_cnt_unknown_event, "unknown events received") \ + _(fa_cleaner_cnt_deleted_sessions, "sessions deleted") \ + 
_(fa_cleaner_cnt_timer_restarted, "session idle timers restarted") \ + _(fa_cleaner_cnt_wait_with_timeout, "event wait with timeout called") \ + _(fa_cleaner_cnt_wait_without_timeout, "event wait w/o timeout called") \ + _(fa_cleaner_cnt_event_cycles, "total event cycles") \ + _(fa_cleaner_cnt_already_deleted, "try to delete already deleted conn") \ +/* end of counters */ +#define _(id, desc) u32 id; + foreach_fa_cleaner_counter +#undef _ /* convenience */ vlib_main_t * vlib_main; diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index ac619a72..1f9117a6 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -533,6 +533,10 @@ acl_fa_conn_list_add_session (acl_main_t * am, u32 sess_id) if (~0 == am->fa_conn_list_head[list_id]) { am->fa_conn_list_head[list_id] = sess_id; + /* If it is a first conn in any list, kick off the cleaner */ + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_RESCHEDULE, 0); + } } @@ -556,7 +560,7 @@ acl_fa_conn_list_delete_session (acl_main_t *am, u32 sess_id) am->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx; } if (am->fa_conn_list_tail[sess->link_list_id] == sess_id) { - am->fa_conn_list_tail[sess->link_list_id] = sess->link_next_idx; + am->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx; } } @@ -982,14 +986,6 @@ acl_out_ip4_fa_node_fn (vlib_main_t * vm, /* *INDENT-OFF* */ #define foreach_acl_fa_cleaner_error \ -_(EVENT_CYCLE, "event processing cycle") \ -_(TIMER_RESTARTED, "restarted session timers") \ -_(DELETED_SESSIONS, "deleted sessions") \ -_(ALREADY_DELETED, "timer event for already deleted session") \ -_(DELETE_BY_SW_IF_INDEX, "delete by sw_if_index event") \ -_(DELETE_BY_SW_IF_INDEX_OK, "delete by sw_if_index completed ok") \ -_(WAIT_WITHOUT_TIMEOUT, "process waits without timeout") \ -_(WAIT_WITH_TIMEOUT, "process waits with timeout") \ _(UNKNOWN_EVENT, "unknown event received") \ /* end of errors */ @@ -1067,7 +1063,7 @@ 
acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, f64 cpu_cps = vm->clib_time.clocks_per_second; u64 next_expire; /* We should call timer wheel at least twice a second */ - u64 max_timer_wait_interval = cpu_cps / 2; + u64 max_timer_wait_interval = cpu_cps / 2; am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; u32 *expired = NULL; @@ -1079,10 +1075,24 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, { u32 count_deleted_sessions = 0; u32 count_already_deleted = 0; - u32 count_timer_restarted = 0; now = clib_cpu_time_now (); next_expire = now + am->fa_current_cleaner_timer_wait_interval; + int has_pending_conns = 0; + u8 tt; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) + { + if (~0 != am->fa_conn_list_head[tt]) + has_pending_conns = 1; + } + /* If no pending connections then no point in timing out */ + if (!has_pending_conns) + { + am->fa_cleaner_cnt_wait_without_timeout++; + (void) vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } + else { f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps; if (timeout <= 0) @@ -1095,11 +1105,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, /* Timing wheel code is happier if it is called regularly */ if (timeout > 0.5) timeout = 0.5; - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node. - index, - ACL_FA_CLEANER_ERROR_WAIT_WITH_TIMEOUT, - 1); + am->fa_cleaner_cnt_wait_with_timeout++; (void) vlib_process_wait_for_event_or_clock (vm, timeout); event_type = vlib_process_get_events (vm, &event_data); } @@ -1119,11 +1125,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, uword *sw_if_index0; vec_foreach (sw_if_index0, event_data) { - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node. 
- index, - ACL_FA_CLEANER_ERROR_DELETE_BY_SW_IF_INDEX, - 1); + am->fa_cleaner_cnt_delete_by_sw_index++; #ifdef FA_NODE_VERBOSE_DEBUG clib_warning ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", @@ -1134,11 +1136,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, acl_fa_clean_sessions_by_sw_if_index (am, *sw_if_index0, &count); count_deleted_sessions += count; - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node. - index, - ACL_FA_CLEANER_ERROR_DELETE_BY_SW_IF_INDEX_OK, - result); + am->fa_cleaner_cnt_delete_by_sw_index_ok += result; } } break; @@ -1147,17 +1145,21 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_warning ("ACL plugin connection cleaner: unknown event %u", event_type); #endif - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node. - index, - ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1); + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1); + am->fa_cleaner_cnt_unknown_event++; break; } { u8 tt = 0; for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { - while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval) && (~0 != am->fa_conn_list_head[tt]) && (acl_fa_conn_has_timed_out(am, now, am->fa_conn_list_head[tt]))) { + while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval) + && (~0 != am->fa_conn_list_head[tt]) + && (acl_fa_conn_has_timed_out(am, now, + am->fa_conn_list_head[tt]))) { u32 sess_id = am->fa_conn_list_head[tt]; vec_add1(expired, sess_id); acl_fa_conn_list_delete_session(am, sess_id); @@ -1165,7 +1167,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } - u32 *psid = NULL; vec_foreach (psid, expired) { @@ -1181,15 +1182,22 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, /* clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d", (int) session_index); */ - /* Pretend we did 
this in the past, at last_active moment */ - count_timer_restarted++; + /* There was activity on the session, so the idle timeout + has not passed. Enqueue for another time period. */ + + acl_fa_conn_list_add_session(am, session_index); + + /* FIXME: When/if moving to timer wheel, + pretend we did this in the past, + at last_active moment, so the timer is accurate */ + am->fa_cleaner_cnt_timer_restarted++; } else { /* clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", (int) session_index); */ acl_fa_delete_session (am, sw_if_index, session_index); - count_deleted_sessions++; + count_deleted_sessions++; } } else @@ -1210,22 +1218,9 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; } - - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node.index, - ACL_FA_CLEANER_ERROR_EVENT_CYCLE, 1); - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node.index, - ACL_FA_CLEANER_ERROR_TIMER_RESTARTED, - count_timer_restarted); - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node.index, - ACL_FA_CLEANER_ERROR_DELETED_SESSIONS, - count_deleted_sessions); - vlib_node_increment_counter (vm, - acl_fa_session_cleaner_process_node.index, - ACL_FA_CLEANER_ERROR_ALREADY_DELETED, - count_already_deleted); + am->fa_cleaner_cnt_event_cycles++; + am->fa_cleaner_cnt_deleted_sessions += count_deleted_sessions; + am->fa_cleaner_cnt_already_deleted += count_already_deleted; } /* NOT REACHED */ return 0; -- cgit 1.2.3-korg From d1b05647427c79cfd5322991bbe663fae65f37b5 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 4 Apr 2017 14:10:40 +0000 Subject: acl-plugin: make the IPv4/IPv6 non-first fragment handling in line with ACL (VPP-682) This fixes the previously-implicit "drop all non-first fragments" behavior to be more in line with 
security rules: a non-first fragment is treated for the purposes of matching the ACL as a packet with the port match succeeding. This allows to change the behavior to permit the fragmented packets for the default "permit specific rules" ruleset, but also gives the flexibility to block the non-initial fragments by inserting into the begining a bogus rule which would deny the L4 traffic. Also, add a knob which allows to potentially turn this behavior off in case of a dire need (and revert to dropping all non-initial fragments), via a debug CLI. Change-Id: I546b372b65ff2157d9c68b1d32f9e644f1dd71b4 Signed-off-by: Andrew Yourtchenko (cherry picked from commit 9fc0c26c6b28fd6c8b8142ea52f52eafa7e8c7ac) --- src/plugins/acl/acl.c | 7 +++++ src/plugins/acl/acl.h | 5 +++- src/plugins/acl/fa_node.c | 70 ++++++++++++++++++++++++++++++++++++++------ src/plugins/acl/fa_node.h | 8 +++-- test/test_acl_plugin.py | 49 +++++++++++++++++++++++++++---- test/test_acl_plugin_l2l3.py | 51 ++++++++++++++++++++++++++++++-- 6 files changed, 168 insertions(+), 22 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 6657d370..98c74b9b 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -2008,6 +2008,11 @@ acl_set_aclplugin_fn (vlib_main_t * vm, } goto done; } + if (unformat (input, "l4-match-nonfirst-fragment %u", &val)) + { + am->l4_match_nonfirst_fragment = (val != 0); + goto done; + } if (unformat (input, "session")) { if (unformat (input, "clear")) { acl_main_t *am = &acl_main; @@ -2205,6 +2210,8 @@ acl_init (vlib_main_t * vm) foreach_acl_eh #undef _ + am->l4_match_nonfirst_fragment = 1; + return error; } diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index f5a1fe0f..d708c521 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -181,6 +181,9 @@ typedef struct { /* EH values that we can skip over */ uword *fa_ipv6_known_eh_bitmap; + /* whether to match L4 ACEs with ports on the non-initial 
fragment */ + int l4_match_nonfirst_fragment; + /* conn table per-interface conn table parameters */ u32 fa_conn_table_hash_num_buckets; uword fa_conn_table_hash_memory_size; @@ -235,6 +238,7 @@ typedef struct { _(HOPBYHOP , 0 , "IPv6ExtHdrHopByHop") \ _(ROUTING , 43 , "IPv6ExtHdrRouting") \ _(DESTOPT , 60 , "IPv6ExtHdrDestOpt") \ + _(FRAGMENT , 44 , "IPv6ExtHdrFragment") \ _(MOBILITY , 135, "Mobility Header") \ _(HIP , 139, "Experimental use Host Identity Protocol") \ _(SHIM6 , 140, "Shim6 Protocol") \ @@ -247,7 +251,6 @@ typedef struct { Also, Fragment header needs special processing. _(NONEXT , 59 , "NoNextHdr") \ - _(FRAGMENT , 44 , "IPv6ExtHdrFragment") \ ESP is hiding its internal format, so no point in trying to go past it. diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 1f9117a6..e12cbaa7 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -191,7 +191,21 @@ acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, { if (pkt_5tuple->l4.proto != r->proto) continue; - /* A sanity check just to ensure what we jave just matched was a valid L4 extracted from the packet */ + + if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment && + am->l4_match_nonfirst_fragment)) + { + /* non-initial fragment with frag match configured - match this rule */ + *trace_bitmap |= 0x80000000; + *r_action = r->is_permit; + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = i; + return 1; + } + + /* A sanity check just to ensure we are about to match the ports extracted from the packet */ if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid)) continue; @@ -312,6 +326,10 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, l3_offset = 0; } + /* key[0..3] contains src/dst address and is cleared/set below */ + /* Remainder of the key and per-packet non-key data */ + p5tuple_pkt->kv.key[4] = 0; + p5tuple_pkt->kv.value = 0; if (is_ip6) { @@ -333,12 +351,33 @@ acl_fill_5tuple 
(acl_main_t * am, vlib_buffer_t * b0, int is_ip6, int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); if (PREDICT_FALSE (need_skip_eh)) { - /* FIXME: add fragment header special handling. Currently causes treated as unknown header. */ while (need_skip_eh && offset_within_packet (b0, l4_offset)) { - u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset); - proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); - l4_offset += 8 * (1 + (u16) nwords); + /* Fragment header needs special handling */ + if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto)) + { + proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); + u16 frag_offset; + clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset)); + frag_offset = ntohs(frag_offset) >> 3; + if (frag_offset) + { + p5tuple_pkt->pkt.is_nonfirst_fragment = 1; + /* invalidate L4 offset so we don't try to find L4 info */ + l4_offset += b0->current_length; + } + else + { + /* First fragment: skip the frag header and move on. 
*/ + l4_offset += 8; + } + } + else + { + u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset); + proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); + l4_offset += 8 * (1 + (u16) nwords); + } #ifdef FA_NODE_VERBOSE_DEBUG clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d", proto, l4_offset); @@ -369,13 +408,26 @@ acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, offsetof (ip4_header_t, protocol) + l3_offset); l4_offset = l3_offset + sizeof (ip4_header_t); + u16 flags_and_fragment_offset; + clib_memcpy (&flags_and_fragment_offset, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + flags_and_fragment_offset)) + l3_offset, + sizeof(flags_and_fragment_offset)); + flags_and_fragment_offset = ntohs (flags_and_fragment_offset); + + /* non-initial fragments have non-zero offset */ + if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset))) + { + p5tuple_pkt->pkt.is_nonfirst_fragment = 1; + /* invalidate L4 offset so we don't try to find L4 info */ + l4_offset += b0->current_length; + } + } - /* Remainder of the key and per-packet non-key data */ - p5tuple_pkt->kv.key[4] = 0; - p5tuple_pkt->kv.value = 0; + p5tuple_pkt->l4.proto = proto; if (PREDICT_TRUE (offset_within_packet (b0, l4_offset))) { - p5tuple_pkt->l4.proto = proto; p5tuple_pkt->pkt.l4_valid = 1; if (icmp_protos[is_ip6] == proto) { diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 76a40a38..8edd0069 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -22,10 +22,12 @@ typedef union { u64 as_u64; struct { - u8 tcp_flags_valid; u8 tcp_flags; - u8 is_input; - u8 l4_valid; + u8 tcp_flags_valid:1; + u8 is_input:1; + u8 l4_valid:1; + u8 is_nonfirst_fragment:1; + u8 flags_reserved:4; }; } fa_packet_info_t; diff --git a/test/test_acl_plugin.py b/test/test_acl_plugin.py index 2bbebe85..b051d457 100644 --- a/test/test_acl_plugin.py +++ b/test/test_acl_plugin.py @@ -9,6 +9,7 @@ from scapy.packet import Raw from scapy.layers.l2 import Ether from 
scapy.layers.inet import IP, TCP, UDP, ICMP from scapy.layers.inet6 import IPv6, ICMPv6EchoRequest +from scapy.layers.inet6 import IPv6ExtHdrFragment from framework import VppTestCase, VppTestRunner from util import Host, ppp @@ -229,7 +230,7 @@ class TestACLplugin(VppTestCase): return '' def create_stream(self, src_if, packet_sizes, traffic_type=0, ipv6=0, - proto=-1, ports=0): + proto=-1, ports=0, fragments=False): """ Create input packet stream for defined interface using hosts or deleted_hosts list. @@ -263,8 +264,14 @@ class TestACLplugin(VppTestCase): p = Ether(dst=dst_host.mac, src=src_host.mac) if pkt_info.ip: p /= IPv6(dst=dst_host.ip6, src=src_host.ip6) + if fragments: + p /= IPv6ExtHdrFragment(offset=64, m=1) else: - p /= IP(src=src_host.ip4, dst=dst_host.ip4) + if fragments: + p /= IP(src=src_host.ip4, dst=dst_host.ip4, + flags=1, frag=64) + else: + p /= IP(src=src_host.ip4, dst=dst_host.ip4) if traffic_type == self.ICMP: if pkt_info.ip: p /= ICMPv6EchoRequest(type=self.icmp6_type, @@ -381,14 +388,16 @@ class TestACLplugin(VppTestCase): self.pg_enable_capture(self.pg_interfaces) self.pg_start() - def run_verify_test(self, traffic_type=0, ip_type=0, proto=-1, ports=0): + def run_verify_test(self, traffic_type=0, ip_type=0, proto=-1, ports=0, + frags=False): # Test # Create incoming packet streams for packet-generator interfaces pkts_cnt = 0 for i in self.pg_interfaces: if self.flows.__contains__(i): pkts = self.create_stream(i, self.pg_if_packet_sizes, - traffic_type, ip_type, proto, ports) + traffic_type, ip_type, proto, ports, + frags) if len(pkts) > 0: i.add_stream(pkts) pkts_cnt += len(pkts) @@ -408,13 +417,14 @@ class TestACLplugin(VppTestCase): self.verify_capture(dst_if, capture, traffic_type, ip_type) def run_verify_negat_test(self, traffic_type=0, ip_type=0, proto=-1, - ports=0): + ports=0, frags=False): # Test self.reset_packet_infos() for i in self.pg_interfaces: if self.flows.__contains__(i): pkts = self.create_stream(i, 
self.pg_if_packet_sizes, - traffic_type, ip_type, proto, ports) + traffic_type, ip_type, proto, ports, + frags) if len(pkts) > 0: i.add_stream(pkts) @@ -1011,5 +1021,32 @@ class TestACLplugin(VppTestCase): self.logger.info("ACLP_TEST_FINISH_0020") + def test_0021_udp_deny_port_verify_fragment_deny(self): + """ deny single UDPv4/v6, permit ip any, verify non-initial fragment blocked + """ + self.logger.info("ACLP_TEST_START_0021") + + port = random.randint(0, 65535) + # Add an ACL + rules = [] + rules.append(self.create_rule(self.IPV4, self.DENY, port, + self.proto[self.IP][self.UDP])) + rules.append(self.create_rule(self.IPV6, self.DENY, port, + self.proto[self.IP][self.UDP])) + # deny ip any any in the end + rules.append(self.create_rule(self.IPV4, self.PERMIT, + self.PORTS_ALL, 0)) + rules.append(self.create_rule(self.IPV6, self.PERMIT, + self.PORTS_ALL, 0)) + + # Apply rules + self.apply_rules(rules, "deny ip4/ip6 udp "+str(port)) + + # Traffic should not pass + self.run_verify_negat_test(self.IP, self.IPRANDOM, + self.proto[self.IP][self.UDP], port, True) + + self.logger.info("ACLP_TEST_FINISH_0021") + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_acl_plugin_l2l3.py b/test/test_acl_plugin_l2l3.py index 346825fc..32abf184 100644 --- a/test/test_acl_plugin_l2l3.py +++ b/test/test_acl_plugin_l2l3.py @@ -33,6 +33,7 @@ from scapy.layers.l2 import Ether from scapy.layers.inet import IP, UDP, ICMP, TCP from scapy.layers.inet6 import IPv6, ICMPv6Unknown, ICMPv6EchoRequest from scapy.layers.inet6 import ICMPv6EchoReply, IPv6ExtHdrRouting +from scapy.layers.inet6 import IPv6ExtHdrFragment from framework import VppTestCase, VppTestRunner import time @@ -203,7 +204,7 @@ class TestIpIrb(VppTestCase): if add_extension_header: # prepend some extension headers ulp = (IPv6ExtHdrRouting() / IPv6ExtHdrRouting() / - IPv6ExtHdrRouting() / ulp_l4) + IPv6ExtHdrFragment(offset=0, m=1) / ulp_l4) # uncomment below to test invalid ones # ulp = 
IPv6ExtHdrRouting(len = 200) / ulp_l4 else: @@ -214,10 +215,12 @@ class TestIpIrb(VppTestCase): Raw(payload)) else: ulp_l4 = UDP(sport=src_l4, dport=dst_l4) - # IPv4 does not allow extension headers + # IPv4 does not allow extension headers, + # but we rather make it a first fragment + flags = 1 if add_extension_header else 0 ulp = ulp_l4 p = (Ether(dst=dst_mac, src=src_mac) / - IP(src=src_ip4, dst=dst_ip4) / + IP(src=src_ip4, dst=dst_ip4, frag=0, flags=flags) / ulp / Raw(payload)) elif modulo == 1: @@ -670,6 +673,48 @@ class TestIpIrb(VppTestCase): self.run_test_ip46_bridged_to_routed_and_back(False, True, self.WITH_EH) + # IPv4 with "MF" bit set + + def test_1201_ip6_irb_1(self): + """ ACL IPv4+MF routed -> bridged, L2 ACL deny""" + self.run_test_ip46_routed_to_bridged(True, False, False, + self.WITH_EH) + + def test_1202_ip6_irb_1(self): + """ ACL IPv4+MF routed -> bridged, L3 ACL deny""" + self.run_test_ip46_routed_to_bridged(False, False, False, + self.WITH_EH) + + def test_1205_ip6_irb_1(self): + """ ACL IPv4+MF bridged -> routed, L2 ACL deny """ + self.run_test_ip46_bridged_to_routed(True, False, False, + self.WITH_EH) + + def test_1206_ip6_irb_1(self): + """ ACL IPv4+MF bridged -> routed, L3 ACL deny """ + self.run_test_ip46_bridged_to_routed(False, False, False, + self.WITH_EH) + + def test_1301_ip6_irb_1(self): + """ ACL IPv4+MF routed -> bridged, L2 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(True, False, + self.WITH_EH) + + def test_1302_ip6_irb_1(self): + """ ACL IPv4+MF bridged -> routed, L2 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(True, False, + self.WITH_EH) + + def test_1311_ip6_irb_1(self): + """ ACL IPv4+MF routed -> bridged, L3 ACL permit+reflect""" + self.run_test_ip46_routed_to_bridged_and_back(False, False, + self.WITH_EH) + + def test_1312_ip6_irb_1(self): + """ ACL IPv4+MF bridged -> routed, L3 ACL permit+reflect""" + self.run_test_ip46_bridged_to_routed_and_back(False, False, + 
self.WITH_EH) + # Old datapath group def test_8900_ip6_irb_1(self): """ ACL plugin set old L2 datapath""" -- cgit 1.2.3-korg From 24beb840400adcdd0fbcd85727ab1a2fa7040dca Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 18 Apr 2017 13:28:28 +0000 Subject: Clean up old datapath code in ACL plugin. Change-Id: I3d64d5ced38a68f3fa208be00c49d20c4e6d4d0e Signed-off-by: Andrew Yourtchenko --- src/plugins/acl.am | 4 - src/plugins/acl/acl.c | 379 +--------------- src/plugins/acl/acl.h | 31 +- src/plugins/acl/l2sess.c | 238 ---------- src/plugins/acl/l2sess.h | 148 ------- src/plugins/acl/l2sess_node.c | 763 -------------------------------- src/plugins/acl/node_in.c | 168 ------- src/plugins/acl/node_in.h | 12 - src/plugins/acl/node_out.c | 169 ------- src/plugins/acl/node_out.h | 12 - src/plugins/acl/test/run-python | 28 -- src/plugins/acl/test/run-scapy | 26 -- src/plugins/acl/test/test_acl_plugin.py | 118 ----- test/test_acl_plugin_l2l3.py | 48 -- 14 files changed, 18 insertions(+), 2126 deletions(-) delete mode 100644 src/plugins/acl/l2sess.c delete mode 100644 src/plugins/acl/l2sess.h delete mode 100644 src/plugins/acl/l2sess_node.c delete mode 100644 src/plugins/acl/node_in.c delete mode 100644 src/plugins/acl/node_in.h delete mode 100644 src/plugins/acl/node_out.c delete mode 100644 src/plugins/acl/node_out.h delete mode 100755 src/plugins/acl/test/run-python delete mode 100755 src/plugins/acl/test/run-scapy delete mode 100644 src/plugins/acl/test/test_acl_plugin.py (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl.am b/src/plugins/acl.am index 524d9064..01e0197c 100644 --- a/src/plugins/acl.am +++ b/src/plugins/acl.am @@ -16,11 +16,7 @@ vppplugins_LTLIBRARIES += acl_plugin.la acl_plugin_la_SOURCES = \ acl/acl.c \ - acl/node_in.c \ - acl/node_out.c \ acl/fa_node.c \ - acl/l2sess.c \ - acl/l2sess_node.c \ acl/l2sess.h \ acl/manual_fns.h \ acl/acl_plugin.api.h diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 98c74b9b..83dc0c18 
100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -52,8 +51,6 @@ #include #undef vl_api_version -#include "node_in.h" -#include "node_out.h" #include "fa_node.h" acl_main_t acl_main; @@ -713,265 +710,6 @@ acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input, } -static void * -get_ptr_to_offset (vlib_buffer_t * b0, int offset) -{ - u8 *p = vlib_buffer_get_current (b0) + offset; - return p; -} - -static u8 -acl_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6) -{ - u8 proto; - int proto_offset; - if (node_is_ip6) - { - proto_offset = 20; - } - else - { - proto_offset = 23; - } - proto = *((u8 *) vlib_buffer_get_current (b0) + proto_offset); - return proto; -} - -static int -acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2, int prefixlen, - int is_ip6) -{ - if (prefixlen == 0) - { - /* match any always succeeds */ - return 1; - } - if (is_ip6) - { - if (memcmp (addr1, addr2, prefixlen / 8)) - { - /* If the starting full bytes do not match, no point in bittwidling the thumbs further */ - return 0; - } - if (prefixlen % 8) - { - u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); - u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); - u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); - return (b1 & mask0) == b2; - } - else - { - /* The prefix fits into integer number of bytes, so nothing left to do */ - return 1; - } - } - else - { - uint32_t a1 = ntohl (addr1->ip4.as_u32); - uint32_t a2 = ntohl (addr2->ip4.as_u32); - uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1); - return (a1 & mask0) == a2; - } -} - -static int -acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6) -{ - return ((port >= port_first) && (port <= port_last)); -} - -static int -acl_packet_match (acl_main_t * am, u32 acl_index, vlib_buffer_t * b0, - u8 * r_action, int *r_is_ip6, u32 * r_acl_match_p, - u32 * r_rule_match_p, u32 * trace_bitmap) -{ - 
ethernet_header_t *h0; - u16 type0; - - ip46_address_t src, dst; - int is_ip6; - int is_ip4; - u8 proto; - u16 src_port = 0; - u16 dst_port = 0; - u8 tcp_flags = 0; - int i; - acl_list_t *a; - acl_rule_t *r; - - h0 = vlib_buffer_get_current (b0); - type0 = clib_net_to_host_u16 (h0->type); - is_ip4 = (type0 == ETHERNET_TYPE_IP4); - is_ip6 = (type0 == ETHERNET_TYPE_IP6); - - if (!(is_ip4 || is_ip6)) - { - return 0; - } - /* The bunch of hardcoded offsets here is intentional to get rid of them - ASAP, when getting to a faster matching code */ - if (is_ip4) - { - clib_memcpy (&src.ip4, get_ptr_to_offset (b0, 26), 4); - clib_memcpy (&dst.ip4, get_ptr_to_offset (b0, 30), 4); - proto = acl_get_l4_proto (b0, 0); - if (1 == proto) - { - *trace_bitmap |= 0x00000001; - /* type */ - src_port = ((u16) (*(u8 *) get_ptr_to_offset (b0, 34))); - /* code */ - dst_port = ((u16) (*(u8 *) get_ptr_to_offset (b0, 35))); - } else { - /* assume TCP/UDP */ - src_port = ntohs ((u16) (*(u16 *) get_ptr_to_offset (b0, 34))); - dst_port = ntohs ((u16) (*(u16 *) get_ptr_to_offset (b0, 36))); - /* UDP gets ability to check on an oddball data byte as a bonus */ - tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 20 + 13); - } - } - else /* is_ipv6 implicitly */ - { - clib_memcpy (&src, get_ptr_to_offset (b0, 22), 16); - clib_memcpy (&dst, get_ptr_to_offset (b0, 38), 16); - proto = acl_get_l4_proto (b0, 1); - if (58 == proto) - { - *trace_bitmap |= 0x00000002; - /* type */ - src_port = (u16) (*(u8 *) get_ptr_to_offset (b0, 54)); - /* code */ - dst_port = (u16) (*(u8 *) get_ptr_to_offset (b0, 55)); - } - else - { - /* assume TCP/UDP */ - src_port = ntohs ((u16) (*(u16 *) get_ptr_to_offset (b0, 54))); - dst_port = ntohs ((u16) (*(u16 *) get_ptr_to_offset (b0, 56))); - tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 40 + 13); - } - } - if (pool_is_free_index (am->acls, acl_index)) - { - if (r_acl_match_p) - *r_acl_match_p = acl_index; - if (r_rule_match_p) - *r_rule_match_p = -1; - /* the ACL does not 
exist but is used for policy. Block traffic. */ - return 0; - } - a = am->acls + acl_index; - for (i = 0; i < a->count; i++) - { - r = a->rules + i; - if (is_ip6 != r->is_ipv6) - { - continue; - } - if (!acl_match_addr (&dst, &r->dst, r->dst_prefixlen, is_ip6)) - continue; - if (!acl_match_addr (&src, &r->src, r->src_prefixlen, is_ip6)) - continue; - if (r->proto) - { - if (proto != r->proto) - continue; - if (!acl_match_port - (src_port, r->src_port_or_type_first, r->src_port_or_type_last, - is_ip6)) - continue; - if (!acl_match_port - (dst_port, r->dst_port_or_code_first, r->dst_port_or_code_last, - is_ip6)) - continue; - /* No need for check of proto == TCP, since in other rules both fields should be zero, so this match will succeed */ - if ((tcp_flags & r->tcp_flags_mask) != r->tcp_flags_value) - continue; - } - /* everything matches! */ - *r_action = r->is_permit; - *r_is_ip6 = is_ip6; - if (r_acl_match_p) - *r_acl_match_p = acl_index; - if (r_rule_match_p) - *r_rule_match_p = i; - return 1; - } - return 0; -} - -void -input_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp, - u32 * acl_match_p, u32 * rule_match_p, - u32 * trace_bitmap) -{ - acl_main_t *am = &acl_main; - uint8_t action = 0; - int is_ip6 = 0; - int i; - vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); - for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]); - i++) - { - if (acl_packet_match - (am, am->input_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action, - &is_ip6, acl_match_p, rule_match_p, trace_bitmap)) - { - if (is_ip6) - { - *nextp = am->acl_in_ip6_match_next[action]; - } - else - { - *nextp = am->acl_in_ip4_match_next[action]; - } - return; - } - } - if (vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]) > 0) - { - /* If there are ACLs and none matched, deny by default */ - *nextp = 0; - } - -} - -void -output_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp, - u32 * acl_match_p, u32 * rule_match_p, - u32 * 
trace_bitmap) -{ - acl_main_t *am = &acl_main; - uint8_t action = 0; - int is_ip6 = 0; - int i; - vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); - for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); - i++) - { - if (acl_packet_match - (am, am->output_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action, - &is_ip6, acl_match_p, rule_match_p, trace_bitmap)) - { - if (is_ip6) - { - *nextp = am->acl_out_ip6_match_next[action]; - } - else - { - *nextp = am->acl_out_ip4_match_next[action]; - } - return; - } - } - if (vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]) > 0) - { - /* If there are ACLs and none matched, deny by default */ - *nextp = 0; - } -} - typedef struct { u8 is_ipv6; @@ -1799,67 +1537,7 @@ setup_message_id_table (acl_main_t * am, api_main_t * apim) #undef _ } -u32 -register_match_action_nexts (u32 next_in_ip4, u32 next_in_ip6, - u32 next_out_ip4, u32 next_out_ip6) -{ - acl_main_t *am = &acl_main; - if (am->n_match_actions == 255) - { - return ~0; - } - u32 act = am->n_match_actions; - am->n_match_actions++; - am->acl_in_ip4_match_next[act] = next_in_ip4; - am->acl_in_ip6_match_next[act] = next_in_ip6; - am->acl_out_ip4_match_next[act] = next_out_ip4; - am->acl_out_ip6_match_next[act] = next_out_ip6; - return act; -} - -void -acl_setup_nodes (void) -{ - vlib_main_t *vm = vlib_get_main (); - acl_main_t *am = &acl_main; - vlib_node_t *n; - - n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify"); - am->l2_input_classify_next_acl_old = - vlib_node_add_next_with_slot (vm, n->index, acl_in_node.index, ~0); - n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify"); - am->l2_output_classify_next_acl_old = - vlib_node_add_next_with_slot (vm, n->index, acl_out_node.index, ~0); - - feat_bitmap_init_next_nodes (vm, acl_in_node.index, L2INPUT_N_FEAT, - l2input_get_feat_names (), - am->acl_in_node_feat_next_node_index); - - feat_bitmap_init_next_nodes (vm, acl_out_node.index, L2OUTPUT_N_FEAT, - 
l2output_get_feat_names (), - am->acl_out_node_feat_next_node_index); - - memset (&am->acl_in_ip4_match_next[0], 0, - sizeof (am->acl_in_ip4_match_next)); - memset (&am->acl_in_ip6_match_next[0], 0, - sizeof (am->acl_in_ip6_match_next)); - memset (&am->acl_out_ip4_match_next[0], 0, - sizeof (am->acl_out_ip4_match_next)); - memset (&am->acl_out_ip6_match_next[0], 0, - sizeof (am->acl_out_ip6_match_next)); - am->n_match_actions = 0; - - am->l2_input_classify_next_acl_ip4 = am->l2_input_classify_next_acl_old; - am->l2_input_classify_next_acl_ip6 = am->l2_input_classify_next_acl_old; - am->l2_output_classify_next_acl_ip4 = am->l2_output_classify_next_acl_old; - am->l2_output_classify_next_acl_ip6 = am->l2_output_classify_next_acl_old; - - register_match_action_nexts (0, 0, 0, 0); /* drop */ - register_match_action_nexts (~0, ~0, ~0, ~0); /* permit */ - register_match_action_nexts (ACL_IN_L2S_INPUT_IP4_ADD, ACL_IN_L2S_INPUT_IP6_ADD, ACL_OUT_L2S_OUTPUT_IP4_ADD, ACL_OUT_L2S_OUTPUT_IP6_ADD); /* permit + create session */ -} - -void +static void acl_setup_fa_nodes (void) { vlib_main_t *vm = vlib_get_main (); @@ -1871,9 +1549,9 @@ acl_setup_fa_nodes (void) n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip6-l2"); - am->fa_l2_input_classify_next_acl_ip4 = + am->l2_input_classify_next_acl_ip4 = vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); - am->fa_l2_input_classify_next_acl_ip6 = + am->l2_input_classify_next_acl_ip6 = vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); feat_bitmap_init_next_nodes (vm, n4->index, L2INPUT_N_FEAT, @@ -1889,9 +1567,9 @@ acl_setup_fa_nodes (void) n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip4-l2"); n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip6-l2"); - am->fa_l2_output_classify_next_acl_ip4 = + am->l2_output_classify_next_acl_ip4 = vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); - am->fa_l2_output_classify_next_acl_ip6 = + am->l2_output_classify_next_acl_ip6 = 
vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); feat_bitmap_init_next_nodes (vm, n4->index, L2OUTPUT_N_FEAT, @@ -1901,19 +1579,12 @@ acl_setup_fa_nodes (void) feat_bitmap_init_next_nodes (vm, n6->index, L2OUTPUT_N_FEAT, l2output_get_feat_names (), am->fa_acl_out_ip6_l2_node_feat_next_node_index); - - am->l2_input_classify_next_acl_ip4 = am->fa_l2_input_classify_next_acl_ip4; - am->l2_input_classify_next_acl_ip6 = am->fa_l2_input_classify_next_acl_ip6; - am->l2_output_classify_next_acl_ip4 = am->fa_l2_output_classify_next_acl_ip4; - am->l2_output_classify_next_acl_ip6 = am->fa_l2_output_classify_next_acl_ip6; - } -void +static void acl_set_timeout_sec(int timeout_type, u32 value) { acl_main_t *am = &acl_main; - l2sess_main_t *sm = &l2sess_main; clib_time_t *ct = &am->vlib_main->clib_time; if (timeout_type < ACL_N_TIMEOUTS) { @@ -1922,30 +1593,17 @@ acl_set_timeout_sec(int timeout_type, u32 value) clib_warning("Unknown timeout type %d", timeout_type); return; } - - switch(timeout_type) { - case ACL_TIMEOUT_UDP_IDLE: - sm->udp_session_idle_timeout = (u64)(((f64)value)/ct->seconds_per_clock); - break; - case ACL_TIMEOUT_TCP_IDLE: - sm->tcp_session_idle_timeout = (u64)(((f64)value)/ct->seconds_per_clock); - break; - case ACL_TIMEOUT_TCP_TRANSIENT: - sm->tcp_session_transient_timeout = (u64)(((f64)value)/ct->seconds_per_clock); - break; - default: - clib_warning("Unknown timeout type %d", timeout_type); - } + am->session_timeout[timeout_type] = (u64)(((f64)value)/ct->seconds_per_clock); } -void +static void acl_set_session_max_entries(u32 value) { acl_main_t *am = &acl_main; am->fa_conn_table_max_entries = value; } -int +static int acl_set_skip_ipv6_eh(u32 eh, u32 value) { acl_main_t *am = &acl_main; @@ -1984,24 +1642,6 @@ acl_set_aclplugin_fn (vlib_main_t * vm, uword memory_size = 0; acl_main_t *am = &acl_main; - /* The new datapath is the default. 
This command exists out of precaution and for comparing the two */ - if (unformat (input, "l2-datapath")) { - if (unformat(input, "old")) { - am->l2_input_classify_next_acl_ip4 = am->l2_input_classify_next_acl_old; - am->l2_input_classify_next_acl_ip6 = am->l2_input_classify_next_acl_old; - am->l2_output_classify_next_acl_ip4 = am->l2_output_classify_next_acl_old; - am->l2_output_classify_next_acl_ip6 = am->l2_output_classify_next_acl_old; - goto done; - } - if (unformat(input, "new")) { - am->l2_input_classify_next_acl_ip4 = am->fa_l2_input_classify_next_acl_ip4; - am->l2_input_classify_next_acl_ip6 = am->fa_l2_input_classify_next_acl_ip6; - am->l2_output_classify_next_acl_ip4 = am->fa_l2_output_classify_next_acl_ip4; - am->l2_output_classify_next_acl_ip6 = am->fa_l2_output_classify_next_acl_ip6; - goto done; - } - goto done; - } if (unformat (input, "skip-ipv6-extension-header %u %u", &eh_val, &val)) { if(!acl_set_skip_ipv6_eh(eh_val, val)) { error = clib_error_return(0, "expecting eh=0..255, value=0..1"); @@ -2170,7 +1810,6 @@ acl_init (vlib_main_t * vm) VL_MSG_FIRST_AVAILABLE); error = acl_plugin_api_hookup (vm); - acl_setup_nodes (); /* Add our API messages to the global name_crc hash table */ setup_message_id_table (am, &api_main); diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index d708c521..eb074a7b 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -30,6 +30,10 @@ #define ACL_PLUGIN_VERSION_MAJOR 1 #define ACL_PLUGIN_VERSION_MINOR 2 +#define UDP_SESSION_IDLE_TIMEOUT_SEC 600 +#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24) +#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120 + extern vlib_node_registration_t acl_in_node; extern vlib_node_registration_t acl_out_node; @@ -128,22 +132,6 @@ typedef struct { /* MACIP (input) ACLs associated with the interfaces */ u32 *macip_acl_by_sw_if_index; - /* next indices for our nodes in the l2-classify tables */ - u32 l2_input_classify_next_acl_old; - u32 l2_output_classify_next_acl_old; - - /* next 
node indices for feature bitmap */ - u32 acl_in_node_feat_next_node_index[32]; - u32 acl_out_node_feat_next_node_index[32]; - - /* ACL match actions (must be coherent across in/out ACLs to next indices (can differ) */ - - u32 acl_in_ip4_match_next[256]; - u32 acl_in_ip6_match_next[256]; - u32 acl_out_ip4_match_next[256]; - u32 acl_out_ip6_match_next[256]; - u32 n_match_actions; - /* bitmaps when set the processing is enabled on the interface */ uword *fa_in_acl_on_sw_if_index; uword *fa_out_acl_on_sw_if_index; @@ -162,16 +150,11 @@ typedef struct { /* L2 datapath glue */ - /* active next indices within L2 classifiers - switch old/new path */ + /* next indices within L2 classifiers for ip4/ip6 fa L2 nodes */ u32 l2_input_classify_next_acl_ip4; u32 l2_input_classify_next_acl_ip6; u32 l2_output_classify_next_acl_ip4; u32 l2_output_classify_next_acl_ip6; - /* saved next indices within L2 classifiers for ip4/ip6 fa L2 nodes */ - u32 fa_l2_input_classify_next_acl_ip4; - u32 fa_l2_input_classify_next_acl_ip6; - u32 fa_l2_output_classify_next_acl_ip4; - u32 fa_l2_output_classify_next_acl_ip6; /* next node indices for L2 dispatch */ u32 fa_acl_in_ip4_l2_node_feat_next_node_index[32]; u32 fa_acl_in_ip6_l2_node_feat_next_node_index[32]; @@ -212,6 +195,10 @@ typedef struct { u32 fa_conn_list_head[ACL_N_TIMEOUTS]; u32 fa_conn_list_tail[ACL_N_TIMEOUTS]; + /* Configured session timeout */ + u64 session_timeout[ACL_N_TIMEOUTS]; + + /* Counters for the cleaner thread */ #define foreach_fa_cleaner_counter \ diff --git a/src/plugins/acl/l2sess.c b/src/plugins/acl/l2sess.c deleted file mode 100644 index 7a1567fb..00000000 --- a/src/plugins/acl/l2sess.c +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - *------------------------------------------------------------------ - * l2sess.c - simple MAC-swap API / debug CLI handling - *------------------------------------------------------------------ - */ - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -void -l2sess_init_next_features (vlib_main_t * vm, l2sess_main_t * sm) -{ -#define _(node_name, node_var, is_out, is_ip6, is_track) \ - if (is_out) \ - feat_bitmap_init_next_nodes(vm, node_var.index, L2OUTPUT_N_FEAT, \ - l2output_get_feat_names (), \ - sm->node_var ## _feat_next_node_index); \ - else \ - feat_bitmap_init_next_nodes(vm, node_var.index, L2INPUT_N_FEAT, \ - l2input_get_feat_names (), \ - sm->node_var ## _feat_next_node_index); - - foreach_l2sess_node -#undef _ -} - -void -l2sess_add_our_next_nodes (vlib_main_t * vm, l2sess_main_t * sm, - u8 * prev_node_name, int add_output_nodes) -{ - vlib_node_t *n; - n = vlib_get_node_by_name (vm, prev_node_name); -#define _(node_name, node_var, is_out, is_ip6, is_track) \ - if (is_out == add_output_nodes) { \ - u32 idx = vlib_node_add_next_with_slot(vm, n->index, node_var.index, ~0); \ - if (is_track) { \ - sm->next_slot_track_node_by_is_ip6_is_out[is_ip6][is_out] = idx; \ - } \ - } - foreach_l2sess_node -#undef _ -} - -void -l2sess_setup_nodes (void) -{ - vlib_main_t *vm = vlib_get_main (); - l2sess_main_t *sm = &l2sess_main; - - l2sess_init_next_features (vm, sm); - - l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-input-classify", 0); - l2sess_add_our_next_nodes (vm, sm, (u8 *) 
"l2-output-classify", 1); - -} - -static char * -get_l4_proto_str (int is_ip6, uint8_t l4_proto) -{ - switch (l4_proto) - { - case 6: - return "tcp"; - case 17: - return "udp"; - case 1: - return "icmp"; - case 58: - return "icmp6"; - default: - return ""; - } -} - -static clib_error_t * -l2sess_show_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - l2sess_main_t *sm = &l2sess_main; - clib_time_t *ct = &vm->clib_time; - l2s_session_t *s; - u64 now = clib_cpu_time_now (); - - vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel, - &sm->timing_wheel, 255); - - pool_foreach (s, sm->sessions, ( - { - f64 ctime = - (now - - s->create_time) * ct->seconds_per_clock; - f64 atime0 = - (now - - s->side[0].active_time) * - ct->seconds_per_clock; - f64 atime1 = - (now - - s->side[1].active_time) * - ct->seconds_per_clock; -/* - f64 ctime = (s->create_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; - f64 atime0 = (s->side[0].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; - f64 atime1 = (s->side[1].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; -*/ - u8 * out0 = - format (0, - "%5d: create time: %U pkts/bytes/active time: [ %ld %ld %U : %ld %ld %U ]\n", - (s - sm->sessions), - format_time_interval, "h:m:s:u", - ctime, s->side[0].n_packets, - s->side[0].n_bytes, - format_time_interval, "h:m:s:u", - atime0, s->side[1].n_packets, - s->side[1].n_bytes, - format_time_interval, "h:m:s:u", - atime1); u8 * out1 = 0; - if (s->is_ip6) - { - out1 = - format (0, "%s %U :%u <-> %U :%u", - get_l4_proto_str (s->is_ip6, - s->l4_proto), - format_ip6_address, - &s->side[0].addr.ip6, - s->side[0].port, - format_ip6_address, - &s->side[1].addr.ip6, - s->side[1].port);} - else - { - out1 = - format (0, "%s %U :%u <-> %U :%u", - get_l4_proto_str (s->is_ip6, - s->l4_proto), - format_ip4_address, - &s->side[0].addr.ip4, - s->side[0].port, - format_ip4_address, - &s->side[1].addr.ip4, - 
s->side[1].port);} - vlib_cli_output (vm, "%s %s", out0, - out1); vec_free (out0); - vec_free (out1);} - )); - return 0; -} - -static clib_error_t * -l2sess_show_count_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - l2sess_main_t *sm = &l2sess_main; - - vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel, - &sm->timing_wheel, 255); - vlib_cli_output (vm, "session pool len: %d, pool elts: %d", - pool_len (sm->sessions), pool_elts (sm->sessions)); - vlib_cli_output (vm, - "attempted to delete sessions which were already free: %d", - sm->counter_attempted_delete_free_session); - return 0; -} - - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (l2sess_show_command, static) = { - .path = "show l2sess", - .short_help = "show l2sess", - .function = l2sess_show_command_fn, -}; - -VLIB_CLI_COMMAND (l2sess_show_count_command, static) = { - .path = "show l2sess count", - .short_help = "show l2sess count", - .function = l2sess_show_count_command_fn, -}; -/* *INDENT-OFF* */ - -static inline u64 -time_sec_to_clock( clib_time_t *ct, f64 sec) -{ - return (u64)(((f64)sec)/ct->seconds_per_clock); -} - -static clib_error_t * l2sess_init (vlib_main_t * vm) -{ - l2sess_main_t * sm = &l2sess_main; - clib_error_t * error = 0; - u64 cpu_time_now = clib_cpu_time_now(); - - - clib_time_t *ct = &vm->clib_time; - sm->udp_session_idle_timeout = time_sec_to_clock(ct, UDP_SESSION_IDLE_TIMEOUT_SEC); - sm->tcp_session_idle_timeout = time_sec_to_clock(ct, TCP_SESSION_IDLE_TIMEOUT_SEC); - sm->tcp_session_transient_timeout = time_sec_to_clock(ct, TCP_SESSION_TRANSIENT_TIMEOUT_SEC); - - /* The min sched time of 10e-1 causes erroneous behavior... 
*/ - sm->timing_wheel.min_sched_time = 10e-2; - sm->timing_wheel.max_sched_time = 3600.0*48.0; - timing_wheel_init (&sm->timing_wheel, cpu_time_now, vm->clib_time.clocks_per_second); - sm->timer_wheel_next_expiring_time = 0; - sm->timer_wheel_tick = time_sec_to_clock(ct, sm->timing_wheel.min_sched_time); - /* Pre-allocate expired nodes. */ - vec_alloc (sm->data_from_advancing_timing_wheel, 32); - - l2sess_setup_nodes(); - l2output_init_output_node_vec (&sm->output_next_nodes.output_node_index_vec); - - return error; -} - -VLIB_INIT_FUNCTION (l2sess_init); - - diff --git a/src/plugins/acl/l2sess.h b/src/plugins/acl/l2sess.h deleted file mode 100644 index 961c08c8..00000000 --- a/src/plugins/acl/l2sess.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef __included_l2sess_h__ -#define __included_l2sess_h__ - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#define _(node_name, node_var, is_out, is_ip6, is_track) -#undef _ -#define foreach_l2sess_node \ - _("aclp-l2s-input-ip4-add", l2sess_in_ip4_add, 0, 0, 0) \ - _("aclp-l2s-input-ip6-add", l2sess_in_ip6_add, 0, 1, 0) \ - _("aclp-l2s-output-ip4-add", l2sess_out_ip4_add, 1, 0, 0) \ - _("aclp-l2s-output-ip6-add", l2sess_out_ip6_add, 1, 1, 0) \ - _("aclp-l2s-input-ip4-track", l2sess_in_ip4_track, 0, 0, 1) \ - _("aclp-l2s-input-ip6-track", l2sess_in_ip6_track, 0, 1, 1) \ - _("aclp-l2s-output-ip4-track",l2sess_out_ip4_track, 1, 0, 1) \ - _("aclp-l2s-output-ip6-track", l2sess_out_ip6_track, 1, 1, 1) - -#define _(node_name, node_var, is_out, is_ip6, is_track) \ - extern vlib_node_registration_t node_var; -foreach_l2sess_node -#undef _ - -#define TCP_FLAG_FIN 0x01 -#define TCP_FLAG_SYN 0x02 -#define TCP_FLAG_RST 0x04 -#define TCP_FLAG_PUSH 0x08 -#define TCP_FLAG_ACK 0x10 -#define TCP_FLAG_URG 0x20 -#define TCP_FLAG_ECE 0x40 -#define TCP_FLAG_CWR 0x80 -#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK) -#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK) - -typedef struct { - ip46_address_t addr; - u64 active_time; - u64 n_packets; - u64 n_bytes; - u16 port; -} l2s_session_side_t; - -enum { - L2S_SESSION_SIDE_IN = 0, - L2S_SESSION_SIDE_OUT, - L2S_N_SESSION_SIDES -}; - -typedef struct { - u64 create_time; - l2s_session_side_t side[L2S_N_SESSION_SIDES]; - u8 l4_proto; - u8 is_ip6; - u16 tcp_flags_seen; /* u16 because of two sides */ -} l2s_session_t; - -#define PROD -#ifdef PROD -#define UDP_SESSION_IDLE_TIMEOUT_SEC 600 -#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24) -#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120 -#else -#define UDP_SESSION_IDLE_TIMEOUT_SEC 15 -#define TCP_SESSION_IDLE_TIMEOUT_SEC 15 -#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 5 -#endif - -typedef struct { 
- /* - * the next two fields are present for all nodes, but - * only one of them is used per node - depending - * on whether the node is an input or output one. - */ -#define _(node_name, node_var, is_out, is_ip6, is_track) \ - u32 node_var ## _feat_next_node_index[32]; -foreach_l2sess_node -#undef _ - l2_output_next_nodes_st output_next_nodes; - - /* Next indices of the tracker nodes */ - u32 next_slot_track_node_by_is_ip6_is_out[2][2]; - - /* - * Pairing of "forward" and "reverse" tables by table index. - * Each relationship has two entries - for one and the other table, - * so it is bidirectional. - */ - - u32 *fwd_to_rev_by_table_index; - - /* - * The vector of per-interface session pools - */ - - l2s_session_t *sessions; - - /* The session timeouts */ - u64 tcp_session_transient_timeout; - u64 tcp_session_idle_timeout; - u64 udp_session_idle_timeout; - - /* Timing wheel to time out the idle sessions */ - timing_wheel_t timing_wheel; - u32 *data_from_advancing_timing_wheel; - u64 timer_wheel_next_expiring_time; - u64 timer_wheel_tick; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; - - /* Counter(s) */ - u64 counter_attempted_delete_free_session; -} l2sess_main_t; - -l2sess_main_t l2sess_main; - -/* Just exposed for acl.c */ - -void -l2sess_vlib_plugin_register (vlib_main_t * vm, void * hh, - int from_early_init); - - -#endif /* __included_l2sess_h__ */ diff --git a/src/plugins/acl/l2sess_node.c b/src/plugins/acl/l2sess_node.c deleted file mode 100644 index 689d216d..00000000 --- a/src/plugins/acl/l2sess_node.c +++ /dev/null @@ -1,763 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include - - -typedef struct -{ - u32 next_index; - u32 sw_if_index; - u32 trace_flags; - u32 session_tables[2]; - u32 session_nexts[2]; - u8 l4_proto; -} l2sess_trace_t; - -/* packet trace format function */ - -#define _(node_name, node_var, is_out, is_ip6, is_track) \ -static u8 * format_## node_var ##_trace (u8 * s, va_list * args) \ -{ \ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); \ - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); \ - l2sess_trace_t * t = va_arg (*args, l2sess_trace_t *); \ - \ - s = format (s, node_name ": sw_if_index %d, next index %d trace_flags %08x L4 proto %d\n" \ - " tables [ %d, %d ] nexts [ %d, %d ]", \ - t->sw_if_index, t->next_index, t->trace_flags, t->l4_proto, \ - t->session_tables[0], t->session_tables[1], \ - t->session_nexts[0], t->session_nexts[1]); \ - return s; \ -} -foreach_l2sess_node -#undef _ -#define foreach_l2sess_error \ -_(SWAPPED, "Mac swap packets processed") - typedef enum -{ -#define _(sym,str) L2SESS_ERROR_##sym, - foreach_l2sess_error -#undef _ - L2SESS_N_ERROR, -} l2sess_error_t; - -static char *l2sess_error_strings[] = { -#define _(sym,string) string, - foreach_l2sess_error -#undef _ -}; - -typedef enum -{ - L2SESS_NEXT_DROP, - L2SESS_N_NEXT, -} l2sess_next_t; - -u8 -l2sess_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6) -{ - u8 proto; - int proto_offset; - if (node_is_ip6) - { - proto_offset = 20; - } - else - { - proto_offset = 23; - } - proto = *((u8 *) vlib_buffer_get_current (b0) 
+ proto_offset); - return proto; -} - - -u8 -l2sess_get_tcp_flags (vlib_buffer_t * b0, int node_is_ip6) -{ - u8 flags; - int flags_offset; - if (node_is_ip6) - { - flags_offset = 14 + 40 + 13; /* FIXME: no extension headers assumed */ - } - else - { - flags_offset = 14 + 20 + 13; - } - flags = *((u8 *) vlib_buffer_get_current (b0) + flags_offset); - return flags; -} - -static inline int -l4_tcp_or_udp (u8 proto) -{ - return ((proto == 6) || (proto == 17)); -} - -void -l2sess_get_session_tables (l2sess_main_t * sm, u32 sw_if_index, - int node_is_out, int node_is_ip6, u8 l4_proto, - u32 * session_tables) -{ -/* - * Based on the direction, l3 and l4 protocol, fill a u32[2] array: - * [0] is index for the "direct match" path, [1] is for "mirrored match". - * Store the indices of the tables to add the session to in session_tables[] - */ - l2_output_classify_main_t *l2om = &l2_output_classify_main; - l2_input_classify_main_t *l2im = &l2_input_classify_main; - - u32 output_table_index; - u32 input_table_index; - - if (!l4_tcp_or_udp (l4_proto)) - { - return; - } - - if (node_is_ip6) - { - vec_validate_init_empty (l2im-> - classify_table_index_by_sw_if_index - [L2_INPUT_CLASSIFY_TABLE_IP6], sw_if_index, - ~0); - input_table_index = - l2im-> - classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6] - [sw_if_index]; - vec_validate_init_empty (l2om-> - classify_table_index_by_sw_if_index - [L2_OUTPUT_CLASSIFY_TABLE_IP6], sw_if_index, - ~0); - output_table_index = - l2om-> - classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6] - [sw_if_index]; - } - else - { - vec_validate_init_empty (l2im-> - classify_table_index_by_sw_if_index - [L2_INPUT_CLASSIFY_TABLE_IP4], sw_if_index, - ~0); - input_table_index = - l2im-> - classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4] - [sw_if_index]; - vec_validate_init_empty (l2om-> - classify_table_index_by_sw_if_index - [L2_OUTPUT_CLASSIFY_TABLE_IP4], sw_if_index, - ~0); - output_table_index = - l2om-> - 
classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4] - [sw_if_index]; - } - - if (node_is_out) - { - session_tables[0] = output_table_index; - session_tables[1] = input_table_index; - } - else - { - session_tables[0] = input_table_index; - session_tables[1] = output_table_index; - } -} - -void -l2sess_get_session_nexts (l2sess_main_t * sm, u32 sw_if_index, - int node_is_out, int node_is_ip6, u8 l4_proto, - u32 * session_nexts) -{ -/* - * Based on the direction, l3 and l4 protocol, fill a u32[2] array: - * [0] is the index for the "direct match" path, [1] is for "mirrored match". - * Store the match_next_index in session_nexts[] for a new session entry which is being added to session tables. - */ - u32 input_node_index; - u32 output_node_index; - - if (!l4_tcp_or_udp (l4_proto)) - { - return; - } - - input_node_index = - sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][0]; - output_node_index = - sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][1]; - - if (node_is_out) - { - session_nexts[0] = output_node_index; - session_nexts[1] = input_node_index; - } - else - { - session_nexts[0] = input_node_index; - session_nexts[1] = output_node_index; - } -} - - -static inline void -swap_bytes (vlib_buffer_t * b0, int off_a, int off_b, int nbytes) -{ - u8 tmp; - u8 *pa = vlib_buffer_get_current (b0) + off_a; - u8 *pb = vlib_buffer_get_current (b0) + off_b; - while (nbytes--) - { - tmp = *pa; - *pa++ = *pb; - *pb++ = tmp; - } -} - -/* - * This quite pro[bv]ably is a terrible idea performance wise. Moreso doing it twice. - * Would having a long (ish) chunk of memory work better for this ? - * We will see when we get to the performance of this. - */ -void -l2sess_flip_l3l4_fields (vlib_buffer_t * b0, int node_is_ip6, u8 l4_proto) -{ - if (!l4_tcp_or_udp (l4_proto)) - { - return; - } - if (node_is_ip6) - { - swap_bytes (b0, 22, 38, 16); /* L3 */ - swap_bytes (b0, 54, 56, 2); /* L4 (when no EH!) 
*/ - } - else - { - swap_bytes (b0, 26, 30, 4); /* L3 */ - swap_bytes (b0, 34, 36, 2); /* L4 */ - } -} - -void -l2sess_add_session (vlib_buffer_t * b0, int node_is_out, int node_is_ip6, - u32 session_table, u32 session_match_next, - u32 opaque_index) -{ - vnet_classify_main_t *cm = &vnet_classify_main; - u32 action = 0; - u32 metadata = 0; - -#ifdef DEBUG_SESSIONS - printf ("Adding session to table %d with next %d\n", session_table, - session_match_next); -#endif - vnet_classify_add_del_session (cm, session_table, - vlib_buffer_get_current (b0), - session_match_next, opaque_index, 0, action, - metadata, 1); -} - - - -static void * -get_ptr_to_offset (vlib_buffer_t * b0, int offset) -{ - u8 *p = vlib_buffer_get_current (b0) + offset; - return p; -} - - -/* - * FIXME: Hardcoded offsets are ugly, although if casting to structs one - * would need to take care about alignment.. So let's for now be naive and simple. - */ - -void -session_store_ip4_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess, - int node_is_out) -{ - clib_memcpy (&sess->side[1 - node_is_out].addr.ip4, - get_ptr_to_offset (b0, 26), 4); - clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 30), - 4); - sess->side[1 - node_is_out].port = - ntohs (*(u16 *) get_ptr_to_offset (b0, 34)); - sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 36)); -} - -void -session_store_ip6_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess, - int node_is_out) -{ - clib_memcpy (&sess->side[1 - node_is_out].addr.ip6, - get_ptr_to_offset (b0, 22), 16); - clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 38), - 16); - sess->side[1 - node_is_out].port = - ntohs (*(u16 *) get_ptr_to_offset (b0, 54)); - sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 56)); -} - -static void -build_match_from_session (l2sess_main_t * sm, u8 * match, - l2s_session_t * sess, int is_out) -{ - if (sess->is_ip6) - { - match[20] = sess->l4_proto; - clib_memcpy 
(&match[22], &sess->side[1 - is_out].addr.ip6, 16); - clib_memcpy (&match[38], &sess->side[is_out].addr.ip4, 16); - *(u16 *) & match[54] = htons (sess->side[1 - is_out].port); - *(u16 *) & match[56] = htons (sess->side[is_out].port); - } - else - { - match[23] = sess->l4_proto; - clib_memcpy (&match[26], &sess->side[1 - is_out].addr.ip6, 4); - clib_memcpy (&match[30], &sess->side[is_out].addr.ip4, 4); - *(u16 *) & match[34] = htons (sess->side[1 - is_out].port); - *(u16 *) & match[36] = htons (sess->side[is_out].port); - } -} - -static void -delete_session (l2sess_main_t * sm, u32 sw_if_index, u32 session_index) -{ - vnet_classify_main_t *cm = &vnet_classify_main; - u8 match[5 * 16]; /* For building the mock of the packet to delete the classifier session */ - u32 session_tables[2] = { ~0, ~0 }; - l2s_session_t *sess = sm->sessions + session_index; - if (pool_is_free (sm->sessions, sess)) - { - sm->counter_attempted_delete_free_session++; - return; - } - l2sess_get_session_tables (sm, sw_if_index, 0, sess->is_ip6, sess->l4_proto, - session_tables); - if (session_tables[1] != ~0) - { - build_match_from_session (sm, match, sess, 1); - vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0, - 0, 0); - } - if (session_tables[1] != ~0) - { - build_match_from_session (sm, match, sess, 1); - vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0, - 0, 0); - } - pool_put (sm->sessions, sess); -} - -static void -udp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s, - int which_side, u64 now) -{ - l2s_session_side_t *ss = &s->side[which_side]; - ss->active_time = now; - ss->n_packets++; - ss->n_bytes += b0->current_data + b0->current_length; -} - -static inline u64 -udp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) -{ - return (sm->udp_session_idle_timeout); -} - -static void -tcp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s, - int which_side, u64 now) -{ - l2s_session_side_t *ss = 
&s->side[which_side]; - ss->active_time = now; - ss->n_packets++; - ss->n_bytes += b0->current_data + b0->current_length; - /* Very very lightweight TCP state tracking: just record which flags were seen */ - s->tcp_flags_seen |= - l2sess_get_tcp_flags (b0, s->is_ip6) << (8 * which_side); -} - -/* - * Since we are tracking for the purposes of timing the sessions out, - * we mostly care about two states: established (maximize the idle timeouts) - * and transient (halfopen/halfclosed/reset) - we need to have a reasonably short timeout to - * quickly get rid of sessions but not short enough to violate the TCP specs. - */ - -static inline u64 -tcp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) -{ - /* seen both SYNs and ACKs but not FINs means we are in establshed state */ - u16 masked_flags = - sess->tcp_flags_seen & ((TCP_FLAGS_RSTFINACKSYN << 8) + - TCP_FLAGS_RSTFINACKSYN); - if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags) - { - return (sm->tcp_session_idle_timeout); - } - else - { - return (sm->tcp_session_transient_timeout); - } -} - -static inline u64 -session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) -{ - u64 timeout; - - switch (sess->l4_proto) - { - case 6: - timeout = tcp_session_get_timeout (sm, sess, now); - break; - case 17: - timeout = udp_session_get_timeout (sm, sess, now); - break; - default: - timeout = 0; - } - - return timeout; -} - -static inline u64 -get_session_last_active_time(l2s_session_t * sess) -{ - u64 last_active = - sess->side[0].active_time > - sess->side[1].active_time ? sess->side[0].active_time : sess->side[1]. 
- active_time; - return last_active; -} - -static int -session_is_alive (l2sess_main_t * sm, l2s_session_t * sess, u64 now, u64 *last_active_cache) -{ - u64 last_active = get_session_last_active_time(sess); - u64 timeout = session_get_timeout (sm, sess, now); - int is_alive = ((now - last_active) < timeout); - if (last_active_cache) - *last_active_cache = last_active; - return is_alive; -} - -static void -check_idle_sessions (l2sess_main_t * sm, u32 sw_if_index, u64 now) -{ - sm->timer_wheel_next_expiring_time = 0; - sm->data_from_advancing_timing_wheel - = - timing_wheel_advance (&sm->timing_wheel, now, - sm->data_from_advancing_timing_wheel, - &sm->timer_wheel_next_expiring_time); -#ifdef DEBUG_SESSIONS_VERBOSE - { - clib_time_t *ct = &sm->vlib_main->clib_time; - f64 ctime; - ctime = now * ct->seconds_per_clock; - clib_warning ("Now : %U", format_time_interval, "h:m:s:u", ctime); - ctime = sm->timer_wheel_next_expiring_time * ct->seconds_per_clock; - clib_warning ("Next expire: %U", format_time_interval, "h:m:s:u", ctime); - clib_warning ("Expired items: %d", - (int) vec_len (sm->data_from_advancing_timing_wheel)); - } -#endif - - sm->timer_wheel_next_expiring_time = now + sm->timer_wheel_tick; - if (PREDICT_FALSE ( 0 == sm->data_from_advancing_timing_wheel )) { - return; - } - - if (PREDICT_FALSE (_vec_len (sm->data_from_advancing_timing_wheel) > 0)) - { - uword i; - for (i = 0; i < _vec_len (sm->data_from_advancing_timing_wheel); i++) - { - u32 session_index = sm->data_from_advancing_timing_wheel[i]; - if (!pool_is_free_index (sm->sessions, session_index)) - { - l2s_session_t *sess = sm->sessions + session_index; - u64 last_active; - if (session_is_alive (sm, sess, now, &last_active)) - { -#ifdef DEBUG_SESSIONS - clib_warning ("Restarting timer for session %d", (int) session_index); -#endif - /* Pretend we did this in the past, at last_active moment */ - timing_wheel_insert (&sm->timing_wheel, - last_active + session_get_timeout (sm, sess, - last_active), - 
session_index); - } - else - { -#ifdef DEBUG_SESSIONS - clib_warning ("Deleting session %d", (int) session_index); -#endif - delete_session (sm, sw_if_index, session_index); - } - } - } - _vec_len (sm->data_from_advancing_timing_wheel) = 0; - } -} - -static uword -l2sess_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame, - int node_is_out, int node_is_ip6, int node_is_track, - u32 *feat_next_node_index) -{ - u32 n_left_from, *from, *to_next; - l2sess_next_t next_index; - u32 pkts_swapped = 0; - u32 feature_bitmap0; - u32 trace_flags0; - - l2sess_main_t *sm = &l2sess_main; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Only a single loop for now for simplicity */ - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0 = L2SESS_NEXT_DROP; - u32 sw_if_index0; - //ethernet_header_t *en0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - //en0 = vlib_buffer_get_current (b0); - -/* - * node_is_out : 1 = is output, 0 = is input - * node_is_ip6 : 1 = is ip6, 0 = is ip4 - * node_is_track : 1 = is a state tracking node, 0 - is a session addition node - * - * The below code adjust the behavior according to these parameters. 
- */ - { - u32 session_tables[2] = { ~0, ~0 }; - u32 session_nexts[2] = { ~0, ~0 }; - u8 l4_proto; - u64 now = clib_cpu_time_now (); - - trace_flags0 = 0; - if (node_is_out) - { - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - } - else - { - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - } - /* potentially also remove the nodes here */ - feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; - - if (node_is_track) - { - u32 sess_index = vnet_buffer (b0)->l2_classify.opaque_index; - l2s_session_t *sess = sm->sessions + sess_index; - l4_proto = sess->l4_proto; - - if (session_is_alive (sm, sess, now, 0)) - { - if (6 == l4_proto) - { - tcp_session_account_buffer (b0, sess, node_is_out, - now); - } - else - { - udp_session_account_buffer (b0, sess, node_is_out, - now); - } - } - else - { - timing_wheel_delete (&sm->timing_wheel, sess_index); - delete_session (sm, sw_if_index0, sess_index); - /* FIXME: drop the packet that hit the obsolete node, for now. We really ought to recycle it. */ - next0 = 0; - } - } - else - { - /* - * "-add" node: take l2opaque which arrived to us, and deduce - * the tables out of that. ~0 means the topmost classifier table - * applied for this AF on the RX(for input)/TX(for output)) sw_if_index. - * Also add the mirrored session to the paired table. 
- */ - l2s_session_t *sess; - u32 sess_index; - - l4_proto = l2sess_get_l4_proto (b0, node_is_ip6); - - pool_get (sm->sessions, sess); - sess_index = sess - sm->sessions; - sess->create_time = now; - sess->side[node_is_out].active_time = now; - sess->side[1 - node_is_out].active_time = now; - sess->l4_proto = l4_proto; - sess->is_ip6 = node_is_ip6; - if (node_is_ip6) - { - session_store_ip6_l3l4_info (b0, sess, node_is_out); - } - else - { - session_store_ip4_l3l4_info (b0, sess, node_is_out); - } - - l2sess_get_session_tables (sm, sw_if_index0, node_is_out, - node_is_ip6, l4_proto, - session_tables); - l2sess_get_session_nexts (sm, sw_if_index0, node_is_out, - node_is_ip6, l4_proto, - session_nexts); - l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto); - if (session_tables[1] != ~0) - { - l2sess_add_session (b0, node_is_out, node_is_ip6, - session_tables[1], session_nexts[1], - sess_index); - } - l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto); - if (session_tables[0] != ~0) - { - l2sess_add_session (b0, node_is_out, node_is_ip6, - session_tables[0], session_nexts[0], - sess_index); - } - if (6 == sess->l4_proto) - { - tcp_session_account_buffer (b0, sess, node_is_out, now); - } - else - { - udp_session_account_buffer (b0, sess, node_is_out, now); - } - timing_wheel_insert (&sm->timing_wheel, - now + session_get_timeout (sm, sess, - now), - sess_index); - } - - if (now >= sm->timer_wheel_next_expiring_time) - { - check_idle_sessions (sm, sw_if_index0, now); - } - - next0 = feat_bitmap_get_next_node_index (feat_next_node_index, - feature_bitmap0); - - if (next0 >= node->n_next_nodes) - { - trace_flags0 |= 1; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - l2sess_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - t->trace_flags = trace_flags0; - t->l4_proto = l4_proto; - t->session_tables[0] = session_tables[0]; - 
t->session_tables[1] = session_tables[1]; - t->session_nexts[0] = session_nexts[0]; - t->session_nexts[1] = session_nexts[1]; - } - - } - pkts_swapped += 1; - if (next0 >= node->n_next_nodes) - { - next0 = 0; - } - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, node->node_index, - L2SESS_ERROR_SWAPPED, pkts_swapped); - return frame->n_vectors; -} - - -#define _(node_name, node_var, is_out, is_ip6, is_track) \ -static uword \ -node_var ## node_fn (vlib_main_t * vm, \ - vlib_node_runtime_t * node, \ - vlib_frame_t * frame) \ -{ \ - l2sess_main_t *sm = &l2sess_main; \ - return l2sess_node_fn(vm, node, frame, \ - is_out, is_ip6, is_track, \ - sm->node_var ## _feat_next_node_index); \ -} \ -VLIB_REGISTER_NODE (node_var) = { \ - .function = node_var ## node_fn, \ - .name = node_name, \ - .vector_size = sizeof (u32), \ - .format_trace = format_ ## node_var ## _trace, \ - .type = VLIB_NODE_TYPE_INTERNAL, \ - \ - .n_errors = ARRAY_LEN(l2sess_error_strings), \ - .error_strings = l2sess_error_strings, \ - \ - .n_next_nodes = L2SESS_N_NEXT, \ - .next_nodes = { \ - [L2SESS_NEXT_DROP] = "error-drop", \ - }, \ -}; -foreach_l2sess_node -#undef _ diff --git a/src/plugins/acl/node_in.c b/src/plugins/acl/node_in.c deleted file mode 100644 index 95802df5..00000000 --- a/src/plugins/acl/node_in.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include "node_in.h" - -typedef struct -{ - u32 next_index; - u32 sw_if_index; - u32 match_acl_index; - u32 match_rule_index; - u32 trace_bitmap; -} acl_in_trace_t; - -/* packet trace format function */ -static u8 * -format_acl_in_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - acl_in_trace_t *t = va_arg (*args, acl_in_trace_t *); - - s = - format (s, - "ACL_IN: sw_if_index %d, next index %d, match: inacl %d rule %d trace_bits %08x", - t->sw_if_index, t->next_index, t->match_acl_index, - t->match_rule_index, t->trace_bitmap); - return s; -} - -vlib_node_registration_t acl_in_node; - -#define foreach_acl_in_error \ -_(ACL_CHECK, "InACL check packets processed") - -typedef enum -{ -#define _(sym,str) ACL_IN_ERROR_##sym, - foreach_acl_in_error -#undef _ - ACL_IN_N_ERROR, -} acl_in_error_t; - -static char *acl_in_error_strings[] = { -#define _(sym,string) string, - foreach_acl_in_error -#undef _ -}; - -static uword -acl_in_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - u32 n_left_from, *from, *to_next; - acl_in_next_t next_index; - u32 pkts_acl_checked = 0; - u32 feature_bitmap0; - u32 trace_bitmap = 0; - u32 *input_feat_next_node_index = - acl_main.acl_in_node_feat_next_node_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 
n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0 = ~0; - u32 sw_if_index0; - u32 next = ~0; - u32 match_acl_index = ~0; - u32 match_rule_index = ~0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; - - input_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index, - &match_rule_index, &trace_bitmap); - if (next != ~0) - { - next0 = next; - } - if (next0 == ~0) - { - next0 = - feat_bitmap_get_next_node_index (input_feat_next_node_index, - feature_bitmap0); - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - acl_in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - t->match_acl_index = match_acl_index; - t->match_rule_index = match_rule_index; - t->trace_bitmap = trace_bitmap; - } - - next0 = next0 < node->n_next_nodes ? 
next0 : 0; - - pkts_acl_checked += 1; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, acl_in_node.index, - ACL_IN_ERROR_ACL_CHECK, pkts_acl_checked); - return frame->n_vectors; -} - -VLIB_REGISTER_NODE (acl_in_node) = -{ - .function = acl_in_node_fn,.name = "acl-plugin-in",.vector_size = - sizeof (u32),.format_trace = format_acl_in_trace,.type = - VLIB_NODE_TYPE_INTERNAL,.n_errors = - ARRAY_LEN (acl_in_error_strings),.error_strings = - acl_in_error_strings,.n_next_nodes = ACL_IN_N_NEXT, - /* edit / add dispositions here */ - .next_nodes = - { - [ACL_IN_ERROR_DROP] = "error-drop", - [ACL_IN_ETHERNET_INPUT] = "ethernet-input", - [ACL_IN_L2S_INPUT_IP4_ADD] = "aclp-l2s-input-ip4-add", - [ACL_IN_L2S_INPUT_IP6_ADD] = "aclp-l2s-input-ip6-add",} -,}; diff --git a/src/plugins/acl/node_in.h b/src/plugins/acl/node_in.h deleted file mode 100644 index 502bbf8d..00000000 --- a/src/plugins/acl/node_in.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _NODE_IN_H_ -#define _NODE_IN_H_ - -typedef enum { - ACL_IN_ERROR_DROP, - ACL_IN_ETHERNET_INPUT, - ACL_IN_L2S_INPUT_IP4_ADD, - ACL_IN_L2S_INPUT_IP6_ADD, - ACL_IN_N_NEXT, -} acl_in_next_t; - -#endif diff --git a/src/plugins/acl/node_out.c b/src/plugins/acl/node_out.c deleted file mode 100644 index cbec3b9a..00000000 --- a/src/plugins/acl/node_out.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include "node_out.h" - -typedef struct -{ - u32 next_index; - u32 sw_if_index; - u32 match_acl_index; - u32 match_rule_index; - u32 trace_bitmap; -} acl_out_trace_t; - -/* packet trace format function */ -static u8 * -format_acl_out_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - acl_out_trace_t *t = va_arg (*args, acl_out_trace_t *); - s = - format (s, - "ACL_OUT: sw_if_index %d, next index %d, match: outacl %d rule %d trace_bits %08x", - t->sw_if_index, t->next_index, t->match_acl_index, - t->match_rule_index, t->trace_bitmap); - return s; -} - -vlib_node_registration_t acl_out_node; - -#define foreach_acl_out_error \ -_(ACL_CHECK, "OutACL check packets processed") - -typedef enum -{ -#define _(sym,str) ACL_OUT_ERROR_##sym, - foreach_acl_out_error -#undef _ - ACL_OUT_N_ERROR, -} acl_out_error_t; - -static char *acl_out_error_strings[] = { -#define _(sym,string) string, - foreach_acl_out_error -#undef _ -}; - -static uword -acl_out_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - acl_main_t *am = &acl_main; - u32 *output_feat_next_node_index = - am->acl_out_node_feat_next_node_index; - u32 n_left_from, *from, *to_next; - acl_out_next_t next_index; - u32 pkts_acl_checked = 0; - u32 feature_bitmap0; - u32 match_acl_index = ~0; - u32 match_rule_index = ~0; - u32 trace_bitmap = 0; - - from = vlib_frame_vector_args (frame); - n_left_from = 
frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - u32 next0 = ~0; - u32 next = 0; - u32 sw_if_index0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - - sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; - feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; - - output_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index, - &match_rule_index, &trace_bitmap); - if (next != ~0) - { - next0 = next; - } - if (next0 == ~0) - { - next0 = - feat_bitmap_get_next_node_index (output_feat_next_node_index, - feature_bitmap0); - } - - - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - acl_out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - t->match_acl_index = match_acl_index; - t->match_rule_index = match_rule_index; - t->trace_bitmap = trace_bitmap; - } - - pkts_acl_checked += 1; - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, acl_out_node.index, - ACL_OUT_ERROR_ACL_CHECK, pkts_acl_checked); - return frame->n_vectors; -} - -VLIB_REGISTER_NODE (acl_out_node) = -{ - .function = acl_out_node_fn,.name = "acl-plugin-out",.vector_size = - sizeof (u32),.format_trace = format_acl_out_trace,.type = - VLIB_NODE_TYPE_INTERNAL,.n_errors = - ARRAY_LEN (acl_out_error_strings),.error_strings = - acl_out_error_strings,.n_next_nodes = ACL_OUT_N_NEXT, - 
/* edit / add dispositions here */ - .next_nodes = - { - [ACL_OUT_ERROR_DROP] = "error-drop", - [ACL_OUT_INTERFACE_OUTPUT] = "interface-output", - [ACL_OUT_L2S_OUTPUT_IP4_ADD] = "aclp-l2s-output-ip4-add", - [ACL_OUT_L2S_OUTPUT_IP6_ADD] = "aclp-l2s-output-ip6-add",} -,}; diff --git a/src/plugins/acl/node_out.h b/src/plugins/acl/node_out.h deleted file mode 100644 index c919f3b7..00000000 --- a/src/plugins/acl/node_out.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _NODE_OUT_H_ -#define _NODE_OUT_H_ - -typedef enum { - ACL_OUT_ERROR_DROP, - ACL_OUT_INTERFACE_OUTPUT, - ACL_OUT_L2S_OUTPUT_IP4_ADD, - ACL_OUT_L2S_OUTPUT_IP6_ADD, - ACL_OUT_N_NEXT, -} acl_out_next_t; - -#endif diff --git a/src/plugins/acl/test/run-python b/src/plugins/acl/test/run-python deleted file mode 100755 index 215eb17a..00000000 --- a/src/plugins/acl/test/run-python +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -# -# Do all the legwork to run a scapy shell with APIs available for load -# -CURR_DIR=`pwd` -ROOT_DIR=`git rev-parse --show-toplevel` -cd $ROOT_DIR -sudo apt-get install -y python-virtualenv -# uncomment the line below to enable build of plugins and api each time -# make plugins && make build-vpp-api || exit -virtualenv virtualenv -virtualenv/bin/pip install ipaddress -virtualenv/bin/pip install scapy -# install the python API into the virtualenv -cd $ROOT_DIR/vpp-api/python/ -$ROOT_DIR/virtualenv/bin/python setup.py install -# install the python ACL plugin API into the virtualenv -ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin` -cd $ACL_PLUGIN_SETUP_DIR; -$ROOT_DIR/virtualenv/bin/python setup.py install -cd $ROOT_DIR -# figure out the shared library path and start scapy -export LD_LIBRARY_PATH=`pwd`/`find . 
-name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1` -cd $CURR_DIR -sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH $ROOT_DIR/virtualenv/bin/python $1 $2 $3 $4 $5 $6 $7 $8 $9 - - - diff --git a/src/plugins/acl/test/run-scapy b/src/plugins/acl/test/run-scapy deleted file mode 100755 index 266f07d1..00000000 --- a/src/plugins/acl/test/run-scapy +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -# -# Do all the legwork to run a scapy shell with APIs available for load -# -ROOT_DIR=`git rev-parse --show-toplevel` -cd $ROOT_DIR -sudo apt-get install -y python-virtualenv -# uncomment the line below to enable the build of plugins and API each time.. -# make plugins && make build-vpp-api || exit -virtualenv virtualenv -virtualenv/bin/pip install ipaddress -virtualenv/bin/pip install scapy -# install the python API into the virtualenv -cd $ROOT_DIR/vpp-api/python/ -$ROOT_DIR/virtualenv/bin/python setup.py install -# install the python ACL plugin API into the virtualenv -ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin` -cd $ACL_PLUGIN_SETUP_DIR; -$ROOT_DIR/virtualenv/bin/python setup.py install -cd $ROOT_DIR -# figure out the shared library path and start scapy -export LD_LIBRARY_PATH=`pwd`/`find . 
-name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1` -sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH virtualenv/bin/scapy - - - diff --git a/src/plugins/acl/test/test_acl_plugin.py b/src/plugins/acl/test/test_acl_plugin.py deleted file mode 100644 index 7fc72d67..00000000 --- a/src/plugins/acl/test/test_acl_plugin.py +++ /dev/null @@ -1,118 +0,0 @@ -from __future__ import print_function -import unittest, sys, time, threading, struct, logging, os -import vpp_papi -# import vpp_papi_plugins.acl -from ipaddress import * -papi_event = threading.Event() -print(vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS) -def papi_event_handler(result): - if result.vl_msg_id == vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS: - return - if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_INTERFACE_COUNTERS: - print('Interface counters', result) - return - if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_IP6_FIB_COUNTERS: - print('IPv6 FIB counters', result) - papi_event.set() - return - - print('Unknown message id:', result.vl_msg_id) - -import glob, subprocess -class TestAclPlugin(unittest.TestCase): - @classmethod - def setUpClass(cls): - print("Setup") - @classmethod - def tearDownClass(cls): - print("Teardown") - - def setUp(self): - print("Connecting API") - r = vpp_papi.connect("test_papi") - self.assertEqual(r, 0) - - def tearDown(self): - r = vpp_papi.disconnect() - self.assertEqual(r, 0) - - # - # The tests themselves - # - - # - # Basic request / reply - # - def test_show_version(self): - t = vpp_papi.show_version() - print('T', t); - program = t.program.decode().rstrip('\x00') - self.assertEqual('vpe', program) - - def x_test_acl_add(self): - print("Test ACL add") - self.assertEqual(1, 1) - - # - # Details / Dump - # - def x_test_details_dump(self): - t = vpp_papi.sw_interface_dump(0, b'') - print('Dump/details T', t) - - # - # Arrays - # - def x_test_arrays(self): - t = vpp_papi.vnet_get_summary_stats() - print('Summary stats', t) - print('Packets:', t.total_pkts[0]) - 
print('Packets:', t.total_pkts[1]) - # - # Variable sized arrays and counters - # - #@unittest.skip("stats") - def x_test_want_stats(self): - pid = 123 - vpp_papi.register_event_callback(papi_event_handler) - papi_event.clear() - - # Need to configure IPv6 to get som IPv6 FIB stats - t = vpp_papi.create_loopback('') - print(t) - self.assertEqual(t.retval, 0) - - ifindex = t.sw_if_index - addr = str(IPv6Address(u'1::1').packed) - t = vpp_papi.sw_interface_add_del_address(ifindex, 1, 1, 0, 16, addr) - print(t) - self.assertEqual(t.retval, 0) - - # Check if interface is up - # XXX: Add new API to query interface state based on ifindex, instead of dump all. - t = vpp_papi.sw_interface_set_flags(ifindex, 1, 1, 0) - self.assertEqual(t.retval, 0) - - t = vpp_papi.want_stats(True, pid) - - print (t) - - # - # Wait for some stats - # - self.assertEqual(papi_event.wait(15), True) - t = vpp_papi.want_stats(False, pid) - print (t) - - - # - # Plugins? - # - -if __name__ == '__main__' or __name__ == '__builtin__': - print("This is main") - suite = unittest.TestLoader().loadTestsFromTestCase(TestAclPlugin) - unittest.TextTestRunner(verbosity=2).run(suite) - #logging.basicConfig(level=logging.DEBUG) - # unittest.main() - diff --git a/test/test_acl_plugin_l2l3.py b/test/test_acl_plugin_l2l3.py index 32abf184..c7f1068a 100644 --- a/test/test_acl_plugin_l2l3.py +++ b/test/test_acl_plugin_l2l3.py @@ -715,53 +715,5 @@ class TestIpIrb(VppTestCase): self.run_test_ip46_bridged_to_routed_and_back(False, False, self.WITH_EH) - # Old datapath group - def test_8900_ip6_irb_1(self): - """ ACL plugin set old L2 datapath""" - if not self.vpp_dead: - cmd = "set acl-plugin l2-datapath old" - self.logger.info(self.vapi.ppcli(cmd)) - - def test_8901_ip6_irb_1(self): - """ ACL IPv6 routed -> bridged, L2 ACL deny""" - self.run_test_ip46_routed_to_bridged(True, True, False, - self.WITHOUT_EH) - - def test_8902_ip6_irb_1(self): - """ ACL IPv6 routed -> bridged, L3 ACL deny""" - 
self.run_test_ip46_routed_to_bridged(False, True, False, - self.WITHOUT_EH) - - def test_8903_ip4_irb_1(self): - """ ACL IPv4 routed -> bridged, L2 ACL deny""" - self.run_test_ip46_routed_to_bridged(True, False, False, - self.WITHOUT_EH) - - def test_8904_ip4_irb_1(self): - """ ACL IPv4 routed -> bridged, L3 ACL deny""" - self.run_test_ip46_routed_to_bridged(False, False, False, - self.WITHOUT_EH) - - def test_8905_ip6_irb_1(self): - """ ACL IPv6 bridged -> routed, L2 ACL deny """ - self.run_test_ip46_bridged_to_routed(True, True, False, - self.WITHOUT_EH) - - def test_8906_ip6_irb_1(self): - """ ACL IPv6 bridged -> routed, L3 ACL deny """ - self.run_test_ip46_bridged_to_routed(False, True, False, - self.WITHOUT_EH) - - def test_8907_ip6_irb_1(self): - """ ACL IPv4 bridged -> routed, L2 ACL deny """ - self.run_test_ip46_bridged_to_routed(True, False, False, - self.WITHOUT_EH) - - def test_8908_ip6_irb_1(self): - """ ACL IPv4 bridged -> routed, L3 ACL deny """ - self.run_test_ip46_bridged_to_routed(False, False, False, - self.WITHOUT_EH) - - if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg From 5dbfbb7110a52595915acd5ec034f82ce517846a Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 17 May 2017 21:27:03 +0200 Subject: acl-plugin: make the ACL plugin multicore-capable Add the logic to be able to use stateful ACLs in a multithreaded setup. 
Change-Id: I3b0cfa6ca4ea8f46f61648611c3e97b00c3376b6 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl-plugin.md | 347 +++++++++++++++++++++++ src/plugins/acl/acl.c | 39 ++- src/plugins/acl/acl.h | 11 +- src/plugins/acl/fa_node.c | 646 ++++++++++++++++++++++++++++++------------ src/plugins/acl/fa_node.h | 76 ++++- 5 files changed, 912 insertions(+), 207 deletions(-) create mode 100644 src/plugins/acl/acl-plugin.md (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl-plugin.md b/src/plugins/acl/acl-plugin.md new file mode 100644 index 00000000..1b44bca9 --- /dev/null +++ b/src/plugins/acl/acl-plugin.md @@ -0,0 +1,347 @@ +Multicore support for ACL plugin +================================ + +This captures some considerations and design decisions that I have made, +both for my own memory later on ("what the hell was I thinking?!?"), +and for anyone interested to criticize/improve/hack on this code. + +One of the factors taken into account while making these decisions, +was the relative emphasis on the multi-thread vs. single-thread +use cases: the latter is the vastly more prevalent. But, +one can not optimize the single-thread performance without +having a functioning code for multi-thread. + +stateless ACLs +============== + +The stateless trivially parallelizes, and the only potential for the +race between the different threads is during the reconfiguration, +at the time of replacing the old ACL being checked, with +the new ACL. + +In case an acl_add_replace is being used to replace the rules +within the existing entry, a reallocation of am->acls[X].rules +vector will happen and potentially a change in count. + +acl_match_5tuple() has the following code: + + a = am->acls + acl_index; + for (i = 0; i < a->count; i++) + { + r = a->rules + i; + . . . 
+ +Ideally we should be immune from a->rules changing, +but the problem arises if the count changes in flight, +and the new ruleset is smaller - then we will attempt +to "match" against the free memory. + +This can(?) be solved by replacing the for() with while(), +so the comparison happens at each iteration. + +full_acl_match_5tuple(), which iterates over the list +of ACLs, is a bit less immune, since it takes the pointer +to the vector to iterate and keeps a local copy of +that pointer. + +This race can be solved by checking the +current pointer to the vector with the source pointer, +and seeing if there is an (unlikely) change, and if +there is, return the "deny" action, or, better, +restart the check. + +Since the check reloads the ACL list on a per-packet basis, +there is only a window of opportunity of one packet to +"match" packet against an incorrect rule set. +The workers also do not change anything, only read. +Therefore, it looks like building special structures +to ensure that it does not happen at all might be not +worth it. + +At least not until we have a unit-test able to +reliably catch this condition and test that +the measures applied are effective. Adding the code +which is not possible to exercise is worse than +not adding any code at all. + +So, I opt for "do-nothing" here for the moment. + +reflexive ACLs: single-thread +============================= + +Before we talk multi-thread, is worth revisiting the +design of the reflexive ACLs in the plugin, and +the history of their evolution. + +The very first version of the ACL plugin, shipped in +1701, mostly did the job using the existing components +and gluing them together. Because it needed to work +in bridged forwarding path only, using L2 classifier +as an insertion point appeared natural, also L2 classifier, +being a table with sessions, seemed like a good place +to hold the sessions. 
+ +So, the original design had two conceptual nodes: +one, pointed by the next_miss from the L2 classifier table, +was checking the actual ACL, and inserting session into +the L2 classifier table, and the other one, pointed +to by the next_match within the specific session rule, +was checking the existing session. The timing out +of the existing connections was done in the datapath, +by periodically calling the aging function. + +This decision to use the existing components, +with its attrativeness, did bring a few limitations as well: + +* L2 classifier is a simple mask-and-value match, with +a fixed mask across the table. So, sanely supporting IPv6 +packets with extension headers in that framework was impossible. + +* There is no way to get a backpressure from L2 classifier +depending on memory usage. When it runs out of memory, +it simply crashes the box. When it runs out of memory ? +We don't really know. Depends on how it allocates it. + +* Since we need to match the *reflected* traffic, +we had to create *two* full session entries +in two different directions, which is quite wasteful memory-wise. + +* (showstopper): the L2 classifier runs only in +the bridged data path, so supporting routed data path +would require creating something else entirely different, +which would mean much more headaches support-wise going forward. + +Because of that, I have moved to a different model of +creating a session-5-tuple from the packet data - once, +and then doing all the matching just on that 5-tuple. + +This has allowed to add support for skipping IPv6 extension headers. + +Also, this new version started to store the sessions in a dedicated +bihash-per-interface, with the session key data being +aligned for the ingress packets, and being mirrored for the +egress packets. 
This allows of significant savings in memory, +because now we need to keep only one copy of the session table per +interface instead of two, and also to only have ONE node for all the lookups, +(L2/L3 path, in/out, IPv4/IPv6) - significantly reducing the code complexity. + +Unfortunately, bihash still has the "lack of backpressure" problem, +in a sense that if you try to insert too many entries and run out +of memory in the heap you supplied, you get a crash. + +To somewhat workaround against that, there is a "maximum tested number of sessions" +value, which tracks the currently inserted sessions in the bihash, +and if this number is being approached, a more aggressive cleanup +can happen. If this number is reached, two behaviors are possible: + +* attempt to do the stateless ACL matching and permit the packet + if it succeeds + +* deny the packet + +Currently I have opted for a second one, since it allows for +a better defined behavior, and if you have to permit +the traffic in both directions, why using stateful anyway ? + +In order to be able to do the cleanup, we need to discriminate between +the session types, with each session type having its own idle timeout. +In order to do that, we keep three lists, defined in enum acl_timeout_e: +ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT. + +The first one is hopefully obvious - it is just all UDP connections. +They have an idle timeout of 600 seconds. + +The second and third is a bit more subtle. TCP is a complicated protocol, +and we need to tread the fine line between doing too little and doing +too much, and triggering the potential compatibility issues because of +being a "middlebox". + +I decided to split the TCP connections into two classes: +established, and everything else. "Established", means we have seen +the SYN and ACK from both sides (with PUSH obviously masked out). +This is the "active" state of any TCP connection and we would like +to ensure we do not screw it up. 
So, the connections in this state +have the default idle timer of 24 hours. + +All the rest of the connections have the idle timeout of 2 minutes, +(inspired by an old value of MSL) and based on the observation +that the states this class represents are usually very short lived. + +Once we have these three baskets of connections, it is trivial to +imagine a simple cleanup mechanism to deal with this: take a +TCP transient connection that has been hanging around. + +It is debatable whether we want to discriminate between the +different TCP transient connections. Assuming we do FIFO (and +the lists allow us to do just that), it means a given connection +on the head of the list has been hanging around for longest. +Thus, if we are short on resources, we might just go ahead and +reuse it within the datapath. + +This is where we are slowly approaching the question +"Why in the world have you not used a timer wheel or such ?" + +The answer is simple: within the above constraints, it does +not buy me much. + +Also, timer wheel creates a leaky abstraction with a difficult +to manage corner case. Which corner case ? + +We have a set of objects (sessions) with an event that may +or may not happen (idle timeout timer firing), and a +necessity to reset the idle timeout when there is +activity on the session. + +In the worst case, where we had 10000 one-packet +UDP sessions just created 10 minutes ago, we would need +to deal with a spike of 10000 expired timers. + +Of course, if we have the active traffic on all +of these 10000 connections, then we will not have +to deal with that ? Right, but we will still have to deal +with canceling and requeueing the timers. + +In the best possible case, requeueing a timer is +going to be something along the lines of a linked-list +removal and reinsertion. + +However, keep in mind we already need to classify the +connections for reuse, so therefore we already have +the linked lists! 
+ +And if we just check these linked lists periodically in +a FIFO fashion, we can get away with a very simple per-packet operation: +writing back the timestamp of "now" into the connection structure. + +Then rather than requeueing the list on a per-packet or per-frame +basis, we can defer this action until the time this session +appears on the head of the FIFO list, and the cleaning +routine makes the decision about whether to discard +the session (because the interval since last activity is bigger +than the idle timeout), or to requeue the session back to +the end of the list (because the last activity was less +than idle timeout ago). + +So, rather than using the timers, we can simply reuse our classification +FIFOs, with the following heuristic: do not look at the session that was +enqueued at time X until X+session_timeout. If we enqueue the sessions +in the order of their initial activity, then we can simply use enqueue +timestamp of the head session as a decision criterion for when we need +to get back at looking at it for the timeout purposes. + +Since the number of FIFOs is small, we get a slightly worse check +performance than with timers, but still O(1). + +We seemingly do quite a few "useless" operations of requeueing the items +back to the tail of the list - but, these are the operations we do not +have to do in the active data path, so overall it is a win. + +(Diversion: I believe this problem is congruent to poll vs. epoll or +events vs. 
threads, some reading on this subject: +http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html) + +We can also run a TCP-like scheme for adaptively changing +the wait period in the routine that deals with the connection timeouts: +we can attempt to check the connections a couple of times per second +(same as we would advance the timer wheel), and then if we have requeued +close to a max-per-quantum number of connections, we can halve the waiting +interval, and if we did not requeue any, we can slowly increment the waiting +interval - which at a steady state should stabilize similar to what the TCP rate +does. + +reflexive ACLs: multi-thread +============================= + +The single-threaded implementation in 1704 used a separate "cleaner" process +to deal with the timing out of the connections. +It is all good and great when you know that there is only a single core +to run everything on, but the existence of the lists proves to be +a massive difficulty when it comes to operating from multiple threads. + +Initial study shows that with a few assumptions (e.g. that the cleaner running in the main thread +and the worker have a demarcation point in time where either one or the other one touches +the session in the list) it might be possible to make it work, but the resulting +trickiness of doing it neatly with all the corner cases is quite large. + +So, for the multi-threaded scenario, we need to move the connection +aging back to the same CPU as its creation. + +Luckily we can do this with the help of the interrupts. + +So, the design is as follows: the aging thread (acl_fa_session_cleaner_process) +periodically fires the interrupts to the workers' interrupt nodes (acl_fa_worker_session_cleaner_process_node.index), +using vlib_node_set_interrupt_pending(), and +the interrupt node acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions() +which does the actual job of advancing the lists. 
And within the actual datapath the only thing we will be +doing is putting the items onto FIFO, and updating the last active time on the existing connection. + +The one "delicate" part is that the worker for one leg of the connection might be different from +the worker of another leg of the connection - but, even if the "owner" tries to free the connection, +nothing terrible can happen - worst case the element of the pool (which is nominally free for a short period) +will get the timestamp updated - same thing about the TCP flags seen. + +A slightly trickier issue arises when the packet is initially seen by one worker (thus owned by that worker), +and the return packet is processed by another worker, and as a result changes +the class of the connection (e.g. becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa). +If the class changes from one with the shorter idle time to the one with the longer idle time, +then unless we are in the starvation mode where the transient connections are recycled, +we can simply do nothing and let the normal requeue mechanism kick in. If the class changes from the longer idle +timer to the shorter idle timer, then we risk keeping the connection around for longer than needed, which +will affect the resource usage. + +One solution to that is to have NxN ring buffers (where N is the number of workers), such that the non-owner +can signal to the owner the connection# that needs to be requeued out of order. + +A simpler solution though, is to ensure that each FIFO's period is equal to that of the shortest timer. +This way the resource starvation problem is taken care of, at the expense of some additional work. + +This all looks sufficiently nice and simple until a skeleton falls out of the closet: +sometimes we want to clean the connections en masse before they expire. + +There are a few potential scenarios: +1) removal of an ACL from the interface +2) removal of an interface +3) manual action of an operator (in the future). 
+ +In order to tackle this, we need to modify the logic which decides whether to requeue the +connection on the end of the list, or to delete it due to idle timeout: + +We define a point in time, and have each worker thread fast-forward through its FIFO, +in the process looking for sessions that satisfy the criteria, and either keeping them or requeueing them. + +To keep the ease of appearance to the outside world, we still process this as an event +within the connection cleaner thread, but this event handler does as follows: +1) it creates the bitmap of the sw_if_index values requested to be cleared +2) for each worker, it waits to ensure there is no cleanup operation in progress (and if there is one, +it waits), and then makes a copy of the bitmap, sets the per-worker flag of a cleanup operation, and sends an interrupt. +3) wait until all cleanup operations have completed. + +Within the worker interrupt node, we check if the "cleanup in progress" is set, +and if it is, we check the "fast forward time" value. If unset, we initialize it to value now, and compare the +requested bitmap of sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of sw_if_index that this worker deals with. + +(we set the bit in the bitmap every time we enqueue the packet onto a FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session). + +If the result of this AND operation is zero - then we can clear the flag of cleanup in progress and return. +Else we kick off the quantum of cleanup, and make sure we get another interrupt ASAP if that cleanup operation returns non-zero, +meaning there is more work to do. +When that operation returns zero, everything has been processed, we can clear the "cleanup-in-progress" flag, and +zeroize the bitmap of sw_if_index-es requested to be cleaned. + +The interrupt node signals its wish to receive an interrupt ASAP by setting interrupt_is_needed +flag within the per-worker structure. 
The main thread, while waiting for the +cleanup operation to complete, checks if there is a request for interrupt, +and if there is - it sends one. + +This approach gives us a way to mass-clean the connections which is reusing the code of the regular idle +connection cleanup. + +One potential inefficiency is the bitmap values set by the session insertion +in the data path - there is nothing to clear them. + +So, if one rearranges the interface placement with the workers, then the cleanups will cause some unnecessary work. +For now, we consider it an acceptable limitation. It can be resolved by having another per-worker bitmap, which, when set, +would trigger the cleanup of the bits in the serviced_sw_if_index_bitmap). + +=== the end === + diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 6b79411e..84ed7af6 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -1785,6 +1785,7 @@ done: return error; } + static clib_error_t * acl_show_aclplugin_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1799,6 +1800,7 @@ acl_show_aclplugin_fn (vlib_main_t * vm, if (unformat (input, "sessions")) { u8 * out0 = 0; + u16 wk; pool_foreach (swif, im->sw_interfaces, ({ u32 sw_if_index = swif->sw_if_index; @@ -1806,6 +1808,24 @@ acl_show_aclplugin_fn (vlib_main_t * vm, u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? 
am->fa_session_dels_by_sw_if_index[sw_if_index] : 0; out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); })); + out0 = format(out0, "\n\nPer-worker data:\n"); + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + out0 = format(out0, "Worker #%d:\n", wk); + out0 = format(out0, " Next expiry time: %lu\n", pw->next_expiry_time); + out0 = format(out0, " Requeue until time: %lu\n", pw->requeue_until_time); + out0 = format(out0, " Current time wait interval: %lu\n", pw->current_time_wait_interval); + out0 = format(out0, " Count of deleted sessions: %lu\n", pw->cnt_deleted_sessions); + out0 = format(out0, " Delete already deleted: %lu\n", pw->cnt_already_deleted_sessions); + out0 = format(out0, " Session timers restarted: %lu\n", pw->cnt_session_timer_restarted); + out0 = format(out0, " Swipe until this time: %lu\n", pw->swipe_end_time); + out0 = format(out0, " sw_if_index serviced bitmap: %U\n", format_bitmap_hex, pw->serviced_sw_if_index_bitmap); + out0 = format(out0, " pending clear intfc bitmap : %U\n", format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap); + out0 = format(out0, " clear in progress: %u\n", pw->clear_in_process); + out0 = format(out0, " interrupt is pending: %d\n", pw->interrupt_is_pending); + out0 = format(out0, " interrupt is needed: %d\n", pw->interrupt_is_needed); + out0 = format(out0, " interrupt is unwanted: %d\n", pw->interrupt_is_unwanted); + } out0 = format(out0, "\n\nConn cleaner thread counters:\n"); #define _(cnt, desc) out0 = format(out0, " %20lu: %s\n", am->cnt, desc); foreach_fa_cleaner_counter; @@ -1867,14 +1887,24 @@ acl_init (vlib_main_t * vm) am->fa_conn_table_hash_num_buckets = ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS; am->fa_conn_table_hash_memory_size = ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE; am->fa_conn_table_max_entries = ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES; - + vlib_thread_main_t *tm = 
vlib_get_thread_main (); + // vec_validate(am->per_worker_data, os_get_nthreads()-1); + vec_validate(am->per_worker_data, tm->n_vlib_mains-1); + clib_warning("ACL_FA_INIT: per-worker len: %d", vec_len(am->per_worker_data)); { + u16 wk; u8 tt; - for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { - am->fa_conn_list_head[tt] = ~0; - am->fa_conn_list_tail[tt] = ~0; + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + vec_validate(pw->fa_conn_list_head, ACL_N_TIMEOUTS-1); + vec_validate(pw->fa_conn_list_tail, ACL_N_TIMEOUTS-1); + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + pw->fa_conn_list_head[tt] = ~0; + pw->fa_conn_list_tail[tt] = ~0; + } } } + clib_warning("ACL_FA_INIT-DONE: per-worker len: %d", vec_len(am->per_worker_data)); am->fa_min_deleted_sessions_per_interval = ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL; am->fa_max_deleted_sessions_per_interval = ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL; @@ -1883,7 +1913,6 @@ acl_init (vlib_main_t * vm) am->fa_cleaner_cnt_delete_by_sw_index = 0; am->fa_cleaner_cnt_delete_by_sw_index_ok = 0; am->fa_cleaner_cnt_unknown_event = 0; - am->fa_cleaner_cnt_deleted_sessions = 0; am->fa_cleaner_cnt_timer_restarted = 0; am->fa_cleaner_cnt_wait_with_timeout = 0; diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index eb074a7b..e35e0ea7 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -138,9 +138,7 @@ typedef struct { /* bitmap, when set the hash is initialized */ uword *fa_sessions_on_sw_if_index; clib_bihash_40_8_t *fa_sessions_by_sw_if_index; - /* pool for FA session data. 
See fa_node.h */ - fa_session_t *fa_sessions_pool; - /* The process node which is responsible to deleting the sessions */ + /* The process node which orcherstrates the cleanup */ u32 fa_cleaner_node_index; /* FA session timeouts, in seconds */ u32 session_timeout_sec[ACL_N_TIMEOUTS]; @@ -192,8 +190,9 @@ typedef struct { f64 fa_cleaner_wait_time_increment; u64 fa_current_cleaner_timer_wait_interval; - u32 fa_conn_list_head[ACL_N_TIMEOUTS]; - u32 fa_conn_list_tail[ACL_N_TIMEOUTS]; + + /* per-worker data related t conn management */ + acl_fa_per_worker_data_t *per_worker_data; /* Configured session timeout */ u64 session_timeout[ACL_N_TIMEOUTS]; @@ -205,12 +204,10 @@ typedef struct { _(fa_cleaner_cnt_delete_by_sw_index, "delete_by_sw_index events") \ _(fa_cleaner_cnt_delete_by_sw_index_ok, "delete_by_sw_index handled ok") \ _(fa_cleaner_cnt_unknown_event, "unknown events received") \ - _(fa_cleaner_cnt_deleted_sessions, "sessions deleted") \ _(fa_cleaner_cnt_timer_restarted, "session idle timers restarted") \ _(fa_cleaner_cnt_wait_with_timeout, "event wait with timeout called") \ _(fa_cleaner_cnt_wait_without_timeout, "event wait w/o timeout called") \ _(fa_cleaner_cnt_event_cycles, "total event cycles") \ - _(fa_cleaner_cnt_already_deleted, "try to delete already deleted conn") \ /* end of counters */ #define _(id, desc) u32 id; foreach_fa_cleaner_counter diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index c71429e7..c6059aae 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -542,6 +542,39 @@ fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess) } +static u64 +fa_session_get_shortest_timeout(acl_main_t * am) +{ + int timeout_type; + u64 timeout = ~0LL; + for(timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++) { + if (timeout > am->session_timeout_sec[timeout_type]) { + timeout = am->session_timeout_sec[timeout_type]; + } + } + return timeout; +} + +/* + * Get the timeout of the session in a list 
since its enqueue time. + */ + +static u64 +fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess) +{ + u64 timeout = am->vlib_main->clib_time.clocks_per_second; + /* + * we have the shortest possible timeout type in all the lists + * (see README-multicore for the rationale) + */ + timeout *= fa_session_get_shortest_timeout(am); + return timeout; +} + +/* + * Get the idle timeout of a session. + */ + static u64 fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) { @@ -554,6 +587,7 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) static void acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) { + /// FIXME-MULTICORE: lock around this function #ifdef FA_NODE_VERBOSE_DEBUG clib_warning ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu", @@ -569,68 +603,97 @@ acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1); } +static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + fa_session_t *sess = pw->fa_sessions_pool + session_index; + return sess; +} + static void -acl_fa_conn_list_add_session (acl_main_t * am, u32 sess_id, u64 now) +acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now) { - fa_session_t *sess = am->fa_sessions_pool + sess_id; + fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); u8 list_id = fa_session_get_timeout_type(am, sess); + uword thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + /* the retrieved session thread index must be necessarily the same as the one in the key */ + ASSERT (sess->thread_index == sess_id.thread_index); + /* the retrieved session thread index must be the same as current thread */ + ASSERT (sess->thread_index == thread_index); sess->link_enqueue_time 
= now; sess->link_list_id = list_id; sess->link_next_idx = ~0; - sess->link_prev_idx = am->fa_conn_list_tail[list_id]; - if (~0 != am->fa_conn_list_tail[list_id]) { - fa_session_t *prev_sess = am->fa_sessions_pool + am->fa_conn_list_tail[list_id]; - prev_sess->link_next_idx = sess_id; + sess->link_prev_idx = pw->fa_conn_list_tail[list_id]; + if (~0 != pw->fa_conn_list_tail[list_id]) { + fa_session_t *prev_sess = get_session_ptr(am, thread_index, pw->fa_conn_list_tail[list_id]); + prev_sess->link_next_idx = sess_id.session_index; + /* We should never try to link with a session on another thread */ + ASSERT(prev_sess->thread_index == sess->thread_index); } - am->fa_conn_list_tail[list_id] = sess_id; + pw->fa_conn_list_tail[list_id] = sess_id.session_index; + pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1); - if (~0 == am->fa_conn_list_head[list_id]) { - am->fa_conn_list_head[list_id] = sess_id; - /* If it is a first conn in any list, kick off the cleaner */ + if (~0 == pw->fa_conn_list_head[list_id]) { + pw->fa_conn_list_head[list_id] = sess_id.session_index; + /* If it is a first conn in any list, kick the cleaner */ vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, ACL_FA_CLEANER_RESCHEDULE, 0); - } } -static void -acl_fa_conn_list_delete_session (acl_main_t *am, u32 sess_id) +static int +acl_fa_conn_list_delete_session (acl_main_t *am, fa_full_session_id_t sess_id) { - fa_session_t *sess = am->fa_sessions_pool + sess_id; + uword thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + if (thread_index != sess_id.thread_index) { + /* If another thread attempts to delete the session, fail it. 
*/ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("thread id in key %d != curr thread index, not deleting"); +#endif + return 0; + } + fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); + /* we should never try to delete the session with another thread index */ + ASSERT(sess->thread_index == thread_index); if (~0 != sess->link_prev_idx) { - fa_session_t *prev_sess = am->fa_sessions_pool + sess->link_prev_idx; + fa_session_t *prev_sess = get_session_ptr(am, thread_index, sess->link_prev_idx); + /* the previous session must be in the same list as this one */ + ASSERT(prev_sess->link_list_id == sess->link_list_id); prev_sess->link_next_idx = sess->link_next_idx; - if (prev_sess->link_list_id != sess->link_list_id) - clib_warning("(prev_sess->link_list_id != sess->link_list_id)"); } if (~0 != sess->link_next_idx) { - fa_session_t *next_sess = am->fa_sessions_pool + sess->link_next_idx; + fa_session_t *next_sess = get_session_ptr(am, thread_index, sess->link_next_idx); + /* The next session must be in the same list as the one we are deleting */ + ASSERT(next_sess->link_list_id == sess->link_list_id); next_sess->link_prev_idx = sess->link_prev_idx; - if (next_sess->link_list_id != sess->link_list_id) - clib_warning("(next_sess->link_list_id != sess->link_list_id)"); } - if (am->fa_conn_list_head[sess->link_list_id] == sess_id) { - am->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx; + if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index) { + pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx; } - if (am->fa_conn_list_tail[sess->link_list_id] == sess_id) { - am->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx; + if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index) { + pw->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx; } + return 1; } - -int -acl_fa_session_is_dead (acl_main_t * am, u32 sw_if_index, u64 now, - u32 sess_id) -{ - return 0; -} - -static 
void -acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, u32 sess_id) +static int +acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, fa_full_session_id_t sess_id) { - // fa_session_t *sess = am->fa_sessions_pool + sess_id; - acl_fa_conn_list_delete_session(am, sess_id); - acl_fa_conn_list_add_session(am, sess_id, now); + if (acl_fa_conn_list_delete_session(am, sess_id)) { + acl_fa_conn_list_add_session(am, sess_id, now); + return 1; + } else { + /* + * Our thread does not own this connection, so we can not delete + * The session. To avoid the complicated signaling, we simply + * pick the list waiting time to be the shortest of the timeouts. + * This way we do not have to do anything special, and let + * the regular requeue check take care of everything. + */ + return 0; + } } @@ -648,13 +711,16 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, static void -acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, u32 sess_id) +acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id) { - fa_session_t *sess = (fa_session_t *) am->fa_sessions_pool + sess_id; + fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); + ASSERT(sess->thread_index == os_get_thread_index ()); BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], &sess->info.kv, 0); - pool_put_index (am->fa_sessions_pool, sess_id); - /* Deleting from timer wheel not needed, as the cleaner deals with the timers. */ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index]; + pool_put_index (pw->fa_sessions_pool, sess_id.session_index); + /* Deleting from timer structures not needed, + as the caller must have dealt with the timers. 
*/ vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); am->fa_session_dels_by_sw_if_index[sw_if_index]++; } @@ -671,13 +737,114 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) return (curr_sess < am->fa_conn_table_max_entries); } +static u64 +acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type) +{ + if (~0 == pw->fa_conn_list_head[timeout_type]) { + return ~0LL; // infinity. + } else { + fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]); + u64 timeout_time = + sess->link_enqueue_time + fa_session_get_list_timeout (am, sess); + return timeout_time; + } +} + +static int +acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, u32 session_index) +{ + fa_session_t *sess = get_session_ptr(am, thread_index, session_index); + u64 timeout_time = + sess->link_enqueue_time + fa_session_get_list_timeout (am, sess); + return (timeout_time < now) || (sess->link_enqueue_time <= pw->swipe_end_time); +} + +/* + * see if there are sessions ready to be checked, + * do the maintenance (requeue or delete), and + * return the total number of sessions reclaimed. 
+ */ +static int +acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + fa_full_session_id_t fsid; + fsid.thread_index = thread_index; + int total_expired = 0; + + { + u8 tt = 0; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + while((vec_len(pw->expired) < am->fa_max_deleted_sessions_per_interval) + && (~0 != pw->fa_conn_list_head[tt]) + && (acl_fa_conn_time_to_check(am, pw, now, thread_index, + pw->fa_conn_list_head[tt]))) { + fsid.session_index = pw->fa_conn_list_head[tt]; + vec_add1(pw->expired, fsid.session_index); + acl_fa_conn_list_delete_session(am, fsid); + } + } + } + + u32 *psid = NULL; + vec_foreach (psid, pw->expired) + { + fsid.session_index = *psid; + if (!pool_is_free_index (pw->fa_sessions_pool, fsid.session_index)) + { + fa_session_t *sess = get_session_ptr(am, thread_index, fsid.session_index); + u32 sw_if_index = sess->sw_if_index; + u64 sess_timeout_time = + sess->last_active_time + fa_session_get_timeout (am, sess); + if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index))) + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d", + (int) session_index); +#endif + /* There was activity on the session, so the idle timeout + has not passed. Enqueue for another time period. 
*/ + + acl_fa_conn_list_add_session(am, fsid, now); + pw->cnt_session_timer_restarted++; + } + else + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", + (int) session_index); +#endif + acl_fa_delete_session (am, sw_if_index, fsid); + pw->cnt_deleted_sessions++; + } + } + else + { + pw->cnt_already_deleted_sessions++; + } + } + total_expired = vec_len(pw->expired); + /* zero out the vector which we have acted on */ + if (pw->expired) + _vec_len (pw->expired) = 0; + /* if we were advancing and reached the end + * (no more sessions to recycle), reset the fast-forward timestamp */ + + if (pw->swipe_end_time && 0 == total_expired) + pw->swipe_end_time = 0; + return (total_expired); +} + always_inline void -acl_fa_try_recycle_session (acl_main_t * am, int is_input, u32 sw_if_index) +acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 sw_if_index) { /* try to recycle a TCP transient session */ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT; - u32 sess_id = am->fa_conn_list_head[timeout_type]; - if (~0 != sess_id) { + fa_full_session_id_t sess_id; + sess_id.session_index = pw->fa_conn_list_head[timeout_type]; + if (~0 != sess_id.session_index) { + sess_id.thread_index = thread_index; acl_fa_conn_list_delete_session(am, sess_id); acl_fa_delete_session(am, sw_if_index, sess_id); } @@ -689,25 +856,28 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, { clib_bihash_kv_40_8_t *pkv = &p5tuple->kv; clib_bihash_kv_40_8_t kv; - u32 sess_id; - fa_session_t *sess; + fa_full_session_id_t f_sess_id; + uword thread_index = os_get_thread_index(); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; - pool_get (am->fa_sessions_pool, sess); - sess_id = sess - am->fa_sessions_pool; + f_sess_id.thread_index = thread_index; + fa_session_t *sess; + pool_get_aligned (pw->fa_sessions_pool, sess, 
CLIB_CACHE_LINE_BYTES); + f_sess_id.session_index = sess - pw->fa_sessions_pool; kv.key[0] = pkv->key[0]; kv.key[1] = pkv->key[1]; kv.key[2] = pkv->key[2]; kv.key[3] = pkv->key[3]; kv.key[4] = pkv->key[4]; - kv.value = sess_id; + kv.value = f_sess_id.as_u64; memcpy (sess, pkv, sizeof (pkv->key)); sess->last_active_time = now; sess->sw_if_index = sw_if_index; sess->tcp_flags_seen.as_u16 = 0; - sess->reserved1 = 0; + sess->thread_index = thread_index; sess->link_list_id = ~0; sess->link_prev_idx = ~0; sess->link_next_idx = ~0; @@ -721,7 +891,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], &kv, 1); - acl_fa_conn_list_add_session(am, sess_id, now); + acl_fa_conn_list_add_session(am, f_sess_id, now); vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); am->fa_session_adds_by_sw_if_index[sw_if_index]++; @@ -757,6 +927,7 @@ acl_fa_node_fn (vlib_main_t * vm, clib_bihash_kv_40_8_t value_sess; vlib_node_runtime_t *error_node; u64 now = clib_cpu_time_now (); + uword thread_index = os_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -827,16 +998,19 @@ acl_fa_node_fn (vlib_main_t * vm, { trace_bitmap |= 0x80000000; error0 = ACL_FA_ERROR_ACL_EXIST_SESSION; - // FIXME assert(value_sess.value == (0xffffffff & value_sess.value)); - u32 sess_id = value_sess.value; - fa_session_t *sess = am->fa_sessions_pool + sess_id; + fa_full_session_id_t f_sess_id; + + f_sess_id.as_u64 = value_sess.value; + ASSERT(f_sess_id.thread_index < vec_len(vlib_mains)); + + fa_session_t *sess = get_session_ptr(am, f_sess_id.thread_index, f_sess_id.session_index); int old_timeout_type = fa_session_get_timeout_type (am, sess); action = acl_fa_track_session (am, is_input, sw_if_index0, now, sess, &fa_5tuple); /* expose the session id to the tracer */ - match_rule_index = sess_id; + match_rule_index = f_sess_id.session_index; int new_timeout_type = 
fa_session_get_timeout_type (am, sess); acl_check_needed = 0; @@ -844,7 +1018,7 @@ acl_fa_node_fn (vlib_main_t * vm, /* Tracking might have changed the session timeout type, e.g. from transient to established */ if (PREDICT_FALSE (old_timeout_type != new_timeout_type)) { - acl_fa_restart_timer_for_session (am, now, sess_id); + acl_fa_restart_timer_for_session (am, now, f_sess_id); pkts_restart_session_timer++; trace_bitmap |= 0x00010000 + ((0xff & old_timeout_type) << 8) + @@ -865,7 +1039,7 @@ acl_fa_node_fn (vlib_main_t * vm, if (2 == action) { if (!acl_fa_can_add_session (am, is_input, sw_if_index0)) - acl_fa_try_recycle_session (am, is_input, sw_if_index0); + acl_fa_try_recycle_session (am, is_input, thread_index, sw_if_index0); if (acl_fa_can_add_session (am, is_input, sw_if_index0)) { @@ -1024,16 +1198,9 @@ acl_out_ip4_fa_node_fn (vlib_main_t * vm, } /* - * This process performs all the connection clean up - both for idle connections, - * as well as receiving the signals to clean up the connections in case of sw_if_index deletion, - * or (maybe in the future) the connection deletion due to policy reasons. - * - * The previous iteration (l2sess) attempted to clean up the connections in small increments, - * in-band, but the problem it tried to preemptively address (process starvation) is yet to be seen. - * - * The approach with a single thread deleting the connections is simpler, thus we use it until - * there is a real starvation problem to solve. - * + * This process ensures the connection cleanup happens every so often + * even in absence of traffic, as well as provides general orchestration + * for requests like connection deletion on a given sw_if_index. 
*/ @@ -1056,57 +1223,131 @@ static char *acl_fa_cleaner_error_strings[] = { #undef _ }; -static int -acl_fa_clean_sessions_by_sw_if_index (acl_main_t *am, u32 sw_if_index, u32 *count) -{ - - int undeleted = 0; - fa_session_t *sess; - uword *dv = NULL; - uword *ii; - - pool_foreach(sess, am->fa_sessions_pool, ({ - if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) ) - vec_add1(dv, sess-am->fa_sessions_pool); - })); - vec_foreach(ii, dv) - { - sess = pool_elt_at_index(am->fa_sessions_pool, *ii); - acl_fa_delete_session(am, sess->sw_if_index, *ii); - (*count)++; - } - - pool_foreach(sess, am->fa_sessions_pool, ({ - if ( (~0 == sw_if_index) || (sw_if_index == sess->sw_if_index) ) - undeleted++; - })); - if (undeleted == 0) - { - if (~0 == sw_if_index) - { - /* FIXME: clean-up tables ? */ - } - else - { - /* FIXME: clean-up tables ? */ - } - } - return (undeleted == 0); -} /* *INDENT-ON* */ static vlib_node_registration_t acl_fa_session_cleaner_process_node; +static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node; -static int -acl_fa_conn_time_to_check (acl_main_t *am, u64 now, u32 session_index) +/* + * Per-worker thread interrupt-driven cleaner thread + * to clean idle connections if there are no packets + */ +static uword +acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) { - fa_session_t *sess = am->fa_sessions_pool + session_index; - u64 timeout_time = - sess->link_enqueue_time + fa_session_get_timeout (am, sess); - return (timeout_time < now); + acl_main_t *am = &acl_main; + u64 now = clib_cpu_time_now (); + u16 thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + int num_expired; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now); +#endif + /* allow another interrupt to be queued */ + pw->interrupt_is_pending = 0; + if (pw->clear_in_process) { + if (0 == 
pw->swipe_end_time) { + /* + * Someone has just set the flag to start clearing. + * we do this by combing through the connections up to a "time T" + * which is now, and requeueing everything except the expired + * connections and those matching the interface(s) being cleared. + */ + + /* + * first filter the sw_if_index bitmap that they want from us, by + * a bitmap of sw_if_index for which we actually have connections. + */ + if ((pw->pending_clear_sw_if_index_bitmap == 0) + || (pw->serviced_sw_if_index_bitmap == 0)) { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty"); +#endif + clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: (before and) swiping sw-if-index bitmap: %U, my serviced bitmap %U", + format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, + format_bitmap_hex, pw->serviced_sw_if_index_bitmap); +#endif + pw->pending_clear_sw_if_index_bitmap = clib_bitmap_and(pw->pending_clear_sw_if_index_bitmap, + pw->serviced_sw_if_index_bitmap); + } + + if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) { + /* if the cross-section is a zero vector, no need to do anything. 
*/ + clib_warning("WORKER: clearing done - nothing to do"); + pw->clear_in_process = 0; + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: swiping sw-if-index bitmap: %U, my serviced bitmap %U", + format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, + format_bitmap_hex, pw->serviced_sw_if_index_bitmap); +#endif + /* swipe through the connection lists until enqueue timestamps become above "now" */ + pw->swipe_end_time = now; + } + } + } + num_expired = acl_fa_check_idle_sessions(am, thread_index, now); + // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process); + if (pw->clear_in_process) { + if (0 == num_expired) { + /* we were clearing but we could not process any more connections. time to stop. */ + clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); + pw->clear_in_process = 0; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER: clearing done, all done"); +#endif + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: more work to do, raising interrupt"); +#endif + /* should continue clearing.. So could they please sent an interrupt again? 
*/ + pw->interrupt_is_needed = 1; + } + } else { + if (num_expired >= am->fa_max_deleted_sessions_per_interval) { + /* there was too much work, we should get an interrupt ASAP */ + pw->interrupt_is_needed = 1; + } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) { + /* signal that they should trigger us less */ + pw->interrupt_is_unwanted = 1; + } else { + /* the current rate of interrupts is ok */ + pw->interrupt_is_needed = 0; + pw->interrupt_is_unwanted = 0; + } + } + return 0; } +static void +send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + if (!pw->interrupt_is_pending) { + vlib_node_set_interrupt_pending (vlib_mains[thread_index], + acl_fa_worker_session_cleaner_process_node.index); + pw->interrupt_is_pending = 1; + /* if the interrupt was requested, mark that done. */ + pw->interrupt_is_needed = 0; + } +} +static void +send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am) +{ + int i; + /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet; */ + int n_threads = vec_len(vlib_mains); + for (i = n_threads > 1 ? 
1 : 0; i < n_threads; i++) { + send_one_worker_interrupt(vm, am, i); + } +} + +/* centralized process to drive per-worker cleaners */ static uword acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -1115,28 +1356,48 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, u64 now = clib_cpu_time_now (); f64 cpu_cps = vm->clib_time.clocks_per_second; u64 next_expire; - /* We should call timer wheel at least twice a second */ + /* We should check if there are connections to clean up - at least twice a second */ u64 max_timer_wait_interval = cpu_cps / 2; - am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; - - u32 *expired = NULL; uword event_type, *event_data = 0; + acl_fa_per_worker_data_t *pw0; + am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index; while (1) { - u32 count_deleted_sessions = 0; - u32 count_already_deleted = 0; now = clib_cpu_time_now (); next_expire = now + am->fa_current_cleaner_timer_wait_interval; int has_pending_conns = 0; + u16 ti; u8 tt; - for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) - { - if (~0 != am->fa_conn_list_head[tt]) + + /* + * walk over all per-thread list heads of different timeouts, + * and see if there are any connections pending. + * If there aren't - we do not need to wake up until the + * worker code signals that it has added a connection. + * + * Also, while we are at it, calculate the earliest we need to wake up. 
+ */ + for(ti = 0; ti < vec_len(vlib_mains); ti++) { + if (ti >= vec_len(am->per_worker_data)) { + continue; + } + acl_fa_per_worker_data_t *pw = &am->per_worker_data[ti]; + for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) { + u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt); + if ((head_expiry < next_expire) && !pw->interrupt_is_pending) { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt); +#endif + next_expire = head_expiry; + } + if (~0 != pw->fa_conn_list_head[tt]) { has_pending_conns = 1; + } } + } /* If no pending connections then no point in timing out */ if (!has_pending_conns) @@ -1155,9 +1416,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } else { - /* Timing wheel code is happier if it is called regularly */ - if (timeout > 0.5) - timeout = 0.5; am->fa_cleaner_cnt_wait_with_timeout++; (void) vlib_process_wait_for_event_or_clock (vm, timeout); event_type = vlib_process_get_events (vm, &event_data); @@ -1175,7 +1433,11 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, break; case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: { + uword *clear_sw_if_index_bitmap = 0; uword *sw_if_index0; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received"); +#endif vec_foreach (sw_if_index0, event_data) { am->fa_cleaner_cnt_delete_by_sw_index++; @@ -1184,13 +1446,54 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", *sw_if_index0); #endif - u32 count = 0; - int result = - acl_fa_clean_sessions_by_sw_if_index (am, *sw_if_index0, - &count); - count_deleted_sessions += count; - am->fa_cleaner_cnt_delete_by_sw_index_ok += result; + clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1); } +#ifdef FA_NODE_VERBOSE_DEBUG + 
clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap); +#endif + vec_foreach(pw0, am->per_worker_data) { + CLIB_MEMORY_BARRIER (); + while (pw0->clear_in_process) { + CLIB_MEMORY_BARRIER (); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data); +#endif + vlib_process_suspend(vm, 0.0001); + if (pw0->interrupt_is_needed) { + send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); + } + } + if (pw0->clear_in_process) { + clib_warning("ERROR-BUG! Could not initiate cleaning on worker because another cleanup in progress"); + } else { + pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap); + pw0->clear_in_process = 1; + } + } + /* send some interrupts so they can start working */ + send_interrupts_to_workers(vm, am); + + /* now wait till they all complete */ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data)); +#endif + vec_foreach(pw0, am->per_worker_data) { + CLIB_MEMORY_BARRIER (); + while (pw0->clear_in_process) { + CLIB_MEMORY_BARRIER (); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data); +#endif + vlib_process_suspend(vm, 0.0001); + if (pw0->interrupt_is_needed) { + send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); + } + } + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: cleaning done"); +#endif + clib_bitmap_free(clear_sw_if_index_bitmap); } break; default: @@ -1206,74 +1509,34 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, break; } - { - u8 tt = 0; - for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { - while((vec_len(expired) < 2*am->fa_max_deleted_sessions_per_interval) - && (~0 != am->fa_conn_list_head[tt]) - && (acl_fa_conn_time_to_check(am, now, 
- am->fa_conn_list_head[tt]))) { - u32 sess_id = am->fa_conn_list_head[tt]; - vec_add1(expired, sess_id); - acl_fa_conn_list_delete_session(am, sess_id); - } - } - } + send_interrupts_to_workers(vm, am); - u32 *psid = NULL; - vec_foreach (psid, expired) - { - u32 session_index = *psid; - if (!pool_is_free_index (am->fa_sessions_pool, session_index)) - { - fa_session_t *sess = am->fa_sessions_pool + session_index; - u32 sw_if_index = sess->sw_if_index; - u64 sess_timeout_time = - sess->last_active_time + fa_session_get_timeout (am, sess); - if (now < sess_timeout_time) - { - /* clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d", - (int) session_index); */ - - /* There was activity on the session, so the idle timeout - has not passed. Enqueue for another time period. */ - - acl_fa_conn_list_add_session(am, session_index, now); - - /* FIXME: When/if moving to timer wheel, - pretend we did this in the past, - at last_active moment, so the timer is accurate */ - am->fa_cleaner_cnt_timer_restarted++; - } - else - { - /* clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", - (int) session_index); */ - acl_fa_delete_session (am, sw_if_index, session_index); - count_deleted_sessions++; - } - } - else - { - count_already_deleted++; - } - } - if (expired) - _vec_len (expired) = 0; if (event_data) _vec_len (event_data) = 0; - if (count_deleted_sessions > am->fa_max_deleted_sessions_per_interval) { - /* if there was too many sessions to delete, do less waiting around next time */ + + int interrupts_needed = 0; + int interrupts_unwanted = 0; + + vec_foreach(pw0, am->per_worker_data) { + if (pw0->interrupt_is_needed) { + interrupts_needed++; + /* the per-worker value is reset when sending the interrupt */ + } + if (pw0->interrupt_is_unwanted) { + interrupts_unwanted++; + pw0->interrupt_is_unwanted = 0; + } + } + if (interrupts_needed) { + /* they need more interrupts, do less waiting around next time */ am->fa_current_cleaner_timer_wait_interval /= 2; - } else 
if (count_deleted_sessions < am->fa_min_deleted_sessions_per_interval) { - /* Too few deleted sessions, slowly increase the amount of sleep up to a limit */ + } else if (interrupts_unwanted) { + /* slowly increase the amount of sleep up to a limit */ if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; } am->fa_cleaner_cnt_event_cycles++; - am->fa_cleaner_cnt_deleted_sessions += count_deleted_sessions; - am->fa_cleaner_cnt_already_deleted += count_already_deleted; } /* NOT REACHED */ return 0; @@ -1307,6 +1570,9 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index)) && (!acl_fa_ifc_has_out_acl (am, sw_if_index))) { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index); +#endif vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index); @@ -1317,6 +1583,12 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) /* *INDENT-OFF* */ +VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = { + .function = acl_fa_worker_conn_cleaner_process, + .name = "acl-plugin-fa-worker-cleaner-process", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = { .function = acl_fa_session_cleaner_process, diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 86183622..a94e7db9 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -59,15 +59,29 @@ typedef struct { u8 as_u8[2]; u16 as_u16; } tcp_flags_seen; ; /* +2 bytes = 62 */ - u8 link_list_id; /* +1 bytes = 63 */ - u8 reserved1; /* +1 bytes = 64 */ - u32 link_prev_idx; - u32 link_next_idx; - u64 link_enqueue_time; - u64 reserved2[6]; + u16 thread_index; 
/* +2 bytes = 64 */ + u64 link_enqueue_time; /* 8 byte = 8 */ + u32 link_prev_idx; /* +4 bytes = 12 */ + u32 link_next_idx; /* +4 bytes = 16 */ + u8 link_list_id; /* +1 bytes = 17 */ + u8 reserved1[7]; /* +7 bytes = 24 */ + u64 reserved2[5]; /* +5*8 bytes = 64 */ } fa_session_t; +/* This structure is used to fill in the u64 value + in the per-sw-if-index hash table */ +typedef struct { + union { + u64 as_u64; + struct { + u32 session_index; + u16 thread_index; + u16 reserved0; + }; + }; +} fa_full_session_id_t; + /* * A few compile-time constraints on the size and the layout of the union, to ensure * it makes sense both for bihash and for us. @@ -79,10 +93,56 @@ CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64)); CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64)); CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t)); -/* Let's try to fit within the cacheline */ -CT_ASSERT_EQUAL(fa_session_t_size_is_64, sizeof(fa_session_t), 128); +/* Let's try to fit within two cachelines */ +CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128); + +/* Session ID MUST be the same as u64 */ +CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64)); #undef CT_ASSERT_EQUAL +typedef struct { + /* The pool of sessions managed by this worker */ + fa_session_t *fa_sessions_pool; + /* per-worker ACL_N_TIMEOUTS of conn lists */ + u32 *fa_conn_list_head; + u32 *fa_conn_list_tail; + /* Vector of expired connections retrieved from lists */ + u32 *expired; + /* the earliest next expiry time */ + u64 next_expiry_time; + /* if not zero, look at all the elements until their enqueue timestamp is after below one */ + u64 requeue_until_time; + /* Current time between the checks */ + u64 current_time_wait_interval; + /* Counter of how many sessions we did delete */ + u64 cnt_deleted_sessions; + /* Counter of already deleted sessions being deleted - should not 
increment unless a bug */ + u64 cnt_already_deleted_sessions; + /* Number of times we requeued a session to a head of the list */ + u64 cnt_session_timer_restarted; + /* swipe up to this enqueue time, rather than following the timeouts */ + u64 swipe_end_time; + /* bitmap of sw_if_index serviced by this worker */ + uword *serviced_sw_if_index_bitmap; + /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */ + uword *pending_clear_sw_if_index_bitmap; + /* atomic, indicates that the swipe-deletion of connections is in progress */ + u32 clear_in_process; + /* Interrupt is pending from main thread */ + int interrupt_is_pending; + /* + * Interrupt node on the worker thread sets this if it knows there is + * more work to do, but it has to finish to avoid hogging the + * core for too long. + */ + int interrupt_is_needed; + /* + * Set to indicate that the interrupt node wants to get less interrupts + * because there is not enough work for the current rate. + */ + int interrupt_is_unwanted; +} acl_fa_per_worker_data_t; + typedef enum { ACL_FA_ERROR_DROP, -- cgit 1.2.3-korg From 6295d50b37ded4a335058722545dd5310202b8c0 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 6 Jun 2017 14:50:03 +0200 Subject: acl-plugin: add a plugin-specific control-ping message api and make the test code use it This fixes the undesirable pause in the dump commands in case there is nothing to dump. 
Change-Id: I0554556c9e442038aa2a1ed8c88234f21f7fe9b9 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.api | 24 ++++++++++++++++++++++++ src/plugins/acl/acl.c | 15 +++++++++++++++ src/plugins/acl/acl.h | 2 +- src/plugins/acl/acl_test.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 1 deletion(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api index 3b334113..d34f374e 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -44,6 +44,30 @@ define acl_plugin_get_version_reply u32 minor; }; +/** \brief Control ping from client to api server request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define acl_plugin_control_ping +{ + u32 client_index; + u32 context; +}; + +/** \brief Control ping from the client to the server response + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param vpe_pid - the pid of the vpe, returned by the server +*/ +define acl_plugin_control_ping_reply +{ + u32 context; + i32 retval; + u32 client_index; + u32 vpe_pid; +}; + /** \brief Access List Rule entry @param is_permit - deny (0), permit (1), or permit+reflect(2) action on this rule. 
@param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0) diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 84ed7af6..d52f70af 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -62,6 +62,7 @@ acl_main_t acl_main; #define foreach_acl_plugin_api_msg \ _(ACL_PLUGIN_GET_VERSION, acl_plugin_get_version) \ +_(ACL_PLUGIN_CONTROL_PING, acl_plugin_control_ping) \ _(ACL_ADD_REPLACE, acl_add_replace) \ _(ACL_DEL, acl_del) \ _(ACL_INTERFACE_ADD_DEL, acl_interface_add_del) \ @@ -106,6 +107,20 @@ vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rmp); } +static void +vl_api_acl_plugin_control_ping_t_handler (vl_api_acl_plugin_control_ping_t * mp) +{ + vl_api_acl_plugin_control_ping_reply_t *rmp; + acl_main_t *am = &acl_main; + int rv = 0; + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_ACL_PLUGIN_CONTROL_PING_REPLY, + ({ + rmp->vpe_pid = ntohl (getpid ()); + })); + /* *INDENT-ON* */ +} static int acl_add_list (u32 count, vl_api_acl_rule_t rules[], diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index e35e0ea7..02623a9c 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -28,7 +28,7 @@ #include "fa_node.h" #define ACL_PLUGIN_VERSION_MAJOR 1 -#define ACL_PLUGIN_VERSION_MINOR 2 +#define ACL_PLUGIN_VERSION_MINOR 3 #define UDP_SESSION_IDLE_TIMEOUT_SEC 600 #define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24) diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c index 8f6179f8..65ef8009 100644 --- a/src/plugins/acl/acl_test.c +++ b/src/plugins/acl/acl_test.c @@ -243,6 +243,22 @@ static void vl_api_macip_acl_interface_get_reply_t_handler vam->result_ready = 1; } +static void vl_api_acl_plugin_control_ping_reply_t_handler + (vl_api_acl_plugin_control_ping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + 
vam->result_ready = 1; + } +} + /* * Table of message reply handlers, must include boilerplate handlers @@ -260,6 +276,7 @@ _(MACIP_ACL_DEL_REPLY, macip_acl_del_reply) \ _(MACIP_ACL_DETAILS, macip_acl_details) \ _(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \ _(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \ +_(ACL_PLUGIN_CONTROL_PING_REPLY, acl_plugin_control_ping_reply) \ _(ACL_PLUGIN_GET_VERSION_REPLY, acl_plugin_get_version_reply) static int api_acl_plugin_get_version (vat_main_t * vam) @@ -728,6 +745,15 @@ static int api_acl_interface_set_acl_list (vat_main_t * vam) return ret; } +static void +api_acl_send_control_ping(vat_main_t *vam) +{ + vl_api_acl_plugin_control_ping_t *mp_ping; + + M(ACL_PLUGIN_CONTROL_PING, mp_ping); + S(mp_ping); +} + static int api_acl_interface_list_dump (vat_main_t * vam) { @@ -753,6 +779,9 @@ static int api_acl_interface_list_dump (vat_main_t * vam) /* send it... */ S(mp); + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + /* Wait for a reply... */ W (ret); return ret; @@ -780,6 +809,9 @@ static int api_acl_dump (vat_main_t * vam) /* send it... */ S(mp); + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + /* Wait for a reply... */ W (ret); return ret; @@ -807,6 +839,9 @@ static int api_macip_acl_dump (vat_main_t * vam) /* send it... */ S(mp); + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + /* Wait for a reply... */ W (ret); return ret; -- cgit 1.2.3-korg From 779c3e3a632f887a7249a5cae8cce6eeacb67e3f Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Thu, 8 Jun 2017 20:03:35 +0200 Subject: acl-plugin: store sessions in a single hash table instead of a per-interface A bihash-per-interface is convenient, but turns out tricky difficult from the maintenance standpoint with the large number of interfaces. 
This patch makes the sessions reside in a single hash table for all the interfaces, adding the lower 16 bit of sw_if_index as part of the key into the previously unused space. There is a tradeoff, that a session with an identical 5-tuple and the same sw_if_index modulo 65536 will match on either of the interfaces. The probability of that is deemed sufficiently small to not worry about it. In case it still happens before the heat death of the universe, there is a clib_warning and the colliding packet will be dropped, at which point we will need to bump the hash key size by another u64, but rather not pay the cost of doing that right now. Change-Id: I2747839cfcceda73e597cbcafbe1e377fb8f1889 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.c | 5 +++++ src/plugins/acl/acl.h | 9 ++++++--- src/plugins/acl/fa_node.c | 45 +++++++++++++++++++++++++++------------------ src/plugins/acl/fa_node.h | 2 +- 4 files changed, 39 insertions(+), 22 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 4174a570..e7b85495 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -1823,6 +1823,11 @@ acl_show_aclplugin_fn (vlib_main_t * vm, u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? 
am->fa_session_dels_by_sw_if_index[sw_if_index] : 0; out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); })); + { + u64 n_adds = am->fa_session_total_adds; + u64 n_dels = am->fa_session_total_dels; + out0 = format(out0, "TOTAL: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels); + } out0 = format(out0, "\n\nPer-worker data:\n"); for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 02623a9c..65bc9e74 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -135,9 +135,9 @@ typedef struct { /* bitmaps when set the processing is enabled on the interface */ uword *fa_in_acl_on_sw_if_index; uword *fa_out_acl_on_sw_if_index; - /* bitmap, when set the hash is initialized */ - uword *fa_sessions_on_sw_if_index; - clib_bihash_40_8_t *fa_sessions_by_sw_if_index; + /* bihash holding all of the sessions */ + int fa_sessions_hash_is_initialized; + clib_bihash_40_8_t fa_sessions_hash; /* The process node which orcherstrates the cleanup */ u32 fa_cleaner_node_index; /* FA session timeouts, in seconds */ @@ -145,6 +145,9 @@ typedef struct { /* session add/delete counters */ u64 *fa_session_adds_by_sw_if_index; u64 *fa_session_dels_by_sw_if_index; + /* total session adds/dels */ + u64 fa_session_total_adds; + u64 fa_session_total_dels; /* L2 datapath glue */ diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 78b10dc9..66621b6b 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -494,9 +494,7 @@ acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt, static int acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0) { - int has_sessions = - clib_bitmap_get (am->fa_sessions_on_sw_if_index, sw_if_index0); - return has_sessions; + return am->fa_sessions_hash_is_initialized; } static int @@ -594,13 +592,11 @@ 
acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) sw_if_index0, am->fa_conn_table_hash_num_buckets, am->fa_conn_table_hash_memory_size); #endif - vec_validate (am->fa_sessions_by_sw_if_index, sw_if_index0); - BV (clib_bihash_init) (&am->fa_sessions_by_sw_if_index - [sw_if_index0], "ACL plugin FA session bihash", + BV (clib_bihash_init) (&am->fa_sessions_hash, + "ACL plugin FA session bihash", am->fa_conn_table_hash_num_buckets, am->fa_conn_table_hash_memory_size); - am->fa_sessions_on_sw_if_index = - clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1); + am->fa_sessions_hash_is_initialized = 1; } static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) @@ -715,7 +711,7 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se { fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); ASSERT(sess->thread_index == os_get_thread_index ()); - BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], + BV (clib_bihash_add_del) (&am->fa_sessions_hash, &sess->info.kv, 0); acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index]; pool_put_index (pw->fa_sessions_pool, sess_id.session_index); @@ -723,18 +719,15 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se as the caller must have dealt with the timers. 
*/ vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); am->fa_session_dels_by_sw_if_index[sw_if_index]++; + clib_smp_atomic_add(&am->fa_session_total_dels, 1); } static int acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) { - u64 curr_sess; - vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); - vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); - curr_sess = - am->fa_session_adds_by_sw_if_index[sw_if_index] - - am->fa_session_dels_by_sw_if_index[sw_if_index]; - return (curr_sess < am->fa_conn_table_max_entries); + u64 curr_sess_count; + curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels; + return (curr_sess_count < am->fa_conn_table_max_entries); } static u64 @@ -889,12 +882,13 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, acl_fa_ifc_init_sessions (am, sw_if_index); } - BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index], + BV (clib_bihash_add_del) (&am->fa_sessions_hash, &kv, 1); acl_fa_conn_list_add_session(am, f_sess_id, now); vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); am->fa_session_adds_by_sw_if_index[sw_if_index]++; + clib_smp_atomic_add(&am->fa_session_total_adds, 1); } static int @@ -902,7 +896,7 @@ acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple, clib_bihash_kv_40_8_t * pvalue_sess) { return (BV (clib_bihash_search) - (&am->fa_sessions_by_sw_if_index[sw_if_index0], &p5tuple->kv, + (&am->fa_sessions_hash, &p5tuple->kv, pvalue_sess) == 0); } @@ -977,6 +971,7 @@ acl_fa_node_fn (vlib_main_t * vm, */ acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple); + fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff; acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess); #ifdef FA_NODE_VERBOSE_DEBUG clib_warning @@ -1024,6 +1019,20 @@ acl_fa_node_fn (vlib_main_t * vm, 0x00010000 + ((0xff & old_timeout_type) << 8) + (0xff & new_timeout_type); } + /* + * I estimate 
the likelihood to be very low - the VPP needs + * to have >64K interfaces to start with and then on + * exactly 64K indices apart needs to be exactly the same + * 5-tuple... Anyway, since this probability is nonzero - + * print an error and drop the unlucky packet. + * If this shows up in real world, we would need to bump + * the hash key length. + */ + if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) { + clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!"); + acl_check_needed = 0; + action = 0; + } } } diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index a94e7db9..671593a8 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -36,7 +36,7 @@ typedef union { struct { u16 port[2]; u16 proto; - u16 rsvd; + u16 lsb_of_sw_if_index; }; } fa_session_l4_key_t; -- cgit 1.2.3-korg From 7f4d577d6bc243f53a044d92ca9367b3f1fa170e Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 24 May 2017 13:20:47 +0200 Subject: acl-plugin: bihash-based ACL lookup Add a bihash-based ACL lookup mechanism and make it a new default. This changes the time required to lookup a 5-tuple match from O(total_N_entries) to O(total_N_mask_types), where "mask type" is an overall mask on the 5-tuple required to represent an ACE. For testing/comparison there is a temporary debug CLI "set acl-plugin use-hash-acl-matching {0|1}", which, when set to 0, makes the plugin use the "old" linear lookup, and when set to 1, makes it use the hash-based lookup. Based on the discussions on vpp-dev mailing list, prevent assigning the ACL index to an interface, when the ACL with that index is not defined, also prevent deleting an ACL if that ACL is applied. Also, for the easier debugging of the state, there are new debug CLI commands to see the ACL plugin state at several layers: "show acl-plugin acl [index N]" - show a high-level ACL representation, used for the linear lookup and as a base for building the hashtable-based lookup. 
Also shows if a given ACL is applied somewhere. "show acl-plugin interface [sw_if_index N]" - show which interfaces have which ACL(s) applied. "show acl-plugin tables" - a lower-level debug command used to see the state of all of the related data structures at once. There are specifiers possible, which make for a more focused and maybe augmented output: "show acl-plugin tables acl [index N]" show the "bitmask-ready" representations of the ACLs, as well as the mask types and their associated indices. "show acl-plugin tables mask" show the derived mask types and their indices only. "show acl-plugin tables applied [sw_if_index N]" show the table of all of the ACEs applied for a given sw_if_index or all interfaces. "show acl-plugin tables hash [verbose N]" show the 48x8 bihash used for the ACL lookup. Change-Id: I89fff051424cb44bcb189e3cee04c1b8f76efc28 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl.am | 1 + src/plugins/acl/acl.c | 341 +++++++++++++++- src/plugins/acl/acl.h | 30 ++ src/plugins/acl/fa_node.c | 39 +- src/plugins/acl/fa_node.h | 7 +- src/plugins/acl/hash_lookup.c | 742 ++++++++++++++++++++++++++++++++++ src/plugins/acl/hash_lookup.h | 60 +++ src/plugins/acl/hash_lookup.md | 241 +++++++++++ src/plugins/acl/hash_lookup_private.h | 27 ++ src/plugins/acl/hash_lookup_types.h | 94 +++++ test/test_acl_plugin.py | 3 + test/test_acl_plugin_conns.py | 12 + test/test_acl_plugin_l2l3.py | 3 + 13 files changed, 1590 insertions(+), 10 deletions(-) create mode 100644 src/plugins/acl/hash_lookup.c create mode 100644 src/plugins/acl/hash_lookup.h create mode 100644 src/plugins/acl/hash_lookup.md create mode 100644 src/plugins/acl/hash_lookup_private.h create mode 100644 src/plugins/acl/hash_lookup_types.h (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl.am b/src/plugins/acl.am index 01e0197c..0a414481 100644 --- a/src/plugins/acl.am +++ b/src/plugins/acl.am @@ -16,6 +16,7 @@ vppplugins_LTLIBRARIES += acl_plugin.la acl_plugin_la_SOURCES = \
acl/acl.c \ + acl/hash_lookup.c \ acl/fa_node.c \ acl/l2sess.h \ acl/manual_fns.h \ diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index e7b85495..51929ad9 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -52,6 +52,7 @@ #undef vl_api_version #include "fa_node.h" +#include "hash_lookup.h" acl_main_t acl_main; @@ -154,6 +155,7 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[], for (i = 0; i < count; i++) { r = &acl_new_rules[i]; + memset(r, 0, sizeof(*r)); r->is_permit = rules[i].is_permit; r->is_ipv6 = rules[i].is_ipv6; if (r->is_ipv6) @@ -188,12 +190,14 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[], else { a = am->acls + *acl_list_index; + hash_acl_delete(am, *acl_list_index); /* Get rid of the old rules */ clib_mem_free (a->rules); } a->rules = acl_new_rules; a->count = count; memcpy (a->tag, tag, sizeof (a->tag)); + hash_acl_add(am, *acl_list_index); return 0; } @@ -209,6 +213,19 @@ acl_del_list (u32 acl_list_index) return -1; } + if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) { + if (vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index]) > 0) { + /* ACL is applied somewhere inbound. Refuse to delete */ + return -1; + } + } + if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) { + if (vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index]) > 0) { + /* ACL is applied somewhere outbound. 
Refuse to delete */ + return -1; + } + } + /* delete any references to the ACL */ for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++) { @@ -240,7 +257,9 @@ acl_del_list (u32 acl_list_index) } } } + /* delete the hash table data */ + hash_acl_delete(am, acl_list_index); /* now we can delete the ACL itself */ a = &am->acls[acl_list_index]; if (a->rules) @@ -619,28 +638,62 @@ acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index, return rv; } +static int +acl_is_not_defined(acl_main_t *am, u32 acl_list_index) +{ + return (pool_is_free_index (am->acls, acl_list_index)); +} + static int acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) { acl_main_t *am = &acl_main; + if (acl_is_not_defined(am, acl_list_index)) { + /* ACL is not defined. Can not apply */ + return -1; + } + if (is_input) { vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + + u32 index = vec_search(am->input_acl_vec_by_sw_if_index[sw_if_index], acl_list_index); + if (index < vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])) { + clib_warning("ACL %d is already applied inbound on sw_if_index %d (index %d)", + acl_list_index, sw_if_index, index); + /* the entry is already there */ + return -1; + } vec_add (am->input_acl_vec_by_sw_if_index[sw_if_index], &acl_list_index, 1); + vec_validate (am->input_sw_if_index_vec_by_acl, acl_list_index); + vec_add (am->input_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index, + 1); acl_interface_in_enable_disable (am, sw_if_index, 1); } else { vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + + u32 index = vec_search(am->output_acl_vec_by_sw_if_index[sw_if_index], acl_list_index); + if (index < vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])) { + clib_warning("ACL %d is already applied outbound on sw_if_index %d (index %d)", + acl_list_index, sw_if_index, index); + /* the entry is already there */ + return -1; + } vec_add (am->output_acl_vec_by_sw_if_index[sw_if_index], 
&acl_list_index, 1); + vec_validate (am->output_sw_if_index_vec_by_acl, acl_list_index); + vec_add (am->output_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index, + 1); acl_interface_out_enable_disable (am, sw_if_index, 1); } return 0; } + static int acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) { @@ -661,6 +714,15 @@ acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) break; } } + + if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) { + u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index); + if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) { + hash_acl_unapply(am, sw_if_index, is_input, acl_list_index); + vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index); + } + } + if (0 == vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index])) { acl_interface_in_enable_disable (am, sw_if_index, 0); @@ -680,6 +742,15 @@ acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) break; } } + + if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) { + u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index); + if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) { + hash_acl_unapply(am, sw_if_index, is_input, acl_list_index); + vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index); + } + } + if (0 == vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index])) { acl_interface_out_enable_disable (am, sw_if_index, 0); @@ -692,16 +763,41 @@ static void acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input) { acl_main_t *am = &acl_main; + int i; if (is_input) { acl_interface_in_enable_disable (am, sw_if_index, 0); vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + + for(i = vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) { + u32 acl_list_index = am->input_acl_vec_by_sw_if_index[sw_if_index][i]; + 
hash_acl_unapply(am, sw_if_index, is_input, acl_list_index); + if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) { + u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index); + if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) { + vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index); + } + } + } + vec_reset_length (am->input_acl_vec_by_sw_if_index[sw_if_index]); } else { acl_interface_out_enable_disable (am, sw_if_index, 0); vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + + for(i = vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) { + u32 acl_list_index = am->output_acl_vec_by_sw_if_index[sw_if_index][i]; + hash_acl_unapply(am, sw_if_index, is_input, acl_list_index); + if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) { + u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index); + if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) { + vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index); + } + } + } + vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]); } } @@ -711,13 +807,19 @@ acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input, u32 acl_list_index) { int rv = -1; + acl_main_t *am = &acl_main; if (is_add) { rv = acl_interface_add_inout_acl (sw_if_index, is_input, acl_list_index); + if (rv == 0) + { + hash_acl_apply(am, sw_if_index, is_input, acl_list_index); + } } else { + hash_acl_unapply(am, sw_if_index, is_input, acl_list_index); rv = acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index); } @@ -1198,9 +1300,18 @@ vl_api_acl_interface_set_acl_list_t_handler for (i = 0; i < mp->count; i++) { - acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input), - ntohl (mp->acls[i])); + if(acl_is_not_defined(am, ntohl (mp->acls[i]))) { + /* ACL does not exist, so we can not apply it */ + rv = -1; + } } + if (0 == rv) { + 
for (i = 0; i < mp->count; i++) + { + acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input), + ntohl (mp->acls[i])); + } + } } REPLY_MACRO (VL_API_ACL_INTERFACE_SET_ACL_LIST_REPLY); @@ -1706,6 +1817,11 @@ acl_set_aclplugin_fn (vlib_main_t * vm, } goto done; } + if (unformat (input, "use-hash-acl-matching %u", &val)) + { + am->use_hash_acl_matching = (val !=0); + goto done; + } if (unformat (input, "l4-match-nonfirst-fragment %u", &val)) { am->l4_match_nonfirst_fragment = (val != 0); @@ -1800,7 +1916,6 @@ done: return error; } - static clib_error_t * acl_show_aclplugin_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1809,13 +1924,16 @@ acl_show_aclplugin_fn (vlib_main_t * vm, clib_error_t *error = 0; acl_main_t *am = &acl_main; vnet_interface_main_t *im = &am->vnet_main->interface_main; + u32 *pj; vnet_sw_interface_t *swif; if (unformat (input, "sessions")) { - u8 * out0 = 0; + u8 * out0 = format(0, ""); u16 wk; + u32 show_bihash_verbose = 0; + unformat (input, "verbose %u", &show_bihash_verbose); pool_foreach (swif, im->sw_interfaces, ({ u32 sw_if_index = swif->sw_if_index; @@ -1856,6 +1974,215 @@ acl_show_aclplugin_fn (vlib_main_t * vm, am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second); vec_free(out0); + show_fa_sessions_hash(vm, show_bihash_verbose); + } + else if (unformat (input, "interface")) + { + u32 sw_if_index = ~0; + u32 swi; + u8 * out0 = format(0, ""); + unformat (input, "sw_if_index %u", &sw_if_index); + for(swi = 0; (swi < vec_len(am->input_acl_vec_by_sw_if_index)) || + (swi < vec_len(am->output_acl_vec_by_sw_if_index)); swi++) { + out0 = format(out0, "sw_if_index %d:\n", swi); + + if ((swi < vec_len(am->input_acl_vec_by_sw_if_index)) && + (vec_len(am->input_acl_vec_by_sw_if_index[swi]) > 0)) { + out0 = format(out0, " input acl(s): "); + vec_foreach(pj, am->input_acl_vec_by_sw_if_index[swi]) { + out0 = format(out0, "%d ", *pj); + } + out0 = 
format(out0, "\n"); + } + + if ((swi < vec_len(am->output_acl_vec_by_sw_if_index)) && + (vec_len(am->output_acl_vec_by_sw_if_index[swi]) > 0)) { + out0 = format(out0, " output acl(s): "); + vec_foreach(pj, am->output_acl_vec_by_sw_if_index[swi]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + + } + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + else if (unformat (input, "acl")) + { + u32 acl_index = ~0; + u32 i; + u8 * out0 = format(0, ""); + unformat (input, "index %u", &acl_index); + for(i=0; iacls); i++) { + if (acl_is_not_defined(am, i)) { + /* don't attempt to show the ACLs that do not exist */ + continue; + } + if ((acl_index != ~0) && (acl_index != i)) { + continue; + } + out0 = format(out0, "acl-index %u count %u tag {%s}\n", i, am->acls[i].count, am->acls[i].tag); + acl_rule_t *r; + int j; + for(j=0; jacls[i].count; j++) { + r = &am->acls[i].rules[j]; + out0 = format(out0, " %4d: %s ", j, r->is_ipv6 ? "ipv6" : "ipv4"); + out0 = format_acl_action(out0, r->is_permit); + out0 = format(out0, " src %U/%d", format_ip46_address, &r->src, IP46_TYPE_ANY, r->src_prefixlen); + out0 = format(out0, " dst %U/%d", format_ip46_address, &r->dst, IP46_TYPE_ANY, r->dst_prefixlen); + out0 = format(out0, " proto %d", r->proto); + out0 = format(out0, " sport %d", r->src_port_or_type_first); + if (r->src_port_or_type_first != r->src_port_or_type_last) { + out0 = format(out0, "-%d", r->src_port_or_type_last); + } + out0 = format(out0, " dport %d", r->dst_port_or_code_first); + if (r->dst_port_or_code_first != r->dst_port_or_code_last) { + out0 = format(out0, "-%d", r->dst_port_or_code_last); + } + if (r->tcp_flags_mask || r->tcp_flags_value) { + out0 = format(out0, " tcpflags %d mask %d", r->tcp_flags_value, r->tcp_flags_mask); + } + out0 = format(out0, "\n"); + } + + if (iinput_sw_if_index_vec_by_acl)) { + out0 = format(out0, " applied inbound on sw_if_index: "); + vec_foreach(pj, am->input_sw_if_index_vec_by_acl[i]) { + out0 = 
format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + if (ioutput_sw_if_index_vec_by_acl)) { + out0 = format(out0, " applied outbound on sw_if_index: "); + vec_foreach(pj, am->output_sw_if_index_vec_by_acl[i]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + } + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + else if (unformat (input, "tables")) + { + ace_mask_type_entry_t *mte; + u32 acl_index = ~0; + u32 sw_if_index = ~0; + int show_acl_hash_info = 0; + int show_applied_info = 0; + int show_mask_type = 0; + int show_bihash = 0; + u32 show_bihash_verbose = 0; + + if (unformat (input, "acl")) { + show_mask_type = 1; + unformat (input, "index %u", &acl_index); + } else if (unformat (input, "applied")) { + show_applied_info = 1; + unformat (input, "sw_if_index %u", &sw_if_index); + } else if (unformat (input, "mask")) { + show_mask_type = 1; + } else if (unformat (input, "hash")) { + show_bihash = 1; + unformat (input, "verbose %u", &show_bihash_verbose); + } + + if ( ! 
(show_mask_type || show_acl_hash_info || show_applied_info || show_bihash) ) { + /* if no qualifiers specified, show all */ + show_mask_type = 1; + show_acl_hash_info = 1; + show_applied_info = 1; + show_bihash = 1; + } + + if (show_mask_type) { + vlib_cli_output(vm, "Mask-type entries:"); + /* *INDENT-OFF* */ + pool_foreach(mte, am->ace_mask_type_pool, + ({ + vlib_cli_output(vm, " %3d: %016llx %016llx %016llx %016llx %016llx %016llx refcount %d", + mte - am->ace_mask_type_pool, + mte->mask.kv.key[0], mte->mask.kv.key[1], mte->mask.kv.key[2], + mte->mask.kv.key[3], mte->mask.kv.key[4], mte->mask.kv.value, mte->refcount); + })); + /* *INDENT-ON* */ + } + + if (show_acl_hash_info) { + u32 i,j; + u8 * out0 = format(0, ""); + u64 *m; + out0 = format(out0, "Mask-ready ACL representations\n"); + for (i=0; i< vec_len(am->hash_acl_infos); i++) { + if ((acl_index != ~0) && (acl_index != i)) { + continue; + } + hash_acl_info_t *ha = &am->hash_acl_infos[i]; + out0 = format(out0, "acl-index %u bitmask-ready layout\n", i); + out0 = format(out0, " mask type index bitmap: %U\n", format_bitmap_hex, ha->mask_type_index_bitmap); + for(j=0; jrules); j++) { + hash_ace_info_t *pa = &ha->rules[j]; + m = (u64 *)&pa->match; + out0 = format(out0, " %4d: %016llx %016llx %016llx %016llx %016llx %016llx mask index %d acl %d rule %d action %d src/dst portrange not ^2: %d,%d\n", + j, m[0], m[1], m[2], m[3], m[4], m[5], pa->mask_type_index, + pa->acl_index, pa->ace_index, pa->action, + pa->src_portrange_not_powerof2, pa->dst_portrange_not_powerof2); + } + } + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + + if (show_applied_info) { + u32 swi, j; + u8 * out0 = format(0, ""); + out0 = format(out0, "Applied lookup entries for interfaces\n"); + + for(swi = 0; (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) || + (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) || + (swi < vec_len(am->input_hash_entry_vec_by_sw_if_index)) || + (swi < 
vec_len(am->output_hash_entry_vec_by_sw_if_index)); swi++) { + if ((sw_if_index != ~0) && (sw_if_index != swi)) { + continue; + } + out0 = format(out0, "sw_if_index %d:\n", swi); + if (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) { + applied_hash_acl_info_t *pal = &am->input_applied_hash_acl_info_by_sw_if_index[swi]; + out0 = format(out0, " input lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap); + } + if (swi < vec_len(am->input_hash_entry_vec_by_sw_if_index)) { + out0 = format(out0, " input lookup applied entries:\n"); + for(j=0; jinput_hash_entry_vec_by_sw_if_index[swi]); j++) { + applied_hash_ace_entry_t *pae = &am->input_hash_entry_vec_by_sw_if_index[swi][j]; + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d\n", + j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, + pae->next_applied_entry_index, pae->prev_applied_entry_index); + } + } + + if (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) { + applied_hash_acl_info_t *pal = &am->output_applied_hash_acl_info_by_sw_if_index[swi]; + out0 = format(out0, " output lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap); + } + if (swi < vec_len(am->output_hash_entry_vec_by_sw_if_index)) { + out0 = format(out0, " output lookup applied entries:\n"); + for(j=0; joutput_hash_entry_vec_by_sw_if_index[swi]); j++) { + applied_hash_ace_entry_t *pae = &am->output_hash_entry_vec_by_sw_if_index[swi][j]; + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d\n", + j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, + pae->next_applied_entry_index, pae->prev_applied_entry_index); + } + } + + } + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + + if (show_bihash) { + show_hash_acl_hash(vm, am, show_bihash_verbose); + } } return error; } @@ -1870,7 +2197,7 @@ VLIB_CLI_COMMAND (aclplugin_set_command, 
static) = { VLIB_CLI_COMMAND (aclplugin_show_command, static) = { .path = "show acl-plugin", - .short_help = "show acl-plugin sessions", + .short_help = "show acl-plugin {sessions|acl|interface|tables}", .function = acl_show_aclplugin_fn, }; /* *INDENT-ON* */ @@ -1940,6 +2267,10 @@ acl_init (vlib_main_t * vm) am->l4_match_nonfirst_fragment = 1; + /* use the new fancy hash-based matching */ + // NOT IMMEDIATELY + am->use_hash_acl_matching = 1; + return error; } diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 65bc9e74..c9f403e9 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -24,8 +24,10 @@ #include #include #include +#include #include "bihash_40_8.h" #include "fa_node.h" +#include "hash_lookup_types.h" #define ACL_PLUGIN_VERSION_MAJOR 1 #define ACL_PLUGIN_VERSION_MINOR 3 @@ -109,17 +111,45 @@ typedef struct u32 l2_table_index; } macip_acl_list_t; +/* + * An element describing a particular configuration of the mask, + * and how many times it has been used. + */ +typedef struct +{ + fa_5tuple_t mask; + u32 refcount; +} ace_mask_type_entry_t; + typedef struct { /* API message ID base */ u16 msg_id_base; acl_list_t *acls; /* Pool of ACLs */ + hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */ + clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup hash table.
*/ + int acl_lookup_hash_initialized; + applied_hash_ace_entry_t **input_hash_entry_vec_by_sw_if_index; + applied_hash_ace_entry_t **output_hash_entry_vec_by_sw_if_index; + applied_hash_acl_info_t *input_applied_hash_acl_info_by_sw_if_index; + applied_hash_acl_info_t *output_applied_hash_acl_info_by_sw_if_index; + macip_acl_list_t *macip_acls; /* Pool of MAC-IP ACLs */ /* ACLs associated with interfaces */ u32 **input_acl_vec_by_sw_if_index; u32 **output_acl_vec_by_sw_if_index; + /* interfaces on which given ACLs are applied */ + u32 **input_sw_if_index_vec_by_acl; + u32 **output_sw_if_index_vec_by_acl; + + /* Do we use hash-based ACL matching or linear */ + int use_hash_acl_matching; + + /* a pool of all mask types present in all ACEs */ + ace_mask_type_entry_t *ace_mask_type_pool; + /* * Classify tables used to grab the packets for the ACL check, * and serving as the 5-tuple session tables at the same time diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 66621b6b..bfb2fc1e 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -26,6 +26,7 @@ #include #include "fa_node.h" +#include "hash_lookup.h" typedef struct { @@ -136,7 +137,7 @@ fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6) } int -acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, +single_acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, int is_ip6, u8 * r_action, u32 * r_acl_match_p, u32 * r_rule_match_p, u32 * trace_bitmap) { @@ -259,7 +260,7 @@ acl_match_5tuple (acl_main_t * am, u32 acl_index, fa_5tuple_t * pkt_5tuple, } static u8 -full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, +linear_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, int is_ip6, int is_input, u32 * acl_match_p, u32 * rule_match_p, u32 * trace_bitmap) { @@ -284,7 +285,7 @@ full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, clib_warning 
("ACL_FA_NODE_DBG: Trying to match ACL: %d", acl_vector[i]); #endif - if (acl_match_5tuple + if (single_acl_match_5tuple (am, acl_vector[i], pkt_5tuple, is_ip6, &action, acl_match_p, rule_match_p, trace_bitmap)) { @@ -303,6 +304,21 @@ full_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, return 0; } +static u8 +multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + if (am->use_hash_acl_matching) { + return hash_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, + is_input, acl_match_p, rule_match_p, trace_bitmap); + } else { + return linear_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, + is_input, acl_match_p, rule_match_p, trace_bitmap); + } +} + static int offset_within_packet (vlib_buffer_t * b0, int offset) { @@ -973,6 +989,10 @@ acl_fa_node_fn (vlib_main_t * vm, acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple); fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff; acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess); + fa_5tuple.pkt.sw_if_index = sw_if_index0; + fa_5tuple.pkt.is_ip6 = is_ip6; + fa_5tuple.pkt.is_input = is_input; + fa_5tuple.pkt.mask_type_index_lsb = ~0; #ifdef FA_NODE_VERBOSE_DEBUG clib_warning ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx", @@ -1039,7 +1059,7 @@ acl_fa_node_fn (vlib_main_t * vm, if (acl_check_needed) { action = - full_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path, + multi_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path, is_ip6, is_input, &match_acl_in_index, &match_rule_index, &trace_bitmap); error0 = action; @@ -1590,6 +1610,17 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) } } +void +show_fa_sessions_hash(vlib_main_t * vm, u32 verbose) +{ + acl_main_t *am = &acl_main; + if (am->fa_sessions_hash_is_initialized) { + 
vlib_cli_output(vm, "\nSession lookup hash table:\n%U\n\n", + BV (format_bihash), &am->fa_sessions_hash, verbose); + } else { + vlib_cli_output(vm, "\nSession lookup hash table is not allocated.\n\n"); + } +} /* *INDENT-OFF* */ diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index 671593a8..e4cd2ab5 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -22,12 +22,15 @@ typedef union { u64 as_u64; struct { + u32 sw_if_index; + u16 mask_type_index_lsb; u8 tcp_flags; u8 tcp_flags_valid:1; u8 is_input:1; u8 l4_valid:1; u8 is_nonfirst_fragment:1; - u8 flags_reserved:4; + u8 is_ip6:1; + u8 flags_reserved:3; }; } fa_packet_info_t; @@ -158,5 +161,7 @@ enum void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable); +void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose); + #endif diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c new file mode 100644 index 00000000..8e8c5274 --- /dev/null +++ b/src/plugins/acl/hash_lookup.c @@ -0,0 +1,742 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "hash_lookup.h" +#include "hash_lookup_private.h" + +/* + * This returns true if there is indeed a match on the portranges. + * With all these levels of indirections, this is not going to be very fast, + * so, best use the individual ports or wildcard ports for performance. + */ +static int +match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index) +{ + applied_hash_ace_entry_t **applied_hash_aces = match->pkt.is_input ? &am->input_hash_entry_vec_by_sw_if_index[match->pkt.sw_if_index] : + &am->output_hash_entry_vec_by_sw_if_index[match->pkt.sw_if_index]; + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[index]); + + // hash_acl_info_t *ha = &am->hash_acl_infos[pae->acl_index]; + acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]); + DBG("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?", + r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last, + r->dst_port_or_code_first, match->l4.port[1], r->dst_port_or_code_last); + + return ( ((r->src_port_or_type_first <= match->l4.port[0]) && r->src_port_or_type_last >= match->l4.port[0]) && + ((r->dst_port_or_code_first <= match->l4.port[1]) && r->dst_port_or_code_last >= match->l4.port[1]) ); +} + +static u32 +multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) +{ + clib_bihash_kv_48_8_t kv; + clib_bihash_kv_48_8_t result; + fa_5tuple_t *kv_key = (fa_5tuple_t *)kv.key; + hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value; + u64 *pmatch = (u64 *)match; + u64 *pmask; + u64 *pkey; + int mask_type_index; + u32 curr_match_index = ~0; + + u32 sw_if_index = match->pkt.sw_if_index; + u8 is_input = match->pkt.is_input; + applied_hash_ace_entry_t **applied_hash_aces = is_input ? 
&am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : + &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index : + &am->output_applied_hash_acl_info_by_sw_if_index; + + DBG("TRYING TO MATCH: %016llx %016llx %016llx %016llx %016llx %016llx", + pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]); + + for(mask_type_index=0; mask_type_index < pool_len(am->ace_mask_type_pool); mask_type_index++) { + if (!clib_bitmap_get((*applied_hash_acls)[sw_if_index].mask_type_index_bitmap, mask_type_index)) { + /* This bit is not set. Avoid trying to match */ + continue; + } + ace_mask_type_entry_t *mte = &am->ace_mask_type_pool[mask_type_index]; + pmatch = (u64 *)match; + pmask = (u64 *)&mte->mask; + pkey = (u64 *)kv.key; + /* + * unrolling the below loop results in a noticeable performance increase. + int i; + for(i=0; i<6; i++) { + kv.key[i] = pmatch[i] & pmask[i]; + } + */ + + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + + kv_key->pkt.mask_type_index_lsb = mask_type_index; + DBG(" KEY %3d: %016llx %016llx %016llx %016llx %016llx %016llx", mask_type_index, + kv.key[0], kv.key[1], kv.key[2], kv.key[3], kv.key[4], kv.key[5]); + int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result); + if (res == 0) { + DBG("ACL-MATCH! result_val: %016llx", result_val->as_u64); + if (result_val->applied_entry_index < curr_match_index) { + if (PREDICT_FALSE(result_val->need_portrange_check)) { + /* + * This is going to be slow, since we can have multiple superset + * entries for narrow-ish portranges, e.g.: + * 0..42 100..400, 230..60000, + * so we need to walk linearly and check if they match. 
+ */ + + u32 curr_index = result_val->applied_entry_index; + while ((curr_index != ~0) && !match_portranges(am, match, curr_index)) { + /* while no match and there are more entries, walk... */ + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[curr_index]); + DBG("entry %d did not portmatch, advancing to %d", curr_index, pae->next_applied_entry_index); + curr_index = pae->next_applied_entry_index; + } + if (curr_index < curr_match_index) { + DBG("The index %d is the new candidate in portrange matches.", curr_index); + curr_match_index = result_val->applied_entry_index; + if (!result_val->shadowed) { + /* new result is known to not be shadowed, so no point to look up further */ + break; + } + } else { + DBG("Curr portmatch index %d is too big vs. current matched one %d", curr_index, curr_match_index); + } + } else { + /* The usual path is here. Found an entry in front of the current candidate - so it's a new one */ + DBG("This match is the new candidate"); + curr_match_index = result_val->applied_entry_index; + if (!result_val->shadowed) { + /* new result is known to not be shadowed, so no point to look up further */ + break; + } + } + } + } + } + DBG("MATCH-RESULT: %d", curr_match_index); + return curr_match_index; +} + +static void +hashtable_add_del(acl_main_t *am, clib_bihash_kv_48_8_t *kv, int is_add) +{ + DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx add %d", + kv->key[0], kv->key[1], kv->key[2], + kv->key[3], kv->key[4], kv->key[5], is_add); + BV (clib_bihash_add_del) (&am->acl_lookup_hash, kv, is_add); +} + +static void +fill_applied_hash_ace_kv(acl_main_t *am, + applied_hash_ace_entry_t **applied_hash_aces, + u32 sw_if_index, u8 is_input, + u32 new_index, clib_bihash_kv_48_8_t *kv) +{ + fa_5tuple_t *kv_key = (fa_5tuple_t *)kv->key; + hash_acl_lookup_value_t *kv_val = (hash_acl_lookup_value_t *)&kv->value; + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); + hash_acl_info_t *ha =
&am->hash_acl_infos[pae->acl_index]; + + memcpy(kv_key, &ha->rules[pae->hash_ace_info_index].match, sizeof(*kv_key)); + /* initialize the sw_if_index and direction */ + kv_key->pkt.sw_if_index = sw_if_index; + kv_key->pkt.is_input = is_input; + kv_val->as_u64 = 0; + kv_val->applied_entry_index = new_index; + kv_val->need_portrange_check = ha->rules[pae->hash_ace_info_index].src_portrange_not_powerof2 || + ha->rules[pae->hash_ace_info_index].dst_portrange_not_powerof2; + /* by default assume all values are shadowed -> check all mask types */ + kv_val->shadowed = 1; +} + +static void +add_del_hashtable_entry(acl_main_t *am, + u32 sw_if_index, u8 is_input, + applied_hash_ace_entry_t **applied_hash_aces, + u32 index, int is_add) +{ + clib_bihash_kv_48_8_t kv; + + fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, index, &kv); + hashtable_add_del(am, &kv, is_add); +} + + + +static void +activate_applied_ace_hash_entry(acl_main_t *am, + u32 sw_if_index, u8 is_input, + applied_hash_ace_entry_t **applied_hash_aces, + u32 new_index) +{ + clib_bihash_kv_48_8_t kv; + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); + + fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, new_index, &kv); + + DBG("APPLY ADD KY: %016llx %016llx %016llx %016llx %016llx %016llx", + kv.key[0], kv.key[1], kv.key[2], + kv.key[3], kv.key[4], kv.key[5]); + + clib_bihash_kv_48_8_t result; + hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value; + int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result); + if (res == 0) { + /* There already exists an entry or more. Append at the end. 
*/ + u32 existing_index = result_val->applied_entry_index; + DBG("A key already exists, with applied entry index: %d", existing_index); + ASSERT(existing_index != ~0); + applied_hash_ace_entry_t *existing_pae = &((*applied_hash_aces)[existing_index]); + /* walk the list to the last element */ + while (existing_pae->next_applied_entry_index != ~0) { + if (existing_index == existing_pae->next_applied_entry_index) { + DBG("existing and next index of entry %d are the same, abort", existing_index); + ASSERT(existing_index != existing_pae->next_applied_entry_index); + } + existing_index = existing_pae->next_applied_entry_index; + existing_pae = &((*applied_hash_aces)[existing_index]); + DBG("...advance to chained entry index: %d", existing_index); + } + /* link ourseves in */ + existing_pae->next_applied_entry_index = new_index; + pae->prev_applied_entry_index = existing_index; + } else { + /* It's the very first entry */ + hashtable_add_del(am, &kv, 1); + } +} + +static void +applied_hash_entries_analyze(acl_main_t *am, applied_hash_ace_entry_t **applied_hash_aces) +{ + /* + * Go over the rules and check which ones are shadowed and which aren't. + * Naive approach: try to match the match value from every ACE as if it + * was a live packet, and see if the resulting match happens earlier in the list. + * if it does not match or it is later in the ACL - then the entry is not shadowed. + * + * This approach fails, an example: + * deny tcp 2001:db8::/32 2001:db8::/32 + * permit ip 2001:db8::1/128 2001:db8::2/128 + */ +} + +void +hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) +{ + int i; + + DBG("HASH ACL apply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index); + u32 *acl_vec = is_input ? 
am->input_acl_vec_by_sw_if_index[sw_if_index] : + am->output_acl_vec_by_sw_if_index[sw_if_index]; + if (is_input) { + vec_validate(am->input_hash_entry_vec_by_sw_if_index, sw_if_index); + } else { + vec_validate(am->output_hash_entry_vec_by_sw_if_index, sw_if_index); + } + vec_validate(am->hash_acl_infos, acl_index); + applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : + &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + u32 order_index = vec_search(acl_vec, acl_index); + hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + ASSERT(order_index != ~0); + + if (!am->acl_lookup_hash_initialized) { + BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash", + 65536, 2 << 25); + am->acl_lookup_hash_initialized = 1; + } + int base_offset = vec_len(*applied_hash_aces); + + /* Update the bitmap of the mask types with which the lookup + needs to happen for the ACLs applied to this sw_if_index */ + applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index : + &am->output_applied_hash_acl_info_by_sw_if_index; + vec_validate((*applied_hash_acls), sw_if_index); + applied_hash_acl_info_t *pal = &(*applied_hash_acls)[sw_if_index]; + pal->mask_type_index_bitmap = clib_bitmap_or(pal->mask_type_index_bitmap, + ha->mask_type_index_bitmap); + /* + * if the applied ACL is empty, the current code will cause a + * different behavior compared to current linear search: an empty ACL will + * simply fallthrough to the next ACL, or the default deny in the end. + * + * This is not a problem, because after vpp-dev discussion, + * the consensus was it should not be possible to apply the non-existent + * ACL, so the change adding this code also takes care of that. 
+ */ + + /* expand the applied aces vector by the necessary amount */ + vec_resize((*applied_hash_aces), vec_len(ha->rules)); + + /* add the rules from the ACL to the hash table for lookup and append to the vector*/ + for(i=0; i < vec_len(ha->rules); i++) { + u32 new_index = base_offset + i; + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); + pae->acl_index = acl_index; + pae->ace_index = ha->rules[i].ace_index; + pae->action = ha->rules[i].action; + pae->hash_ace_info_index = i; + /* we might link it in later */ + pae->next_applied_entry_index = ~0; + pae->prev_applied_entry_index = ~0; + activate_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, new_index); + } + applied_hash_entries_analyze(am, applied_hash_aces); +} + +static void +move_applied_ace_hash_entry(acl_main_t *am, + u32 sw_if_index, u8 is_input, + applied_hash_ace_entry_t **applied_hash_aces, + u32 old_index, u32 new_index) +{ + + /* move the entry */ + (*applied_hash_aces)[new_index] = (*applied_hash_aces)[old_index]; + + /* update the linkage and hash table if necessary */ + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[old_index]); + + if (pae->prev_applied_entry_index != ~0) { + applied_hash_ace_entry_t *prev_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); + ASSERT(prev_pae->next_applied_entry_index == old_index); + prev_pae->next_applied_entry_index = new_index; + } else { + /* first entry - so the hash points to it, update */ + add_del_hashtable_entry(am, sw_if_index, is_input, + applied_hash_aces, new_index, 1); + } + if (pae->next_applied_entry_index != ~0) { + applied_hash_ace_entry_t *next_pae = &((*applied_hash_aces)[pae->next_applied_entry_index]); + ASSERT(next_pae->prev_applied_entry_index == old_index); + next_pae->prev_applied_entry_index = new_index; + } + /* invalidate the old entry */ + pae->prev_applied_entry_index = ~0; + pae->next_applied_entry_index = ~0; +} + +static void 
+deactivate_applied_ace_hash_entry(acl_main_t *am, + u32 sw_if_index, u8 is_input, + applied_hash_ace_entry_t **applied_hash_aces, + u32 old_index) +{ + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[old_index]); + + if (pae->next_applied_entry_index != ~0) { + applied_hash_ace_entry_t *next_pae = &((*applied_hash_aces)[pae->next_applied_entry_index]); + ASSERT(next_pae->prev_applied_entry_index == old_index); + next_pae->prev_applied_entry_index = pae->prev_applied_entry_index; + } + + if (pae->prev_applied_entry_index != ~0) { + applied_hash_ace_entry_t *prev_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); + ASSERT(prev_pae->next_applied_entry_index == old_index); + prev_pae->next_applied_entry_index = pae->next_applied_entry_index; + } else { + /* It was the first entry. We need either to reset the hash entry or delete it */ + if (pae->next_applied_entry_index != ~0) { + add_del_hashtable_entry(am, sw_if_index, is_input, + applied_hash_aces, pae->next_applied_entry_index, 1); + } else { + /* no next entry, so just delete the entry in the hash table */ + add_del_hashtable_entry(am, sw_if_index, is_input, + applied_hash_aces, old_index, 0); + } + } +} + + +static void +hash_acl_build_applied_lookup_bitmap(acl_main_t *am, u32 sw_if_index, u8 is_input) +{ + int i; + uword *new_lookup_bitmap = 0; + u32 **applied_acls = is_input ? &am->input_acl_vec_by_sw_if_index[sw_if_index] : + &am->output_acl_vec_by_sw_if_index[sw_if_index]; + applied_hash_acl_info_t **applied_hash_acls = is_input ? 
&am->input_applied_hash_acl_info_by_sw_if_index : + &am->output_applied_hash_acl_info_by_sw_if_index; + applied_hash_acl_info_t *pal = &(*applied_hash_acls)[sw_if_index]; + for(i=0; i < vec_len(*applied_acls); i++) { + u32 a_acl_index = (*applied_acls)[i]; + hash_acl_info_t *ha = &am->hash_acl_infos[a_acl_index]; + new_lookup_bitmap = clib_bitmap_or(new_lookup_bitmap, + ha->mask_type_index_bitmap); + } + uword *old_lookup_bitmap = pal->mask_type_index_bitmap; + pal->mask_type_index_bitmap = new_lookup_bitmap; + clib_bitmap_free(old_lookup_bitmap); +} + +void +hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) +{ + int i; + + DBG("HASH ACL unapply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index); + hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : + &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + + for(i=0; i < vec_len((*applied_hash_aces)); i++) { + if ((*applied_hash_aces)[i].acl_index == acl_index) { + DBG("Found applied ACL#%d at applied index %d", acl_index, i); + break; + } + } + if (vec_len((*applied_hash_aces)) <= i) { + DBG("Did not find applied ACL#%d at sw_if_index %d", acl_index, sw_if_index); + /* we went all the way without finding any entries. Probably a list was empty. 
*/ + return; + } + int base_offset = i; + int tail_offset = base_offset + vec_len(ha->rules); + int tail_len = vec_len((*applied_hash_aces)) - tail_offset; + DBG("base_offset: %d, tail_offset: %d, tail_len: %d", base_offset, tail_offset, tail_len); + + for(i=0; i < vec_len(ha->rules); i ++) { + DBG("UNAPPLY DEACTIVATE: sw_if_index %d is_input %d, applied index %d", sw_if_index, is_input, base_offset + i); + deactivate_applied_ace_hash_entry(am, sw_if_index, is_input, + applied_hash_aces, base_offset + i); + } + for(i=0; i < tail_len; i ++) { + /* move the entry at tail offset to base offset */ + /* that is, from (tail_offset+i) -> (base_offset+i) */ + DBG("UNAPPLY MOVE: sw_if_index %d is_input %d, applied index %d ->", sw_if_index, is_input, tail_offset+i, base_offset + i); + move_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, tail_offset + i, base_offset + i); + } + /* trim the end of the vector */ + _vec_len((*applied_hash_aces)) -= vec_len(ha->rules); + + applied_hash_entries_analyze(am, applied_hash_aces); + + /* After deletion we might not need some of the mask-types anymore... */ + hash_acl_build_applied_lookup_bitmap(am, sw_if_index, is_input); +} + +/* + * Create the applied ACEs and update the hash table, + * taking into account that the ACL may not be the last + * in the vector of applied ACLs. + * + * For now, walk from the end of the vector and unapply the ACLs, + * then apply the one in question and reapply the rest. + */ + +void +hash_acl_reapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) +{ + u32 **applied_acls = is_input ? &am->input_acl_vec_by_sw_if_index[sw_if_index] : + &am->output_acl_vec_by_sw_if_index[sw_if_index]; + int i; + int start_index = vec_search((*applied_acls), acl_index); + /* + * This function is called after we find out the sw_if_index where ACL is applied. + * If the by-sw_if_index vector does not have the ACL#, then it's a bug. 
+ */ + ASSERT(start_index < vec_len(*applied_acls)); + + /* unapply all the ACLs till the current one */ + for(i = vec_len(*applied_acls) - 1; i >= start_index; i--) { + hash_acl_unapply(am, sw_if_index, is_input, (*applied_acls)[i]); + } + for(i = start_index; i < vec_len(*applied_acls); i++) { + hash_acl_apply(am, sw_if_index, is_input, (*applied_acls)[i]); + } +} + +static void +make_address_mask(ip46_address_t *addr, u8 is_ipv6, u8 prefix_len) +{ + if (is_ipv6) { + ip6_address_mask_from_width(&addr->ip6, prefix_len); + } else { + /* FIXME: this may not be correct way */ + ip6_address_mask_from_width(&addr->ip6, prefix_len + 3*32); + ip46_address_mask_ip4(addr); + } +} + +static u8 +make_port_mask(u16 *portmask, u16 port_first, u16 port_last) +{ + if (port_first == port_last) { + *portmask = 0xffff; + /* single port is representable by masked value */ + return 0; + } + if ((port_first == 0) && (port_last == 65535)) { + *portmask = 0; + /* wildcard port is representable by a masked value */ + return 0; + } + + /* + * For now match all the ports, later + * here might be a better optimization which would + * pick out bitmaskable portranges. + * + * However, adding a new mask type potentially + * adds a per-packet extra lookup, so the benefit is not clear. + */ + *portmask = 0; + /* This port range can't be represented via bitmask exactly. 
*/ + return 1; +} + +static void +make_mask_and_match_from_rule(fa_5tuple_t *mask, acl_rule_t *r, hash_ace_info_t *hi, int match_nonfirst_fragment) +{ + memset(mask, 0, sizeof(*mask)); + memset(&hi->match, 0, sizeof(hi->match)); + hi->action = r->is_permit; + + /* we will need to be matching based on sw_if_index, direction, and mask_type_index when applied */ + mask->pkt.sw_if_index = ~0; + mask->pkt.is_input = 1; + /* we will assign the match of mask_type_index later when we find it*/ + mask->pkt.mask_type_index_lsb = ~0; + + mask->pkt.is_ip6 = 1; + hi->match.pkt.is_ip6 = r->is_ipv6; + + make_address_mask(&mask->addr[0], r->is_ipv6, r->src_prefixlen); + hi->match.addr[0] = r->src; + make_address_mask(&mask->addr[1], r->is_ipv6, r->dst_prefixlen); + hi->match.addr[1] = r->dst; + + if (r->proto != 0) { + mask->l4.proto = ~0; /* L4 proto needs to be matched */ + hi->match.l4.proto = r->proto; + if (match_nonfirst_fragment) { + /* match the non-first fragments only */ + mask->pkt.is_nonfirst_fragment = 1; + hi->match.pkt.is_nonfirst_fragment = 1; + } else { + /* Calculate the src/dst port masks and make the src/dst port matches accordingly */ + hi->src_portrange_not_powerof2 = make_port_mask(&mask->l4.port[0], r->src_port_or_type_first, r->src_port_or_type_last); + hi->match.l4.port[0] = r->src_port_or_type_first & mask->l4.port[0]; + hi->dst_portrange_not_powerof2 = make_port_mask(&mask->l4.port[1], r->dst_port_or_code_first, r->dst_port_or_code_last); + hi->match.l4.port[1] = r->dst_port_or_code_first & mask->l4.port[1]; + /* L4 info must be valid in order to match */ + mask->pkt.l4_valid = 1; + hi->match.pkt.l4_valid = 1; + /* And we must set the mask to check that it is an initial fragment */ + mask->pkt.is_nonfirst_fragment = 1; + hi->match.pkt.is_nonfirst_fragment = 0; + if ((r->proto == IPPROTO_TCP) && (r->tcp_flags_mask != 0)) { + /* if we want to match on TCP flags, they must be masked off as well */ + mask->pkt.tcp_flags = r->tcp_flags_mask; + 
hi->match.pkt.tcp_flags = r->tcp_flags_value; + /* and the flags need to be present within the packet being matched */ + mask->pkt.tcp_flags_valid = 1; + hi->match.pkt.tcp_flags_valid = 1; + } + } + } + /* Sanitize the mask and the match */ + u64 *pmask = (u64 *)mask; + u64 *pmatch = (u64 *)&hi->match; + int j; + for(j=0; j<6; j++) { + pmatch[j] = pmatch[j] & pmask[j]; + } +} + +static u32 +find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask) +{ + ace_mask_type_entry_t *mte; + /* *INDENT-OFF* */ + pool_foreach(mte, am->ace_mask_type_pool, + ({ + if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0) + return (mte - am->ace_mask_type_pool); + })); + /* *INDENT-ON* */ + return ~0; +} + +static u32 +assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask) +{ + u32 mask_type_index = find_mask_type_index(am, mask); + ace_mask_type_entry_t *mte; + if(~0 == mask_type_index) { + pool_get_aligned (am->ace_mask_type_pool, mte, CLIB_CACHE_LINE_BYTES); + mask_type_index = mte - am->ace_mask_type_pool; + clib_memcpy(&mte->mask, mask, sizeof(mte->mask)); + mte->refcount = 0; + /* + * We can use only 16 bits, since in the match there is only u16 field. + * Realistically, once you go to 64K of mask types, it is a huge + * problem anyway, so we might as well stop half way. 
+ */ + ASSERT(mask_type_index < 32768); + } + mte = am->ace_mask_type_pool + mask_type_index; + mte->refcount++; + return mask_type_index; +} + +static void +release_mask_type_index(acl_main_t *am, u32 mask_type_index) +{ + ace_mask_type_entry_t *mte = &am->ace_mask_type_pool[mask_type_index]; + mte->refcount--; + if (mte->refcount == 0) { + /* we are not using this entry anymore */ + pool_put(am->ace_mask_type_pool, mte); + } +} + +void hash_acl_add(acl_main_t *am, int acl_index) +{ + DBG("HASH ACL add : %d", acl_index); + int i; + acl_list_t *a = &am->acls[acl_index]; + vec_validate(am->hash_acl_infos, acl_index); + hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + memset(ha, 0, sizeof(*ha)); + + /* walk the newly added ACL entries and ensure that for each of them there + is a mask type, increment a reference count for that mask type */ + for(i=0; i < a->count; i++) { + hash_ace_info_t ace_info; + fa_5tuple_t mask; + memset(&ace_info, 0, sizeof(ace_info)); + ace_info.acl_index = acl_index; + ace_info.ace_index = i; + + make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 0); + ace_info.mask_type_index = assign_mask_type_index(am, &mask); + /* assign the mask type index for matching itself */ + ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index; + DBG("ACE: %d mask_type_index: %d", i, ace_info.mask_type_index); + /* Ensure a given index is set in the mask type index bitmap for this ACL */ + ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1); + vec_add1(ha->rules, ace_info); + if (am->l4_match_nonfirst_fragment) { + /* add the second rule which matches the noninitial fragments with the respective mask */ + make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 1); + ace_info.mask_type_index = assign_mask_type_index(am, &mask); + ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index; + DBG("ACE: %d (non-initial frags) mask_type_index: %d", i, 
ace_info.mask_type_index); + /* Ensure a given index is set in the mask type index bitmap for this ACL */ + ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1); + vec_add1(ha->rules, ace_info); + } + } + /* + * if an ACL is applied somewhere, fill the corresponding lookup data structures. + * We need to take care if the ACL is not the last one in the vector of ACLs applied to the interface. + */ + if (acl_index < vec_len(am->input_sw_if_index_vec_by_acl)) { + u32 *sw_if_index; + vec_foreach(sw_if_index, am->input_sw_if_index_vec_by_acl[acl_index]) { + hash_acl_reapply(am, *sw_if_index, 1, acl_index); + } + } + if (acl_index < vec_len(am->output_sw_if_index_vec_by_acl)) { + u32 *sw_if_index; + vec_foreach(sw_if_index, am->output_sw_if_index_vec_by_acl[acl_index]) { + hash_acl_reapply(am, *sw_if_index, 0, acl_index); + } + } +} + +void hash_acl_delete(acl_main_t *am, int acl_index) +{ + DBG("HASH ACL delete : %d", acl_index); + /* + * If the ACL is applied somewhere, remove the references of it (call hash_acl_unapply) + * this is a different behavior from the linear lookup where an empty ACL is "deny all", + * + * However, following vpp-dev discussion the ACL that is referenced elsewhere + * should not be possible to delete, and the change adding this also adds + * the safeguards to that respect, so this is not a problem. 
+ */ + if (acl_index < vec_len(am->input_sw_if_index_vec_by_acl)) { + u32 *sw_if_index; + vec_foreach(sw_if_index, am->input_sw_if_index_vec_by_acl[acl_index]) { + hash_acl_unapply(am, *sw_if_index, 1, acl_index); + } + } + if (acl_index < vec_len(am->output_sw_if_index_vec_by_acl)) { + u32 *sw_if_index; + vec_foreach(sw_if_index, am->output_sw_if_index_vec_by_acl[acl_index]) { + hash_acl_unapply(am, *sw_if_index, 0, acl_index); + } + } + + /* walk the mask types for the ACL about-to-be-deleted, and decrease + * the reference count, possibly freeing up some of them */ + int i; + hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + for(i=0; i < vec_len(ha->rules); i++) { + release_mask_type_index(am, ha->rules[i].mask_type_index); + } + clib_bitmap_free(ha->mask_type_index_bitmap); + vec_free(ha->rules); +} + +u8 +hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + applied_hash_ace_entry_t **applied_hash_aces = is_input ? 
&am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : + &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + u32 match_index = multi_acl_match_get_applied_ace_index(am, pkt_5tuple); + if (match_index < vec_len((*applied_hash_aces))) { + applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[match_index]); + *acl_match_p = pae->acl_index; + *rule_match_p = pae->ace_index; + return pae->action; + } + return 0; +} + + +void +show_hash_acl_hash (vlib_main_t * vm, acl_main_t *am, u32 verbose) +{ + vlib_cli_output(vm, "\nACL lookup hash table:\n%U\n", + BV (format_bihash), &am->acl_lookup_hash, verbose); +} diff --git a/src/plugins/acl/hash_lookup.h b/src/plugins/acl/hash_lookup.h new file mode 100644 index 00000000..c5913624 --- /dev/null +++ b/src/plugins/acl/hash_lookup.h @@ -0,0 +1,60 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef _ACL_HASH_LOOKUP_H_ +#define _ACL_HASH_LOOKUP_H_ + +#include +#include "acl.h" + +/* + * Do the necessary to logically apply the ACL to the existing vector of ACLs looked up + * during the packet processing + */ + +void hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index); + +/* Remove the ACL from the packet processing lookups on a given interface */ + +void hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index); + +/* + * Add an ACL or delete an ACL. ACL may already have been referenced elsewhere, + * so potentially we also need to do the work to enable the lookups. + */ + +void hash_acl_add(acl_main_t *am, int acl_index); +void hash_acl_delete(acl_main_t *am, int acl_index); + +/* + * Do the work required to match a given 5-tuple from the packet, + * and return the action as well as populate the values pointed + * to by the *_match_p pointers and maybe trace_bitmap. + */ + +u8 +hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap); + + +/* + * The debug function to show the contents of the ACL lookup hash + */ +void show_hash_acl_hash(vlib_main_t * vm, acl_main_t *am, u32 verbose); + +#endif diff --git a/src/plugins/acl/hash_lookup.md b/src/plugins/acl/hash_lookup.md new file mode 100644 index 00000000..95524643 --- /dev/null +++ b/src/plugins/acl/hash_lookup.md @@ -0,0 +1,241 @@ +ACL plugin constant-time lookup design +====================================== + +The initial implementation of ACL plugin performs a trivial for() cycle, +going through the assigned ACLs on a per-packet basis. This is not very +efficient, even if for very short ACLs due to its simplicity it can beat +more advanced methods. + +However, to cover the case of longer ACLs with acceptable performance, +we need to have a better way of matching. 
This write-up proposes
+a mechanism to make a lookup from O(M) where M is number of entries
+to O(N) where N is number of different mask combinations.
+
+Preparation of ACL(s)
+---------------------
+
+The ACL plugin will maintain a global list of "mask types", i.e. the specific
+configurations of "do not care" bits within the ACEs.
+Upon the creation of a new ACL, a pass will be made through all the
+ACEs, to assign and possibly allocate the "mask type number".
+
+Each ACL has a structure *hash_acl_info_t* representing the "hash-based"
+parts of information related to that ACL, primarily the array of
+*hash_ace_info_t* structures - each of the members of that array
+corresponding to one of the rules (ACEs) in the original ACL,
+for this they have a pair of *(acl_index, ace_index)* to keep track,
+predominantly for the debugging.
+
+Why do we need a whole separate structure, and are not adding new fields
+to the existing rule structure ? First, encapsulation, to minimize
+the pollution of the main ACL code with the hash-based lookup artifacts.
+
+Second, one rule may correspond to more than one "hash-based" ACE.
+In fact, most of the rules do correspond to two of those. Why ?
+
+Consider that the current ACL lookup logic is that if a packet
+is not the initial fragment, and there is an L4 entry acting on the packet,
+the comparison will be made only on the L4 protocol field value rather
+than on the protocol and port values. This behaviour is governed by
+*l4_match_nonfirst_fragment* flag in the *acl_main*, and was needed to
+maintain the compatibility with the existing software switch implementation. 
+ +While for the sequential check in *single_acl_match_5tuple()* +it is very easy to implement by just breaking out at the right moment, +in case of hash-based matching this cost us two checks: +one on full 5-tuple and the flag *pkt.is_nonfirst_fragment* being zero, +the second on 3-tuple and the flag *pkt.is_nonfirst_fragment* being one, +with the second check triggered by the *acl_main.l4_match_nonfirst_fragment* +setting being the default 1. This dictates the necessity of having a "match" +field in a given *hash_ace_info_t* element, which would reflect the value +we are supposed to match after applying the mask. + +There can be other circumstances when it might be beneficial to expand +the given rule in the original ACL into multiple - for example, as an +optimization within the port range handling for small port ranges +(this is not done as of the time of writing). + +Assigning ACLs to an interface +------------------------------ + +Once the ACL list is assigned to an interface, or, rather, a new ACL +is added to the list of the existing ACLs applied to the interface, +we need to update the bihash accelerating the lookup. + +All the entries for the lookups are stored within a single *48_8* bihash, +which captures the 5-tuple from the packet as well as the miscellaneous +per-packet information flags, e.g. *l4_valid*, *is_non_first_fragment*, +and so on. To facilitate the use of the single bihash by all the interfaces, +the *is_ip6*, *is_input*, *sw_if_index* are part of the key, +as well as *mask_type_index* - the latter being necessary because +there can be entries with the same value but different masks, e.g.: +`permit ::/0, permit::/128`. + +At the moment of an ACL being applied to an interface, we need to +walk the list of *hash_ace_info_t* entries corresponding to that ACL, +and update the bihash with the keys corresponding to the match +values in these entries. 
+ +The value of the hash match contains the index into a per-*sw_if_index* vector +of *applied_ace_hash_entry_t* elements, as well as a couple of flags: +*shadowed* (optimization: if this flag on a matched entry is zero, means +we can stop the lookup early and declare a match - see below), +and *need_portrange_check* - meaning that what matched was a superset +of the actual match, and we need to perform an extra check. + +Also, upon insertion, we must keep in mind there can be +multiple *applied_ace_hash_entry_t* for the same key and must keep +a list of those. This is necessary to incrementally apply/unapply +the ACLs as part of the ACL vector: say, two ACLs have +"permit 2001:db8::1/128 any" - we should be able to retain the entry +for the second ACL even if we have deleted the first one. +Also, in case there are two entries with the same key but +different port ranges, say 0..42 and 142..65535 - we need +to be able to sequentially match on those if we decide not +to expand them into individual port-specific entries. + +Per-packet lookup +----------------- + +The simple single-packet lookup is defined in +*multi_acl_match_get_applied_ace_index*, which returns the index +of the applied hash ACE if there was a match, or ~0 if there wasn't. + +The future optimized per-packet lookup may be batched in three phases: + +1. Prepare the keys in the per-worker vector by doing logical AND of + original 5-tuple record with the elements of the mask vector. +2. Lookup the keys in the bihash in a batch manner, collecting the + result with lowest u64 (acl index within vector, ACE index) from + the hash lookup value, and performing the list walk if necessary + (for portranges) +3. Take the action from the ACL record as defined by (ACL#, ACE#) from the + resulting lookup winner, or, if no match found, then perform default deny. 
+
+Shadowed/independent/redundant ACEs
+------------------------------------
+
+During the phase of combining multiple ACLs into one rulebase, when they
+are applied to an interface, we also can perform several optimizations.
+
+If a given ACE is a strict subset of another ACE located up in the linear
+search order, we can ignore this ACE completely - because by definition
+it will never match. We will call such an ACE *redundant*. Here is an example:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+deny 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+A bit more formally, we can define this relationship of an ACE A to ACE B as:
+
+```
+redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
+                          && contains(dstB, dstA) && is_after(A, B))
+```
+
+Here as "contains" we define an operation operating on the sets defined by
+the protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
+respectively, and returning true if all the elements represented by
+the second argument are represented by the first argument. The "is_after"
+is true if A is located below B in the ruleset.
+
+If a given ACE does not intersect at all with any other ACE
+in front of it, we can mark it as such.
+
+Then during the sequence of the lookups the successful hit on this ACE means
+we do not need to look up other mask combinations - thus potentially
+significantly speeding up the match process. Here is an example,
+assuming we have the following ACL:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
+```
+
+In this case if we match the second entry, we do not need to check whether
+we have matched the first one - the source addresses are completely
+different. We call such an ACE *independent* from another. 
+
+We can define this as
+
+```
+independent(aceA, aceB) := (!intersect(protoA, protoB) ||
+                            !intersect(srcA, srcB) ||
+                            !intersect(dstA, dstB))
+```
+
+where intersect is defined as an operation returning true if there are
+elements belonging to the sets of both arguments.
+
+If the entry A is neither redundant nor independent from B, and is below
+B in the ruleset, we call such an entry *shadowed* by B, here is an example:
+
+```
+deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
+permit 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+This means the earlier rule "carves out" a subset of A, thus leaving
+a "shadow". (Evidently, the action needs to be different for the shadow
+to have an effect, but for the terminology's sake we do not care).
+
+The more formal definition:
+
+```
+shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
+                        !independent(aceA, aceB) &&
+                        is_after(aceA, aceB)
+```
+
+Using this terminology, any ruleset can be represented as
+a DAG (Directed Acyclic Graph), with the bottom being the implicit
+"deny any", pointing to the set of rules shadowing it or the ones
+it is redundant for.
+
+These rules may in turn be shadowing each other. There are no cycles in
+this graph because of the natural order of the rules - the rule located
+closer to the end of the ruleset can never shadow or make redundant a rule
+higher up.
+
+The optimization this enables is to skip matching certain
+masks on a per-lookup basis - if a given rule has matched,
+the only adjustments that can happen are the matches with one of
+the shadowing rules.
+
+Also, another avenue for the optimization can be starting the lookup process
+with the mask type that maximizes the chances of the independent ACE match,
+thus resulting in an ACE lookup being a single hash table hit.
+
+
+Plumbing
+--------
+
+All the new routines are located in a separate file,
+so we can cleanly experiment with a different approach if this
+does not fit all of the use cases. 
+ +The constant-time lookup within the data path has the API with +the same signature as: + +``` +u8 +multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +``` + +There should be a new upper-level function with the same signature, which +will make a decision whether to use a linear lookup, or to use the +constant-time lookup implemented by this work, or to add some other +optimizations (e.g. by keeping the cache of the last N lookups). + +The calls to the routine doing preparatory work should happen +in `acl_add_list()` after creating the linear-lookup structures, +and the routine doing the preparatory work populating the hashtable +should be called from `acl_interface_add_del_inout_acl()` or its callees. + +The initial implementation will be geared towards looking up a single +match at a time, with the subsequent optimizations possible to make +the lookup for more than one packet. + diff --git a/src/plugins/acl/hash_lookup_private.h b/src/plugins/acl/hash_lookup_private.h new file mode 100644 index 00000000..7b6449cd --- /dev/null +++ b/src/plugins/acl/hash_lookup_private.h @@ -0,0 +1,27 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#define ACL_HASH_LOOKUP_DEBUG 0 + +#if ACL_HASH_LOOKUP_DEBUG == 1 +#define DBG(...) clib_warning(__VA_ARGS__) +#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) +#else +#define DBG(...) +#define DBG_UNIX_LOG(...) +#endif + diff --git a/src/plugins/acl/hash_lookup_types.h b/src/plugins/acl/hash_lookup_types.h new file mode 100644 index 00000000..1c044591 --- /dev/null +++ b/src/plugins/acl/hash_lookup_types.h @@ -0,0 +1,94 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef _ACL_HASH_LOOKUP_TYPES_H_ +#define _ACL_HASH_LOOKUP_TYPES_H_ + +/* The structure representing the single entry with hash representation */ +typedef struct { + /* these two entries refer to the original ACL# and rule# within that ACL */ + u32 acl_index; + u32 ace_index; + + u32 mask_type_index; + u8 src_portrange_not_powerof2; + u8 dst_portrange_not_powerof2; + + fa_5tuple_t match; + u8 action; +} hash_ace_info_t; + +/* + * The structure holding the information necessary for the hash-based ACL operation + */ +typedef struct { + /* The mask types present in this ACL */ + uword *mask_type_index_bitmap; + hash_ace_info_t *rules; +} hash_acl_info_t; + +typedef struct { + /* original non-compiled ACL */ + u32 acl_index; + u32 ace_index; + /* the index of the hash_ace_info_t */ + u32 hash_ace_info_index; + /* + * in case of the same key having multiple entries, + * this holds the index of the next entry. + */ + u32 next_applied_entry_index; + /* + * previous entry in the list of the chained ones, + * if ~0 then this is entry in the hash. 
+ */ + u32 prev_applied_entry_index; + /* + * Action of this applied ACE + */ + u8 action; +} applied_hash_ace_entry_t; + +typedef struct { + /* + * A logical OR of all the applied_ace_hash_entry_t=> + * hash_ace_info_t=>mask_type_index bits set + */ + uword *mask_type_index_bitmap; +} applied_hash_acl_info_t; + + +typedef union { + u64 as_u64; + struct { + u32 applied_entry_index; + u16 reserved_u16; + u8 reserved_u8; + /* means there is some other entry in front intersecting with this one */ + u8 shadowed:1; + u8 need_portrange_check:1; + u8 reserved_flags:6; + }; +} hash_acl_lookup_value_t; + +#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1] + +CT_ASSERT_EQUAL(hash_acl_lookup_value_t_is_u64, sizeof(hash_acl_lookup_value_t), sizeof(u64)); + +#undef CT_ASSERT_EQUAL + +#endif diff --git a/test/test_acl_plugin.py b/test/test_acl_plugin.py index 5267cd27..5f830ea2 100644 --- a/test/test_acl_plugin.py +++ b/test/test_acl_plugin.py @@ -121,6 +121,9 @@ class TestACLplugin(VppTestCase): super(TestACLplugin, self).tearDown() if not self.vpp_dead: self.logger.info(self.vapi.ppcli("show l2fib verbose")) + self.logger.info(self.vapi.ppcli("show acl-plugin acl")) + self.logger.info(self.vapi.ppcli("show acl-plugin interface")) + self.logger.info(self.vapi.ppcli("show acl-plugin tables")) self.logger.info(self.vapi.ppcli("show bridge-domain %s detail" % self.bd_id)) diff --git a/test/test_acl_plugin_conns.py b/test/test_acl_plugin_conns.py index be016d91..705ffbc6 100644 --- a/test/test_acl_plugin_conns.py +++ b/test/test_acl_plugin_conns.py @@ -185,6 +185,18 @@ class ACLPluginConnTestCase(VppTestCase): i.resolve_arp() i.resolve_ndp() + def tearDown(self): + """Run standard test teardown and log various show commands + """ + super(ACLPluginConnTestCase, self).tearDown() + if not self.vpp_dead: + self.logger.info(self.vapi.cli("show ip arp")) + self.logger.info(self.vapi.cli("show ip6 neighbors")) + 
self.logger.info(self.vapi.cli("show acl-plugin sessions")) + self.logger.info(self.vapi.cli("show acl-plugin acl")) + self.logger.info(self.vapi.cli("show acl-plugin interface")) + self.logger.info(self.vapi.cli("show acl-plugin tables")) + def api_acl_add_replace(self, acl_index, r, count=-1, tag="", expected_retval=0): """Add/replace an ACL diff --git a/test/test_acl_plugin_l2l3.py b/test/test_acl_plugin_l2l3.py index c7f1068a..f383a482 100644 --- a/test/test_acl_plugin_l2l3.py +++ b/test/test_acl_plugin_l2l3.py @@ -115,6 +115,9 @@ class TestIpIrb(VppTestCase): self.logger.info(self.vapi.cli("show ip arp")) self.logger.info(self.vapi.cli("show ip6 neighbors")) self.logger.info(self.vapi.cli("show acl-plugin sessions")) + self.logger.info(self.vapi.cli("show acl-plugin acl")) + self.logger.info(self.vapi.cli("show acl-plugin interface")) + self.logger.info(self.vapi.cli("show acl-plugin tables")) def api_acl_add_replace(self, acl_index, r, count, tag="", expected_retval=0): -- cgit 1.2.3-korg From 22d32d916f4f3806501cf39b324be19e06b89c12 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 2 Aug 2017 06:36:07 -0400 Subject: acl-plugin: multicore: CSIT c100k 2-core stateful ACL test does not pass (VPP-912) Fix several threading-related issues uncovered by the CSIT scale/performance test: - make the per-interface add/del counters per-thread - preallocate the per-worker session pools rather than attempting to resize them within the datapath - move the bihash initialization to the moment of ACL being applied rather than later during the connection creation - adjust the connection cleaning logic to not require the signaling from workers to main thread - make the connection lists check in the main thread robust against workers updating the list heads at the same time - add more information to "show acl-plugin sessions" to aid in debugging Change-Id: If82ef715e4993614df11db5e9afa7fa6b522d9bc Signed-off-by: Andrew Yourtchenko (cherry picked from commit 
8e4222fc7e23a478b021930ade3cb7d20938e398) --- src/plugins/acl/acl.c | 51 +++++++++++++++++++++------ src/plugins/acl/acl.h | 6 ++-- src/plugins/acl/fa_node.c | 87 ++++++++++++++++++++++++++++++++--------------- src/plugins/acl/fa_node.h | 3 ++ 4 files changed, 106 insertions(+), 41 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 02fcb4fe..6cd2d0c6 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -1939,23 +1939,54 @@ acl_show_aclplugin_fn (vlib_main_t * vm, u8 * out0 = format(0, ""); u16 wk; u32 show_bihash_verbose = 0; + u32 show_session_thread_id = ~0; + u32 show_session_session_index = ~0; + unformat (input, "thread %u index %u", &show_session_thread_id, &show_session_session_index); unformat (input, "verbose %u", &show_bihash_verbose); - pool_foreach (swif, im->sw_interfaces, - ({ - u32 sw_if_index = swif->sw_if_index; - u64 n_adds = sw_if_index < vec_len(am->fa_session_adds_by_sw_if_index) ? am->fa_session_adds_by_sw_if_index[sw_if_index] : 0; - u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? 
am->fa_session_dels_by_sw_if_index[sw_if_index] : 0; - out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); - })); { u64 n_adds = am->fa_session_total_adds; u64 n_dels = am->fa_session_total_dels; - out0 = format(out0, "TOTAL: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels); + out0 = format(out0, "Sessions total: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels); } - out0 = format(out0, "\n\nPer-worker data:\n"); + out0 = format(out0, "\n\nPer-thread data:\n"); for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; - out0 = format(out0, "Worker #%d:\n", wk); + out0 = format(out0, "Thread #%d:\n", wk); + if (show_session_thread_id == wk && show_session_session_index < pool_len(pw->fa_sessions_pool)) { + out0 = format(out0, " session index %u:\n", show_session_session_index); + fa_session_t *sess = pw->fa_sessions_pool + show_session_session_index; + u64 *m = (u64 *)&sess->info; + out0 = format(out0, " info: %016llx %016llx %016llx %016llx %016llx %016llx\n", m[0], m[1], m[2], m[3], m[4], m[5]); + out0 = format(out0, " sw_if_index: %u\n", sess->sw_if_index); + out0 = format(out0, " tcp_flags_seen: %x\n", sess->tcp_flags_seen.as_u16); + out0 = format(out0, " last active time: %lu\n", sess->last_active_time); + out0 = format(out0, " thread index: %u\n", sess->thread_index); + out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time); + out0 = format(out0, " link next index: %u\n", sess->link_next_idx); + out0 = format(out0, " link prev index: %u\n", sess->link_prev_idx); + out0 = format(out0, " link list id: %u\n", sess->link_list_id); + } + out0 = format(out0, " connection add/del stats:\n", wk); + pool_foreach (swif, im->sw_interfaces, + ({ + u32 sw_if_index = swif->sw_if_index; + u64 n_adds = sw_if_index < vec_len(pw->fa_session_adds_by_sw_if_index) ? 
pw->fa_session_adds_by_sw_if_index[sw_if_index] : 0; + u64 n_dels = sw_if_index < vec_len(pw->fa_session_dels_by_sw_if_index) ? pw->fa_session_dels_by_sw_if_index[sw_if_index] : 0; + out0 = format(out0, " sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); + })); + + out0 = format(out0, " connection timeout type lists:\n", wk); + u8 tt = 0; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + u32 head_session_index = pw->fa_conn_list_head[tt]; + out0 = format(out0, " fa_conn_list_head[%d]: %d\n", tt, head_session_index); + if (~0 != head_session_index) { + fa_session_t *sess = pw->fa_sessions_pool + head_session_index; + out0 = format(out0, " last active time: %lu\n", sess->last_active_time); + out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time); + } + } + out0 = format(out0, " Next expiry time: %lu\n", pw->next_expiry_time); out0 = format(out0, " Requeue until time: %lu\n", pw->requeue_until_time); out0 = format(out0, " Current time wait interval: %lu\n", pw->current_time_wait_interval); diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index c9f403e9..b84ed7a4 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -144,6 +144,9 @@ typedef struct { u32 **input_sw_if_index_vec_by_acl; u32 **output_sw_if_index_vec_by_acl; + /* Total count of interface+direction pairs enabled */ + u32 fa_total_enabled_count; + /* Do we use hash-based ACL matching or linear */ int use_hash_acl_matching; @@ -172,9 +175,6 @@ typedef struct { u32 fa_cleaner_node_index; /* FA session timeouts, in seconds */ u32 session_timeout_sec[ACL_N_TIMEOUTS]; - /* session add/delete counters */ - u64 *fa_session_adds_by_sw_if_index; - u64 *fa_session_dels_by_sw_if_index; /* total session adds/dels */ u64 fa_session_total_adds; u64 fa_session_total_dels; diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index c483044d..3181a22c 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -599,20 
+599,23 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) } static void -acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0) +acl_fa_verify_init_sessions (acl_main_t * am) { - /// FIXME-MULTICORE: lock around this function -#ifdef FA_NODE_VERBOSE_DEBUG - clib_warning - ("Initializing bihash for sw_if_index %d num buckets %lu memory size %llu", - sw_if_index0, am->fa_conn_table_hash_num_buckets, - am->fa_conn_table_hash_memory_size); -#endif - BV (clib_bihash_init) (&am->fa_sessions_hash, + if (!am->fa_sessions_hash_is_initialized) { + u16 wk; + /* Allocate the per-worker sessions pools */ + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES); + } + + /* ... and the interface session hash table */ + BV (clib_bihash_init) (&am->fa_sessions_hash, "ACL plugin FA session bihash", am->fa_conn_table_hash_num_buckets, am->fa_conn_table_hash_memory_size); - am->fa_sessions_hash_is_initialized = 1; + am->fa_sessions_hash_is_initialized = 1; + } } static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) @@ -622,6 +625,12 @@ static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u3 return sess; } +static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + return ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)); +} + static void acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now) { @@ -648,9 +657,6 @@ acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 if (~0 == pw->fa_conn_list_head[list_id]) { pw->fa_conn_list_head[list_id] = sess_id.session_index; - /* If it is a first conn in any list, kick the cleaner */ - vlib_process_signal_event 
(am->vlib_main, am->fa_cleaner_node_index, - ACL_FA_CLEANER_RESCHEDULE, 0); } } @@ -733,8 +739,8 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se pool_put_index (pw->fa_sessions_pool, sess_id.session_index); /* Deleting from timer structures not needed, as the caller must have dealt with the timers. */ - vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index); - am->fa_session_dels_by_sw_if_index[sw_if_index]++; + vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index); + pw->fa_session_dels_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_dels, 1); } @@ -749,10 +755,14 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index) static u64 acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type) { - if (~0 == pw->fa_conn_list_head[timeout_type]) { + fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]); + /* + * We can not check just the index here because inbetween the worker thread might + * dequeue the connection from the head just as we are about to check it. + */ + if (!is_valid_session_ptr(am, thread_index, sess)) { return ~0LL; // infinity. 
} else { - fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]); u64 timeout_time = sess->link_enqueue_time + fa_session_get_list_timeout (am, sess); return timeout_time; @@ -893,17 +903,13 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, - if (!acl_fa_ifc_has_sessions (am, sw_if_index)) - { - acl_fa_ifc_init_sessions (am, sw_if_index); - } - + ASSERT(am->fa_sessions_hash_is_initialized == 1); BV (clib_bihash_add_del) (&am->fa_sessions_hash, &kv, 1); acl_fa_conn_list_add_session(am, f_sess_id, now); - vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index); - am->fa_session_adds_by_sw_if_index[sw_if_index]++; + vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index); + pw->fa_session_adds_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_adds, 1); } @@ -1337,8 +1343,10 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, if (num_expired >= am->fa_max_deleted_sessions_per_interval) { /* there was too much work, we should get an interrupt ASAP */ pw->interrupt_is_needed = 1; + pw->interrupt_is_unwanted = 0; } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) { /* signal that they should trigger us less */ + pw->interrupt_is_needed = 0; pw->interrupt_is_unwanted = 1; } else { /* the current rate of interrupts is ok */ @@ -1354,11 +1362,11 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; if (!pw->interrupt_is_pending) { + pw->interrupt_is_pending = 1; vlib_node_set_interrupt_pending (vlib_mains[thread_index], acl_fa_worker_session_cleaner_process_node.index); - pw->interrupt_is_pending = 1; /* if the interrupt was requested, mark that done. 
*/ - pw->interrupt_is_needed = 0; + /* pw->interrupt_is_needed = 0; */ } } @@ -1425,8 +1433,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } - /* If no pending connections then no point in timing out */ - if (!has_pending_conns) + /* If no pending connections and no ACL applied then no point in timing out */ + if (!has_pending_conns && (0 == am->fa_total_enabled_count)) { am->fa_cleaner_cnt_wait_without_timeout++; (void) vlib_process_wait_for_event (vm); @@ -1488,6 +1496,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap); #endif vec_foreach(pw0, am->per_worker_data) { + if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { + /* thread 0 in multithreaded scenario is not used */ + continue; + } CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); @@ -1522,6 +1534,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data)); #endif vec_foreach(pw0, am->per_worker_data) { + if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { + /* thread 0 in multithreaded scenario is not used */ + continue; + } CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); @@ -1563,6 +1579,10 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, int interrupts_unwanted = 0; vec_foreach(pw0, am->per_worker_data) { + if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { + /* thread 0 in multithreaded scenario is not used */ + continue; + } if (pw0->interrupt_is_needed) { interrupts_needed++; /* the per-worker value is reset when sending the interrupt */ @@ -1575,6 +1595,8 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (interrupts_needed) { /* they need 
more interrupts, do less waiting around next time */ am->fa_current_cleaner_timer_wait_interval /= 2; + /* never go into zero-wait either though - we need to give the space to others */ + am->fa_current_cleaner_timer_wait_interval += 1; } else if (interrupts_unwanted) { /* slowly increase the amount of sleep up to a limit */ if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) @@ -1591,6 +1613,15 @@ void acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) { acl_main_t *am = &acl_main; + if (enable_disable) { + acl_fa_verify_init_sessions(am); + am->fa_total_enabled_count++; + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_RESCHEDULE, 0); + } else { + am->fa_total_enabled_count--; + } + if (is_input) { ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable); diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index e4cd2ab5..a8dece4e 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -109,6 +109,9 @@ typedef struct { /* per-worker ACL_N_TIMEOUTS of conn lists */ u32 *fa_conn_list_head; u32 *fa_conn_list_tail; + /* adds and deletes per-worker-per-interface */ + u64 *fa_session_dels_by_sw_if_index; + u64 *fa_session_adds_by_sw_if_index; /* Vector of expired connections retrieved from lists */ u32 *expired; /* the earliest next expiry time */ -- cgit 1.2.3-korg From a5e614f76fda9efb7bd1577ec18f9c0407d95d03 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Thu, 27 Jul 2017 15:39:50 +0200 Subject: acl-plugin: rework the optimization 7383, fortify acl-plugin memory behavior (VPP-910) The further prolonged testing from testbed that reported VPP-910 has uncovered a couple of deeper issues with optimization from 7384, and the usage of subscripts rather than vec_elt_at_index() allowed to hide a couple of further errors in the code. 
Also, the current acl-plugin behavior of using the global heap for its dynamic data is problematic - it makes the troubleshooting much harder by potentially spreading the problem around. Based on this experience, this commits makes a few changes to fix the issues seen, also improving the serviceability of the acl-plugin code for the future: - Use separate mheaps for any ACL-related control plane operations and separate for the hash lookup datastructures, to compartmentalize any memory-related issues for the ACL plugin. - Ensure vec_elt_at_index() usage throughout the hash_lookup.c file. - Use vectors rather than raw memory for storing the "ordinary" ACL rules. - Rework the optimization from 7384 to use a separate tail pointer rather than overloading the "prev" field. - Make get_session_ptr() more conservative and adjust is_valid_session_ptr accordingly Change-Id: Ifda85193f361de5ed3782a4acd39622bd33c5830 Signed-off-by: Andrew Yourtchenko (cherry picked from commit bd9c5ffe39e9ce61db95d74d150e07d738f24da1) --- src/plugins/acl/acl.c | 156 ++++++++++++++++------ src/plugins/acl/acl.h | 6 + src/plugins/acl/fa_node.c | 16 ++- src/plugins/acl/hash_lookup.c | 253 +++++++++++++++++++++--------------- src/plugins/acl/hash_lookup_types.h | 8 +- 5 files changed, 289 insertions(+), 150 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 6cd2d0c6..bc38265a 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -83,6 +83,18 @@ VLIB_PLUGIN_REGISTER () = { }; /* *INDENT-ON* */ + +static void * +acl_set_heap(acl_main_t *am) +{ + if (0 == am->acl_mheap) { + am->acl_mheap = mheap_alloc (0 /* use VM */ , 2 << 29); + } + void *oldheap = clib_mem_set_heap(am->acl_mheap); + return oldheap; +} + + static void vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp) { @@ -130,7 +142,7 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[], acl_main_t *am = &acl_main; acl_list_t *a; acl_rule_t *r; 
- acl_rule_t *acl_new_rules; + acl_rule_t *acl_new_rules = 0; int i; if (*acl_list_index != ~0) @@ -139,22 +151,23 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[], if (pool_is_free_index (am->acls, *acl_list_index)) { /* tried to replace a non-existent ACL, no point doing anything */ + clib_warning("acl-plugin-error: Trying to replace nonexistent ACL %d (tag %s)", *acl_list_index, tag); return -1; } } + if (0 == count) { + clib_warning("acl-plugin-warning: supplied no rules for ACL %d (tag %s)", *acl_list_index, tag); + } + + void *oldheap = acl_set_heap(am); /* Create and populate the rules */ - acl_new_rules = clib_mem_alloc_aligned (sizeof (acl_rule_t) * count, - CLIB_CACHE_LINE_BYTES); - if (!acl_new_rules) - { - /* Could not allocate rules. New or existing ACL - bail out regardless */ - return -1; - } + if (count > 0) + vec_validate(acl_new_rules, count-1); for (i = 0; i < count; i++) { - r = &acl_new_rules[i]; + r = vec_elt_at_index(acl_new_rules, i); memset(r, 0, sizeof(*r)); r->is_permit = rules[i].is_permit; r->is_ipv6 = rules[i].is_ipv6; @@ -192,13 +205,14 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[], a = am->acls + *acl_list_index; hash_acl_delete(am, *acl_list_index); /* Get rid of the old rules */ - clib_mem_free (a->rules); + if (a->rules) + vec_free (a->rules); } a->rules = acl_new_rules; a->count = count; memcpy (a->tag, tag, sizeof (a->tag)); hash_acl_add(am, *acl_list_index); - + clib_mem_set_heap (oldheap); return 0; } @@ -226,6 +240,7 @@ acl_del_list (u32 acl_list_index) } } + void *oldheap = acl_set_heap(am); /* delete any references to the ACL */ for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++) { @@ -263,10 +278,10 @@ acl_del_list (u32 acl_list_index) /* now we can delete the ACL itself */ a = &am->acls[acl_list_index]; if (a->rules) - { - clib_mem_free (a->rules); - } + vec_free (a->rules); + pool_put (am->acls, a); + clib_mem_set_heap (oldheap); return 0; } @@ -359,13 +374,15 @@ acl_classify_add_del_table_tiny 
(vnet_classify_main_t * cm, u8 * mask, if (0 == match) match = 1; - - return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, + void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base); + int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, memory_size, skip, match, next_table_index, miss_next_index, table_index, current_data_flag, current_data_offset, is_add, 1 /* delete_chain */); + clib_mem_set_heap (oldheap); + return ret; } static int @@ -385,12 +402,15 @@ acl_classify_add_del_table_small (vnet_classify_main_t * cm, u8 * mask, if (0 == match) match = 1; - return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, + void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base); + int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, memory_size, skip, match, next_table_index, miss_next_index, table_index, current_data_flag, current_data_offset, is_add, 1 /* delete_chain */); + return ret; + clib_mem_set_heap (oldheap); } @@ -400,12 +420,15 @@ acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index) vnet_classify_main_t *cm = &vnet_classify_main; u32 ip4_table_index = ~0; u32 ip6_table_index = ~0; + void *oldheap = acl_set_heap(am); vec_validate_init_empty (am->acl_ip4_input_classify_table_by_sw_if_index, sw_if_index, ~0); vec_validate_init_empty (am->acl_ip6_input_classify_table_by_sw_if_index, sw_if_index, ~0); + /* switch to global heap while calling vnet_* functions */ + clib_mem_set_heap (cm->vlib_main->heap_base); vnet_l2_input_classify_enable_disable (sw_if_index, 0); if (am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] != ~0) @@ -428,7 +451,7 @@ acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index) am->l2_input_classify_next_acl_ip6, &ip6_table_index, 0); } - + clib_mem_set_heap (oldheap); return 0; } @@ -438,12 +461,16 @@ acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index) vnet_classify_main_t *cm = &vnet_classify_main; u32 ip4_table_index = ~0; u32 ip6_table_index 
= ~0; + void *oldheap = acl_set_heap(am); vec_validate_init_empty (am->acl_ip4_output_classify_table_by_sw_if_index, sw_if_index, ~0); vec_validate_init_empty (am->acl_ip6_output_classify_table_by_sw_if_index, sw_if_index, ~0); + /* switch to global heap while calling vnet_* functions */ + clib_mem_set_heap (cm->vlib_main->heap_base); + vnet_l2_output_classify_enable_disable (sw_if_index, 0); if (am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] != ~0) @@ -466,7 +493,7 @@ acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index) am->l2_output_classify_next_acl_ip6, &ip6_table_index, 0); } - + clib_mem_set_heap (oldheap); return 0; } @@ -478,6 +505,8 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) u32 ip6_table_index = ~0; int rv; + void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base); + /* in case there were previous tables attached */ acl_unhook_l2_input_classify (am, sw_if_index); rv = @@ -486,7 +515,7 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) am->l2_input_classify_next_acl_ip4, &ip4_table_index, 1); if (rv) - return rv; + goto done; rv = acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, @@ -498,7 +527,7 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) sizeof (ip4_5tuple_mask) - 1, ~0, am->l2_input_classify_next_acl_ip4, &ip4_table_index, 0); - return rv; + goto done; } rv = vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index, @@ -516,7 +545,7 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) sizeof (ip4_5tuple_mask) - 1, ~0, am->l2_input_classify_next_acl_ip4, &ip4_table_index, 0); - return rv; + goto done; } am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = @@ -525,6 +554,8 @@ acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) ip6_table_index; vnet_l2_input_classify_enable_disable (sw_if_index, 1); +done: + clib_mem_set_heap (prevheap); return rv; } @@ -536,6 +567,8 @@ 
acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) u32 ip6_table_index = ~0; int rv; + void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base); + /* in case there were previous tables attached */ acl_unhook_l2_output_classify (am, sw_if_index); rv = @@ -544,7 +577,7 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) am->l2_output_classify_next_acl_ip4, &ip4_table_index, 1); if (rv) - return rv; + goto done; rv = acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask, sizeof (ip6_5tuple_mask) - 1, ~0, @@ -556,7 +589,7 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) sizeof (ip4_5tuple_mask) - 1, ~0, am->l2_output_classify_next_acl_ip4, &ip4_table_index, 0); - return rv; + goto done; } rv = vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index, @@ -574,7 +607,7 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) sizeof (ip4_5tuple_mask) - 1, ~0, am->l2_output_classify_next_acl_ip4, &ip4_table_index, 0); - return rv; + goto done; } am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = @@ -583,6 +616,8 @@ acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) ip6_table_index; vnet_l2_output_classify_enable_disable (sw_if_index, 1); +done: + clib_mem_set_heap (prevheap); return rv; } @@ -653,6 +688,7 @@ acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) /* ACL is not defined. 
Can not apply */ return -1; } + void *oldheap = acl_set_heap(am); if (is_input) { @@ -663,6 +699,7 @@ acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) clib_warning("ACL %d is already applied inbound on sw_if_index %d (index %d)", acl_list_index, sw_if_index, index); /* the entry is already there */ + clib_mem_set_heap (oldheap); return -1; } /* if there was no ACL applied before, enable the ACL processing */ @@ -684,6 +721,7 @@ acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) clib_warning("ACL %d is already applied outbound on sw_if_index %d (index %d)", acl_list_index, sw_if_index, index); /* the entry is already there */ + clib_mem_set_heap (oldheap); return -1; } /* if there was no ACL applied before, enable the ACL processing */ @@ -696,6 +734,7 @@ acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) vec_add (am->output_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index, 1); } + clib_mem_set_heap (oldheap); return 0; } @@ -706,6 +745,7 @@ acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) acl_main_t *am = &acl_main; int i; int rv = -1; + void *oldheap = acl_set_heap(am); if (is_input) { vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); @@ -764,6 +804,7 @@ acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) acl_interface_out_enable_disable (am, sw_if_index, 0); } } + clib_mem_set_heap (oldheap); return rv; } @@ -772,6 +813,7 @@ acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input) { acl_main_t *am = &acl_main; int i; + void *oldheap = acl_set_heap(am); if (is_input) { vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); @@ -812,6 +854,7 @@ acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input) vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]); } + clib_mem_set_heap (oldheap); } static int @@ -820,6 +863,7 @@ acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 
is_input, { int rv = -1; acl_main_t *am = &acl_main; + void *oldheap = acl_set_heap(am); if (is_add) { rv = @@ -835,6 +879,7 @@ acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input, rv = acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index); } + clib_mem_set_heap (oldheap); return rv; } @@ -1015,6 +1060,7 @@ macip_create_classify_tables (acl_main_t * am, u32 macip_acl_index) macip_find_match_type (mvec, a->rules[i].src_mac_mask, a->rules[i].src_prefixlen, a->rules[i].is_ipv6); + ASSERT(match_type_index != ~0); /* add session to table mvec[match_type_index].table_index; */ vnet_classify_add_del_session (cm, mvec[match_type_index].table_index, mask, a->rules[i].is_permit ? ~0 : 0, i, @@ -1066,17 +1112,15 @@ macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[], acl_main_t *am = &acl_main; macip_acl_list_t *a; macip_acl_rule_t *r; - macip_acl_rule_t *acl_new_rules; + macip_acl_rule_t *acl_new_rules = 0; int i; - + if (0 == count) { + clib_warning("acl-plugin-warning: Trying to create empty MACIP ACL (tag %s)", tag); + } + void *oldheap = acl_set_heap(am); /* Create and populate the rules */ - acl_new_rules = clib_mem_alloc_aligned (sizeof (macip_acl_rule_t) * count, - CLIB_CACHE_LINE_BYTES); - if (!acl_new_rules) - { - /* Could not allocate rules. 
New or existing ACL - bail out regardless */ - return -1; - } + if (count > 0) + vec_validate(acl_new_rules, count-1); for (i = 0; i < count; i++) { @@ -1104,7 +1148,7 @@ macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[], /* Create and populate the classifer tables */ macip_create_classify_tables (am, *acl_list_index); - + clib_mem_set_heap (oldheap); return 0; } @@ -1117,7 +1161,9 @@ macip_acl_interface_del_acl (acl_main_t * am, u32 sw_if_index) int rv; u32 macip_acl_index; macip_acl_list_t *a; + void *oldheap = acl_set_heap(am); vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0); + clib_mem_set_heap (oldheap); macip_acl_index = am->macip_acl_by_sw_if_index[sw_if_index]; /* No point in deleting MACIP ACL which is not applied */ if (~0 == macip_acl_index) @@ -1144,12 +1190,15 @@ macip_acl_interface_add_acl (acl_main_t * am, u32 sw_if_index, { return -1; } + void *oldheap = acl_set_heap(am); a = &am->macip_acls[macip_acl_index]; vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0); /* If there already a MACIP ACL applied, unapply it */ if (~0 != am->macip_acl_by_sw_if_index[sw_if_index]) macip_acl_interface_del_acl(am, sw_if_index); am->macip_acl_by_sw_if_index[sw_if_index] = macip_acl_index; + clib_mem_set_heap (oldheap); + /* Apply the classifier tables for L2 ACLs */ rv = vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index, @@ -1161,6 +1210,7 @@ static int macip_acl_del_list (u32 acl_list_index) { acl_main_t *am = &acl_main; + void *oldheap = acl_set_heap(am); macip_acl_list_t *a; int i; if (pool_is_free_index (am->macip_acls, acl_list_index)) @@ -1184,9 +1234,10 @@ macip_acl_del_list (u32 acl_list_index) a = &am->macip_acls[acl_list_index]; if (a->rules) { - clib_mem_free (a->rules); + vec_free (a->rules); } pool_put (am->macip_acls, a); + clib_mem_set_heap (oldheap); return 0; } @@ -1196,6 +1247,7 @@ macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add, u32 acl_list_index) { 
acl_main_t *am = &acl_main; + void *oldheap = acl_set_heap(am); int rv = -1; if (is_add) { @@ -1205,6 +1257,7 @@ macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add, { rv = macip_acl_interface_del_acl (am, sw_if_index); } + clib_mem_set_heap (oldheap); return rv; } @@ -1363,6 +1416,7 @@ send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, vl_api_acl_rule_t *rules; int i; int msg_size = sizeof (*mp) + sizeof (mp->r[0]) * acl->count; + void *oldheap = acl_set_heap(am); mp = vl_msg_api_alloc (msg_size); memset (mp, 0, msg_size); @@ -1381,6 +1435,7 @@ send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, } clib_warning("Sending acl details for ACL index %d", ntohl(mp->acl_index)); + clib_mem_set_heap (oldheap); vl_msg_api_send_shmem (q, (u8 *) & mp); } @@ -1439,6 +1494,7 @@ send_acl_interface_list_details (acl_main_t * am, int n_output; int count; int i = 0; + void *oldheap = acl_set_heap(am); vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); @@ -1469,7 +1525,7 @@ send_acl_interface_list_details (acl_main_t * am, mp->acls[n_input + i] = htonl (am->output_acl_vec_by_sw_if_index[sw_if_index][i]); } - + clib_mem_set_heap (oldheap); vl_msg_api_send_shmem (q, (u8 *) & mp); } @@ -1784,6 +1840,7 @@ static int acl_set_skip_ipv6_eh(u32 eh, u32 value) { acl_main_t *am = &acl_main; + if ((eh < 256) && (value < 2)) { am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, eh, value); @@ -2104,6 +2161,21 @@ acl_show_aclplugin_fn (vlib_main_t * vm, vlib_cli_output(vm, "\n%s\n", out0); vec_free(out0); } + else if (unformat (input, "memory")) + { + vlib_cli_output (vm, "ACL plugin main heap statistics:\n"); + if (am->acl_mheap) { + vlib_cli_output (vm, " %U\n", format_mheap, am->acl_mheap, 1); + } else { + vlib_cli_output (vm, " Not initialized\n"); + } + vlib_cli_output (vm, "ACL hash lookup support heap statistics:\n"); + if (am->hash_lookup_mheap) { + 
vlib_cli_output (vm, " %U\n", format_mheap, am->hash_lookup_mheap, 1); + } else { + vlib_cli_output (vm, " Not initialized\n"); + } + } else if (unformat (input, "tables")) { ace_mask_type_entry_t *mte; @@ -2198,9 +2270,9 @@ acl_show_aclplugin_fn (vlib_main_t * vm, out0 = format(out0, " input lookup applied entries:\n"); for(j=0; jinput_hash_entry_vec_by_sw_if_index[swi]); j++) { applied_hash_ace_entry_t *pae = &am->input_hash_entry_vec_by_sw_if_index[swi][j]; - out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d\n", + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d\n", j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, - pae->next_applied_entry_index, pae->prev_applied_entry_index); + pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index); } } @@ -2212,9 +2284,9 @@ acl_show_aclplugin_fn (vlib_main_t * vm, out0 = format(out0, " output lookup applied entries:\n"); for(j=0; joutput_hash_entry_vec_by_sw_if_index[swi]); j++) { applied_hash_ace_entry_t *pae = &am->output_hash_entry_vec_by_sw_if_index[swi][j]; - out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d\n", + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d\n", j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, - pae->next_applied_entry_index, pae->prev_applied_entry_index); + pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index); } } @@ -2288,6 +2360,7 @@ acl_init (vlib_main_t * vm) vec_free (name); acl_setup_fa_nodes(); + am->session_timeout_sec[ACL_TIMEOUT_TCP_TRANSIENT] = TCP_SESSION_TRANSIENT_TIMEOUT_SEC; am->session_timeout_sec[ACL_TIMEOUT_TCP_IDLE] = TCP_SESSION_IDLE_TIMEOUT_SEC; am->session_timeout_sec[ACL_TIMEOUT_UDP_IDLE] = UDP_SESSION_IDLE_TIMEOUT_SEC; @@ -2329,7 +2402,6 @@ acl_init (vlib_main_t * vm) 
am->l4_match_nonfirst_fragment = 1; /* use the new fancy hash-based matching */ - // NOT IMMEDIATELY am->use_hash_acl_matching = 1; return error; diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index b84ed7a4..14c6129c 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -122,12 +122,18 @@ typedef struct } ace_mask_type_entry_t; typedef struct { + /* mheap to hold all the ACL module related allocations, other than hash */ + void *acl_mheap; + /* API message ID base */ u16 msg_id_base; acl_list_t *acls; /* Pool of ACLs */ hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */ clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup hash table. */ + + /* mheap to hold all the miscellaneous allocations related to hash-based lookups */ + void *hash_lookup_mheap; int acl_lookup_hash_initialized; applied_hash_ace_entry_t **input_hash_entry_vec_by_sw_if_index; applied_hash_ace_entry_t **output_hash_entry_vec_by_sw_if_index; diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 3181a22c..74079a2d 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -621,14 +621,14 @@ acl_fa_verify_init_sessions (acl_main_t * am) static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; - fa_session_t *sess = pw->fa_sessions_pool + session_index; + fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 
0 : pool_elt_at_index(pw->fa_sessions_pool, session_index); return sess; } static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess) { acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; - return ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool)); + return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool))); } static void @@ -731,6 +731,7 @@ acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, static void acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id) { + void *oldheap = clib_mem_set_heap(am->acl_mheap); fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); ASSERT(sess->thread_index == os_get_thread_index ()); BV (clib_bihash_add_del) (&am->fa_sessions_hash, @@ -740,6 +741,7 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se /* Deleting from timer structures not needed, as the caller must have dealt with the timers. 
*/ vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index); + clib_mem_set_heap (oldheap); pw->fa_session_dels_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_dels, 1); } @@ -877,6 +879,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, clib_bihash_kv_40_8_t kv; fa_full_session_id_t f_sess_id; uword thread_index = os_get_thread_index(); + void *oldheap = clib_mem_set_heap(am->acl_mheap); acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; f_sess_id.thread_index = thread_index; @@ -909,6 +912,7 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, acl_fa_conn_list_add_session(am, f_sess_id, now); vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index); + clib_mem_set_heap (oldheap); pw->fa_session_adds_by_sw_if_index[sw_if_index]++; clib_smp_atomic_add(&am->fa_session_total_adds, 1); } @@ -1616,8 +1620,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) if (enable_disable) { acl_fa_verify_init_sessions(am); am->fa_total_enabled_count++; + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, ACL_FA_CLEANER_RESCHEDULE, 0); + clib_mem_set_heap (oldheap); } else { am->fa_total_enabled_count--; } @@ -1625,10 +1631,12 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) if (is_input) { ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa", sw_if_index, enable_disable, 0, 0); vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa", sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); am->fa_in_acl_on_sw_if_index = clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index, enable_disable); @@ -1636,10 +1644,12 @@ acl_fa_enable_disable (u32 sw_if_index, 
int is_input, int enable_disable) else { ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa", sw_if_index, enable_disable, 0, 0); vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa", sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); am->fa_out_acl_on_sw_if_index = clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index, enable_disable); @@ -1650,9 +1660,11 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) #ifdef FA_NODE_VERBOSE_DEBUG clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index); #endif + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index); + clib_mem_set_heap (oldheap); } } diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 027dabd0..a337eb84 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -33,6 +33,16 @@ #include "hash_lookup.h" #include "hash_lookup_private.h" + +static inline applied_hash_ace_entry_t **get_applied_hash_aces(acl_main_t *am, int is_input, u32 sw_if_index) +{ + applied_hash_ace_entry_t **applied_hash_aces = is_input ? vec_elt_at_index(am->input_hash_entry_vec_by_sw_if_index, sw_if_index) + : vec_elt_at_index(am->output_hash_entry_vec_by_sw_if_index, sw_if_index); + return applied_hash_aces; +} + + + /* * This returns true if there is indeed a match on the portranges. * With all these levels of indirections, this is not going to be very fast, @@ -41,11 +51,10 @@ static int match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index) { - applied_hash_ace_entry_t **applied_hash_aces = match->pkt.is_input ? 
&am->input_hash_entry_vec_by_sw_if_index[match->pkt.sw_if_index] : - &am->output_hash_entry_vec_by_sw_if_index[match->pkt.sw_if_index]; - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[index]); - // hash_acl_info_t *ha = &am->hash_acl_infos[pae->acl_index]; + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, match->pkt.is_input, match->pkt.sw_if_index); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), index); + acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]); DBG("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?", r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last, @@ -70,8 +79,7 @@ multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) u32 sw_if_index = match->pkt.sw_if_index; u8 is_input = match->pkt.is_input; - applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : - &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index); applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index : &am->output_applied_hash_acl_info_by_sw_if_index; @@ -79,11 +87,11 @@ multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]); for(mask_type_index=0; mask_type_index < pool_len(am->ace_mask_type_pool); mask_type_index++) { - if (!clib_bitmap_get((*applied_hash_acls)[sw_if_index].mask_type_index_bitmap, mask_type_index)) { + if (!clib_bitmap_get(vec_elt_at_index((*applied_hash_acls), sw_if_index)->mask_type_index_bitmap, mask_type_index)) { /* This bit is not set. 
Avoid trying to match */ continue; } - ace_mask_type_entry_t *mte = &am->ace_mask_type_pool[mask_type_index]; + ace_mask_type_entry_t *mte = vec_elt_at_index(am->ace_mask_type_pool, mask_type_index); pmatch = (u64 *)match; pmask = (u64 *)&mte->mask; pkey = (u64 *)kv.key; @@ -120,7 +128,7 @@ multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) u32 curr_index = result_val->applied_entry_index; while ((curr_index != ~0) && !match_portranges(am, match, curr_index)) { /* while no match and there are more entries, walk... */ - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[curr_index]); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces),curr_index); DBG("entry %d did not portmatch, advancing to %d", curr_index, pae->next_applied_entry_index); curr_index = pae->next_applied_entry_index; } @@ -153,9 +161,9 @@ multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) static void hashtable_add_del(acl_main_t *am, clib_bihash_kv_48_8_t *kv, int is_add) { - DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx add %d", + DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx %016llx add %d", kv->key[0], kv->key[1], kv->key[2], - kv->key[3], kv->key[4], kv->key[5], is_add); + kv->key[3], kv->key[4], kv->key[5], kv->value, is_add); BV (clib_bihash_add_del) (&am->acl_lookup_hash, kv, is_add); } @@ -167,17 +175,17 @@ fill_applied_hash_ace_kv(acl_main_t *am, { fa_5tuple_t *kv_key = (fa_5tuple_t *)kv->key; hash_acl_lookup_value_t *kv_val = (hash_acl_lookup_value_t *)&kv->value; - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); - hash_acl_info_t *ha = &am->hash_acl_infos[pae->acl_index]; + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index); + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index); - memcpy(kv_key, &ha->rules[pae->hash_ace_info_index].match, sizeof(*kv_key)); + memcpy(kv_key, &(vec_elt_at_index(ha->rules, 
pae->hash_ace_info_index)->match), sizeof(*kv_key)); /* initialize the sw_if_index and direction */ kv_key->pkt.sw_if_index = sw_if_index; kv_key->pkt.is_input = is_input; kv_val->as_u64 = 0; kv_val->applied_entry_index = new_index; - kv_val->need_portrange_check = ha->rules[pae->hash_ace_info_index].src_portrange_not_powerof2 || - ha->rules[pae->hash_ace_info_index].dst_portrange_not_powerof2; + kv_val->need_portrange_check = vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->src_portrange_not_powerof2 || + vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->dst_portrange_not_powerof2; /* by default assume all values are shadowed -> check all mask types */ kv_val->shadowed = 1; } @@ -203,7 +211,9 @@ activate_applied_ace_hash_entry(acl_main_t *am, u32 new_index) { clib_bihash_kv_48_8_t kv; - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); + ASSERT(new_index != ~0); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index); + DBG("activate_applied_ace_hash_entry sw_if_index %d is_input %d new_index %d", sw_if_index, is_input, new_index); fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, new_index, &kv); @@ -214,28 +224,28 @@ activate_applied_ace_hash_entry(acl_main_t *am, clib_bihash_kv_48_8_t result; hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value; int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result); + ASSERT(new_index != ~0); + ASSERT(new_index < vec_len((*applied_hash_aces))); if (res == 0) { /* There already exists an entry or more. Append at the end. 
*/ u32 first_index = result_val->applied_entry_index; - DBG("A key already exists, with applied entry index: %d", existing_index); ASSERT(first_index != ~0); - applied_hash_ace_entry_t *first_pae = &((*applied_hash_aces)[first_index]); - /* move to "prev" by one, this should land us in the end of the list */ - u32 last_index = first_pae->prev_applied_entry_index; + DBG("A key already exists, with applied entry index: %d", first_index); + applied_hash_ace_entry_t *first_pae = vec_elt_at_index((*applied_hash_aces), first_index); + u32 last_index = first_pae->tail_applied_entry_index; ASSERT(last_index != ~0); - applied_hash_ace_entry_t *last_pae = &((*applied_hash_aces)[last_index]); - ASSERT(last_pae->next_applied_entry_index == ~0); + applied_hash_ace_entry_t *last_pae = vec_elt_at_index((*applied_hash_aces), last_index); + DBG("...advance to chained entry index: %d", last_index); /* link ourseves in */ last_pae->next_applied_entry_index = new_index; pae->prev_applied_entry_index = last_index; - /* make a new reference from the very first element to new tail */ - first_pae->prev_applied_entry_index = new_index; + /* adjust the pointer to the new tail */ + first_pae->tail_applied_entry_index = new_index; } else { /* It's the very first entry */ hashtable_add_del(am, &kv, 1); - pae->is_first_entry = 1; - /* we are the tail */ - pae->prev_applied_entry_index = new_index; + ASSERT(new_index != ~0); + pae->tail_applied_entry_index = new_index; } } @@ -254,31 +264,44 @@ applied_hash_entries_analyze(acl_main_t *am, applied_hash_ace_entry_t **applied_ */ } +static void * +hash_acl_set_heap(acl_main_t *am) +{ + if (0 == am->hash_lookup_mheap) { + am->hash_lookup_mheap = mheap_alloc (0 /* use VM */ , 2 << 25); + } + void *oldheap = clib_mem_set_heap(am->hash_lookup_mheap); + return oldheap; +} + void hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) { int i; DBG("HASH ACL apply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index); 
- u32 *acl_vec = is_input ? am->input_acl_vec_by_sw_if_index[sw_if_index] : - am->output_acl_vec_by_sw_if_index[sw_if_index]; + if (!am->acl_lookup_hash_initialized) { + BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash", + 65536, 2 << 25); + am->acl_lookup_hash_initialized = 1; + } + + u32 *acl_vec = is_input ? *vec_elt_at_index(am->input_acl_vec_by_sw_if_index, sw_if_index) + : *vec_elt_at_index(am->output_acl_vec_by_sw_if_index, sw_if_index); + + void *oldheap = hash_acl_set_heap(am); if (is_input) { vec_validate(am->input_hash_entry_vec_by_sw_if_index, sw_if_index); } else { vec_validate(am->output_hash_entry_vec_by_sw_if_index, sw_if_index); } vec_validate(am->hash_acl_infos, acl_index); - applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : - &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index); + u32 order_index = vec_search(acl_vec, acl_index); - hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index); ASSERT(order_index != ~0); - if (!am->acl_lookup_hash_initialized) { - BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash", - 65536, 2 << 25); - am->acl_lookup_hash_initialized = 1; - } int base_offset = vec_len(*applied_hash_aces); /* Update the bitmap of the mask types with which the lookup @@ -286,7 +309,7 @@ hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) applied_hash_acl_info_t **applied_hash_acls = is_input ? 
&am->input_applied_hash_acl_info_by_sw_if_index : &am->output_applied_hash_acl_info_by_sw_if_index; vec_validate((*applied_hash_acls), sw_if_index); - applied_hash_acl_info_t *pal = &(*applied_hash_acls)[sw_if_index]; + applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index); pal->mask_type_index_bitmap = clib_bitmap_or(pal->mask_type_index_bitmap, ha->mask_type_index_bitmap); /* @@ -305,8 +328,7 @@ hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) /* add the rules from the ACL to the hash table for lookup and append to the vector*/ for(i=0; i < vec_len(ha->rules); i++) { u32 new_index = base_offset + i; - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[new_index]); - pae->is_first_entry = 0; + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index); pae->acl_index = acl_index; pae->ace_index = ha->rules[i].ace_index; pae->action = ha->rules[i].action; @@ -314,9 +336,29 @@ hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) /* we might link it in later */ pae->next_applied_entry_index = ~0; pae->prev_applied_entry_index = ~0; + pae->tail_applied_entry_index = ~0; activate_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, new_index); } applied_hash_entries_analyze(am, applied_hash_aces); + clib_mem_set_heap (oldheap); +} + +static u32 +find_head_applied_ace_index(applied_hash_ace_entry_t **applied_hash_aces, u32 curr_index) +{ + /* + * find back the first entry. Inefficient so might need to be a bit cleverer + * if this proves to be a problem.. 
+ */ + u32 an_index = curr_index; + ASSERT(an_index != ~0); + applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), an_index); + while(head_pae->prev_applied_entry_index != ~0) { + an_index = head_pae->prev_applied_entry_index; + ASSERT(an_index != ~0); + head_pae = vec_elt_at_index((*applied_hash_aces), an_index); + } + return an_index; } static void @@ -325,43 +367,43 @@ move_applied_ace_hash_entry(acl_main_t *am, applied_hash_ace_entry_t **applied_hash_aces, u32 old_index, u32 new_index) { + ASSERT(old_index != ~0); + ASSERT(new_index != ~0); /* move the entry */ - (*applied_hash_aces)[new_index] = (*applied_hash_aces)[old_index]; + *vec_elt_at_index((*applied_hash_aces), new_index) = *vec_elt_at_index((*applied_hash_aces), old_index); /* update the linkage and hash table if necessary */ - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[old_index]); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index); - if (!pae->is_first_entry) { - applied_hash_ace_entry_t *prev_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); + if (pae->prev_applied_entry_index != ~0) { + applied_hash_ace_entry_t *prev_pae = vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index); ASSERT(prev_pae->next_applied_entry_index == old_index); prev_pae->next_applied_entry_index = new_index; } else { /* first entry - so the hash points to it, update */ add_del_hashtable_entry(am, sw_if_index, is_input, applied_hash_aces, new_index, 1); + ASSERT(pae->tail_applied_entry_index != ~0); } if (pae->next_applied_entry_index != ~0) { - applied_hash_ace_entry_t *next_pae = &((*applied_hash_aces)[pae->next_applied_entry_index]); + applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index); ASSERT(next_pae->prev_applied_entry_index == old_index); next_pae->prev_applied_entry_index = new_index; } else { /* * Moving the very last entry, so we need to update the tail pointer in the 
first one. - * find back the first entry. Inefficient so might need to be a bit cleverer - * if this proves to be a problem.. */ - u32 an_index = pae->prev_applied_entry_index; - applied_hash_ace_entry_t *head_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); - while(!head_pae->is_first_entry) { - an_index = head_pae->prev_applied_entry_index; - head_pae = &((*applied_hash_aces)[an_index]); - } - ASSERT(head_pae->prev_applied_entry_index == old_index); - head_pae->prev_applied_entry_index = new_index; + u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index); + ASSERT(head_index != ~0); + applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index); + + ASSERT(head_pae->tail_applied_entry_index == old_index); + head_pae->tail_applied_entry_index = new_index; } /* invalidate the old entry */ pae->prev_applied_entry_index = ~0; pae->next_applied_entry_index = ~0; + pae->tail_applied_entry_index = ~0; } static void @@ -370,39 +412,37 @@ deactivate_applied_ace_hash_entry(acl_main_t *am, applied_hash_ace_entry_t **applied_hash_aces, u32 old_index) { - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[old_index]); - - if (pae->next_applied_entry_index != ~0) { - applied_hash_ace_entry_t *next_pae = &((*applied_hash_aces)[pae->next_applied_entry_index]); - ASSERT(next_pae->prev_applied_entry_index == old_index); - next_pae->prev_applied_entry_index = pae->prev_applied_entry_index; - } + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index); + DBG("UNAPPLY DEACTIVATE: sw_if_index %d is_input %d, applied index %d", sw_if_index, is_input, old_index); - if (!pae->is_first_entry) { - applied_hash_ace_entry_t *prev_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); + if (pae->prev_applied_entry_index != ~0) { + DBG("UNAPPLY = index %d has prev_applied_entry_index %d", old_index, pae->prev_applied_entry_index); + applied_hash_ace_entry_t *prev_pae = 
vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index); ASSERT(prev_pae->next_applied_entry_index == old_index); prev_pae->next_applied_entry_index = pae->next_applied_entry_index; if (pae->next_applied_entry_index == ~0) { /* it was a last entry we removed, update the pointer on the first one */ - u32 an_index = pae->prev_applied_entry_index; - applied_hash_ace_entry_t *head_pae = &((*applied_hash_aces)[pae->prev_applied_entry_index]); - while(!head_pae->is_first_entry) { - an_index = head_pae->prev_applied_entry_index; - head_pae = &((*applied_hash_aces)[an_index]); - } - ASSERT(head_pae->prev_applied_entry_index == old_index); - head_pae->prev_applied_entry_index = pae->prev_applied_entry_index; + u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index); + DBG("UNAPPLY = index %d head index to update %d", old_index, head_index); + ASSERT(head_index != ~0); + applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index); + + ASSERT(head_pae->tail_applied_entry_index == old_index); + head_pae->tail_applied_entry_index = pae->prev_applied_entry_index; + } else { + applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index); + next_pae->prev_applied_entry_index = pae->prev_applied_entry_index; } } else { /* It was the first entry. 
We need either to reset the hash entry or delete it */ - pae->is_first_entry = 0; if (pae->next_applied_entry_index != ~0) { - /* a custom case of relinking for the first node, with the tail not forward linked to it */ - applied_hash_ace_entry_t *next_pae = &((*applied_hash_aces)[pae->next_applied_entry_index]); - /* this is the tail the new head should be aware of */ - next_pae->prev_applied_entry_index = pae->prev_applied_entry_index; - next_pae->is_first_entry = 1; - + /* the next element becomes the new first one, so needs the tail pointer to be set */ + applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index); + ASSERT(pae->tail_applied_entry_index != ~0); + next_pae->tail_applied_entry_index = pae->tail_applied_entry_index; + DBG("Resetting the hash table entry from %d to %d, setting tail index to %d", old_index, pae->next_applied_entry_index, pae->tail_applied_entry_index); + /* unlink from the next element */ + next_pae->prev_applied_entry_index = ~0; add_del_hashtable_entry(am, sw_if_index, is_input, applied_hash_aces, pae->next_applied_entry_index, 1); } else { @@ -411,6 +451,10 @@ deactivate_applied_ace_hash_entry(acl_main_t *am, applied_hash_aces, old_index, 0); } } + /* invalidate the old entry */ + pae->prev_applied_entry_index = ~0; + pae->next_applied_entry_index = ~0; + pae->tail_applied_entry_index = ~0; } @@ -419,14 +463,14 @@ hash_acl_build_applied_lookup_bitmap(acl_main_t *am, u32 sw_if_index, u8 is_inpu { int i; uword *new_lookup_bitmap = 0; - u32 **applied_acls = is_input ? &am->input_acl_vec_by_sw_if_index[sw_if_index] : - &am->output_acl_vec_by_sw_if_index[sw_if_index]; - applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index : - &am->output_applied_hash_acl_info_by_sw_if_index; - applied_hash_acl_info_t *pal = &(*applied_hash_acls)[sw_if_index]; + u32 **applied_acls = is_input ? 
vec_elt_at_index(am->input_acl_vec_by_sw_if_index, sw_if_index) + : vec_elt_at_index(am->output_acl_vec_by_sw_if_index, sw_if_index); + applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index + : &am->output_applied_hash_acl_info_by_sw_if_index; + applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index); for(i=0; i < vec_len(*applied_acls); i++) { - u32 a_acl_index = (*applied_acls)[i]; - hash_acl_info_t *ha = &am->hash_acl_infos[a_acl_index]; + u32 a_acl_index = *vec_elt_at_index((*applied_acls), i); + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, a_acl_index); new_lookup_bitmap = clib_bitmap_or(new_lookup_bitmap, ha->mask_type_index_bitmap); } @@ -441,12 +485,12 @@ hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) int i; DBG("HASH ACL unapply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index); - hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; - applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : - &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index); + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index); for(i=0; i < vec_len((*applied_hash_aces)); i++) { - if ((*applied_hash_aces)[i].acl_index == acl_index) { + if (vec_elt_at_index(*applied_hash_aces,i)->acl_index == acl_index) { DBG("Found applied ACL#%d at applied index %d", acl_index, i); break; } @@ -456,13 +500,14 @@ hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) /* we went all the way without finding any entries. Probably a list was empty. 
*/ return; } + + void *oldheap = hash_acl_set_heap(am); int base_offset = i; int tail_offset = base_offset + vec_len(ha->rules); int tail_len = vec_len((*applied_hash_aces)) - tail_offset; DBG("base_offset: %d, tail_offset: %d, tail_len: %d", base_offset, tail_offset, tail_len); for(i=0; i < vec_len(ha->rules); i ++) { - DBG("UNAPPLY DEACTIVATE: sw_if_index %d is_input %d, applied index %d", sw_if_index, is_input, base_offset + i); deactivate_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, base_offset + i); } @@ -479,6 +524,7 @@ hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) /* After deletion we might not need some of the mask-types anymore... */ hash_acl_build_applied_lookup_bitmap(am, sw_if_index, is_input); + clib_mem_set_heap (oldheap); } /* @@ -493,8 +539,8 @@ hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) void hash_acl_reapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) { - u32 **applied_acls = is_input ? &am->input_acl_vec_by_sw_if_index[sw_if_index] : - &am->output_acl_vec_by_sw_if_index[sw_if_index]; + u32 **applied_acls = is_input ? 
vec_elt_at_index(am->input_acl_vec_by_sw_if_index, sw_if_index) + : vec_elt_at_index(am->output_acl_vec_by_sw_if_index, sw_if_index); int i; int start_index = vec_search((*applied_acls), acl_index); /* @@ -505,10 +551,10 @@ hash_acl_reapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) /* unapply all the ACLs till the current one */ for(i = vec_len(*applied_acls) - 1; i >= start_index; i--) { - hash_acl_unapply(am, sw_if_index, is_input, (*applied_acls)[i]); + hash_acl_unapply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i)); } for(i = start_index; i < vec_len(*applied_acls); i++) { - hash_acl_apply(am, sw_if_index, is_input, (*applied_acls)[i]); + hash_acl_apply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i)); } } @@ -649,7 +695,7 @@ assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask) static void release_mask_type_index(acl_main_t *am, u32 mask_type_index) { - ace_mask_type_entry_t *mte = &am->ace_mask_type_pool[mask_type_index]; + ace_mask_type_entry_t *mte = pool_elt_at_index(am->ace_mask_type_pool, mask_type_index); mte->refcount--; if (mte->refcount == 0) { /* we are not using this entry anymore */ @@ -659,11 +705,12 @@ release_mask_type_index(acl_main_t *am, u32 mask_type_index) void hash_acl_add(acl_main_t *am, int acl_index) { + void *oldheap = hash_acl_set_heap(am); DBG("HASH ACL add : %d", acl_index); int i; acl_list_t *a = &am->acls[acl_index]; vec_validate(am->hash_acl_infos, acl_index); - hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index); memset(ha, 0, sizeof(*ha)); /* walk the newly added ACL entries and ensure that for each of them there @@ -710,10 +757,12 @@ void hash_acl_add(acl_main_t *am, int acl_index) hash_acl_reapply(am, *sw_if_index, 0, acl_index); } } + clib_mem_set_heap (oldheap); } void hash_acl_delete(acl_main_t *am, int acl_index) { + void *oldheap = hash_acl_set_heap(am); DBG("HASH ACL delete : %d", acl_index); 
/* * If the ACL is applied somewhere, remove the references of it (call hash_acl_unapply) @@ -739,12 +788,13 @@ void hash_acl_delete(acl_main_t *am, int acl_index) /* walk the mask types for the ACL about-to-be-deleted, and decrease * the reference count, possibly freeing up some of them */ int i; - hash_acl_info_t *ha = &am->hash_acl_infos[acl_index]; + hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index); for(i=0; i < vec_len(ha->rules); i++) { release_mask_type_index(am, ha->rules[i].mask_type_index); } clib_bitmap_free(ha->mask_type_index_bitmap); vec_free(ha->rules); + clib_mem_set_heap (oldheap); } u8 @@ -753,11 +803,10 @@ hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l u32 * rule_match_p, u32 * trace_bitmap) { acl_main_t *am = &acl_main; - applied_hash_ace_entry_t **applied_hash_aces = is_input ? &am->input_hash_entry_vec_by_sw_if_index[sw_if_index] : - &am->output_hash_entry_vec_by_sw_if_index[sw_if_index]; + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index); u32 match_index = multi_acl_match_get_applied_ace_index(am, pkt_5tuple); if (match_index < vec_len((*applied_hash_aces))) { - applied_hash_ace_entry_t *pae = &((*applied_hash_aces)[match_index]); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), match_index); *acl_match_p = pae->acl_index; *rule_match_p = pae->ace_index; return pae->action; diff --git a/src/plugins/acl/hash_lookup_types.h b/src/plugins/acl/hash_lookup_types.h index 1848c5ff..fbc9777b 100644 --- a/src/plugins/acl/hash_lookup_types.h +++ b/src/plugins/acl/hash_lookup_types.h @@ -53,14 +53,14 @@ typedef struct { */ u32 next_applied_entry_index; /* - * previous entry in the ring list of the chained ones. + * previous entry in the list of the chained ones, + * if ~0 then this is entry in the hash. */ u32 prev_applied_entry_index; /* - * 1 if it is the very first entry in the list, - * referenced from the hash. 
+ * chain tail, if this is the first entry */ - u8 is_first_entry; + u32 tail_applied_entry_index; /* * Action of this applied ACE */ -- cgit 1.2.3-korg From 71bb05454c97cc18e3c95127b54385c52e4c57c2 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 16 Aug 2017 12:06:15 +0200 Subject: acl-plugin: time out the sessions created by main thread too (VPP-948) In multithread setup the main thread may send packets, which may pass through the node with permit+reflect action. This creates the connection in lists for thread0, however in multithread there are no interupt handlers there. Ensure we are not spending too much time spinning in a tight cycle by suspending the main cleaner thread until the current iteration of interrupts is processed. Change-Id: Idb7346737757ee9a67b5d3e549bc9ad9aab22e89 Signed-off-by: Andrew Yourtchenko (cherry picked from commit c1ff53f25d04ec1cc31844abd38014e91e398b5f) --- src/plugins/acl/acl.c | 2 ++ src/plugins/acl/acl.h | 2 ++ src/plugins/acl/fa_node.c | 33 ++++++++++++++++++++------------- src/plugins/acl/fa_node.h | 4 ++++ 4 files changed, 28 insertions(+), 13 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 645c6c94..a2f85d9a 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -2242,6 +2242,7 @@ acl_show_aclplugin_fn (vlib_main_t * vm, out0 = format(out0, " interrupt is pending: %d\n", pw->interrupt_is_pending); out0 = format(out0, " interrupt is needed: %d\n", pw->interrupt_is_needed); out0 = format(out0, " interrupt is unwanted: %d\n", pw->interrupt_is_unwanted); + out0 = format(out0, " interrupt generation: %d\n", pw->interrupt_generation); } out0 = format(out0, "\n\nConn cleaner thread counters:\n"); #define _(cnt, desc) out0 = format(out0, " %20lu: %s\n", am->cnt, desc); @@ -2249,6 +2250,7 @@ acl_show_aclplugin_fn (vlib_main_t * vm, #undef _ vec_terminate_c_string(out0); vlib_cli_output(vm, "\n\n%s\n\n", out0); + vlib_cli_output(vm, "Interrupt 
generation: %d\n", am->fa_interrupt_generation); vlib_cli_output(vm, "Sessions per interval: min %lu max %lu increment: %f ms current: %f ms", am->fa_min_deleted_sessions_per_interval, am->fa_max_deleted_sessions_per_interval, am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second); diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 14c6129c..dcb4e0e9 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -230,6 +230,8 @@ typedef struct { u64 fa_current_cleaner_timer_wait_interval; + int fa_interrupt_generation; + /* per-worker data related t conn management */ acl_fa_per_worker_data_t *per_worker_data; diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 1eeaa8c8..8d8f1d2e 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -1361,6 +1361,7 @@ acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, pw->interrupt_is_unwanted = 0; } } + pw->interrupt_generation = am->fa_interrupt_generation; return 0; } @@ -1404,7 +1405,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index; - + am->fa_interrupt_generation = 1; while (1) { now = clib_cpu_time_now (); @@ -1503,10 +1504,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap); #endif vec_foreach(pw0, am->per_worker_data) { - if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { - /* thread 0 in multithreaded scenario is not used */ - continue; - } CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); @@ -1541,10 +1538,6 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, clib_warning("CLEANER mains len: %d 
per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data)); #endif vec_foreach(pw0, am->per_worker_data) { - if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { - /* thread 0 in multithreaded scenario is not used */ - continue; - } CLIB_MEMORY_BARRIER (); while (pw0->clear_in_process) { CLIB_MEMORY_BARRIER (); @@ -1581,15 +1574,28 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (event_data) _vec_len (event_data) = 0; + /* + * If the interrupts were not processed yet, ensure we wait a bit, + * but up to a point. + */ + int need_more_wait = 0; + int max_wait_cycles = 100; + do { + need_more_wait = 0; + vec_foreach(pw0, am->per_worker_data) { + if (pw0->interrupt_generation != am->fa_interrupt_generation) { + need_more_wait = 1; + } + } + if (need_more_wait) { + vlib_process_suspend(vm, 0.0001); + } + } while (need_more_wait && (--max_wait_cycles > 0)); int interrupts_needed = 0; int interrupts_unwanted = 0; vec_foreach(pw0, am->per_worker_data) { - if ((pw0 == am->per_worker_data) && (vec_len(vlib_mains) > 1)) { - /* thread 0 in multithreaded scenario is not used */ - continue; - } if (pw0->interrupt_is_needed) { interrupts_needed++; /* the per-worker value is reset when sending the interrupt */ @@ -1610,6 +1616,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; } am->fa_cleaner_cnt_event_cycles++; + am->fa_interrupt_generation++; } /* NOT REACHED */ return 0; diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index a8dece4e..d5c766cd 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -147,6 +147,10 @@ typedef struct { * because there is not enough work for the current rate. */ int interrupt_is_unwanted; + /* + * Set to copy of a "generation" counter in main thread so we can sync the interrupts. 
+ */ + int interrupt_generation; } acl_fa_per_worker_data_t; -- cgit 1.2.3-korg From beb50d425dc03e082c983fde4005897d69d54288 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Tue, 8 Aug 2017 16:43:38 +0200 Subject: acl-plugin: Recreate the bihash_40_8.h in the proper place Change-Id: I30a3df53bc5fe5ab991a657918eb502bd2913440 Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.h | 3 +- src/plugins/acl/bihash_40_8.h | 84 ----------------------------------------- src/plugins/acl/fa_node.c | 2 +- src/plugins/acl/fa_node.h | 2 +- src/vppinfra/bihash_40_8.h | 87 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 87 deletions(-) delete mode 100644 src/plugins/acl/bihash_40_8.h create mode 100644 src/vppinfra/bihash_40_8.h (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index dcb4e0e9..26084a66 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -25,7 +25,8 @@ #include #include #include -#include "bihash_40_8.h" +#include + #include "fa_node.h" #include "hash_lookup_types.h" diff --git a/src/plugins/acl/bihash_40_8.h b/src/plugins/acl/bihash_40_8.h deleted file mode 100644 index 1dfb6a1e..00000000 --- a/src/plugins/acl/bihash_40_8.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#undef BIHASH_TYPE - -#define BIHASH_TYPE _40_8 -#define BIHASH_KVP_PER_PAGE 4 - -#ifndef __included_bihash_40_8_h__ -#define __included_bihash_40_8_h__ - -#include -#include -#include -#include -#include - -typedef struct -{ - u64 key[5]; - u64 value; -} clib_bihash_kv_40_8_t; - -static inline int -clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v) -{ - /* Free values are memset to 0xff, check a bit... */ - if (v->key[0] == ~0ULL && v->value == ~0ULL) - return 1; - return 0; -} - -static inline u64 -clib_bihash_hash_40_8 (const clib_bihash_kv_40_8_t * v) -{ -#if __SSE4_2__ - return clib_crc32c ((u8 *) v->key, 40); -#else - u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]; - return clib_xxhash (tmp); -#endif -} - -static inline u8 * -format_bihash_kvp_40_8 (u8 * s, va_list * args) -{ - clib_bihash_kv_40_8_t *v = va_arg (*args, clib_bihash_kv_40_8_t *); - - s = format (s, "key %llu %llu %llu %llu %llu value %llu", - v->key[0], v->key[1], v->key[2], v->key[3], v->key[4], - v->value); - return s; -} - -static inline int -clib_bihash_key_compare_40_8 (const u64 * a, const u64 * b) -{ - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) | - (a[4] ^ b[4])) == 0; -} - -#undef __included_bihash_template_h__ -#include - -#endif /* __included_bihash_40_8_h__ */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c index 8d8f1d2e..a4ba967d 100644 --- a/src/plugins/acl/fa_node.c +++ b/src/plugins/acl/fa_node.c @@ -20,7 +20,7 @@ #include #include #include -#include "bihash_40_8.h" +#include #include #include diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h index d5c766cd..fa9a2303 100644 --- a/src/plugins/acl/fa_node.h +++ b/src/plugins/acl/fa_node.h @@ -2,7 +2,7 @@ #define _FA_NODE_H_ #include -#include "bihash_40_8.h" +#include #define TCP_FLAG_FIN 0x01 #define TCP_FLAG_SYN 
0x02 diff --git a/src/vppinfra/bihash_40_8.h b/src/vppinfra/bihash_40_8.h new file mode 100644 index 00000000..974a78d8 --- /dev/null +++ b/src/vppinfra/bihash_40_8.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#undef BIHASH_TYPE +#undef BIHASH_KVP_CACHE_SIZE +#undef BIHASH_KVP_PER_PAGE + +#define BIHASH_TYPE _40_8 +#define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_CACHE_SIZE 2 + +#ifndef __included_bihash_40_8_h__ +#define __included_bihash_40_8_h__ + +#include +#include +#include +#include +#include + +typedef struct +{ + u64 key[5]; + u64 value; +} clib_bihash_kv_40_8_t; + +static inline int +clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +static inline u64 +clib_bihash_hash_40_8 (const clib_bihash_kv_40_8_t * v) +{ +#ifdef clib_crc32c_uses_intrinsics + return clib_crc32c ((u8 *) v->key, 40); +#else + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4]; + return clib_xxhash (tmp); +#endif +} + +static inline u8 * +format_bihash_kvp_40_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_40_8_t *v = va_arg (*args, clib_bihash_kv_40_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu value %llu", v->key[0], + v->key[1], v->key[2], v->key[3], v->key[4], v->value); + return s; +} + +static inline int +clib_bihash_key_compare_40_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_40_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From cada5a92faaf1dd2887df5ca200195011d2a0b8d Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Mon, 11 Sep 2017 17:22:03 +0200 Subject: acl-plugin: add startup-config section "acl-plugin" and heap/hash parameters This adds the ability to tweak the memory allocation parameters of the ACL plugin from the startup config. It may be useful in the cases involving higher limit of the connections than the default 1M, or the high number of cores. 
Change-Id: I2b6fb3f61126ff3ee998424b762b6aefe8fb1b8e Signed-off-by: Andrew Yourtchenko --- src/plugins/acl/acl.c | 50 +++++++++++++++++++++++++++++++++++++++++-- src/plugins/acl/acl.h | 10 +++++++++ src/plugins/acl/hash_lookup.c | 4 ++-- 3 files changed, 60 insertions(+), 4 deletions(-) (limited to 'src/plugins/acl/acl.h') diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c index 611efbb7..cdfe0682 100644 --- a/src/plugins/acl/acl.c +++ b/src/plugins/acl/acl.c @@ -91,7 +91,7 @@ static void * acl_set_heap(acl_main_t *am) { if (0 == am->acl_mheap) { - am->acl_mheap = mheap_alloc (0 /* use VM */ , 2 << 29); + am->acl_mheap = mheap_alloc (0 /* use VM */ , am->acl_mheap_size); mheap_t *h = mheap_header (am->acl_mheap); h->flags |= MHEAP_FLAG_THREAD_SAFE; } @@ -2596,7 +2596,47 @@ VLIB_CLI_COMMAND (aclplugin_clear_command, static) = { }; /* *INDENT-ON* */ - +static clib_error_t * +acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) +{ + acl_main_t *am = &acl_main; + u32 conn_table_hash_buckets; + u32 conn_table_hash_memory_size; + u32 conn_table_max_entries; + u32 main_heap_size; + u32 hash_heap_size; + u32 hash_lookup_hash_buckets; + u32 hash_lookup_hash_memory; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "connection hash buckets %d", &conn_table_hash_buckets)) + am->fa_conn_table_hash_num_buckets = conn_table_hash_buckets; + else if (unformat (input, "connection hash memory %d", + &conn_table_hash_memory_size)) + am->fa_conn_table_hash_memory_size = conn_table_hash_memory_size; + else if (unformat (input, "connection count max %d", + &conn_table_max_entries)) + am->fa_conn_table_max_entries = conn_table_max_entries; + else if (unformat (input, "main heap size %d", + &main_heap_size)) + am->acl_mheap_size = main_heap_size; + else if (unformat (input, "hash lookup heap size %d", + &hash_heap_size)) + am->hash_lookup_mheap_size = hash_heap_size; + else if (unformat (input, "hash lookup hash buckets %d", + 
&hash_lookup_hash_buckets)) + am->hash_lookup_hash_buckets = hash_lookup_hash_buckets; + else if (unformat (input, "hash lookup hash memory %d", + &hash_lookup_hash_memory)) + am->hash_lookup_hash_memory = hash_lookup_hash_memory; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + return 0; +} +VLIB_CONFIG_FUNCTION (acl_plugin_config, "acl-plugin"); static clib_error_t * acl_init (vlib_main_t * vm) @@ -2622,6 +2662,12 @@ acl_init (vlib_main_t * vm) acl_setup_fa_nodes(); + am->acl_mheap_size = ACL_FA_DEFAULT_HEAP_SIZE; + am->hash_lookup_mheap_size = ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE; + + am->hash_lookup_hash_buckets = ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS; + am->hash_lookup_hash_memory = ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY; + am->session_timeout_sec[ACL_TIMEOUT_TCP_TRANSIENT] = TCP_SESSION_TRANSIENT_TIMEOUT_SEC; am->session_timeout_sec[ACL_TIMEOUT_TCP_IDLE] = TCP_SESSION_IDLE_TIMEOUT_SEC; am->session_timeout_sec[ACL_TIMEOUT_UDP_IDLE] = UDP_SESSION_IDLE_TIMEOUT_SEC; diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h index 26084a66..bed22e5f 100644 --- a/src/plugins/acl/acl.h +++ b/src/plugins/acl/acl.h @@ -37,6 +37,12 @@ #define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24) #define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120 +#define ACL_FA_DEFAULT_HEAP_SIZE (2 << 29) + +#define ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE (2 << 25) +#define ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS 65536 +#define ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY (2 << 25) + extern vlib_node_registration_t acl_in_node; extern vlib_node_registration_t acl_out_node; @@ -125,6 +131,7 @@ typedef struct typedef struct { /* mheap to hold all the ACL module related allocations, other than hash */ void *acl_mheap; + u32 acl_mheap_size; /* API message ID base */ u16 msg_id_base; @@ -132,9 +139,12 @@ typedef struct { acl_list_t *acls; /* Pool of ACLs */ hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */ clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup 
hash table. */ + u32 hash_lookup_hash_buckets; + u32 hash_lookup_hash_memory; /* mheap to hold all the miscellaneous allocations related to hash-based lookups */ void *hash_lookup_mheap; + u32 hash_lookup_mheap_size; int acl_lookup_hash_initialized; applied_hash_ace_entry_t **input_hash_entry_vec_by_sw_if_index; applied_hash_ace_entry_t **output_hash_entry_vec_by_sw_if_index; diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c index 13bc6b46..7869027b 100644 --- a/src/plugins/acl/hash_lookup.c +++ b/src/plugins/acl/hash_lookup.c @@ -264,7 +264,7 @@ static void * hash_acl_set_heap(acl_main_t *am) { if (0 == am->hash_lookup_mheap) { - am->hash_lookup_mheap = mheap_alloc (0 /* use VM */ , 2 << 25); + am->hash_lookup_mheap = mheap_alloc (0 /* use VM */ , am->hash_lookup_mheap_size); mheap_t *h = mheap_header (am->hash_lookup_mheap); h->flags |= MHEAP_FLAG_THREAD_SAFE; } @@ -307,7 +307,7 @@ hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index) DBG0("HASH ACL apply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index); if (!am->acl_lookup_hash_initialized) { BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash", - 65536, 2 << 25); + am->hash_lookup_hash_buckets, am->hash_lookup_hash_memory); am->acl_lookup_hash_initialized = 1; } -- cgit 1.2.3-korg