From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/l2/l2_fib.c | 857 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 857 insertions(+) create mode 100644 src/vnet/l2/l2_fib.c (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c new file mode 100644 index 00000000..d34836e3 --- /dev/null +++ b/src/vnet/l2/l2_fib.c @@ -0,0 +1,857 @@ +/* + * l2_fib.c : layer 2 forwarding table (aka mac table) + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +/** + * @file + * @brief Ethernet MAC Address FIB Table Management. + * + * The MAC Address forwarding table for bridge-domains is called the l2fib. + * Entries are added automatically as part of mac learning, but MAC Addresses + * entries can also be added manually. + * + */ + +typedef struct +{ + + /* hash table */ + BVT (clib_bihash) mac_table; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2fib_main_t; + +l2fib_main_t l2fib_main; + + +/** Format sw_if_index. 
If the value is ~0, use the text "N/A".
+ *
+ * Format function intended to be invoked through "%U": it pulls a
+ * vnet_main_t * and then a u32 sw_if_index from the va_list and appends
+ * either "N/A" or the formatted software interface name to s.
+ */
+u8 *
+format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  u32 sw_if_index = va_arg (*args, u32);
+  if (sw_if_index == ~0)        /* ~0 means "no interface" */
+    return format (s, "N/A");
+  else
+    return format (s, "%U",
+                   format_vnet_sw_interface_name, vnm,
+                   vnet_get_sw_interface (vnm, sw_if_index));
+}
+/**
+ * Collect l2fib entries into two parallel vectors.
+ *
+ * Walks every bucket, page and key/value slot of the mac_table and, for
+ * each occupied slot, appends the key to *l2fe_key and the result to
+ * *l2fe_res.
+ *
+ * @param bd_index  bridge domain index to match, or ~0 to take all entries
+ * @param l2fe_key  vector the matching keys are appended to
+ * @param l2fe_res  vector the matching results are appended to
+ */
+void
+l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key,
+                  l2fib_entry_result_t ** l2fe_res)
+{
+  l2fib_main_t *msm = &l2fib_main;
+  BVT (clib_bihash) * h = &msm->mac_table;
+  clib_bihash_bucket_t *b;
+  BVT (clib_bihash_value) * v;
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  int i, j, k;
+
+  for (i = 0; i < h->nbuckets; i++)
+    {
+      b = &h->buckets[i];
+      if (b->offset == 0)       /* bucket holds nothing */
+        continue;
+      v = BV (clib_bihash_get_value) (h, b->offset);
+      for (j = 0; j < (1 << b->log2_pages); j++)
+        {
+          for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+            {
+              /* a slot with key and value both all-ones is treated as free */
+              if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+                continue;
+
+              key.raw = v->kvp[k].key;
+              result.raw = v->kvp[k].value;
+
+              if ((bd_index == ~0) || (bd_index == key.fields.bd_index))
+                {
+                  vec_add1 (*l2fe_key, key);
+                  vec_add1 (*l2fe_res, result);
+                }
+            }
+          v++;                  /* next page of this bucket */
+        }
+    }
+}
+
+/** Display the contents of the l2fib.
*/
+/*
+ * CLI handler for "show l2fib".
+ *
+ * At most one of the following arguments is consumed:
+ *   raw          also dump the raw bihash after the summary
+ *   verbose      print one row per entry
+ *   bd_index %d  print rows for that bridge domain index only
+ *   bd_id %d     same, after translating the id via bd_index_by_bd_id
+ *
+ * Every occupied slot of the mac_table is counted, whether or not it is
+ * printed, and the total is reported at the end. Always returns 0.
+ */
+static clib_error_t *
+show_l2fib (vlib_main_t * vm,
+            unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  l2fib_main_t *msm = &l2fib_main;
+  l2_bridge_domain_t *bd_config;
+  BVT (clib_bihash) * h = &msm->mac_table;
+  clib_bihash_bucket_t *b;
+  BVT (clib_bihash_value) * v;
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  u32 first_entry = 1;
+  u64 total_entries = 0;
+  int i, j, k;
+  u8 verbose = 0;
+  u8 raw = 0;
+  u32 bd_id, bd_index = ~0;
+  u8 now = (u8) (vlib_time_now (vm) / 60);      /* minutes, modulo 256 */
+  u8 *s = 0;
+
+  if (unformat (input, "raw"))
+    raw = 1;
+  else if (unformat (input, "verbose"))
+    verbose = 1;
+  else if (unformat (input, "bd_index %d", &bd_index))
+    verbose = 1;
+  else if (unformat (input, "bd_id %d", &bd_id))
+    {
+      uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+      if (p)
+        {
+          verbose = 1;
+          bd_index = p[0];
+        }
+      else
+        {
+          vlib_cli_output (vm, "no such bridge domain id");
+          return 0;
+        }
+    }
+
+  for (i = 0; i < h->nbuckets; i++)
+    {
+      b = &h->buckets[i];
+      if (b->offset == 0)
+        continue;
+      v = BV (clib_bihash_get_value) (h, b->offset);
+      for (j = 0; j < (1 << b->log2_pages); j++)
+        {
+          for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+            {
+              /* a slot with key and value both all-ones is treated as free */
+              if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+                continue;
+
+              key.raw = v->kvp[k].key;
+              result.raw = v->kvp[k].value;
+
+              /*
+               * bd_index == ~0 means "no bridge domain filter". Use a
+               * logical AND and an explicit comparison rather than a
+               * bitwise AND with a sign-bit test.
+               */
+              if (verbose
+                  && ((bd_index == ~0)
+                      || (bd_index == key.fields.bd_index)))
+                {
+                  /* print the header only once a row is really shown */
+                  if (first_entry)
+                    {
+                      first_entry = 0;
+                      vlib_cli_output (vm,
+                                       "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=16s",
+                                       "Mac Address", "BD Idx", "Interface",
+                                       "Index", "static", "filter", "bvi",
+                                       "Mac Age (min)");
+                    }
+
+                  bd_config = vec_elt_at_index (l2input_main.bd_configs,
+                                                key.fields.bd_index);
+
+                  if (bd_config->mac_age)
+                    {
+                      /* both values are 8-bit minute counters: undo wrap */
+                      i16 delta = now - result.fields.timestamp;
+                      delta += delta < 0 ? 256 : 0;
+                      s = format (s, "%d", delta);
+                    }
+                  else
+                    s = format (s, "disabled");
+
+                  vlib_cli_output (vm,
+                                   "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=16v",
+                                   format_ethernet_address, key.fields.mac,
+                                   key.fields.bd_index,
+                                   format_vnet_sw_if_index_name_with_NA,
+                                   msm->vnet_main, result.fields.sw_if_index,
+                                   result.fields.sw_if_index == ~0
+                                   ? -1 : result.fields.sw_if_index,
+                                   result.fields.static_mac,
+                                   result.fields.filter,
+                                   result.fields.bvi, s);
+                  vec_reset_length (s);
+                }
+              total_entries++;
+            }
+          v++;
+        }
+    }
+
+  if (total_entries == 0)
+    vlib_cli_output (vm, "no l2fib entries");
+  else
+    vlib_cli_output (vm, "%lld l2fib entries", total_entries);
+
+  if (raw)
+    vlib_cli_output (vm, "Raw Hash Table:\n%U\n",
+                     BV (format_bihash), h, 1 /* verbose */ );
+
+  vec_free (s);
+  return 0;
+}
+
+/*?
+ * This command displays the MAC Address entries of the L2 FIB table.
+ * Output can be filtered to just get the number of MAC Addresses or display
+ * each MAC Address for all bridge domains or just a single bridge domain.
+ *
+ * @cliexpar
+ * Example of how to display the number of MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib}
+ * 3 l2fib entries
+ * @cliexend
+ * Example of how to display all the MAC Address entries in the L2
+ * FIB table:
+ * @cliexstart{show l2fib verbose}
+ *     Mac Address     BD Idx         Interface          Index  static  filter  bvi  Mac Age (min)
+ *  52:54:00:53:18:33    1     GigabitEthernet0/8/0.200    3      0       0      0     disabled
+ *  52:54:00:53:18:55    1     GigabitEthernet0/8/0.200    3      1       0      0     disabled
+ *  52:54:00:53:18:77    1               N/A              -1      1       1      0     disabled
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
+  .path = "show l2fib",
+  .short_help = "show l2fib [verbose | bd_id | bd_index | raw]",
+  .function = show_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Remove all entries from the l2fib.
+ *
+ * @param keep_static  when non-zero only non-static entries are meant to
+ *                     be removed; that mode is not implemented yet, so the
+ *                     table and the learn counter are left untouched.
+ *                     When zero the hash table is freed and re-created and
+ *                     the global learn counter is reset.
+ */
+void
+l2fib_clear_table (uint keep_static)
+{
+  l2fib_main_t *mp = &l2fib_main;
+
+  if (keep_static)
+    {
+      /*
+       * TODO: remove only non-static entries.
+       * Nothing is removed here, so the learn counter must not be reset:
+       * the learned entries it accounts for are still in the table.
+       */
+      return;
+    }
+
+  /* Remove all entries */
+  BV (clib_bihash_free) (&mp->mac_table);
+  BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+                         L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+
+  l2learn_main.global_learn_count = 0;
+}
+
+/** Clear all entries in L2FIB.
+ * @TODO: Later we may want a way to remove only the non-static entries
+ */
+static clib_error_t *
+clear_l2fib (vlib_main_t * vm,
+             unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  l2fib_clear_table (0);
+  return 0;
+}
+
+/*?
+ * This command clears all the MAC Address entries from the L2 FIB table.
+ *
+ * @cliexpar
+ * Example of how to clear the L2 FIB Table:
+ * @cliexcmd{clear l2fib}
+ * Example to show the L2 FIB Table has been cleared:
+ * @cliexstart{show l2fib verbose}
+ * no l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
+  .path = "clear l2fib",
+  .short_help = "clear l2fib",
+  .function = clear_l2fib,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Add an entry to the l2fib.
+ * If the entry already exists then overwrite it
+ *
+ * @param mac          MAC address, stored in the first 6 bytes of the u64
+ * @param bd_index     bridge domain index (not the bridge domain id)
+ * @param sw_if_index  interface the MAC is reached through
+ * @param static_mac   non-zero for a static entry
+ * @param filter_mac   non-zero for a filter entry
+ * @param bvi_mac      non-zero for a BVI entry
+ */
+void
+l2fib_add_entry (u64 mac,
+                 u32 bd_index,
+                 u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac)
+{
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  l2fib_main_t *mp = &l2fib_main;
+  BVT (clib_bihash_kv) kv;
+
+  /* set up key */
+  key.raw = l2fib_make_key ((u8 *) & mac, bd_index);
+
+  /* set up result */
+  result.raw = 0;               /* clear all fields */
+  result.fields.sw_if_index = sw_if_index;
+  result.fields.static_mac = static_mac;
+  result.fields.filter = filter_mac;
+  result.fields.bvi = bvi_mac;
+
+  kv.key = key.raw;
+  kv.value = result.raw;
+
+  BV (clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */ );
+
+  /*
+   * increment counter if dynamically learned mac, i.e. for non-static
+   * entries only (the test used to be inverted)
+   */
+  if (!result.fields.static_mac)
+    {
+      l2learn_main.global_learn_count++;
+    }
+}
+
+/**
+ * Add an entry to the L2FIB.
+ * The CLI format is:
+ *    l2fib add <mac> <bd-id> <intf> [static | bvi]
+ *    l2fib add <mac> <bd-id> filter
+ * Note that filter and bvi entries are always static
+ */
+static clib_error_t *
+l2fib_add (vlib_main_t * vm,
+           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  /* zeroed so that no uninitialized bytes are passed on by value;
+     presumably the parser fills only the 6 address bytes */
+  u64 mac = 0;
+  u32 bd_id;
+  u32 bd_index;
+  u32 sw_if_index = ~0;
+  u32 filter_mac = 0;
+  u32 static_mac = 0;
+  u32 bvi_mac = 0;
+  uword *p;
+
+  if (!unformat_user (input, unformat_ethernet_address, &mac))
+    {
+      error = clib_error_return (0, "expected mac address `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expected bridge domain ID `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  /* translate the user-visible bridge domain id into its index */
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+  if (!p)
+    {
+      error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+      goto done;
+    }
+  bd_index = p[0];
+
+  if (unformat (input, "filter"))
+    {
+      /* filter entries carry no interface: sw_if_index stays ~0 */
+      filter_mac = 1;
+      static_mac = 1;
+    }
+  else
+    {
+      if (!unformat_user
+          (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+        {
+          error = clib_error_return (0, "unknown interface `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+      if (unformat (input, "static"))
+        {
+          static_mac = 1;
+        }
+      else if (unformat (input, "bvi"))
+        {
+          bvi_mac = 1;
+          static_mac = 1;
+        }
+    }
+
+  l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac,
+                   bvi_mac);
+
+done:
+  return error;
+}
+
+/*?
+ * This command adds a MAC Address entry to the L2 FIB table
+ * of an existing bridge-domain. The MAC Address can be static
+ * or dynamic. This command also allows a filter to be added,
+ * such that packets with given MAC Addresses (source mac or
+ * destination mac match) are dropped.
+ *
+ * @cliexpar
+ * Example of how to add a dynamic MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:33 200 GigabitEthernet0/8/0.200}
+ * Example of how to add a static MAC Address entry to the L2 FIB table
+ * of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:55 200 GigabitEthernet0/8/0.200 static}
+ * Example of how to add a filter such that a packet with the given MAC
+ * Address will be dropped in a given bridge-domain (where 200 is the
+ * bridge-domain-id):
+ * @cliexcmd{l2fib add 52:54:00:53:18:77 200 filter}
+ * Example of show command of the provisioned MAC Addresses and filters:
+ * @cliexstart{show l2fib verbose}
+ *     Mac Address     BD Idx         Interface          Index  static  filter  bvi  Mac Age (min)
+ *  52:54:00:53:18:33    1     GigabitEthernet0/8/0.200    3      0       0      0     disabled
+ *  52:54:00:53:18:55    1     GigabitEthernet0/8/0.200    3      1       0      0     disabled
+ *  52:54:00:53:18:77    1               N/A              -1      1       1      0     disabled
+ * 3 l2fib entries
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
+  .path = "l2fib add",
+  .short_help = "l2fib add filter | [static | bvi]",
+  .function = l2fib_add,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Return the MAC address that follows mac: the 6 address bytes, read in
+ * network byte order, are incremented by one and the 2 spare bytes of the
+ * u64 are cleared.
+ */
+static u64
+l2fib_test_next_mac (u64 mac)
+{
+  u64 tmp = clib_net_to_host_u64 (mac);
+  tmp >>= 16;
+  tmp++;
+  tmp <<= 16;
+  return clib_host_to_net_u64 (tmp);
+}
+
+/**
+ * CLI handler for "test l2fib".
+ *
+ * Adds, checks and/or deletes "count" consecutive MAC addresses starting
+ * at the given one, always in bd_index 0 and on sw_if_index 8. When
+ * several actions are given they run in the order add, check, del and
+ * each of them starts again from the first MAC address.
+ */
+static clib_error_t *
+l2fib_test_command_fn (vlib_main_t * vm,
+                       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  u64 mac = 0, save_mac;
+  u32 bd_index = 0;
+  u32 sw_if_index = 8;
+  u32 static_mac = 0;           /* test entries are not static */
+  u32 filter_mac = 0;
+  u32 bvi_mac = 0;
+  u32 is_add = 0;
+  u32 is_del = 0;
+  u32 is_check = 0;
+  u32 count = 1;
+  int mac_set = 0;
+  int i;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "mac %U", unformat_ethernet_address, &mac))
+        mac_set = 1;
+      else if (unformat (input, "add"))
+        is_add = 1;
+      else if (unformat (input, "del"))
+        is_del = 1;
+      else if (unformat (input, "check"))
+        is_check = 1;
+      else if (unformat (input, "count %d", &count))
+        ;
+      else
+        break;
+    }
+
+  if (mac_set == 0)
+    return clib_error_return (0, "mac not set");
+
+  if (is_add == 0 && is_del == 0 && is_check == 0)
+    return clib_error_return (0,
+                              "noop: pick at least one of (add,del,check)");
+
+  save_mac = mac;
+
+  if (is_add)
+    {
+      for (i = 0; i < count; i++)
+        {
+          /* the 4th argument is the static flag, not the MAC address */
+          l2fib_add_entry (mac, bd_index, sw_if_index, static_mac,
+                           filter_mac, bvi_mac);
+          mac = l2fib_test_next_mac (mac);
+        }
+    }
+
+  if (is_check)
+    {
+      BVT (clib_bihash_kv) kv;
+      l2fib_main_t *mp = &l2fib_main;
+
+      mac = save_mac;
+
+      for (i = 0; i < count; i++)
+        {
+          kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+          if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+            {
+              clib_warning ("key %U AWOL", format_ethernet_address, &mac);
+              break;
+            }
+          mac = l2fib_test_next_mac (mac);
+        }
+    }
+
+  if (is_del)
+    {
+      /* restart from the first MAC; add/check may have advanced it */
+      mac = save_mac;
+
+      for (i = 0; i < count; i++)
+        {
+          l2fib_del_entry (mac, bd_index);
+          mac = l2fib_test_next_mac (mac);
+        }
+    }
+
+  return error;
+}
+
+/*?
+ * The set of 'test l2fib' commands allows the L2 FIB table of the default
+ * bridge domain (bridge-domain-id of 0) to be modified.
+ * The handler always operates on bd_index 0 and sw_if_index 8.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to add a set of 4 sequential MAC Address entries to L2
+ * FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib add mac 52:54:00:53:00:00 count 4}
+ *
+ * Show the set of 4 sequential MAC Address entries that were added:
+ * @cliexstart{show l2fib verbose}
+ *     Mac Address     BD Idx         Interface          Index  static  filter  bvi  Mac Age (min)
+ *  52:54:00:53:00:00    0     GigabitEthernet0/8/0.300    8      0       0      0     disabled
+ *  52:54:00:53:00:01    0     GigabitEthernet0/8/0.300    8      0       0      0     disabled
+ *  52:54:00:53:00:03    0     GigabitEthernet0/8/0.300    8      0       0      0     disabled
+ *  52:54:00:53:00:02    0     GigabitEthernet0/8/0.300    8      0       0      0     disabled
+ * 4 l2fib entries
+ * @cliexend
+ *
+ * Example of how to check that the set of 4 sequential MAC Address
+ * entries were added to L2 FIB table of the default
+ * bridge-domain. Used a count of 5 to produce an error:
+ *
+ * @cliexcmd{test l2fib check mac 52:54:00:53:00:00 count 5}
+ * The output of the check command is in the log files. Log file
+ * location may vary based on your OS and Version:
+ *
+ * # tail -f /var/log/messages | grep l2fib_test_command_fn
+ *
+ * Sep 7 17:15:24 localhost vnet[4952]: l2fib_test_command_fn:446: key 52:54:00:53:00:04 AWOL
+ *
+ * Example of how to delete a set of 4 sequential MAC Address entries
+ * from L2 FIB table of the default bridge-domain:
+ * @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_test_command, static) = {
+  .path = "test l2fib",
+  .short_help = "test l2fib [add|del|check] mac count ",
+  .function = l2fib_test_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Delete an entry from the l2fib.
+ * Return 0 if the entry was deleted, or 1 if it was not found
+ *
+ * @param mac       MAC address, stored in the first 6 bytes of the u64
+ * @param bd_index  bridge domain index (not the bridge domain id)
+ */
+u32
+l2fib_del_entry (u64 mac, u32 bd_index)
+{
+  l2fib_entry_result_t result;
+  l2fib_main_t *mp = &l2fib_main;
+  BVT (clib_bihash_kv) kv;
+
+  /* set up key */
+  kv.key = l2fib_make_key ((u8 *) & mac, bd_index);
+
+  /* look the entry up first: its result tells whether it was learned */
+  if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv))
+    return 1;
+
+  result.raw = kv.value;
+
+  /*
+   * decrement counter if dynamically learned mac, i.e. for non-static
+   * entries only (the test used to be inverted)
+   */
+  if (!result.fields.static_mac)
+    {
+      if (l2learn_main.global_learn_count > 0)
+        {
+          l2learn_main.global_learn_count--;
+        }
+    }
+
+  /* Remove entry from hash table */
+  BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ );
+  return 0;
+}
+
+/**
+ * Delete an entry from the L2FIB.
+ * The CLI format is:
+ *    l2fib del <mac> <bd-id>
+ */
+static clib_error_t *
+l2fib_del (vlib_main_t * vm,
+           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  bd_main_t *bdm = &bd_main;
+  clib_error_t *error = 0;
+  /* zeroed so that no uninitialized bytes are passed on by value;
+     presumably the parser fills only the 6 address bytes */
+  u64 mac = 0;
+  u32 bd_id;
+  u32 bd_index;
+  uword *p;
+
+  if (!unformat_user (input, unformat_ethernet_address, &mac))
+    {
+      error = clib_error_return (0, "expected mac address `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (!unformat (input, "%d", &bd_id))
+    {
+      error = clib_error_return (0, "expected bridge domain ID `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  /* translate the user-visible bridge domain id into its index */
+  p = hash_get (bdm->bd_index_by_bd_id, bd_id);
+  if (!p)
+    {
+      error = clib_error_return (0, "bridge domain ID %d invalid", bd_id);
+      goto done;
+    }
+  bd_index = p[0];
+
+  /* Delete the entry */
+  if (l2fib_del_entry (mac, bd_index))
+    {
+      error = clib_error_return (0, "mac entry not found");
+      goto done;
+    }
+
+done:
+  return error;
+}
+
+/*?
+ * This command deletes an existing MAC Address entry from the L2 FIB
+ * table of an existing bridge-domain.
+ *
+ * @cliexpar
+ * Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
+ * @cliexcmd{l2fib del 52:54:00:53:18:33 200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
+  .path = "l2fib del",
+  .short_help = "l2fib del ",
+  .function = l2fib_del,
+};
+/* *INDENT-ON* */
+
+/** Return a pointer to the l2fib MAC hash table held in l2fib_main. */
+BVT (clib_bihash) * get_mac_table (void)
+{
+  l2fib_main_t *mp = &l2fib_main;
+  return &mp->mac_table;
+}
+/**
+ * MAC aging process node.
+ *
+ * Loops forever. While aging is enabled it waits for an event or for
+ * 60 seconds minus the duration of the previous scan; while disabled it
+ * waits for an event only. A START event enables aging and triggers a
+ * scan, a STOP event disables aging and skips the scan.
+ *
+ * A scan walks the whole mac_table and deletes every non-static entry
+ * whose age in minutes exceeds the mac_age of its bridge domain. Entries
+ * of bridge domains with mac_age 0 are never aged.
+ */
+static uword
+l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+                               vlib_frame_t * f)
+{
+  uword event_type, *event_data = 0;
+  l2fib_main_t *msm = &l2fib_main;
+  l2_bridge_domain_t *bd_config;
+  BVT (clib_bihash) * h = &msm->mac_table;
+  clib_bihash_bucket_t *b;
+  BVT (clib_bihash_value) * v;
+  l2fib_entry_key_t key;
+  l2fib_entry_result_t result;
+  int i, j, k;
+  bool enabled = 0;
+  f64 start_time, last_run_duration = 0, t;
+  i16 delta;
+
+  while (1)
+    {
+      if (enabled)
+        vlib_process_wait_for_event_or_clock (vm, 60 - last_run_duration);
+      else
+        vlib_process_wait_for_event (vm);
+
+      event_type = vlib_process_get_events (vm, &event_data);
+      vec_reset_length (event_data);    /* event payload is not used */
+
+      switch (event_type)
+        {
+        case ~0:                /* presumably: no event, the clock expired */
+          break;
+        case L2_MAC_AGE_PROCESS_EVENT_START:
+          enabled = 1;
+          break;
+        case L2_MAC_AGE_PROCESS_EVENT_STOP:
+          enabled = 0;
+          continue;             /* back to waiting, no scan */
+        default:
+          ASSERT (0);
+        }
+      /* last_run_duration temporarily holds the scan start time */
+      last_run_duration = start_time = vlib_time_now (vm);
+      for (i = 0; i < h->nbuckets; i++)
+        {
+          /* Allow no more than 10us without a pause */
+          t = vlib_time_now (vm);
+          if (t > start_time + 10e-6)
+            {
+              vlib_process_suspend (vm, 100e-6);        /* suspend for 100 us */
+              start_time = vlib_time_now (vm);
+            }
+
+          /* prefetch bucket i + 3 and the first page of bucket i + 1 */
+          if (i < (h->nbuckets - 3))
+            {
+              b = &h->buckets[i + 3];
+              CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+              b = &h->buckets[i + 1];
+              if (b->offset)
+                {
+                  v = BV (clib_bihash_get_value) (h, b->offset);
+                  CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD);
+                }
+            }
+
+          b = &h->buckets[i];
+          if (b->offset == 0)
+            continue;
+          v = BV (clib_bihash_get_value) (h, b->offset);
+          for (j = 0; j < (1 << b->log2_pages); j++)
+            {
+              for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+                {
+                  /* key and value both all-ones: slot treated as free */
+                  if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+                    continue;
+
+                  key.raw = v->kvp[k].key;
+                  result.raw = v->kvp[k].value;
+
+                  if (result.fields.static_mac) /* static entries never age */
+                    continue;
+
+                  bd_config = vec_elt_at_index (l2input_main.bd_configs,
+                                                key.fields.bd_index);
+
+                  if (bd_config->mac_age == 0)  /* aging off for this bd */
+                    continue;
+
+                  /* age in minutes; both operands are 8-bit, so undo wrap */
+                  delta = (u8) (start_time / 60) - result.fields.timestamp;
+                  delta += delta < 0 ? 256 : 0;
+
+                  if (delta > bd_config->mac_age)
+                    {
+                      /* hand the MAC bytes over in the u64 form that
+                         l2fib_del_entry takes */
+                      void *p = &key.fields.mac;
+                      l2fib_del_entry (*(u64 *) p, key.fields.bd_index);
+                    }
+                }
+              v++;
+            }
+        }
+      /* turn the saved start time into the elapsed scan time */
+      last_run_duration = vlib_time_now (vm) - last_run_duration;
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
+    .function = l2fib_mac_age_scanner_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "l2fib-mac-age-scanner-process",
+};
+/* *INDENT-ON* */
+/**
+ * Initialize the l2fib: record the vlib/vnet main pointers, create the
+ * MAC hash table and sanity-check l2fib_make_key. Always returns 0.
+ */
+clib_error_t *
+l2fib_init (vlib_main_t * vm)
+{
+  l2fib_main_t *mp = &l2fib_main;
+  l2fib_entry_key_t test_key;
+  u8 test_mac[6];
+
+  mp->vlib_main = vm;
+  mp->vnet_main = vnet_get_main ();
+
+  /* Create the hash table */
+  BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table",
+                         L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE);
+
+  /* verify the key constructor is good, since it is endian-sensitive */
+  memset (test_mac, 0, sizeof (test_mac));
+  test_mac[0] = 0x11;
+  test_key.raw = 0;
+  test_key.raw = l2fib_make_key ((u8 *) & test_mac, 0x1234);
+  ASSERT (test_key.fields.mac[0] == 0x11);
+  ASSERT (test_key.fields.bd_index == 0x1234);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (l2fib_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
--
cgit 1.2.3-korg

From da1f2c7cffb0de4ef05a48ffd107214eb11fa45f Mon Sep 17 00:00:00 2001
From: John Lo
Date: Fri, 24 Mar 2017 20:11:15 -0400
Subject: Implement MAC Flush
for BD or Interface from the L2FIB Allow non-static MACs in the L2FIB which is associated with an interface or a bridge domain (BD) be flushed. MAC flush are initiated automatically when an interface is removed from a BD or when a BD is deleted. MAC flush can also be invoked manually via the following CLI: l2fib mac-flush interface l2fib mac-flush bridge-domain Change-Id: Ie33243622834810a765f48ebcd22bdb8e8fc87a4 Signed-off-by: John Lo --- src/vnet/buffer.h | 8 ++- src/vnet/l2/l2_bd.c | 41 +++++++---- src/vnet/l2/l2_bd.h | 3 + src/vnet/l2/l2_fib.c | 188 +++++++++++++++++++++++++++++++++++++++++++++---- src/vnet/l2/l2_fib.h | 39 +++++----- src/vnet/l2/l2_input.c | 14 ++++ src/vnet/l2/l2_input.h | 3 + src/vnet/l2/l2_learn.c | 10 ++- src/vnet/l2/l2_learn.h | 1 + 9 files changed, 258 insertions(+), 49 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index f08b4fc1..ea3ce093 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -169,9 +169,11 @@ typedef struct struct { u32 feature_bitmap; - u16 bd_index; // bridge-domain index - u8 l2_len; // ethernet header length - u8 shg; // split-horizon group + u16 bd_index; /* bridge-domain index */ + u8 l2_len; /* ethernet header length */ + u8 shg; /* split-horizon group */ + u8 bd_sn; /* bridge domain seq# */ + u8 int_sn; /* interface seq# */ } l2; /* l2tpv3 softwire encap, only valid there */ diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index f741b643..6c01368b 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -115,6 +115,8 @@ bd_delete_bd_index (bd_main_t * bdm, u32 bd_id) l2input_main.bd_configs[bd_index].bd_id = ~0; l2input_main.bd_configs[bd_index].feature_bitmap = 0; + l2fib_flush_bd_mac (vlib_get_main (), bd_index); + return 0; } @@ -900,7 +902,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) u32 bd_index = ~0; l2_bridge_domain_t *bd_config; u32 start, end; - u32 printed; u32 detail = 0; u32 intf = 0; u32 arp = 
0; @@ -942,7 +943,8 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) } /* Show all bridge-domains that have been initialized */ - printed = 0; + u32 printed = 0; + u8 *as = 0; for (bd_index = start; bd_index < end; bd_index++) { bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); @@ -952,26 +954,32 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { printed = 1; vlib_cli_output (vm, - "%=5s %=7s %=10s %=10s %=10s %=10s %=10s %=14s", - "ID", "Index", "Learning", "U-Forwrd", - "UU-Flood", "Flooding", "ARP-Term", + "%=5s %=7s %=4s %=9s %=9s %=9s %=9s %=9s %=9s %=9s", + "ID", "Index", "BSN", "Age(min)", "Learning", + "U-Forwrd", "UU-Flood", "Flooding", "ARP-Term", "BVI-Intf"); } + if (bd_config->mac_age) + as = format (as, "%d", bd_config->mac_age); + else + as = format (as, "off"); vlib_cli_output (vm, - "%=5d %=7d %=10s %=10s %=10s %=10s %=10s %=14U", - bd_config->bd_id, bd_index, + "%=5d %=7d %=4d %=9v %=9s %=9s %=9s %=9s %=9s %=9U", + bd_config->bd_id, bd_index, bd_config->seq_num, as, bd_config->feature_bitmap & L2INPUT_FEAT_LEARN ? "on" : "off", - bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? "on" - : "off", + bd_config->feature_bitmap & L2INPUT_FEAT_FWD ? + "on" : "off", bd_config->feature_bitmap & L2INPUT_FEAT_UU_FLOOD ? "on" : "off", bd_config->feature_bitmap & L2INPUT_FEAT_FLOOD ? "on" : "off", bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM ? 
- "on" : "off", format_vnet_sw_if_index_name_with_NA, + "on" : "off", + format_vnet_sw_if_index_name_with_NA, vnm, bd_config->bvi_sw_if_index); + vec_reset_length (as); if (detail || intf) { @@ -981,19 +989,21 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { l2_flood_member_t *member = vec_elt_at_index (bd_config->members, i); + l2_input_config_t *int_config = + l2input_intf_config (member->sw_if_index); u32 vtr_opr, dot1q, tag1, tag2; if (i == 0) { - vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=9s%=30s", - "Interface", "Index", "SHG", "BVI", - "TxFlood", "VLAN-Tag-Rewrite"); + vlib_cli_output (vm, "\n%=30s%=7s%=5s%=5s%=5s%=9s%=30s", + "Interface", "If-idx", "ISN", "SHG", + "BVI", "TxFlood", "VLAN-Tag-Rewrite"); } l2vtr_get (vm, vnm, member->sw_if_index, &vtr_opr, &dot1q, &tag1, &tag2); - vlib_cli_output (vm, "%=30U%=7d%=5d%=5s%=9s%=30U", + vlib_cli_output (vm, "%=30U%=7d%=5d%=5d%=5s%=9s%=30U", format_vnet_sw_if_index_name, vnm, member->sw_if_index, member->sw_if_index, - member->shg, + int_config->seq_num, member->shg, member->flags & L2_FLOOD_MEMBER_BVI ? "*" : "-", i < bd_config->flood_count ? 
"*" : "-", format_vtr, vtr_opr, dot1q, tag1, tag2); @@ -1027,6 +1037,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) } } } + vec_free (as); if (!printed) { diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h index 4bb9bc9b..5c2502d9 100644 --- a/src/vnet/l2/l2_bd.h +++ b/src/vnet/l2/l2_bd.h @@ -86,6 +86,9 @@ typedef struct /* mac aging */ u8 mac_age; + /* sequence number for bridge domain based flush of MACs */ + u8 seq_num; + } l2_bridge_domain_t; /* Return 1 if bridge domain has been initialized */ diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index d34836e3..fadd79eb 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -168,10 +168,10 @@ show_l2fib (vlib_main_t * vm, { first_entry = 0; vlib_cli_output (vm, - "%=19s%=7s%=30s%=7s%=8s%=8s%=5s%=16s", - "Mac Address", "BD Idx", "Interface", - "Index", "static", "filter", "bvi", - "Mac Age (min)"); + "%=19s%=7s%=7s%=8s%=9s%=7s%=7s%=5s%=30s", + "Mac-Address", "BD-Idx", "If-Idx", + "BSN-ISN", "Age(min)", "static", "filter", + "bvi", "Interface-Name"); } key.raw = v->kvp[k].key; @@ -183,26 +183,27 @@ show_l2fib (vlib_main_t * vm, bd_config = vec_elt_at_index (l2input_main.bd_configs, key.fields.bd_index); - if (bd_config->mac_age) + if (bd_config->mac_age && !result.fields.static_mac) { i16 delta = now - result.fields.timestamp; delta += delta < 0 ? 256 : 0; s = format (s, "%d", delta); } else - s = format (s, "disabled"); + s = format (s, "-"); vlib_cli_output (vm, - "%=19U%=7d%=30U%=7d%=8d%=8d%=5d%=16v", + "%=19U%=7d%=7d %3d/%-3d%=9v%=7s%=7s%=5s%=30U", format_ethernet_address, key.fields.mac, key.fields.bd_index, - format_vnet_sw_if_index_name_with_NA, - msm->vnet_main, result.fields.sw_if_index, result.fields.sw_if_index == ~0 ? -1 : result.fields.sw_if_index, - result.fields.static_mac, - result.fields.filter, - result.fields.bvi, s); + result.fields.bd_sn, result.fields.int_sn, + s, result.fields.static_mac ? "*" : "-", + result.fields.filter ? 
"*" : "-", + result.fields.bvi ? "*" : "-", + format_vnet_sw_if_index_name_with_NA, + msm->vnet_main, result.fields.sw_if_index); vec_reset_length (s); } total_entries++; @@ -330,6 +331,15 @@ l2fib_add_entry (u64 mac, result.fields.static_mac = static_mac; result.fields.filter = filter_mac; result.fields.bvi = bvi_mac; + if (!static_mac) + { + l2_input_config_t *int_config = l2input_intf_config (sw_if_index); + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, + bd_index); + result.fields.int_sn = int_config->seq_num; + result.fields.bd_sn = bd_config->seq_num; + } kv.key = key.raw; kv.value = result.raw; @@ -703,6 +713,141 @@ VLIB_CLI_COMMAND (l2fib_del_cli, static) = { }; /* *INDENT-ON* */ +/** + Kick off ager to scan MACs to age/delete MAC entries +*/ +void +l2fib_start_ager_scan (vlib_main_t * vm) +{ + l2_bridge_domain_t *bd_config; + int enable = 0; + + /* check if there is at least one bd with mac aging enabled */ + vec_foreach (bd_config, l2input_main.bd_configs) + if (bd_config->bd_id != ~0 && bd_config->mac_age != 0) + enable = 1; + + vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index, + enable ? 
L2_MAC_AGE_PROCESS_EVENT_START : + L2_MAC_AGE_PROCESS_EVENT_ONE_PASS, 0); +} + +/** + Flush all learned MACs from an interface +*/ +void +l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index) +{ + l2_input_config_t *int_config; + int_config = l2input_intf_config (sw_if_index); + int_config->seq_num += 1; + l2fib_start_ager_scan (vm); +} + +/** + Flush all learned MACs in a bridge domain +*/ +void +l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index) +{ + l2_bridge_domain_t *bd_config; + vec_validate (l2input_main.bd_configs, bd_index); + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + bd_config->seq_num += 1; + l2fib_start_ager_scan (vm); +} + +/** + Flush MACs, except static ones, associated with an interface + The CLI format is: + l2fib flush-mac interface +*/ +static clib_error_t * +l2fib_flush_mac_int (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + l2fib_flush_int_mac (vm, sw_if_index); + +done: + return error; +} + +/*? + * This command kick off ager to delete all existing MAC Address entries, + * except static ones, associated with an interface from the L2 FIB table. + * + * @cliexpar + * Example of how to flush MAC Address entries learned on an interface from the L2 FIB table: + * @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = { + .path = "l2fib flush-mac interface", + .short_help = "l2fib flush-mac interface ", + .function = l2fib_flush_mac_int, +}; +/* *INDENT-ON* */ + +/** + Flush bridge-domain MACs except static ones. 
+ The CLI format is: + l2fib flush-mac bridge-domain +*/ +static clib_error_t * +l2fib_flush_mac_bd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + bd_main_t *bdm = &bd_main; + clib_error_t *error = 0; + u32 bd_index, bd_id; + uword *p; + + if (!unformat (input, "%d", &bd_id)) + { + error = clib_error_return (0, "expecting bridge-domain id but got `%U'", + format_unformat_error, input); + goto done; + } + + p = hash_get (bdm->bd_index_by_bd_id, bd_id); + if (p) + bd_index = *p; + else + return clib_error_return (0, "No such bridge domain %d", bd_id); + + l2fib_flush_bd_mac (vm, bd_index); + +done: + return error; +} + +/*? + * This command kick off ager to delete all existing MAC Address entries, + * except static ones, in a bridge domain from the L2 FIB table. + * + * @cliexpar + * Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table: + * @cliexcmd{l2fib flush-mac bridge-domain 1000} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = { + .path = "l2fib flush-mac bridge-domain", + .short_help = "l2fib flush-mac bridge-domain ", + .function = l2fib_flush_mac_bd, +}; +/* *INDENT-ON* */ + BVT (clib_bihash) * get_mac_table (void) { @@ -716,6 +861,7 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, { uword event_type, *event_data = 0; l2fib_main_t *msm = &l2fib_main; + l2_input_config_t *int_config; l2_bridge_domain_t *bd_config; BVT (clib_bihash) * h = &msm->mac_table; clib_bihash_bucket_t *b; @@ -747,6 +893,9 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, case L2_MAC_AGE_PROCESS_EVENT_STOP: enabled = 0; continue; + case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS: + enabled = 0; + break; default: ASSERT (0); } @@ -790,8 +939,19 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (result.fields.static_mac) continue; - bd_config = vec_elt_at_index (l2input_main.bd_configs, - key.fields.bd_index); + 
int_config = + l2input_intf_config (result.fields.sw_if_index); + bd_config = + vec_elt_at_index (l2input_main.bd_configs, + key.fields.bd_index); + + if ((result.fields.int_sn != int_config->seq_num) || + (result.fields.bd_sn != bd_config->seq_num)) + { + void *p = &key.fields.mac; + l2fib_del_entry (*(u64 *) p, key.fields.bd_index); + continue; + } if (bd_config->mac_age == 0) continue; diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index 4a2da59b..7e49d74b 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -66,7 +66,8 @@ typedef struct u8 filter:1; /* drop packets to/from this mac */ u8 unused1:5; u8 timestamp; /* timestamp for aging */ - u16 unused2; + u8 int_sn; /* interface seq num */ + u8 bd_sn; /* bridge domain seq num */ } fields; u64 raw; }; @@ -313,22 +314,28 @@ l2fib_lookup_4 (BVT (clib_bihash) * mac_table, } } +void l2fib_clear_table (uint keep_static); + +void +l2fib_add_entry (u64 mac, + u32 bd_index, + u32 sw_if_index, u32 static_mac, u32 drop_mac, u32 bvi_mac); + +u32 l2fib_del_entry (u64 mac, u32 bd_index); + +void l2fib_start_ager_scan (vlib_main_t * vm); + +void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index); + +void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index); + +void +l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, + l2fib_entry_result_t ** l2fe_res); + +u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); + BVT (clib_bihash) * get_mac_table (void); - void - l2fib_clear_table (uint keep_static); - void - l2fib_add_entry (u64 mac, - u32 bd_index, - u32 sw_if_index, - u32 static_mac, u32 drop_mac, u32 bvi_mac); -u32 -l2fib_del_entry (u64 mac, u32 bd_index); - - void - l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, - l2fib_entry_result_t ** l2fe_res); - - u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); #endif diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 3aa5331b..041ff38d 100644 --- a/src/vnet/l2/l2_input.c +++ 
b/src/vnet/l2/l2_input.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -201,6 +202,9 @@ classify_and_dispatch (vlib_main_t * vm, /* Get config for the bridge domain interface */ bd_config = vec_elt_at_index (msm->bd_configs, bd_index0); + /* Save bridge domain seq_num */ + vnet_buffer (b0)->l2.bd_sn = bd_config->seq_num; + /* * Process bridge domain feature enables. * To perform learning/flooding/forwarding, the corresponding bit @@ -214,6 +218,9 @@ classify_and_dispatch (vlib_main_t * vm, /* mask out features from bitmap using packet type and bd config */ feature_bitmap = config->feature_bitmap & feat_mask; + /* Save interface seq_num */ + vnet_buffer (b0)->l2.int_sn = config->seq_num; + /* save for next feature graph nodes */ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; @@ -561,6 +568,12 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); } + + /* Clear MACs learned on the interface */ + if ((config->feature_bitmap | L2INPUT_FEAT_LEARN) || + (bd_config->feature_bitmap | L2INPUT_FEAT_LEARN)) + l2fib_flush_int_mac (vm, sw_if_index); + l2_if_adjust--; } else if (config->xconnect) @@ -632,6 +645,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ config->xconnect = 0; config->bridge = 1; config->bd_index = bd_index; + config->seq_num += 1; /* * Enable forwarding, flooding, learning and ARP termination by default diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index f3fada6a..262f75c7 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -53,6 +53,9 @@ typedef struct /* split horizon group */ u8 shg; + /* sequence number for interface based flush of MACs */ + u8 seq_num; + } l2_input_config_t; diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index afe7f478..faed0d66 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -140,7 
+140,11 @@ l2learn_process (vlib_node_runtime_t * node, counter_base[L2LEARN_ERROR_HIT] += 1; if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) result0->fields.timestamp = timestamp; - + if (PREDICT_FALSE + (result0->fields.int_sn != vnet_buffer (b0)->l2.int_sn)) + result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; + if (PREDICT_FALSE (result0->fields.bd_sn != vnet_buffer (b0)->l2.bd_sn)) + result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; } else if (result0->raw == ~0) { @@ -167,6 +171,8 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; + result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; + result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; kv.key = key0->raw; kv.value = result0->raw; @@ -204,6 +210,8 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; + result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; + result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; kv.key = key0->raw; kv.value = result0->raw; diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h index 5bb1130b..0d95de04 100644 --- a/src/vnet/l2/l2_learn.h +++ b/src/vnet/l2/l2_learn.h @@ -51,6 +51,7 @@ enum { L2_MAC_AGE_PROCESS_EVENT_START = 1, L2_MAC_AGE_PROCESS_EVENT_STOP = 2, + L2_MAC_AGE_PROCESS_EVENT_ONE_PASS = 3, } l2_mac_age_process_event_t; #endif -- cgit 1.2.3-korg From afc47aa36f44d3f865c6e1e48f41eded366a85ac Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Thu, 20 Apr 2017 14:45:17 +0300 Subject: L2FIB:flush interface learned macs on down Change-Id: I80a723f55fcf2ecc3209a35e8297c88b45b1abfb Signed-off-by: Eyal Bari --- src/vnet/l2/l2_bd.c | 15 ++++++--------- src/vnet/l2/l2_fib.c | 13 +++++++++++-- src/vnet/l2/l2_input.c | 42 ++++++++++++++---------------------------- src/vnet/l2/l2_input.h | 11 +++++++++++ 4 files changed, 42 
insertions(+), 39 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 4ebbb547..4d540220 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -94,6 +94,8 @@ bd_delete (bd_main_t * bdm, u32 bd_index) { l2_bridge_domain_t *bd = &l2input_main.bd_configs[bd_index]; u32 bd_id = bd->bd_id; + l2fib_flush_bd_mac (vlib_get_main (), bd_index); + hash_unset (bdm->bd_index_by_bd_id, bd_id); /* mark this index clear */ @@ -107,7 +109,6 @@ bd_delete (bd_main_t * bdm, u32 bd_index) vec_free (bd->members); hash_free (bd->mac_by_ip4); hash_free (bd->mac_by_ip6); - l2fib_flush_bd_mac (vlib_get_main (), bd_index); return 0; } @@ -219,13 +220,9 @@ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable) { - l2_bridge_domain_t *bd_config; - u32 feature_bitmap = 0; - - vec_validate (l2input_main.bd_configs, bd_index); - bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); - + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); bd_validate (bd_config); + u32 feature_bitmap = 0; if (flags & L2_LEARN) { @@ -713,13 +710,13 @@ u32 bd_add_del_ip_mac (u32 bd_index, u8 * ip_addr, u8 * mac_addr, u8 is_ip6, u8 is_add) { - l2input_main_t *l2im = &l2input_main; - l2_bridge_domain_t *bd_cfg = l2input_bd_config_from_index (l2im, bd_index); + l2_bridge_domain_t *bd_cfg = l2input_bd_config (bd_index); u64 new_mac = *(u64 *) mac_addr; u64 *old_mac; u16 *mac16 = (u16 *) & new_mac; ASSERT (sizeof (uword) == sizeof (u64)); /* make sure uword is 8 bytes */ + ASSERT (bd_is_valid (bd_cfg)); mac16[3] = 0; /* Clear last 2 unsed bytes of the 8-byte MAC address */ if (is_ip6) diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index fadd79eb..d8fcc319 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -751,8 +751,7 @@ void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index) { l2_bridge_domain_t *bd_config; - vec_validate (l2input_main.bd_configs, bd_index); - bd_config = vec_elt_at_index 
(l2input_main.bd_configs, bd_index); + bd_config = l2input_bd_config (bd_index); bd_config->seq_num += 1; l2fib_start_ager_scan (vm); } @@ -848,6 +847,16 @@ VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = { }; /* *INDENT-ON* */ +clib_error_t * +l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + l2_input_config_t *config = l2input_intf_config (sw_if_index); + if ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0 && config->bridge) + l2fib_flush_int_mac (vnm->vlib_main, sw_if_index); + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (l2fib_sw_interface_up_down); BVT (clib_bihash) * get_mac_table (void) { diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index e5d6878a..fe65e694 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -481,20 +481,12 @@ l2input_intf_config (u32 sw_if_index) u32 l2input_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable) { - l2input_main_t *mp = &l2input_main; - l2_input_config_t *config; - - vec_validate (mp->configs, sw_if_index); - config = vec_elt_at_index (mp->configs, sw_if_index); + l2_input_config_t *config = l2input_intf_config (sw_if_index); if (enable) - { - config->feature_bitmap |= feature_bitmap; - } + config->feature_bitmap |= feature_bitmap; else - { - config->feature_bitmap &= ~feature_bitmap; - } + config->feature_bitmap &= ~feature_bitmap; return config->feature_bitmap; } @@ -502,9 +494,7 @@ l2input_intf_bitmap_enable (u32 sw_if_index, u32 feature_bitmap, u32 enable) u32 l2input_set_bridge_features (u32 bd_index, u32 feat_mask, u32 feat_value) { - l2_bridge_domain_t *bd_config; - vec_validate (l2input_main.bd_configs, bd_index); - bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index);; bd_validate (bd_config); bd_config->feature_bitmap = (bd_config->feature_bitmap & ~feat_mask) | feat_value; @@ -535,7 +525,6 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * 
vnet_main, /* */ l2_output_config_t *out_config; l2_input_config_t *config; l2_bridge_domain_t *bd_config; - l2_flood_member_t member; u64 mac; i32 l2_if_adjust = 0; u32 slot; @@ -570,8 +559,8 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ } /* Clear MACs learned on the interface */ - if ((config->feature_bitmap | L2INPUT_FEAT_LEARN) || - (bd_config->feature_bitmap | L2INPUT_FEAT_LEARN)) + if ((config->feature_bitmap & L2INPUT_FEAT_LEARN) || + (bd_config->feature_bitmap & L2INPUT_FEAT_LEARN)) l2fib_flush_int_mac (vm, sw_if_index); l2_if_adjust--; @@ -661,8 +650,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ config->feature_bitmap &= ~L2INPUT_FEAT_XCONNECT; /* Set up bridge domain */ - vec_validate (mp->bd_configs, bd_index); - bd_config = vec_elt_at_index (mp->bd_configs, bd_index); + bd_config = l2input_bd_config (bd_index); bd_validate (bd_config); /* TODO: think: add l2fib entry even for non-bvi interface? */ @@ -694,9 +682,11 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ } /* Add interface to bridge-domain flood vector */ - member.sw_if_index = sw_if_index; - member.flags = bvi ? L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL; - member.shg = shg; + l2_flood_member_t member = { + .sw_if_index = sw_if_index, + .flags = bvi ? 
L2_FLOOD_MEMBER_BVI : L2_FLOOD_MEMBER_NORMAL, + .shg = shg, + }; bd_add_member (bd_config, &member); } @@ -997,10 +987,8 @@ show_int_mode (vlib_main_t * vm, char *mode; u8 *args; vnet_interface_main_t *im = &vnm->interface_main; - vnet_sw_interface_t *si, *sis = 0; - l2input_main_t *mp = &l2input_main; - l2_input_config_t *config; + vnet_sw_interface_t *si, *sis = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { u32 sw_if_index; @@ -1018,7 +1006,6 @@ show_int_mode (vlib_main_t * vm, format_unformat_error, input); goto done; } - } if (vec_len (sis) == 0) /* Get all interfaces */ @@ -1033,8 +1020,7 @@ show_int_mode (vlib_main_t * vm, vec_foreach (si, sis) { - vec_validate (mp->configs, si->sw_if_index); - config = vec_elt_at_index (mp->configs, si->sw_if_index); + l2_input_config_t *config = l2input_intf_config (si->sw_if_index); if (config->bridge) { u32 bd_id; diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index a2ade8d8..cb67cb9d 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -89,6 +89,17 @@ l2input_bd_config_from_index (l2input_main_t * l2im, u32 bd_index) return bd_is_valid (bd_config) ? bd_config : NULL; } +static_always_inline l2_bridge_domain_t * +l2input_bd_config (u32 bd_index) +{ + l2input_main_t *mp = &l2input_main; + l2_bridge_domain_t *bd_config; + + vec_validate (mp->bd_configs, bd_index); + bd_config = vec_elt_at_index (mp->bd_configs, bd_index); + return bd_config; +} + /* L2 input indication packet is from BVI, using -2 */ #define L2INPUT_BVI ((u32) (~0-1)) -- cgit 1.2.3-korg From d48c8eb7354c6c8b5b875dc70d616d11c17e9fb8 Mon Sep 17 00:00:00 2001 From: John Lo Date: Fri, 5 May 2017 12:35:25 -0400 Subject: Fix L2FIB learn counter and memory cleanup of mac_by_ip6 hash table Fix global_learn_count to be incremented or decremented by add and deletion of non-static MAC entries from L2FIB only. 
Without this fix, the counter may reach the threshold of 1M and stop MAC learning even though the number of MAC entries in L2FIB is less than the threshold. Cleanup indirect hash key memory used by mac_by_ip6 hash table on BD deletion. Change-Id: I13986c4e6304c7956122520dd3f83d6bb6e65a15 Signed-off-by: John Lo --- src/vnet/l2/l2_bd.c | 13 +++++++++++-- src/vnet/l2/l2_fib.c | 8 +++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index 4d540220..351e6987 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -94,8 +94,11 @@ bd_delete (bd_main_t * bdm, u32 bd_index) { l2_bridge_domain_t *bd = &l2input_main.bd_configs[bd_index]; u32 bd_id = bd->bd_id; - l2fib_flush_bd_mac (vlib_get_main (), bd_index); + u64 mac_addr; + ip6_address_t *ip6_addr_key; + /* flush non-static MACs in BD and removed bd_id from hash table */ + l2fib_flush_bd_mac (vlib_get_main (), bd_index); hash_unset (bdm->bd_index_by_bd_id, bd_id); /* mark this index clear */ @@ -105,9 +108,15 @@ bd_delete (bd_main_t * bdm, u32 bd_index) bd->bd_id = ~0; bd->feature_bitmap = 0; - /* free memory used by BD and flush non-static MACs in BD */ + /* free memory used by BD */ vec_free (bd->members); hash_free (bd->mac_by_ip4); + /* *INDENT-OFF* */ + hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6, + ({ + clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */ + })); + /* *INDENT-ON* */ hash_free (bd->mac_by_ip6); return 0; diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index d8fcc319..028a7326 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -215,7 +215,9 @@ show_l2fib (vlib_main_t * vm, if (total_entries == 0) vlib_cli_output (vm, "no l2fib entries"); else - vlib_cli_output (vm, "%lld l2fib entries", total_entries); + vlib_cli_output (vm, + "%lld l2fib entries with %d learned (or non-static) entries", + total_entries, l2learn_main.global_learn_count); if (raw) 
vlib_cli_output (vm, "Raw Hash Table:\n%U\n", @@ -347,7 +349,7 @@ l2fib_add_entry (u64 mac, BV (clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */ ); /* increment counter if dynamically learned mac */ - if (result.fields.static_mac) + if (result.fields.static_mac == 0) { l2learn_main.global_learn_count++; } @@ -635,7 +637,7 @@ l2fib_del_entry (u64 mac, u32 bd_index) result.raw = kv.value; /* decrement counter if dynamically learned mac */ - if (result.fields.static_mac) + if (result.fields.static_mac == 0) { if (l2learn_main.global_learn_count > 0) { -- cgit 1.2.3-korg From 7537e717d1ca6de0e33478bc50b9f7125f04c808 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Thu, 27 Apr 2017 14:07:55 +0300 Subject: L2FIB:CLI/API to flush all non-static entries added CLI l2fib flush-mac all added API l2fib_flush_all flushes all non static l2fib entries on all valid BDs Change-Id: Ic963c88f4bed56308c03ab43106033132a0e87be Signed-off-by: Eyal Bari --- src/vnet/buffer.h | 3 +- src/vnet/l2/l2.api | 10 +++ src/vnet/l2/l2_api.c | 18 +++-- src/vnet/l2/l2_fib.c | 168 ++++++++++++++++++++++++++++------------------ src/vnet/l2/l2_fib.h | 21 +++++- src/vnet/l2/l2_input.c | 13 ++-- src/vnet/l2/l2_learn.c | 12 ++-- src/vpp/api/custom_dump.c | 12 ++++ 8 files changed, 169 insertions(+), 88 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 5d1b1c4d..ec5e2f75 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -176,8 +176,7 @@ typedef struct u16 bd_index; /* bridge-domain index */ u8 l2_len; /* ethernet header length */ u8 shg; /* split-horizon group */ - u8 bd_sn; /* bridge domain seq# */ - u8 int_sn; /* interface seq# */ + u16 l2fib_sn; /* l2fib bd/int seq_num */ } l2; /* l2tpv3 softwire encap, only valid there */ diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index db42d635..e9a1f361 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -76,6 +76,16 @@ autoreply define l2_fib_clear_table u32 context; }; +/** 
\brief L2 FIB flush all entries + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +autoreply define l2fib_flush_all +{ + u32 client_index; + u32 context; +}; + /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index 8cc7c794..5f371ccd 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -48,6 +48,7 @@ _(L2_XCONNECT_DUMP, l2_xconnect_dump) \ _(L2_FIB_CLEAR_TABLE, l2_fib_clear_table) \ _(L2_FIB_TABLE_DUMP, l2_fib_table_dump) \ +_(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ @@ -106,11 +107,8 @@ vl_api_l2_fib_clear_table_t_handler (vl_api_l2_fib_clear_table_t * mp) int rv = 0; vl_api_l2_fib_clear_table_reply_t *rmp; - /* DAW-FIXME: This API should only clear non-static l2fib entries, but - * that is not currently implemented. When that TODO is fixed - * this call should be changed to pass 1 instead of 0. - */ - l2fib_clear_table (0); + /* Clear all MACs including static MACs */ + l2fib_clear_table (); REPLY_MACRO (VL_API_L2_FIB_CLEAR_TABLE_REPLY); } @@ -258,6 +256,16 @@ vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) REPLY_MACRO (VL_API_L2FIB_FLUSH_INT_REPLY); } +static void +vl_api_l2fib_flush_all_t_handler (vl_api_l2fib_flush_all_t * mp) +{ + int rv = 0; + vl_api_l2fib_flush_all_reply_t *rmp; + + l2fib_flush_all_mac (vlib_get_main ()); + REPLY_MACRO (VL_API_L2FIB_FLUSH_ALL_REPLY); +} + static void vl_api_l2fib_flush_bd_t_handler (vl_api_l2fib_flush_bd_t * mp) { diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 028a7326..d4207e35 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -54,7 +54,6 @@ typedef struct l2fib_main_t l2fib_main; - /** Format sw_if_index. 
If the value is ~0, use the text "N/A" */ u8 * format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) @@ -198,7 +197,7 @@ show_l2fib (vlib_main_t * vm, key.fields.bd_index, result.fields.sw_if_index == ~0 ? -1 : result.fields.sw_if_index, - result.fields.bd_sn, result.fields.int_sn, + result.fields.sn.bd, result.fields.sn.swif, s, result.fields.static_mac ? "*" : "-", result.fields.filter ? "*" : "-", result.fields.bvi ? "*" : "-", @@ -259,22 +258,14 @@ VLIB_CLI_COMMAND (show_l2fib_cli, static) = { /* Remove all entries from the l2fib */ void -l2fib_clear_table (uint keep_static) +l2fib_clear_table (void) { l2fib_main_t *mp = &l2fib_main; - if (keep_static) - { - /* TODO: remove only non-static entries */ - } - else - { - /* Remove all entries */ - BV (clib_bihash_free) (&mp->mac_table); - BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table", - L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); - } - + /* Remove all entries */ + BV (clib_bihash_free) (&mp->mac_table); + BV (clib_bihash_init) (&mp->mac_table, "l2fib mac table", + L2FIB_NUM_BUCKETS, L2FIB_MEMORY_SIZE); l2learn_main.global_learn_count = 0; } @@ -285,7 +276,7 @@ static clib_error_t * clear_l2fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - l2fib_clear_table (0); + l2fib_clear_table (); return 0; } @@ -308,14 +299,25 @@ VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { }; /* *INDENT-ON* */ +static inline l2fib_seq_num_t +l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) +{ + l2_input_config_t *int_config = l2input_intf_config (sw_if_index); + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); + /* *INDENT-OFF* */ + return (l2fib_seq_num_t) { + .swif = int_config->seq_num, + .bd = bd_config->seq_num, + }; + /* *INDENT-ON* */ +} /** * Add an entry to the l2fib. 
* If the entry already exists then overwrite it */ void -l2fib_add_entry (u64 mac, - u32 bd_index, +l2fib_add_entry (u64 mac, u32 bd_index, u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac) { l2fib_entry_key_t key; @@ -334,14 +336,7 @@ l2fib_add_entry (u64 mac, result.fields.filter = filter_mac; result.fields.bvi = bvi_mac; if (!static_mac) - { - l2_input_config_t *int_config = l2input_intf_config (sw_if_index); - l2_bridge_domain_t *bd_config = - vec_elt_at_index (l2input_main.bd_configs, - bd_index); - result.fields.int_sn = int_config->seq_num; - result.fields.bd_sn = bd_config->seq_num; - } + result.fields.sn = l2fib_cur_seq_num (bd_index, sw_if_index); kv.key = key.raw; kv.value = result.raw; @@ -620,8 +615,8 @@ VLIB_CLI_COMMAND (l2fib_test_command, static) = { * Delete an entry from the l2fib. * Return 0 if the entry was deleted, or 1 if it was not found */ -u32 -l2fib_del_entry (u64 mac, u32 bd_index) +static u32 +l2fib_del_entry_by_key (u64 raw_key) { l2fib_entry_result_t result; @@ -629,7 +624,7 @@ l2fib_del_entry (u64 mac, u32 bd_index) BVT (clib_bihash_kv) kv; /* set up key */ - kv.key = l2fib_make_key ((u8 *) & mac, bd_index); + kv.key = raw_key; if (BV (clib_bihash_search) (&mp->mac_table, &kv, &kv)) return 1; @@ -650,6 +645,16 @@ l2fib_del_entry (u64 mac, u32 bd_index) return 0; } +/** + * Delete an entry from the l2fib. + * Return 0 if the entry was deleted, or 1 if it was not found + */ +u32 +l2fib_del_entry (u64 mac, u32 bd_index) +{ + return l2fib_del_entry_by_key (l2fib_make_key ((u8 *) & mac, bd_index)); +} + /** * Delete an entry from the L2FIB. 
* The CLI format is: @@ -735,29 +740,42 @@ l2fib_start_ager_scan (vlib_main_t * vm) } /** - Flush all learned MACs from an interface + Flush all non static MACs from an interface */ void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index) { - l2_input_config_t *int_config; - int_config = l2input_intf_config (sw_if_index); + l2_input_config_t *int_config = l2input_intf_config (sw_if_index); int_config->seq_num += 1; l2fib_start_ager_scan (vm); } /** - Flush all learned MACs in a bridge domain + Flush all non static MACs in a bridge domain */ void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index) { - l2_bridge_domain_t *bd_config; - bd_config = l2input_bd_config (bd_index); + l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); bd_config->seq_num += 1; l2fib_start_ager_scan (vm); } +/** + Flush all non static MACs - flushes all valid BDs +*/ +void +l2fib_flush_all_mac (vlib_main_t * vm) +{ + l2_bridge_domain_t *bd_config; + vec_foreach (bd_config, l2input_main.bd_configs) + if (bd_is_valid (bd_config)) + bd_config->seq_num += 1; + + l2fib_start_ager_scan (vm); +} + + /** Flush MACs, except static ones, associated with an interface The CLI format is: @@ -784,6 +802,35 @@ done: return error; } +/** + Flush all MACs, except static ones + The CLI format is: + l2fib flush-mac all +*/ +static clib_error_t * +l2fib_flush_mac_all (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2fib_flush_all_mac (vm); + return 0; +} + +/*? + * This command kicks off the ager to delete all existing MAC Address entries, + * except static ones, from the L2 FIB table. 
+ * + * @cliexpar + * Example of how to flush all non-static MAC Address entries from the L2 FIB table: + * @cliexcmd{l2fib flush-mac all} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = { + .path = "l2fib flush-mac all", + .short_help = "l2fib flush-mac all", + .function = l2fib_flush_mac_all, +}; +/* *INDENT-ON* */ + /*? * This command kick off ager to delete all existing MAC Address entries, * except static ones, associated with an interface from the L2 FIB table. @@ -872,17 +919,8 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, { uword event_type, *event_data = 0; l2fib_main_t *msm = &l2fib_main; - l2_input_config_t *int_config; - l2_bridge_domain_t *bd_config; - BVT (clib_bihash) * h = &msm->mac_table; - clib_bihash_bucket_t *b; - BVT (clib_bihash_value) * v; - l2fib_entry_key_t key; - l2fib_entry_result_t result; - int i, j, k; bool enabled = 0; f64 start_time, last_run_duration = 0, t; - i16 delta; while (1) { @@ -911,6 +949,9 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, ASSERT (0); } last_run_duration = start_time = vlib_time_now (vm); + + BVT (clib_bihash) * h = &msm->mac_table; + int i, j, k; for (i = 0; i < h->nbuckets; i++) { /* Allow no more than 10us without a pause */ @@ -923,20 +964,22 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (i < (h->nbuckets - 3)) { - b = &h->buckets[i + 3]; + clib_bihash_bucket_t *b = &h->buckets[i + 3]; CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); b = &h->buckets[i + 1]; if (b->offset) { - v = BV (clib_bihash_get_value) (h, b->offset); + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); } } - b = &h->buckets[i]; + clib_bihash_bucket_t *b = &h->buckets[i]; if (b->offset == 0) continue; - v = BV (clib_bihash_get_value) (h, b->offset); + BVT (clib_bihash_value) * v = + BV 
(clib_bihash_get_value) (h, b->offset); for (j = 0; j < (1 << b->log2_pages); j++) { for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) @@ -944,37 +987,32 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) continue; - key.raw = v->kvp[k].key; - result.raw = v->kvp[k].value; + l2fib_entry_key_t key = {.raw = v->kvp[k].key }; + l2fib_entry_result_t result = {.raw = v->kvp[k].value }; if (result.fields.static_mac) continue; - int_config = - l2input_intf_config (result.fields.sw_if_index); - bd_config = - vec_elt_at_index (l2input_main.bd_configs, - key.fields.bd_index); - - if ((result.fields.int_sn != int_config->seq_num) || - (result.fields.bd_sn != bd_config->seq_num)) + u32 bd_index = key.fields.bd_index; + u32 sw_if_index = result.fields.sw_if_index; + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; + if (result.fields.sn.as_u16 != sn) { - void *p = &key.fields.mac; - l2fib_del_entry (*(u64 *) p, key.fields.bd_index); + l2fib_del_entry_by_key (key.raw); continue; } + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, bd_index); if (bd_config->mac_age == 0) continue; - delta = (u8) (start_time / 60) - result.fields.timestamp; + i16 delta = + (u8) (start_time / 60) - result.fields.timestamp; delta += delta < 0 ? 
256 : 0; if (delta > bd_config->mac_age) - { - void *p = &key.fields.mac; - l2fib_del_entry (*(u64 *) p, key.fields.bd_index); - } + l2fib_del_entry_by_key (key.raw); } v++; } diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index 7e49d74b..e571a210 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -50,6 +50,20 @@ typedef struct STATIC_ASSERT_SIZEOF (l2fib_entry_key_t, 8); + +typedef struct +{ + union + { + struct + { + u8 swif; + u8 bd; + }; + u16 as_u16; + }; +} l2fib_seq_num_t; + /* * The l2fib entry results */ @@ -66,8 +80,7 @@ typedef struct u8 filter:1; /* drop packets to/from this mac */ u8 unused1:5; u8 timestamp; /* timestamp for aging */ - u8 int_sn; /* interface seq num */ - u8 bd_sn; /* bridge domain seq num */ + l2fib_seq_num_t sn; /* bd/int seq num */ } fields; u64 raw; }; @@ -314,7 +327,7 @@ l2fib_lookup_4 (BVT (clib_bihash) * mac_table, } } -void l2fib_clear_table (uint keep_static); +void l2fib_clear_table (void); void l2fib_add_entry (u64 mac, @@ -329,6 +342,8 @@ void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index); void l2fib_flush_bd_mac (vlib_main_t * vm, u32 bd_index); +void l2fib_flush_all_mac (vlib_main_t * vm); + void l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, l2fib_entry_result_t ** l2fe_res); diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index fe65e694..41a93f56 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -202,8 +202,14 @@ classify_and_dispatch (vlib_main_t * vm, /* Get config for the bridge domain interface */ bd_config = vec_elt_at_index (msm->bd_configs, bd_index0); - /* Save bridge domain seq_num */ - vnet_buffer (b0)->l2.bd_sn = bd_config->seq_num; + /* Save bridge domain and interface seq_num */ + /* *INDENT-OFF* */ + l2fib_seq_num_t sn = { + .swif = config->seq_num, + .bd = bd_config->seq_num, + }; + /* *INDENT-ON* */ + vnet_buffer (b0)->l2.l2fib_sn = sn.as_u16;; /* * Process bridge domain feature enables. 
@@ -218,9 +224,6 @@ classify_and_dispatch (vlib_main_t * vm, /* mask out features from bitmap using packet type and bd config */ feature_bitmap = config->feature_bitmap & feat_mask; - /* Save interface seq_num */ - vnet_buffer (b0)->l2.int_sn = config->seq_num; - /* save for next feature graph nodes */ vnet_buffer (b0)->l2.feature_bitmap = feature_bitmap; diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index faed0d66..adc5e70f 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -141,10 +141,8 @@ l2learn_process (vlib_node_runtime_t * node, if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) result0->fields.timestamp = timestamp; if (PREDICT_FALSE - (result0->fields.int_sn != vnet_buffer (b0)->l2.int_sn)) - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; - if (PREDICT_FALSE (result0->fields.bd_sn != vnet_buffer (b0)->l2.bd_sn)) - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; + (result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn)) + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; } else if (result0->raw == ~0) { @@ -171,8 +169,7 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; kv.key = key0->raw; kv.value = result0->raw; @@ -210,8 +207,7 @@ l2learn_process (vlib_node_runtime_t * node, result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; result0->fields.timestamp = timestamp; - result0->fields.bd_sn = vnet_buffer (b0)->l2.bd_sn; - result0->fields.int_sn = vnet_buffer (b0)->l2.int_sn; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; kv.key = key0->raw; kv.value = result0->raw; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 107e83f3..c073c52d 
100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -298,6 +298,17 @@ static void *vl_api_bridge_domain_dump_t_print FINISH; } +static void *vl_api_l2fib_flush_all_t_print + (vl_api_l2fib_flush_all_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: l2fib_flush_all "); + + FINISH; +} + + static void *vl_api_l2fib_flush_bd_t_print (vl_api_l2fib_flush_bd_t * mp, void *handle) { @@ -2979,6 +2990,7 @@ _(SR_POLICY_MOD, sr_policy_mod) \ _(SR_POLICY_DEL, sr_policy_del) \ _(SW_INTERFACE_SET_L2_XCONNECT, sw_interface_set_l2_xconnect) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ +_(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2_FLAGS, l2_flags) \ -- cgit 1.2.3-korg From b823df5a7db8208f0162a50ba034a2037f7e7c67 Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Mon, 12 Jun 2017 17:07:22 +0300 Subject: L2FIB:fix crash in show with deleted subif entries after deleting a sub interface its l2fib entries are left with a dangling sw_if_index (while waiting for the ager to delete them). changed "show l2fib" to reflect that state with "Deleted" as the interface name. added sleep in test_l2_fib as a workaround for packets still passing after flush; will investigate... 
Change-Id: Id998d7d3c6a073ef5005c5f3009e1cfb7febf7db Signed-off-by: Eyal Bari --- src/vnet/l2/l2_fib.c | 11 +++++++---- test/test_l2_fib.py | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index d4207e35..f17eee2a 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -62,10 +62,13 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) u32 sw_if_index = va_arg (*args, u32); if (sw_if_index == ~0) return format (s, "N/A"); - else - return format (s, "%U", - format_vnet_sw_interface_name, vnm, - vnet_get_sw_interface (vnm, sw_if_index)); + + vnet_sw_interface_t *swif = vnet_get_sw_interface_safe (vnm, sw_if_index); + if (!swif) + return format (s, "Deleted"); + + return format (s, "%U", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface_safe (vnm, sw_if_index)); } void diff --git a/test/test_l2_fib.py b/test/test_l2_fib.py index 9249a2ce..f9a78efc 100644 --- a/test/test_l2_fib.py +++ b/test/test_l2_fib.py @@ -490,6 +490,7 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_int(self.pg_interfaces[0].sw_if_index) + self.sleep(1) self.run_verify_test(bd_id=1, dst_hosts=self.learned_hosts) self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) @@ -503,6 +504,7 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_bd(bd_id=1) + self.sleep(1) self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) self.run_verify_test(bd_id=2, dst_hosts=self.learned_hosts) @@ -516,6 +518,7 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_all() + self.sleep(2) self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) 
self.run_verify_negat_test(bd_id=2, dst_hosts=flushed) -- cgit 1.2.3-korg From 0f360dc3aa40d0654198bd3f3850bd31a0d78f7e Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 14 Jun 2017 13:11:20 +0300 Subject: L2FWD:fix seq_num overwritten + validate l2fib entries when forwarding l2_classify member table_index was overlaid over l2.l2fib_seq_num, which was overwritten when table_index gets initialized in l2_input_classify; solved by overlaying both table_index and opaque_index as only one is used. separated l2fib seq num from l2_input configs for better handling of theoretical ABA issue where an entry for a deleted interface is considered valid by the ager because a different interface with same sw_if_index and seq_num was created before the ager got a chance to delete Change-Id: I7b0eeded971627406f1c80834d7e02c0ebe62136 Signed-off-by: Eyal Bari --- src/vnet/buffer.h | 15 ++++++++--- src/vnet/l2/l2_bd.c | 5 ++-- src/vnet/l2/l2_fib.c | 19 +++---------- src/vnet/l2/l2_fib.h | 24 +++++++++++++++++ src/vnet/l2/l2_fwd.c | 72 +++++++++++++++++++++++++++++++++----------------- src/vnet/l2/l2_input.c | 4 +-- src/vnet/l2/l2_input.h | 3 --- src/vnet/l2/l2_learn.c | 13 +++++---- test/test_l2_fib.py | 3 --- 9 files changed, 99 insertions(+), 59 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index ec5e2f75..795bbd96 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -195,9 +195,13 @@ typedef struct /* L2 classify */ struct { - u64 pad; - u32 table_index; - u32 opaque_index; + u64 pad; /* paddind for l2 */ + u16 pad1; + union + { + u32 table_index; + u32 opaque_index; + }; u64 hash; } l2_classify; @@ -296,6 +300,11 @@ typedef struct STATIC_ASSERT (sizeof (vnet_buffer_opaque_t) <= STRUCT_SIZE_OF (vlib_buffer_t, opaque), "VNET buffer meta-data too large for vlib_buffer"); +STATIC_ASSERT (STRUCT_OFFSET_OF + (vnet_buffer_opaque_t, + l2_classify.table_index) >= + STRUCT_SIZE_OF (vnet_buffer_opaque_t, l2), + "l2_classify padding 
smaller than l2"); #define vnet_buffer(b) ((vnet_buffer_opaque_t *) (b)->opaque) diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index f68b6638..a87d02f2 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -1019,8 +1019,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { l2_flood_member_t *member = vec_elt_at_index (bd_config->members, i); - l2_input_config_t *int_config = - l2input_intf_config (member->sw_if_index); + u8 swif_seq_num = *l2fib_swif_seq_num (member->sw_if_index); u32 vtr_opr, dot1q, tag1, tag2; if (i == 0) { @@ -1033,7 +1032,7 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) vlib_cli_output (vm, "%=30U%=7d%=5d%=5d%=5s%=9s%=30U", format_vnet_sw_if_index_name, vnm, member->sw_if_index, member->sw_if_index, - int_config->seq_num, member->shg, + swif_seq_num, member->shg, member->flags & L2_FLOOD_MEMBER_BVI ? "*" : "-", i < bd_config->flood_count ? "*" : "-", format_vtr, vtr_opr, dot1q, tag1, tag2); diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index f17eee2a..2bb6d105 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -41,17 +41,6 @@ * */ -typedef struct -{ - - /* hash table */ - BVT (clib_bihash) mac_table; - - /* convenience variables */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} l2fib_main_t; - l2fib_main_t l2fib_main; /** Format sw_if_index. 
If the value is ~0, use the text "N/A" */ @@ -65,7 +54,7 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args) vnet_sw_interface_t *swif = vnet_get_sw_interface_safe (vnm, sw_if_index); if (!swif) - return format (s, "Deleted"); + return format (s, "Stale"); return format (s, "%U", format_vnet_sw_interface_name, vnm, vnet_get_sw_interface_safe (vnm, sw_if_index)); @@ -305,11 +294,10 @@ VLIB_CLI_COMMAND (clear_l2fib_cli, static) = { static inline l2fib_seq_num_t l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) { - l2_input_config_t *int_config = l2input_intf_config (sw_if_index); l2_bridge_domain_t *bd_config = l2input_bd_config (bd_index); /* *INDENT-OFF* */ return (l2fib_seq_num_t) { - .swif = int_config->seq_num, + .swif = *l2fib_swif_seq_num (sw_if_index), .bd = bd_config->seq_num, }; /* *INDENT-ON* */ @@ -748,8 +736,7 @@ l2fib_start_ager_scan (vlib_main_t * vm) void l2fib_flush_int_mac (vlib_main_t * vm, u32 sw_if_index) { - l2_input_config_t *int_config = l2input_intf_config (sw_if_index); - int_config->seq_num += 1; + *l2fib_swif_seq_num (sw_if_index) += 1; l2fib_start_ager_scan (vm); } diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index e571a210..03184502 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -27,6 +27,22 @@ #define L2FIB_NUM_BUCKETS (64 * 1024) #define L2FIB_MEMORY_SIZE (256<<20) +typedef struct +{ + + /* hash table */ + BVT (clib_bihash) mac_table; + + /* per swif vector of sequence number for interface based flush of MACs */ + u8 *swif_seq_num; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} l2fib_main_t; + +extern l2fib_main_t l2fib_main; + /* * The L2fib key is the mac address and bridge domain ID */ @@ -350,6 +366,14 @@ l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, u8 *format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args); +static_always_inline u8 * +l2fib_swif_seq_num (u32 sw_if_index) +{ + l2fib_main_t *mp = &l2fib_main; + vec_validate 
(mp->swif_seq_num, sw_if_index); + return vec_elt_at_index (mp->swif_seq_num, sw_if_index); +} + BVT (clib_bihash) * get_mac_table (void); #endif diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c index f7e2ccb6..8140728b 100644 --- a/src/vnet/l2/l2_fwd.c +++ b/src/vnet/l2/l2_fwd.c @@ -89,7 +89,8 @@ _(HIT, "L2 forward hits") \ _(BVI_BAD_MAC, "BVI L3 MAC mismatch") \ _(BVI_ETHERTYPE, "BVI packet with unhandled ethertype") \ _(FILTER_DROP, "Filter Mac Drop") \ -_(REFLECT_DROP, "Reflection Drop") +_(REFLECT_DROP, "Reflection Drop") \ +_(STALE_DROP, "Stale entry Drop") typedef enum { @@ -123,28 +124,15 @@ l2fwd_process (vlib_main_t * vm, vlib_buffer_t * b0, u32 sw_if_index0, l2fib_entry_result_t * result0, u32 * next0) { - if (PREDICT_FALSE (result0->raw == ~0)) - { - /* - * lookup miss, so flood - * TODO:replicate packet to each intf in bridge-domain - * For now just drop - */ - if (vnet_buffer (b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) - { - *next0 = L2FWD_NEXT_FLOOD; - } - else - { - /* Flooding is disabled */ - b0->error = node->errors[L2FWD_ERROR_FLOOD]; - *next0 = L2FWD_NEXT_DROP; - } + int try_flood = result0->raw == ~0; + int flood_error; + if (PREDICT_FALSE (try_flood)) + { + flood_error = L2FWD_ERROR_FLOOD; } else { - /* lookup hit, forward packet */ #ifdef COUNTERS em->counters[node_counter_base_index + L2FWD_ERROR_HIT] += 1; @@ -152,22 +140,37 @@ l2fwd_process (vlib_main_t * vm, vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index; *next0 = L2FWD_NEXT_L2_OUTPUT; + int l2fib_seq_num_valid = 1; + /* check l2fib seq num for stale entries */ + if (!result0->fields.static_mac) + { + l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn }; + l2fib_seq_num_t expected_sn = { + .bd = in_sn.bd, + .swif = *l2fib_swif_seq_num (result0->fields.sw_if_index), + }; + l2fib_seq_num_valid = + expected_sn.as_u16 == result0->fields.sn.as_u16; + } + if (PREDICT_FALSE (!l2fib_seq_num_valid)) + { + flood_error = L2FWD_ERROR_STALE_DROP; + 
try_flood = 1; + } /* perform reflection check */ - if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) + else if (PREDICT_FALSE (sw_if_index0 == result0->fields.sw_if_index)) { b0->error = node->errors[L2FWD_ERROR_REFLECT_DROP]; *next0 = L2FWD_NEXT_DROP; - - /* perform filter check */ } + /* perform filter check */ else if (PREDICT_FALSE (result0->fields.filter)) { b0->error = node->errors[L2FWD_ERROR_FILTER_DROP]; *next0 = L2FWD_NEXT_DROP; - - /* perform BVI check */ } + /* perform BVI check */ else if (PREDICT_FALSE (result0->fields.bvi)) { u32 rc; @@ -192,6 +195,27 @@ l2fwd_process (vlib_main_t * vm, } } } + + /* flood */ + if (PREDICT_FALSE (try_flood)) + { + /* + * lookup miss, so flood + * TODO:replicate packet to each intf in bridge-domain + * For now just drop + */ + if (vnet_buffer (b0)->l2.feature_bitmap & L2INPUT_FEAT_UU_FLOOD) + { + *next0 = L2FWD_NEXT_FLOOD; + } + else + { + /* Flooding is disabled */ + b0->error = node->errors[flood_error]; + *next0 = L2FWD_NEXT_DROP; + } + } + } diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index aca23fe0..22fc2a98 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -205,7 +205,7 @@ classify_and_dispatch (vlib_main_t * vm, /* Save bridge domain and interface seq_num */ /* *INDENT-OFF* */ l2fib_seq_num_t sn = { - .swif = config->seq_num, + .swif = *l2fib_swif_seq_num(sw_if_index0), .bd = bd_config->seq_num, }; /* *INDENT-ON* */ @@ -637,7 +637,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ config->xconnect = 0; config->bridge = 1; config->bd_index = bd_index; - config->seq_num += 1; + *l2fib_swif_seq_num (sw_if_index) += 1; /* * Enable forwarding, flooding, learning and ARP termination by default diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h index cb67cb9d..c1b669b4 100644 --- a/src/vnet/l2/l2_input.h +++ b/src/vnet/l2/l2_input.h @@ -53,9 +53,6 @@ typedef struct /* split horizon group */ u8 shg; - /* sequence number for interface based 
flush of MACs */ - u8 seq_num; - } l2_input_config_t; diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index adc5e70f..3ff2e704 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -138,11 +138,14 @@ l2learn_process (vlib_node_runtime_t * node, * The entry was in the table, and the sw_if_index matched, the normal case */ counter_base[L2LEARN_ERROR_HIT] += 1; - if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) - result0->fields.timestamp = timestamp; - if (PREDICT_FALSE - (result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn)) - result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; + if (!result0->fields.static_mac) + { + if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) + result0->fields.timestamp = timestamp; + if (PREDICT_FALSE + (result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn)) + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; + } } else if (result0->raw == ~0) { diff --git a/test/test_l2_fib.py b/test/test_l2_fib.py index f9a78efc..9249a2ce 100644 --- a/test/test_l2_fib.py +++ b/test/test_l2_fib.py @@ -490,7 +490,6 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_int(self.pg_interfaces[0].sw_if_index) - self.sleep(1) self.run_verify_test(bd_id=1, dst_hosts=self.learned_hosts) self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) @@ -504,7 +503,6 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_bd(bd_id=1) - self.sleep(1) self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) self.run_verify_test(bd_id=2, dst_hosts=self.learned_hosts) @@ -518,7 +516,6 @@ class TestL2fib(VppTestCase): self.config_l2_fib_entries(bd_id=1, n_hosts_per_if=10) self.config_l2_fib_entries(bd_id=2, n_hosts_per_if=10) flushed = self.flush_all() - self.sleep(2) 
self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) self.run_verify_negat_test(bd_id=2, dst_hosts=flushed) -- cgit 1.2.3-korg From 31a71ab497616940c105fa1719515fe7ae37f37a Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Sun, 25 Jun 2017 14:42:33 +0300 Subject: L2-LEARN:fix l2fib entry seq num not updated on hit (VPP-888) fixed instability in l2bd_multi_instance test - sometimes failing with extra packets captured it appears l2-learn was not updating hit entries but rather a copy of them. if the ager did not have a chance to run before the test was running the learning cycle - entries were not updated with the packet's seq num - causing packets to flood when hitting the stale seq_num in l2-fwd - hence the extra packets fixed handling of filter entries revert workaround for instability in test Change-Id: I16d918e6310a5bf40bad5b7335b2140c2867cb71 Signed-off-by: Eyal Bari (cherry picked from commit 25ff2ea3a31e422094f6d91eab46222a29a77c4b) --- src/vnet/l2/l2_api.c | 28 +++++------- src/vnet/l2/l2_fib.c | 10 ++-- src/vnet/l2/l2_fib.h | 13 ++++++ src/vnet/l2/l2_input.c | 2 +- src/vnet/l2/l2_learn.c | 98 ++++++++++++++-------------------------- test/test_l2bd_multi_instance.py | 58 ++++++++++++------------ 6 files changed, 93 insertions(+), 116 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index aa3dcb7e..a0b40d6d 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -187,30 +187,24 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) l2input_main_t *l2im = &l2input_main; vl_api_l2fib_add_del_reply_t *rmp; int rv = 0; - u64 mac = 0; - u32 sw_if_index = ntohl (mp->sw_if_index); u32 bd_id = ntohl (mp->bd_id); - u32 bd_index; - u32 static_mac; - u32 filter_mac; - u32 bvi_mac; - uword *p; - - mac = mp->mac; + uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); - p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (!p) { rv = VNET_API_ERROR_NO_SUCH_ENTRY; goto bad_sw_if_index; } - bd_index = p[0]; 
+ u32 bd_index = p[0]; + u64 mac = mp->mac; if (mp->is_add) { - filter_mac = mp->filter_mac ? 1 : 0; - if (filter_mac == 0) + if (mp->filter_mac) + l2fib_add_filter_entry (mac, bd_index); + else { + u32 sw_if_index = ntohl (mp->sw_if_index); VALIDATE_SW_IF_INDEX (mp); if (vec_len (l2im->configs) <= sw_if_index) { @@ -227,11 +221,11 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) goto bad_sw_if_index; } } + u32 static_mac = mp->static_mac ? 1 : 0; + u32 bvi_mac = mp->bvi_mac ? 1 : 0; + l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, + bvi_mac); } - static_mac = mp->static_mac ? 1 : 0; - bvi_mac = mp->bvi_mac ? 1 : 0; - l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac, - bvi_mac); } else { diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 2bb6d105..6f8f6e06 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -413,8 +413,10 @@ l2fib_add (vlib_main_t * vm, } } - l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, filter_mac, - bvi_mac); + if (filter_mac) + l2fib_add_filter_entry (mac, bd_index); + else + l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, bvi_mac); done: return error; @@ -464,7 +466,6 @@ l2fib_test_command_fn (vlib_main_t * vm, u64 mac, save_mac; u32 bd_index = 0; u32 sw_if_index = 8; - u32 filter_mac = 0; u32 bvi_mac = 0; u32 is_add = 0; u32 is_del = 0; @@ -503,8 +504,7 @@ l2fib_test_command_fn (vlib_main_t * vm, for (i = 0; i < count; i++) { u64 tmp; - l2fib_add_entry (mac, bd_index, sw_if_index, mac, - filter_mac, bvi_mac); + l2fib_add_fwd_entry (mac, bd_index, sw_if_index, mac, bvi_mac); tmp = clib_net_to_host_u64 (mac); tmp >>= 16; tmp++; diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index 03184502..21dcc451 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -350,6 +350,19 @@ l2fib_add_entry (u64 mac, u32 bd_index, u32 sw_if_index, u32 static_mac, u32 drop_mac, u32 bvi_mac); +static inline void +l2fib_add_fwd_entry (u64 mac, u32 bd_index, 
u32 sw_if_index, u32 static_mac, + u32 bvi_mac) +{ + l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, 0, bvi_mac); +} + +static inline void +l2fib_add_filter_entry (u64 mac, u32 bd_index) +{ + l2fib_add_entry (mac, bd_index, ~0, 1, 1, 0); +} + u32 l2fib_del_entry (u64 mac, u32 bd_index); void l2fib_start_ager_scan (vlib_main_t * vm); diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 22fc2a98..d536d15b 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -671,7 +671,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ /* create the l2fib entry for the bvi interface */ mac = *((u64 *) hi->hw_address); - l2fib_add_entry (mac, bd_index, sw_if_index, 1, 0, 1); /* static + bvi */ + l2fib_add_fwd_entry (mac, bd_index, sw_if_index, 1, 1); /* static + bvi */ /* Disable learning by default. no use since l2fib entry is static. */ config->feature_bitmap &= ~L2INPUT_FEAT_LEARN; diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 3ff2e704..b9904d3e 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -131,27 +131,22 @@ l2learn_process (vlib_node_runtime_t * node, feature_bitmap); /* Check mac table lookup result */ - if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) { /* * The entry was in the table, and the sw_if_index matched, the normal case */ counter_base[L2LEARN_ERROR_HIT] += 1; - if (!result0->fields.static_mac) - { - if (PREDICT_FALSE (result0->fields.timestamp != timestamp)) - result0->fields.timestamp = timestamp; - if (PREDICT_FALSE - (result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn)) - result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; - } + int update = !result0->fields.static_mac && + (result0->fields.timestamp != timestamp || + result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn); + + if (PREDICT_TRUE (!update)) + return; } else if (result0->raw == ~0) { - /* The entry was not in table, so add it */ - counter_base[L2LEARN_ERROR_MISS] += 
1; if (msm->global_learn_count == msm->global_learn_limit) @@ -161,32 +156,27 @@ l2learn_process (vlib_node_runtime_t * node, * In the future, limits could also be per-interface or bridge-domain. */ counter_base[L2LEARN_ERROR_LIMIT] += 1; - goto done; - - } - else - { - BVT (clib_bihash_kv) kv; - /* It is ok to learn */ - - result0->raw = 0; /* clear all fields */ - result0->fields.sw_if_index = sw_if_index0; - result0->fields.timestamp = timestamp; - result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; - kv.key = key0->raw; - kv.value = result0->raw; - - BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ ); - - cached_key->raw = ~0; /* invalidate the cache */ - msm->global_learn_count++; + return; } + /* It is ok to learn */ + msm->global_learn_count++; + result0->raw = 0; /* clear all fields */ + result0->fields.sw_if_index = sw_if_index0; + cached_key->raw = ~0; /* invalidate the cache */ } else { - /* The entry was in the table, but with the wrong sw_if_index mapping (mac move) */ + if (result0->fields.filter) + { + ASSERT (result0->fields.sw_if_index == ~0); + /* drop packet because lookup matched a filter mac entry */ + b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP]; + *next0 = L2LEARN_NEXT_DROP; + return; + } + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; if (result0->fields.static_mac) @@ -197,44 +187,24 @@ l2learn_process (vlib_node_runtime_t * node, */ b0->error = node->errors[L2LEARN_ERROR_MAC_MOVE_VIOLATE]; *next0 = L2LEARN_NEXT_DROP; + return; } - else - { - /* - * Update the entry - * TODO: may want to rate limit mac moves - * TODO: check global/bridge domain/interface learn limits - */ - BVT (clib_bihash_kv) kv; - - result0->raw = 0; /* clear all fields */ - result0->fields.sw_if_index = sw_if_index0; - result0->fields.timestamp = timestamp; - result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; - kv.key = key0->raw; - kv.value = result0->raw; - - cached_key->raw = ~0; /* invalidate the cache */ - - BV (clib_bihash_add_del) 
(msm->mac_table, &kv, 1 /* is_add */ ); - } + /* + * TODO: may want to rate limit mac moves + * TODO: check global/bridge domain/interface learn limits + */ + result0->fields.sw_if_index = sw_if_index0; } - if (result0->fields.filter) - { - /* drop packet because lookup matched a filter mac entry */ + /* Update the entry */ + result0->fields.timestamp = timestamp; + result0->fields.sn.as_u16 = vnet_buffer (b0)->l2.l2fib_sn; - if (*next0 != L2LEARN_NEXT_DROP) - { - /* if we're not already dropping the packet, do it now */ - b0->error = node->errors[L2LEARN_ERROR_FILTER_DROP]; - *next0 = L2LEARN_NEXT_DROP; - } - } - -done: - return; + BVT (clib_bihash_kv) kv; + kv.key = key0->raw; + kv.value = result0->raw; + BV (clib_bihash_add_del) (msm->mac_table, &kv, 1 /* is_add */ ); } diff --git a/test/test_l2bd_multi_instance.py b/test/test_l2bd_multi_instance.py index 0bb9e597..7dd27fb2 100644 --- a/test/test_l2bd_multi_instance.py +++ b/test/test_l2bd_multi_instance.py @@ -403,7 +403,33 @@ class TestL2bdMultiInst(VppTestCase): self.run_verify_test() def test_l2bd_inst_02(self): - """ L2BD Multi-instance test 2 - delete 2 BDs + """ L2BD Multi-instance test 2 - update data of 5 BDs + """ + # Config 2 + # Update data of 5 BDs (disable learn, forward, flood, uu-flood) + self.set_bd_flags(self.bd_list[0], learn=False, forward=False, + flood=False, uu_flood=False) + self.set_bd_flags(self.bd_list[1], forward=False) + self.set_bd_flags(self.bd_list[2], flood=False) + self.set_bd_flags(self.bd_list[3], uu_flood=False) + self.set_bd_flags(self.bd_list[4], learn=False) + + # Verify 2 + # Skipping check of uu_flood as it is not returned by + # bridge_domain_dump api command + self.verify_bd(self.bd_list[0], learn=False, forward=False, + flood=False, uu_flood=False) + self.verify_bd(self.bd_list[1], learn=True, forward=False, + flood=True, uu_flood=True) + self.verify_bd(self.bd_list[2], learn=True, forward=True, + flood=False, uu_flood=True) + self.verify_bd(self.bd_list[3], 
learn=True, forward=True, + flood=True, uu_flood=False) + self.verify_bd(self.bd_list[4], learn=False, forward=True, + flood=True, uu_flood=True) + + def test_l2bd_inst_03(self): + """ L2BD Multi-instance test 3 - delete 2 BDs """ # Config 3 # Delete 2 BDs @@ -418,8 +444,8 @@ class TestL2bdMultiInst(VppTestCase): # Test 3 self.run_verify_test() - def test_l2bd_inst_03(self): - """ L2BD Multi-instance test 3 - add 2 BDs + def test_l2bd_inst_04(self): + """ L2BD Multi-instance test 4 - add 2 BDs """ # Config 4 # Create 5 BDs, put interfaces to these BDs and send MAC learning @@ -434,32 +460,6 @@ class TestL2bdMultiInst(VppTestCase): # self.vapi.cli("clear trace") self.run_verify_test() - def test_l2bd_inst_04(self): - """ L2BD Multi-instance test 4 - update data of 5 BDs - """ - # Config 2 - # Update data of 5 BDs (disable learn, forward, flood, uu-flood) - self.set_bd_flags(self.bd_list[0], learn=False, forward=False, - flood=False, uu_flood=False) - self.set_bd_flags(self.bd_list[1], forward=False) - self.set_bd_flags(self.bd_list[2], flood=False) - self.set_bd_flags(self.bd_list[3], uu_flood=False) - self.set_bd_flags(self.bd_list[4], learn=False) - - # Verify 2 - # Skipping check of uu_flood as it is not returned by - # bridge_domain_dump api command - self.verify_bd(self.bd_list[0], learn=False, forward=False, - flood=False, uu_flood=False) - self.verify_bd(self.bd_list[1], learn=True, forward=False, - flood=True, uu_flood=True) - self.verify_bd(self.bd_list[2], learn=True, forward=True, - flood=False, uu_flood=True) - self.verify_bd(self.bd_list[3], learn=True, forward=True, - flood=True, uu_flood=False) - self.verify_bd(self.bd_list[4], learn=False, forward=True, - flood=True, uu_flood=True) - def test_l2bd_inst_05(self): """ L2BD Multi-instance test 5 - delete 5 BDs """ -- cgit 1.2.3-korg From b2fd6cb586fe69082cc12995910c65843fc5bb4a Mon Sep 17 00:00:00 2001 From: John Lo Date: Wed, 12 Jul 2017 19:56:45 -0400 Subject: Fix crash with worker threads on 4K 
VXLAN/BD setup (VPP-907) Cleanup mapping of interface output node for the l2-output node when interface is configured to L2 or L3 modes. The mapping is now always done in the main thread as part of API/CLI processing, instead of initiating the mapping in the forwarding path which can be in the worker threads. Change-Id: Ia789493e7d9f5c76d68edfaf34db43f3e3f53506 Signed-off-by: John Lo (cherry picked from commit bea5ebf205e0bec922bf26c6c1a6a9392b4cad67) --- src/vnet/interface.h | 8 +--- src/vnet/l2/l2_efp_filter.c | 3 -- src/vnet/l2/l2_fib.c | 7 ++++ src/vnet/l2/l2_input.c | 49 ++++++++++++------------ src/vnet/l2/l2_output.c | 83 +++++----------------------------------- src/vnet/l2/l2_output.h | 68 ++++---------------------------- src/vnet/l2/l2_output_acl.c | 3 -- src/vnet/l2/l2_output_classify.c | 3 -- 8 files changed, 51 insertions(+), 173 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/interface.h b/src/vnet/interface.h index ce7700e4..9d64fc28 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -416,10 +416,6 @@ typedef struct vnet_hw_interface_t VNET_HW_INTERFACE_FLAG_SPEED_40G | \ VNET_HW_INTERFACE_FLAG_SPEED_100G) - /* l2output node flags */ -#define VNET_HW_INTERFACE_FLAG_L2OUTPUT_SHIFT 9 -#define VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED (1 << 9) - /* rx mode flags */ #define VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE (1 << 10) @@ -569,10 +565,10 @@ typedef struct #define VNET_SW_INTERFACE_FLAG_BOND_SLAVE (1 << 4) -/* Interface does not appear in CLI/API */ + /* Interface does not appear in CLI/API */ #define VNET_SW_INTERFACE_FLAG_HIDDEN (1 << 5) -/* Interface in ERROR state */ + /* Interface in ERROR state */ #define VNET_SW_INTERFACE_FLAG_ERROR (1 << 6) /* Index for this interface. 
*/ diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c index 2db4dc69..f9ba8f2f 100644 --- a/src/vnet/l2/l2_efp_filter.c +++ b/src/vnet/l2/l2_efp_filter.c @@ -530,9 +530,6 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2_efp_filter_node, l2_efp_filter_node_fn) l2output_get_feat_names (), mp->next_nodes.feat_next_node_index); - /* Initialize the output node mapping table */ - l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec); - return 0; } diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 6f8f6e06..4ed16987 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -413,6 +413,13 @@ l2fib_add (vlib_main_t * vm, } } + if (vec_len (l2input_main.configs) <= sw_if_index) + { + error = clib_error_return (0, "Interface sw_if_index %d not in L2 mode", + sw_if_index); + goto done; + } + if (filter_mac) l2fib_add_filter_entry (mac, bd_index); else diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index d536d15b..9a3148c5 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -573,13 +573,9 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ l2_if_adjust--; } - /* - * Directs the l2 output path to work out the interface - * output next-arc itself. Needed when recycling a sw_if_index. 
- */ + /* Make sure vector is big enough */ vec_validate_init_empty (l2om->next_nodes.output_node_index_vec, - sw_if_index, ~0); - l2om->next_nodes.output_node_index_vec[sw_if_index] = ~0; + sw_if_index, L2OUTPUT_NEXT_DROP); /* Initialize the l2-input configuration for the interface */ if (mode == MODE_L3) @@ -601,26 +597,11 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ l2om->next_nodes.output_node_index_vec[sw_if_index] = L2OUTPUT_NEXT_BAD_INTF; } - else if (mode == MODE_L2_CLASSIFY) - { - config->xconnect = 1; - config->bridge = 0; - config->output_sw_if_index = xc_sw_if_index; - - /* Make sure last-chance drop is configured */ - config->feature_bitmap |= - L2INPUT_FEAT_DROP | L2INPUT_FEAT_INPUT_CLASSIFY; - - /* Make sure bridging features are disabled */ - config->feature_bitmap &= - ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | L2INPUT_FEAT_FLOOD); - shg = 0; /* not used in xconnect */ - - /* Insure all packets go to ethernet-input */ - ethernet_set_rx_redirect (vnet_main, hi, 1); - } else { + /* Add or update l2-output node next-arc and output_node_index_vec table + * for the interface */ + l2output_create_output_node_mapping (vm, vnet_main, sw_if_index); if (mode == MODE_L2_BRIDGE) { @@ -693,7 +674,7 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ bd_add_member (bd_config, &member); } - else + else if (mode == MODE_L2_XC) { config->xconnect = 1; config->bridge = 0; @@ -709,6 +690,24 @@ set_int_l2_mode (vlib_main_t * vm, vnet_main_t * vnet_main, /* */ config->feature_bitmap |= L2INPUT_FEAT_XCONNECT; shg = 0; /* not used in xconnect */ } + else if (mode == MODE_L2_CLASSIFY) + { + config->xconnect = 1; + config->bridge = 0; + config->output_sw_if_index = xc_sw_if_index; + + /* Make sure last-chance drop is configured */ + config->feature_bitmap |= + L2INPUT_FEAT_DROP | L2INPUT_FEAT_INPUT_CLASSIFY; + + /* Make sure bridging features are disabled */ + config->feature_bitmap &= + ~(L2INPUT_FEAT_LEARN | L2INPUT_FEAT_FWD | 
L2INPUT_FEAT_FLOOD); + shg = 0; /* not used in xconnect */ + + /* Insure all packets go to ethernet-input */ + ethernet_set_rx_redirect (vnet_main, hi, 1); + } /* set up split-horizon group and set output feature bit */ config->shg = shg; diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c index e17b2a16..51d5e145 100644 --- a/src/vnet/l2/l2_output.c +++ b/src/vnet/l2/l2_output.c @@ -601,90 +601,27 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2output_node, l2output_node_fn) mp->next_nodes.feat_next_node_index); /* Initialize the output node mapping table */ - l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec); + vec_validate_init_empty (mp->next_nodes.output_node_index_vec, 100, + L2OUTPUT_NEXT_DROP); return 0; } VLIB_INIT_FUNCTION (l2output_init); -typedef struct -{ - u32 node_index; - u32 sw_if_index; -} output_node_mapping_rpc_args_t; - -static void output_node_rpc_callback (output_node_mapping_rpc_args_t * a); - -static void -output_node_mapping_send_rpc (u32 node_index, u32 sw_if_index) -{ - output_node_mapping_rpc_args_t args; - void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); - - args.node_index = node_index; - args.sw_if_index = sw_if_index; - - vl_api_rpc_call_main_thread (output_node_rpc_callback, - (u8 *) & args, sizeof (args)); -} - /** Create a mapping in the next node mapping table for the given sw_if_index. 
*/ -u32 -l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */ - u32 * output_node_index_vec, - u32 sw_if_index) -{ - - u32 next; /* index of next graph node */ - vnet_hw_interface_t *hw0; - u32 *node; - - hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); - - uword thread_index; - - thread_index = vlib_get_thread_index (); - - if (thread_index) - { - u32 oldflags; - - oldflags = __sync_fetch_and_or (&hw0->flags, - VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED); - - if ((oldflags & VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED)) - return L2OUTPUT_NEXT_DROP; - - output_node_mapping_send_rpc (node_index, sw_if_index); - return L2OUTPUT_NEXT_DROP; - } - - /* dynamically create graph node arc */ - next = vlib_node_add_next (vlib_main, node_index, hw0->output_node_index); - - /* Initialize vector with the mapping */ - - node = vec_elt_at_index (output_node_index_vec, sw_if_index); - *node = next; - - /* reset mapping bit, includes memory barrier */ - __sync_fetch_and_and (&hw0->flags, ~VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED); - - return next; -} - void -output_node_rpc_callback (output_node_mapping_rpc_args_t * a) +l2output_create_output_node_mapping (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, u32 sw_if_index) { - vlib_main_t *vm = vlib_get_main (); - vnet_main_t *vnm = vnet_get_main (); - l2output_main_t *mp = &l2output_main; + vnet_hw_interface_t *hw0 = + vnet_get_sup_hw_interface (vnet_main, sw_if_index); - (void) l2output_create_output_node_mapping - (vm, vnm, a->node_index, mp->next_nodes.output_node_index_vec, - a->sw_if_index); + /* dynamically create graph node arc */ + u32 next = vlib_node_add_next (vlib_main, l2output_node.index, + hw0->output_node_index); + l2output_main.next_nodes.output_node_index_vec[sw_if_index] = next; } /* Get a pointer to the config for the given interface */ diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h index 9597205c..82cefd2c 100644 --- 
a/src/vnet/l2/l2_output.h +++ b/src/vnet/l2/l2_output.h @@ -130,7 +130,7 @@ _(EFP_DROP, "L2 EFP filter pre-rewrite drops") \ _(VTR_DROP, "L2 output tag rewrite drops") \ _(SHG_DROP, "L2 split horizon drops") \ _(DROP, "L2 output drops") \ -_(MAPPING_DROP, "L2 Output interface mapping in progress") +_(MAPPING_DROP, "L2 Output interface not valid") typedef enum { @@ -159,52 +159,9 @@ char **l2output_get_feat_names (void); */ /* Create a mapping to the output graph node for the given sw_if_index */ -u32 l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */ - u32 * output_node_index_vec, - u32 sw_if_index); - -/* Initialize the next node mapping table */ -always_inline void -l2output_init_output_node_vec (u32 ** output_node_index_vec) -{ - - /* - * Size it at 100 sw_if_indexes initially - * Uninitialized mappings are set to ~0 - */ - vec_validate_init_empty (*output_node_index_vec, 100, ~0); -} - - -/** - * Get a mapping from the output node mapping table, - * creating the entry if necessary. 
- */ -always_inline u32 -l2output_get_output_node (vlib_main_t * vlib_main, vnet_main_t * vnet_main, u32 node_index, /* index of current node */ - u32 sw_if_index, u32 ** output_node_index_vec) /* may be updated */ -{ - u32 next; /* index of next graph node */ - - /* Insure the vector is big enough */ - vec_validate_init_empty (*output_node_index_vec, sw_if_index, ~0); - - /* Get the mapping for the sw_if_index */ - next = vec_elt (*output_node_index_vec, sw_if_index); - - if (next == ~0) - { - /* Mapping doesn't exist so create it */ - next = l2output_create_output_node_mapping (vlib_main, - vnet_main, - node_index, - *output_node_index_vec, - sw_if_index); - } - - return next; -} - +void l2output_create_output_node_mapping (vlib_main_t * vlib_main, + vnet_main_t * vnet_main, + u32 sw_if_index); /** Determine the next L2 node based on the output feature bitmap */ always_inline void @@ -257,21 +214,12 @@ l2_output_dispatch (vlib_main_t * vlib_main, } else { - /* Look up the output TX node */ - *next0 = l2output_get_output_node (vlib_main, - vnet_main, - node_index, - sw_if_index, - &next_nodes->output_node_index_vec); + /* Look up the output TX node for the sw_if_index */ + *next0 = vec_elt (l2output_main.next_nodes.output_node_index_vec, + sw_if_index); if (*next0 == L2OUTPUT_NEXT_DROP) - { - vnet_hw_interface_t *hw0; - hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); - - if (hw0->flags & VNET_HW_INTERFACE_FLAG_L2OUTPUT_MAPPED) - b0->error = node->errors[L2OUTPUT_ERROR_MAPPING_DROP]; - } + b0->error = node->errors[L2OUTPUT_ERROR_MAPPING_DROP]; /* Update the one-entry cache */ *cached_sw_if_index = sw_if_index; diff --git a/src/vnet/l2/l2_output_acl.c b/src/vnet/l2/l2_output_acl.c index 94a4d66b..1d1971a5 100644 --- a/src/vnet/l2/l2_output_acl.c +++ b/src/vnet/l2/l2_output_acl.c @@ -297,9 +297,6 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2_outacl_node, l2_outacl_node_fn) l2output_get_feat_names (), mp->next_nodes.feat_next_node_index); - /* Initialize the output 
node mapping table */ - l2output_init_output_node_vec (&mp->next_nodes.output_node_index_vec); - return 0; } diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c index c1bdaddc..869b0656 100644 --- a/src/vnet/l2/l2_output_classify.c +++ b/src/vnet/l2/l2_output_classify.c @@ -497,9 +497,6 @@ l2_output_classify_init (vlib_main_t * vm) rt->l2cm = cm; rt->vcm = cm->vnet_classify_main; - /* Initialize the output node mapping table */ - l2output_init_output_node_vec (&cm->next_nodes.output_node_index_vec); - return 0; } -- cgit 1.2.3-korg From 908a5ea6e247b4a15f0ec7e8ee8ebff799abdc4f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Fri, 14 Jul 2017 12:42:21 -0400 Subject: Add a bihash prefetchable bucket-level cache According to Maciek, the easiest way to leverage the csit "performance trend" job is to actually merge the patch once verified. Manual testing indicates that the patch improves l2 path performance. Other use-cases are TBD. It's possible that we'll need to back out the patch depending on what happens. 
Change-Id: Ic0a0363de35ef9be953ad7709c57c3936b73fd5a Signed-off-by: Dave Barach --- src/vnet/fib/ip6_fib.c | 4 +- src/vnet/fib/ip6_fib.h | 2 +- src/vnet/l2/l2_fib.c | 8 +- src/vppinfra.am | 2 + src/vppinfra/bihash_16_8.h | 3 + src/vppinfra/bihash_24_8.h | 3 + src/vppinfra/bihash_48_8.h | 3 + src/vppinfra/bihash_8_8.h | 3 + src/vppinfra/bihash_template.c | 78 ++++++++++++-- src/vppinfra/bihash_template.h | 206 ++++++++++++++++++++++++++++++++---- src/vppinfra/test_bihash_template.c | 61 +++++++++-- 11 files changed, 329 insertions(+), 44 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 527f9114..8fde6f9f 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -200,7 +200,7 @@ ip6_fib_table_lookup (u32 fib_index, const ip6_address_t *addr, u32 len) { - const ip6_fib_table_instance_t *table; + ip6_fib_table_instance_t *table; BVT(clib_bihash_kv) kv, value; int i, n_p, rv; u64 fib; @@ -246,7 +246,7 @@ ip6_fib_table_lookup_exact_match (u32 fib_index, const ip6_address_t *addr, u32 len) { - const ip6_fib_table_instance_t *table; + ip6_fib_table_instance_t *table; BVT(clib_bihash_kv) kv, value; ip6_address_t *mask; u64 fib; diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index 9789da4f..aad8305c 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -68,7 +68,7 @@ ip6_fib_table_fwding_lookup (ip6_main_t * im, u32 fib_index, const ip6_address_t * dst) { - const ip6_fib_table_instance_t *table; + ip6_fib_table_instance_t *table; int i, len; int rv; BVT(clib_bihash_kv) kv, value; diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 4ed16987..7e59b098 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -66,7 +66,7 @@ l2fib_table_dump (u32 bd_index, l2fib_entry_key_t ** l2fe_key, { l2fib_main_t *msm = &l2fib_main; BVT (clib_bihash) * h = &msm->mac_table; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; 
l2fib_entry_key_t key; l2fib_entry_result_t result; @@ -108,7 +108,7 @@ show_l2fib (vlib_main_t * vm, l2fib_main_t *msm = &l2fib_main; l2_bridge_domain_t *bd_config; BVT (clib_bihash) * h = &msm->mac_table; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; l2fib_entry_key_t key; l2fib_entry_result_t result; @@ -961,7 +961,7 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (i < (h->nbuckets - 3)) { - clib_bihash_bucket_t *b = &h->buckets[i + 3]; + BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); b = &h->buckets[i + 1]; if (b->offset) @@ -972,7 +972,7 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, } } - clib_bihash_bucket_t *b = &h->buckets[i]; + BVT (clib_bihash_bucket) * b = &h->buckets[i]; if (b->offset == 0) continue; BVT (clib_bihash_value) * v = diff --git a/src/vppinfra.am b/src/vppinfra.am index 785445a6..533bacd6 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -42,6 +42,8 @@ TESTS += test_bihash_template \ test_zvec endif +TESTS += test_bihash_template + noinst_PROGRAMS = $(TESTS) check_PROGRAMS = $(TESTS) diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h index 6b1b563e..361665be 100644 --- a/src/vppinfra/bihash_16_8.h +++ b/src/vppinfra/bihash_16_8.h @@ -13,9 +13,12 @@ * limitations under the License. */ #undef BIHASH_TYPE +#undef BIHASH_KVP_CACHE_SIZE +#undef BIHASH_KVP_PER_PAGE #define BIHASH_TYPE _16_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_CACHE_SIZE 5 #ifndef __included_bihash_16_8_h__ #define __included_bihash_16_8_h__ diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h index db77daa4..d0be028c 100644 --- a/src/vppinfra/bihash_24_8.h +++ b/src/vppinfra/bihash_24_8.h @@ -13,9 +13,12 @@ * limitations under the License. 
*/ #undef BIHASH_TYPE +#undef BIHASH_KVP_CACHE_SIZE +#undef BIHASH_KVP_PER_PAGE #define BIHASH_TYPE _24_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_CACHE_SIZE 3 #ifndef __included_bihash_24_8_h__ #define __included_bihash_24_8_h__ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h index 48079e0a..107bcace 100644 --- a/src/vppinfra/bihash_48_8.h +++ b/src/vppinfra/bihash_48_8.h @@ -14,9 +14,12 @@ */ #undef BIHASH_TYPE +#undef BIHASH_KVP_CACHE_SIZE +#undef BIHASH_KVP_PER_PAGE #define BIHASH_TYPE _48_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_CACHE_SIZE 2 #ifndef __included_bihash_48_8_h__ #define __included_bihash_48_8_h__ diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h index 68049351..f81002d6 100644 --- a/src/vppinfra/bihash_8_8.h +++ b/src/vppinfra/bihash_8_8.h @@ -13,9 +13,12 @@ * limitations under the License. */ #undef BIHASH_TYPE +#undef BIHASH_KVP_CACHE_SIZE +#undef BIHASH_KVP_PER_PAGE #define BIHASH_TYPE _8_8 #define BIHASH_KVP_PER_PAGE 4 +#define BIHASH_KVP_CACHE_SIZE 5 #ifndef __included_bihash_8_8_h__ #define __included_bihash_8_8_h__ diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index 004e8a9a..e3a5759d 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -19,12 +19,15 @@ void BV (clib_bihash_init) (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size) { void *oldheap; + int i; nbuckets = 1 << (max_log2 (nbuckets)); h->name = (u8 *) name; h->nbuckets = nbuckets; h->log2_nbuckets = max_log2 (nbuckets); + h->cache_hits = 0; + h->cache_misses = 0; h->mheap = mheap_alloc (0 /* use VM */ , memory_size); @@ -33,6 +36,9 @@ void BV (clib_bihash_init) h->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + for (i = 0; i < nbuckets; i++) + BV (clib_bihash_reset_cache) (h->buckets + i); + clib_mem_set_heap (oldheap); } @@ -87,10 +93,10 @@ BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * 
v, } static inline void -BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) +BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b) { BVT (clib_bihash_value) * v; - clib_bihash_bucket_t working_bucket __attribute__ ((aligned (8))); + BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8))); void *oldheap; BVT (clib_bihash_value) * working_copy; u32 thread_index = os_get_thread_index (); @@ -129,6 +135,9 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) clib_mem_set_heap (oldheap); + /* Turn off the cache */ + BV (clib_bihash_cache_enable_disable) (b, 0); + v = BV (clib_bihash_get_value) (h, b->offset); clib_memcpy (working_copy, v, sizeof (*v) * (1 << b->log2_pages)); @@ -235,7 +244,7 @@ int BV (clib_bihash_add_del) (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add) { u32 bucket_index; - clib_bihash_bucket_t *b, tmp_b; + BVT (clib_bihash_bucket) * b, tmp_b; BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy; int rv = 0; int i, limit; @@ -276,6 +285,7 @@ int BV (clib_bihash_add_del) goto unlock; } + /* Note: this leaves the cache disabled */ BV (make_working_copy) (h, b); v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset); @@ -405,19 +415,22 @@ expand_ok: BV (value_free) (h, v, old_log2_pages); unlock: + BV (clib_bihash_reset_cache) (b); + BV (clib_bihash_cache_enable_disable) (b, 1 /* enable */ ); CLIB_MEMORY_BARRIER (); h->writer_lock[0] = 0; return rv; } int BV (clib_bihash_search) - (const BVT (clib_bihash) * h, + (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep) { u64 hash; u32 bucket_index; BVT (clib_bihash_value) * v; - clib_bihash_bucket_t *b; + BVT (clib_bihash_kv) * kvp; + BVT (clib_bihash_bucket) * b; int i, limit; ASSERT (valuep); @@ -430,6 +443,22 @@ int BV (clib_bihash_search) if (b->offset == 0) return -1; + /* Check the cache, if currently enabled */ + if (PREDICT_TRUE (b->cache_lru & (1 << 
15))) + { + limit = BIHASH_KVP_CACHE_SIZE; + kvp = b->cache; + for (i = 0; i < limit; i++) + { + if (BV (clib_bihash_key_compare) (kvp[i].key, search_key->key)) + { + *valuep = kvp[i]; + h->cache_hits++; + return 0; + } + } + } + hash >>= h->log2_nbuckets; v = BV (clib_bihash_get_value) (h, b->offset); @@ -442,18 +471,50 @@ int BV (clib_bihash_search) { if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key)) { + u8 cache_slot; *valuep = v->kvp[i]; + + /* Shut off the cache */ + BV (clib_bihash_cache_enable_disable) (b, 0); + CLIB_MEMORY_BARRIER (); + + cache_slot = BV (clib_bihash_get_lru) (b); + b->cache[cache_slot] = v->kvp[i]; + BV (clib_bihash_update_lru) (b, cache_slot); + + /* Reenable the cache */ + BV (clib_bihash_cache_enable_disable) (b, 1); + h->cache_misses++; return 0; } } return -1; } +u8 *BV (format_bihash_lru) (u8 * s, va_list * args) +{ + int i; + BVT (clib_bihash_bucket) * b = va_arg (*args, BVT (clib_bihash_bucket) *); + u16 cache_lru = b->cache_lru; + + s = format (s, "cache %s, order ", cache_lru & (1 << 15) ? 
"on" : "off"); + + for (i = 0; i < BIHASH_KVP_CACHE_SIZE; i++) + s = format (s, "[%d] ", ((cache_lru >> (3 * i)) & 7)); + return (s); +} + +void +BV (clib_bihash_update_lru_not_inline) (BVT (clib_bihash_bucket) * b, u8 slot) +{ + BV (clib_bihash_update_lru) (b, slot); +} + u8 *BV (format_bihash) (u8 * s, va_list * args) { BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *); int verbose = va_arg (*args, int); - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; int i, j, k; u64 active_elements = 0; @@ -503,7 +564,8 @@ u8 *BV (format_bihash) (u8 * s, va_list * args) s = format (s, " %lld active elements\n", active_elements); s = format (s, " %d free lists\n", vec_len (h->freelists)); s = format (s, " %d linear search buckets\n", h->linear_buckets); - + s = format (s, " %lld cache hits, %lld cache misses\n", + h->cache_hits, h->cache_misses); return s; } @@ -511,7 +573,7 @@ void BV (clib_bihash_foreach_key_value_pair) (BVT (clib_bihash) * h, void *callback, void *arg) { int i, j, k; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; BVT (clib_bihash_value) * v; void (*fp) (BVT (clib_bihash_kv) *, void *) = callback; diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h index 4ea14ff0..feb6fb68 100644 --- a/src/vppinfra/bihash_template.h +++ b/src/vppinfra/bihash_template.h @@ -48,12 +48,10 @@ typedef struct BV (clib_bihash_value) }; } BVT (clib_bihash_value); -/* - * This is shared across all uses of the template, so it needs - * a "personal" #include recursion block - */ -#ifndef __defined_clib_bihash_bucket_t__ -#define __defined_clib_bihash_bucket_t__ +#if BIHASH_KVP_CACHE_SIZE > 5 +#error Requested KVP cache LRU data exceeds 16 bits +#endif + typedef struct { union @@ -62,36 +60,139 @@ typedef struct { u32 offset; u8 linear_search; - u8 pad[2]; u8 log2_pages; + u16 cache_lru; }; u64 as_u64; }; -} clib_bihash_bucket_t; -#endif /* __defined_clib_bihash_bucket_t__ */ + BVT (clib_bihash_kv) 
cache[BIHASH_KVP_CACHE_SIZE]; +} BVT (clib_bihash_bucket); typedef struct { BVT (clib_bihash_value) * values; - clib_bihash_bucket_t *buckets; + BVT (clib_bihash_bucket) * buckets; volatile u32 *writer_lock; BVT (clib_bihash_value) ** working_copies; int *working_copy_lengths; - clib_bihash_bucket_t saved_bucket; + BVT (clib_bihash_bucket) saved_bucket; u32 nbuckets; u32 log2_nbuckets; u32 linear_buckets; u8 *name; + u64 cache_hits; + u64 cache_misses; + BVT (clib_bihash_value) ** freelists; void *mheap; } BVT (clib_bihash); -static inline void *BV (clib_bihash_get_value) (const BVT (clib_bihash) * h, +static inline void +BV (clib_bihash_update_lru) (BVT (clib_bihash_bucket) * b, u8 slot) +{ + u16 value, tmp, mask; + u8 found_lru_pos; + u16 save_hi; + + if (BIHASH_KVP_CACHE_SIZE < 2) + return; + + ASSERT (slot < BIHASH_KVP_CACHE_SIZE); + + /* First, find the slot in cache_lru */ + mask = slot; + if (BIHASH_KVP_CACHE_SIZE > 1) + mask |= slot << 3; + if (BIHASH_KVP_CACHE_SIZE > 2) + mask |= slot << 6; + if (BIHASH_KVP_CACHE_SIZE > 3) + mask |= slot << 9; + if (BIHASH_KVP_CACHE_SIZE > 4) + mask |= slot << 12; + + value = b->cache_lru; + tmp = value ^ mask; + + /* Already the most-recently used? */ + if ((tmp & 7) == 0) + return; + + found_lru_pos = ((tmp & (7 << 3)) == 0) ? 1 : 0; + if (BIHASH_KVP_CACHE_SIZE > 2) + found_lru_pos = ((tmp & (7 << 6)) == 0) ? 2 : found_lru_pos; + if (BIHASH_KVP_CACHE_SIZE > 3) + found_lru_pos = ((tmp & (7 << 9)) == 0) ? 3 : found_lru_pos; + if (BIHASH_KVP_CACHE_SIZE > 4) + found_lru_pos = ((tmp & (7 << 12)) == 0) ? 
4 : found_lru_pos; + + ASSERT (found_lru_pos); + + /* create a mask to kill bits in or above slot */ + mask = 0xFFFF << found_lru_pos; + mask <<= found_lru_pos; + mask <<= found_lru_pos; + mask ^= 0xFFFF; + tmp = value & mask; + + /* Save bits above slot */ + mask ^= 0xFFFF; + mask <<= 3; + save_hi = value & mask; + + value = save_hi | (tmp << 3) | slot; + + b->cache_lru = value; +} + +void +BV (clib_bihash_update_lru_not_inline) (BVT (clib_bihash_bucket) * b, + u8 slot); + +static inline u8 BV (clib_bihash_get_lru) (BVT (clib_bihash_bucket) * b) +{ + return (b->cache_lru >> (3 * (BIHASH_KVP_CACHE_SIZE - 1))) & 7; +} + +static inline void BV (clib_bihash_reset_cache) (BVT (clib_bihash_bucket) * b) +{ + u16 initial_lru_value; + + memset (b->cache, 0xff, sizeof (b->cache)); + + /* + * We'll want the cache to be loaded from slot 0 -> slot N, so + * the initial LRU order is reverse index order. + */ + if (BIHASH_KVP_CACHE_SIZE == 1) + initial_lru_value = 0; + else if (BIHASH_KVP_CACHE_SIZE == 2) + initial_lru_value = (0 << 3) | (1 << 0); + else if (BIHASH_KVP_CACHE_SIZE == 3) + initial_lru_value = (0 << 6) | (1 << 3) | (2 << 0); + else if (BIHASH_KVP_CACHE_SIZE == 4) + initial_lru_value = (0 << 9) | (1 << 6) | (2 << 3) | (3 << 0); + else if (BIHASH_KVP_CACHE_SIZE == 5) + initial_lru_value = (0 << 12) | (1 << 9) | (2 << 6) | (3 << 3) | (4 << 0); + + b->cache_lru = initial_lru_value; +} + +static inline void BV (clib_bihash_cache_enable_disable) + (BVT (clib_bihash_bucket) * b, u8 enable) +{ + BVT (clib_bihash_bucket) tmp_b; + tmp_b.as_u64 = b->as_u64; + tmp_b.cache_lru &= 0x7FFF; + tmp_b.cache_lru |= enable << 15; + b->as_u64 = tmp_b.as_u64; +} + +static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h, uword offset) { u8 *hp = h->mheap; @@ -100,7 +201,7 @@ static inline void *BV (clib_bihash_get_value) (const BVT (clib_bihash) * h, return (void *) vp; } -static inline uword BV (clib_bihash_get_offset) (const BVT (clib_bihash) * h, +static inline uword BV 
(clib_bihash_get_offset) (BVT (clib_bihash) * h, void *v) { u8 *hp, *vp; @@ -119,7 +220,7 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h); int BV (clib_bihash_add_del) (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add); -int BV (clib_bihash_search) (const BVT (clib_bihash) * h, +int BV (clib_bihash_search) (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * search_v, BVT (clib_bihash_kv) * return_v); @@ -128,18 +229,19 @@ void BV (clib_bihash_foreach_key_value_pair) (BVT (clib_bihash) * h, format_function_t BV (format_bihash); format_function_t BV (format_bihash_kvp); - +format_function_t BV (format_bihash_lru); static inline int BV (clib_bihash_search_inline) - (const BVT (clib_bihash) * h, BVT (clib_bihash_kv) * kvp) + (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * key_result) { u64 hash; u32 bucket_index; BVT (clib_bihash_value) * v; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; + BVT (clib_bihash_kv) * kvp; int i, limit; - hash = BV (clib_bihash_hash) (kvp); + hash = BV (clib_bihash_hash) (key_result); bucket_index = hash & (h->nbuckets - 1); b = &h->buckets[bucket_index]; @@ -147,6 +249,22 @@ static inline int BV (clib_bihash_search_inline) if (b->offset == 0) return -1; + /* Check the cache, if currently enabled */ + if (PREDICT_TRUE (b->cache_lru & (1 << 15))) + { + limit = BIHASH_KVP_CACHE_SIZE; + kvp = b->cache; + for (i = 0; i < limit; i++) + { + if (BV (clib_bihash_key_compare) (kvp[i].key, key_result->key)) + { + *key_result = kvp[i]; + h->cache_hits++; + return 0; + } + } + } + hash >>= h->log2_nbuckets; v = BV (clib_bihash_get_value) (h, b->offset); @@ -159,9 +277,22 @@ static inline int BV (clib_bihash_search_inline) for (i = 0; i < limit; i++) { - if (BV (clib_bihash_key_compare) (v->kvp[i].key, kvp->key)) + if (BV (clib_bihash_key_compare) (v->kvp[i].key, key_result->key)) { - *kvp = v->kvp[i]; + u8 cache_slot; + *key_result = v->kvp[i]; + + /* Shut off the cache */ + BV (clib_bihash_cache_enable_disable) (b, 0); + 
CLIB_MEMORY_BARRIER (); + + cache_slot = BV (clib_bihash_get_lru) (b); + b->cache[cache_slot] = v->kvp[i]; + BV (clib_bihash_update_lru) (b, cache_slot); + + /* Reenable the cache */ + BV (clib_bihash_cache_enable_disable) (b, 1); + h->cache_misses++; return 0; } } @@ -169,13 +300,14 @@ static inline int BV (clib_bihash_search_inline) } static inline int BV (clib_bihash_search_inline_2) - (const BVT (clib_bihash) * h, + (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep) { u64 hash; u32 bucket_index; BVT (clib_bihash_value) * v; - clib_bihash_bucket_t *b; + BVT (clib_bihash_bucket) * b; + BVT (clib_bihash_kv) * kvp; int i, limit; ASSERT (valuep); @@ -188,6 +320,22 @@ static inline int BV (clib_bihash_search_inline_2) if (b->offset == 0) return -1; + /* Check the cache, if currently enabled */ + if (PREDICT_TRUE (b->cache_lru & (1 << 15))) + { + limit = BIHASH_KVP_CACHE_SIZE; + kvp = b->cache; + for (i = 0; i < limit; i++) + { + if (BV (clib_bihash_key_compare) (kvp[i].key, search_key->key)) + { + *valuep = kvp[i]; + h->cache_hits++; + return 0; + } + } + } + hash >>= h->log2_nbuckets; v = BV (clib_bihash_get_value) (h, b->offset); @@ -201,14 +349,26 @@ static inline int BV (clib_bihash_search_inline_2) { if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key)) { + u8 cache_slot; *valuep = v->kvp[i]; + + /* Shut off the cache */ + BV (clib_bihash_cache_enable_disable) (b, 0); + CLIB_MEMORY_BARRIER (); + + cache_slot = BV (clib_bihash_get_lru) (b); + b->cache[cache_slot] = v->kvp[i]; + BV (clib_bihash_update_lru) (b, cache_slot); + + /* Reenable the cache */ + BV (clib_bihash_cache_enable_disable) (b, 1); + h->cache_misses++; return 0; } } return -1; } - #endif /* __included_bihash_template_h__ */ /** @endcond */ diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c index 1e262430..589c815d 100644 --- a/src/vppinfra/test_bihash_template.c +++ b/src/vppinfra/test_bihash_template.c @@ 
-236,12 +236,45 @@ test_bihash (test_main_t * tm) return 0; } +clib_error_t * +test_bihash_cache (test_main_t * tm) +{ + u32 lru; + BVT (clib_bihash_bucket) _b, *b = &_b; + + BV (clib_bihash_reset_cache) (b); + + fformat (stdout, "Initial LRU config: %U\n", BV (format_bihash_lru), b); + + BV (clib_bihash_update_lru_not_inline) (b, 3); + + fformat (stdout, "use slot 3, LRU config: %U\n", BV (format_bihash_lru), b); + + BV (clib_bihash_update_lru) (b, 1); + + fformat (stdout, "use slot 1 LRU config: %U\n", BV (format_bihash_lru), b); + + lru = BV (clib_bihash_get_lru) (b); + + fformat (stdout, "least-recently-used is %d\n", lru); + + BV (clib_bihash_update_lru) (b, 4); + + fformat (stdout, "use slot 4 LRU config: %U\n", BV (format_bihash_lru), b); + + lru = BV (clib_bihash_get_lru) (b); + + fformat (stdout, "least-recently-used is %d\n", lru); + + return 0; +} + clib_error_t * test_bihash_main (test_main_t * tm) { unformat_input_t *i = tm->input; clib_error_t *error; - int test_vec64 = 0; + int which = 0; while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { @@ -261,7 +294,10 @@ test_bihash_main (test_main_t * tm) else if (unformat (i, "search %d", &tm->search_iter)) ; else if (unformat (i, "vec64")) - test_vec64 = 1; + which = 1; + else if (unformat (i, "cache")) + which = 2; + else if (unformat (i, "verbose")) tm->verbose = 1; else @@ -269,10 +305,23 @@ test_bihash_main (test_main_t * tm) format_unformat_error, i); } - if (test_vec64) - error = test_bihash_vec64 (tm); - else - error = test_bihash (tm); + switch (which) + { + case 0: + error = test_bihash (tm); + break; + + case 1: + error = test_bihash_vec64 (tm); + break; + + case 2: + error = test_bihash_cache (tm); + break; + + default: + return clib_error_return (0, "no such test?"); + } return error; } -- cgit 1.2.3-korg From 8d00fff8dff4e449767601645422e03df92a83af Mon Sep 17 00:00:00 2001 From: John Lo Date: Thu, 3 Aug 2017 00:35:36 -0400 Subject: Add support for API client to receive L2 MAC events 
Added APIs want_l2_macs_events and l2_macs_event to allow an API client to receive notification events from VPP for MAC learned or aged in L2FIB. Only one API client is allowed for L2 MAC events. The want_l2_macs_events API allow caller to specify MAC learn limit, event scan delay and max number of MACs that can be included in a event message. These parameters should be choosen properly as to not have too many MAC events sent by VPP and overwhelm the API share memory. They can all be left as 0's so VPP will setup reasonable defaults which are: 1000 learn limit, 100 msec scan delay and 100 MACs per event message. If want_l2_macs_events is never called, VPP learning and aging should behave as before except that MAC entries provisioned by API or CLI will not be aged, even if it is not set as static_mac. These non static MACs, however, can be overwritten by MAC learning on a MAC move as a leared MAC. Only learned MACs are subject to aging. Change-Id: Ia3757a80cf8adb2811a089d2eafbd6439461285c Signed-off-by: John Lo --- src/vat/api_format.c | 86 +++++++++- src/vnet/api_errno.h | 5 +- src/vnet/l2/l2.api | 65 +++++++- src/vnet/l2/l2_api.c | 83 +++++++++- src/vnet/l2/l2_bd.c | 37 +---- src/vnet/l2/l2_fib.c | 394 ++++++++++++++++++++++++++++++++++------------ src/vnet/l2/l2_fib.h | 37 ++++- src/vnet/l2/l2_fwd.c | 3 +- src/vnet/l2/l2_learn.c | 30 ++-- src/vnet/l2/l2_learn.h | 5 + src/vpp/api/custom_dump.c | 33 +++- 11 files changed, 605 insertions(+), 173 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vat/api_format.c b/src/vat/api_format.c index bbd97ba1..27286686 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1283,6 +1283,30 @@ vl_api_ip6_nd_event_t_handler_json (vl_api_ip6_nd_event_t * mp) /* JSON output not supported */ } +static void +vl_api_l2_macs_event_t_handler (vl_api_l2_macs_event_t * mp) +{ + u32 n_macs = ntohl (mp->n_macs); + errmsg ("L2MAC event recived with pid %d cl-idx %d for %d macs: \n", + ntohl (mp->pid), 
mp->client_index, n_macs); + int i; + for (i = 0; i < n_macs; i++) + { + vl_api_mac_entry_t *mac = &mp->mac[i]; + errmsg (" [%d] sw_if_index %d mac_addr %U is_del %d \n", + i + 1, ntohl (mac->sw_if_index), + format_ethernet_address, mac->mac_addr, mac->is_del); + if (i == 1000) + break; + } +} + +static void +vl_api_l2_macs_event_t_handler_json (vl_api_l2_macs_event_t * mp) +{ + /* JSON output not supported */ +} + #define vl_api_bridge_domain_details_t_endian vl_noop_handler #define vl_api_bridge_domain_details_t_print vl_noop_handler @@ -4597,6 +4621,7 @@ _(modify_vhost_user_if_reply) \ _(delete_vhost_user_if_reply) \ _(want_ip4_arp_events_reply) \ _(want_ip6_nd_events_reply) \ +_(want_l2_macs_events_reply) \ _(input_acl_set_interface_reply) \ _(ipsec_spd_add_del_reply) \ _(ipsec_interface_add_del_spd_reply) \ @@ -4813,6 +4838,8 @@ _(WANT_IP4_ARP_EVENTS_REPLY, want_ip4_arp_events_reply) \ _(IP4_ARP_EVENT, ip4_arp_event) \ _(WANT_IP6_ND_EVENTS_REPLY, want_ip6_nd_events_reply) \ _(IP6_ND_EVENT, ip6_nd_event) \ +_(WANT_L2_MACS_EVENTS_REPLY, want_l2_macs_events_reply) \ +_(L2_MACS_EVENT, l2_macs_event) \ _(INPUT_ACL_SET_INTERFACE_REPLY, input_acl_set_interface_reply) \ _(IP_ADDRESS_DETAILS, ip_address_details) \ _(IP_DETAILS, ip_details) \ @@ -6607,8 +6634,9 @@ api_l2_flags (vat_main_t * vam) unformat_input_t *i = vam->input; vl_api_l2_flags_t *mp; u32 sw_if_index; - u32 feature_bitmap = 0; + u32 flags = 0; u8 sw_if_index_set = 0; + u8 is_set = 0; int ret; /* Parse args required to build the message */ @@ -6628,13 +6656,19 @@ api_l2_flags (vat_main_t * vam) break; } else if (unformat (i, "learn")) - feature_bitmap |= L2INPUT_FEAT_LEARN; + flags |= L2_LEARN; else if (unformat (i, "forward")) - feature_bitmap |= L2INPUT_FEAT_FWD; + flags |= L2_FWD; else if (unformat (i, "flood")) - feature_bitmap |= L2INPUT_FEAT_FLOOD; + flags |= L2_FLOOD; else if (unformat (i, "uu-flood")) - feature_bitmap |= L2INPUT_FEAT_UU_FLOOD; + flags |= L2_UU_FLOOD; + else if (unformat (i, 
"arp-term")) + flags |= L2_ARP_TERM; + else if (unformat (i, "off")) + is_set = 0; + else if (unformat (i, "disable")) + is_set = 0; else break; } @@ -6648,7 +6682,8 @@ api_l2_flags (vat_main_t * vam) M (L2_FLAGS, mp); mp->sw_if_index = ntohl (sw_if_index); - mp->feature_bitmap = ntohl (feature_bitmap); + mp->feature_bitmap = ntohl (flags); + mp->is_set = is_set; S (mp); W (ret); @@ -12534,6 +12569,42 @@ api_want_ip6_nd_events (vat_main_t * vam) return ret; } +static int +api_want_l2_macs_events (vat_main_t * vam) +{ + unformat_input_t *line_input = vam->input; + vl_api_want_l2_macs_events_t *mp; + u8 enable_disable = 1; + u32 scan_delay = 0; + u32 max_macs_in_event = 0; + u32 learn_limit = 0; + int ret; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "learn-limit %d", &learn_limit)) + ; + else if (unformat (line_input, "scan-delay %d", &scan_delay)) + ; + else if (unformat (line_input, "max-entries %d", &max_macs_in_event)) + ; + else if (unformat (line_input, "disable")) + enable_disable = 0; + else + break; + } + + M (WANT_L2_MACS_EVENTS, mp); + mp->enable_disable = enable_disable; + mp->pid = htonl (getpid ()); + mp->learn_limit = htonl (learn_limit); + mp->scan_delay = (u8) scan_delay; + mp->max_macs_in_event = (u8) (max_macs_in_event / 10); + S (mp); + W (ret); + return ret; +} + static int api_input_acl_set_interface (vat_main_t * vam) { @@ -19831,7 +19902,7 @@ _(l2fib_add_del, \ _(l2fib_flush_bd, "bd_id ") \ _(l2fib_flush_int, " | sw_if_index ") \ _(l2_flags, \ - "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood]\n") \ + "sw_if | sw_if_index [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ _(bridge_flags, \ "bd_id [learn] [forward] [uu-flood] [flood] [arp-term] [disable]\n") \ _(tap_connect, \ @@ -19974,6 +20045,7 @@ _(input_acl_set_interface, \ " [l2-table ] [del]") \ _(want_ip4_arp_events, "address [del]") \ _(want_ip6_nd_events, "address [del]") \ +_(want_l2_macs_events, 
"[disable] [learn-limit ] [scan-delay ] [max-entries ]") \ _(ip_address_dump, "(ipv4 | ipv6) ( | sw_if_index )") \ _(ip_dump, "ipv4 | ipv6") \ _(ipsec_spd_add_del, "spd_id [del]") \ diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 747c65e7..22522f34 100644 --- a/src/vnet/api_errno.h +++ b/src/vnet/api_errno.h @@ -112,8 +112,9 @@ _(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \ _(BD_IN_USE, -120, "Bridge domain has member interfaces") \ _(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \ _(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceed 16M limit") \ -_(UNSUPPORTED, -123, "Unsupported") \ -_(SUBIF_DOESNT_EXIST, -124, "Subinterface doesn't exist") +_(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \ +_(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exist for L2 MACs events") \ +_(UNSUPPORTED, -125, "Unsupported") typedef enum { diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index bb3990c6..e508bfb5 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -133,12 +133,64 @@ autoreply define l2fib_add_del u8 bvi_mac; }; -/** \brief Set L2 flags request !!! 
TODO - need more info, feature bits in l2_input.h +/** \brief Register to receive L2 MAC events for learned and aged MAC + Will also change MAC learn limit to L2LEARN_INFORM_LIMIT + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param learn_limit - MAC learn limit, 0 => default to 1000 + @param scan_delay - event scan delay in 10 msec unit, 0 => default to 100 msec + @param max_macs_in_event - in units of 10 mac entries, 0 => default to 100 entries + @param enable_disable - 1 => register for MAC events, 0 => cancel registration + @param pid - sender's pid +*/ +autoreply define want_l2_macs_events +{ + u32 client_index; + u32 context; + u32 learn_limit; + u8 scan_delay; + u8 max_macs_in_event; + u8 enable_disable; + u32 pid; +}; + +/** \brief Entry for learned or aged MAC in L2 MAC Events + @param sw_if_index - sw_if_index in the domain + @param mac_addr - mac_address + @param is_del - 0 => newly learned MAC, 1 => aged out MAC +*/ +typeonly define mac_entry +{ + u32 sw_if_index; + u8 mac_addr[6]; + u8 is_del; + u8 spare; +}; + +/** \brief L2 MAC event for a list of learned or aged MACs + @param client_index - opaque cookie to identify the sender + @param pid - client pid registered to receive notification + @param n_macs - number of learned/aged MAC entries + @param mac - array of learned/aged MAC entries +*/ +define l2_macs_event +{ + u32 client_index; + u32 pid; + u32 n_macs; + vl_api_mac_entry_t mac[n_macs]; +}; + +/** \brief Set interface L2 flags (such as L2_LEARN, L2_FWD, + L2_FLOOD, L2_UU_FLOOD, or L2_ARP_TERM bits).
This can be used + to disable one or more of the features represented by the + flag bits on an interface to override what is set as default + for all interfaces in the bridge domain @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface @param is_set - if non-zero, set the bits, else clear them - @param feature_bitmap - non-zero bits to set or clear + @param feature_bitmap - non-zero bits (as above) to set or clear */ define l2_flags { @@ -149,9 +201,10 @@ define l2_flags u32 feature_bitmap; }; -/** \brief Set L2 bits response +/** \brief Set interface L2 flags response @param context - sender context, to match reply w/ request @param retval - return code for the set l2 bits request + @param resulting_feature_bitmap - the internal l2 feature bitmap after the request is implemented */ define l2_flags_reply { @@ -250,12 +303,12 @@ manual_print manual_endian define bridge_domain_details }; /** \brief Set bridge flags (such as L2_LEARN, L2_FWD, L2_FLOOD, - L2_UU_FLOOD, or L2_ARP_TERM) request + L2_UU_FLOOD, or L2_ARP_TERM bits) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the bridge domain to set the flags for @param is_set - if non-zero, set the flags, else clear them - @param feature_bitmap - bits that are non-zero to set or clear + @param feature_bitmap - bits (as above) that are non-zero to set or clear */ define bridge_flags { @@ -269,7 +322,7 @@ define bridge_flags /** \brief Set bridge flags response @param context - sender context, to match reply w/ request @param retval - return code for the set bridge flags request - @param resulting_feature_bitmap - the feature bitmap value after the request is implemented + @param resulting_feature_bitmap - the internal L2 feature bitmap after the request is implemented */ define bridge_flags_reply { diff --git a/src/vnet/l2/l2_api.c 
b/src/vnet/l2/l2_api.c index a0b40d6d..c81cbad7 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -55,6 +56,7 @@ _(L2FIB_FLUSH_ALL, l2fib_flush_all) \ _(L2FIB_FLUSH_INT, l2fib_flush_int) \ _(L2FIB_FLUSH_BD, l2fib_flush_bd) \ _(L2FIB_ADD_DEL, l2fib_add_del) \ +_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \ _(L2_FLAGS, l2_flags) \ _(BRIDGE_DOMAIN_ADD_DEL, bridge_domain_add_del) \ _(BRIDGE_DOMAIN_DUMP, bridge_domain_dump) \ @@ -221,8 +223,8 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) goto bad_sw_if_index; } } - u32 static_mac = mp->static_mac ? 1 : 0; - u32 bvi_mac = mp->bvi_mac ? 1 : 0; + u8 static_mac = mp->static_mac ? 1 : 0; + u8 bvi_mac = mp->bvi_mac ? 1 : 0; l2fib_add_fwd_entry (mac, bd_index, sw_if_index, static_mac, bvi_mac); } @@ -237,6 +239,58 @@ vl_api_l2fib_add_del_t_handler (vl_api_l2fib_add_del_t * mp) REPLY_MACRO (VL_API_L2FIB_ADD_DEL_REPLY); } +static void +vl_api_want_l2_macs_events_t_handler (vl_api_want_l2_macs_events_t * mp) +{ + int rv = 0; + vl_api_want_l2_macs_events_reply_t *rmp; + l2learn_main_t *lm = &l2learn_main; + l2fib_main_t *fm = &l2fib_main; + u32 pid = ntohl (mp->pid); + u32 learn_limit = ntohl (mp->learn_limit); + + if (mp->enable_disable) + { + if (lm->client_pid == 0) + { + lm->client_pid = pid; + lm->client_index = mp->client_index; + + if (mp->max_macs_in_event) + fm->max_macs_in_event = mp->max_macs_in_event * 10; + else + fm->max_macs_in_event = L2FIB_EVENT_MAX_MACS_DEFAULT; + + if (mp->scan_delay) + fm->event_scan_delay = (f64) (mp->scan_delay) * 10e-3; + else + fm->event_scan_delay = L2FIB_EVENT_SCAN_DELAY_DEFAULT; + + /* change learn limit and flush all learned MACs */ + if (learn_limit && (learn_limit < L2LEARN_DEFAULT_LIMIT)) + lm->global_learn_limit = learn_limit; + else + lm->global_learn_limit = L2FIB_EVENT_LEARN_LIMIT_DEFAULT; + + l2fib_flush_all_mac (vlib_get_main ()); + } + else if (lm->client_pid != pid) + { + rv = 
VNET_API_ERROR_L2_MACS_EVENT_CLINET_PRESENT; + goto exit; + } + } + else if (lm->client_pid) + { + lm->client_pid = 0; + lm->client_index = 0; + lm->global_learn_limit = L2LEARN_DEFAULT_LIMIT; + } + +exit: + REPLY_MACRO (VL_API_WANT_L2_MACS_EVENTS_REPLY); +} + static void vl_api_l2fib_flush_int_t_handler (vl_api_l2fib_flush_int_t * mp) { @@ -293,8 +347,25 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp) VALIDATE_SW_IF_INDEX (mp); u32 sw_if_index = ntohl (mp->sw_if_index); - u32 flags = ntohl (mp->feature_bitmap) & L2INPUT_VALID_MASK; - rbm = l2input_intf_bitmap_enable (sw_if_index, flags, mp->is_set); + u32 flags = ntohl (mp->feature_bitmap); + u32 bitmap = 0; + + if (flags & L2_LEARN) + bitmap |= L2INPUT_FEAT_LEARN; + + if (flags & L2_FWD) + bitmap |= L2INPUT_FEAT_FWD; + + if (flags & L2_FLOOD) + bitmap |= L2INPUT_FEAT_FLOOD; + + if (flags & L2_UU_FLOOD) + bitmap |= L2INPUT_FEAT_UU_FLOOD; + + if (flags & L2_ARP_TERM) + bitmap |= L2INPUT_FEAT_ARP_TERM; + + rbm = l2input_intf_bitmap_enable (sw_if_index, bitmap, mp->is_set); BAD_SW_IF_INDEX_LABEL; @@ -455,13 +526,13 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp) goto out; } - bd_set_flags (vm, bd_index, flags, mp->is_set); + u32 bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set); out: /* *INDENT-OFF* */ REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY, ({ - rmp->resulting_feature_bitmap = ntohl(flags); + rmp->resulting_feature_bitmap = ntohl(bitmap); })); /* *INDENT-ON* */ } diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c index a87d02f2..6e0db058 100644 --- a/src/vnet/l2/l2_bd.c +++ b/src/vnet/l2/l2_bd.c @@ -263,7 +263,7 @@ bd_set_flags (vlib_main_t * vm, u32 bd_index, u32 flags, u32 enable) bd_config->feature_bitmap &= ~feature_bitmap; } - return 0; + return bd_config->feature_bitmap; } /** @@ -328,12 +328,7 @@ bd_learn (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_LEARN, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of 
range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_LEARN, enable); done: return error; @@ -397,12 +392,7 @@ bd_fwd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_FWD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_FWD, enable); done: return error; @@ -468,12 +458,7 @@ bd_flood (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_FLOOD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_FLOOD, enable); done: return error; @@ -538,12 +523,7 @@ bd_uu_flood (vlib_main_t * vm, } /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_UU_FLOOD, enable); done: return error; @@ -605,12 +585,7 @@ bd_arp_term (vlib_main_t * vm, enable = 0; /* set the bridge domain flag */ - if (bd_set_flags (vm, bd_index, L2_ARP_TERM, enable)) - { - error = - clib_error_return (0, "bridge-domain id %d out of range", bd_index); - goto done; - } + bd_set_flags (vm, bd_index, L2_ARP_TERM, enable); done: return error; diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 7e59b098..8aa0ac29 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -31,6 +31,17 @@ #include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + /** * @file * @brief Ethernet MAC Address FIB Table Management. 
@@ -117,6 +128,7 @@ show_l2fib (vlib_main_t * vm, int i, j, k; u8 verbose = 0; u8 raw = 0; + u8 learn = 0; u32 bd_id, bd_index = ~0; u8 now = (u8) (vlib_time_now (vm) / 60); u8 *s = 0; @@ -127,12 +139,18 @@ show_l2fib (vlib_main_t * vm, verbose = 1; else if (unformat (input, "bd_index %d", &bd_index)) verbose = 1; + else if (unformat (input, "learn")) + { + learn = 1; + verbose = 0; + } else if (unformat (input, "bd_id %d", &bd_id)) { uword *p = hash_get (bdm->bd_index_by_bd_id, bd_id); if (p) { - verbose = 1; + if (learn == 0) + verbose = 1; bd_index = p[0]; } else @@ -155,7 +173,7 @@ show_l2fib (vlib_main_t * vm, if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) continue; - if (verbose && first_entry) + if ((verbose || learn) && first_entry) { first_entry = 0; vlib_cli_output (vm, @@ -168,13 +186,19 @@ show_l2fib (vlib_main_t * vm, key.raw = v->kvp[k].key; result.raw = v->kvp[k].value; - if (verbose + if ((verbose || learn) & ((bd_index >> 31) || (bd_index == key.fields.bd_index))) { + if (learn && result.fields.age_not) + { + total_entries++; + continue; /* skip provisioned macs */ + } + bd_config = vec_elt_at_index (l2input_main.bd_configs, key.fields.bd_index); - if (bd_config->mac_age && !result.fields.static_mac) + if (bd_config->mac_age && !result.fields.age_not) { i16 delta = now - result.fields.timestamp; delta += delta < 0 ? 
256 : 0; @@ -206,9 +230,19 @@ show_l2fib (vlib_main_t * vm, if (total_entries == 0) vlib_cli_output (vm, "no l2fib entries"); else - vlib_cli_output (vm, - "%lld l2fib entries with %d learned (or non-static) entries", - total_entries, l2learn_main.global_learn_count); + { + l2learn_main_t *lm = &l2learn_main; + vlib_cli_output (vm, "L2FIB total/learned entries: %d/%d " + "Last scan time: %.4esec Learn limit: %d ", + total_entries, lm->global_learn_count, + msm->age_scan_duration, lm->global_learn_limit); + if (lm->client_pid) + vlib_cli_output (vm, "L2MAC events client PID: %d " + "Last e-scan time: %.4esec Delay: %.2esec " + "Max macs in event: %d", + lm->client_pid, msm->evt_scan_duration, + msm->event_scan_delay, msm->max_macs_in_event); + } if (raw) vlib_cli_output (vm, "Raw Hash Table:\n%U\n", @@ -242,7 +276,7 @@ show_l2fib (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_l2fib_cli, static) = { .path = "show l2fib", - .short_help = "show l2fib [verbose | bd_id | bd_index | raw]", + .short_help = "show l2fib [verbose | learn | bd_id | bd_index | raw", .function = show_l2fib, }; /* *INDENT-ON* */ @@ -309,36 +343,39 @@ l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index) */ void l2fib_add_entry (u64 mac, u32 bd_index, - u32 sw_if_index, u32 static_mac, u32 filter_mac, u32 bvi_mac) + u32 sw_if_index, u8 static_mac, u8 filter_mac, u8 bvi_mac) { l2fib_entry_key_t key; l2fib_entry_result_t result; __attribute__ ((unused)) u32 bucket_contents; - l2fib_main_t *mp = &l2fib_main; + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; BVT (clib_bihash_kv) kv; /* set up key */ key.raw = l2fib_make_key ((u8 *) & mac, bd_index); + /* check if entry alread exist */ + if (BV (clib_bihash_search) (&fm->mac_table, &kv, &kv)) + { + /* decrement counter if overwriting a learned mac */ + result.raw = kv.value; + if ((result.fields.age_not == 0) && (lm->global_learn_count)) + lm->global_learn_count--; + } + /* set up result */ result.raw = 0; /* clear all 
fields */ result.fields.sw_if_index = sw_if_index; result.fields.static_mac = static_mac; result.fields.filter = filter_mac; result.fields.bvi = bvi_mac; - if (!static_mac) - result.fields.sn = l2fib_cur_seq_num (bd_index, sw_if_index); + result.fields.age_not = 1; /* no aging for provisioned entry */ kv.key = key.raw; kv.value = result.raw; - BV (clib_bihash_add_del) (&mp->mac_table, &kv, 1 /* is_add */ ); - - /* increment counter if dynamically learned mac */ - if (result.fields.static_mac == 0) - { - l2learn_main.global_learn_count++; - } + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1 /* is_add */ ); } /** @@ -630,13 +667,8 @@ l2fib_del_entry_by_key (u64 raw_key) result.raw = kv.value; /* decrement counter if dynamically learned mac */ - if (result.fields.static_mac == 0) - { - if (l2learn_main.global_learn_count > 0) - { - l2learn_main.global_learn_count--; - } - } + if ((result.fields.age_not == 0) && (l2learn_main.global_learn_count)) + l2learn_main.global_learn_count--; /* Remove entry from hash table */ BV (clib_bihash_add_del) (&mp->mac_table, &kv, 0 /* is_add */ ); @@ -910,111 +942,273 @@ BVT (clib_bihash) * get_mac_table (void) return &mp->mac_table; } +static_always_inline void * +allocate_mac_evt_buf (u32 client, u32 client_index) +{ + l2fib_main_t *fm = &l2fib_main; + vl_api_l2_macs_event_t *mp = vl_msg_api_alloc + (sizeof (*mp) + (fm->max_macs_in_event * sizeof (vl_api_mac_entry_t))); + mp->_vl_msg_id = htons (VL_API_L2_MACS_EVENT); + mp->pid = htonl (client); + mp->client_index = client_index; + return mp; +} + +static_always_inline f64 +l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) +{ + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; + + BVT (clib_bihash) * h = &fm->mac_table; + int i, j, k; + f64 last_start = start_time; + f64 accum_t = 0; + f64 delta_t = 0; + u32 evt_idx = 0; + u32 learn_count = 0; + u32 client = lm->client_pid; + u32 cl_idx = lm->client_index; + vl_api_l2_macs_event_t *mp = 0; + 
unix_shared_memory_queue_t *q = 0; + + if (client) + { + mp = allocate_mac_evt_buf (client, cl_idx); + q = vl_api_client_index_to_input_queue (lm->client_index); + } + + for (i = 0; i < h->nbuckets; i++) + { + /* allow no more than 20us without a pause */ + delta_t = vlib_time_now (vm) - last_start; + if (delta_t > 20e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + last_start = vlib_time_now (vm); + accum_t += delta_t; + } + + if (i < (h->nbuckets - 3)) + { + BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; + CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); + b = &h->buckets[i + 1]; + if (b->offset) + { + BVT (clib_bihash_value) * v = + BV (clib_bihash_get_value) (h, b->offset); + CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); + } + } + + BVT (clib_bihash_bucket) * b = &h->buckets[i]; + if (b->offset == 0) + continue; + BVT (clib_bihash_value) * v = BV (clib_bihash_get_value) (h, b->offset); + for (j = 0; j < (1 << b->log2_pages); j++) + { + for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) + { + if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL) + continue; + + l2fib_entry_key_t key = {.raw = v->kvp[k].key }; + l2fib_entry_result_t result = {.raw = v->kvp[k].value }; + + if (result.fields.age_not == 0) + learn_count++; + + if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) + { + /* evet message full, sent it and start a new one */ + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = allocate_mac_evt_buf (client, cl_idx); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" 
+ " %d MAC entries lost", client, evt_idx); + } + evt_idx = 0; + } + + if (client) + { + if (result.fields.lrn_evt) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, + 6); + mp->mac[evt_idx].is_del = 0; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + /* clear event bit and update mac entry */ + result.fields.lrn_evt = 0; + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + kv.value = result.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 1); + evt_idx++; + continue; /* skip aging */ + } + } + + if (event_only || result.fields.age_not) + continue; /* skip aging - static_mac alsways age_not */ + + /* start aging processing */ + u32 bd_index = key.fields.bd_index; + u32 sw_if_index = result.fields.sw_if_index; + u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; + if (result.fields.sn.as_u16 != sn) + goto age_out; /* stale mac */ + + l2_bridge_domain_t *bd_config = + vec_elt_at_index (l2input_main.bd_configs, bd_index); + + if (bd_config->mac_age == 0) + continue; /* skip aging */ + + i16 delta = (u8) (start_time / 60) - result.fields.timestamp; + delta += delta < 0 ? 
256 : 0; + + if (delta < bd_config->mac_age) + continue; /* still valid */ + + age_out: + if (client) + { + /* copy mac entry to event msg */ + clib_memcpy (mp->mac[evt_idx].mac_addr, key.fields.mac, 6); + mp->mac[evt_idx].is_del = 1; + mp->mac[evt_idx].sw_if_index = + htonl (result.fields.sw_if_index); + evt_idx++; + } + /* delete mac entry */ + BVT (clib_bihash_kv) kv; + kv.key = key.raw; + BV (clib_bihash_add_del) (&fm->mac_table, &kv, 0); + learn_count--; + } + v++; + } + } + + /* keep learn count consistent */ + l2learn_main.global_learn_count = learn_count; + + if (mp) + { + /* send any outstanding mac event message else free message buffer */ + if (evt_idx) + { + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" + " %d MAC entries lost", client, evt_idx); + vl_msg_api_free (mp); + } + } + else + vl_msg_api_free (mp); + } + return delta_t + accum_t; +} + +/* Type of scan */ +#define SCAN_MAC_AGE 0 +#define SCAN_MAC_EVENT 1 + +/* Maximum f64 value */ +#define TIME_MAX (1.7976931348623157e+308) + static uword l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { uword event_type, *event_data = 0; - l2fib_main_t *msm = &l2fib_main; + l2fib_main_t *fm = &l2fib_main; + l2learn_main_t *lm = &l2learn_main; bool enabled = 0; - f64 start_time, last_run_duration = 0, t; + bool scan = SCAN_MAC_AGE; /* SCAN_FOR_AGE or SCAN_FOR_EVENT */ + f64 start_time, next_age_scan_time = TIME_MAX; while (1) { if (enabled) - vlib_process_wait_for_event_or_clock (vm, 60 - last_run_duration); + { + if (lm->client_pid) /* mac event client waiting */ + vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay); + else /* agin only */ + { + f64 t = next_age_scan_time - vlib_time_now (vm); + if (t < fm->event_scan_delay) + t = fm->event_scan_delay; + vlib_process_wait_for_event_or_clock (vm, t); + } + } else 
vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); vec_reset_length (event_data); + start_time = vlib_time_now (vm); + switch (event_type) { - case ~0: + case ~0: /* timer expired */ + if ((lm->client_pid == 0) || (start_time >= next_age_scan_time)) + { + scan = SCAN_MAC_AGE; + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; + } + else + scan = SCAN_MAC_EVENT; break; + case L2_MAC_AGE_PROCESS_EVENT_START: + scan = SCAN_MAC_AGE; + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; enabled = 1; break; + case L2_MAC_AGE_PROCESS_EVENT_STOP: enabled = 0; + next_age_scan_time = TIME_MAX; + l2fib_main.age_scan_duration = 0; + l2fib_main.evt_scan_duration = 0; continue; + case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS: - enabled = 0; + scan = SCAN_MAC_AGE; + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; break; + default: ASSERT (0); } - last_run_duration = start_time = vlib_time_now (vm); - BVT (clib_bihash) * h = &msm->mac_table; - int i, j, k; - for (i = 0; i < h->nbuckets; i++) - { - /* Allow no more than 10us without a pause */ - t = vlib_time_now (vm); - if (t > start_time + 10e-6) - { - vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ - start_time = vlib_time_now (vm); - } - - if (i < (h->nbuckets - 3)) - { - BVT (clib_bihash_bucket) * b = &h->buckets[i + 3]; - CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD); - b = &h->buckets[i + 1]; - if (b->offset) - { - BVT (clib_bihash_value) * v = - BV (clib_bihash_get_value) (h, b->offset); - CLIB_PREFETCH (v, CLIB_CACHE_LINE_BYTES, LOAD); - } - } - - BVT (clib_bihash_bucket) * b = &h->buckets[i]; - if (b->offset == 0) - continue; - BVT (clib_bihash_value) * v = - BV (clib_bihash_get_value) (h, b->offset); - for (j = 0; j < (1 << b->log2_pages); j++) - { - for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) - { - if (v->kvp[k].key == ~0ULL && 
v->kvp[k].value == ~0ULL) - continue; - - l2fib_entry_key_t key = {.raw = v->kvp[k].key }; - l2fib_entry_result_t result = {.raw = v->kvp[k].value }; - - if (result.fields.static_mac) - continue; - - u32 bd_index = key.fields.bd_index; - u32 sw_if_index = result.fields.sw_if_index; - u16 sn = l2fib_cur_seq_num (bd_index, sw_if_index).as_u16; - if (result.fields.sn.as_u16 != sn) - { - l2fib_del_entry_by_key (key.raw); - continue; - } - l2_bridge_domain_t *bd_config = - vec_elt_at_index (l2input_main.bd_configs, bd_index); - - if (bd_config->mac_age == 0) - continue; - - i16 delta = - (u8) (start_time / 60) - result.fields.timestamp; - delta += delta < 0 ? 256 : 0; - - if (delta > bd_config->mac_age) - l2fib_del_entry_by_key (key.raw); - } - v++; - } - } - last_run_duration = vlib_time_now (vm) - last_run_duration; + if (scan == SCAN_MAC_EVENT) + l2fib_main.evt_scan_duration = l2fib_scan (vm, start_time, 1); + else + l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0); } return 0; } diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h index ee6f0dc5..49a8b5b6 100644 --- a/src/vnet/l2/l2_fib.h +++ b/src/vnet/l2/l2_fib.h @@ -27,6 +27,18 @@ #define L2FIB_NUM_BUCKETS (64 * 1024) #define L2FIB_MEMORY_SIZE (256<<20) +/* Ager scan interval is 1 minute for aging */ +#define L2FIB_AGE_SCAN_INTERVAL (60.0) + +/* MAC event scan delay is 100 msec unless specified by MAC event client */ +#define L2FIB_EVENT_SCAN_DELAY_DEFAULT (0.1) + +/* Max MACs in a event message is 100 unless specified by MAC event client */ +#define L2FIB_EVENT_MAX_MACS_DEFAULT (100) + +/* MAC event learn limit is 1000 unless specified by MAC event client */ +#define L2FIB_EVENT_LEARN_LIMIT_DEFAULT (1000) + typedef struct { @@ -36,6 +48,16 @@ typedef struct /* per swif vector of sequence number for interface based flush of MACs */ u8 *swif_seq_num; + /* last event or ager scan duration */ + f64 evt_scan_duration; + f64 age_scan_duration; + + /* delay between event scans, default to 100 msec */ + 
f64 event_scan_delay; + + /* max macs in event message, default to 100 entries */ + u32 max_macs_in_event; + /* convenience variables */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -89,12 +111,15 @@ typedef struct { struct { - u32 sw_if_index; /* output sw_if_index (L3 interface if bvi==1) */ + u32 sw_if_index; /* output sw_if_index (L3 intf if bvi==1) */ - u8 static_mac:1; /* static mac, no dataplane learning */ + u8 static_mac:1; /* static mac, no MAC move */ + u8 age_not:1; /* not subject to age */ u8 bvi:1; /* mac is for a bridged virtual interface */ u8 filter:1; /* drop packets to/from this mac */ - u8 unused1:5; + u8 lrn_evt:1; /* MAC learned to be sent in L2 MAC event */ + u8 unused:3; + u8 timestamp; /* timestamp for aging */ l2fib_seq_num_t sn; /* bd/int seq num */ } fields; @@ -348,11 +373,11 @@ void l2fib_clear_table (void); void l2fib_add_entry (u64 mac, u32 bd_index, - u32 sw_if_index, u32 static_mac, u32 drop_mac, u32 bvi_mac); + u32 sw_if_index, u8 static_mac, u8 drop_mac, u8 bvi_mac); static inline void -l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u32 static_mac, - u32 bvi_mac) +l2fib_add_fwd_entry (u64 mac, u32 bd_index, u32 sw_if_index, u8 static_mac, + u8 bvi_mac) { l2fib_add_entry (mac, bd_index, sw_if_index, static_mac, 0, bvi_mac); } diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c index 8140728b..2bb7307c 100644 --- a/src/vnet/l2/l2_fwd.c +++ b/src/vnet/l2/l2_fwd.c @@ -141,8 +141,9 @@ l2fwd_process (vlib_main_t * vm, vnet_buffer (b0)->sw_if_index[VLIB_TX] = result0->fields.sw_if_index; *next0 = L2FWD_NEXT_L2_OUTPUT; int l2fib_seq_num_valid = 1; + /* check l2fib seq num for stale entries */ - if (!result0->fields.static_mac) + if (!result0->fields.age_not) { l2fib_seq_num_t in_sn = {.as_u16 = vnet_buffer (b0)->l2.l2fib_sn }; l2fib_seq_num_t expected_sn = { diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 65406292..47c036b0 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@
-123,11 +123,9 @@ l2learn_process (vlib_node_runtime_t * node, /* Check mac table lookup result */ if (PREDICT_TRUE (result0->fields.sw_if_index == sw_if_index0)) { - /* - * The entry was in the table, and the sw_if_index matched, the normal case - */ + /* Entry in L2FIB with matching sw_if_index matched - normal fast path */ counter_base[L2LEARN_ERROR_HIT] += 1; - int update = !result0->fields.static_mac && + int update = !result0->fields.age_not && /* static_mac always age_not */ (result0->fields.timestamp != timestamp || result0->fields.sn.as_u16 != vnet_buffer (b0)->l2.l2fib_sn); @@ -136,10 +134,10 @@ l2learn_process (vlib_node_runtime_t * node, } else if (result0->raw == ~0) { - /* The entry was not in table, so add it */ + /* Entry not in L2FIB - add it */ counter_base[L2LEARN_ERROR_MISS] += 1; - if (msm->global_learn_count == msm->global_learn_limit) + if (msm->global_learn_count >= msm->global_learn_limit) { /* * Global limit reached. Do not learn the mac but forward the packet. 
@@ -149,15 +147,22 @@ l2learn_process (vlib_node_runtime_t * node, return; } + /* Do not learn if mac is 0 */ + l2fib_entry_key_t key = *key0; + key.fields.bd_index = 0; + if (key.raw == 0) + return; + /* It is ok to learn */ msm->global_learn_count++; result0->raw = 0; /* clear all fields */ result0->fields.sw_if_index = sw_if_index0; + result0->fields.lrn_evt = (msm->client_pid != 0); cached_key->raw = ~0; /* invalidate the cache */ } else { - /* The entry was in the table, but with the wrong sw_if_index mapping (mac move) */ + /* Entry in L2FIB with different sw_if_index - mac move or filter */ if (result0->fields.filter) { ASSERT (result0->fields.sw_if_index == ~0); @@ -167,8 +172,6 @@ l2learn_process (vlib_node_runtime_t * node, return; } - counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; - if (result0->fields.static_mac) { /* @@ -185,6 +188,13 @@ l2learn_process (vlib_node_runtime_t * node, * TODO: check global/bridge domain/interface learn limits */ result0->fields.sw_if_index = sw_if_index0; + if (result0->fields.age_not) /* The mac was provisioned */ + { + msm->global_learn_count++; + result0->fields.age_not = 0; + } + result0->fields.lrn_evt = (msm->client_pid != 0); + counter_base[L2LEARN_ERROR_MAC_MOVE] += 1; } /* Update the entry */ @@ -479,7 +489,7 @@ VLIB_NODE_FUNCTION_MULTIARCH (l2learn_node, l2learn_node_fn) * Set the default number of dynamically learned macs to the number * of buckets. 
*/ - mp->global_learn_limit = L2FIB_NUM_BUCKETS * 16; + mp->global_learn_limit = L2LEARN_DEFAULT_LIMIT; return 0; } diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h index 0d95de04..000ab59e 100644 --- a/src/vnet/l2/l2_learn.h +++ b/src/vnet/l2/l2_learn.h @@ -34,6 +34,10 @@ typedef struct /* maximum number of dynamically learned mac entries */ u32 global_learn_limit; + /* client waiting for L2 MAC events for learned and aged MACs */ + u32 client_pid; + u32 client_index; + /* Next nodes for each feature */ u32 feat_next_node_index[32]; @@ -42,6 +46,7 @@ typedef struct vnet_main_t *vnet_main; } l2learn_main_t; +#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 16) l2learn_main_t l2learn_main; diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 520361f6..a57799cb 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -373,10 +373,19 @@ vl_api_l2_flags_t_print (vl_api_l2_flags_t * mp, void *handle) s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); -#define _(a,b) \ - if (flags & L2INPUT_FEAT_ ## a) s = format (s, #a " "); - foreach_l2input_feat; -#undef _ + if (flags & L2_LEARN) + s = format (s, "learn "); + if (flags & L2_FWD) + s = format (s, "forward "); + if (flags & L2_FLOOD) + s = format (s, "flood "); + if (flags & L2_UU_FLOOD) + s = format (s, "uu-flood "); + if (flags & L2_ARP_TERM) + s = format (s, "arp-term "); + + if (mp->is_set == 0) + s = format (s, "clear "); FINISH; } @@ -1783,6 +1792,21 @@ static void *vl_api_want_ip6_nd_events_t_print FINISH; } +static void *vl_api_want_l2_macs_events_t_print + (vl_api_want_l2_macs_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_l2_macs_events "); + s = format (s, "learn-limit %d ", ntohl (mp->learn_limit)); + s = format (s, "scan-delay %d ", (u32) mp->scan_delay); + s = format (s, "max-entries %d ", (u32) mp->max_macs_in_event * 10); + if (mp->enable_disable == 0) + s = format (s, "disable"); + + FINISH; +} + static void 
*vl_api_input_acl_set_interface_t_print (vl_api_input_acl_set_interface_t * mp, void *handle) { @@ -3066,6 +3090,7 @@ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ _(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ +_(WANT_L2_MACS_EVENTS, want_l2_macs_events) \ _(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ -- cgit 1.2.3-korg From e531f4cb5766fbf27e7a8af8e19ccf667b53852b Mon Sep 17 00:00:00 2001 From: John Lo Date: Tue, 22 Aug 2017 09:16:50 -0400 Subject: Increase default MAC learn limit and check it in learn-update path 1. Increase default MAC learn limit from 1M to 8M entries. 2. Check MAC learn limit in MAC learning update path. 3. Allow disable of want_l2_macs_events to set MAC learn limit 4. Other minor cleanups Change-Id: I62438440937b5fa455e16f4a2e4d910277753395 Signed-off-by: John Lo --- src/vnet/l2/l2.api | 1 - src/vnet/l2/l2_api.c | 5 ++++- src/vnet/l2/l2_fib.c | 2 +- src/vnet/l2/l2_learn.c | 2 ++ src/vnet/l2/l2_learn.h | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index e508bfb5..9f97ebbe 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -134,7 +134,6 @@ autoreply define l2fib_add_del }; /** \brief Register to recive L2 MAC events for leanred and aged MAC - Will also change MAC learn limit to L2LEARN_INFORM_LIMIT @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param learn_limit - MAC learn limit, 0 => default to 1000 diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c index c81cbad7..7e79d6fa 100644 --- a/src/vnet/l2/l2_api.c +++ b/src/vnet/l2/l2_api.c @@ -284,7 +284,10 @@ vl_api_want_l2_macs_events_t_handler (vl_api_want_l2_macs_events_t * mp) { lm->client_pid = 0; lm->client_index = 0; - lm->global_learn_limit 
= L2LEARN_DEFAULT_LIMIT; + if (learn_limit) + lm->global_learn_limit = learn_limit; + else + lm->global_learn_limit = L2LEARN_DEFAULT_LIMIT; } exit: diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 8aa0ac29..9f4c823f 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -1021,7 +1021,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) { - /* evet message full, sent it and start a new one */ + /* event message full, send it and start a new one */ if (q && (q->cursize < q->maxsize)) { mp->n_macs = htonl (evt_idx); diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c index 47c036b0..623c2de2 100644 --- a/src/vnet/l2/l2_learn.c +++ b/src/vnet/l2/l2_learn.c @@ -131,6 +131,8 @@ l2learn_process (vlib_node_runtime_t * node, if (PREDICT_TRUE (!update)) return; + else if (msm->global_learn_count > msm->global_learn_limit) + return; /* Above learn limit - do not update */ } else if (result0->raw == ~0) { diff --git a/src/vnet/l2/l2_learn.h b/src/vnet/l2/l2_learn.h index 000ab59e..d6f41d40 100644 --- a/src/vnet/l2/l2_learn.h +++ b/src/vnet/l2/l2_learn.h @@ -46,7 +46,7 @@ typedef struct vnet_main_t *vnet_main; } l2learn_main_t; -#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 16) +#define L2LEARN_DEFAULT_LIMIT (L2FIB_NUM_BUCKETS * 128) l2learn_main_t l2learn_main; -- cgit 1.2.3-korg From bd70c2f2e39b85939714aa025355eac973b2451f Mon Sep 17 00:00:00 2001 From: Eyal Bari Date: Wed, 27 Sep 2017 21:43:51 +0300 Subject: L2-FIB:add mac learn events test fixes an issue where events were not sent if BD doesn't enable mac aging Change-Id: Iddc53cb5c45e560633e6c5cff2731dccfc70ad5b Signed-off-by: Eyal Bari (cherry picked from commit 24db0ec78fb651c4c585ebf30e07108240574045) --- src/vnet/l2/l2_fib.c | 105 ++++++++++++++++++++++------------------------ test/test_l2_fib.py | 18 ++++++++ test/vpp_papi_provider.py | 9 ++++ 3 files changed, 76 insertions(+), 56 deletions(-) (limited to 
'src/vnet/l2/l2_fib.c') diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c index 9f4c823f..64b3275b 100644 --- a/src/vnet/l2/l2_fib.c +++ b/src/vnet/l2/l2_fib.c @@ -756,17 +756,21 @@ VLIB_CLI_COMMAND (l2fib_del_cli, static) = { void l2fib_start_ager_scan (vlib_main_t * vm) { - l2_bridge_domain_t *bd_config; - int enable = 0; + uword evt = L2_MAC_AGE_PROCESS_EVENT_ONE_PASS; /* check if there is at least one bd with mac aging enabled */ + l2_bridge_domain_t *bd_config; vec_foreach (bd_config, l2input_main.bd_configs) + { if (bd_config->bd_id != ~0 && bd_config->mac_age != 0) - enable = 1; + { + evt = L2_MAC_AGE_PROCESS_EVENT_START; + break; + } + } vlib_process_signal_event (vm, l2fib_mac_age_scanner_process_node.index, - enable ? L2_MAC_AGE_PROCESS_EVENT_START : - L2_MAC_AGE_PROCESS_EVENT_ONE_PASS, 0); + evt, 0); } /** @@ -1019,25 +1023,26 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) if (result.fields.age_not == 0) learn_count++; - if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) + if (client) { - /* event message full, send it and start a new one */ - if (q && (q->cursize < q->maxsize)) + if (PREDICT_FALSE (evt_idx >= fm->max_macs_in_event)) { - mp->n_macs = htonl (evt_idx); - vl_msg_api_send_shmem (q, (u8 *) & mp); - mp = allocate_mac_evt_buf (client, cl_idx); + /* event message full, send it and start a new one */ + if (q && (q->cursize < q->maxsize)) + { + mp->n_macs = htonl (evt_idx); + vl_msg_api_send_shmem (q, (u8 *) & mp); + mp = allocate_mac_evt_buf (client, cl_idx); + } + else + { + clib_warning ("MAC event to pid %d queue stuffed!" + " %d MAC entries lost", client, + evt_idx); + } + evt_idx = 0; } - else - { - clib_warning ("MAC event to pid %d queue stuffed!" 
- " %d MAC entries lost", client, evt_idx); - } - evt_idx = 0; - } - if (client) - { if (result.fields.lrn_evt) { /* copy mac entry to event msg */ @@ -1125,10 +1130,6 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only) return delta_t + accum_t; } -/* Type of scan */ -#define SCAN_MAC_AGE 0 -#define SCAN_MAC_EVENT 1 - /* Maximum f64 value */ #define TIME_MAX (1.7976931348623157e+308) @@ -1140,22 +1141,16 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, l2fib_main_t *fm = &l2fib_main; l2learn_main_t *lm = &l2learn_main; bool enabled = 0; - bool scan = SCAN_MAC_AGE; /* SCAN_FOR_AGE or SCAN_FOR_EVENT */ f64 start_time, next_age_scan_time = TIME_MAX; while (1) { - if (enabled) + if (lm->client_pid) + vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay); + else if (enabled) { - if (lm->client_pid) /* mac event client waiting */ - vlib_process_wait_for_event_or_clock (vm, fm->event_scan_delay); - else /* agin only */ - { - f64 t = next_age_scan_time - vlib_time_now (vm); - if (t < fm->event_scan_delay) - t = fm->event_scan_delay; - vlib_process_wait_for_event_or_clock (vm, t); - } + f64 t = next_age_scan_time - vlib_time_now (vm); + vlib_process_wait_for_event_or_clock (vm, t); } else vlib_process_wait_for_event (vm); @@ -1164,41 +1159,26 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vec_reset_length (event_data); start_time = vlib_time_now (vm); + enum + { SCAN_MAC_AGE, SCAN_MAC_EVENT, SCAN_DISABLE } scan = SCAN_MAC_AGE; switch (event_type) { case ~0: /* timer expired */ - if ((lm->client_pid == 0) || (start_time >= next_age_scan_time)) - { - scan = SCAN_MAC_AGE; - if (enabled) - next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; - else - next_age_scan_time = TIME_MAX; - } - else + if (lm->client_pid != 0 && start_time < next_age_scan_time) scan = SCAN_MAC_EVENT; break; case L2_MAC_AGE_PROCESS_EVENT_START: - scan = SCAN_MAC_AGE; - next_age_scan_time = start_time + 
L2FIB_AGE_SCAN_INTERVAL; enabled = 1; break; case L2_MAC_AGE_PROCESS_EVENT_STOP: enabled = 0; - next_age_scan_time = TIME_MAX; - l2fib_main.age_scan_duration = 0; - l2fib_main.evt_scan_duration = 0; - continue; + scan = SCAN_DISABLE; + break; case L2_MAC_AGE_PROCESS_EVENT_ONE_PASS: - scan = SCAN_MAC_AGE; - if (enabled) - next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; - else - next_age_scan_time = TIME_MAX; break; default: @@ -1208,7 +1188,20 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (scan == SCAN_MAC_EVENT) l2fib_main.evt_scan_duration = l2fib_scan (vm, start_time, 1); else - l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0); + { + if (scan == SCAN_MAC_AGE) + l2fib_main.age_scan_duration = l2fib_scan (vm, start_time, 0); + if (scan == SCAN_DISABLE) + { + l2fib_main.age_scan_duration = 0; + l2fib_main.evt_scan_duration = 0; + } + /* schedule next scan */ + if (enabled) + next_age_scan_time = start_time + L2FIB_AGE_SCAN_INTERVAL; + else + next_age_scan_time = TIME_MAX; + } } return 0; } diff --git a/test/test_l2_fib.py b/test/test_l2_fib.py index 9249a2ce..52bf9c86 100644 --- a/test/test_l2_fib.py +++ b/test/test_l2_fib.py @@ -519,6 +519,24 @@ class TestL2fib(VppTestCase): self.run_verify_negat_test(bd_id=1, dst_hosts=flushed) self.run_verify_negat_test(bd_id=2, dst_hosts=flushed) + def test_l2_fib_09(self): + """ L2 FIB test 9 - mac learning events + """ + self.create_hosts(10, subnet=39) + + self.vapi.want_macs_learn_events() + self.learn_hosts(bd_id=1, n_hosts_per_if=10) + + self.sleep(1) + self.logger.info(self.vapi.ppcli("show l2fib")) + evs = self.vapi.collect_events() + learned_macs = { + e.mac[i].mac_addr for e in evs for i in range(e.n_macs)} + macs = {h.bin_mac for swif_hs in self.learned_hosts.itervalues() + for h in swif_hs} + self.vapi.want_macs_learn_events(enable_disable=0) + self.assertEqual(len(learned_macs ^ macs), 0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) 
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 634dabea..b6759ec3 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -431,6 +431,15 @@ class VppPapiProvider(object): 'address': address, 'pid': os.getpid(), }) + def want_macs_learn_events(self, enable_disable=1, scan_delay=0, + max_macs_in_event=0, learn_limit=0): + return self.api(self.papi.want_l2_macs_events, + {'enable_disable': enable_disable, + 'scan_delay': scan_delay, + 'max_macs_in_event': max_macs_in_event, + 'learn_limit': learn_limit, + 'pid': os.getpid(), }) + def l2fib_add_del(self, mac, bd_id, sw_if_index, is_add=1, static_mac=0, filter_mac=0, bvi_mac=0): """Create/delete L2 FIB entry. -- cgit 1.2.3-korg