From b2ef4dde97b51b73a596093f06cbbdb84f23a824 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 23 Mar 2016 08:56:01 -0400 Subject: Adjacency allocator Change-Id: Ieacbfa4dbbfd13b38eaa2d37f618f212cef4e492 Signed-off-by: Damjan Marion --- vnet/Makefile.am | 2 + vnet/vnet/ip/adj_alloc.c | 241 +++++++++++++++++++++++++++ vnet/vnet/ip/adj_alloc.h | 53 ++++++ vnet/vnet/ip/ip4_forward.c | 394 --------------------------------------------- vnet/vnet/ip/ip4_mtrie.c | 11 +- vnet/vnet/ip/lookup.c | 237 +++++---------------------- vnet/vnet/ip/lookup.h | 7 +- 7 files changed, 344 insertions(+), 601 deletions(-) create mode 100644 vnet/vnet/ip/adj_alloc.c create mode 100644 vnet/vnet/ip/adj_alloc.h (limited to 'vnet') diff --git a/vnet/Makefile.am b/vnet/Makefile.am index 460becbfff9..b254d80ad48 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -240,6 +240,7 @@ nobase_include_HEADERS += \ # Layer 3 protocol: IP v4/v6 ######################################## libvnet_la_SOURCES += \ + vnet/ip/adj_alloc.c \ vnet/ip/format.c \ vnet/ip/icmp4.c \ vnet/ip/icmp6.c \ @@ -269,6 +270,7 @@ libvnet_la_SOURCES += \ vnet/ip/ip_frag.c nobase_include_HEADERS += \ + vnet/ip/adj_alloc.h \ vnet/ip/format.h \ vnet/ip/icmp46_packet.h \ vnet/ip/icmp4.h \ diff --git a/vnet/vnet/ip/adj_alloc.c b/vnet/vnet/ip/adj_alloc.c new file mode 100644 index 00000000000..56104207363 --- /dev/null +++ b/vnet/vnet/ip/adj_alloc.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE(review): the include targets were missing from the patch text as
+ * received; the two below are reconstructed from the names this file uses
+ * (aa_header, ip4_main, format_ip_adjacency) -- confirm against the tree.
+ */
+#include <vnet/ip/adj_alloc.h>
+#include <vnet/ip/ip.h>
+
+/*
+ * any operation which could cause the adj vector to be reallocated
+ * must have a worker thread barrier
+ */
+
+/*
+ * Report whether appending n more adjacencies to adjs could move the vector.
+ *
+ * Returns 1 when adjs is NULL, or when the bytes needed for the current
+ * elements plus n new ones (plus the aligned vector header) exceed what
+ * clib_mem_size() reports for the underlying allocation. Returns 0 otherwise.
+ */
+static inline int will_reallocate (ip_adjacency_t * adjs, u32 n)
+{
+  uword aligned_header_bytes, new_data_bytes;
+  uword data_bytes;
+  aa_header_t * ah;
+
+  if (adjs == 0)
+    return 1;
+
+  /* Derive the header pointer only once adjs is known to be non-NULL. */
+  ah = aa_header (adjs);
+
+  data_bytes = (vec_len (adjs) + n) * sizeof (*adjs);
+
+  aligned_header_bytes = vec_header_bytes (aa_aligned_header_bytes);
+
+  new_data_bytes = data_bytes + aligned_header_bytes;
+
+  ASSERT (clib_mem_is_heap_object (_vec_find(ah)));
+
+  if (PREDICT_TRUE(new_data_bytes <= clib_mem_size (_vec_find(ah))))
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Allocate a block of n contiguous adjacencies from adjs.
+ *
+ * A block is popped from the size-n free list when that list exists and is
+ * non-empty. Otherwise the vector is extended by n elements, and the worker
+ * thread barrier is held around the extension whenever will_reallocate()
+ * reports that the vector may move.
+ *
+ * The block is zeroed; its first element then records the block's index in
+ * heap_handle and its length in n_adj.
+ *
+ * adjs    adjacency vector; its header is dereferenced, so it must already
+ *         carry an aa_header_t (see aa_bootstrap).
+ * blockp  receives a pointer to the first adjacency of the block.
+ * n       number of adjacencies in the block.
+ *
+ * Returns the (possibly moved) vector; the caller must store it back.
+ * Asserts that it is running on cpu 0.
+ */
+ip_adjacency_t *
+aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n)
+{
+  vlib_main_t * vm = &vlib_global_main;
+  aa_header_t * ah = aa_header (adjs);
+  ip_adjacency_t * adj_block;
+  u32 freelist_length;
+  /*
+   * Must start at 0: it is tested after vec_add2_ha() even when
+   * will_reallocate() returned 0 and no barrier was taken.
+   */
+  int need_barrier_sync = 0;
+
+  ASSERT(os_get_cpu_number() == 0);
+  ASSERT (clib_mem_is_heap_object (_vec_find(ah)));
+
+  /* If we don't have a freelist of size N, fresh allocation is required */
+  if (vec_len (ah->free_indices_by_size) <= n)
+    {
+      if (will_reallocate (adjs, n))
+        {
+          need_barrier_sync = 1;
+          vlib_worker_thread_barrier_sync (vm);
+        }
+      /* Workers won't look at the freelists... */
+      vec_validate (ah->free_indices_by_size, n);
+      vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+                   CLIB_CACHE_LINE_BYTES);
+      if (need_barrier_sync)
+        vlib_worker_thread_barrier_release (vm);
+      goto out;
+    }
+  /* See if we have a free adj block to dole out */
+  if ((freelist_length = vec_len(ah->free_indices_by_size[n])))
+    {
+      /* Take the most recently freed block of this size. */
+      u32 index = ah->free_indices_by_size[n][freelist_length-1];
+
+      adj_block = &adjs[index];
+      _vec_len(ah->free_indices_by_size[n]) -= 1;
+      goto out;
+    }
+  /* Allocate a new block of size N */
+  if (will_reallocate (adjs, n))
+    {
+      need_barrier_sync = 1;
+      vlib_worker_thread_barrier_sync (vm);
+    }
+  vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+               CLIB_CACHE_LINE_BYTES);
+
+  if (need_barrier_sync)
+    vlib_worker_thread_barrier_release (vm);
+
+ out:
+  memset (adj_block, 0, n * (sizeof(*adj_block)));
+  adj_block->heap_handle = adj_block - adjs;
+  adj_block->n_adj = n;
+  *blockp = adj_block;
+  return adjs;
+}
+
+/*
+ * Return the block headed by adj to the free list for its size.
+ *
+ * The block's index (adj->heap_handle) is pushed onto
+ * free_indices_by_size[adj->n_adj] and heap_handle is cleared to 0.
+ * Index 0 is never freed: a zero heap_handle trips the ASSERT below.
+ */
+void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj)
+{
+  aa_header_t * ah = aa_header (adjs);
+
+  ASSERT (adjs && adj && (adj->heap_handle < vec_len (adjs)));
+  ASSERT (adj->n_adj < vec_len (ah->free_indices_by_size));
+  ASSERT (adj->heap_handle != 0);
+
+  vec_add1 (ah->free_indices_by_size[adj->n_adj], adj->heap_handle);
+  adj->heap_handle = 0;
+}
+
+/*
+ * Extend adjs by n zeroed adjacencies, clear the allocator header and seed
+ * the size-1 free list with the indices n-1, n-2, ..., 0, so that successive
+ * single-adjacency allocations return 0, 1, 2, ...
+ *
+ * Presumably called with adjs == 0: the loop bound is vec_len (adjs) while
+ * the pushed indices are computed from n -- confirm against callers.
+ *
+ * Returns the new vector.
+ */
+ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n)
+{
+  ip_adjacency_t * adj_block;
+  aa_header_t * ah;
+  int i;
+
+  vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes,
+               CLIB_CACHE_LINE_BYTES);
+
+  memset (adj_block, 0, n * sizeof(*adj_block));
+  ah = aa_header (adjs);
+  memset (ah, 0, sizeof (*ah));
+
+  vec_validate (ah->free_indices_by_size, 1);
+
+  for (i = 0 ; i < vec_len (adjs); i++)
+    {
+      /*
+       * NOTE(review): these two stores hit the first element of the block
+       * on every iteration; confirm whether adj_block[i] was intended.
+       */
+      adj_block->n_adj = 1;
+      adj_block->heap_handle = ~0;
+      /* Euchre the allocator into returning 0, 1, 2, etc. */
+      vec_add1 (ah->free_indices_by_size[1], n - (i+1));
+    }
+
+  return adjs;
+}
+
+/*
+ * format() helper that summarises the adjacency allocator.
+ *
+ * va_args, in order: vnet_main_t *, ip_lookup_main_t *, ip_adjacency_t *
+ * (the adjacency vector) and int verbose.
+ *
+ * The vector is walked block by block, stepping by each block head's n_adj.
+ * A block counts as in use when it is block 0 or has a non-zero heap_handle,
+ * and as freed otherwise. The free lists are then summed separately.
+ * With verbose set, every in-use adjacency is printed via
+ * format_ip_adjacency.
+ *
+ * NOTE(review): the walk advances by adj->n_adj, so it relies on every block
+ * head carrying a non-zero n_adj.
+ */
+u8 * format_adjacency_alloc (u8 * s, va_list * args)
+{
+  vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+  ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
+  ip_adjacency_t * adjs = va_arg (*args, ip_adjacency_t *);
+  int verbose = va_arg (*args, int);
+  ip_adjacency_t * adj;
+  u32 inuse = 0, freed = 0;
+  u32 on_freelist = 0;
+  int i, j;
+  aa_header_t * ah = aa_header (adjs);
+
+  for (i = 0; i < vec_len (adjs); i += adj->n_adj)
+    {
+      adj = adjs + i;
+      if ((i == 0) || adj->heap_handle)
+        inuse += adj->n_adj;
+      else
+        freed += adj->n_adj;
+    }
+
+  /* Size-0 free list is skipped; start at blocks of one adjacency. */
+  for (i = 1; i < vec_len(ah->free_indices_by_size); i++)
+    {
+      for (j = 0; j < vec_len(ah->free_indices_by_size[i]); j++)
+        {
+          adj = adjs + ah->free_indices_by_size[i][j];
+          ASSERT(adj->heap_handle == 0);
+          on_freelist += adj->n_adj;
+        }
+    }
+
+  s = format (s, "adjs: %d total, %d in use, %d free, %d on freelists\n",
+              vec_len(adjs), inuse, freed, on_freelist);
+  if (verbose)
+    {
+      for (i = 0; i < vec_len (adjs); i += adj->n_adj)
+        {
+          adj = adjs + i;
+          if ((i == 0) || adj->heap_handle)
+            {
+              if (adj->n_adj > 1)
+                s = format (s, "[%d-%d] ", i, i+adj->n_adj-1);
+              else
+                s = format (s, "[%d] ", i);
+
+              for (j = 0; j < adj->n_adj; j++)
+                {
+                  if (j > 0)
+                    s = format (s, " ");
+
+                  s = format(s, "%U\n", format_ip_adjacency,
+                             vnm, lm, i+j);
+                }
+            }
+        }
+    }
+  return s;
+}
+
+/*
+ * CLI handler for "show adjacency alloc".
+ *
+ * Accepts the keywords "verbose", "ip4" and "ip6"; ip4 is the default.
+ * Any other input yields an "unknown input" error. Output is produced by
+ * format_adjacency_alloc on the selected lookup main's adjacency_heap.
+ */
+static clib_error_t *
+show_adjacency_alloc_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  int verbose = 0;
+  vnet_main_t *vnm = vnet_get_main();
+  ip_lookup_main_t *lm = 0;
+  ip_adjacency_t * adjs = 0;
+  int is_ip4 = 1;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "verbose"))
+        verbose = 1;
+      else if (unformat (input, "ip4"))
+        ;
+      else if (unformat (input, "ip6"))
+        is_ip4 = 0;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (is_ip4)
+    lm = &ip4_main.lookup_main;
+  else
+    lm = &ip6_main.lookup_main;
+
+  adjs = lm->adjacency_heap;
+
+  vlib_cli_output (vm, "%U", format_adjacency_alloc, vnm, lm, adjs, verbose);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_adjacency_alloc_command, static) = {
+  .path = "show adjacency alloc",
+  .short_help = "show adjacency alloc",
+  .function = show_adjacency_alloc_command_fn,
+};
diff --git a/vnet/vnet/ip/adj_alloc.h b/vnet/vnet/ip/adj_alloc.h
new file mode 100644
index 00000000000..a10146c53a5
--- /dev/null
+++ b/vnet/vnet/ip/adj_alloc.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __adj_alloc_h__
+#define __adj_alloc_h__
+
+/*
+ * Adjacency allocator: heap-like in that the code
+ * will dole out contiguous chunks of n items. In the interests of
+ * thread safety, we don't bother about coalescing free blocks of size r
+ * into free blocks of size s, where r < s.
+ *
+ * We include explicit references to worker thread barrier synchronization
+ * where necessary.
+ */ + +#include +#include +#include + +typedef struct { + u32 ** free_indices_by_size; +} aa_header_t; + +#define aa_aligned_header_bytes \ + vec_aligned_header_bytes (sizeof (aa_header_t), sizeof (void *)) + +/* Pool header from user pointer */ +static inline aa_header_t * aa_header (void * v) +{ + return vec_aligned_header (v, sizeof (aa_header_t), sizeof (void *)); +} + +ip_adjacency_t * +aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n); +void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj); +ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n); + +format_function_t format_adj_allocation; + +#endif /* __adj_alloc_h__ */ diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index c5b3e9a5a0a..04619837469 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -136,136 +136,6 @@ ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, vec_validate_init_empty (fib->old_hash_values, max_index, ~0); } -static void serialize_ip4_address (serialize_main_t * m, va_list * va) -{ - ip4_address_t * a = va_arg (*va, ip4_address_t *); - u8 * p = serialize_get (m, sizeof (a->as_u8)); - memcpy (p, a->as_u8, sizeof (a->as_u8)); -} - -static void unserialize_ip4_address (serialize_main_t * m, va_list * va) -{ - ip4_address_t * a = va_arg (*va, ip4_address_t *); - u8 * p = unserialize_get (m, sizeof (a->as_u8)); - memcpy (a->as_u8, p, sizeof (a->as_u8)); -} - -static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va) -{ - ip4_address_t * a = va_arg (*va, ip4_address_t *); - u32 l = va_arg (*va, u32); - u32 n_bytes = (l / 8) + ((l % 8) != 0); - u8 * p = serialize_get (m, 1 + n_bytes); - ASSERT (l <= 32); - p[0] = l; - memcpy (p + 1, a->as_u8, n_bytes); -} - -static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va) -{ - ip4_address_t * a = va_arg (*va, ip4_address_t *); - u32 * al = va_arg (*va, u32 *); - u8 * p = unserialize_get (m, 1); - u32 l, n_bytes; - - 
al[0] = l = p[0]; - ASSERT (l <= 32); - n_bytes = (l / 8) + ((l % 8) != 0); - - if (n_bytes) - { - p = unserialize_get (m, n_bytes); - memcpy (a->as_u8, p, n_bytes); - } -} - -static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va) -{ - ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *); - - serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id); - serialize_likely_small_unsigned_integer (m, a->flags); - serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length); - serialize_likely_small_unsigned_integer (m, a->adj_index); - serialize_likely_small_unsigned_integer (m, a->n_add_adj); - if (a->n_add_adj > 0) - serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj); -} - -/* Serialized adjacencies for arp/rewrite do not send graph next_index - since graph hookup is not guaranteed to be the same for both sides - of serialize/unserialize. */ -static void -unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm, - ip_adjacency_t * adj, - u32 n_adj) -{ - vnet_main_t * vnm = vnet_get_main(); - u32 i, ni, sw_if_index, is_arp; - vnet_hw_interface_t * hw; - - for (i = 0; i < n_adj; i++) - { - switch (adj[i].lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - case IP_LOOKUP_NEXT_ARP: - is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP; - sw_if_index = adj[i].rewrite_header.sw_if_index; - hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ni = is_arp ? 
ip4_arp_node.index : ip4_rewrite_node.index; - adj[i].rewrite_header.node_index = ni; - adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index); - if (is_arp) - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_ARP, - sw_if_index, - ni, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj[i].rewrite_header, - sizeof (adj->rewrite_data)); - break; - - default: - break; - } - } -} - -static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va) -{ - ip4_main_t * i4m = &ip4_main; - ip4_add_del_route_args_t a; - - a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m); - a.flags = unserialize_likely_small_unsigned_integer (m); - unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length); - a.adj_index = unserialize_likely_small_unsigned_integer (m); - a.n_add_adj = unserialize_likely_small_unsigned_integer (m); - a.add_adj = 0; - if (a.n_add_adj > 0) - { - vec_resize (a.add_adj, a.n_add_adj); - unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj); - unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(), - a.add_adj, a.n_add_adj); - } - - /* Prevent re-re-distribution. */ - a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE; - - ip4_add_del_route (i4m, &a); - - vec_free (a.add_adj); -} - -MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = { - .name = "vnet_ip4_add_del_route", - .serialize = serialize_ip4_add_del_route_msg, - .unserialize = unserialize_ip4_add_del_route_msg, -}; - static void ip4_fib_set_adj_index (ip4_main_t * im, ip4_fib_t * fib, @@ -317,21 +187,12 @@ ip4_fib_set_adj_index (ip4_main_t * im, void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) { - vlib_main_t * vm = vlib_get_main(); ip_lookup_main_t * lm = &im->lookup_main; ip4_fib_t * fib; u32 dst_address, dst_address_length, adj_index, old_adj_index; uword * hash, is_del; ip4_add_del_route_callback_t * cb; - if (vm->mc_main && ! 
(a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE)) - { - u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP); - mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer, - &ip4_add_del_route_msg, a); - return; - } - /* Either create new adjacency or use given one depending on arguments. */ if (a->n_add_adj > 0) { @@ -399,51 +260,6 @@ void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) ip_del_adjacency (lm, old_adj_index); } -static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va) -{ - u32 flags = va_arg (*va, u32); - ip4_address_t * dst_address = va_arg (*va, ip4_address_t *); - u32 dst_address_length = va_arg (*va, u32); - ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *); - u32 next_hop_sw_if_index = va_arg (*va, u32); - u32 next_hop_weight = va_arg (*va, u32); - - serialize_likely_small_unsigned_integer (m, flags); - serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length); - serialize (m, serialize_ip4_address, next_hop_address); - serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index); - serialize_likely_small_unsigned_integer (m, next_hop_weight); -} - -static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va) -{ - ip4_main_t * im = &ip4_main; - u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight; - ip4_address_t dst_address, next_hop_address; - - flags = unserialize_likely_small_unsigned_integer (m); - unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length); - unserialize (m, unserialize_ip4_address, &next_hop_address); - next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m); - next_hop_weight = unserialize_likely_small_unsigned_integer (m); - - ip4_add_del_route_next_hop - (im, - flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE, - &dst_address, - dst_address_length, - &next_hop_address, - next_hop_sw_if_index, - next_hop_weight, (u32)~0, 
- (u32)~0 /* explicit FIB index */); -} - -MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = { - .name = "vnet_ip4_add_del_route_next_hop", - .serialize = serialize_ip4_add_del_route_next_hop_msg, - .unserialize = unserialize_ip4_add_del_route_next_hop_msg, -}; - void ip4_add_del_route_next_hop (ip4_main_t * im, u32 flags, @@ -455,7 +271,6 @@ ip4_add_del_route_next_hop (ip4_main_t * im, u32 explicit_fib_index) { vnet_main_t * vnm = vnet_get_main(); - vlib_main_t * vm = vlib_get_main(); ip_lookup_main_t * lm = &im->lookup_main; u32 fib_index; ip4_fib_t * fib; @@ -469,18 +284,6 @@ ip4_add_del_route_next_hop (ip4_main_t * im, int is_interface_next_hop; clib_error_t * error = 0; - if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE)) - { - u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP); - mc_serialize2 (vm->mc_main, - multiple_messages_per_vlib_buffer, - &ip4_add_del_route_next_hop_msg, - flags, - dst_address, dst_address_length, - next_hop, next_hop_sw_if_index, next_hop_weight); - return; - } - if (explicit_fib_index == (u32)~0) fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); else @@ -1249,38 +1052,6 @@ typedef struct { u32 length; } ip4_interface_address_t; -static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va) -{ - ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) { - serialize_integer (m, a[i].sw_if_index, sizeof (a[i].sw_if_index)); - serialize (m, serialize_ip4_address, &a[i].address); - serialize_integer (m, a[i].length, sizeof (a[i].length)); - } -} - -static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va) -{ - ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) { - unserialize_integer (m, &a[i].sw_if_index, sizeof (a[i].sw_if_index)); - 
unserialize (m, unserialize_ip4_address, &a[i].address); - unserialize_integer (m, &a[i].length, sizeof (a[i].length)); - } -} - -static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va) -{ - ip4_interface_address_t * a = va_arg (*va, ip4_interface_address_t *); - int is_del = va_arg (*va, int); - serialize (m, serialize_vec_ip4_set_interface_address, a, 1); - serialize_integer (m, is_del, sizeof (is_del)); -} - static clib_error_t * ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 sw_if_index, @@ -1290,31 +1061,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 insert_routes, u32 is_del); -static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va) -{ - mc_main_t * mcm = va_arg (*va, mc_main_t *); - vlib_main_t * vm = mcm->vlib_main; - ip4_interface_address_t a; - clib_error_t * error; - int is_del; - - unserialize (m, unserialize_vec_ip4_set_interface_address, &a, 1); - unserialize_integer (m, &is_del, sizeof (is_del)); - error = ip4_add_del_interface_address_internal - (vm, a.sw_if_index, &a.address, a.length, - /* redistribute */ 0, - /* insert_routes */ 1, - is_del); - if (error) - clib_error_report (error); -} - -MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = { - .name = "vnet_ip4_set_interface_address", - .serialize = serialize_ip4_set_interface_address_msg, - .unserialize = unserialize_ip4_set_interface_address_msg, -}; - static clib_error_t * ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 sw_if_index, @@ -1354,17 +1100,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, })); } - if (vm->mc_main && redistribute) - { - ip4_interface_address_t a; - a.sw_if_index = sw_if_index; - a.address = address[0]; - a.length = address_length; - mc_serialize (vm->mc_main, &ip4_set_interface_address_msg, - &a, (int)is_del); - goto done; - } - elts_before = pool_elts (lm->if_address_pool); error = ip_interface_address_add_del @@ -1418,135 +1153,6 
@@ ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, is_del); } -static void serialize_ip4_fib (serialize_main_t * m, va_list * va) -{ - ip4_fib_t * f = va_arg (*va, ip4_fib_t *); - u32 l, dst, adj_index; - - serialize_integer (m, f->table_id, sizeof (f->table_id)); - for (l = 0; l < ARRAY_LEN (f->adj_index_by_dst_address); l++) - { - u32 n_elts = hash_elts (f->adj_index_by_dst_address[l]); - - serialize_integer (m, n_elts, sizeof (n_elts)); - hash_foreach (dst, adj_index, f->adj_index_by_dst_address[l], ({ - ip4_address_t tmp; - tmp.as_u32 = dst; - serialize (m, serialize_ip4_address, &tmp); - serialize_integer (m, adj_index, sizeof (adj_index)); - })); - } -} - -static void unserialize_ip4_fib (serialize_main_t * m, va_list * va) -{ - ip4_add_del_route_args_t a; - u32 i; - - a.flags = (IP4_ROUTE_FLAG_ADD - | IP4_ROUTE_FLAG_NO_REDISTRIBUTE - | IP4_ROUTE_FLAG_TABLE_ID); - a.n_add_adj = 0; - a.add_adj = 0; - - unserialize_integer (m, &a.table_index_or_table_id, - sizeof (a.table_index_or_table_id)); - - for (i = 0; i < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); i++) - { - u32 n_elts; - unserialize_integer (m, &n_elts, sizeof (u32)); - a.dst_address_length = i; - while (n_elts > 0) - { - unserialize (m, unserialize_ip4_address, &a.dst_address); - unserialize_integer (m, &a.adj_index, sizeof (a.adj_index)); - ip4_add_del_route (&ip4_main, &a); - n_elts--; - } - } -} - -void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va) -{ - vnet_main_t * vnm = va_arg (*va, vnet_main_t *); - vnet_interface_main_t * vim = &vnm->interface_main; - vnet_sw_interface_t * si; - ip4_main_t * i4m = &ip4_main; - ip4_interface_address_t * as = 0, * a; - - /* Download adjacency tables & multipath stuff. */ - serialize (m, serialize_ip_lookup_main, &i4m->lookup_main); - - /* FIBs. 
*/ - { - ip4_fib_t * f; - u32 n_fibs = vec_len (i4m->fibs); - serialize_integer (m, n_fibs, sizeof (n_fibs)); - vec_foreach (f, i4m->fibs) - serialize (m, serialize_ip4_fib, f); - } - - /* FIB interface config. */ - vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32); - - /* Interface ip4 addresses. */ - pool_foreach (si, vim->sw_interfaces, ({ - u32 sw_if_index = si->sw_if_index; - ip_interface_address_t * ia; - foreach_ip_interface_address (&i4m->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */, - ({ - ip4_address_t * x = ip_interface_address_get_address (&i4m->lookup_main, ia); - vec_add2 (as, a, 1); - a->address = x[0]; - a->length = ia->address_length; - a->sw_if_index = sw_if_index; - })); - })); - vec_serialize (m, as, serialize_vec_ip4_set_interface_address); - vec_free (as); -} - -void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va) -{ - vlib_main_t * vm = va_arg (*va, vlib_main_t *); - ip4_main_t * i4m = &ip4_main; - ip4_interface_address_t * as = 0, * a; - - unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main); - - { - ip_adjacency_t * adj, * adj_heap; - u32 n_adj; - adj_heap = i4m->lookup_main.adjacency_heap; - heap_foreach (adj, n_adj, adj_heap, ({ - unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj); - ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0); - })); - } - - /* FIBs */ - { - u32 i, n_fibs; - unserialize_integer (m, &n_fibs, sizeof (n_fibs)); - for (i = 0; i < n_fibs; i++) - unserialize (m, unserialize_ip4_fib); - } - - vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32); - - vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address); - vec_foreach (a, as) { - ip4_add_del_interface_address_internal - (vm, a->sw_if_index, &a->address, a->length, - /* redistribute */ 0, - /* insert_routes */ 0, - /* is_del */ 0); - } - vec_free (as); -} - static clib_error_t * ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 
sw_if_index, diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c index ed4a0d9f44f..461cd64b86d 100644 --- a/vnet/vnet/ip/ip4_mtrie.c +++ b/vnet/vnet/ip/ip4_mtrie.c @@ -191,7 +191,8 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, /* Replace less specific terminal leaves with new leaf. */ else if (new_leaf_dst_address_bits >= ply->dst_address_bits_of_leaves[i]) { - ply->leaves[i] = new_leaf; + __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf); + ASSERT(ply->leaves[i] == new_leaf); ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); } @@ -240,7 +241,9 @@ set_leaf (ip4_fib_mtrie_t * m, if (old_leaf_is_terminal) { old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length; - old_ply->leaves[i] = new_leaf; + __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, + new_leaf); + ASSERT(old_ply->leaves[i] == new_leaf); old_ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf); ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves)); } @@ -274,7 +277,9 @@ set_leaf (ip4_fib_mtrie_t * m, /* Refetch since ply_create may move pool. 
*/ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index); - old_ply->leaves[dst_byte] = new_leaf; + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT(old_ply->leaves[dst_byte] == new_leaf); old_ply->dst_address_bits_of_leaves[dst_byte] = 0; old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_leaf); diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c index 9e34bfa9064..a6b037a3ad9 100644 --- a/vnet/vnet/ip/lookup.c +++ b/vnet/vnet/ip/lookup.c @@ -39,6 +39,7 @@ #include /* for fabs */ #include +#include static void ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index); @@ -47,7 +48,15 @@ always_inline void ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj) { if (CLIB_DEBUG > 0) - memset (adj, 0xfe, n_adj * sizeof (adj[0])); + { + u32 save_handle = adj->heap_handle;; + u32 save_n_adj = adj->n_adj; + + memset (adj, 0xfe, n_adj * sizeof (adj[0])); + + adj->heap_handle = save_handle; + adj->n_adj = save_n_adj; + } } /* Create new block of given number of contiguous adjacencies. 
*/ @@ -92,7 +101,7 @@ ip_add_adjacency (ip_lookup_main_t * lm, p = hash_get (lm->adj_index_by_signature, signature); if (p) { - adj = heap_elt_at_index (lm->adjacency_heap, p[0]); + adj = vec_elt_at_index (lm->adjacency_heap, p[0]); while (1) { if (vnet_ip_adjacency_share_compare (adj, copy_adj)) @@ -103,14 +112,14 @@ ip_add_adjacency (ip_lookup_main_t * lm, } if (adj->next_adj_with_signature == 0) break; - adj = heap_elt_at_index (lm->adjacency_heap, - adj->next_adj_with_signature); + adj = vec_elt_at_index (lm->adjacency_heap, + adj->next_adj_with_signature); } } } - ai = heap_alloc (lm->adjacency_heap, n_adj, handle); - adj = heap_elt_at_index (lm->adjacency_heap, ai); + lm->adjacency_heap = aa_alloc (lm->adjacency_heap, &adj, n_adj); + handle = ai = adj->heap_handle; ip_poison_adjacencies (adj, n_adj); @@ -169,23 +178,14 @@ ip_add_adjacency (ip_lookup_main_t * lm, static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency) { ip_adjacency_t * adj; - uword handle; ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); adj = ip_get_adjacency (lm, adj_index); - handle = adj->heap_handle; - /* Special-case local, drop adjs */ - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_LOCAL: - case IP_LOOKUP_NEXT_DROP: + /* Special-case miss, local, drop adjs */ + if (adj_index < 3) return; - default: - break; - } - if (adj->n_adj == 1) { @@ -202,11 +202,8 @@ static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_ signature = vnet_ip_adjacency_signature (adj); p = hash_get (lm->adj_index_by_signature, signature); if (p == 0) - { - clib_warning ("adj 0x%llx signature %llx not in table", - adj, signature); goto bag_it; - } + this_ai = p[0]; /* At the top of the signature chain (likely)? 
*/ if (this_ai == adj_index) @@ -232,7 +229,12 @@ static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_ prev_adj = this_adj; this_adj = ip_get_adjacency (lm, this_adj->next_adj_with_signature); - ASSERT(this_adj->heap_handle != 0); + /* + * This can happen when creating the first multipath adj of a set + * We end up looking at the miss adjacency (handle==0). + */ + if (this_adj->heap_handle == 0) + goto bag_it; } prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature; } @@ -244,7 +246,7 @@ static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_ ip_poison_adjacencies (adj, adj->n_adj); - heap_dealloc (lm->adjacency_heap, handle); + aa_free (lm->adjacency_heap, adj); } void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index) @@ -792,178 +794,6 @@ ip_interface_address_add_del (ip_lookup_main_t * lm, return /* no error */ 0; } -void serialize_vec_ip_adjacency (serialize_main_t * m, va_list * va) -{ - ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) - { - serialize_integer (m, a[i].heap_handle, sizeof (a[i].heap_handle)); - serialize_integer (m, a[i].n_adj, sizeof (a[i].n_adj)); - serialize_integer (m, a[i].lookup_next_index, sizeof (a[i].lookup_next_index_as_int)); - switch (a[i].lookup_next_index) - { - case IP_LOOKUP_NEXT_LOCAL: - serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index)); - break; - - case IP_LOOKUP_NEXT_ARP: - serialize_integer (m, a[i].if_address_index, sizeof (a[i].if_address_index)); - serialize_integer (m, a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index)); - break; - - case IP_LOOKUP_NEXT_REWRITE: - serialize (m, serialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data)); - break; - - default: - /* nothing else to serialize. 
*/ - break; - } - } -} - -void unserialize_vec_ip_adjacency (serialize_main_t * m, va_list * va) -{ - ip_adjacency_t * a = va_arg (*va, ip_adjacency_t *); - u32 n = va_arg (*va, u32); - u32 i; - ip_poison_adjacencies (a, n); - for (i = 0; i < n; i++) - { - unserialize_integer (m, &a[i].heap_handle, sizeof (a[i].heap_handle)); - unserialize_integer (m, &a[i].n_adj, sizeof (a[i].n_adj)); - unserialize_integer (m, &a[i].lookup_next_index_as_int, sizeof (a[i].lookup_next_index_as_int)); - switch (a[i].lookup_next_index) - { - case IP_LOOKUP_NEXT_LOCAL: - unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index)); - break; - - case IP_LOOKUP_NEXT_ARP: - unserialize_integer (m, &a[i].if_address_index, sizeof (a[i].if_address_index)); - unserialize_integer (m, &a[i].rewrite_header.sw_if_index, sizeof (a[i].rewrite_header.sw_if_index)); - break; - - case IP_LOOKUP_NEXT_REWRITE: - unserialize (m, unserialize_vnet_rewrite, &a[i].rewrite_header, sizeof (a[i].rewrite_data)); - break; - - default: - /* nothing else to unserialize. 
*/ - break; - } - } -} - -static void serialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va) -{ - ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) - { - serialize_integer (m, nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index)); - serialize_integer (m, nh[i].weight, sizeof (nh[i].weight)); - } -} - -static void unserialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va) -{ - ip_multipath_next_hop_t * nh = va_arg (*va, ip_multipath_next_hop_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) - { - unserialize_integer (m, &nh[i].next_hop_adj_index, sizeof (nh[i].next_hop_adj_index)); - unserialize_integer (m, &nh[i].weight, sizeof (nh[i].weight)); - } -} - -static void serialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va) -{ - ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) - { -#define foreach_ip_multipath_adjacency_field \ - _ (adj_index) _ (n_adj_in_block) _ (reference_count) \ - _ (normalized_next_hops.count) \ - _ (normalized_next_hops.heap_offset) \ - _ (normalized_next_hops.heap_handle) \ - _ (unnormalized_next_hops.count) \ - _ (unnormalized_next_hops.heap_offset) \ - _ (unnormalized_next_hops.heap_handle) - -#define _(f) serialize_integer (m, a[i].f, sizeof (a[i].f)); - foreach_ip_multipath_adjacency_field; -#undef _ - } -} - -static void unserialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va) -{ - ip_multipath_adjacency_t * a = va_arg (*va, ip_multipath_adjacency_t *); - u32 n = va_arg (*va, u32); - u32 i; - for (i = 0; i < n; i++) - { -#define _(f) unserialize_integer (m, &a[i].f, sizeof (a[i].f)); - foreach_ip_multipath_adjacency_field; -#undef _ - } -} - -void serialize_ip_lookup_main (serialize_main_t * m, va_list * va) -{ - ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *); - 
- /* If this isn't true you need to call e.g. ip4_maybe_remap_adjacencies - to make it true. */ - ASSERT (lm->n_adjacency_remaps == 0); - - serialize (m, serialize_heap, lm->adjacency_heap, serialize_vec_ip_adjacency); - - serialize (m, serialize_heap, lm->next_hop_heap, serialize_vec_ip_multipath_next_hop); - vec_serialize (m, lm->multipath_adjacencies, serialize_vec_ip_multipath_adjacency); - - /* Adjacency counters (FIXME disabled for now). */ - if (0) - serialize (m, serialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0); -} - -void unserialize_ip_lookup_main (serialize_main_t * m, va_list * va) -{ - ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *); - - unserialize (m, unserialize_heap, &lm->adjacency_heap, unserialize_vec_ip_adjacency); - unserialize (m, unserialize_heap, &lm->next_hop_heap, unserialize_vec_ip_multipath_next_hop); - vec_unserialize (m, &lm->multipath_adjacencies, unserialize_vec_ip_multipath_adjacency); - - /* Build hash table from unserialized data. */ - { - ip_multipath_adjacency_t * a; - - vec_foreach (a, lm->multipath_adjacencies) - { - if (a->n_adj_in_block > 0 && a->reference_count > 0) - hash_set (lm->multipath_adjacency_by_next_hops, - ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle), - a - lm->multipath_adjacencies); - } - } - - /* Validate adjacency counters. */ - vlib_validate_combined_counter (&lm->adjacency_counters, - vec_len (lm->adjacency_heap) - 1); - - /* Adjacency counters (FIXME disabled for now). 
*/ - if (0) - unserialize (m, unserialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0); -} - void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) { ip_adjacency_t * adj; @@ -976,20 +806,26 @@ void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) lm->adj_index_by_signature = hash_create (0, sizeof (uword)); memset (&template_adj, 0, sizeof (template_adj)); + /* Preallocate three "special" adjacencies */ + lm->adjacency_heap = aa_bootstrap (0, 3 /* n=1 free items */); + /* Hand-craft special miss adjacency to use when nothing matches in the routing table. Same for drop adjacency. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->miss_adj_index); + adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, + &lm->miss_adj_index); adj->lookup_next_index = IP_LOOKUP_NEXT_MISS; ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX); /* Make the "drop" adj sharable */ template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, &lm->drop_adj_index); + adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, + &lm->drop_adj_index); /* Make the "local" adj sharable */ template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; template_adj.if_address_index = ~0; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, &lm->local_adj_index); + adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, + &lm->local_adj_index); if (! 
lm->fib_result_n_bytes) lm->fib_result_n_bytes = sizeof (uword); @@ -1782,6 +1618,7 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .path = "ip route", .short_help = "Add/delete IP routes", .function = vnet_ip_route_cmd, + .is_mp_safe = 1, }; /* @@ -1831,10 +1668,10 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, default: clib_warning ("unknown event_type %d", event_type); } + vec_reset_length (event_data); } done: - vec_reset_length (event_data); if (!resolved) return clib_error_return (0, "Resolution failed for %U", @@ -1884,6 +1721,7 @@ ip4_probe_neighbor_wait (vlib_main_t *vm, ip4_address_t * a, u32 sw_if_index, default: clib_warning ("unknown event_type %d", event_type); } + vec_reset_length (event_data); } done: @@ -1956,6 +1794,7 @@ VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { .path = "ip probe-neighbor", .function = probe_neighbor_address, .short_help = "ip probe-neighbor | [retry nn]", + .is_mp_safe = 1, }; typedef CLIB_PACKED (struct { diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index 02ab20d11d9..42869350dff 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -373,9 +373,9 @@ ip_get_adjacency (ip_lookup_main_t * lm, { ip_adjacency_t * adj; - adj = heap_elt_at_index (lm->adjacency_heap, adj_index); + adj = vec_elt_at_index (lm->adjacency_heap, adj_index); - ASSERT (! heap_is_free_handle (lm->adjacency_heap, adj->heap_handle)); + ASSERT (adj->heap_handle != ~0); return adj; } @@ -483,7 +483,4 @@ do { \ void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); -serialize_function_t serialize_ip_lookup_main, unserialize_ip_lookup_main; -serialize_function_t serialize_vec_ip_adjacency, unserialize_vec_ip_adjacency; - #endif /* included_ip_lookup_h */ -- cgit 1.2.3-korg