diff options
Diffstat (limited to 'vnet/vnet/ip')
-rw-r--r-- | vnet/vnet/ip/adj_alloc.c | 241 | ||||
-rw-r--r-- | vnet/vnet/ip/adj_alloc.h | 53 | ||||
-rw-r--r-- | vnet/vnet/ip/format.h | 6 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4.h | 217 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_forward.c | 2133 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_mtrie.c | 74 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_mtrie.h | 9 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_source_and_port_range_check.c | 994 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_source_check.c | 134 | ||||
-rw-r--r-- | vnet/vnet/ip/ip4_test.c | 3 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6.h | 240 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6_forward.c | 1444 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6_hop_by_hop.c | 65 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6_neighbor.c | 479 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6_packet.h | 18 | ||||
-rw-r--r-- | vnet/vnet/ip/ip_feature_registration.c | 34 | ||||
-rw-r--r-- | vnet/vnet/ip/ip_feature_registration.h | 3 | ||||
-rw-r--r-- | vnet/vnet/ip/ip_source_and_port_range_check.h | 66 | ||||
-rw-r--r-- | vnet/vnet/ip/lookup.c | 2193 | ||||
-rw-r--r-- | vnet/vnet/ip/lookup.h | 333 | ||||
-rw-r--r-- | vnet/vnet/ip/ping.c | 22 | ||||
-rw-r--r-- | vnet/vnet/ip/udp.h | 38 |
22 files changed, 2746 insertions, 6053 deletions
diff --git a/vnet/vnet/ip/adj_alloc.c b/vnet/vnet/ip/adj_alloc.c deleted file mode 100644 index 3ae7a199..00000000 --- a/vnet/vnet/ip/adj_alloc.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vnet/ip/adj_alloc.h> -#include <vnet/ip/ip.h> - -/* - * any operation which could cause the adj vector to be reallocated - * must have a worker thread barrier - */ - -static inline int will_reallocate (ip_adjacency_t * adjs, u32 n) -{ - uword aligned_header_bytes, new_data_bytes; - uword data_bytes; - aa_header_t * ah = aa_header (adjs); - - if (adjs == 0) - return 1; - - data_bytes = (vec_len (adjs) + n) * sizeof (*adjs); - - aligned_header_bytes = vec_header_bytes (aa_aligned_header_bytes); - - new_data_bytes = data_bytes + aligned_header_bytes; - - ASSERT (clib_mem_is_heap_object (_vec_find(ah))); - - if (PREDICT_TRUE(new_data_bytes <= clib_mem_size (_vec_find(ah)))) - return 0; - - return 1; -} - -ip_adjacency_t * -aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n) -{ - vlib_main_t * vm = &vlib_global_main; - aa_header_t * ah = aa_header (adjs); - ip_adjacency_t * adj_block; - u32 freelist_length; - int need_barrier_sync = 0; - - ASSERT(os_get_cpu_number() == 0); - ASSERT (clib_mem_is_heap_object (_vec_find(ah))); - - /* If we don't have a freelist of size N, fresh allocation is required */ - if (vec_len (ah->free_indices_by_size) <= n) - { - if 
(will_reallocate (adjs, n)) - { - need_barrier_sync = 1; - vlib_worker_thread_barrier_sync (vm); - } - /* Workers wont look at the freelists... */ - vec_validate (ah->free_indices_by_size, n); - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, - CLIB_CACHE_LINE_BYTES); - if (need_barrier_sync) - vlib_worker_thread_barrier_release (vm); - goto out; - } - /* See if we have a free adj block to dole out */ - if ((freelist_length = vec_len(ah->free_indices_by_size[n]))) - { - u32 index = ah->free_indices_by_size[n][freelist_length-1]; - - adj_block = &adjs[index]; - _vec_len(ah->free_indices_by_size[n]) -= 1; - goto out; - } - /* Allocate a new block of size N */ - if (will_reallocate (adjs, n)) - { - need_barrier_sync = 1; - vlib_worker_thread_barrier_sync (vm); - } - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, - CLIB_CACHE_LINE_BYTES); - - if (need_barrier_sync) - vlib_worker_thread_barrier_release (vm); - - out: - memset (adj_block, 0, n * (sizeof(*adj_block))); - adj_block->heap_handle = adj_block - adjs; - adj_block->n_adj = n; - *blockp = adj_block; - return adjs; -} - -void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj) -{ - aa_header_t * ah = aa_header (adjs); - - ASSERT (adjs && adj && (adj->heap_handle < vec_len (adjs))); - ASSERT (adj->n_adj < vec_len (ah->free_indices_by_size)); - ASSERT (adj->heap_handle != 0); - - vec_add1 (ah->free_indices_by_size[adj->n_adj], adj->heap_handle); - adj->heap_handle = 0; -} - -ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n) -{ - ip_adjacency_t * adj_block; - aa_header_t * ah; - int i; - - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, - CLIB_CACHE_LINE_BYTES); - - memset (adj_block, 0, n * sizeof(*adj_block)); - ah = aa_header (adjs); - memset (ah, 0, sizeof (*ah)); - - vec_validate (ah->free_indices_by_size, 1); - - for (i = 0 ; i < vec_len (adjs); i++) - { - adj_block->n_adj = 1; - adj_block->heap_handle = ~0; - /* Euchre the allocator into returning 0, 1, 2, etc. 
*/ - vec_add1 (ah->free_indices_by_size[1], n - (i+1)); - } - - return adjs; -} - -u8 * format_adjacency_alloc (u8 * s, va_list * args) -{ - vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - ip_adjacency_t * adjs = va_arg (*args, ip_adjacency_t *); - int verbose = va_arg (*args, int); - ip_adjacency_t * adj; - u32 inuse = 0, freed = 0; - u32 on_freelist = 0; - int i, j; - aa_header_t * ah = aa_header (adjs); - - for (i = 0; i < vec_len (adjs); i += adj->n_adj) - { - adj = adjs + i; - if ((i == 0) || adj->heap_handle) - inuse += adj->n_adj; - else - freed += adj->n_adj; - } - - for (i = 1; i < vec_len(ah->free_indices_by_size); i++) - { - for (j = 0; j < vec_len(ah->free_indices_by_size[i]); j++) - { - adj = adjs + ah->free_indices_by_size[i][j]; - ASSERT(adj->heap_handle == 0); - on_freelist += adj->n_adj; - } - } - - s = format (s, "adjs: %d total, %d in use, %d free, %d on freelists\n", - vec_len(adjs), inuse, freed, on_freelist); - if (verbose) - { - for (i = 0; i < vec_len (adjs); i += adj->n_adj) - { - adj = adjs + i; - if ((i == 0) || adj->heap_handle) - { - if (adj->n_adj > 1) - s = format (s, "[%d-%d] ", i, i+adj->n_adj-1); - else - s = format (s, "[%d] ", i); - - for (j = 0; j < adj->n_adj; j++) - { - if (j > 0) - s = format (s, " "); - - s = format(s, "%U\n", format_ip_adjacency, - vnm, lm, i+j); - } - } - } - } - return s; -} - -static clib_error_t * -show_adjacency_alloc_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int verbose = 0; - vnet_main_t *vnm = vnet_get_main(); - ip_lookup_main_t *lm = 0; - ip_adjacency_t * adjs = 0; - int is_ip4 = 1; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "verbose")) - verbose = 1; - else if (unformat (input, "ip4")) - ; - else if (unformat (input, "ip6")) - is_ip4 = 0; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } 
- - if (is_ip4) - lm = &ip4_main.lookup_main; - else - lm = &ip6_main.lookup_main; - - adjs = lm->adjacency_heap; - - vlib_cli_output (vm, "%U", format_adjacency_alloc, vnm, lm, adjs, verbose); - - return 0; -} - -VLIB_CLI_COMMAND (show_adjacency_alloc_command, static) = { - .path = "show adjacency alloc", - .short_help = "show adjacency alloc", - .function = show_adjacency_alloc_command_fn, -}; diff --git a/vnet/vnet/ip/adj_alloc.h b/vnet/vnet/ip/adj_alloc.h deleted file mode 100644 index a10146c5..00000000 --- a/vnet/vnet/ip/adj_alloc.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __adj_alloc_h__ -#define __adj_alloc_h__ - -/* - * Adjacency allocator: heap-like in that the code - * will dole out contiguous chunks of n items. In the interests of - * thread safety, we don't bother about coalescing free blocks of size r - * into free blocks of size s, where r < s. - * - * We include explicit references to worker thread barrier synchronization - * where necessary. 
- */ - -#include <vppinfra/vec.h> -#include <vlib/vlib.h> -#include <vnet/ip/lookup.h> - -typedef struct { - u32 ** free_indices_by_size; -} aa_header_t; - -#define aa_aligned_header_bytes \ - vec_aligned_header_bytes (sizeof (aa_header_t), sizeof (void *)) - -/* Pool header from user pointer */ -static inline aa_header_t * aa_header (void * v) -{ - return vec_aligned_header (v, sizeof (aa_header_t), sizeof (void *)); -} - -ip_adjacency_t * -aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n); -void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj); -ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n); - -format_function_t format_adj_allocation; - -#endif /* __adj_alloc_h__ */ diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h index 4d73d6b1..0d0eb6c9 100644 --- a/vnet/vnet/ip/format.h +++ b/vnet/vnet/ip/format.h @@ -48,6 +48,12 @@ unformat_function_t unformat_ip_protocol; format_function_t format_tcp_udp_port; unformat_function_t unformat_tcp_udp_port; +typedef enum format_ip_adjacency_flags_t_ +{ + FORMAT_IP_ADJACENCY_NONE, + FORMAT_IP_ADJACENCY_DETAIL = (1 << 0), +} format_ip_adjacency_flags_t; + format_function_t format_ip_adjacency; format_function_t format_ip_adjacency_packet_data; diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h index fc74e9d6..f9fe4868 100644 --- a/vnet/vnet/ip/ip4.h +++ b/vnet/vnet/ip/ip4.h @@ -47,10 +47,7 @@ typedef struct ip4_fib_t { /* Hash table for each prefix length mapping. */ - uword * adj_index_by_dst_address[33]; - - /* Temporary vectors for holding new/old values for hash_set. */ - uword * new_hash_values, * old_hash_values; + uword * fib_entry_by_dst_address[33]; /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. 
*/ ip4_fib_mtrie_t mtrie; @@ -62,7 +59,7 @@ typedef struct ip4_fib_t { u32 index; /* flow hash configuration */ - u32 flow_hash_config; + flow_hash_config_t flow_hash_config; /* N-tuple classifier indices */ u32 fwd_classify_table_index; @@ -72,22 +69,6 @@ typedef struct ip4_fib_t { struct ip4_main_t; -typedef void (ip4_add_del_route_function_t) - (struct ip4_main_t * im, - uword opaque, - ip4_fib_t * fib, - u32 flags, - ip4_address_t * address, - u32 address_length, - void * old_result, - void * new_result); - -typedef struct { - ip4_add_del_route_function_t * function; - uword required_flags; - uword function_opaque; -} ip4_add_del_route_callback_t; - typedef void (ip4_add_del_interface_address_function_t) (struct ip4_main_t * im, uword opaque, @@ -115,23 +96,20 @@ typedef struct ip4_main_t { ip_lookup_main_t lookup_main; /** Vector of FIBs. */ - ip4_fib_t * fibs; + struct fib_table_t_ * fibs; u32 fib_masks[33]; /** Table index indexed by software interface. */ u32 * fib_index_by_sw_if_index; + /* IP4 enabled count by software interface */ + u8 * ip_enabled_by_sw_if_index; + /** Hash table mapping table id to fib index. ID space is not necessarily dense; index space is dense. */ uword * fib_index_by_table_id; - /** Vector of functions to call when routes are added/deleted. */ - ip4_add_del_route_callback_t * add_del_route_callbacks; - - /** Hash table mapping interface route rewrite adjacency index by sw if index. */ - uword * interface_route_adj_index_by_sw_if_index; - /** Functions to call when interface address changes. 
*/ ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; @@ -159,11 +137,15 @@ typedef struct ip4_main_t { u32 ip4_unicast_rx_feature_lookup; /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */ u32 ip4_unicast_rx_feature_source_and_port_range_check; + /** Built-in unicast feature path indice, see @ref ip_feature_init_cast() */ + u32 ip4_unicast_rx_feature_drop; /** Built-in multicast feature path index */ u32 ip4_multicast_rx_feature_vpath; /** Built-in multicast feature path index */ u32 ip4_multicast_rx_feature_lookup; + /** Built-in multicast feature path indices */ + u32 ip4_multicast_rx_feature_drop; /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */ u32 ip4_unicast_tx_feature_source_and_port_range_check; @@ -235,30 +217,13 @@ extern vlib_node_registration_t ip4_lookup_node; extern vlib_node_registration_t ip4_rewrite_node; extern vlib_node_registration_t ip4_rewrite_local_node; extern vlib_node_registration_t ip4_arp_node; - -u32 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, - u32 disable_default_route); - -always_inline u32 -ip4_fib_lookup_buffer (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, - vlib_buffer_t * b) -{ - return ip4_fib_lookup_with_table (im, fib_index, dst, - /* disable_default_route */ 0); -} - -always_inline u32 -ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) -{ - u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - return ip4_fib_lookup_with_table (im, fib_index, dst, - /* disable_default_route */ 0); -} +extern vlib_node_registration_t ip4_glean_node; +extern vlib_node_registration_t ip4_midchain_node; always_inline uword -ip4_destination_matches_route (ip4_main_t * im, - ip4_address_t * key, - ip4_address_t * dest, +ip4_destination_matches_route (const ip4_main_t * im, + const ip4_address_t * key, + const ip4_address_t * dest, uword dest_length) { return 0 == ((key->data_u32 ^ 
dest->data_u32) & im->fib_masks[dest_length]); } @@ -280,15 +245,26 @@ ip4_unaligned_destination_matches_route (ip4_main_t * im, { return 0 == ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->data_u32) & im->fib_masks[dest_length]); } always_inline int -ip4_src_address_for_packet (ip4_main_t * im, vlib_buffer_t * p, ip4_address_t * src, u32 sw_if_index) +ip4_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, + ip4_address_t * src) { - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); - if (ia == NULL) - return -1; - ip4_address_t * a = ip_interface_address_get_address (lm, ia); - *src = a[0]; - return 0; + u32 if_add_index = + lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE(if_add_index != ~0)) { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index); + ip4_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + *src = *if_ip; + return 0; + } + else + { + ASSERT(0); + src->as_u32 = 0; + } + return (!0); } /* Find interface address which matches destination. */ @@ -315,126 +291,20 @@ ip4_interface_address_matching_destination (ip4_main_t * im, ip4_address_t * dst return result; } +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia); + clib_error_t * ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, u32 is_del); -int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); - -/* Add/del a route to the FIB. */ - -#define IP4_ROUTE_FLAG_ADD (0 << 0) -#define IP4_ROUTE_FLAG_DEL (1 << 0) -#define IP4_ROUTE_FLAG_TABLE_ID (0 << 1) -#define IP4_ROUTE_FLAG_FIB_INDEX (1 << 1) -#define IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) -#define IP4_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) -/* Not last add/del in group. Facilities batching requests into packets. 
*/ -#define IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) -/* Dynamic route created via ARP reply. */ -#define IP4_ROUTE_FLAG_NEIGHBOR (1 << 5) - -typedef struct { - /* IP4_ROUTE_FLAG_* */ - u32 flags; - - /* Either index of fib or table_id to hash and get fib. - IP4_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ - u32 table_index_or_table_id; - - /* Destination address (prefix) and length. */ - ip4_address_t dst_address; - u32 dst_address_length; - - /* Adjacency to use for this destination. */ - u32 adj_index; - - /* If specified adjacencies to add and then - use for this destination. add_adj/n_add_adj - are override adj_index if specified. */ - ip_adjacency_t * add_adj; - u32 n_add_adj; -} ip4_add_del_route_args_t; - -/** - * \brief Get or create an IPv4 fib. - * - * Get or create an IPv4 fib with the provided fib ID or index. - * The fib ID is a possibly-sparse user-defined value while - * the fib index defines the position of the fib in the fib vector. - * - * \param im - * ip4_main pointer. - * \param table_index_or_id - * The table index if \c IP4_ROUTE_FLAG_FIB_INDEX bit is set in \p flags. - * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked - * and can be retrieved with \c ret->table_id. - * Otherwise, the fib ID to be used to retrieve or create the desired fib. - * \param flags - * Indicates whether \p table_index_or_id is the fib index or ID. - * When the bit \c IP4_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id - * is considered as the fib index, and the fib ID otherwise. - * \returns A pointer to the retrieved or created fib. - * - * \remark When getting a fib with the fib index, the fib MUST already exist. 
- */ -ip4_fib_t * -find_ip4_fib_by_table_index_or_id (ip4_main_t * im, - u32 table_index_or_id, u32 flags); - -void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * args); - -void ip4_add_del_route_next_hop (ip4_main_t * im, - u32 flags, - ip4_address_t * dst_address, - u32 dst_address_length, - ip4_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index); - -u32 -ip4_route_get_next_hop_adj (ip4_main_t * im, - u32 fib_index, - ip4_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index); - -void * -ip4_get_route (ip4_main_t * im, - u32 fib_index_or_table_id, - u32 flags, - u8 * address, - u32 address_length); - void -ip4_foreach_matching_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length, - ip4_address_t ** results, - u8 ** result_lengths); - -void ip4_delete_matching_routes (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length); - -void ip4_maybe_remap_adjacencies (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags); - -void ip4_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index); +ip4_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable); -ip4_address_t * -ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, - ip_interface_address_t ** result_ia); +int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); /* Send an ARP request to see if given destination is reachable on given interface. 
*/ clib_error_t * @@ -458,7 +328,7 @@ void ip4_register_protocol (u32 protocol, u32 node_index); serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; -int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config); +int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); void ip4_mtrie_init (ip4_fib_mtrie_t * m); @@ -468,7 +338,8 @@ int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, /* Compute flow hash. We'll use it to select which adjacency to use for this flow. And other things. */ always_inline u32 -ip4_compute_flow_hash (ip4_header_t * ip, u32 flow_hash_config) +ip4_compute_flow_hash (const ip4_header_t * ip, + flow_hash_config_t flow_hash_config) { tcp_header_t * tcp = (void *) (ip + 1); u32 a, b, c, t1, t2; diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 751260a7..4c49d0e4 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -39,668 +39,16 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> -/** for ethernet_header_t */ -#include <vnet/ethernet/ethernet.h> -/** for ethernet_arp_header_t */ -#include <vnet/ethernet/arp_packet.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */ #include <vnet/ppp/ppp.h> -/** for srp_hw_interface_class */ -#include <vnet/srp/srp.h> -/** for API error numbers */ -#include <vnet/api_errno.h> - -/** @file - vnet ip4 forwarding -*/ - -/* This is really, really simple but stupid fib. */ -u32 -ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, - ip4_address_t * dst, - u32 disable_default_route) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index); - uword * p, * hash, key; - i32 i, i_min, dst_address, ai; - - i_min = disable_default_route ? 
1 : 0; - dst_address = clib_mem_unaligned (&dst->data_u32, u32); - for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--) - { - hash = fib->adj_index_by_dst_address[i]; - if (! hash) - continue; - - key = dst_address & im->fib_masks[i]; - if ((p = hash_get (hash, key)) != 0) - { - ai = p[0]; - goto done; - } - } - - /* Nothing matches in table. */ - ai = lm->miss_adj_index; - - done: - return ai; -} - -/** @brief Create FIB from table ID and init all hashing. - @param im - @ref ip4_main_t - @param table_id - table ID - @return fib - @ref ip4_fib_t -*/ -static ip4_fib_t * -create_fib_with_table_id (ip4_main_t * im, u32 table_id) -{ - ip4_fib_t * fib; - hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); - vec_add2 (im->fibs, fib, 1); - fib->table_id = table_id; - fib->index = fib - im->fibs; - /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */ - fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - fib->fwd_classify_table_index = ~0; - fib->rev_classify_table_index = ~0; - ip4_mtrie_init (&fib->mtrie); - return fib; -} - -/** @brief Find existing or Create new FIB based on index - @param im @ref ip4_main_t - @param table_index_or_id - overloaded parameter referring - to the table or a table's index in the FIB vector - @param flags - used to check if table_index_or_id was a table or - an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX) - @return either the existing or a new ip4_fib_t entry -*/ -ip4_fib_t * -find_ip4_fib_by_table_index_or_id (ip4_main_t * im, - u32 table_index_or_id, u32 flags) -{ - uword * p, fib_index; - - fib_index = table_index_or_id; - /* If this isn't a FIB_INDEX ... */ - if (! 
(flags & IP4_ROUTE_FLAG_FIB_INDEX)) - { - /* If passed ~0 then request the next table available */ - if (table_index_or_id == ~0) { - table_index_or_id = 0; - while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) { - table_index_or_id++; - } - /* Create the next table and return the ip4_fib_t associated with it */ - return create_fib_with_table_id (im, table_index_or_id); - } - /* A specific table_id was requested.. */ - p = hash_get (im->fib_index_by_table_id, table_index_or_id); - /* ... and if it doesn't exist create it else grab its index */ - if (! p) - return create_fib_with_table_id (im, table_index_or_id); - fib_index = p[0]; - } - /* Return the ip4_fib_t associated with this index */ - return vec_elt_at_index (im->fibs, fib_index); -} - -static void -ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, - ip4_fib_t * fib, - u32 address_length) -{ - hash_t * h; - uword max_index; - - ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); - lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword); - - fib->adj_index_by_dst_address[address_length] = - hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword)); - - hash_set_flags (fib->adj_index_by_dst_address[address_length], - HASH_FLAG_NO_AUTO_SHRINK); - - h = hash_header (fib->adj_index_by_dst_address[address_length]); - max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; - - /* Initialize new/old hash value vectors. 
*/ - vec_validate_init_empty (fib->new_hash_values, max_index, ~0); - vec_validate_init_empty (fib->old_hash_values, max_index, ~0); -} - -static void -ip4_fib_set_adj_index (ip4_main_t * im, - ip4_fib_t * fib, - u32 flags, - u32 dst_address_u32, - u32 dst_address_length, - u32 adj_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - uword * hash; - - if (vec_bytes(fib->old_hash_values)) - memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); - if (vec_bytes(fib->new_hash_values)) - memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); - fib->new_hash_values[0] = adj_index; - - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - hash = _hash_set3 (hash, dst_address_u32, - fib->new_hash_values, - fib->old_hash_values); - - fib->adj_index_by_dst_address[dst_address_length] = hash; - - if (vec_len (im->add_del_route_callbacks) > 0) - { - ip4_add_del_route_callback_t * cb; - ip4_address_t d; - uword * p; - - d.data_u32 = dst_address_u32; - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags, - &d, dst_address_length, - fib->old_hash_values, - fib->new_hash_values); - - p = hash_get (hash, dst_address_u32); - /* hash_get should never return NULL here */ - if (p) - clib_memcpy (p, fib->new_hash_values, - vec_bytes (fib->new_hash_values)); - else - ASSERT(0); - } -} - -void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip4_fib_t * fib; - u32 dst_address, dst_address_length, adj_index, old_adj_index; - uword * hash, is_del; - ip4_add_del_route_callback_t * cb; - - /* Either create new adjacency or use given one depending on arguments. 
*/ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - } - else - adj_index = a->adj_index; - - dst_address = a->dst_address.data_u32; - dst_address_length = a->dst_address_length; - fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags); - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - dst_address &= im->fib_masks[dst_address_length]; - - if (! fib->adj_index_by_dst_address[dst_address_length]) - ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; - - if (is_del) - { - fib->old_hash_values[0] = ~0; - hash = _hash_unset (hash, dst_address, fib->old_hash_values); - fib->adj_index_by_dst_address[dst_address_length] = hash; - - if (vec_len (im->add_del_route_callbacks) > 0 - && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */ - { - fib->new_hash_values[0] = ~0; - vec_foreach (cb, im->add_del_route_callbacks) - if ((a->flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, a->flags, - &a->dst_address, dst_address_length, - fib->old_hash_values, - fib->new_hash_values); - } - } - else - ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length, - adj_index); - - old_adj_index = fib->old_hash_values[0]; - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 - && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count --; - } - - ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, - is_del ? old_adj_index : adj_index, - is_del); - - /* Delete old adjacency index if present and changed. */ - if (! 
(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); -} - - -u32 -ip4_route_get_next_hop_adj (ip4_main_t * im, - u32 fib_index, - ip4_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - vnet_main_t * vnm = vnet_get_main(); - uword * nh_hash, * nh_result; - int is_interface_next_hop; - u32 nh_adj_index; - ip4_fib_t * fib; - - fib = vec_elt_at_index (im->fibs, fib_index); - - is_interface_next_hop = next_hop->data_u32 == 0; - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0); - ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index); - } - } - else if (next_hop_sw_if_index == ~0) - { - /* next-hop is recursive. we always need a indirect adj - * for recursive paths. Any LPM we perform now will give - * us a valid adj, but without tracking the next-hop we - * have no way to keep it valid. - */ - ip_adjacency_t add_adj; - memset (&add_adj, 0, sizeof(add_adj)); - add_adj.n_adj = 1; - add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; - add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32; - add_adj.explicit_fib_index = explicit_fib_index; - ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); - } - else - { - nh_hash = fib->adj_index_by_dst_address[32]; - nh_result = hash_get (nh_hash, next_hop->data_u32); - - /* Next hop must be known. */ - if (! 
nh_result) - { - ip_adjacency_t * adj; - - /* no /32 exists, get the longest prefix match */ - nh_adj_index = ip4_fib_lookup_with_table (im, fib_index, - next_hop, 0); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ARP interface adjacency is present, we need to - install ARP adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip4.as_u32 == 0) - { - nh_adj_index = vnet_arp_glean_add(fib_index, next_hop); - } - } - else - { - nh_adj_index = *nh_result; - } - } - - return (nh_adj_index); -} - -void -ip4_add_del_route_next_hop (ip4_main_t * im, - u32 flags, - ip4_address_t * dst_address, - u32 dst_address_length, - ip4_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index) -{ - vnet_main_t * vnm = vnet_get_main(); - ip_lookup_main_t * lm = &im->lookup_main; - u32 fib_index; - ip4_fib_t * fib; - u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index; - u32 dst_adj_index, nh_adj_index; - uword * dst_hash, * dst_result; - ip_adjacency_t * dst_adj; - ip_multipath_adjacency_t * old_mp, * new_mp; - int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0; - clib_error_t * error = 0; - - if (explicit_fib_index == (u32)~0) - fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); - else - fib_index = explicit_fib_index; - - fib = vec_elt_at_index (im->fibs, fib_index); - - /* Lookup next hop to be added or deleted. 
*/ - if (adj_index == (u32)~0) - { - nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index, - next_hop, - next_hop_sw_if_index, - explicit_fib_index); - } - else - { - nh_adj_index = adj_index; - } - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length]; - - dst_hash = fib->adj_index_by_dst_address[dst_address_length]; - dst_result = hash_get (dst_hash, dst_address_u32); - if (dst_result) - { - dst_adj_index = dst_result[0]; - dst_adj = ip_get_adjacency (lm, dst_adj_index); - } - else - { - /* For deletes destination must be known. */ - if (is_del) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "unknown destination %U/%d", - format_ip4_address, dst_address, - dst_address_length); - goto done; - } - - dst_adj_index = ~0; - dst_adj = 0; - } - - /* Ignore adds of X/32 with next hop of X. */ - if (! is_del - && dst_address_length == 32 - && dst_address->data_u32 == next_hop->data_u32 - && adj_index != (u32)~0) - { - vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; - error = clib_error_return (0, "prefix matches next hop %U/%d", - format_ip4_address, dst_address, - dst_address_length); - goto done; - } - - /* Destination is not known and default weight is set so add route - to existing non-multipath adjacency */ - if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) - { - /* create / delete additional mapping of existing adjacency */ - ip4_add_del_route_args_t a; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? 
IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE - | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = nh_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip4_add_del_route (im, &a); - goto done; - } - - old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; - - if (! ip_multipath_adjacency_add_del_next_hop - (lm, is_del, - old_mp_adj_index, - nh_adj_index, - next_hop_weight, - &new_mp_adj_index)) - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; - error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path", - format_ip4_address, next_hop); - goto done; - } - - old_mp = new_mp = 0; - if (old_mp_adj_index != ~0) - old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - if (new_mp_adj_index != ~0) - new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); - - if (old_mp != new_mp) - { - ip4_add_del_route_args_t a; - ip_adjacency_t * adj; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip4_add_del_route (im, &a); - - adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index); - if (adj->n_adj == 1) - adj->share_count += is_del ? 
-1 : 1; - } - - done: - if (error) - clib_error_report (error); -} - -void * -ip4_get_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - u8 * address, - u32 address_length) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - u32 dst_address = * (u32 *) address; - uword * hash, * p; - - ASSERT (address_length < ARRAY_LEN (im->fib_masks)); - dst_address &= im->fib_masks[address_length]; - - hash = fib->adj_index_by_dst_address[address_length]; - p = hash_get (hash, dst_address); - return (void *) p; -} - -void -ip4_foreach_matching_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length, - ip4_address_t ** results, - u8 ** result_lengths) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - u32 dst_address = address->data_u32; - u32 this_length = address_length; - - if (*results) - _vec_len (*results) = 0; - if (*result_lengths) - _vec_len (*result_lengths) = 0; - - while (this_length <= 32 && vec_len (results) == 0) - { - uword k, v; - hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({ - if (0 == ((k ^ dst_address) & im->fib_masks[address_length])) - { - ip4_address_t a; - a.data_u32 = k; - vec_add1 (*results, a); - vec_add1 (*result_lengths, this_length); - } - })); - - this_length++; - } -} - -void ip4_maybe_remap_adjacencies (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - ip_lookup_main_t * lm = &im->lookup_main; - u32 i, l; - ip4_address_t a; - ip4_add_del_route_callback_t * cb; - static ip4_address_t * to_delete; - - if (lm->n_adjacency_remaps == 0) - return; - - for (l = 0; l <= 32; l++) - { - hash_pair_t * p; - uword * hash = fib->adj_index_by_dst_address[l]; - - if (hash_elts (hash) == 0) - continue; - - if (to_delete) - _vec_len (to_delete) = 0; - - hash_foreach_pair 
(p, hash, ({ - u32 adj_index = p->value[0]; - u32 m = vec_elt (lm->adjacency_remap_table, adj_index); - - if (m) - { - /* Record destination address from hash key. */ - a.data_u32 = p->key; - - /* New adjacency points to nothing: so delete prefix. */ - if (m == ~0) - vec_add1 (to_delete, a); - else - { - /* Remap to new adjacency. */ - clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values)); - - /* Set new adjacency value. */ - fib->new_hash_values[0] = p->value[0] = m - 1; - - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags | IP4_ROUTE_FLAG_ADD, - &a, l, - fib->old_hash_values, - fib->new_hash_values); - } - } - })); - - fib->new_hash_values[0] = ~0; - for (i = 0; i < vec_len (to_delete); i++) - { - hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values); - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags | IP4_ROUTE_FLAG_DEL, - &a, l, - fib->old_hash_values, - fib->new_hash_values); - } - } - - /* Also remap adjacencies in mtrie. */ - ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie); - - /* Reset mapping table. */ - vec_zero (lm->adjacency_remap_table); - - /* All remaps have been performed. 
*/ - lm->n_adjacency_remaps = 0; -} - -void ip4_delete_matching_routes (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length) -{ - static ip4_address_t * matching_addresses; - static u8 * matching_address_lengths; - u32 l, i; - ip4_add_del_route_args_t a; - - a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags; - a.table_index_or_table_id = table_index_or_table_id; - a.adj_index = ~0; - a.add_adj = 0; - a.n_add_adj = 0; - - for (l = address_length + 1; l <= 32; l++) - { - ip4_foreach_matching_route (im, table_index_or_table_id, flags, - address, - l, - &matching_addresses, - &matching_address_lengths); - for (i = 0; i < vec_len (matching_addresses); i++) - { - a.dst_address = matching_addresses[i]; - a.dst_address_length = matching_address_lengths[i]; - ip4_add_del_route (im, &a); - } - } - - ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags); -} +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vnet/api_errno.h> /* for API error numbers */ +#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */ +#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */ +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/classify_dpo.h> void ip4_forward_next_trace (vlib_main_t * vm, @@ -712,12 +60,10 @@ always_inline uword ip4_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets, - int is_indirect) + int lookup_for_responses_to_locally_received_packets) { ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -732,217 +78,194 @@ ip4_lookup_inline 
(vlib_main_t * vm, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t * p0, * p1; - ip4_header_t * ip0, * ip1; - __attribute__((unused)) tcp_header_t * tcp0, * tcp1; - ip_lookup_next_t next0, next1; - ip_adjacency_t * adj0, * adj1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_address_t * dst_addr0, *dst_addr1; - __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; - __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1; - u32 flow_hash_config0, flow_hash_config1; + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + __attribute__((unused)) tcp_header_t * tcp0, * tcp1; + ip_lookup_next_t next0, next1; + const load_balance_t * lb0, * lb1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_address_t * dst_addr0, *dst_addr1; + __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0; + __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1; + flow_hash_config_t flow_hash_config0, flow_hash_config1; u32 hash_c0, hash_c1; - u32 wrong_next; + u32 wrong_next; + const dpo_id_t *dpo0, *dpo1; - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); - } + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); + } - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); - if (is_indirect) - { - ip_adjacency_t * iadj0, * iadj1; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip4; - dst_addr1 = &iadj1->indirect.next_hop.ip4; - } - else - { - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - } + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - if (! lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); - } - - tcp0 = (void *) (ip0 + 1); - tcp1 = (void *) (ip1 + 1); - - is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP - || ip0->protocol == IP_PROTOCOL_UDP); - is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP - || ip1->protocol == IP_PROTOCOL_UDP); - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); - } - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); - } - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); - } - - if (lookup_for_responses_to_locally_received_packets) - { - adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; - } - else - { - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - } - - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - dst_addr0, - /* no_default_route */ 0)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - dst_addr1, - /* no_default_route */ 0)); - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; - - /* Use flow hash to compute multipath adjacency. */ + if (! lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); + } + + tcp0 = (void *) (ip0 + 1); + tcp1 = (void *) (ip1 + 1); + + is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP + || ip1->protocol == IP_PROTOCOL_UDP); + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); + } + + if (lookup_for_responses_to_locally_received_packets) + { + lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; + } + else + { + /* Handle default route. 
*/ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); + + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + } + + lb0 = load_balance_get (lb_index0); + lb1 = load_balance_get (lb_index1); + + /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; - if (PREDICT_FALSE (adj0->n_adj > 1)) + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; - hash_c0 = vnet_buffer (p0)->ip.flow_hash = + flow_hash_config0 = lb0->lb_hash_config; + hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); } - if (PREDICT_FALSE(adj1->n_adj > 1)) + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) { - flow_hash_config1 = - vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = + flow_hash_config1 = lb1->lb_hash_config; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash (ip1, flow_hash_config1); } - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (hash_c0 & (adj0->n_adj - 1)); - adj_index1 += (hash_c1 & (adj1->n_adj - 1)); - - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; - - if (is_indirect) - { - /* ARP for next-hop not packet's destination address */ - if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip0->dst_address.as_u32 = dst_addr0->as_u32; - if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip1->dst_address.as_u32 = dst_addr1->as_u32; - } - - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, - vlib_buffer_length_in_chain (vm, p0) + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 
(lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + next1 = dpo1->dpoi_next_node; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lb_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + sizeof(ethernet_header_t)); - vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + vlib_increment_combined_counter + (cm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1) + sizeof(ethernet_header_t)); - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - wrong_next = (next0 != next) + 2*(next1 != next); - if (PREDICT_FALSE (wrong_next != 0)) - { - switch (wrong_next) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - /* A B B */ - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next1; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - } - } - } - } + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + wrong_next = (next0 != next) + 2*(next1 != next); + if (PREDICT_FALSE (wrong_next != 0)) + { + switch (wrong_next) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + 
to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + /* A B B */ + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next1; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + } + } + } + } while (n_left_from > 0 && n_left_to_next > 0) { @@ -950,12 +273,14 @@ ip4_lookup_inline (vlib_main_t * vm, ip4_header_t * ip0; __attribute__((unused)) tcp_header_t * tcp0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; + const load_balance_t *lb0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; ip4_address_t * dst_addr0; - __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; - u32 flow_hash_config0, hash_c0; + __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0; + flow_hash_config_t flow_hash_config0; + const dpo_id_t *dpo0; + u32 hash_c0; pi0 = from[0]; to_next[0] = pi0; @@ -964,16 +289,7 @@ ip4_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - if (is_indirect) - { - ip_adjacency_t * iadj0; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip4; - } - else - { - dst_addr0 = &ip0->dst_address; - } + dst_addr0 = &ip0->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? @@ -981,7 +297,7 @@ ip4_lookup_inline (vlib_main_t * vm, if (! 
lookup_for_responses_to_locally_received_packets) { - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get( fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -1003,50 +319,39 @@ ip4_lookup_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); if (lookup_for_responses_to_locally_received_packets) - adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; else { /* Handle default route. */ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - dst_addr0, - /* no_default_route */ 0)); - - adj0 = ip_get_adjacency (lm, adj_index0); - - next0 = adj0->lookup_next_index; + lb0 = load_balance_get (lbi0); /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); } - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (hash_c0 & (adj0->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); - if (is_indirect) - { - /* ARP for next-hop not packet's destination address */ - if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip0->dst_address.as_u32 = dst_addr0->as_u32; - } + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, - 
vlib_buffer_length_in_chain (vm, p0) - + sizeof(ethernet_header_t)); + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -1113,55 +418,135 @@ ip4_lookup (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_lookup_inline (vm, node, frame, - /* lookup_for_responses_to_locally_received_packets */ 0, - /* is_indirect */ 0); + /* lookup_for_responses_to_locally_received_packets */ 0); } -void ip4_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index) +static u8 * format_ip4_lookup_trace (u8 * s, va_list * args); + +VLIB_REGISTER_NODE (ip4_lookup_node) = { + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_lookup_trace, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup) + +always_inline uword +ip4_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ip_lookup_next_t n; - vnet_l3_packet_type_t packet_type; - u32 node_index; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); - if (hw->hw_class_index == ethernet_hw_interface_class.index - || hw->hw_class_index == srp_hw_interface_class.index) - { - /* - * We have a bit of a problem in this case. ip4-arp uses - * the rewrite_header.next_index to hand pkts to the - * indicated inteface output node. We can end up in - * ip4_rewrite_local, too, which also pays attention to - * rewrite_header.next index. Net result: a hack in - * ip4_rewrite_local... 
- */ - n = IP_LOOKUP_NEXT_ARP; - node_index = ip4_arp_node.index; - adj->if_address_index = if_address_index; - adj->arp.next_hop.ip4.as_u32 = 0; - ip46_address_reset(&adj->arp.next_hop); - packet_type = VNET_L3_PACKET_TYPE_ARP; - } - else + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace(vm, node, frame, VLIB_TX); + + while (n_left_from > 0) { - n = IP_LOOKUP_NEXT_REWRITE; - node_index = ip4_rewrite_node.index; - packet_type = VNET_L3_PACKET_TYPE_IP4; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const ip4_header_t *ip0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); } - adj->lookup_next_index = n; - vnet_rewrite_for_sw_interface - (vnm, - packet_type, - sw_if_index, - node_index, - 
VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + return frame->n_vectors; +} + +static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args); + +VLIB_REGISTER_NODE (ip4_load_balance_node) = { + .function = ip4_load_balance, + .name = "ip4-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + + .format_trace = format_ip4_forward_next_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance) + +/* get first interface address */ +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip4_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + if (result_ia) + *result_ia = result ? ia : 0; + return result; } static void @@ -1169,115 +554,160 @@ ip4_add_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, ip_interface_address_t * a) { - vnet_main_t * vnm = vnet_get_main(); ip_lookup_main_t * lm = &im->lookup_main; - ip_adjacency_t * adj; ip4_address_t * address = ip_interface_address_get_address (lm, a); - ip4_add_del_route_args_t x; - vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); - u32 classify_table_index; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP4_ROUTE_FLAG_ADD - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = a->address_length; - x.n_add_adj = 0; - x.add_adj = 0; + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; a->neighbor_probe_adj_index = ~0; - if (a->address_length < 32) - { - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - ip4_add_del_route (im, &x); - a->neighbor_probe_adj_index = x.adj_index; - } - - /* Add e.g. 1.1.1.1/32 as local to this host. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - - classify_table_index = ~0; + + if (pfx.fp_len < 32) + { + fib_node_index_t fei; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, /* No next-hop address */ + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + a->neighbor_probe_adj_index = fib_entry_get_adj(fei); + } + + pfx.fp_len = 32; + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) - classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; - if (classify_table_index != (u32) ~0) - { - adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj->classify.table_index = classify_table_index; - } - else - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - - adj->if_address_index = a - lm->if_address_pool; - adj->rewrite_header.sw_if_index = sw_if_index; - adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; - /* - * Local adjs are never to be rewritten. 
Spoofed pkts w/ src = dst = local - * fail an RPF-ish check, but still go thru the rewrite code... - */ - adj->rewrite_header.data_bytes = 0; + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_NULL; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP4, + classify_table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + } - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - x.dst_address_length = 32; - ip4_add_del_route (im, &x); + fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + &pfx.fp_addr, + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } static void -ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length) +ip4_del_interface_routes (ip4_main_t * im, + u32 fib_index, + ip4_address_t * address, + u32 address_length) { - ip4_add_del_route_args_t x; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP4_ROUTE_FLAG_DEL - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = address_length; - x.adj_index = ~0; - x.n_add_adj = 0; - x.add_adj = 0; - - if (address_length < 32) - ip4_add_del_route (im, &x); - - x.dst_address_length = 32; - ip4_add_del_route (im, &x); - - ip4_delete_matching_routes (im, - fib_index, - IP4_ROUTE_FLAG_FIB_INDEX, - address, - address_length); + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; + + if (pfx.fp_len < 32) + { + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); + } + + pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); } -typedef struct { - u32 sw_if_index; - ip4_address_t address; - u32 length; -} ip4_interface_address_t; +void +ip4_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable) +{ + vlib_main_t * vm = vlib_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + u32 lookup_feature_index; -static clib_error_t * -ip4_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip4_address_t * new_address, - u32 new_length, - u32 redistribute, - u32 insert_routes, - u32 is_del); + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + + for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++) + { + ip_config_main_t * cm = &lm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = 
cm->config_index_by_sw_if_index[sw_if_index]; + + if (cast == VNET_IP_RX_UNICAST_FEAT) + lookup_feature_index = im->ip4_unicast_rx_feature_lookup; + else + lookup_feature_index = im->ip4_multicast_rx_feature_lookup; + + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } +} static clib_error_t * ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, - u32 redistribute, - u32 insert_routes, u32 is_del) { vnet_main_t * vnm = vnet_get_main(); @@ -1292,9 +722,15 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); - /* When adding an address check that it does not conflict with an existing address. */ + /* FIXME-LATER + * there is no support for adj-fib handling in the presence of overlapping + * subnets on interfaces. Easy fix - disallow overlapping subnets, like + * most routers do. + */ if (! is_del) { + /* When adding an address check that it does not conflict + with an existing address. 
*/ ip_interface_address_t * ia; foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, @@ -1307,7 +743,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, format_ip4_address_and_length, address, address_length, format_ip4_address_and_length, x, ia->address_length, format_vnet_sw_if_index_name, vnm, sw_if_index); - })); + })); } elts_before = pool_elts (lm->if_address_pool); @@ -1322,18 +758,16 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, if (error) goto done; - if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) - { - if (is_del) - ip4_del_interface_routes (im, ip4_af.fib_index, address, - address_length); - - else - ip4_add_interface_routes (sw_if_index, - im, ip4_af.fib_index, - pool_elt_at_index - (lm->if_address_pool, if_address_index)); - } + ip4_sw_interface_enable_disable(sw_if_index, !is_del); + + if (is_del) + ip4_del_interface_routes (im, ip4_af.fib_index, address, + address_length); + else + ip4_add_interface_routes (sw_if_index, + im, ip4_af.fib_index, + pool_elt_at_index + (lm->if_address_pool, if_address_index)); /* If pool did not grow/shrink: add duplicate address. */ if (elts_before != pool_elts (lm->if_address_pool)) @@ -1358,48 +792,9 @@ ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, { return ip4_add_del_interface_address_internal (vm, sw_if_index, address, address_length, - /* redistribute */ 1, - /* insert_routes */ 1, is_del); } -static clib_error_t * -ip4_sw_interface_admin_up_down (vnet_main_t * vnm, - u32 sw_if_index, - u32 flags) -{ - ip4_main_t * im = &ip4_main; - ip_interface_address_t * ia; - ip4_address_t * a; - u32 is_admin_up, fib_index; - - /* Fill in lookup tables with default table (0). 
*/ - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - - vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0); - - is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - - foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */, - ({ - a = ip_interface_address_get_address (&im->lookup_main, ia); - if (is_admin_up) - ip4_add_interface_routes (sw_if_index, - im, fib_index, - ia); - else - ip4_del_interface_routes (im, fib_index, - a, ia->address_length); - })); - - return 0; -} - -VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); - /* Built-in ip4 unicast rx feature path definition */ VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = { .node_name = "ip4-inacl", @@ -1449,10 +844,17 @@ VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = { VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = { .node_name = "ip4-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0}, .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup, }; +VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = { + .node_name = "ip4-drop", + .runs_before = 0, /* not before any other features */ + .feature_index = &ip4_main.ip4_unicast_rx_feature_drop, +}; + + /* Built-in ip4 multicast rx feature path definition */ VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { .node_name = "vpath-input-ip4", @@ -1462,10 +864,16 @@ VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = { .node_name = "ip4-lookup-multicast", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0}, .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup, }; +VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = { + .node_name = "ip4-drop", + .runs_before = 0, /* 
last feature */ + .feature_index = &ip4_main.ip4_multicast_rx_feature_drop, +}; + static char * rx_feature_start_nodes[] = { "ip4-input", "ip4-input-no-checksum"}; @@ -1488,7 +896,6 @@ VNET_IP4_TX_FEATURE_INIT (interface_output, static) = { .feature_index = &ip4_main.ip4_tx_feature_interface_output, }; - static clib_error_t * ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) { @@ -1520,7 +927,7 @@ ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) feature_start_nodes, feature_start_len, cast, - 1 /* is_ip4 */))) + VNET_L3_PACKET_TYPE_IP4))) return error; } @@ -1538,6 +945,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 ci, cast; u32 feature_index; + /* Fill in lookup tables with default table (0). */ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + for (cast = 0; cast < VNET_N_IP_FEAT; cast++) { ip_config_main_t * cm = &lm->feature_config_mains[cast]; @@ -1547,9 +957,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, ci = cm->config_index_by_sw_if_index[sw_if_index]; if (cast == VNET_IP_RX_UNICAST_FEAT) - feature_index = im->ip4_unicast_rx_feature_lookup; + feature_index = im->ip4_unicast_rx_feature_drop; else if (cast == VNET_IP_RX_MULTICAST_FEAT) - feature_index = im->ip4_multicast_rx_feature_lookup; + feature_index = im->ip4_multicast_rx_feature_drop; else feature_index = im->ip4_tx_feature_interface_output; @@ -1560,14 +970,16 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, /* config data */ 0, /* # bytes of config data */ 0); else - ci = vnet_config_del_feature (vm, vcm, - ci, - feature_index, - /* config data */ 0, - /* # bytes of config data */ 0); - + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index) + im->ip_enabled_by_sw_if_index[sw_if_index] = 0; + } cm->config_index_by_sw_if_index[sw_if_index] = ci; - /* + /* * note: do not update the tx feature count here. 
*/ } @@ -1577,44 +989,6 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); -static u8 * format_ip4_lookup_trace (u8 * s, va_list * args); - -VLIB_REGISTER_NODE (ip4_lookup_node) = { - .function = ip4_lookup, - .name = "ip4-lookup", - .vector_size = sizeof (u32), - - .format_trace = format_ip4_lookup_trace, - - .n_next_nodes = IP4_LOOKUP_N_NEXT, - .next_nodes = IP4_LOOKUP_NEXT_NODES, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); - -static uword -ip4_indirect (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip4_lookup_inline (vm, node, frame, - /* lookup_for_responses_to_locally_received_packets */ 0, - /* is_indirect */ 1); -} - -VLIB_REGISTER_NODE (ip4_indirect_node) = { - .function = ip4_indirect, - .name = "ip4-indirect", - .vector_size = sizeof (u32), - .sibling_of = "ip4-lookup", - .format_trace = format_ip4_lookup_trace, - - .n_next_nodes = 0, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect); - - /* Global IP4 main. */ ip4_main_t ip4_main; @@ -1636,11 +1010,11 @@ ip4_lookup_init (vlib_main_t * vm) im->fib_masks[i] = clib_host_to_net_u32 (m); } - /* Create FIB with index 0 and table id of 0. */ - find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID); - ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); + /* Create FIB with index 0 and table id of 0. 
*/ + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0); + { pg_node_t * pn; pn = pg_get_node (ip4_lookup_node.index); @@ -1708,12 +1082,12 @@ static u8 * format_ip4_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; uword indent = format_get_indent (s); s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header, t->packet_data); @@ -1726,16 +1100,16 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; uword indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, + vnm, t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1863,12 +1237,6 @@ ip4_punt (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); } -static uword -ip4_miss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); } - VLIB_REGISTER_NODE (ip4_drop_node,static) = { .function = ip4_drop, .name = "ip4-drop", @@ -1882,7 +1250,7 @@ VLIB_REGISTER_NODE 
(ip4_drop_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop) VLIB_REGISTER_NODE (ip4_punt_node,static) = { .function = ip4_punt, @@ -1897,22 +1265,7 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt); - -VLIB_REGISTER_NODE (ip4_miss_node,static) = { - .function = ip4_miss, - .name = "ip4-miss", - .vector_size = sizeof (u32), - - .format_trace = format_ip4_forward_next_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt) /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 @@ -2009,26 +1362,27 @@ ip4_local (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t * p0, * p1; - ip4_header_t * ip0, * ip1; - udp_header_t * udp0, * udp1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip_adjacency_t * adj0, * adj1; - u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0; - u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1; - i32 len_diff0, len_diff1; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; + u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0; + u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1; + i32 len_diff0, len_diff1; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; + u8 enqueue_code; - pi0 = to_next[0] = from[0]; - pi1 = 
to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -2041,8 +1395,8 @@ ip4_local (vlib_main_t * vm, fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer(p1)->sw_if_index[VLIB_RX]); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -2130,41 +1484,42 @@ ip4_local (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - - vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - /* no_default_route */ 1)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - &ip1->src_address, - /* no_default_route */ 1)); + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1; - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); + lb0 = load_balance_get(lbi0); + lb1 = load_balance_get(lbi1); + dpo0 = load_balance_get_bucket_i(lb0, 0); + dpo1 = load_balance_get_bucket_i(lb1, 0); /* * Must have a route to source otherwise we drop the packet. * ip4 broadcasts are accepted, e.g. to make dhcp client work */ error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && dpo0->dpoi_type != DPO_ADJACENCY + && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE && ip0->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + error0 = (dpo0->dpoi_type == DPO_RECEIVE ? 
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error0); error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL - && ip0->dst_address.as_u32 != 0xFFFFFFFF + && dpo1->dpoi_type != DPO_ADJACENCY + && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE + && ip1->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error1); + error1 = (dpo0->dpoi_type == DPO_RECEIVE ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error1); next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; @@ -2220,11 +1575,12 @@ ip4_local (vlib_main_t * vm, udp_header_t * udp0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; - ip_adjacency_t * adj0; - u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0; + u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0; i32 len_diff0; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - + load_balance_t *lb0; + const dpo_id_t *dpo0; + pi0 = to_next[0] = from[0]; from += 1; n_left_from -= 1; @@ -2238,7 +1594,7 @@ ip4_local (vlib_main_t * vm, fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer(p0)->sw_if_index[VLIB_RX]); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -2296,24 +1652,30 @@ ip4_local (vlib_main_t * vm, : error0); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - /* no_default_route */ 1)); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); - adj0 = ip_get_adjacency (lm, adj_index0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = + dpo0->dpoi_index; /* Must have a route to source otherwise we drop the packet. */ error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && dpo0->dpoi_type != DPO_ADJACENCY + && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE + && dpo0->dpoi_type != DPO_RECEIVE && ip0->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + /* Packet originated from a local address => spoofing */ + error0 = (dpo0->dpoi_type == DPO_RECEIVE ? 
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error0); next0 = lm->local_next_by_ip_protocol[proto0]; @@ -2356,7 +1718,7 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local) void ip4_register_protocol (u32 protocol, u32 node_index) { @@ -2394,10 +1756,11 @@ VLIB_CLI_COMMAND (show_ip_local, static) = { .short_help = "Show ip local protocol table", }; -static uword -ip4_arp (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +ip4_arp_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_glean) { vnet_main_t * vnm = vnet_get_main(); ip4_main_t * im = &ip4_main; @@ -2441,12 +1804,11 @@ ip4_arp (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next_drop > 0) { + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + ip_adjacency_t * adj0; vlib_buffer_t * p0; ip4_header_t * ip0; - ethernet_header_t * eh0; - u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; uword bm0; - ip_adjacency_t * adj0; pi0 = from[0]; @@ -2456,35 +1818,10 @@ ip4_arp (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); - /* If packet destination is not local, send ARP to next hop */ - if (adj0->arp.next_hop.ip4.as_u32) - ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32; - - /* - * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP - * rewrite to this packet, we need to skip it here. - * Note, to distinguish from src IP addr *.8.6.*, we - * check for a bcast eth dest instead of IPv4 version. 
- */ - eh0 = (ethernet_header_t*)ip0; - if ((ip0->ip_version_and_header_length & 0xF0) != 0x40) - { - u32 vlan_num = 0; - u16 * etype = &eh0->type; - while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q - || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad - { - vlan_num += 1; - etype += 2; //vlan tag also 16 bits, same as etype - } - if (*etype == clib_host_to_net_u16 (0x0806)) //arp - { - vlib_buffer_advance ( - p0, sizeof(ethernet_header_t) + (4*vlan_num)); - ip0 = vlib_buffer_get_current (p0); - } - } - + /* + * this is the Glean case, so we are ARPing for the + * packet's destination + */ a0 = hash_seeds[0]; b0 = hash_seeds[1]; c0 = hash_seeds[2]; @@ -2492,7 +1829,14 @@ ip4_arp (vlib_main_t * vm, sw_if_index0 = adj0->rewrite_header.sw_if_index; vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; - a0 ^= ip0->dst_address.data_u32; + if (is_glean) + { + a0 ^= ip0->dst_address.data_u32; + } + else + { + a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32; + } b0 ^= sw_if_index0; hash_v3_finalize32 (a0, b0, c0); @@ -2522,10 +1866,11 @@ ip4_arp (vlib_main_t * vm, * Can happen if the control-plane is programming tables * with traffic flowing; at least that's today's lame excuse. */ - if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) - { - p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; - } + if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) || + (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)) + { + p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; + } else /* Send ARP request. 
*/ { @@ -2545,15 +1890,32 @@ ip4_arp (vlib_main_t * vm, clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, sizeof (h0->ip4_over_ethernet[0].ethernet)); - if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) { - //No source address available - p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; - vlib_buffer_free(vm, &bi0, 1); - continue; + if (is_glean) + { + /* The interface's source address is stashed in the Glean Adj */ + h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4; + + /* Copy in destination address we are requesting. This is the + * glean case, so it's the packet's destination.*/ + h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; } + else + { + /* Src IP address in ARP header. */ + if (ip4_src_address_for_packet(lm, sw_if_index0, + &h0->ip4_over_ethernet[0].ip4)) + { + /* No source address available */ + p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; + vlib_buffer_free(vm, &bi0, 1); + continue; + } - /* Copy in destination address we are requesting. 
*/ - h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; + /* Copy in destination address we are requesting from the + incomplete adj */ + h0->ip4_over_ethernet[1].ip4.data_u32 = + adj0->sub_type.nbr.next_hop.ip4.as_u32; + } vlib_buffer_copy_trace_flag (vm, p0, bi0); b0 = vlib_get_buffer (vm, bi0); @@ -2571,6 +1933,22 @@ ip4_arp (vlib_main_t * vm, return frame->n_vectors; } +static uword +ip4_arp (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip4_arp_inline(vm, node, frame, 0)); +} + +static uword +ip4_glean (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip4_arp_inline(vm, node, frame, 1)); +} + static char * ip4_arp_error_strings[] = { [IP4_ARP_ERROR_DROP] = "address overflow drops", [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent", @@ -2596,6 +1974,22 @@ VLIB_REGISTER_NODE (ip4_arp_node) = { }, }; +VLIB_REGISTER_NODE (ip4_glean_node) = { + .function = ip4_glean, + .name = "ip4-glean", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; + #define foreach_notrace_ip4_arp_error \ _(DROP) \ _(REQUEST_SENT) \ @@ -2720,7 +2114,7 @@ ip4_rewrite_inline (vlib_main_t * vm, u32 pi1, rw_len1, next1, error1, checksum1, adj_index1; u32 next0_override, next1_override; u32 tx_sw_if_index0, tx_sw_if_index1; - + if (rewrite_for_locally_received_packets) next0_override = next1_override = 0; @@ -2818,21 +2212,9 @@ ip4_rewrite_inline (vlib_main_t * vm, if (rewrite_for_locally_received_packets) { - /* - * If someone sends e.g. an icmp4 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - * The local adj rewrite data is 0xfefe on purpose. - * Bad engineer, no donut for you. 
- */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; if (PREDICT_FALSE(adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)) next0_override = IP4_REWRITE_NEXT_ARP; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; if (PREDICT_FALSE(adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)) next1_override = IP4_REWRITE_NEXT_ARP; @@ -2869,14 +2251,14 @@ ip4_rewrite_inline (vlib_main_t * vm, */ if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0-sizeof(ethernet_header_t)); if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index1, /* packet increment */ 0, /* byte increment */ rw_len1-sizeof(ethernet_header_t)); @@ -2945,7 +2327,7 @@ ip4_rewrite_inline (vlib_main_t * vm, u32 pi0, rw_len0, adj_index0, next0, error0, checksum0; u32 next0_override; u32 tx_sw_if_index0; - + if (rewrite_for_locally_received_packets) next0_override = 0; @@ -3000,15 +2382,6 @@ ip4_rewrite_inline (vlib_main_t * vm, if (rewrite_for_locally_received_packets) { - /* - * If someone sends e.g. an icmp4 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - * The local adj rewrite data is 0xfefe on purpose. - * Bad engineer, no donut for you. - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; /* * We have to override the next_index in ARP adjacencies, * because they're set up for ip4-arp, not this node... 
@@ -3028,7 +2401,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0-sizeof(ethernet_header_t)); @@ -3172,6 +2545,15 @@ ip4_rewrite_local (vlib_main_t * vm, /* rewrite_for_locally_received_packets */ 1); } +static uword +ip4_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + VLIB_REGISTER_NODE (ip4_rewrite_node) = { .function = ip4_rewrite_transit, .name = "ip4-rewrite-transit", @@ -3187,7 +2569,23 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit) + +VLIB_REGISTER_NODE (ip4_midchain_node) = { + .function = ip4_midchain, + .name = "ip4-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ARP] = "ip4-arp", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain) VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { .function = ip4_rewrite_local, @@ -3201,7 +2599,7 @@ VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local) static clib_error_t * add_del_interface_table (vlib_main_t * vm, @@ -3232,13 +2630,18 @@ add_del_interface_table (vlib_main_t * vm, { ip4_main_t * im = &ip4_main; - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); - - if (fib) - { - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - 
im->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + u32 fib_index; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id); + + // + // FIXME-LATER + // changing an interface's table has consequences for any connecteds + // and adj-fibs already installed. + // + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + im->fib_index_by_sw_if_index[sw_if_index] = fib_index; } done: @@ -3272,8 +2675,7 @@ ip4_lookup_multicast (vlib_main_t * vm, vlib_frame_t * frame) { ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -3290,12 +2692,12 @@ ip4_lookup_multicast (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { vlib_buffer_t * p0, * p1; - u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + u32 pi0, pi1, lb_index0, lb_index1, wrong_next; ip_lookup_next_t next0, next1; ip4_header_t * ip0, * ip1; - ip_adjacency_t * adj0, * adj1; u32 fib_index0, fib_index1; - u32 flow_hash_config0, flow_hash_config1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t * lb0, * lb1; /* Prefetch next iteration. */ { @@ -3327,46 +2729,44 @@ ip4_lookup_multicast (vlib_main_t * vm, fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, - &ip0->dst_address, p0); - adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, - &ip1->dst_address, p1); - - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; + lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), + &ip0->dst_address); + lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1), + &ip1->dst_address); - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + lb0 = load_balance_get (lb_index0); + lb1 = load_balance_get (lb_index1); - flow_hash_config1 = - vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash - (ip0, flow_hash_config0); + (ip0, lb0->lb_hash_config); vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash - (ip1, flow_hash_config1); + (ip1, lb1->lb_hash_config); - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); - adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); + dpo1 = load_balance_get_bucket_i(lb1, + (vnet_buffer (p1)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + next1 = dpo1->dpoi_next_node; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; if (1) /* $$$$$$ HACK FIXME */ 
vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0)); if (1) /* $$$$$$ HACK FIXME */ vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + (cm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; @@ -3415,11 +2815,11 @@ ip4_lookup_multicast (vlib_main_t * vm, { vlib_buffer_t * p0; ip4_header_t * ip0; - u32 pi0, adj_index0; + u32 pi0, lb_index0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; u32 fib_index0; - u32 flow_hash_config0; + const dpo_id_t *dpo0; + const load_balance_t * lb0; pi0 = from[0]; to_next[0] = pi0; @@ -3433,28 +2833,27 @@ ip4_lookup_multicast (vlib_main_t * vm, fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; - adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, - &ip0->dst_address, p0); - - adj0 = ip_get_adjacency (lm, adj_index0); + lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), + &ip0->dst_address); - next0 = adj0->lookup_next_index; + lb0 = load_balance_get (lb_index0); - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); + vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash + (ip0, lb0->lb_hash_config); - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; if (1) /* $$$$$$ HACK FIXME */ vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0)); 
from += 1; @@ -3494,7 +2893,7 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast) VLIB_REGISTER_NODE (ip4_multicast_node,static) = { .function = ip4_drop, @@ -3511,12 +2910,11 @@ VLIB_REGISTER_NODE (ip4_multicast_node,static) = { int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0) { - ip4_main_t * im = &ip4_main; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; - u32 adj_index0; + u32 lbi0; - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0); @@ -3527,11 +2925,9 @@ int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0) /* Handle default route. */ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - a, - /* no_default_route */ 0); + return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a); } static clib_error_t * @@ -3595,7 +2991,7 @@ int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) if (p == 0) return VNET_API_ERROR_NO_SUCH_FIB; - fib = vec_elt_at_index (im4->fibs, p[0]); + fib = ip4_fib_get (p[0]); fib->flow_hash_config = flow_hash_config; return 0; @@ -3719,44 +3115,3 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = { .function = set_ip_classify_command_fn, }; - -#define TEST_CODE 1 -#if TEST_CODE > 0 - -static clib_error_t * -set_interface_output_feature_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - u32 sw_if_index = ~0; - int is_add = 1; - ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = 
&im->lookup_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) - ; - else if (unformat (input, "del")) - is_add = 0; - else - break; - } - - if (sw_if_index == ~0) - return clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - - lm->tx_sw_if_has_ip_output_features = - clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add); - - return 0; -} - -VLIB_CLI_COMMAND (set_interface_output_feature, static) = { - .path = "set interface output feature", - .function = set_interface_output_feature_command_fn, - .short_help = "set interface output feature <intfc>", -}; -#endif /* TEST_CODE */ diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c index 006610a0..36418241 100644 --- a/vnet/vnet/ip/ip4_mtrie.c +++ b/vnet/vnet/ip/ip4_mtrie.c @@ -38,6 +38,7 @@ */ #include <vnet/ip/ip.h> +#include <vnet/fib/fib_entry.h> static void ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len) @@ -401,21 +402,27 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, unset_leaf (m, &a, root_ply, 0); /* Find next less specific route and insert into mtrie. */ - for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--) + for (i = dst_address_length - 1; i >= 1; i--) { uword * p; + index_t lbi; ip4_address_t key; - if (! fib->adj_index_by_dst_address[i]) + if (! 
fib->fib_entry_by_dst_address[i]) continue; key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; - p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32); + p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); if (p) { + lbi = fib_entry_contribute_ip_forwarding(p[0])->dpoi_index; + if (INDEX_INVALID == lbi) + continue; + a.dst_address = key; + a.adj_index = lbi; a.dst_address_length = i; - a.adj_index = p[0]; + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); break; } @@ -424,65 +431,6 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, } } -always_inline uword -maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p) -{ - ip4_fib_mtrie_leaf_t l = p[0]; - uword was_remapped_to_empty_leaf = 0; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - { - u32 adj_index = ip4_fib_mtrie_leaf_get_adj_index (l); - u32 m = vec_elt (lm->adjacency_remap_table, adj_index); - if (m) - { - was_remapped_to_empty_leaf = m == ~0; - - /* - * The intent of the original form - which dates to 2013 or - * earlier - is not obvious. Here's the original: - * - * if (was_remapped_to_empty_leaf) - * p[0] = (was_remapped_to_empty_leaf - * ? IP4_FIB_MTRIE_LEAF_EMPTY - * : ip4_fib_mtrie_leaf_set_adj_index (m - 1)); - * - * Notice the outer "if (was_remapped_to_empty_leaf)" - * means that p[0] is always set to IP4_FIB_MTRIE_LEAF_EMPTY, - * and is otherwise left intact. - * - * It seems unlikely that the adjacency mapping scheme - * works in detail. Coverity correctly complains that the - * else-case of the original ternary expression is dead code. 
- */ - if (was_remapped_to_empty_leaf) - p[0] = IP4_FIB_MTRIE_LEAF_EMPTY; - } - } - return was_remapped_to_empty_leaf; -} - -static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply) -{ - u32 n_remapped_to_empty = 0; - u32 i; - for (i = 0; i < ARRAY_LEN (ply->leaves); i++) - n_remapped_to_empty += maybe_remap_leaf (lm, &ply->leaves[i]); - if (n_remapped_to_empty > 0) - { - ASSERT (n_remapped_to_empty <= ply->n_non_empty_leafs); - ply->n_non_empty_leafs -= n_remapped_to_empty; - if (ply->n_non_empty_leafs == 0) - os_panic (); - } -} - -void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m) -{ - ip4_fib_mtrie_ply_t * ply; - pool_foreach (ply, m->ply_pool, maybe_remap_ply (lm, ply)); - maybe_remap_leaf (lm, &m->default_leaf); -} - /* Returns number of bytes of memory used by mtrie. */ static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) { diff --git a/vnet/vnet/ip/ip4_mtrie.h b/vnet/vnet/ip/ip4_mtrie.h index 31de41e1..c49937d6 100644 --- a/vnet/vnet/ip/ip4_mtrie.h +++ b/vnet/vnet/ip/ip4_mtrie.h @@ -51,7 +51,7 @@ 1 => empty (adjacency index of zero is special miss adjacency). */ typedef u32 ip4_fib_mtrie_leaf_t; -#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*IP_LOOKUP_MISS_ADJ_INDEX) +#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) #define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) always_inline u32 ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n) @@ -115,6 +115,9 @@ typedef struct { - 1 * sizeof (i32)]; } ip4_fib_mtrie_ply_t; +_Static_assert(0 == sizeof(ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES, + "IP4 Mtrie ply cache line"); + typedef struct { /* Pool of plies. Index zero is root ply. */ ip4_fib_mtrie_ply_t * ply_pool; @@ -136,15 +139,13 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t * f, /* Returns adjacency index. 
*/ u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst); -void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m); - format_function_t format_ip4_fib_mtrie; /* Lookup step. Processes 1 byte of 4 byte ip4 address. */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, - ip4_address_t * dst_address, + const ip4_address_t * dst_address, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t next_leaf; diff --git a/vnet/vnet/ip/ip4_source_and_port_range_check.c b/vnet/vnet/ip/ip4_source_and_port_range_check.c index ebfa767d..8a469baa 100644 --- a/vnet/vnet/ip/ip4_source_and_port_range_check.c +++ b/vnet/vnet/ip/ip4_source_and_port_range_check.c @@ -14,7 +14,19 @@ */ #include <vnet/ip/ip.h> #include <vnet/ip/ip_source_and_port_range_check.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +/** + * @brief The pool of range chack DPOs + */ +static protocol_port_range_dpo_t *ppr_dpo_pool; + +/** + * @brief Dynamically registered DPO type + */ +static dpo_type_t ppr_dpo_type; vlib_node_registration_t ip4_source_port_and_range_check_rx; vlib_node_registration_t ip4_source_port_and_range_check_tx; @@ -73,23 +85,20 @@ typedef enum static inline u32 -check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) +check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo, + u16 dst_port, u32 next) { - protocol_port_range_t *range; + const protocol_port_range_t *range; u16x8vec_t key; u16x8vec_t diff1; u16x8vec_t diff2; u16x8vec_t sum, sum_equal_diff2; u16 sum_nonzero, sum_equal, winner_mask; int i; - u8 *rwh; - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR || dst_port == 0) + if (NULL == ppr_dpo || dst_port == 0) return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; - rwh = (u8 *) (&adj->rewrite_header); - range = (protocol_port_range_t *) rwh; - /* Make the obvious screw-case work. 
A variant also works w/ no MMX */ if (PREDICT_FALSE (dst_port == 65535)) { @@ -100,20 +109,20 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) i++) { for (j = 0; j < 8; j++) - if (range->low.as_u16[j] == 65535) + if (ppr_dpo->blocks[i].low.as_u16[j] == 65535) return next; - range++; } return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; } key.as_u16x8 = u16x8_splat (dst_port); - for (i = 0; i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t); - i++) + for (i = 0; i < ppr_dpo->n_used_blocks; i++) { - diff1.as_u16x8 = u16x8_sub_saturate (range->low.as_u16x8, key.as_u16x8); - diff2.as_u16x8 = u16x8_sub_saturate (range->hi.as_u16x8, key.as_u16x8); + diff1.as_u16x8 = + u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8); + diff2.as_u16x8 = + u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8); sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8); sum_equal_diff2.as_u16x8 = u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8); @@ -127,6 +136,12 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; } +always_inline protocol_port_range_dpo_t * +protocol_port_range_dpo_get (index_t index) +{ + return (pool_elt_at_index (ppr_dpo_pool, index)); +} + always_inline uword ip4_source_and_port_range_check_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -154,264 +169,263 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *b0, *b1; - ip4_header_t *ip0, *ip1; - ip4_fib_mtrie_t *mtrie0, *mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip_source_and_port_range_check_config_t *c0, *c1; - ip_adjacency_t *adj0 = 0, *adj1 = 0; - u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; - u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; - udp_header_t *udp0, *udp1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); - } - - bi0 = to_next[0] = from[0]; - bi1 = to_next[1] = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (b1)->sw_if_index[VLIB_RX]); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - if (is_tx) - { - c0 = vnet_get_config_data (&tx_cm->config_main, - &b0->current_config_index, - &next0, sizeof (c0[0])); - c1 = vnet_get_config_data (&tx_cm->config_main, - &b1->current_config_index, - &next1, sizeof (c1[0])); - } - else - { - c0 = vnet_get_config_data (&rx_cm->config_main, - &b0->current_config_index, - &next0, sizeof (c0[0])); - c1 = vnet_get_config_data (&rx_cm->config_main, - &b1->current_config_index, - &next1, sizeof (c1[0])); - } - - /* we can't use the default VRF here... 
*/ - for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) - { - ASSERT (c0->fib_index[i] && c1->fib_index[i]); - } - - - if (is_tx) - { - if (ip0->protocol == IP_PROTOCOL_UDP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; - if (ip0->protocol == IP_PROTOCOL_TCP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; - } - else - { - if (ip0->protocol == IP_PROTOCOL_UDP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; - if (ip0->protocol == IP_PROTOCOL_TCP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; - } - - if (PREDICT_TRUE (fib_index0 != ~0)) - { - - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 2); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 3); - - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - 0 - /* use dflt rt */ - )); - adj0 = ip_get_adjacency (lm, adj_index0); - } - - if (is_tx) - { - if (ip1->protocol == IP_PROTOCOL_UDP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; - if (ip1->protocol == IP_PROTOCOL_TCP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; - } - else - { - if (ip1->protocol == IP_PROTOCOL_UDP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; - if (ip1->protocol == IP_PROTOCOL_TCP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; - } - - if (PREDICT_TRUE (fib_index1 != ~0)) - { - - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; - - leaf1 = 
IP4_FIB_MTRIE_LEAF_ROOT; - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 0); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 1); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 2); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 3); - - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - &ip1->src_address, - 0)); - adj1 = ip_get_adjacency (lm, adj_index1); - } - - pass0 = 0; - pass0 |= adj0 == 0; - pass0 |= ip4_address_is_multicast (&ip0->src_address); - pass0 |= - ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); - pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) - && (ip0->protocol != IP_PROTOCOL_TCP); - - pass1 = 0; - pass1 |= adj1 == 0; - pass1 |= ip4_address_is_multicast (&ip1->src_address); - pass1 |= - ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); - pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) - && (ip1->protocol != IP_PROTOCOL_TCP); - - save_next0 = next0; - udp0 = ip4_next_header (ip0); - save_next1 = next1; - udp1 = ip4_next_header (ip1); - - if (PREDICT_TRUE (pass0 == 0)) - { - good_packets++; - next0 = check_adj_port_range_x1 - (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); - good_packets -= (save_next0 != next0); - b0->error = error_node->errors - [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; - } - - if (PREDICT_TRUE (pass1 == 0)) - { - good_packets++; - next1 = check_adj_port_range_x1 - (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); - good_packets -= (save_next1 != next1); - b1->error = error_node->errors - [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - ip4_source_and_port_range_check_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->pass = next0 == save_next0; - t->bypass = pass0; - t->fib_index = 
fib_index0; - t->src_addr.as_u32 = ip0->src_address.as_u32; - t->port = (pass0 == 0) ? - clib_net_to_host_u16 (udp0->dst_port) : 0; - t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b1->flags & VLIB_BUFFER_IS_TRACED))) - { - ip4_source_and_port_range_check_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - t->pass = next1 == save_next1; - t->bypass = pass1; - t->fib_index = fib_index1; - t->src_addr.as_u32 = ip1->src_address.as_u32; - t->port = (pass1 == 0) ? - clib_net_to_host_u16 (udp1->dst_port) : 0; - t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* vlib_buffer_t *b0, *b1; */ + /* ip4_header_t *ip0, *ip1; */ + /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */ + /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */ + /* ip_source_and_port_range_check_config_t *c0, *c1; */ + /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */ + /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */ + /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */ + /* udp_header_t *udp0, *udp1; */ + + /* /\* Prefetch next iteration. 
*\/ */ + /* { */ + /* vlib_buffer_t *p2, *p3; */ + + /* p2 = vlib_get_buffer (vm, from[2]); */ + /* p3 = vlib_get_buffer (vm, from[3]); */ + + /* vlib_prefetch_buffer_header (p2, LOAD); */ + /* vlib_prefetch_buffer_header (p3, LOAD); */ + + /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */ + /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */ + /* } */ + + /* bi0 = to_next[0] = from[0]; */ + /* bi1 = to_next[1] = from[1]; */ + /* from += 2; */ + /* to_next += 2; */ + /* n_left_from -= 2; */ + /* n_left_to_next -= 2; */ + + /* b0 = vlib_get_buffer (vm, bi0); */ + /* b1 = vlib_get_buffer (vm, bi1); */ + + /* fib_index0 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */ + /* fib_index1 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */ + + /* ip0 = vlib_buffer_get_current (b0); */ + /* ip1 = vlib_buffer_get_current (b1); */ + + /* if (is_tx) */ + /* { */ + /* c0 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b1->current_config_index, */ + /* &next1, sizeof (c1[0])); */ + /* } */ + /* else */ + /* { */ + /* c0 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b1->current_config_index, */ + /* &next1, sizeof (c1[0])); */ + /* } */ + + /* /\* we can't use the default VRF here... 
*\/ */ + /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */ + /* { */ + /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */ + /* } */ + + + /* if (is_tx) */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index0 != ~0)) */ + /* { */ + + /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */ + + /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 0); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 1); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 2); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 3); */ + + /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */ + + /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */ + /* &ip0->src_address, */ + /* 0 */ + /* /\* use dflt rt *\/ */ + /* )); */ + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* } */ + + /* if (is_tx) */ + /* { */ + /* if (ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if 
(ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index1 != ~0)) */ + /* { */ + + /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */ + + /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 0); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 1); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 2); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 3); */ + + /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */ + + /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */ + /* &ip1->src_address, */ + /* 0)); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + /* } */ + + /* pass0 = 0; */ + /* pass0 |= adj0 == 0; */ + /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */ + /* pass0 |= */ + /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */ + /* && (ip0->protocol != IP_PROTOCOL_TCP); */ + + /* pass1 = 0; */ + /* pass1 |= adj1 == 0; */ + /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */ + /* pass1 |= */ + /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */ + /* && (ip1->protocol != IP_PROTOCOL_TCP); */ + + /* save_next0 = next0; */ + /* udp0 = ip4_next_header (ip0); */ + /* save_next1 = next1; */ + /* udp1 = ip4_next_header (ip1); */ + + /* if (PREDICT_TRUE (pass0 == 0)) */ + /* { */ + /* good_packets++; */ + /* next0 = check_adj_port_range_x1 */ + /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */ + /* good_packets -= 
(save_next0 != next0); */ + /* b0->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_TRUE (pass1 == 0)) */ + /* { */ + /* good_packets++; */ + /* next1 = check_adj_port_range_x1 */ + /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */ + /* good_packets -= (save_next1 != next1); */ + /* b1->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b0, sizeof (*t)); */ + /* t->pass = next0 == save_next0; */ + /* t->bypass = pass0; */ + /* t->fib_index = fib_index0; */ + /* t->src_addr.as_u32 = ip0->src_address.as_u32; */ + /* t->port = (pass0 == 0) ? */ + /* clib_net_to_host_u16 (udp0->dst_port) : 0; */ + /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b1, sizeof (*t)); */ + /* t->pass = next1 == save_next1; */ + /* t->bypass = pass1; */ + /* t->fib_index = fib_index1; */ + /* t->src_addr.as_u32 = ip1->src_address.as_u32; */ + /* t->port = (pass1 == 0) ? 
*/ + /* clib_net_to_host_u16 (udp1->dst_port) : 0; */ + /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ + /* to_next, n_left_to_next, */ + /* bi0, bi1, next0, next1); */ + /* } */ while (n_left_from > 0 && n_left_to_next > 0) { vlib_buffer_t *b0; ip4_header_t *ip0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; ip_source_and_port_range_check_config_t *c0; - ip_adjacency_t *adj0 = 0; - u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; + u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0; udp_header_t *udp0; + const protocol_port_range_dpo_t *ppr_dpo0 = NULL; + const dpo_id_t *dpo; bi0 = from[0]; to_next[0] = bi0; @@ -476,35 +490,25 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, if (fib_index0 != ~0) { + lb_index0 = ip4_fib_forwarding_lookup (fib_index0, + &ip0->src_address); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 2); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 3); + dpo = + load_balance_get_bucket_i (load_balance_get (lb_index0), 0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - - ASSERT (adj_index0 == ip4_fib_lookup_with_table - (im, fib_index0, - &ip0->src_address, 0 /* use default route */ )); - adj0 = ip_get_adjacency (lm, adj_index0); + if (ppr_dpo_type == dpo->dpoi_type) + { + ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index); + } + /* + * else the lookup hit an enty that was no inserted + * by this range checker, which is the default route + */ } /* * $$$ which (src,dst) categories should we always pass? 
*/ pass0 = 0; - pass0 |= adj0 == 0; pass0 |= ip4_address_is_multicast (&ip0->src_address); pass0 |= ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); @@ -518,7 +522,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, { good_packets++; next0 = check_adj_port_range_x1 - (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); + (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0); good_packets -= (save_next0 != next0); b0->error = error_node->errors [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; @@ -558,6 +562,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK, good_packets); return frame->n_vectors; + return 0; } static uword @@ -786,209 +791,299 @@ VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, /* *INDENT-ON* */ static u8 * -format_source_and_port_rc_adjacency (u8 * s, va_list * args) +format_ppr_dpo (u8 * s, va_list * args) { - CLIB_UNUSED (vnet_main_t * vnm) = va_arg (*args, vnet_main_t *); - ip_lookup_main_t *lm = va_arg (*args, ip_lookup_main_t *); - u32 adj_index = va_arg (*args, u32); - ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); - source_range_check_main_t *srm = &source_range_check_main; - u8 *rwh = (u8 *) (&adj->rewrite_header); - protocol_port_range_t *range; + index_t index = va_arg (args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (args, u32); + + protocol_port_range_dpo_t *ppr_dpo; int i, j; int printed = 0; - range = (protocol_port_range_t *) rwh; + ppr_dpo = protocol_port_range_dpo_get (index); s = format (s, "allow "); - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < ppr_dpo->n_used_blocks; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[j]) + if (ppr_dpo->blocks[i].low.as_u16[j]) { if (printed) s = format (s, ", "); - if (range->hi.as_u16[j] > (range->low.as_u16[j] + 1)) - s = format (s, "%d-%d", (u32) range->low.as_u16[j], - (u32) range->hi.as_u16[j] - 1); + if (ppr_dpo->blocks[i].hi.as_u16[j] 
> + (ppr_dpo->blocks[i].low.as_u16[j] + 1)) + s = + format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1); else - s = format (s, "%d", range->low.as_u16[j]); + s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]); printed = 1; } } - range++; } return s; } +static void +ppr_dpo_lock (dpo_id_t * dpo) +{ +} + +static void +ppr_dpo_unlock (dpo_id_t * dpo) +{ +} + +const static dpo_vft_t ppr_vft = { + .dv_lock = ppr_dpo_lock, + .dv_unlock = ppr_dpo_unlock, + .dv_format = format_ppr_dpo, +}; + +const static char *const ppr_ip4_nodes[] = { + "ip4-source-and-port-range-check-rx", + NULL, +}; + +const static char *const *const ppr_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = ppr_ip4_nodes, +}; + clib_error_t * ip4_source_and_port_range_check_init (vlib_main_t * vm) { source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; srm->vlib_main = vm; srm->vnet_main = vnet_get_main (); - srm->ranges_per_adjacency = - VLIB_BUFFER_PRE_DATA_SIZE / (2 * sizeof (u16x8)); - srm->special_adjacency_format_function_index = - vnet_register_special_adjacency_format_function (lm, - format_source_and_port_rc_adjacency); - ASSERT (srm->special_adjacency_format_function_index); + ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes); return 0; } VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init); -int -add_port_range_adjacency (ip4_address_t * address, - u32 length, - u32 adj_index, - u16 * low_ports, u16 * high_ports, u32 fib_index) +protocol_port_range_dpo_t * +protocol_port_range_dpo_alloc (void) { - ip_adjacency_t *adj; - int i, j, k; - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; - u8 *rwh; + protocol_port_range_dpo_t *ppr_dpo; - adj = ip_get_adjacency (lm, adj_index); - /* $$$$ fixme: add ports if address + mask match */ - if 
(adj->lookup_next_index == IP_LOOKUP_NEXT_ICMP_ERROR) - return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES); + memset (ppr_dpo, 0, sizeof (*ppr_dpo)); - ip_adjacency_t template_adj; - ip4_add_del_route_args_t a; + ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO; - memset (&template_adj, 0, sizeof (template_adj)); + return (ppr_dpo); +} - template_adj.lookup_next_index = IP_LOOKUP_NEXT_ICMP_ERROR; - template_adj.if_address_index = ~0; - template_adj.special_adjacency_format_function_index = - srm->special_adjacency_format_function_index; - rwh = (u8 *) (&template_adj.rewrite_header); +static int +add_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) +{ + protocol_port_range_dpo_t *ppr_dpo; + dpo_id_t dpop = DPO_NULL; + int i, j, k; - range = (protocol_port_range_t *) rwh; + fib_node_index_t fei; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if we have already sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * this is a first time add for this prefix. + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + else + { + /* + * the prefix is already there. + * check it was sourced by us, and if so get the ragne DPO from it. + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. 
we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * there is no PPR state associated with this prefix, + * so we'll need a new DPO + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + } - if (vec_len (low_ports) > 8 * srm->ranges_per_adjacency) + if (vec_len (low_ports) > ppr_dpo->n_free_ranges) return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY; j = k = 0; for (i = 0; i < vec_len (low_ports); i++) { - for (; j < srm->ranges_per_adjacency; j++) + for (; j < N_BLOCKS_PER_DPO; j++) { for (; k < 8; k++) { - if (range->low.as_u16[k] == 0) + if (ppr_dpo->blocks[j].low.as_u16[k] == 0) { - range->low.as_u16[k] = low_ports[i]; - range->hi.as_u16[k] = high_ports[i]; - k++; - if (k == 7) - { - k = 0; - j++; - } - goto doublebreak2; + ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i]; + ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i]; + goto doublebreak; } } - k = 0; - range++; } - j = 0; - /* Too many ports specified... 
*/ - return VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY; - - doublebreak2:; + doublebreak:; } + ppr_dpo->n_used_blocks = j + 1; - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX; - a.table_index_or_table_id = fib_index; - a.dst_address = address[0]; - a.dst_address_length = length; - a.add_adj = &template_adj; - a.n_add_adj = 1; + /* + * add or update the entry in the FIB + */ + dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool)); + + if (FIB_NODE_INDEX_INVALID == fei) + { + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } + else + { + fib_table_entry_special_dpo_update (fei, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } - ip4_add_del_route (im, &a); return 0; } -int -remove_port_range_adjacency (ip4_address_t * address, - u32 length, - u32 adj_index, - u16 * low_ports, u16 * high_ports, u32 fib_index) +static int +remove_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) { - ip_adjacency_t *adj; + protocol_port_range_dpo_t *ppr_dpo; + fib_node_index_t fei; int i, j, k; - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; - u8 *rwh; - adj = ip_get_adjacency (lm, adj_index); - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR) /* _ICMP_ERROR is a dummy placeholder */ - return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if we have sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); - rwh = (u8 *) (&adj->rewrite_header); + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + else + { + /* + * the prefix is already there. 
+ * check it was sourced by us + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + } for (i = 0; i < vec_len (low_ports); i++) { - range = (protocol_port_range_t *) rwh; - for (j = 0; j < srm->ranges_per_adjacency; j++) + for (j = 0; j < N_BLOCKS_PER_DPO; j++) { for (k = 0; k < 8; k++) { - if (low_ports[i] == range->low.as_u16[k] && - high_ports[i] == range->hi.as_u16[k]) + if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] && + high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k]) { - range->low.as_u16[k] = range->hi.as_u16[k] = 0; + ppr_dpo->blocks[j].low.as_u16[k] = + ppr_dpo->blocks[j].hi.as_u16[k] = 0; goto doublebreak; } } - range++; } doublebreak:; } - range = (protocol_port_range_t *) rwh; + ppr_dpo->n_free_ranges = 0; + /* Have we deleted all ranges yet? */ - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < N_BLOCKS_PER_DPO; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[i] != 0) - goto still_occupied; + if (ppr_dpo->blocks[j].low.as_u16[i] == 0) + ppr_dpo->n_free_ranges++; } - range++; } - /* Yes, lose the adjacency... */ - { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = fib_index; - a.dst_address = address[0]; - a.dst_address_length = length; - a.adj_index = adj_index; - ip4_add_del_route (im, &a); - } - -still_occupied: - ; + + if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges) + { + /* Yes, lose the adjacency... 
*/ + fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL); + } + else + { + /* + * compact the ranges down to a contiguous block + */ + // FIXME. TODO. + } + return 0; } @@ -1010,35 +1105,19 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address, u16 * low_ports, u16 * high_ports, int is_add) { - - ip4_main_t *im = &ip4_main; - // ip_lookup_main_t * lm = &im->lookup_main; - uword *p; u32 fib_index; - u32 adj_index; - - p = hash_get (im->fib_index_by_table_id, vrf_id); - if (!p) - { - ip4_fib_t *f; - f = find_ip4_fib_by_table_index_or_id (im, vrf_id, 0 /* flags */ ); - fib_index = f->index; - } - else - fib_index = p[0]; - adj_index = ip4_fib_lookup_with_table - (im, fib_index, address, 0 /* disable_default_route */ ); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); if (is_add == 0) { - remove_port_range_adjacency (address, length, adj_index, low_ports, - high_ports, fib_index); + remove_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); } else { - add_port_range_adjacency (address, length, adj_index, low_ports, - high_ports, fib_index); + add_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); } return 0; @@ -1159,24 +1238,20 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; + protocol_port_range_dpo_t *ppr_dpo; u32 fib_index; - ip4_address_t addr; u8 addr_set = 0; u32 vrf_id = ~0; int rv, i, j; - u32 adj_index; - ip_adjacency_t *adj; u32 port = 0; - u8 *rwh; - uword *p; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + }; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U", unformat_ip4_address, &addr)) + if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4)) 
addr_set = 1; else if (unformat (input, "vrf %d", &vrf_id)) ; @@ -1192,51 +1267,58 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, if (vrf_id == ~0) return clib_error_return (0, "VRF ID required, not specified"); - p = hash_get (im->fib_index_by_table_id, vrf_id); - if (p == 0) + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + if (~0 == fib_index) return clib_error_return (0, "VRF %d not found", vrf_id); - fib_index = p[0]; - adj_index = ip4_fib_lookup_with_table - (im, fib_index, &addr, 0 /* disable_default_route */ ); + /* + * find the longest prefix match on the address requested, + * check it was sourced by us + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; - adj = ip_get_adjacency (lm, adj_index); - - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR) + if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx), + FIB_SOURCE_SPECIAL, &dpo)) { - vlib_cli_output (vm, "%U: src address drop", format_ip4_address, &addr); + /* + * not one of ours + */ + vlib_cli_output (vm, "%U: src address drop", format_ip4_address, + &pfx.fp_addr.ip4); return 0; } + bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + if (port) { - rv = check_adj_port_range_x1 (adj, (u16) port, 1234); + rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234); if (rv == 1234) vlib_cli_output (vm, "%U port %d PASS", format_ip4_address, - &addr, port); + &pfx.fp_addr.ip4, port); else vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address, - &addr, port); + &pfx.fp_addr.ip4, port); return 0; } else { u8 *s; - rwh = (u8 *) (&adj->rewrite_header); - - s = format (0, "%U: ", format_ip4_address, &addr); - range = (protocol_port_range_t *) rwh; + s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4); - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < N_BLOCKS_PER_DPO; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[j]) 
- s = format (s, "%d - %d ", (u32) range->low.as_u16[j], - (u32) range->hi.as_u16[j]); + if (ppr_dpo->blocks[i].low.as_u16[j]) + s = format (s, "%d - %d ", + (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j]); } - range++; } vlib_cli_output (vm, "%s", s); vec_free (s); diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c index 1f8e7214..2323ac29 100644 --- a/vnet/vnet/ip/ip4_source_check.c +++ b/vnet/vnet/ip/ip4_source_check.c @@ -38,6 +38,8 @@ */ #include <vnet/ip/ip.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> typedef struct { u8 packet_data[64]; @@ -110,9 +112,12 @@ ip4_source_check_inline (vlib_main_t * vm, ip4_fib_mtrie_t * mtrie0, * mtrie1; ip4_fib_mtrie_leaf_t leaf0, leaf1; ip4_source_check_config_t * c0, * c1; - ip_adjacency_t * adj0, * adj1; - u32 pi0, next0, pass0, adj_index0; - u32 pi1, next1, pass1, adj_index1; + const load_balance_t * lb0, * lb1; + u32 pi0, next0, pass0, lb_index0; + u32 pi1, next1, pass1, lb_index1; + const ip_adjacency_t *adj0, *adj1; + const dpo_id_t *dpo0, *dpo1; + u32 ii0, ii1; /* Prefetch next iteration. 
*/ { @@ -150,8 +155,8 @@ ip4_source_check_inline (vlib_main_t * vm, &next1, sizeof (c1[0])); - mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, c1->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; + mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -167,29 +172,70 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, - &ip0->src_address, - c0->no_default_route)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, c1->fib_index, - &ip1->src_address, - c1->no_default_route)); - - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); + lb0 = load_balance_get(lb_index0); + lb1 = load_balance_get(lb_index1); /* Pass multicast. 
*/ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); pass1 = ip4_address_is_multicast (&ip1->src_address) || ip1->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); - pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); - pass1 |= (adj1->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p1)->sw_if_index[VLIB_RX] == adj1->rewrite_header.sw_if_index)); + if (PREDICT_TRUE(1 == lb0->lb_n_buckets)) + { + dpo0 = load_balance_get_bucket_i(lb0, 0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + else + { + for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++) + { + dpo0 = load_balance_get_bucket_i(lb0, ii0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + } + if (PREDICT_TRUE(1 == lb1->lb_n_buckets)) + { + dpo1 = load_balance_get_bucket_i(lb1, 0); + if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY)) + { + pass1 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj1 = adj_get(dpo1->dpoi_index); + pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] == + adj1->rewrite_header.sw_if_index); + } + } + else + { + for (ii1 = 0; ii1 < lb1->lb_n_buckets && !pass1; ii1++) + { + dpo1 = load_balance_get_bucket_i(lb1, ii1); + if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY)) + { + pass1 |= (source_check_type == + 
IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj1 = adj_get(dpo1->dpoi_index); + pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] == + adj1->rewrite_header.sw_if_index); + } + } + } next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP); next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP); @@ -210,7 +256,10 @@ ip4_source_check_inline (vlib_main_t * vm, ip4_fib_mtrie_leaf_t leaf0; ip4_source_check_config_t * c0; ip_adjacency_t * adj0; - u32 pi0, next0, pass0, adj_index0; + u32 pi0, next0, pass0, lb_index0; + const load_balance_t * lb0; + const dpo_id_t *dpo0; + u32 ii0; pi0 = from[0]; to_next[0] = pi0; @@ -227,7 +276,7 @@ ip4_source_check_inline (vlib_main_t * vm, &next0, sizeof (c0[0])); - mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -239,19 +288,40 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, - &ip0->src_address, - c0->no_default_route)); - adj0 = ip_get_adjacency (lm, adj_index0); + lb0 = load_balance_get(lb_index0); /* Pass multicast. 
*/ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); - pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); + if (PREDICT_TRUE(1 == lb0->lb_n_buckets)) + { + dpo0 = load_balance_get_bucket_i(lb0, 0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + else + { + for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++) + { + dpo0 = load_balance_get_bucket_i(lb0, ii0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + } next0 = (pass0 ? 
next0 : IP4_SOURCE_CHECK_NEXT_DROP); p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS]; diff --git a/vnet/vnet/ip/ip4_test.c b/vnet/vnet/ip/ip4_test.c index ff088e78..b76a719f 100644 --- a/vnet/vnet/ip/ip4_test.c +++ b/vnet/vnet/ip/ip4_test.c @@ -142,7 +142,7 @@ thrash (vlib_main_t * vm, } /* Find or create FIB table 11 */ - fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); + fib = ip4_fib_find_or_create_fib_by_table_id (table_id); for (i = tm->test_interfaces_created; i < ninterfaces; i++) { @@ -164,6 +164,7 @@ thrash (vlib_main_t * vm, hw = vnet_get_hw_interface (vnm, hw_if_index); vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index); im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index; + ip4_sw_interface_enable_disable(sw_if_index, 1); } tm->test_interfaces_created = ninterfaces; diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index f5f3de84..36be6494 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -71,27 +71,11 @@ typedef struct { u32 index; /* flow hash configuration */ - u32 flow_hash_config; + flow_hash_config_t flow_hash_config; } ip6_fib_t; struct ip6_main_t; -typedef void (ip6_add_del_route_function_t) - (struct ip6_main_t * im, - uword opaque, - ip6_fib_t * fib, - u32 flags, - ip6_address_t * address, - u32 address_length, - void * old_result, - void * new_result); - -typedef struct { - ip6_add_del_route_function_t * function; - uword required_flags; - uword function_opaque; -} ip6_add_del_route_callback_t; - typedef void (ip6_add_del_interface_address_function_t) (struct ip6_main_t * im, uword opaque, @@ -106,31 +90,63 @@ typedef struct { uword function_opaque; } ip6_add_del_interface_address_callback_t; -typedef struct ip6_main_t { - BVT(clib_bihash) ip6_lookup_table; +/** + * Enumeration of the FIB table instance types + */ +typedef enum ip6_fib_table_instance_type_t_ { + /** + * This table stores the routes that are used to forward traffic. 
+ * The key is the prefix, the result the adjacency to forward on. + */ + IP6_FIB_TABLE_FWDING, + /** + * The table that stores ALL routes learned by the DP. + * Some of these routes may not be ready to install in forwarding + * at a given time. + * The key in this table is the prefix, the result is the fib_entry_t + */ + IP6_FIB_TABLE_NON_FWDING, +} ip6_fib_table_instance_type_t; + +#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1) - ip_lookup_main_t lookup_main; +/** + * A representation of a single IP6 table + */ +typedef struct ip6_fib_table_instance_t_ { + /* The hash table */ + BVT(clib_bihash) ip6_hash; /* bitmap / refcounts / vector of mask widths to search */ uword * non_empty_dst_address_length_bitmap; u8 * prefix_lengths_in_search_order; i32 dst_address_length_refcounts[129]; +} ip6_fib_table_instance_t; + +typedef struct ip6_main_t { + /** + * The two FIB tables; fwding and non-fwding + */ + ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES]; + + ip_lookup_main_t lookup_main; - /* Vector of FIBs. */ - ip6_fib_t * fibs; + /* Pool of FIBs. */ + struct fib_table_t_ * fibs; + /* Network byte orders subnet mask for each prefix length */ ip6_address_t fib_masks[129]; /* Table index indexed by software interface. */ u32 * fib_index_by_sw_if_index; + /* IP6 enabled count by software interface */ + u8 * ip_enabled_by_sw_if_index; + /* Hash table mapping table id to fib index. ID space is not necessarily dense; index space is dense. */ uword * fib_index_by_table_id; - /* Vector of functions to call when routes are added/deleted. */ - ip6_add_del_route_callback_t * add_del_route_callbacks; - /* Hash table mapping interface rewrite adjacency index by sw if index. 
*/ uword * interface_route_adj_index_by_sw_if_index; @@ -156,8 +172,10 @@ typedef struct ip6_main_t { u32 ip6_unicast_rx_feature_l2tp_decap; u32 ip6_unicast_rx_feature_vpath; u32 ip6_unicast_rx_feature_lookup; + u32 ip6_unicast_rx_feature_drop; /* Built-in multicast feature path indices */ + u32 ip6_multicast_rx_feature_drop; u32 ip6_multicast_rx_feature_vpath; u32 ip6_multicast_rx_feature_lookup; @@ -226,6 +244,8 @@ extern vlib_node_registration_t ip6_input_node; extern vlib_node_registration_t ip6_rewrite_node; extern vlib_node_registration_t ip6_rewrite_local_node; extern vlib_node_registration_t ip6_discover_neighbor_node; +extern vlib_node_registration_t ip6_glean_node; +extern vlib_node_registration_t ip6_midchain_node; extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node; @@ -242,40 +262,10 @@ typedef union { } up_down_event; } ip6_icmp_neighbor_discovery_event_data_t; -u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst); -u32 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, - ip6_address_t * dst); - -/** - * \brief Get or create an IPv6 fib. - * - * Get or create an IPv6 fib with the provided fib ID or index. - * The fib ID is a possibly-sparse user-defined value while - * the fib index defines the position of the fib in the fib vector. - * - * \param im - * ip6_main pointer. - * \param table_index_or_id - * The table index if \c IP6_ROUTE_FLAG_FIB_INDEX bit is set in \p flags. - * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked - * and can be retrieved with \c ret->table_id. - * Otherwise, it is the fib ID to be used to retrieve or create the desired fib. - * \param flags - * Indicates whether \p table_index_or_id is the fib index or ID. - * When the bit \c IP6_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id - * is considered as the fib index, and the fib ID otherwise. - * \return A pointer to the retrieved or created fib. 
- * - * \remark When getting a fib with the fib index, the fib MUST already exist. - */ -ip6_fib_t * find_ip6_fib_by_table_index_or_id (ip6_main_t * im, - u32 table_index_or_id, - u32 flags); - always_inline uword -ip6_destination_matches_route (ip6_main_t * im, - ip6_address_t * key, - ip6_address_t * dest, +ip6_destination_matches_route (const ip6_main_t * im, + const ip6_address_t * key, + const ip6_address_t * dest, uword dest_length) { int i; @@ -313,25 +303,26 @@ ip6_unaligned_destination_matches_route (ip6_main_t * im, } always_inline int -ip6_src_address_for_packet (ip6_main_t * im, vlib_buffer_t * p, ip6_address_t * src, u32 sw_if_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); - if (ia == NULL) - return -1; - ip6_address_t * a = ip_interface_address_get_address (lm, ia); - *src = a[0]; - return 0; -} - -always_inline u32 -ip6_src_lookup_for_packet (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i) +ip6_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, + ip6_address_t * src) { - if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0) - vnet_buffer (b)->ip.adj_index[VLIB_RX] - = ip6_fib_lookup (im, vnet_buffer (b)->sw_if_index[VLIB_RX], - &i->src_address); - return vnet_buffer (b)->ip.adj_index[VLIB_RX]; + u32 if_add_index = + lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE(if_add_index != ~0)) { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index); + ip6_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + *src = *if_ip; + return (0); + } + else + { + src->as_u64[0] = 0; + src->as_u64[1] = 0; + } + return (!0); } /* Find interface address which matches destination. 
*/ @@ -362,95 +353,12 @@ clib_error_t * ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, ip6_address_t * address, u32 address_length, u32 is_del); +void +ip6_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable); int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2); -/* Add/del a route to the FIB. */ - -#define IP6_ROUTE_FLAG_ADD (0 << 0) -#define IP6_ROUTE_FLAG_DEL (1 << 0) -#define IP6_ROUTE_FLAG_TABLE_ID (0 << 1) -#define IP6_ROUTE_FLAG_FIB_INDEX (1 << 1) -#define IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) -#define IP6_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) -#define IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) -/* Dynamic route created via neighbor discovery. */ -#define IP6_ROUTE_FLAG_NEIGHBOR (1 << 5) - -typedef struct { - /* IP6_ROUTE_FLAG_* */ - u32 flags; - - /* Either index of fib or table_id to hash and get fib. - IP6_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ - u32 table_index_or_table_id; - - /* Destination address (prefix) and length. */ - ip6_address_t dst_address; - u32 dst_address_length; - - /* Adjacency to use for this destination. */ - u32 adj_index; - - /* If specified adjacencies to add and then - use for this destination. add_adj/n_add_adj - are override adj_index if specified. 
*/ - ip_adjacency_t * add_adj; - u32 n_add_adj; -} ip6_add_del_route_args_t; - -void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * args); - -void ip6_add_del_route_next_hop (ip6_main_t * im, - u32 flags, - ip6_address_t * dst_address, - u32 dst_address_length, - ip6_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index); - -u32 -ip6_route_get_next_hop_adj (ip6_main_t * im, - u32 fib_index, - ip6_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index); - -u32 -ip6_get_route (ip6_main_t * im, - u32 fib_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length); - -void -ip6_foreach_matching_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length, - ip6_address_t ** results, - u8 ** result_length); - -void ip6_delete_matching_routes (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length); - -void ip6_maybe_remap_adjacencies (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags); - -void ip6_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index); - -u32 -vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg); - clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index); @@ -481,8 +389,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_address_t * a, u8 * link_layer_address, uword n_bytes_link_layer_address); -void -vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index); void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, @@ -492,7 +398,8 @@ void ip6_ethernet_mac_address_from_link_local_address (u8 *mac, ip6_address_t *ip); -int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config); +int vnet_set_ip6_flow_hash (u32 table_id, + flow_hash_config_t flow_hash_config); int 
ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index, @@ -560,7 +467,8 @@ extern vlib_node_registration_t ip6_lookup_node; /* Compute flow hash. We'll use it to select which Sponge to use for this flow. And other things. */ always_inline u32 -ip6_compute_flow_hash (ip6_header_t * ip, u32 flow_hash_config) +ip6_compute_flow_hash (const ip6_header_t * ip, + flow_hash_config_t flow_hash_config) { tcp_header_t * tcp = (void *) (ip + 1); u64 a, b, c; diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index c9779602..f7514dc3 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -42,668 +42,13 @@ #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ #include <vnet/srp/srp.h> /* for srp_hw_interface_class */ #include <vppinfra/cache.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/classify_dpo.h> #include <vppinfra/bihash_template.c> -static void compute_prefix_lengths_in_search_order (ip6_main_t * im) -{ - int i; - vec_reset_length (im->prefix_lengths_in_search_order); - /* Note: bitmap reversed so this is in fact a longest prefix match */ - clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap, - ({ - int dst_address_length = 128 - i; - vec_add1 (im->prefix_lengths_in_search_order, dst_address_length); - })); -} - -u32 -ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst) -{ - ip_lookup_main_t * lm = &im->lookup_main; - int i, len; - int rv; - BVT(clib_bihash_kv) kv, value; - u64 fib; - - len = vec_len (im->prefix_lengths_in_search_order); - - kv.key[0] = dst->as_u64[0]; - kv.key[1] = dst->as_u64[1]; - fib = ((u64)((fib_index))<<32); - - for (i = 0; i < len; i++) - { - int dst_address_length = im->prefix_lengths_in_search_order[i]; - ip6_address_t * mask = &im->fib_masks[dst_address_length]; - - ASSERT(dst_address_length >= 0 && dst_address_length <= 128); - //As lengths are decreasing, masks are 
increasingly specific. - kv.key[0] &= mask->as_u64[0]; - kv.key[1] &= mask->as_u64[1]; - kv.key[2] = fib | dst_address_length; - - rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value); - if (rv == 0) - return value.value; - } - - return lm->miss_adj_index; -} - -u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst) -{ - u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - return ip6_fib_lookup_with_table (im, fib_index, dst); -} - -void -vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_add_del_route_args_t a; - ip_adjacency_t * adj; - - memset(&a, 0x0, sizeof(ip6_add_del_route_args_t)); - - a.table_index_or_table_id = fib_index; - a.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - - /* Add ff02::1:ff00:0/104 via local route for all tables. - This is required for neighbor discovery to work. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_solicited_node_multicast_address (&a.dst_address, 0); - - a.dst_address_length = 104; - ip6_add_del_route (im, &a); - - /* Add all-routers multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-nodes multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = 
IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_hosts); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-mldv2 multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_mldv2_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); -} - -static ip6_fib_t * -create_fib_with_table_id (ip6_main_t * im, u32 table_id) -{ - ip6_fib_t * fib; - hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); - vec_add2 (im->fibs, fib, 1); - fib->table_id = table_id; - fib->index = fib - im->fibs; - fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - vnet_ip6_fib_init (im, fib->index); - return fib; -} - -ip6_fib_t * -find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags) -{ - uword * p, fib_index; - - fib_index = table_index_or_id; - if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX)) - { - if (table_index_or_id == ~0) { - table_index_or_id = 0; - while (hash_get (im->fib_index_by_table_id, table_index_or_id)) { - table_index_or_id++; - } - return create_fib_with_table_id (im, table_index_or_id); - } - - p = hash_get (im->fib_index_by_table_id, table_index_or_id); - if (! 
p) - return create_fib_with_table_id (im, table_index_or_id); - fib_index = p[0]; - } - return vec_elt_at_index (im->fibs, fib_index); -} - -void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_fib_t * fib; - ip6_address_t dst_address; - u32 dst_address_length, adj_index; - uword is_del; - u32 old_adj_index = ~0; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; - - /* Either create new adjacency or use given one depending on arguments. */ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - } - else - adj_index = a->adj_index; - - dst_address = a->dst_address; - dst_address_length = a->dst_address_length; - fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, - a->flags); - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]); - - /* refcount accounting */ - if (is_del) - { - ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0); - if (--im->dst_address_length_refcounts[dst_address_length] == 0) - { - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 0); - compute_prefix_lengths_in_search_order (im); - } - } - else - { - im->dst_address_length_refcounts[dst_address_length]++; - - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 1); - compute_prefix_lengths_in_search_order (im); - } - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - old_adj_index = value.value; - - if (is_del) - 
BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */); - else - { - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - kv.value = adj_index; - - BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */); - } - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 - && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count --; - } - - /* Delete old adjacency index if present and changed. */ - { - if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); - } -} - -u32 -ip6_route_get_next_hop_adj (ip6_main_t * im, - u32 fib_index, - ip6_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - vnet_main_t * vnm = vnet_get_main(); - int is_interface_next_hop; - uword * nh_result; - u32 nh_adj_index; - ip6_fib_t * fib; - - fib = vec_elt_at_index (im->fibs, fib_index); - - is_interface_next_hop = ip6_address_is_zero (next_hop); - - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip6_adjacency_set_interface_route (vnm, adj, - next_hop_sw_if_index, ~0); - ip_call_add_del_adjacency_callbacks - (lm, next_hop_sw_if_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index, nh_adj_index); - } - } - else if (next_hop_sw_if_index == ~0) - { - /* next-hop is recursive. we always need a indirect adj - * for recursive paths. 
Any LPM we perform now will give - * us a valid adj, but without tracking the next-hop we - * have no way to keep it valid. - */ - ip_adjacency_t add_adj; - memset (&add_adj, 0, sizeof(add_adj)); - add_adj.n_adj = 1; - add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; - add_adj.indirect.next_hop.ip6.as_u64[0] = next_hop->as_u64[0]; - add_adj.indirect.next_hop.ip6.as_u64[1] = next_hop->as_u64[1]; - add_adj.explicit_fib_index = explicit_fib_index; - ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); - } - else - { - BVT(clib_bihash_kv) kv, value; - - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; -after_nd: - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - ip_adjacency_t * adj; - nh_adj_index = ip6_fib_lookup_with_table (im, fib_index, next_hop); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ND interface adjacencty is present, we need to - install ND adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip6.as_u64[0] == 0 && - adj->arp.next_hop.ip6.as_u64[1] == 0) - { - nh_adj_index = vnet_ip6_neighbor_glean_add(fib_index, next_hop); - } - else if (next_hop->as_u8[0] == 0xfe) - { - //Next hop is link-local. No indirect in this case. 
- //Let's add it as a possible neighbor on this interface - ip6_address_t null_addr= {}; - ip6_add_del_route_next_hop (im, IP6_ROUTE_FLAG_ADD, - next_hop, 128, - &null_addr, next_hop_sw_if_index, - 1, ~0, fib_index); - goto after_nd; - } - } - else - { - nh_adj_index = value.value; - } - } - - return (nh_adj_index); -} - -void -ip6_add_del_route_next_hop (ip6_main_t * im, - u32 flags, - ip6_address_t * dst_address, - u32 dst_address_length, - ip6_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index) -{ - vnet_main_t * vnm = vnet_get_main(); - ip_lookup_main_t * lm = &im->lookup_main; - u32 fib_index; - ip6_fib_t * fib; - ip6_address_t masked_dst_address; - u32 old_mp_adj_index, new_mp_adj_index; - u32 dst_adj_index, nh_adj_index; - int rv; - ip_adjacency_t * dst_adj; - ip_multipath_adjacency_t * old_mp, * new_mp; - int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0; - clib_error_t * error = 0; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - if (explicit_fib_index == (u32)~0) - fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); - else - fib_index = explicit_fib_index; - - fib = vec_elt_at_index (im->fibs, fib_index); - - /* Lookup next hop to be added or deleted. 
*/ - if (adj_index == (u32)~0) - { - nh_adj_index = ip6_route_get_next_hop_adj(im, fib_index, - next_hop, - next_hop_sw_if_index, - explicit_fib_index); - } - else - { - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "next-hop %U/128 not in FIB", - format_ip6_address, next_hop); - goto done; - } - - nh_adj_index = value.value; - } - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - masked_dst_address = dst_address[0]; - ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]); - - kv.key[0] = masked_dst_address.as_u64[0]; - kv.key[1] = masked_dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value); - - if (rv == 0) - { - dst_adj_index = value.value; - dst_adj = ip_get_adjacency (lm, dst_adj_index); - } - else - { - /* For deletes destination must be known. */ - if (is_del) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "unknown destination %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - dst_adj_index = ~0; - dst_adj = 0; - } - - /* Ignore adds of X/128 with next hop of X. */ - if (! 
is_del - && dst_address_length == 128 - && ip6_address_is_equal (dst_address, next_hop)) - { - vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; - error = clib_error_return (0, "prefix matches next hop %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - /* Destination is not known and default weight is set so add route - to existing non-multipath adjacency */ - if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) - { - /* create / delete additional mapping of existing adjacency */ - ip6_add_del_route_args_t a; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP6_ROUTE_FLAG_NO_REDISTRIBUTE - | IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = nh_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - goto done; - } - - old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; - - if (! ip_multipath_adjacency_add_del_next_hop - (lm, is_del, - dst_adj ? dst_adj->heap_handle : ~0, - nh_adj_index, - next_hop_weight, - &new_mp_adj_index)) - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; - error = clib_error_return - (0, "requested deleting next-hop %U not found in multi-path", - format_ip6_address, next_hop); - goto done; - } - - old_mp = new_mp = 0; - if (old_mp_adj_index != ~0) - old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - if (new_mp_adj_index != ~0) - new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); - - if (old_mp != new_mp) - { - ip6_add_del_route_args_t a; - ip_adjacency_t * adj; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? 
IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE)); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - - adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index); - if (adj->n_adj == 1) - adj->share_count += is_del ? -1 : 1; - } - - done: - if (error) - clib_error_report (error); -} - -u32 -ip6_get_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - ip6_address_t masked_address; - BVT(clib_bihash_kv) kv, value; - - ASSERT (address_length < ARRAY_LEN (im->fib_masks)); - clib_memcpy (&masked_address, address, sizeof (masked_address)); - ip6_address_mask (&masked_address, &im->fib_masks[address_length]); - - kv.key[0] = masked_address.as_u64[0]; - kv.key[1] = masked_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - return (value.value); - return 0; -} - -void -ip6_foreach_matching_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * dst_address, - u32 address_length, - ip6_address_t ** results, - u8 ** result_lengths) -{ - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - BVT(clib_bihash) * h = &im->ip6_lookup_table; - BVT(clib_bihash_value) * v; - clib_bihash_bucket_t * b; - int i, j, k; - - if (*results) - _vec_len (*results) = 0; - if (*result_lengths) - _vec_len (*result_lengths) = 0; - - /* Walk the table looking for routes which match the supplied address */ - for (i = 0; i < h->nbuckets; i++) - { - b = &h->buckets [i]; - if (b->offset == 0) - continue; 
- - v = BV(clib_bihash_get_value) (h, b->offset); - for (j = 0; j < (1<<b->log2_pages); j++) - { - for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) - { - if (BV(clib_bihash_is_free)(&v->kvp[k])) - continue; - - if ((v->kvp[k].key[2] - == (((u64)((fib - im->fibs))<<32) | address_length)) - && ip6_destination_matches_route - (im, dst_address, (ip6_address_t *) &v->kvp[k], - address_length)) - { - ip6_address_t * a; - - a = (ip6_address_t *)(&v->kvp[k]); - - vec_add1 (*results, a[0]); - vec_add1 (*result_lengths, address_length); - } - } - v++; - } - } -} - -void ip6_maybe_remap_adjacencies (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags) -{ -#if SOONE - ip6_fib_t * fib - = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); -#endif - ip_lookup_main_t * lm = &im->lookup_main; - - if (lm->n_adjacency_remaps == 0) - return; - - clib_warning ("unimplemented, please report to vpp-dev@cisco.com"); - - /* All remaps have been performed. */ - lm->n_adjacency_remaps = 0; -} - -void ip6_delete_matching_routes (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - /* $$$$ static may be OK - this should happen only on thread 0 */ - static ip6_address_t * matching_addresses; - static u8 * matching_address_lengths; - u32 l, i; - ip6_add_del_route_args_t a; - - vlib_smp_unsafe_warning(); - - a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags; - a.table_index_or_table_id = table_index_or_table_id; - a.adj_index = ~0; - a.add_adj = 0; - a.n_add_adj = 0; - - for (l = address_length + 1; l <= 128; l++) - { - ip6_foreach_matching_route (im, table_index_or_table_id, flags, - address, - l, - &matching_addresses, - &matching_address_lengths); - for (i = 0; i < vec_len (matching_addresses); i++) - { - a.dst_address = matching_addresses[i]; - a.dst_address_length = matching_address_lengths[i]; - ip6_add_del_route (im, &a); - } - } - - ip6_maybe_remap_adjacencies (im, 
table_index_or_table_id, flags); -} - void ip6_forward_next_trace (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -713,12 +58,10 @@ ip6_forward_next_trace (vlib_main_t * vm, always_inline uword ip6_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_indirect) + vlib_frame_t * frame) { ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -735,13 +78,14 @@ ip6_lookup_inline (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { vlib_buffer_t * p0, * p1; - u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + u32 pi0, pi1, lbi0, lbi1, wrong_next; ip_lookup_next_t next0, next1; ip6_header_t * ip0, * ip1; - ip_adjacency_t * adj0, * adj1; ip6_address_t * dst_addr0, * dst_addr1; u32 fib_index0, fib_index1; u32 flow_hash_config0, flow_hash_config1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; /* Prefetch next iteration. 
*/ { @@ -765,19 +109,8 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - if (PREDICT_FALSE(is_indirect)) - { - ip_adjacency_t * iadj0, * iadj1; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - dst_addr1 = &iadj1->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - } + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); @@ -787,69 +120,60 @@ ip6_lookup_inline (vlib_main_t * vm, fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); - adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, dst_addr1); + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); + lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1); - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } - if (PREDICT_FALSE (adj1->explicit_fib_index != ~0)) - { - adj_index1 = ip6_fib_lookup_with_table - (im, adj1->explicit_fib_index, dst_addr1); - adj1 = ip_get_adjacency (lm, adj_index1); - } - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; - - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? 
(ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; - next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index1 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj1->lookup_next_index; + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); vnet_buffer (p0)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - - if (PREDICT_FALSE(adj1->n_adj > 1)) + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) { - flow_hash_config1 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - + flow_hash_config1 = lb1->lb_hash_config; vnet_buffer (p1)->ip.flow_hash = ip6_compute_flow_hash (ip1, flow_hash_config1); } - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); - adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + dpo1 = load_balance_get_bucket_i(lb1, + (vnet_buffer (p1)->ip.flow_hash & + lb1->lb_n_buckets_minus_1)); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; + next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next1; + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; @@ -898,11 +222,12 @@ ip6_lookup_inline (vlib_main_t * vm, { vlib_buffer_t * p0; ip6_header_t * ip0; - u32 pi0, adj_index0; + u32 pi0, lbi0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; + load_balance_t * lb0; ip6_address_t * dst_addr0; u32 fib_index0, flow_hash_config0; + const dpo_id_t *dpo0; pi0 = from[0]; to_next[0] = pi0; @@ -911,57 +236,44 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - if (PREDICT_FALSE(is_indirect)) - { - ip_adjacency_t * iadj0; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - } + dst_addr0 = &ip0->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + ip6_fib_get (fib_index0)->flow_hash_config; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } - - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; + lb0 = load_balance_get (lbi0); vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + next0 = dpo0->dpoi_next_node; + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; @@ -986,163 +298,171 @@ ip6_lookup_inline (vlib_main_t * vm, } if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace(vm, node, frame, VLIB_TX); + ip6_forward_next_trace(vm, node, frame, VLIB_TX); return frame->n_vectors; } -void ip6_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index) -{ - vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ip_lookup_next_t n; - u32 node_index; - - if (hw->hw_class_index == ethernet_hw_interface_class.index - || hw->hw_class_index == srp_hw_interface_class.index) - { - n = IP_LOOKUP_NEXT_ARP; - node_index = ip6_discover_neighbor_node.index; - adj->if_address_index = if_address_index; - adj->arp.next_hop.ip6.as_u64[0] = 0; - adj->arp.next_hop.ip6.as_u64[1] = 0; - } - else - { - n = IP_LOOKUP_NEXT_REWRITE; - node_index = ip6_rewrite_node.index; - } - - adj->lookup_next_index = n; - adj->explicit_fib_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - node_index, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); -} - static void ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, ip6_main_t * im, u32 fib_index, ip_interface_address_t * a) { ip_lookup_main_t * lm = &im->lookup_main; - ip_adjacency_t * adj; ip6_address_t * address = ip_interface_address_get_address (lm, a); - ip6_add_del_route_args_t x; - vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); - u32 classify_table_index; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = a->address_length; - x.n_add_adj = 0; - x.add_adj = 0; + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; a->neighbor_probe_adj_index = ~0; if (a->address_length < 128) - { - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - ip6_add_del_route (im, &x); - a->neighbor_probe_adj_index = x.adj_index; - } - - /* Add e.g. ::1/128 as local to this host. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); + { + fib_node_index_t fei; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, /* No next-hop address */ + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + a->neighbor_probe_adj_index = fib_entry_get_adj(fei); + } - classify_table_index = ~0; + pfx.fp_len = 128; if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) - classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; - if (classify_table_index != (u32) ~0) + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_NULL; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP6, + classify_table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + } + + fib_table_entry_update_one_path(fib_index, + &pfx, + 
FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); +} + +static void +ip6_del_interface_routes (ip6_main_t * im, + u32 fib_index, + ip6_address_t * address, + u32 address_length) +{ + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + if (pfx.fp_len < 128) { - adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj->classify.table_index = classify_table_index; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); + } - else - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - - adj->if_address_index = a - lm->if_address_pool; - adj->rewrite_header.sw_if_index = sw_if_index; - adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; - adj->rewrite_header.data_bytes = 0; - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + + pfx.fp_len = 128; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); } -static void -ip6_del_interface_routes (ip6_main_t * im, u32 fib_index, - ip6_address_t * address, u32 address_length) +void +ip6_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable) { - ip6_add_del_route_args_t x; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_DEL - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = address_length; - x.adj_index = ~0; - x.n_add_adj = 0; - x.add_adj = 0; - - if (address_length < 128) + vlib_main_t * vm = vlib_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + u32 lookup_feature_index; + + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) { - /* Don't wipe out fe80::0/64 */ - if (address_length != 64 || - address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL)) - ip6_add_del_route (im, &x); + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; } - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++) + { + ip_config_main_t * cm = &lm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; - ip6_delete_matching_routes (im, - fib_index, - IP6_ROUTE_FLAG_FIB_INDEX, - address, - address_length); -} + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; -typedef struct { - u32 sw_if_index; - ip6_address_t address; - u32 length; -} ip6_interface_address_t; + if (cast == VNET_IP_RX_UNICAST_FEAT) + lookup_feature_index = im->ip6_unicast_rx_feature_lookup; + else + lookup_feature_index = im->ip6_multicast_rx_feature_lookup; -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * new_address, - u32 new_length, - u32 redistribute, - u32 insert_routes, - u32 is_del); + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + 
lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * address, - u32 address_length, - u32 redistribute, - u32 insert_routes, - u32 is_del) + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } +} + +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 is_del) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -1174,17 +494,13 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, goto done; } - if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) - { - if (is_del) - ip6_del_interface_routes (im, ip6_af.fib_index, address, - address_length); - - else - ip6_add_interface_routes (vnm, sw_if_index, - im, ip6_af.fib_index, - pool_elt_at_index (lm->if_address_pool, if_address_index)); - } + if (is_del) + ip6_del_interface_routes (im, ip6_af.fib_index, address, + address_length); + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, if_address_index)); { ip6_add_del_interface_address_callback_t * cb; @@ -1201,18 +517,6 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, } clib_error_t * -ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, - ip6_address_t * address, u32 address_length, - u32 is_del) -{ - return ip6_add_del_interface_address_internal - (vm, sw_if_index, address, address_length, - /* redistribute */ 1, - /* insert_routes */ 1, - is_del); -} - -clib_error_t * ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -1282,10 +586,16 @@ VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = { VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = { 
.node_name = "ip6-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_lookup, }; +VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /*last feature*/ + .feature_index = &ip6_main.ip6_unicast_rx_feature_drop, +}; + /* Built-in ip6 multicast rx feature path definition (none now) */ VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = { .node_name = "vpath-input-ip6", @@ -1295,10 +605,16 @@ VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = { VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = { .node_name = "ip6-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_multicast_rx_feature_lookup, }; +VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /* last feature */ + .feature_index = &ip6_main.ip6_multicast_rx_feature_drop, +}; + static char * rx_feature_start_nodes[] = {"ip6-input"}; @@ -1343,7 +659,7 @@ ip6_feature_init (vlib_main_t * vm, ip6_main_t * im) feature_start_nodes, feature_start_len, cast, - 0 /* is_ip4 */))) + VNET_L3_PACKET_TYPE_IP6))) return error; } return 0; @@ -1369,9 +685,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, ci = cm->config_index_by_sw_if_index[sw_if_index]; if (cast == VNET_IP_RX_UNICAST_FEAT) - feature_index = im->ip6_unicast_rx_feature_lookup; + feature_index = im->ip6_unicast_rx_feature_drop; else if (cast == VNET_IP_RX_MULTICAST_FEAT) - feature_index = im->ip6_multicast_rx_feature_lookup; + feature_index = im->ip6_multicast_rx_feature_drop; else feature_index = im->ip6_tx_feature_interface_output; @@ -1382,12 +698,14 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, /* config data */ 0, /* # bytes of config data */ 0); else - ci = vnet_config_del_feature (vm, vcm, - ci, - feature_index, - /* config data */ 0, - 
/* # bytes of config data */ 0); - + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index) + im->ip_enabled_by_sw_if_index[sw_if_index] = 0; + } cm->config_index_by_sw_if_index[sw_if_index] = ci; /* * note: do not update the tx feature count here. @@ -1403,7 +721,7 @@ ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 0); + return ip6_lookup_inline (vm, node, frame); } static u8 * format_ip6_lookup_trace (u8 * s, va_list * args); @@ -1419,27 +737,97 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = { .next_nodes = IP6_LOOKUP_NEXT_NODES, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup) -static uword -ip6_indirect (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +ip6_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 1); -} + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace(vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const ip6_header_t *ip0; + const dpo_id_t *dpo0; + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = 
vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets - 1)); -VLIB_REGISTER_NODE (ip6_indirect_node) = { - .function = ip6_indirect, - .name = "ip6-indirect", + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_load_balance_node) = { + .function = ip6_load_balance, + .name = "ip6-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip6-lookup", .format_trace = format_ip6_lookup_trace, .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_indirect_node, ip6_indirect); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance) typedef struct { /* Adjacency taken. 
*/ @@ -1469,13 +857,10 @@ static u8 * format_ip6_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); - s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", - t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + s = format (s, "fib %d dpo-idx %d : flow hash: 0x%08x", + t->fib_index, t->adj_index, t->flow_hash); s = format(s, "\n%U%U", format_white_space, indent, format_ip6_header, t->packet_data); @@ -1489,16 +874,16 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, + vnm, t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1628,12 +1013,6 @@ ip6_punt (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); } -static uword -ip6_miss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); } - VLIB_REGISTER_NODE (ip6_drop_node,static) = { .function = ip6_drop, .name = "ip6-drop", @@ -1647,7 +1026,7 @@ VLIB_REGISTER_NODE (ip6_drop_node,static) = 
{ }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop) VLIB_REGISTER_NODE (ip6_punt_node,static) = { .function = ip6_punt, @@ -1662,22 +1041,7 @@ VLIB_REGISTER_NODE (ip6_punt_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt); - -VLIB_REGISTER_NODE (ip6_miss_node,static) = { - .function = ip6_miss, - .name = "ip6-miss", - .vector_size = sizeof (u32), - - .format_trace = format_ip6_forward_next_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_miss_node, ip6_miss); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt) VLIB_REGISTER_NODE (ip6_multicast_node,static) = { .function = ip6_drop, @@ -1931,17 +1295,21 @@ ip6_local (vlib_main_t * vm, /* Drop packets from unroutable hosts. */ /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } - if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP) + if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && + type1 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip1->src_address)) { u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1); - error1 = (lm->miss_adj_index == src_adj_index1 + error1 = (ADJ_INDEX_INVALID == src_adj_index1 ? 
IP6_ERROR_SRC_LOOKUP_MISS : error1); } @@ -2018,10 +1386,12 @@ ip6_local (vlib_main_t * vm, : error0); /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } @@ -2057,7 +1427,7 @@ VLIB_REGISTER_NODE (ip6_local_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local) void ip6_register_protocol (u32 protocol, u32 node_index) { @@ -2082,9 +1452,10 @@ typedef enum { } ip6_discover_neighbor_error_t; static uword -ip6_discover_neighbor (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip6_discover_neighbor_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_glean) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -2144,11 +1515,11 @@ ip6_discover_neighbor (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); - if (adj0->arp.next_hop.ip6.as_u64[0] || - adj0->arp.next_hop.ip6.as_u64[1]) { - ip0->dst_address.as_u64[0] = adj0->arp.next_hop.ip6.as_u64[0]; - ip0->dst_address.as_u64[1] = adj0->arp.next_hop.ip6.as_u64[1]; - } + if (!is_glean) + { + ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0]; + ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1]; + } a0 = hash_seeds[0]; b0 = hash_seeds[1]; @@ -2209,13 +1580,15 @@ ip6_discover_neighbor (vlib_main_t * vm, * Choose source address based on destination lookup * adjacency. 
*/ - if (ip6_src_address_for_packet (im, p0, &h0->ip.src_address, - sw_if_index0)) { - //There is no address on the interface + if (ip6_src_address_for_packet (lm, + sw_if_index0, + &h0->ip.src_address)) + { + /* There is no address on the interface */ p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; vlib_buffer_free(vm, &bi0, 1); continue; - } + } /* * Destination address is a solicited node multicast address. @@ -2262,6 +1635,22 @@ ip6_discover_neighbor (vlib_main_t * vm, return frame->n_vectors; } +static uword +ip6_discover_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 0)); +} + +static uword +ip6_glean (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 1)); +} + static char * ip6_discover_neighbor_error_strings[] = { [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] @@ -2287,6 +1676,23 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = { }, }; +VLIB_REGISTER_NODE (ip6_glean_node) = { + .function = ip6_glean, + .name = "ip6-glean", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + }, +}; + clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) { @@ -2474,31 +1880,17 @@ ip6_rewrite_inline (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); - if (rewrite_for_locally_received_packets) - { - /* - * If someone sends e.g. 
an icmp6 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; vnet_buffer(p1)->ip.save_rewrite_length = rw_len1; - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index1, /* packet increment */ 0, @@ -2621,13 +2013,6 @@ ip6_rewrite_inline (vlib_main_t * vm, } } - if (rewrite_for_locally_received_packets) - { - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - /* Guess we are only writing on simple Ethernet header. 
*/ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2635,7 +2020,7 @@ ip6_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, @@ -2712,6 +2097,29 @@ ip6_rewrite_local (vlib_main_t * vm, /* rewrite_for_locally_received_packets */ 1); } +static uword +ip6_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + +VLIB_REGISTER_NODE (ip6_midchain_node) = { + .function = ip6_midchain, + .name = "ip6-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .next_nodes = { + [IP6_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain) + VLIB_REGISTER_NODE (ip6_rewrite_node) = { .function = ip6_rewrite_transit, .name = "ip6-rewrite", @@ -3207,12 +2615,17 @@ ip6_lookup_init (vlib_main_t * vm) if (im->lookup_table_size == 0) im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; - BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table", + BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash), + "ip6 FIB fwding table", im->lookup_table_nbuckets, im->lookup_table_size); - + BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + "ip6 FIB non-fwding table", + im->lookup_table_nbuckets, + im->lookup_table_size); + /* Create FIB with index 0 and table id of 0. 
*/ - find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID); + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0); { pg_node_t * pn; @@ -3282,17 +2695,14 @@ add_del_ip6_interface_table (vlib_main_t * vm, } { - ip6_main_t * im = &ip6_main; - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID); + u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, + table_id); - if (fib) - { - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; } + done: return error; } @@ -3368,7 +2778,7 @@ int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) if (p == 0) return -1; - fib = vec_elt_at_index (im6->fibs, p[0]); + fib = ip6_fib_get (p[0]); fib->flow_hash_config = flow_hash_config; return 1; diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c index 2a037033..d927d279 100644 --- a/vnet/vnet/ip/ip6_hop_by_hop.c +++ b/vnet/vnet/ip/ip6_hop_by_hop.c @@ -24,6 +24,7 @@ #include <vppinfra/elog.h> #include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/fib/ip6_fib.h> char *ppc_state[] = { "None", "Encap", "Decap" }; @@ -935,48 +936,22 @@ ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width, u32 vrf_id, ip_lookup_main_t *lm = &im->lookup_main; ip_adjacency_t *adj; u32 fib_index; - u32 len, adj_index; - int i, rv; - uword *p; - BVT (clib_bihash_kv) kv, value; + u32 adj_index; if ((is_add + is_pop + is_none) != 1) return VNET_API_ERROR_INVALID_VALUE_2; /* Go find the adjacency we're supposed to tickle */ - p = hash_get (im->fib_index_by_table_id, vrf_id); + fib_index = ip6_fib_index_from_table_id (vrf_id); - if (p == 0) + if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_FIB; - fib_index = p[0]; + adj_index = ip6_fib_table_fwding_lookup (im, fib_index, addr); - len = vec_len 
(im->prefix_lengths_in_search_order); - - for (i = 0; i < len; i++) - { - int dst_address_length = im->prefix_lengths_in_search_order[i]; - ip6_address_t *mask = &im->fib_masks[dst_address_length]; - - if (dst_address_length != mask_width) - continue; - - kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; - kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; - kv.key[2] = ((u64) ((fib_index)) << 32) | dst_address_length; - - rv = - BV (clib_bihash_search_inline_2) (&im->ip6_lookup_table, &kv, &value); - if (rv == 0) - goto found; - - } - return VNET_API_ERROR_NO_SUCH_ENTRY; - -found: + ASSERT (!"Not an ADJ"); /* Got it, modify as directed... */ - adj_index = value.value; adj = ip_get_adjacency (lm, adj_index); /* Restore original lookup-next action */ @@ -1015,7 +990,7 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm, int is_pop = 0; int is_none = 0; u32 vrf_id = 0; - int rv; + // int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -1038,19 +1013,23 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm, if (mask_width == ~0) return clib_error_return (0, "<address>/<mask-width> required"); - rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, - is_add, is_pop, is_none); + /* rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, */ + /* is_add, is_pop, is_none); */ - switch (rv) - { - case 0: - break; - default: - return clib_error_return (0, "ip6_ioam_set_destination returned %d", - rv); - } + /* switch (rv) */ + /* { */ + /* case 0: */ + /* break; */ + /* default: */ + /* return clib_error_return (0, "ip6_ioam_set_destination returned %d", */ + /* rv); */ + /* } */ - return 0; + /* return 0; */ + + return clib_error_return (0, + "ip6_ioam_set_destination Currnetly Disabled due to FIB2.0", + 1); } /* *INDENT-OFF* */ diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c index a35f58a3..11df776e 100644 --- a/vnet/vnet/ip/ip6_neighbor.c +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -19,6 +19,9 @@ #include 
<vnet/ethernet/ethernet.h> #include <vppinfra/mhash.h> #include <vppinfra/md5.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip6_fib.h> #if DPDK==1 #include <vnet/devices/dpdk/dpdk.h> @@ -38,9 +41,9 @@ typedef struct { u8 link_layer_address[8]; u16 flags; #define IP6_NEIGHBOR_FLAG_STATIC (1 << 0) -#define IP6_NEIGHBOR_FLAG_GLEAN (2 << 0) +#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0) u64 cpu_time_last_updated; - u32 *adjacencies; + adj_index_t adj_index; } ip6_neighbor_t; /* advertised prefix option */ @@ -121,9 +124,9 @@ typedef struct { u32 seed; u64 randomizer; int ref_count; - u32 all_nodes_adj_index; - u32 all_routers_adj_index; - u32 all_mldv2_routers_adj_index; + adj_index_t all_nodes_adj_index; + adj_index_t all_routers_adj_index; + adj_index_t all_mldv2_routers_adj_index; /* timing information */ #define DEF_MAX_RADV_INTERVAL 200 @@ -217,8 +220,8 @@ static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) if (! n) return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags", "Link layer", "Interface"); - if (n->flags & IP6_NEIGHBOR_FLAG_GLEAN) - flags = format(flags, "G"); + if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC) + flags = format(flags, "D"); if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) flags = format(flags, "S"); @@ -330,6 +333,52 @@ static void set_unset_ip6_neighbor_rpc } #endif +static void +ip6_nd_mk_complete (ip6_neighbor_t * nbr) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = nbr->key.ip6_address, + }, + }; + ip6_main_t *im; + u32 fib_index; + + im = &ip6_main; + fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; + + /* only once please */ + if (ADJ_INDEX_INVALID == nbr->adj_index) + { + nbr->adj_index = + adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + &pfx.fp_addr, + nbr->key.sw_if_index, + nbr->link_layer_address); + ASSERT(ADJ_INDEX_INVALID != nbr->adj_index); + + fib_table_entry_update_one_path(fib_index, + 
&pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + nbr->key.sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } + else + { + adj_nbr_update_rewrite(nbr->adj_index, + nbr->link_layer_address); + } +} + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -338,17 +387,12 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, uword n_bytes_link_layer_address, int is_static) { - vnet_main_t * vnm = vnet_get_main(); ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_neighbor_key_t k; ip6_neighbor_t * n = 0; - ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; int make_new_nd_cache_entry=1; uword * p; u32 next_index; - u32 adj_index; - ip_adjacency_t *existing_adj; pending_resolution_t * pr, * mc; #if DPDK > 0 @@ -376,77 +420,26 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, make_new_nd_cache_entry = 0; } - /* Note: always install the route. It might have been deleted */ - ip6_add_del_route_args_t args; - ip_adjacency_t adj; - - memset (&adj, 0, sizeof(adj)); - adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj.explicit_fib_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj.rewrite_header, - sizeof (adj.rewrite_data)); - - /* result of this lookup should be next-hop adjacency */ - adj_index = ip6_fib_lookup_with_table (im, im->fib_index_by_sw_if_index[sw_if_index], a); - existing_adj = ip_get_adjacency(lm, adj_index); - - if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - existing_adj->arp.next_hop.ip6.as_u64[0] == a->as_u64[0] && - existing_adj->arp.next_hop.ip6.as_u64[1] == a->as_u64[1]) - { - u32 * ai; - u32 * adjs = 0; - - if (n) - adjs = vec_dup(n->adjacencies); - else - clib_warning ("ip6 neighbor n not set"); - - /* Update all adj assigned to this arp entry */ - vec_foreach(ai, adjs) - { - int i; - ip_adjacency_t * uadj = ip_get_adjacency(lm, 
*ai); - for (i = 0; i < uadj->n_adj; i++) - if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP && - uadj[i].arp.next_hop.ip6.as_u64[0] == a->as_u64[0] && - uadj[i].arp.next_hop.ip6.as_u64[1] == a->as_u64[1]) - ip_update_adjacency (lm, *ai + i, &adj); - } - vec_free(adjs); - } - else - { - /* create new adj */ - args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a[0]; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = &adj; - args.n_add_adj = 1; - ip6_add_del_route (im, &args); - } - if (make_new_nd_cache_entry) { pool_get (nm->neighbor_pool, n); mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); n->key = k; + n->adj_index = ADJ_INDEX_INVALID; } /* Update time stamp and ethernet address. */ - clib_memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address); + clib_memcpy (n->link_layer_address, + link_layer_address, + n_bytes_link_layer_address); + n->cpu_time_last_updated = clib_cpu_time_now (); if (is_static) n->flags |= IP6_NEIGHBOR_FLAG_STATIC; + else + n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC; + + ip6_nd_mk_complete(n); /* Customer(s) waiting for this address to be resolved? 
*/ p = mhash_get (&nm->pending_resolutions_by_address, a); @@ -499,6 +492,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, return 0; } +static void +ip6_nd_mk_incomplete (ip6_neighbor_t *nbr) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = nbr->key.ip6_address, + }, + }; + u32 fib_index; + ip6_main_t *im; + + im = &ip6_main; + fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; + + /* + * revert the adj this ND entry sourced to incomplete + */ + adj_nbr_update_rewrite(nbr->adj_index, + NULL); + + /* + * remove the FIB entry the ND entry sourced + */ + fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ); + + /* + * Unlock the adj now that the ARP entry is no longer a source + */ + adj_unlock(nbr->adj_index); + nbr->adj_index = ADJ_INDEX_INVALID; +} + int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -509,8 +536,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_neighbor_key_t k; ip6_neighbor_t * n; - ip6_main_t * im = &ip6_main; - ip6_add_del_route_args_t args; uword * p; int rv = 0; @@ -537,73 +562,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, } n = pool_elt_at_index (nm->neighbor_pool, p[0]); + + ip6_nd_mk_incomplete(n); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); pool_put (nm->neighbor_pool, n); - args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_DEL - | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a[0]; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = NULL; - args.n_add_adj = 0; - ip6_add_del_route (im, &args); out: vlib_worker_thread_barrier_release(vm); return rv; } - -u32 -vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg) -{ - ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; - ip6_address_t * 
next_hop = next_hop_arg; - ip_adjacency_t add_adj, *adj; - ip6_add_del_route_args_t args; - ip6_neighbor_t * n; - ip6_neighbor_key_t k; - u32 adj_index; - - adj_index = ip6_fib_lookup_with_table(im, fib_index, next_hop); - adj = ip_get_adjacency(lm, adj_index); - - if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP) - return ~0; - - if (adj->arp.next_hop.ip6.as_u64[0] || - adj->arp.next_hop.ip6.as_u64[1]) - return adj_index; - - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.ip6_address = *next_hop; - k.pad = 0; - if (mhash_get (&nm->neighbor_index_by_key, &k)) - return adj_index; - - pool_get (nm->neighbor_pool, n); - mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); - n->key = k; - n->cpu_time_last_updated = clib_cpu_time_now (); - n->flags = IP6_NEIGHBOR_FLAG_GLEAN; - - memset(&args, 0, sizeof(args)); - memcpy(&add_adj, adj, sizeof(add_adj)); - add_adj.arp.next_hop.ip6 = *next_hop; /* install neighbor /128 route */ - args.table_index_or_table_id = fib_index; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = *next_hop; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = &add_adj; - args.n_add_adj = 1; - ip6_add_del_route (im, &args); - return ip6_fib_lookup_with_table (im, fib_index, next_hop); -} - #if DPDK > 0 static void ip6_neighbor_set_unset_rpc_callback ( ip6_neighbor_set_unset_rpc_args_t * a) @@ -728,7 +696,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; uword n_packets = frame->n_vectors; u32 * from, * to_next; u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent; @@ -787,17 +754,25 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, if (!ip6_sadd_unspecified && !ip6_sadd_link_local) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - ip_adjacency_t * adj0 
= ip_get_adjacency (&im->lookup_main, src_adj_index0); - /* Allow all realistic-looking rewrite adjacencies to pass */ - ni0 = adj0->lookup_next_index; - is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && - (ni0 < IP6_LOOKUP_N_NEXT); + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); - error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 - || ! is_rewrite0) - ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK - : error0); + /* Allow all realistic-looking rewrite adjacencies to pass */ + ni0 = adj0->lookup_next_index; + is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && + (ni0 < IP6_LOOKUP_N_NEXT); + + error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || ! is_rewrite0) + ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK; + } } o0 = (void *) (h0 + 1); @@ -820,21 +795,28 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, if (is_solicitation && error0 == ICMP6_ERROR_NONE) { - /* Check that target address is one that we know about. */ - ip_interface_address_t * ia0; - ip6_address_fib_t ip6_af0; - void * oldheap; - - ip6_addr_fib_init (&ip6_af0, &h0->target_address, - vec_elt (im->fib_index_by_sw_if_index, - sw_if_index0)); - - /* Gross kludge, "thank you" MJ, don't even ask */ - oldheap = clib_mem_set_heap (clib_per_cpu_mheaps[0]); - ia0 = ip_get_interface_address (lm, &ip6_af0); - clib_mem_set_heap (oldheap); - error0 = ia0 == 0 ? - ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN : error0; + /* Check that target address is local to this router. 
*/ + fib_node_index_t fei; + u32 fib_index; + + fib_index = ip6_fib_table_get_index_for_sw_if_index(sw_if_index0); + + if (~0 == fib_index) + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + else + { + fei = ip6_fib_table_lookup_exact_match(fib_index, + &h0->target_address, + 128); + + if (FIB_NODE_INDEX_INVALID == fei || + !(FIB_ENTRY_FLAG_LOCAL & fib_entry_get_flags(fei))) + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + } } if (is_solicitation) @@ -1052,13 +1034,20 @@ icmp6_router_solicitation(vlib_main_t * vm, if (!is_unspecified && !is_link_local) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); - error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 - || (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)) - ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK - : error0); + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, + src_adj_index0); + + error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0 + ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK; + } } /* check for source LL option and process */ @@ -1472,8 +1461,7 @@ icmp6_router_advertisement(vlib_main_t * vm, /* check for MTU or prefix options or .. 
*/ u8 * opt_hdr = (u8 *)(h0 + 1); - while( options_len0 > 0 && - opt_hdr < p0->data + p0->current_data) + while( options_len0 > 0) { icmp6_neighbor_discovery_option_header_t *o0 = ( icmp6_neighbor_discovery_option_header_t *)opt_hdr; int opt_len = o0->n_data_u64s << 3; @@ -1606,11 +1594,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) { - ip6_main_t * im = &ip6_main; ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip_lookup_main_t * lm = &im->lookup_main; ip6_radv_t * a= 0; - u32 ri = ~0;; + u32 ri = ~0; vnet_sw_interface_t * sw_if0; ethernet_interface_t * eth_if0 = 0; @@ -1636,9 +1622,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, ip6_mldp_group_t *m; /* remove adjacencies */ - ip_del_adjacency (lm, a->all_nodes_adj_index); - ip_del_adjacency (lm, a->all_routers_adj_index); - ip_del_adjacency (lm, a->all_mldv2_routers_adj_index); + adj_unlock(a->all_nodes_adj_index); + adj_unlock(a->all_routers_adj_index); + adj_unlock(a->all_mldv2_routers_adj_index); /* clean up prefix_pool */ pool_foreach (p, a->adv_prefixes_pool, ({ @@ -1672,6 +1658,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, pool_put (nm->if_radv_pool, a); nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0; ri = ~0; + ip6_sw_interface_enable_disable(sw_if_index, 0); } } else @@ -1680,6 +1667,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, { vnet_hw_interface_t * hw_if0; + ip6_sw_interface_enable_disable(sw_if_index, 1); hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index); pool_get (nm->if_radv_pool, a); @@ -1702,10 +1690,11 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS; a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS; a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME; - a->seed = (u32) (clib_cpu_time_now() & 0xFFFFFFFF); + a->seed = random_default_seed(); /* for generating random interface ids */ - a->randomizer = random_u64 (&a->seed); + a->randomizer = 
0x1119194911191949; + a->randomizer = random_u64 ((u32 *)&a->randomizer); a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS ; a->initial_adverts_sent = a->initial_adverts_count-1; @@ -1727,66 +1716,34 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t)); { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_hosts}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_nodes_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_nodes_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_routers}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_routers_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_routers_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_mldv2_routers}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_mldv2_routers_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - 
link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_mldv2_routers_adj_index = + adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } /* add multicast groups we will always be reporting */ @@ -2969,7 +2926,8 @@ enable_ip6_interface(vlib_main_t * vm, /* essentially "enables" ipv6 on this interface */ error = ip6_add_del_interface_address (vm, sw_if_index, - &link_local_address, 64 /* address width */, + &link_local_address, + 128 /* address width */, 0 /* is_del */); if(error) @@ -3255,87 +3213,10 @@ clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit) return 0; } - -static void -ip6_neighbor_entry_del_adj(ip6_neighbor_t *n, u32 adj_index) -{ - int done = 0; - int i; - while (!done) - { - vec_foreach_index(i, n->adjacencies) - if (vec_elt(n->adjacencies, i) == adj_index) - { - vec_del1(n->adjacencies, i); - continue; - } - done = 1; - } -} - -static void -ip6_neighbor_entry_add_adj(ip6_neighbor_t *n, u32 adj_index) -{ - int i; - vec_foreach_index(i, n->adjacencies) - if (vec_elt(n->adjacencies, i) == adj_index) - return; - vec_add1(n->adjacencies, adj_index); -} - -static void -ip6_neighbor_add_del_adj_cb (struct ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * adj, - u32 is_del) -{ - ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip6_neighbor_key_t k; - ip6_neighbor_t *n = 0; - uword * p; - u32 ai; - - for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++) - { - adj = ip_get_adjacency (lm, ai); - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1])) - { - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.ip6_address.as_u64[0] = adj->arp.next_hop.ip6.as_u64[0]; - k.ip6_address.as_u64[1] = adj->arp.next_hop.ip6.as_u64[1]; - k.pad = 0; - p = mhash_get (&nm->neighbor_index_by_key, &k); - if (p) - n = pool_elt_at_index (nm->neighbor_pool, p[0]); - } - else - continue; - - if (is_del) - { - 
if (!n) - clib_warning("Adjacency contains unknown ND next hop %U (del)", - format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6); - else - ip6_neighbor_entry_del_adj(n, adj->heap_handle); - } - else /* add */ - { - if (!n) - clib_warning("Adjacency contains unknown ND next hop %U (add)", - format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6); - else - ip6_neighbor_entry_add_adj(n, adj->heap_handle); - } - } -} - static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) { ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; mhash_init (&nm->neighbor_index_by_key, /* value size */ sizeof (uword), @@ -3375,8 +3256,6 @@ static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */); #endif - ip_register_add_del_adjacency_callback(lm, ip6_neighbor_add_del_adj_cb); - return 0; } @@ -3593,5 +3472,3 @@ int vnet_ip6_nd_term (vlib_main_t * vm, return 0; } - - diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h index c83e5764..29fa4a4e 100644 --- a/vnet/vnet/ip/ip6_packet.h +++ b/vnet/vnet/ip/ip6_packet.h @@ -70,6 +70,8 @@ typedef CLIB_PACKED (union { #define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0) #define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0]) #define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0) +#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1))) +#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0)) always_inline void ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address, @@ -303,6 +305,22 @@ ip6_next_header (ip6_header_t * i) { return (void *) (i + 1); } always_inline void +ip6_copy_header (ip6_header_t * dst, + const ip6_header_t *src) +{ + dst->ip_version_traffic_class_and_flow_label = + 
src->ip_version_traffic_class_and_flow_label; + dst->payload_length = src->payload_length; + dst->protocol = src->protocol; + dst->hop_limit = src->hop_limit; + + dst->src_address.as_uword[0] = src->src_address.as_uword[0]; + dst->src_address.as_uword[1] = src->src_address.as_uword[1]; + dst->dst_address.as_uword[0] = src->dst_address.as_uword[0]; + dst->dst_address.as_uword[1] = src->dst_address.as_uword[1]; +} + +always_inline void ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { { diff --git a/vnet/vnet/ip/ip_feature_registration.c b/vnet/vnet/ip/ip_feature_registration.c index 9505a09e..b96f81bd 100644 --- a/vnet/vnet/ip/ip_feature_registration.c +++ b/vnet/vnet/ip/ip_feature_registration.c @@ -15,6 +15,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> /** \file @@ -131,7 +132,7 @@ ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, int is_ip4) + vnet_cast_t cast, vnet_l3_packet_type_t proto) { uword *index_by_name; uword *reg_by_index; @@ -155,33 +156,43 @@ ip_feature_init_cast (vlib_main_t * vm, u8 **keys_to_delete = 0; ip4_main_t *im4 = &ip4_main; ip6_main_t *im6 = &ip6_main; + mpls_main_t *mm = &mpls_main; index_by_name = hash_create_string (0, sizeof (uword)); reg_by_index = hash_create (0, sizeof (uword)); if (cast == VNET_IP_RX_UNICAST_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_uc_feature; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) first_reg = im6->next_uc_feature; + else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) + first_reg = mm->next_feature; + else + return clib_error_return (0, + "protocol %d cast %d unsupport for features", + proto, cast); } else if (cast == VNET_IP_RX_MULTICAST_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_mc_feature; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) first_reg = im6->next_mc_feature; + else 
+ return clib_error_return (0, + "protocol %d cast %d unsupport for features", + proto, cast); } else if (cast == VNET_IP_TX_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_tx_feature; else first_reg = im6->next_tx_feature; } - this_reg = first_reg; /* pass 1, collect feature node names, construct a before b pairs */ @@ -281,8 +292,7 @@ again: /* see if we got a partial order... */ if (vec_len (result) != n_features) return clib_error_return - (0, "ip%s_feature_init_cast (cast=%d), no partial order!", - is_ip4 ? "4" : "6", cast); + (0, "%d feature_init_cast (cast=%d), no partial order!", proto, cast); /* * We win. @@ -308,10 +318,12 @@ again: feature_nodes, vec_len (feature_nodes)); /* Save a copy for show command */ - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) im4->feature_nodes[cast] = feature_nodes; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) im6->feature_nodes[cast] = feature_nodes; + else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) + mm->feature_nodes = feature_nodes; /* Finally, clean up all the shit we allocated */ /* *INDENT-OFF* */ diff --git a/vnet/vnet/ip/ip_feature_registration.h b/vnet/vnet/ip/ip_feature_registration.h index 2d9a15bc..95ee78ad 100644 --- a/vnet/vnet/ip/ip_feature_registration.h +++ b/vnet/vnet/ip/ip_feature_registration.h @@ -39,7 +39,8 @@ clib_error_t *ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, int is_ip4); + vnet_cast_t cast, + vnet_l3_packet_type_t proto); #endif /* included_ip_feature_registration_h */ diff --git a/vnet/vnet/ip/ip_source_and_port_range_check.h b/vnet/vnet/ip/ip_source_and_port_range_check.h index 5b49aabd..fefe5ff1 100644 --- a/vnet/vnet/ip/ip_source_and_port_range_check.h +++ b/vnet/vnet/ip/ip_source_and_port_range_check.h @@ -19,9 +19,6 @@ typedef struct { - u32 ranges_per_adjacency; - u32 special_adjacency_format_function_index; - /* convenience */ 
vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -60,6 +57,69 @@ typedef struct u16x8vec_t hi; } protocol_port_range_t; +/** + * @brief The number of supported ranges per-data path object. + * If more ranges are required, bump this number. + */ +#define N_PORT_RANGES_PER_DPO 64 +#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2) +#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK) + +/** + * @brief + * The object that is in the data-path to perform the check. + * + * Some trade-offs here; memory vs performance. + * + * performance: + * the principal factor is d-cache line misses/hits. + * so we want the data layout to minimise the d-cache misses. This + * means not following dependent reads. i.e. not doing + * + * struct B { + * u16 n_ranges; + * range_t *ranges; // vector of ranges. + * } + * + * so to read ranges[0] we would first d-cache miss on the address + * of the object of type B, for which we would need to wait before we + * can get the address of B->ranges. + * So this layout is better: + * + * struct B { + * u16 n_ranges; + * range_t ranges[N]; + * } + * + * memory: + * the latter layout above is more memory hungry. And N needs to be: + * 1 - sized for the maximum required + * 2 - fixed, so that objects of type B can be pool allocated and so + * 'get'-able using an index. + * An option over fixed might be to allocate a contiguous chunk from + * the pool (like we used to do for multi-path adjs). + */ +typedef struct protocol_port_range_dpo_t_ +{ + /** + * The number of blocks from the 'blocks' array below + * that have ranges configured. We keep this count so that in the data-path + * we can limit the loop to be only over the blocks we need + */ + u16 n_used_blocks; + + /** + * The total number of free ranges from all blocks. + * Used to prevent overrun of the ranges available. 
+ */ + u16 n_free_ranges; + + /** + * the fixed size array of ranges + */ + protocol_port_range_t blocks[N_BLOCKS_PER_DPO]; +} protocol_port_range_dpo_t; + int ip4_source_and_port_range_check_add_del (ip4_address_t * address, u32 length, u32 vrf_id, diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c index 47138071..a695ef76 100644 --- a/vnet/vnet/ip/lookup.c +++ b/vnet/vnet/ip/lookup.c @@ -37,728 +37,16 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include <vppinfra/math.h> /* for fabs */ #include <vnet/ip/ip.h> -#include <vnet/ip/adj_alloc.h> - -static void -ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index); - -always_inline void -ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj) -{ - if (CLIB_DEBUG > 0) - { - u32 save_handle = adj->heap_handle;; - u32 save_n_adj = adj->n_adj; - - memset (adj, 0xfe, n_adj * sizeof (adj[0])); - - adj->heap_handle = save_handle; - adj->n_adj = save_n_adj; - } -} - -static void -ip_share_adjacency(ip_lookup_main_t * lm, u32 adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - uword * p; - u32 old_ai; - uword signature = vnet_ip_adjacency_signature (adj); - - p = hash_get (lm->adj_index_by_signature, signature); - /* Hash collision? */ - if (p) - { - /* Save the adj index, p[0] will be toast after the unset! 
*/ - old_ai = p[0]; - hash_unset (lm->adj_index_by_signature, signature); - hash_set (lm->adj_index_by_signature, signature, adj_index); - adj->next_adj_with_signature = old_ai; - } - else - { - adj->next_adj_with_signature = 0; - hash_set (lm->adj_index_by_signature, signature, adj_index); - } -} - -static void -ip_unshare_adjacency(ip_lookup_main_t * lm, u32 adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - uword signature; - uword * p; - u32 this_ai; - ip_adjacency_t * this_adj, * prev_adj = 0; - - signature = vnet_ip_adjacency_signature (adj); - p = hash_get (lm->adj_index_by_signature, signature); - if (p == 0) - return; - - this_ai = p[0]; - /* At the top of the signature chain (likely)? */ - if (this_ai == adj_index) - { - if (adj->next_adj_with_signature == 0) - { - hash_unset (lm->adj_index_by_signature, signature); - return; - } - else - { - this_adj = ip_get_adjacency (lm, adj->next_adj_with_signature); - hash_unset (lm->adj_index_by_signature, signature); - hash_set (lm->adj_index_by_signature, signature, - this_adj->heap_handle); - } - } - else /* walk signature chain */ - { - this_adj = ip_get_adjacency (lm, this_ai); - while (this_adj != adj) - { - prev_adj = this_adj; - this_adj = ip_get_adjacency - (lm, this_adj->next_adj_with_signature); - /* - * This can happen when creating the first multipath adj of a set - * We end up looking at the miss adjacency (handle==0). 
- */ - if (this_adj->heap_handle == 0) - return; - } - prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature; - } -} - -int ip_register_adjacency(vlib_main_t *vm, - u8 is_ip4, - ip_adj_register_t *reg) -{ - ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main; - vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) ((is_ip4)?"ip4-lookup":"ip6-lookup")); - vlib_node_t *next_node = vlib_get_node_by_name(vm, (u8 *) reg->node_name); - *reg->next_index = vlib_node_add_next (vm, node->index, next_node->index); - vec_validate(lm->registered_adjacencies, *reg->next_index); - lm->registered_adjacencies[*reg->next_index] = *reg; - return 0; -} - -int ip_init_registered_adjacencies(u8 is_ip4) -{ - vlib_main_t *vm = vlib_get_main(); - ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main; - ip_adj_register_t *reg = lm->registered_adjacencies; - lm->registered_adjacencies = 0; //Init vector - int rv; - while (reg) { - if((rv = ip_register_adjacency(vm, is_ip4, reg))) - return rv; - reg = reg->next; - } - return 0; -} - -/* Create new block of given number of contiguous adjacencies. 
*/ -ip_adjacency_t * -ip_add_adjacency (ip_lookup_main_t * lm, - ip_adjacency_t * copy_adj, - u32 n_adj, - u32 * adj_index_return) -{ - ip_adjacency_t * adj; - u32 ai, i, handle; - - /* See if we know enough to attempt to share an existing adjacency */ - if (copy_adj && n_adj == 1) - { - uword signature; - uword * p; - - switch (copy_adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_DROP: - if (lm->drop_adj_index) - { - adj = ip_get_adjacency (lm, lm->drop_adj_index); - *adj_index_return = lm->drop_adj_index; - return (adj); - } - break; - - case IP_LOOKUP_NEXT_LOCAL: - if (lm->local_adj_index) - { - adj = ip_get_adjacency (lm, lm->local_adj_index); - *adj_index_return = lm->local_adj_index; - return (adj); - } - default: - break; - } - - signature = vnet_ip_adjacency_signature (copy_adj); - p = hash_get (lm->adj_index_by_signature, signature); - if (p) - { - adj = vec_elt_at_index (lm->adjacency_heap, p[0]); - while (1) - { - if (vnet_ip_adjacency_share_compare (adj, copy_adj)) - { - adj->share_count++; - *adj_index_return = p[0]; - return adj; - } - if (adj->next_adj_with_signature == 0) - break; - adj = vec_elt_at_index (lm->adjacency_heap, - adj->next_adj_with_signature); - } - } - } - - lm->adjacency_heap = aa_alloc (lm->adjacency_heap, &adj, n_adj); - handle = ai = adj->heap_handle; - - ip_poison_adjacencies (adj, n_adj); - - /* Validate adjacency counters. */ - vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1); - - for (i = 0; i < n_adj; i++) - { - /* Make sure certain fields are always initialized. 
*/ - adj[i].rewrite_header.sw_if_index = ~0; - adj[i].explicit_fib_index = ~0; - adj[i].mcast_group_index = ~0; - adj[i].classify.table_index = ~0; - adj[i].saved_lookup_next_index = 0; - adj[i].special_adjacency_format_function_index = 0; - - if (copy_adj) - adj[i] = copy_adj[i]; - - adj[i].heap_handle = handle; - adj[i].n_adj = n_adj; - adj[i].share_count = 0; - adj[i].next_adj_with_signature = 0; - - /* Zero possibly stale counters for re-used adjacencies. */ - vlib_zero_combined_counter (&lm->adjacency_counters, ai + i); - } - - /* Set up to share the adj later */ - if (copy_adj && n_adj == 1) - ip_share_adjacency(lm, ai); - - *adj_index_return = ai; - return adj; -} - -void -ip_update_adjacency (ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * copy_adj) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); - ip_unshare_adjacency(lm, adj_index); - - /* temporary redirect to drop while updating rewrite data */ - adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; - CLIB_MEMORY_BARRIER(); - - clib_memcpy (&adj->rewrite_header, ©_adj->rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE); - adj->lookup_next_index = copy_adj->lookup_next_index; - ip_share_adjacency(lm, adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); -} - -static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency) -{ - ip_adjacency_t * adj; - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); - - adj = ip_get_adjacency (lm, adj_index); - - /* Special-case miss, local, drop adjs */ - if (adj_index < 3) - return; - - if (adj->n_adj == 1) - { - if (adj->share_count > 0) - { - adj->share_count --; - return; - } - - ip_unshare_adjacency(lm, adj_index); - } - - if (delete_multipath_adjacency) - ip_multipath_del_adjacency (lm, adj_index); - - ip_poison_adjacencies (adj, adj->n_adj); - - aa_free (lm->adjacency_heap, adj); -} - -void 
ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index) -{ ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); } - -static int -next_hop_sort_by_weight (ip_multipath_next_hop_t * n1, - ip_multipath_next_hop_t * n2) -{ - int cmp = (int) n1->weight - (int) n2->weight; - return (cmp == 0 - ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index - : (cmp > 0 ? +1 : -1)); -} - -/* Given next hop vector is over-written with normalized one with sorted weights and - with weights corresponding to the number of adjacencies for each next hop. - Returns number of adjacencies in block. */ -static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm, - ip_multipath_next_hop_t * raw_next_hops, - ip_multipath_next_hop_t ** normalized_next_hops) -{ - ip_multipath_next_hop_t * nhs; - uword n_nhs, n_adj, n_adj_left, i; - f64 sum_weight, norm, error; - - n_nhs = vec_len (raw_next_hops); - ASSERT (n_nhs > 0); - if (n_nhs == 0) - return 0; - - /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */ - nhs = *normalized_next_hops; - vec_validate (nhs, 2*n_nhs - 1); - - /* Fast path: 1 next hop in block. */ - n_adj = n_nhs; - if (n_nhs == 1) - { - nhs[0] = raw_next_hops[0]; - nhs[0].weight = 1; - _vec_len (nhs) = 1; - goto done; - } - - else if (n_nhs == 2) - { - int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0; - - /* Fast sort. */ - nhs[0] = raw_next_hops[cmp]; - nhs[1] = raw_next_hops[cmp ^ 1]; - - /* Fast path: equal cost multipath with 2 next hops. */ - if (nhs[0].weight == nhs[1].weight) - { - nhs[0].weight = nhs[1].weight = 1; - _vec_len (nhs) = 2; - goto done; - } - } - else - { - clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); - qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); - } - - /* Find total weight to normalize weights. 
*/ - sum_weight = 0; - for (i = 0; i < n_nhs; i++) - sum_weight += nhs[i].weight; - - /* In the unlikely case that all weights are given as 0, set them all to 1. */ - if (sum_weight == 0) - { - for (i = 0; i < n_nhs; i++) - nhs[i].weight = 1; - sum_weight = n_nhs; - } - - /* Save copies of all next hop weights to avoid being overwritten in loop below. */ - for (i = 0; i < n_nhs; i++) - nhs[n_nhs + i].weight = nhs[i].weight; - - /* Try larger and larger power of 2 sized adjacency blocks until we - find one where traffic flows to within 1% of specified weights. */ - for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) - { - error = 0; - - norm = n_adj / sum_weight; - n_adj_left = n_adj; - for (i = 0; i < n_nhs; i++) - { - f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */ - word n = flt_round_nearest (nf); - - n = n > n_adj_left ? n_adj_left : n; - n_adj_left -= n; - error += fabs (nf - n); - nhs[i].weight = n; - } - - nhs[0].weight += n_adj_left; - - /* Less than 5% average error per adjacency with this size adjacency block? */ - if (error <= lm->multipath_next_hop_error_tolerance*n_adj) - { - /* Truncate any next hops with zero weight. */ - _vec_len (nhs) = i; - break; - } - } - - done: - /* Save vector for next call. 
*/ - *normalized_next_hops = nhs; - return n_adj; -} - -always_inline uword -ip_next_hop_hash_key_from_handle (uword handle) -{ return 1 + 2*handle; } - -always_inline uword -ip_next_hop_hash_key_is_heap_handle (uword k) -{ return k & 1; } - -always_inline uword -ip_next_hop_hash_key_get_heap_handle (uword k) -{ - ASSERT (ip_next_hop_hash_key_is_heap_handle (k)); - return k / 2; -} - -static u32 -ip_multipath_adjacency_get (ip_lookup_main_t * lm, - ip_multipath_next_hop_t * raw_next_hops, - uword create_if_non_existent) -{ - uword * p; - u32 i, j, n_adj, adj_index, adj_heap_handle; - ip_adjacency_t * adj, * copy_adj; - ip_multipath_next_hop_t * nh, * nhs; - ip_multipath_adjacency_t * madj; - - n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized); - nhs = lm->next_hop_hash_lookup_key_normalized; - - /* Basic sanity. */ - ASSERT (n_adj >= vec_len (raw_next_hops)); - - /* Use normalized next hops to see if we've seen a block equivalent to this one before. */ - p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs); - if (p) - return p[0]; - - if (! create_if_non_existent) - return 0; - - adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index); - adj_heap_handle = adj[0].heap_handle; - - /* Fill in adjacencies in block based on corresponding next hop adjacencies. */ - i = 0; - vec_foreach (nh, nhs) - { - copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index); - for (j = 0; j < nh->weight; j++) - { - adj[i] = copy_adj[0]; - adj[i].heap_handle = adj_heap_handle; - adj[i].n_adj = n_adj; - i++; - } - } - - /* All adjacencies should have been initialized. */ - ASSERT (i == n_adj); - - vec_validate (lm->multipath_adjacencies, adj_heap_handle); - madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle); - - madj->adj_index = adj_index; - madj->n_adj_in_block = n_adj; - madj->reference_count = 0; /* caller will set to one. 
*/ - - madj->normalized_next_hops.count = vec_len (nhs); - madj->normalized_next_hops.heap_offset - = heap_alloc (lm->next_hop_heap, vec_len (nhs), - madj->normalized_next_hops.heap_handle); - clib_memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset, - nhs, vec_bytes (nhs)); - - hash_set (lm->multipath_adjacency_by_next_hops, - ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle), - madj - lm->multipath_adjacencies); - - madj->unnormalized_next_hops.count = vec_len (raw_next_hops); - madj->unnormalized_next_hops.heap_offset - = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops), - madj->unnormalized_next_hops.heap_handle); - clib_memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset, - raw_next_hops, vec_bytes (raw_next_hops)); - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - - return adj_heap_handle; -} - -/* Returns 0 for next hop not found. */ -u32 -ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, - u32 is_del, - u32 old_mp_adj_index, - u32 next_hop_adj_index, - u32 next_hop_weight, - u32 * new_mp_adj_index) -{ - ip_multipath_adjacency_t * mp_old, * mp_new; - ip_multipath_next_hop_t * nh, * nhs, * hash_nhs; - u32 n_nhs, i_nh; - - mp_new = mp_old = 0; - n_nhs = 0; - i_nh = 0; - nhs = 0; - - /* If old adj is not multipath, we need to "convert" it by calling this - * function recursively */ - if (old_mp_adj_index != ~0 && !ip_adjacency_is_multipath(lm, old_mp_adj_index)) - { - ip_multipath_adjacency_add_del_next_hop(lm, /* is_del */ 0, - /* old_mp_adj_index */ ~0, - /* nh_adj_index */ old_mp_adj_index, - /* weight * */ 1, - &old_mp_adj_index); - } - - /* If old multipath adjacency is valid, find requested next hop. 
*/ - if (old_mp_adj_index < vec_len (lm->multipath_adjacencies) - && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0) - { - mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - - nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset); - n_nhs = mp_old->unnormalized_next_hops.count; - - /* Linear search: ok since n_next_hops is small. */ - for (i_nh = 0; i_nh < n_nhs; i_nh++) - if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index) - break; - - /* Given next hop not found. */ - if (i_nh >= n_nhs && is_del) - return 0; - } - - hash_nhs = lm->next_hop_hash_lookup_key; - if (hash_nhs) - _vec_len (hash_nhs) = 0; - - if (is_del) - { - if (n_nhs > 1) - { - /* Prepare lookup key for multipath with target next hop deleted. */ - if (i_nh > 0) - vec_add (hash_nhs, nhs + 0, i_nh); - if (i_nh + 1 < n_nhs) - vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1)); - } - } - else /* it's an add. */ - { - /* If next hop is already there with the same weight, we have nothing to do. */ - if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight) - { - new_mp_adj_index[0] = ~0; - goto done; - } - - /* Copy old next hops to lookup key vector. */ - if (n_nhs > 0) - vec_add (hash_nhs, nhs, n_nhs); - - if (i_nh < n_nhs) - { - /* Change weight of existing next hop. */ - nh = vec_elt_at_index (hash_nhs, i_nh); - } - else - { - /* Add a new next hop. */ - vec_add2 (hash_nhs, nh, 1); - nh->next_hop_adj_index = next_hop_adj_index; - } - - /* Set weight for added or old next hop. */ - nh->weight = next_hop_weight; - } - - if (vec_len (hash_nhs) > 0) - { - u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs, - /* create_if_non_existent */ 1); - if (tmp != ~0) - mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp); - - /* Fetch again since pool may have moved. */ - if (mp_old) - mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - } - - new_mp_adj_index[0] = mp_new ? 
mp_new - lm->multipath_adjacencies : ~0; - - if (mp_new != mp_old) - { - if (mp_old) - { - ASSERT (mp_old->reference_count > 0); - mp_old->reference_count -= 1; - } - if (mp_new) - mp_new->reference_count += 1; - } - - if (mp_old && mp_old->reference_count == 0) - ip_multipath_adjacency_free (lm, mp_old); - - done: - /* Save key vector next call. */ - lm->next_hop_hash_lookup_key = hash_nhs; - - return 1; -} - -static void -ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index); - ip_multipath_adjacency_t * madj, * new_madj; - ip_multipath_next_hop_t * nhs, * hash_nhs; - u32 i, n_nhs, madj_index, new_madj_index; - - if (adj->heap_handle >= vec_len (lm->multipath_adjacencies)) - return; - - vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1); - - for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++) - { - madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index); - if (madj->n_adj_in_block == 0) - continue; - - nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset); - n_nhs = madj->unnormalized_next_hops.count; - for (i = 0; i < n_nhs; i++) - if (nhs[i].next_hop_adj_index == del_adj_index) - break; - - /* del_adj_index not found in unnormalized_next_hops? We're done. */ - if (i >= n_nhs) - continue; - - new_madj = 0; - if (n_nhs > 1) - { - hash_nhs = lm->next_hop_hash_lookup_key; - if (hash_nhs) - _vec_len (hash_nhs) = 0; - if (i > 0) - vec_add (hash_nhs, nhs + 0, i); - if (i + 1 < n_nhs) - vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1)); - - new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1); - - lm->next_hop_hash_lookup_key = hash_nhs; - - if (new_madj_index == madj_index) - continue; - - new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index); - } - - lm->adjacency_remap_table[madj->adj_index] = new_madj ? 
1 + new_madj->adj_index : ~0; - lm->n_adjacency_remaps += 1; - ip_multipath_adjacency_free (lm, madj); - } -} - -void -ip_multipath_adjacency_free (ip_lookup_main_t * lm, - ip_multipath_adjacency_t * a) -{ - hash_unset (lm->multipath_adjacency_by_next_hops, - ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle)); - heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle); - heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle); - - ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0); - memset (a, 0, sizeof (a[0])); -} - -always_inline ip_multipath_next_hop_t * -ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k, - uword * n_next_hops) -{ - ip_multipath_next_hop_t * nhs; - uword n_nhs; - if (ip_next_hop_hash_key_is_heap_handle (k)) - { - uword handle = ip_next_hop_hash_key_get_heap_handle (k); - nhs = heap_elt_with_handle (lm->next_hop_heap, handle); - n_nhs = heap_len (lm->next_hop_heap, handle); - } - else - { - nhs = uword_to_pointer (k, ip_multipath_next_hop_t *); - n_nhs = vec_len (nhs); - } - *n_next_hops = n_nhs; - return nhs; -} - -static uword -ip_next_hop_hash_key_sum (hash_t * h, uword key0) -{ - ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); - ip_multipath_next_hop_t * k0; - uword n0; - - k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); - return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0); -} - -static uword -ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1) -{ - ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); - ip_multipath_next_hop_t * k0, * k1; - uword n0, n1; - - k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); - k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1); - - return n0 == n1 && ! 
memcmp (k0, k1, n0 * sizeof (k0[0])); -} +#include <vnet/adj/adj_alloc.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/mpls/mpls.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/receive_dpo.h> clib_error_t * ip_interface_address_add_del (ip_lookup_main_t * lm, @@ -869,52 +157,16 @@ ip_interface_address_add_del (ip_lookup_main_t * lm, void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) { - ip_adjacency_t * adj; - ip_adjacency_t template_adj; - /* ensure that adjacency is cacheline aligned and sized */ ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0); ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES); - lm->adj_index_by_signature = hash_create (0, sizeof (uword)); - memset (&template_adj, 0, sizeof (template_adj)); - /* Preallocate three "special" adjacencies */ - lm->adjacency_heap = aa_bootstrap (0, 3 /* n=1 free items */); - - /* Hand-craft special miss adjacency to use when nothing matches in the - routing table. Same for drop adjacency. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, - &lm->miss_adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_MISS; - ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX); - - /* Make the "drop" adj sharable */ - template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, - &lm->drop_adj_index); - - /* Make the "local" adj sharable */ - template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - template_adj.if_address_index = ~0; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, - &lm->local_adj_index); + lm->adjacency_heap = adj_heap; if (! 
lm->fib_result_n_bytes) lm->fib_result_n_bytes = sizeof (uword); - lm->multipath_adjacency_by_next_hops - = hash_create2 (/* elts */ 0, - /* user */ pointer_to_uword (lm), - /* value_bytes */ sizeof (uword), - ip_next_hop_hash_key_sum, - ip_next_hop_hash_key_equal, - /* format pair/arg */ - 0, 0); - - /* 1% max error tolerance for multipath. */ - lm->multipath_next_hop_error_tolerance = .01; - lm->is_ip6 = is_ip6; if (is_ip6) { @@ -944,14 +196,12 @@ void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP; lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP; } - - ip_init_registered_adjacencies(!is_ip6); } u8 * format_ip_flow_hash_config (u8 * s, va_list * args) { - u32 flow_hash_config = va_arg (*args, u32); - + flow_hash_config_t flow_hash_config = va_arg (*args, u32); + #define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n); foreach_flow_hash_bit; #undef _ @@ -961,31 +211,20 @@ u8 * format_ip_flow_hash_config (u8 * s, va_list * args) u8 * format_ip_lookup_next (u8 * s, va_list * args) { - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - ip_lookup_next_t n = va_arg (*args, u32); - ip_adj_register_t *reg; - + ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t); char * t = 0; switch (n) { default: - vec_validate(lm->registered_adjacencies, n); - reg = vec_elt_at_index(lm->registered_adjacencies, n); - if (reg->node_name) { - s = format (s, "%s:", reg->node_name); - } + s = format (s, "unknown %d", n); return s; - case IP_LOOKUP_NEXT_MISS: t = "miss"; break; case IP_LOOKUP_NEXT_DROP: t = "drop"; break; case IP_LOOKUP_NEXT_PUNT: t = "punt"; break; - case IP_LOOKUP_NEXT_LOCAL: t = "local"; break; case IP_LOOKUP_NEXT_ARP: t = "arp"; break; - case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break; - case IP_LOOKUP_NEXT_MAP: t = "map"; break; - case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break; - case 
IP_LOOKUP_NEXT_INDIRECT: t="indirect"; break; + case IP_LOOKUP_NEXT_MIDCHAIN: t="midchain"; break; + case IP_LOOKUP_NEXT_GLEAN: t="glean"; break; case IP_LOOKUP_NEXT_REWRITE: break; } @@ -996,120 +235,13 @@ u8 * format_ip_lookup_next (u8 * s, va_list * args) return s; } -static u8 * format_ip_interface_address (u8 * s, va_list * args) -{ - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - u32 if_address_index = va_arg (*args, u32); - ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index); - void * a = ip_interface_address_get_address (lm, ia); - - if (lm->is_ip6) - return format (s, "%U", format_ip6_address_and_length, a, ia->address_length); - else - return format (s, "%U", format_ip4_address_and_length, a, ia->address_length); -} - -u32 vnet_register_special_adjacency_format_function -(ip_lookup_main_t * lm, format_function_t * fp) -{ - u32 rv; - /* - * Initialize the format function registration vector - * Index 0 must be invalid, to avoid finding and fixing trivial bugs - * all over the place - */ - if (vec_len (lm->special_adjacency_format_functions) == 0) - { - vec_add1 (lm->special_adjacency_format_functions, - (format_function_t *) 0); - } - - rv = vec_len (lm->special_adjacency_format_functions); - vec_add1 (lm->special_adjacency_format_functions, fp); - return rv; -} - -/** @brief Pretty print helper function for formatting specific adjacencies. 
- @param s - input string to format - @param args - other args passed to format function such as: - - vnet_main_t - - ip_lookup_main_t - - adj_index -*/ -u8 * format_ip_adjacency (u8 * s, va_list * args) -{ - vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - u32 adj_index = va_arg (*args, u32); - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - ip_adj_register_t *reg; - - if (adj->lookup_next_index < vec_len (lm->registered_adjacencies)) - { - reg = vec_elt_at_index(lm->registered_adjacencies, - adj->lookup_next_index); - if (reg->fn) - { - s = format(s, " %U", reg->fn, lm, adj); - goto format_done; - } - } - - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - s = format (s, "%U", - format_vnet_rewrite, - vnm->vlib_main, &adj->rewrite_header, - sizeof (adj->rewrite_data)); - break; - - case IP_LOOKUP_NEXT_ARP: - if (adj->if_address_index != ~0) - s = format (s, " %U", format_ip_interface_address, lm, - adj->if_address_index); - if (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1]) - s = format (s, " via %U", format_ip46_address, - &adj->arp.next_hop, IP46_TYPE_ANY); - break; - case IP_LOOKUP_NEXT_LOCAL: - if (adj->if_address_index != ~0) - s = format (s, " %U", format_ip_interface_address, lm, - adj->if_address_index); - break; - - case IP_LOOKUP_NEXT_CLASSIFY: - s = format (s, " table %d", adj->classify.table_index); - break; - case IP_LOOKUP_NEXT_INDIRECT: - s = format (s, " via %U", format_ip46_address, - &adj->indirect.next_hop, IP46_TYPE_ANY); - break; - - default: - s = format (s, " unknown %d", adj->lookup_next_index); - break; - } - - format_done: - if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0) - s = format (s, " lookup fib index %d", adj->explicit_fib_index); - if (adj->share_count > 0) - s = format (s, " shared %d", adj->share_count + 1); - if (adj->next_adj_with_signature) - s = format (s, " next_adj_with_signature %d", 
adj->next_adj_with_signature); - - return s; -} - u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) { vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); u32 adj_index = va_arg (*args, u32); u8 * packet_data = va_arg (*args, u8 *); u32 n_packet_data_bytes = va_arg (*args, u32); - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + ip_adjacency_t * adj = adj_get(adj_index); switch (adj->lookup_next_index) { @@ -1126,119 +258,90 @@ u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) return s; } -static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args) +static uword unformat_dpo (unformat_input_t * input, va_list * args) { - ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *); - ip_lookup_next_t n; + dpo_id_t *dpo = va_arg (*args, dpo_id_t *); + fib_protocol_t fp = va_arg (*args, int); + dpo_proto_t proto; - if (unformat (input, "drop")) - n = IP_LOOKUP_NEXT_DROP; + proto = fib_proto_to_dpo(fp); + if (unformat (input, "drop")) + dpo_copy(dpo, drop_dpo_get(proto)); else if (unformat (input, "punt")) - n = IP_LOOKUP_NEXT_PUNT; - + dpo_copy(dpo, punt_dpo_get(proto)); else if (unformat (input, "local")) - n = IP_LOOKUP_NEXT_LOCAL; - - else if (unformat (input, "arp")) - n = IP_LOOKUP_NEXT_ARP; - + receive_dpo_add_or_lock(proto, ~0, NULL, dpo); else if (unformat (input, "classify")) - n = IP_LOOKUP_NEXT_CLASSIFY; + { + u32 classify_table_index; + + if (!unformat (input, "%d", &classify_table_index)) + { + clib_warning ("classify adj must specify table index"); + return 0; + } + dpo_set(dpo, DPO_CLASSIFY, proto, + classify_dpo_create(fp, classify_table_index)); + } else return 0; - - *result = n; + return 1; } -static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args) -{ - vlib_main_t * vm = va_arg (*args, vlib_main_t *); - ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *); - u32 node_index = va_arg (*args, u32); - vnet_main_t 
* vnm = vnet_get_main(); - u32 sw_if_index, is_ip6; - ip46_address_t a46; - ip_lookup_next_t next; +const ip46_address_t zero_addr = { + .as_u64 = { + 0, 0 + }, +}; - is_ip6 = node_index == ip6_rewrite_node.index; - adj->rewrite_header.node_index = node_index; - adj->explicit_fib_index = ~0; +u32 +fib_table_id_find_fib_index (fib_protocol_t proto, + u32 table_id) +{ + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + uword * p; - if (unformat (input, "arp %U %U", - unformat_vnet_sw_interface, vnm, &sw_if_index, - unformat_ip46_address, &a46, is_ip6?IP46_TYPE_IP6:IP46_TYPE_IP4)) + switch (proto) { - ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main; - ip_adjacency_t * a_adj; - u32 adj_index; - - if (is_ip6) - adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6); - else - adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4); - - a_adj = ip_get_adjacency (lm, adj_index); - - if (a_adj->rewrite_header.sw_if_index != sw_if_index) - return 0; - - if (is_ip6) - ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); - else - ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); + case FIB_PROTOCOL_IP4: + p = hash_get(im4->fib_index_by_table_id, table_id); + break; + case FIB_PROTOCOL_IP6: + p = hash_get(im6->fib_index_by_table_id, table_id); + break; + default: + p = NULL; + break; } - - else if (unformat_user (input, unformat_ip_lookup_next, &next)) + if (NULL != p) { - adj->lookup_next_index = next; - adj->if_address_index = ~0; - if (next == IP_LOOKUP_NEXT_LOCAL) - (void) unformat (input, "%d", &adj->if_address_index); - else if (next == IP_LOOKUP_NEXT_CLASSIFY) - { - if (!unformat (input, "%d", &adj->classify.table_index)) - { - clib_warning ("classify adj must specify table index"); - return 0; - } - } - else if (next == IP_LOOKUP_NEXT_DROP) - { - adj->rewrite_header.node_index = 0; - } + return (p[0]); } - - else if (unformat_user (input, - 
unformat_vnet_rewrite, - vm, &adj->rewrite_header, sizeof (adj->rewrite_data))) - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - - else - return 0; - - return 1; + return (~0); } clib_error_t * -vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) +vnet_ip_route_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd) { - vnet_main_t * vnm = vnet_get_main(); - clib_error_t * error = 0; - u32 table_id, is_del; - u32 weight, * weights = 0; - u32 * table_ids = 0; - u32 sw_if_index, * sw_if_indices = 0; - ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0; - ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0; - u32 dst_address_length, * dst_address_lengths = 0; - ip_adjacency_t parse_adj, * add_adj = 0; unformat_input_t _line_input, * line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + dpo_id_t dpo = DPO_NULL, *dpos = NULL; + fib_prefix_t *prefixs = NULL, pfx; + clib_error_t * error = NULL; + mpls_label_t out_label; + u32 table_id, is_del; + vnet_main_t * vnm; + u32 fib_index; f64 count; - u32 outer_table_id; + int i; + vnm = vnet_get_main(); is_del = 0; table_id = 0; count = 1; @@ -1247,410 +350,311 @@ vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_com if (! 
unformat_user (main_input, unformat_line_input, line_input)) return 0; - memset(&parse_adj, 0, sizeof (parse_adj)); - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + memset(&rpath, 0, sizeof(rpath)); + memset(&pfx, 0, sizeof(pfx)); + if (unformat (line_input, "table %d", &table_id)) ; else if (unformat (line_input, "del")) is_del = 1; else if (unformat (line_input, "add")) is_del = 0; + else if (unformat (line_input, "resolve-via-host")) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then flags"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST; + } + else if (unformat (line_input, "resolve-via-attached")) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then flags"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_flags |= + FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + } + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then labels"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_label = out_label; + } else if (unformat (line_input, "count %f", &count)) ; else if (unformat (line_input, "%U/%d", - unformat_ip4_address, &ip4_addr, - &dst_address_length)) - { - vec_add1 (ip4_dst_addresses, ip4_addr); - vec_add1 (dst_address_lengths, dst_address_length); - } - + unformat_ip4_address, + &pfx.fp_addr.ip4, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + vec_add1(prefixs, pfx); + } else if (unformat (line_input, "%U/%d", - unformat_ip6_address, &ip6_addr, - &dst_address_length)) - { - vec_add1 (ip6_dst_addresses, ip6_addr); - vec_add1 (dst_address_lengths, dst_address_length); - } - + unformat_ip6_address, + &pfx.fp_addr.ip6, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + vec_add1(prefixs, pfx); + } else if (unformat (line_input, "via %U %U weight %u", - unformat_ip4_address, &ip4_addr, - unformat_vnet_sw_interface, vnm, 
&sw_if_index, - &weight)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, weight); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U weight %u", - unformat_ip6_address, &ip6_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index, - &weight)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, weight); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", - unformat_ip4_address, &ip4_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", - unformat_ip6_address, &ip6_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + 
vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip4_address, + &rpath.frp_addr.ip4, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip6_address, + &rpath.frp_addr.ip6, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U", - unformat_ip4_address, &ip4_addr)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, (u32)~0); - vec_add1 (weights, 1); - vec_add1 (table_ids, table_id); - } + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + /* + * the recursive next-hops are by default in the same table + * as the prefix + */ + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U", - unformat_ip6_address, &ip6_addr)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, (u32)~0); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)table_id); - } - - else if (vec_len (ip4_dst_addresses) > 0 - && unformat (line_input, "via %U", - unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index)) - vec_add1 (add_adj, parse_adj); - - else if (vec_len (ip6_dst_addresses) > 0 - && unformat (line_input, "via %U", - unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index)) - vec_add1 (add_adj, parse_adj); - else if (unformat (line_input, "lookup in table %d", &outer_table_id)) - { - uword * p; - - if (vec_len (ip4_dst_addresses) > 0) - p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id); - else - p = hash_get 
(ip6_main.fib_index_by_table_id, outer_table_id); - - if (p == 0) - { - error = clib_error_return (0, "Nonexistent outer table id %d", - outer_table_id); - goto done; - } - - parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - parse_adj.explicit_fib_index = p[0]; - vec_add1 (add_adj, parse_adj); - } + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "lookup in table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = pfx.fp_proto; + vec_add1(rpaths, rpath); + } + else if (vec_len (prefixs) > 0 && + unformat (line_input, "via %U", + unformat_dpo, &dpo, prefixs[0].fp_proto)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + vec_add1 (dpos, dpo); + } else - { + { error = unformat_parse_error (line_input); goto done; - } + } } unformat_free (line_input); - if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0) - { + if (vec_len (prefixs) == 0) + { error = clib_error_return (0, "expected ip4/ip6 destination address/length."); goto done; } - if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0) - { - error = clib_error_return (0, "mixed ip4/ip6 address/length."); - goto done; - } - - if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0) - { - error = clib_error_return (0, "ip4 destinations with ip6 next hops."); - goto done; - } - - if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0) - { - error = clib_error_return (0, "ip6 destinations with ip4 next hops."); - goto done; - } - - if (! 
is_del && vec_len (add_adj) + vec_len (weights) == 0) + if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0) { - error = clib_error_return (0, "no next hops or adjacencies to add."); + error = clib_error_return (0, "expected paths."); goto done; } + if (~0 == table_id) { - int i; - ip4_main_t * im4 = &ip4_main; - ip6_main_t * im6 = &ip6_main; + /* + * if no table_id is passed we will manipulate the default + */ + fib_index = 0; + } + else + { + fib_index = fib_table_id_find_fib_index(prefixs[0].fp_proto, + table_id); - for (i = 0; i < vec_len (ip4_dst_addresses); i++) + if (~0 == fib_index) { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.dst_address = ip4_dst_addresses[i]; - a.dst_address_length = dst_address_lengths[i]; - a.adj_index = ~0; - - if (is_del) - { - if (vec_len (ip4_via_next_hops) == 0) - { - uword * dst_hash, * dst_result; - u32 dst_address_u32; - ip4_fib_t * fib; - - fib = find_ip4_fib_by_table_index_or_id (im4, table_id, - 0 /* by table id */); - - a.flags |= IP4_ROUTE_FLAG_DEL; - dst_address_u32 = a.dst_address.as_u32 - & im4->fib_masks[a.dst_address_length]; - - dst_hash = - fib->adj_index_by_dst_address[a.dst_address_length]; - dst_result = hash_get (dst_hash, dst_address_u32); - if (dst_result) - a.adj_index = dst_result[0]; - else - { - clib_warning ("%U/%d not in FIB", - format_ip4_address, &a.dst_address, - a.dst_address_length); - continue; - } - - ip4_add_del_route (im4, &a); - ip4_maybe_remap_adjacencies (im4, table_id, - IP4_ROUTE_FLAG_TABLE_ID); - } - else - { - u32 i, j, n, f, incr; - ip4_address_t dst = a.dst_address; - f64 t[2]; - n = count; - t[0] = vlib_time_now (vm); - incr = 1<<(32 - a.dst_address_length); - for (i = 0; i < n; i++) - { - f = i + 1 < n ? 
IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; - a.dst_address = dst; - for (j = 0; j < vec_len (ip4_via_next_hops); j++) - { - if (table_ids[j] != (u32)~0) - { - uword * p = hash_get (im4->fib_index_by_table_id, - table_ids[j]); - if (p == 0) - { - clib_warning ("no such FIB table %d", - table_ids[j]); - continue; - } - table_ids[j] = p[0]; - } - - ip4_add_del_route_next_hop (im4, - IP4_ROUTE_FLAG_DEL | f, - &a.dst_address, - a.dst_address_length, - &ip4_via_next_hops[j], - sw_if_indices[j], - weights[j], (u32)~0, - table_ids[j] /* fib index */); - } - dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); - } - t[1] = vlib_time_now (vm); - if (count > 1) - vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); - } - } - else - { - if (vec_len (add_adj) > 0) - { - a.flags |= IP4_ROUTE_FLAG_ADD; - a.add_adj = add_adj; - a.n_add_adj = vec_len (add_adj); - - ip4_add_del_route (im4, &a); - } - else if (vec_len (ip4_via_next_hops) > 0) - { - u32 i, j, n, f, incr; - ip4_address_t dst = a.dst_address; - f64 t[2]; - n = count; - t[0] = vlib_time_now (vm); - incr = 1<<(32 - a.dst_address_length); - for (i = 0; i < n; i++) - { - f = i + 1 < n ? 
IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; - a.dst_address = dst; - for (j = 0; j < vec_len (ip4_via_next_hops); j++) - { - if (table_ids[j] != (u32)~0) - { - uword * p = hash_get (im4->fib_index_by_table_id, - table_ids[j]); - if (p == 0) - { - clib_warning ("no such FIB table %d", - table_ids[j]); - continue; - } - table_ids[j] = p[0]; - } - ip4_add_del_route_next_hop (im4, - IP4_ROUTE_FLAG_ADD | f, - &a.dst_address, - a.dst_address_length, - &ip4_via_next_hops[j], - sw_if_indices[j], - weights[j], (u32)~0, - table_ids[j] /* fib index */); - } - dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); - } - t[1] = vlib_time_now (vm); - if (count > 1) - vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); - } - } + error = clib_error_return (0, + "Nonexistent table id %d", + table_id); + goto done; } + } - for (i = 0; i < vec_len (ip6_dst_addresses); i++) + for (i = 0; i < vec_len (prefixs); i++) + { + if (is_del && 0 == vec_len (rpaths)) { - ip6_add_del_route_args_t a; - - - memset (&a, 0, sizeof (a)); - a.flags = IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.dst_address = ip6_dst_addresses[i]; - a.dst_address_length = dst_address_lengths[i]; - a.adj_index = ~0; - - if (is_del) + fib_table_entry_delete(fib_index, + &prefixs[i], + FIB_SOURCE_CLI); + } + else if (!is_del && 1 == vec_len (dpos)) + { + fib_table_entry_special_dpo_add(fib_index, + &prefixs[i], + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpos[0]); + dpo_reset(&dpos[0]); + } + else if (vec_len (dpos) > 0) + { + error = clib_error_return(0 , "Load-balancing over multiple special adjacencies is unsupported"); + goto done; + } + else if (0 < vec_len (rpaths)) + { + u32 k, j, n, incr; + ip46_address_t dst = prefixs[i].fp_addr; + f64 t[2]; + n = count; + t[0] = vlib_time_now (vm); + incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 
32 : 128) - + prefixs[i].fp_len); + + for (k = 0; k < n; k++) { - if (vec_len (ip6_via_next_hops) == 0) + for (j = 0; j < vec_len (rpaths); j++) { - BVT(clib_bihash_kv) kv, value; - ip6_address_t dst_address; - ip6_fib_t * fib; - - fib = find_ip6_fib_by_table_index_or_id (im6, table_id, - 0 /* by table id */); - - a.flags |= IP4_ROUTE_FLAG_DEL; - - dst_address = ip6_dst_addresses[i]; - - ip6_address_mask (&dst_address, - &im6->fib_masks[dst_address_length]); - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64)(fib - im6->fibs)<<32) - | a.dst_address_length; - - if (BV(clib_bihash_search)(&im6->ip6_lookup_table, - &kv, &value) == 0) - a.adj_index = value.value; - else - { - clib_warning ("%U/%d not in FIB", - format_ip6_address, &a.dst_address, - a.dst_address_length); - continue; - } - - a.flags |= IP6_ROUTE_FLAG_DEL; - ip6_add_del_route (im6, &a); - ip6_maybe_remap_adjacencies (im6, table_id, - IP6_ROUTE_FLAG_TABLE_ID); + /* + * the CLI parsing stored table Ids, swap to FIB indicies + */ + rpaths[i].frp_fib_index = + fib_table_id_find_fib_index(prefixs[i].fp_proto, + rpaths[i].frp_fib_index); + + fib_prefix_t rpfx = { + .fp_len = prefixs[i].fp_len, + .fp_proto = prefixs[i].fp_proto, + .fp_addr = dst, + }; + + if (is_del) + fib_table_entry_path_remove2(fib_index, + &rpfx, + FIB_SOURCE_CLI, + &rpaths[j]); + else + fib_table_entry_path_add2(fib_index, + &rpfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + &rpaths[j]); } - else - { - u32 i; - for (i = 0; i < vec_len (ip6_via_next_hops); i++) - { - ip6_add_del_route_next_hop (im6, - IP6_ROUTE_FLAG_DEL, - &a.dst_address, - a.dst_address_length, - &ip6_via_next_hops[i], - sw_if_indices[i], - weights[i], (u32)~0, - table_ids[i] /* fib index */); - } - } - } - else - { - if (vec_len (add_adj) > 0) + + if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto) { - a.flags |= IP6_ROUTE_FLAG_ADD; - a.add_adj = add_adj; - a.n_add_adj = vec_len (add_adj); - - ip6_add_del_route (im6, &a); + 
dst.ip4.as_u32 = + clib_host_to_net_u32(incr + + clib_net_to_host_u32 (dst.ip4.as_u32)); } - else if (vec_len (ip6_via_next_hops) > 0) + else { - u32 i; - for (i = 0; i < vec_len (ip6_via_next_hops); i++) - { - ip6_add_del_route_next_hop (im6, - IP6_ROUTE_FLAG_ADD, - &a.dst_address, - a.dst_address_length, - &ip6_via_next_hops[i], - sw_if_indices[i], - weights[i], (u32)~0, - table_ids[i]); - } + int bucket = (incr < 64 ? 0 : 1); + dst.ip6.as_u64[bucket] = + clib_host_to_net_u64(incr + + clib_net_to_host_u64 ( + dst.ip6.as_u64[bucket])); + } } + t[1] = vlib_time_now (vm); + if (count > 1) + vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); + } + else + { + error = clib_error_return(0 , "Don't understand what you want..."); + goto done; } } + done: - vec_free (add_adj); - vec_free (weights); - vec_free (dst_address_lengths); - vec_free (ip4_dst_addresses); - vec_free (ip6_dst_addresses); - vec_free (ip4_via_next_hops); - vec_free (ip6_via_next_hops); + vec_free (dpos); + vec_free (prefixs); + vec_free (rpaths); return error; } @@ -1708,14 +712,14 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .is_mp_safe = 1, }; -/* +/* * The next two routines address a longstanding script hemorrhoid. * Probing a v4 or v6 neighbor needs to appear to be synchronous, * or dependent route-adds will simply fail. 
*/ static clib_error_t * ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, - int retry_count) + int retry_count) { vnet_main_t * vnm = vnet_get_main(); clib_error_t * e; @@ -1727,7 +731,7 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, ASSERT (vlib_in_process_context(vm)); if (retry_count > 0) - vnet_register_ip6_neighbor_resolution_event + vnet_register_ip6_neighbor_resolution_event (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, 1 /* event */, 0 /* data */); @@ -1735,17 +739,17 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, { /* The interface may be down, etc. */ e = ip6_probe_neighbor (vm, a, sw_if_index); - + if (e) - return e; - + return e; + vlib_process_wait_for_event_or_clock (vm, 1.0); event_type = vlib_process_get_events (vm, &event_data); - switch (event_type) - { - case 1: /* resolved... */ - vlib_cli_output (vm, "Resolved %U", - format_ip6_address, a); + switch (event_type) + { + case 1: /* resolved... */ + vlib_cli_output (vm, "Resolved %U", + format_ip6_address, a); resolved = 1; goto done; @@ -1883,526 +887,3 @@ VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]", .is_mp_safe = 1, }; - -typedef CLIB_PACKED (struct { - ip4_address_t address; - - u32 address_length : 6; - - u32 index : 26; -}) ip4_route_t; - -static int -ip4_route_cmp (void * a1, void * a2) -{ - ip4_route_t * r1 = a1; - ip4_route_t * r2 = a2; - - int cmp = ip4_address_compare (&r1->address, &r2->address); - return cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); -} - -static clib_error_t * -ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im4 = &ip4_main; - ip4_route_t * routes, * r; - ip4_fib_t * fib; - ip_lookup_main_t * lm = &im4->lookup_main; - uword * results, i; - int verbose, matching, mtrie, include_empty_fibs; - ip4_address_t matching_address; - u8 clear = 0; - int table_id = -1; - - routes = 0; - results = 0; - verbose = 1; - include_empty_fibs = 0; - matching = 0; - mtrie = 0; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "brief") || unformat (input, "summary") - || unformat (input, "sum")) - verbose = 0; - - else if (unformat (input, "mtrie")) - mtrie = 1; - - else if (unformat (input, "include-empty")) - include_empty_fibs = 1; - - else if (unformat (input, "%U", unformat_ip4_address, &matching_address)) - matching = 1; - - else if (unformat (input, "clear")) - clear = 1; - - else if (unformat (input, "table %d", &table_id)) - ; - else - break; - } - - vec_foreach (fib, im4->fibs) - { - int fib_not_empty; - - fib_not_empty = 0; - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - uword n_elts = hash_elts (hash); - if (n_elts) - { - fib_not_empty = 1; - break; - } - } - - if (fib_not_empty == 0 && include_empty_fibs == 0) - continue; - - if (table_id >= 0 && table_id != (int)fib->table_id) - continue; - - if (include_empty_fibs) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - - /* Show summary? */ - if (! 
verbose) - { - if (include_empty_fibs == 0) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - uword n_elts = hash_elts (hash); - if (n_elts > 0) - vlib_cli_output (vm, "%20d%16d", i, n_elts); - } - continue; - } - - if (routes) - _vec_len (routes) = 0; - if (results) - _vec_len (results) = 0; - - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - hash_pair_t * p; - ip4_route_t x; - - x.address_length = i; - - if (matching) - { - x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i]; - p = hash_get_pair (hash, x.address.as_u32); - if (p) - { - if (lm->fib_result_n_words > 1) - { - x.index = vec_len (results); - vec_add (results, p->value, lm->fib_result_n_words); - } - else - x.index = p->value[0]; - vec_add1 (routes, x); - } - } - else - { - hash_foreach_pair (p, hash, ({ - x.address.data_u32 = p->key; - if (lm->fib_result_n_words > 1) - { - x.index = vec_len (results); - vec_add (results, p->value, lm->fib_result_n_words); - } - else - x.index = p->value[0]; - - vec_add1 (routes, x); - })); - } - } - - vec_sort_with_function (routes, ip4_route_cmp); - if (vec_len(routes)) { - if (include_empty_fibs == 0) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - if (mtrie) - vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); - vlib_cli_output (vm, "%=20s%=16s%=16s%=16s", - "Destination", "Packets", "Bytes", "Adjacency"); - } - vec_foreach (r, routes) - { - vlib_counter_t c, sum; - uword i, j, n_left, n_nhs, adj_index, * result = 0; - ip_adjacency_t * adj; - ip_multipath_next_hop_t * nhs, 
tmp_nhs[1]; - - adj_index = r->index; - if (lm->fib_result_n_words > 1) - { - result = vec_elt_at_index (results, adj_index); - adj_index = result[0]; - } - - adj = ip_get_adjacency (lm, adj_index); - if (adj->n_adj == 1) - { - nhs = &tmp_nhs[0]; - nhs[0].next_hop_adj_index = ~0; /* not used */ - nhs[0].weight = 1; - n_nhs = 1; - } - else - { - ip_multipath_adjacency_t * madj; - madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); - nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); - n_nhs = madj->normalized_next_hops.count; - } - - n_left = nhs[0].weight; - vlib_counter_zero (&sum); - for (i = j = 0; i < adj->n_adj; i++) - { - n_left -= 1; - vlib_get_combined_counter (&lm->adjacency_counters, - adj_index + i, &c); - if (clear) - vlib_zero_combined_counter (&lm->adjacency_counters, - adj_index + i); - vlib_counter_add (&sum, &c); - if (n_left == 0) - { - u8 * msg = 0; - uword indent; - - if (j == 0) - msg = format (msg, "%-20U", - format_ip4_address_and_length, - r->address.data, r->address_length); - else - msg = format (msg, "%U", format_white_space, 20); - - msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); - - indent = vec_len (msg); - msg = format (msg, "weight %d, index %d", - nhs[j].weight, adj_index + i); - - if (ip_adjacency_is_multipath(lm, adj_index)) - msg = format (msg, ", multipath"); - - msg = format (msg, "\n%U%U", - format_white_space, indent, - format_ip_adjacency, - vnm, lm, adj_index + i); - - vlib_cli_output (vm, "%v", msg); - vec_free (msg); - - if (result && lm->format_fib_result) - vlib_cli_output (vm, "%20s%U", "", - lm->format_fib_result, vm, lm, result, - i + 1 - nhs[j].weight, - nhs[j].weight); - - j++; - if (j < n_nhs) - { - n_left = nhs[j].weight; - vlib_counter_zero (&sum); - } - } - } - } - } - - vec_free (routes); - vec_free (results); - - return 0; -} - -/*? - * Show FIB/route entries - * - * @cliexpar - * @cliexstart{show ip fib} - * Display the IPv4 FIB. 
- * This command will run for a long time when the FIBs comprise millions of entries. - * vpp# sh ip fib - * Table 0 - * Destination Packets Bytes Adjacency - * 6.0.0.0/8 0 0 weight 1, index 3 - * arp fake-eth0 6.0.0.1/8 - * 6.0.0.1/32 0 0 weight 1, index 4 - * local 6.0.0.1/8 - * - * And so forth. Use 'show ip fib summary' for a summary: - * - * vpp# sh ip fib summary - * Table 0 - * Prefix length Count - * 8 1 - * 32 4 - * @cliexend - ?*/ -VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { - .path = "show ip fib", - .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]", - .function = ip4_show_fib, -}; - -typedef struct { - ip6_address_t address; - - u32 address_length; - - u32 index; -} ip6_route_t; - -typedef struct { - u32 fib_index; - ip6_route_t ** routep; -} add_routes_in_fib_arg_t; - -static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg) -{ - add_routes_in_fib_arg_t * ap = arg; - - if (kvp->key[2]>>32 == ap->fib_index) - { - ip6_address_t *addr; - ip6_route_t * r; - addr = (ip6_address_t *) kvp; - vec_add2 (*ap->routep, r, 1); - r->address = addr[0]; - r->address_length = kvp->key[2] & 0xFF; - r->index = kvp->value; - } -} - -typedef struct { - u32 fib_index; - u64 count_by_prefix_length[129]; -} count_routes_in_fib_at_prefix_length_arg_t; - -static void count_routes_in_fib_at_prefix_length -(BVT(clib_bihash_kv) * kvp, void *arg) -{ - count_routes_in_fib_at_prefix_length_arg_t * ap = arg; - int mask_width; - - if ((kvp->key[2]>>32) != ap->fib_index) - return; - - mask_width = kvp->key[2] & 0xFF; - - ap->count_by_prefix_length[mask_width]++; -} - -static int -ip6_route_cmp (void * a1, void * a2) -{ - ip6_route_t * r1 = a1; - ip6_route_t * r2 = a2; - - int cmp = ip6_address_compare (&r1->address, &r2->address); - return cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); -} - -static clib_error_t * -ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im6 = &ip6_main; - ip6_route_t * routes, * r; - ip6_fib_t * fib; - ip_lookup_main_t * lm = &im6->lookup_main; - uword * results; - int verbose; - BVT(clib_bihash) * h = &im6->ip6_lookup_table; - __attribute__((unused)) u8 clear = 0; - add_routes_in_fib_arg_t _a, *a=&_a; - count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca; - - routes = 0; - results = 0; - verbose = 1; - if (unformat (input, "brief") || unformat (input, "summary") - || unformat (input, "sum")) - verbose = 0; - - if (unformat (input, "clear")) - clear = 1; - - vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap", - im6->lookup_table_nbuckets, im6->lookup_table_size>>20); - vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/); - vlib_cli_output (vm, " "); - - vec_foreach (fib, im6->fibs) - { - vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im6->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - - /* Show summary? */ - if (! 
verbose) - { - int len; - vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); - - memset (ca, 0, sizeof(*ca)); - ca->fib_index = fib - im6->fibs; - - BV(clib_bihash_foreach_key_value_pair) - (h, count_routes_in_fib_at_prefix_length, ca); - - for (len = 128; len >= 0; len--) - { - if (ca->count_by_prefix_length[len]) - vlib_cli_output (vm, "%=20d%=16lld", - len, ca->count_by_prefix_length[len]); - } - continue; - } - - if (routes) - _vec_len (routes) = 0; - if (results) - _vec_len (results) = 0; - - a->fib_index = fib - im6->fibs; - a->routep = &routes; - - BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a); - - vec_sort_with_function (routes, ip6_route_cmp); - - vlib_cli_output (vm, "%=45s%=16s%=16s%=16s", - "Destination", "Packets", "Bytes", "Adjacency"); - vec_foreach (r, routes) - { - vlib_counter_t c, sum; - uword i, j, n_left, n_nhs, adj_index, * result = 0; - ip_adjacency_t * adj; - ip_multipath_next_hop_t * nhs, tmp_nhs[1]; - - adj_index = r->index; - if (lm->fib_result_n_words > 1) - { - result = vec_elt_at_index (results, adj_index); - adj_index = result[0]; - } - - adj = ip_get_adjacency (lm, adj_index); - if (adj->n_adj == 1) - { - nhs = &tmp_nhs[0]; - nhs[0].next_hop_adj_index = ~0; /* not used */ - nhs[0].weight = 1; - n_nhs = 1; - } - else - { - ip_multipath_adjacency_t * madj; - madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); - nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); - n_nhs = madj->normalized_next_hops.count; - } - - n_left = nhs[0].weight; - vlib_counter_zero (&sum); - for (i = j = 0; i < adj->n_adj; i++) - { - n_left -= 1; - vlib_get_combined_counter (&lm->adjacency_counters, - adj_index + i, &c); - if (clear) - vlib_zero_combined_counter (&lm->adjacency_counters, - adj_index + i); - vlib_counter_add (&sum, &c); - if (n_left == 0) - { - u8 * msg = 0; - uword indent; - - if (j == 0) - msg = format (msg, "%-45U", - format_ip6_address_and_length, - 
r->address.as_u8, r->address_length); - else - msg = format (msg, "%U", format_white_space, 20); - - msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); - - indent = vec_len (msg); - msg = format (msg, "weight %d, index %d", - nhs[j].weight, adj_index + i); - - if (ip_adjacency_is_multipath(lm, adj_index + i)) - msg = format (msg, ", multipath"); - - msg = format (msg, "\n%U%U", - format_white_space, indent, - format_ip_adjacency, - vnm, lm, adj_index + i); - - vlib_cli_output (vm, "%v", msg); - vec_free (msg); - - j++; - if (j < n_nhs) - { - n_left = nhs[j].weight; - vlib_counter_zero (&sum); - } - } - } - - if (result && lm->format_fib_result) - vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0); - } - vlib_cli_output (vm, " "); - } - - vec_free (routes); - vec_free (results); - - return 0; -} - -/*? - * Show FIB6/route entries - * - * @cliexpar - * @cliexstart{show ip fib} - * Display the IPv6 FIB. - * This command will run for a long time when the FIBs comprise millions of entries. - * See 'show ip fib' - * @cliexend - ?*/ -VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { - .path = "show ip6 fib", - .short_help = "show ip6 fib [summary] [clear]", - .function = ip6_show_fib, -}; diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index dcc9d25f..c8dcc141 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -45,7 +45,6 @@ * - Callbacks on route add. * - Callbacks on interface address change. */ - #ifndef included_ip_lookup_h #define included_ip_lookup_h @@ -53,12 +52,11 @@ #include <vlib/buffer.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/dpo/dpo.h> /** @brief Common (IP4/IP6) next index stored in adjacency. */ typedef enum { - /** Packet does not match any route in table. */ - IP_LOOKUP_NEXT_MISS, - /** Adjacency to drop this packet. */ IP_LOOKUP_NEXT_DROP, /** Adjacency to punt this packet. 
*/ @@ -67,27 +65,26 @@ typedef enum { /** This packet is for one of our own IP addresses. */ IP_LOOKUP_NEXT_LOCAL, - /** This packet matches an "interface route" and packets + /** This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite string for this destination. */ IP_LOOKUP_NEXT_ARP, + /** This packet matches an "interface route" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_GLEAN, + /** This packet is to be rewritten and forwarded to the next processing node. This is typically the output interface but might be another node for further output processing. */ IP_LOOKUP_NEXT_REWRITE, - /** This packet needs to be classified */ - IP_LOOKUP_NEXT_CLASSIFY, - - /** This packet needs to go to MAP - RFC7596, RFC7597 */ - IP_LOOKUP_NEXT_MAP, + /** This packets follow a load-balance */ + IP_LOOKUP_NEXT_LOAD_BALANCE, - /** This packet needs to go to MAP with Translation - RFC7599 */ - IP_LOOKUP_NEXT_MAP_T, - - /** This packets needs to go to indirect next hop */ - IP_LOOKUP_NEXT_INDIRECT, + /** This packets follow a mid-chain adjacency */ + IP_LOOKUP_NEXT_MIDCHAIN, /** This packets needs to go to ICMP error */ IP_LOOKUP_NEXT_ICMP_ERROR, @@ -100,7 +97,7 @@ typedef enum { } ip4_lookup_next_t; typedef enum { - /** Hop-by-hop header handling */ + /* Hop-by-hop header handling */ IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT, IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP, IP6_LOOKUP_NEXT_POP_HOP_BY_HOP, @@ -108,30 +105,26 @@ typedef enum { } ip6_lookup_next_t; #define IP4_LOOKUP_NEXT_NODES { \ - [IP_LOOKUP_NEXT_MISS] = "ip4-miss", \ [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \ - [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", \ - [IP_LOOKUP_NEXT_MAP] = "ip4-map", \ - [IP_LOOKUP_NEXT_MAP_T] = 
"ip4-map-t", \ - [IP_LOOKUP_NEXT_INDIRECT] = "ip4-indirect", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ } #define IP6_LOOKUP_NEXT_NODES { \ - [IP_LOOKUP_NEXT_MISS] = "ip6-miss", \ [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ - [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", \ - [IP_LOOKUP_NEXT_MAP] = "ip6-map", \ - [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", \ - [IP_LOOKUP_NEXT_INDIRECT] = "ip6-indirect", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ @@ -157,20 +150,20 @@ _(dport, IP_FLOW_HASH_DST_PORT) \ _(proto, IP_FLOW_HASH_PROTO) \ _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) +/** + * A flow hash configuration is a mask of the flow hash options + */ +typedef u32 flow_hash_config_t; + #define IP_ADJACENCY_OPAQUE_SZ 16 /** @brief IP unicast adjacency. @note cache aligned. */ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); - /** Handle for this adjacency in adjacency heap. */ + /* Handle for this adjacency in adjacency heap. */ u32 heap_handle; - STRUCT_MARK(signature_start); - - /** Interface address index for this local/arp adjacency. */ - u32 if_address_index; - /** Number of adjecencies in block. Greater than 1 means multipath; otherwise equal to 1. */ u16 n_adj; @@ -181,27 +174,63 @@ typedef struct { u16 lookup_next_index_as_int; }; + /** Interface address index for this local/arp adjacency. */ + u32 if_address_index; + /** Force re-lookup in a different FIB. 
~0 => normal behavior */ - i16 explicit_fib_index; u16 mcast_group_index; /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */ u16 saved_lookup_next_index; + /* + * link/ether-type + */ + u8 ia_link; + u8 ia_nh_proto; + union { - /** IP_LOOKUP_NEXT_ARP only */ - struct { - ip46_address_t next_hop; - } arp; - /** IP_LOOKUP_NEXT_CLASSIFY only */ - struct { - u16 table_index; - } classify; - /** IP_LOOKUP_NEXT_INDIRECT only */ - struct { - ip46_address_t next_hop; - } indirect; - u8 opaque[IP_ADJACENCY_OPAQUE_SZ]; + union { + /** + * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE + * + * neighbour adjacency sub-type; + */ + struct { + ip46_address_t next_hop; + } nbr; + /** + * IP_LOOKUP_NEXT_MIDCHAIN + * + * A nbr adj that is also recursive. Think tunnels. + * A nbr adj can transition to be of type MDICHAIN + * so be sure to leave the two structs with the next_hop + * fields aligned. + */ + struct { + /** + * The recursive next-hop + */ + ip46_address_t next_hop; + /** + * The node index of the tunnel's post rewrite/TX function. + */ + u32 tx_function_node; + /** + * The next DPO to use + */ + dpo_id_t next_dpo; + } midchain; + /** + * IP_LOOKUP_NEXT_GLEAN + * + * Glean the address to ARP for from the packet's destination + */ + struct { + ip46_address_t receive_addr; + } glean; + } sub_type; + u16 opaque[IP_ADJACENCY_OPAQUE_SZ]; }; /** @brief Special format function for this adjacency. @@ -210,63 +239,32 @@ typedef struct { * the first cache line reads "full" on the free space gas gauge. 
*/ u32 special_adjacency_format_function_index; /* 0 is invalid */ - STRUCT_MARK(signature_end); - - /** Number of FIB entries sharing this adjacency */ - u32 share_count; - /** Use this adjacency instead */ - u32 next_adj_with_signature; CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); - /** Rewrite in second/third cache lines */ + /* Rewrite in second/third cache lines */ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); + + /* + * member not accessed in the data plane are relgated to the + * remaining cachelines + */ + fib_node_t ia_node; } ip_adjacency_t; -static inline uword -vnet_ip_adjacency_signature (ip_adjacency_t * adj) -{ - uword signature = 0xfeedfaceULL; - - /* Skip heap handle, sum everything up to but not including share_count */ - signature = hash_memory - (STRUCT_MARK_PTR(adj, signature_start), - STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) - - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start), - signature); - - /* and the rewrite */ - signature = hash_memory (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE, - signature); - return signature; -} +_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0), + "IP adjacency cachline 0 is not offset"); +_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == + CLIB_CACHE_LINE_BYTES), + "IP adjacency cachline 1 is more than one cachline size offset"); -static inline int -vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2) -{ - if (memcmp (STRUCT_MARK_PTR(a1, signature_start), - STRUCT_MARK_PTR(a2, signature_start), - STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) - - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start))) - return 0; - if (memcmp (&a1->rewrite_header, &a2->rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE)) - return 0; - return 1; -} +/* An all zeros address */ +extern const ip46_address_t zero_addr; /* Index into adjacency table. */ typedef u32 ip_adjacency_index_t; typedef struct { - /* Directly connected next-hop adjacency index. 
*/ - u32 next_hop_adj_index; - - /* Path weight for this adjacency. */ - u32 weight; -} ip_multipath_next_hop_t; - -typedef struct { /* Adjacency index of first index in block. */ u32 adj_index; @@ -276,11 +274,7 @@ typedef struct { /* Number of prefixes that point to this adjacency. */ u32 reference_count; - /* Normalized next hops are used as hash keys: they are sorted by weight - and weights are chosen so they add up to 1 << log2_n_adj_in_block (with - zero-weighted next hops being deleted). - Unnormalized next hops are saved so that control plane has a record of exactly - what the RIB told it. */ + /* Normalized next hops are saved for stats/display purposes */ struct { /* Number of hops in the multipath. */ u32 count; @@ -290,7 +284,7 @@ typedef struct { /* Heap handle used to for example free block when we're done with it. */ u32 heap_handle; - } normalized_next_hops, unnormalized_next_hops; + } normalized_next_hops; } ip_multipath_adjacency_t; /* IP multicast adjacency. */ @@ -397,20 +391,11 @@ typedef struct ip_adj_register_struct { } ip_adj_register_t; typedef struct ip_lookup_main_t { - /** Adjacency heap. */ + /* Adjacency heap. */ ip_adjacency_t * adjacency_heap; - /** Adjacency packet/byte counters indexed by adjacency index. */ - vlib_combined_counter_main_t adjacency_counters; - - /** Heap of (next hop, weight) blocks. Sorted by next hop. */ - ip_multipath_next_hop_t * next_hop_heap; - - /** Indexed by heap_handle from ip_adjacency_t. */ - ip_multipath_adjacency_t * multipath_adjacencies; - - /** Adjacency by signature hash */ - uword * adj_index_by_signature; + /** load-balance packet/byte counters indexed by LB index. 
*/ + vlib_combined_counter_main_t load_balance_counters; /** any-tx-feature-enabled interface bitmap */ uword * tx_sw_if_has_ip_output_features; @@ -418,29 +403,6 @@ typedef struct ip_lookup_main_t { /** count of enabled features, per sw_if_index, to maintain bitmap */ i16 * tx_feature_count_by_sw_if_index; - /** Temporary vectors for looking up next hops in hash. */ - ip_multipath_next_hop_t * next_hop_hash_lookup_key; - ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized; - - /** Hash table mapping normalized next hops and weights - to multipath adjacency index. */ - uword * multipath_adjacency_by_next_hops; - - u32 * adjacency_remap_table; - u32 n_adjacency_remaps; - - /** If average error per adjacency is less than this threshold adjacency block - size is accepted. */ - f64 multipath_next_hop_error_tolerance; - - /** Adjacency index for routing table misses, local punts, and drops. */ - u32 miss_adj_index, drop_adj_index, local_adj_index; - - /** Miss adjacency is always first in adjacency table. */ -#define IP_LOOKUP_MISS_ADJ_INDEX 0 - - ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks; - /** Pool of addresses that are assigned to interfaces. */ ip_interface_address_t * if_address_pool; @@ -501,54 +463,6 @@ do { \ CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \ } while (0) -/* Adds a next node to ip4 or ip6 lookup node which can be then used in adjacencies. - * @param vlib_main pointer - * @param lm ip4_main.lookup_main or ip6_main.lookup_main - * @param reg registration structure - * @param next_node_index Returned index to be used in adjacencies. - * @return 0 on success. -1 on failure. - */ -int ip_register_adjacency(vlib_main_t *vm, u8 is_ip4, - ip_adj_register_t *reg); - -/* - * Construction helpers to add IP adjacency at init. - */ -#define VNET_IP_REGISTER_ADJACENCY(ip,x,...) 
\ - __VA_ARGS__ ip_adj_register_t ip##adj_##x; \ -static void __vnet_##ip##_register_adjacency_##x (void) \ - __attribute__((__constructor__)) ; \ -static void __vnet_##ip##_register_adjacency_##x (void) \ -{ \ - ip_lookup_main_t *lm = &ip##_main.lookup_main; \ - ip##adj_##x.next = lm->registered_adjacencies; \ - lm->registered_adjacencies = &ip##adj_##x; \ -} \ -__VA_ARGS__ ip_adj_register_t ip##adj_##x - -#define VNET_IP4_REGISTER_ADJACENCY(x,...) \ - VNET_IP_REGISTER_ADJACENCY(ip4, x, __VA_ARGS__) - -#define VNET_IP6_REGISTER_ADJACENCY(x,...) \ - VNET_IP_REGISTER_ADJACENCY(ip6, x, __VA_ARGS__) - -static inline void -ip_register_add_del_adjacency_callback(ip_lookup_main_t * lm, - ip_add_del_adjacency_callback_t cb) -{ - vec_add1(lm->add_del_adjacency_callbacks, cb); -} - -always_inline void -ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del) -{ - ip_adjacency_t * adj; - uword i; - adj = ip_get_adjacency (lm, adj_index); - for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++) - lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del); -} - /* Create new block of given number of contiguous adjacencies. 
*/ ip_adjacency_t * ip_add_adjacency (ip_lookup_main_t * lm, @@ -556,38 +470,6 @@ ip_add_adjacency (ip_lookup_main_t * lm, u32 n_adj, u32 * adj_index_result); -void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index); -void -ip_update_adjacency (ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * copy_adj); - -static inline int -ip_adjacency_is_multipath(ip_lookup_main_t * lm, u32 adj_index) -{ - if (!vec_len(lm->multipath_adjacencies)) - return 0; - - if (vec_len(lm->multipath_adjacencies) < adj_index - 1) - return 0; - - - return (lm->multipath_adjacencies[adj_index].adj_index == adj_index && - lm->multipath_adjacencies[adj_index].n_adj_in_block > 0); -} - -void -ip_multipath_adjacency_free (ip_lookup_main_t * lm, - ip_multipath_adjacency_t * a); - -u32 -ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, - u32 is_del, - u32 old_mp_adj_index, - u32 next_hop_adj_index, - u32 next_hop_weight, - u32 * new_mp_adj_index); - clib_error_t * ip_interface_address_add_del (ip_lookup_main_t * lm, u32 sw_if_index, @@ -596,6 +478,9 @@ ip_interface_address_add_del (ip_lookup_main_t * lm, u32 is_del, u32 * result_index); +u8 * +format_ip_flow_hash_config (u8 * s, va_list * args); + always_inline ip_interface_address_t * ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib) { @@ -603,28 +488,14 @@ ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib) return p ? 
pool_elt_at_index (lm->if_address_pool, p[0]) : 0; } +u32 +fib_table_id_find_fib_index (fib_protocol_t proto, + u32 table_id); + always_inline void * ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a) { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); } -always_inline ip_interface_address_t * -ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index) -{ - ip_adjacency_t * adj; - u32 if_address_index; - - adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]); - - ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP - || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL); - if_address_index = adj->if_address_index; - if_address_index = (if_address_index == ~0 ? - vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index) - : if_address_index); - - return (if_address_index != ~0)?pool_elt_at_index (lm->if_address_pool, if_address_index):NULL; -} - #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ do { \ vnet_main_t *_vnm = vnet_get_main(); \ @@ -653,7 +524,5 @@ do { \ } while (0) void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); -u32 vnet_register_special_adjacency_format_function -(ip_lookup_main_t * lm, format_function_t * fp); #endif /* included_ip_lookup_h */ diff --git a/vnet/vnet/ip/ping.c b/vnet/vnet/ip/ping.c index b5842a69..3bc4da88 100644 --- a/vnet/vnet/ip/ping.c +++ b/vnet/vnet/ip/ping.c @@ -14,6 +14,9 @@ */ #include <vnet/ip/ping.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry.h> u8 * format_icmp4_input_trace (u8 * s, va_list * va) @@ -278,7 +281,14 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6, vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ fib_index0 = 0; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, pa6); + adj_index0 = 
fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128)); + + if (ADJ_INDEX_INVALID == adj_index0) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + sw_if_index0 = adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0, sw_if_index, verbose); @@ -362,7 +372,15 @@ send_ip4_ping (vlib_main_t * vm, vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ fib_index0 = 0; - adj_index0 = ip4_fib_lookup_with_table (im, fib_index0, pa4, 0); + adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup( + ip4_fib_get(fib_index0), pa4, 32)); + + if (ADJ_INDEX_INVALID == adj_index0) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + sw_if_index0 = adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0, sw_if_index, verbose); diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h index 1cf525c6..1845fa74 100644 --- a/vnet/vnet/ip/udp.h +++ b/vnet/vnet/ip/udp.h @@ -115,14 +115,13 @@ void udp_register_dst_port (vlib_main_t * vm, u32 node_index, u8 is_ip4); always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, +ip_udp_fixup_one (vlib_main_t * vm, + vlib_buffer_t * b0, u8 is_ip4) { u16 new_l0; udp_header_t * udp0; - vlib_buffer_advance (b0, - ec_len); - if (is_ip4) { ip4_header_t * ip0; @@ -131,9 +130,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, ip0 = vlib_buffer_get_current(b0); - /* Apply the encap string. */ - clib_memcpy(ip0, ec0, ec_len); - /* fix the <bleep>ing outer-IP checksum */ sum0 = ip0->checksum; /* old_l0 always 0, see the rewrite setup */ @@ -157,9 +153,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, ip0 = vlib_buffer_get_current(b0); - /* Apply the encap string. 
*/ - clib_memcpy(ip0, ec0, ec_len); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip0)); ip0->payload_length = new_l0; @@ -175,6 +168,33 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, udp0->checksum = 0xffff; } } +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, - ec_len); + + if (is_ip4) + { + ip4_header_t * ip0; + + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. */ + clib_memcpy(ip0, ec0, ec_len); + ip_udp_fixup_one(vm, b0, 1); + } + else + { + ip6_header_t * ip0; + + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. */ + clib_memcpy(ip0, ec0, ec_len); + ip_udp_fixup_one(vm, b0, 0); + } +} always_inline void ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, |