author | Neale Ranns <nranns@cisco.com> | 2016-10-03 09:40:25 +0100
---|---|---
committer | Florin Coras <florin.coras@gmail.com> | 2016-10-03 11:45:15 +0000
commit | 5e575b1d59a5a4c1590ca09d6383a876fb9fdd25 (patch) |
tree | fcba6d058fe32d9b32fe246577565020d10455ff /vnet |
parent | 553d808fc44e61846e4cda108083dd88beb338e3 (diff) |
L2 over LISP and GRE (VPP-457)
Change-Id: I0d7f9c7f41a9f9e0acb0950adedb90d45df08c2a
Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'vnet')
55 files changed, 3596 insertions, 2234 deletions
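Before the diff itself, a short orientation: this patch reworks the midchain adjacency API so that tunnel drivers pass a per-packet fixup callback and a flag word to `adj_nbr_midchain_update_rewrite()` instead of a post-rewrite node index, and adds `adj_nbr_midchain_unstack()` plus an `FIB_LINK_ETHERNET`/`DPO_PROTO_ETHERNET` link type so L2 payloads can be carried over LISP-GPE and GRE. The sketch below is illustrative only, not part of the patch: `my_tunnel_fixup` and `my_tunnel_mk_l2_midchain` are hypothetical names, while the function signatures, `FIB_LINK_ETHERNET`, and the `ADJ_MIDCHAIN_FLAG_NO_COUNT` semantics are taken from the hunks that follow (`adj/adj_midchain.h`, `gre/interface.c`).

```c
/*
 * Sketch only: the adj_nbr_midchain_* call pattern introduced by this patch,
 * modelled on the gre/interface.c hunk. Names prefixed my_ are hypothetical.
 */
#include <vnet/vnet.h>
#include <vnet/adj/adj.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/fib/fib_entry.h>

/* Per-packet fixup run by the midchain rewrite node, e.g. to patch the
 * outer IP length/checksum (compare gre_fixup in the gre/interface.c hunk). */
static void
my_tunnel_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b0)
{
  /* tunnel-specific encap fixups go here */
}

static void
my_tunnel_mk_l2_midchain (u32 sw_if_index, fib_node_index_t fib_entry_index)
{
  ip46_address_t zero_nh = { };
  u8 *rewrite = NULL;           /* encap header, built by the tunnel code */
  adj_index_t ai;

  /* create/lock an ethernet-link midchain adjacency on the tunnel interface */
  ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, FIB_LINK_ETHERNET,
                            &zero_nh, sw_if_index);

  /* install rewrite + fixup; NO_COUNT because an L2 tunnel in a bridge
   * domain has already been counted at its own interface TX node */
  adj_nbr_midchain_update_rewrite (ai, my_tunnel_fixup,
                                   ADJ_MIDCHAIN_FLAG_NO_COUNT, rewrite);
  vec_free (rewrite);

  /* stack on the tunnel destination's forwarding while the link is up... */
  adj_nbr_midchain_stack (ai,
                          fib_entry_contribute_ip_forwarding (fib_entry_index));

  /* ...and unstack (stack-on-drop) when the link goes down:
   * adj_nbr_midchain_unstack (ai); */
}
```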
diff --git a/vnet/Makefile.am b/vnet/Makefile.am index bcc7faf028f..6aa4254f2af 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -521,12 +521,15 @@ libvnet_la_SOURCES += \ vnet/lisp-gpe/lisp_gpe_sub_interface.c \ vnet/lisp-gpe/lisp_gpe_adjacency.c \ vnet/lisp-gpe/lisp_gpe_tunnel.c \ + vnet/lisp-gpe/lisp_gpe_fwd_entry.c \ + vnet/lisp-gpe/lisp_gpe_tenant.c \ vnet/lisp-gpe/interface.c \ - vnet/lisp-gpe/ip_forward.c \ vnet/lisp-gpe/decap.c nobase_include_HEADERS += \ vnet/lisp-gpe/lisp_gpe.h \ + vnet/lisp-gpe/lisp_gpe_fwd_entry.h \ + vnet/lisp-gpe/lisp_gpe_tenant.h \ vnet/lisp-gpe/lisp_gpe_packet.h \ vnet/lisp-gpe/lisp_gpe_error.def @@ -779,6 +782,7 @@ libvnet_la_SOURCES += \ vnet/adj/adj_rewrite.c \ vnet/adj/adj_glean.c \ vnet/adj/adj_midchain.c \ + vnet/adj/adj_l2.c \ vnet/adj/adj.c nobase_include_HEADERS += \ diff --git a/vnet/vnet/adj/adj_internal.h b/vnet/vnet/adj/adj_internal.h index 79042d1fd2a..f2d0ce0dbb2 100644 --- a/vnet/vnet/adj/adj_internal.h +++ b/vnet/vnet/adj/adj_internal.h @@ -19,6 +19,7 @@ #include <vnet/adj/adj.h> #include <vnet/ip/ip.h> #include <vnet/mpls/mpls.h> +#include <vnet/adj/adj_l2.h> /** @@ -50,6 +51,8 @@ adj_get_rewrite_node (fib_link_t linkt) return (&ip6_rewrite_node); case FIB_LINK_MPLS: return (&mpls_output_node); + case FIB_LINK_ETHERNET: + return (&adj_l2_rewrite_node); } ASSERT(0); return (NULL); @@ -66,6 +69,8 @@ adj_fib_link_2_vnet (fib_link_t linkt) return (VNET_L3_PACKET_TYPE_IP6); case FIB_LINK_MPLS: return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + case FIB_LINK_ETHERNET: + break; } return (0); } diff --git a/vnet/vnet/adj/adj_l2.c b/vnet/vnet/adj/adj_l2.c new file mode 100644 index 00000000000..cf0f04442cf --- /dev/null +++ b/vnet/vnet/adj/adj_l2.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/adj/adj_l2.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> + +/** + * @brief Trace data for a L2 Midchain + */ +typedef struct adj_l2_trace_t_ { + /** Adjacency index taken. 
*/ + u32 adj_index; +} adj_l2_trace_t; + +static u8 * +format_adj_l2_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + + s = format (s, "adj-idx %d : %U", + t->adj_index, + format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE); + return s; +} + +typedef enum adj_l2_rewrite_next_t_ +{ + ADJ_L2_REWRITE_NEXT_DROP, +} adj_l2_rewrite_next_t; + +always_inline uword +adj_l2_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_midchain) +{ + u32 * from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, * to_next, next_index; + u32 cpu_index = os_get_cpu_number(); + ip_config_main_t * cm = ðernet_main.feature_config_mains[VNET_IP_TX_FEAT]; + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + char *h0; + u32 pi0, rw_len0, adj_index0, next0; + u32 tx_sw_if_index0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + h0 = vlib_buffer_get_current (p0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = adj_get (adj_index0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], h0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; + + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <= + adj0[0].rewrite_header.max_l3_packet_bytes))) + { + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + + /* + * Follow the feature ARC. 
this will result eventually in + * the midchain-tx node + */ + p0->current_config_index = vec_elt(cm->config_index_by_sw_if_index, + tx_sw_if_index0); + vnet_get_config_data (&cm->config_main, + &p0->current_config_index, + &next0, + /* # bytes of config data */ 0); + } + else + { + /* can't fragment L2 */ + next0 = ADJ_L2_REWRITE_NEXT_DROP; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_l2_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +adj_l2_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return adj_l2_rewrite_inline (vm, node, frame, 0); +} + +static uword +adj_l2_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return adj_l2_rewrite_inline (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (adj_l2_rewrite_node) = { + .function = adj_l2_rewrite, + .name = "adj-l2-rewrite", + .vector_size = sizeof (u32), + + .format_trace = format_adj_l2_trace, + + .n_next_nodes = 1, + .next_nodes = { + [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_rewrite_node, adj_l2_rewrite) + +VLIB_REGISTER_NODE (adj_l2_midchain_node) = { + .function = adj_l2_midchain, + .name = "adj-l2-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_adj_l2_trace, + + .n_next_nodes = 1, + .next_nodes = { + [ADJ_L2_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (adj_l2_midchain_node, adj_l2_midchain) diff --git a/vnet/vnet/adj/adj_l2.h b/vnet/vnet/adj/adj_l2.h new file mode 100644 index 00000000000..3aa1c74b224 --- /dev/null +++ b/vnet/vnet/adj/adj_l2.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ADJ_L2_H__ +#define __ADJ_L2_H__ + +#include <vnet/adj/adj.h> + +extern vlib_node_registration_t adj_l2_midchain_node; +extern vlib_node_registration_t adj_l2_rewrite_node; + +#endif diff --git a/vnet/vnet/adj/adj_midchain.c b/vnet/vnet/adj/adj_midchain.c index 4b9b6a414d2..562a90d6e1d 100644 --- a/vnet/vnet/adj/adj_midchain.c +++ b/vnet/vnet/adj/adj_midchain.c @@ -15,10 +15,210 @@ #include <vnet/adj/adj_nbr.h> #include <vnet/adj/adj_internal.h> +#include <vnet/adj/adj_l2.h> +#include <vnet/adj/adj_midchain.h> #include <vnet/ethernet/arp_packet.h> #include <vnet/dpo/drop_dpo.h> #include <vnet/fib/fib_walk.h> +/** + * The two midchain tx feature node indices + */ +static u32 adj_midchain_tx_feature_node[FIB_LINK_NUM]; +static u32 adj_midchain_tx_no_count_feature_node[FIB_LINK_NUM]; + +/** + * @brief Trace data for packets traversing the midchain tx node + */ +typedef struct adj_midchain_tx_trace_t_ +{ + /** + * @brief the midchain adj we are traversing + */ + adj_index_t ai; +} adj_midchain_tx_trace_t; + +always_inline uword +adj_mdichain_tx_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int interface_count) +{ + u32 * from, * to_next, n_left_from, n_left_to_next; + u32 next_index; + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + u32 cpu_index = vm->cpu_index; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * FIXME DUAL LOOP + */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, adj_index0, next0; + const ip_adjacency_t * adj0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); + + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get(adj_index0); + dpo0 = &adj0->sub_type.midchain.next_dpo; + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (interface_count) + { + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + cpu_index, + adj0->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b0)); + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->ai = adj_index0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * +format_adj_midchain_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*); + + s = format(s, "adj-midchain:[%d]:%U", tr->ai, + format_ip_adjacency, vnet_get_main(), tr->ai, + 
FORMAT_IP_ADJACENCY_NONE); + + return (s); +} + +static uword +adj_midchain_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (adj_mdichain_tx_inline(vm, node, frame, 1)); +} + +VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = { + .function = adj_midchain_tx, + .name = "adj-midchain-tx", + .vector_size = sizeof (u32), + + .format_trace = format_adj_midchain_tx_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +static uword +adj_midchain_tx_no_count (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (adj_mdichain_tx_inline(vm, node, frame, 0)); +} + +VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = { + .function = adj_midchain_tx_no_count, + .name = "adj-midchain-tx-no-count", + .vector_size = sizeof (u32), + + .format_trace = format_adj_midchain_tx_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VNET_IP4_TX_FEATURE_INIT (adj_midchain_tx_ip4, static) = { + .node_name = "adj-midchain-tx", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_feature_node[FIB_LINK_IP4], +}; +VNET_IP4_TX_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = { + .node_name = "adj-midchain-tx-no-count", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_no_count_feature_node[FIB_LINK_IP4], +}; +VNET_IP6_TX_FEATURE_INIT (adj_midchain_tx_ip6, static) = { + .node_name = "adj-midchain-tx", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_feature_node[FIB_LINK_IP6], +}; +VNET_IP6_TX_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = { + .node_name = "adj-midchain-tx-no-count", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_no_count_feature_node[FIB_LINK_IP6], +}; +VNET_MPLS_TX_FEATURE_INIT (adj_midchain_tx_mpls, static) = { + .node_name = "adj-midchain-txs", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_feature_node[FIB_LINK_MPLS], +}; +VNET_MPLS_TX_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = { + .node_name = "adj-midchain-tx-no-count", + .runs_before = ORDER_CONSTRAINTS {"interface-output"}, + .feature_index = &adj_midchain_tx_no_count_feature_node[FIB_LINK_MPLS], +}; +VNET_ETHERNET_TX_FEATURE_INIT (adj_midchain_tx_ethernet, static) = { + .node_name = "adj-midchain-tx", + .runs_before = ORDER_CONSTRAINTS {"error-drop"}, + .feature_index = &adj_midchain_tx_feature_node[FIB_LINK_ETHERNET], +}; +VNET_ETHERNET_TX_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = { + .node_name = "adj-midchain-tx-no-count", + .runs_before = ORDER_CONSTRAINTS {"error-drop"}, + .feature_index = &adj_midchain_tx_no_count_feature_node[FIB_LINK_ETHERNET], +}; + static inline u32 adj_get_midchain_node (fib_link_t link) { @@ -29,11 +229,50 @@ adj_get_midchain_node (fib_link_t link) return (ip6_midchain_node.index); case FIB_LINK_MPLS: return (mpls_midchain_node.index); + case FIB_LINK_ETHERNET: + return (adj_l2_midchain_node.index); } ASSERT(0); return (0); } +static ip_config_main_t * +adj_midchain_get_cofing_for_link_type (const ip_adjacency_t *adj) +{ + ip_config_main_t *cm = NULL; + + switch (adj->ia_link) + { + case FIB_LINK_IP4: + { + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + cm = &lm->feature_config_mains[VNET_IP_TX_FEAT]; + break; + } + case FIB_LINK_IP6: + { + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = 
&im->lookup_main; + cm = &lm->feature_config_mains[VNET_IP_TX_FEAT]; + break; + } + case FIB_LINK_MPLS: + { + mpls_main_t * mm = &mpls_main; + cm = &mm->feature_config_mains[VNET_IP_TX_FEAT]; + break; + } + case FIB_LINK_ETHERNET: + { + cm = ðernet_main.feature_config_mains[VNET_IP_TX_FEAT]; + break; + } + } + + return (cm); +} + /** * adj_nbr_midchain_update_rewrite * @@ -43,16 +282,57 @@ adj_get_midchain_node (fib_link_t link) */ void adj_nbr_midchain_update_rewrite (adj_index_t adj_index, - u32 post_rewrite_node, + adj_midchain_fixup_t fixup, + adj_midchain_flag_t flags, u8 *rewrite) { + vnet_config_main_t * vcm; + ip_config_main_t *cm; ip_adjacency_t *adj; + u32 ci; ASSERT(ADJ_INDEX_INVALID != adj_index); adj = adj_get(adj_index); adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN; - adj->sub_type.midchain.tx_function_node = post_rewrite_node; + adj->sub_type.midchain.fixup_func = fixup; + + cm = adj_midchain_get_cofing_for_link_type(adj); + vcm = &(cm->config_main); + vec_validate_init_empty(cm->config_index_by_sw_if_index, + adj->rewrite_header.sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[adj->rewrite_header.sw_if_index]; + + /* + * Choose the adj tx function based on whether the client wants + * to count against the interface or not and insert the appropriate + * TX feature. + */ + if (flags & ADJ_MIDCHAIN_FLAG_NO_COUNT) + { + adj->sub_type.midchain.tx_function_node = + adj_midchain_tx_no_count_node.index; + + ci = vnet_config_add_feature( + vlib_get_main(), + vcm, ci, + adj_midchain_tx_no_count_feature_node[adj->ia_link], + /* config data */ 0, + /* # bytes of config data */ 0); + } + else + { + adj->sub_type.midchain.tx_function_node = + adj_midchain_tx_node.index; + ci = vnet_config_add_feature( + vlib_get_main(), + vcm, ci, + adj_midchain_tx_feature_node[adj->ia_link], + /* config data */ 0, + /* # bytes of config data */ 0); + } + + cm->config_index_by_sw_if_index[adj->rewrite_header.sw_if_index] = ci; if (NULL != rewrite) { @@ -61,7 +341,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, * use a dummy rewrite header to get the interface to print into. */ ip_adjacency_t dummy; - dpo_id_t tmp = DPO_NULL; + dpo_id_t tmp = DPO_NULL; vnet_rewrite_for_tunnel(vnet_get_main(), adj->rewrite_header.sw_if_index, @@ -73,26 +353,26 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, /* * this is an update of an existing rewrite. - * packets are in flight. we'll need to briefly stack on the drop DPO - * whilst the rewrite is written, so any packets that see the partial update - * are binned. - */ - if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo)) - { - /* - * not stacked yet. stack on the drop - */ - dpo_stack(DPO_ADJACENCY_MIDCHAIN, - fib_proto_to_dpo(adj->ia_nh_proto), - &adj->sub_type.midchain.next_dpo, - drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto))); - } - - dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo); - dpo_stack(DPO_ADJACENCY_MIDCHAIN, - fib_proto_to_dpo(adj->ia_nh_proto), - &adj->sub_type.midchain.next_dpo, - drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto))); + * packets are in flight. we'll need to briefly stack on the drop DPO + * whilst the rewrite is written, so any packets that see the partial update + * are binned. + */ + if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo)) + { + /* + * not stacked yet. 
stack on the drop + */ + dpo_stack(DPO_ADJACENCY_MIDCHAIN, + fib_link_to_dpo_proto(adj->ia_link), + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); + } + + dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo); + dpo_stack(DPO_ADJACENCY_MIDCHAIN, + fib_link_to_dpo_proto(adj->ia_link), + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); CLIB_MEMORY_BARRIER(); @@ -102,17 +382,16 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, CLIB_MEMORY_BARRIER(); - /* - * The graph arc used/created here is from the post-rewirte node to the - * child's registered node. This is because post adj processing the next - * node is the interface's specific node, then the post-write-node (aka - * the interface's tx-function) - from there we need to get to the stacked - * child's node. - */ - dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, - &adj->sub_type.midchain.next_dpo, - &tmp); - dpo_reset(&tmp); + /* + * The graph arc used/created here is from the midchain-tx node to the + * child's registered node. This is because post adj processing the next + * node are any output features, then the midchain-tx. from there we + * need to get to the stacked child's node. + */ + dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + &adj->sub_type.midchain.next_dpo, + &tmp); + dpo_reset(&tmp); } else { @@ -130,6 +409,31 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, } /** + * adj_nbr_midchain_unstack + * + * Unstack the adj. stack it on drop + */ +void +adj_nbr_midchain_unstack (adj_index_t adj_index) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * stack on the drop + */ + dpo_stack(DPO_ADJACENCY_MIDCHAIN, + fib_link_to_dpo_proto(adj->ia_link), + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link))); + + CLIB_MEMORY_BARRIER(); +} + +/** * adj_nbr_midchain_stack */ void @@ -145,8 +449,8 @@ adj_nbr_midchain_stack (adj_index_t adj_index, ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index); dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, - &adj->sub_type.midchain.next_dpo, - next); + &adj->sub_type.midchain.next_dpo, + next); } u8* @@ -161,13 +465,13 @@ format_adj_midchain (u8* s, va_list *ap) s = format (s, " via %U ", format_ip46_address, &adj->sub_type.nbr.next_hop); s = format (s, " %U", - format_vnet_rewrite, - vnm->vlib_main, &adj->rewrite_header, - sizeof (adj->rewrite_data), indent); + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), indent); s = format (s, "\n%Ustacked-on:\n%U%U", - format_white_space, indent, - format_white_space, indent+2, - format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); + format_white_space, indent, + format_white_space, indent+2, + format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); return (s); } @@ -211,12 +515,18 @@ const static char* const midchain_mpls_nodes[] = "mpls-midchain", NULL, }; +const static char* const midchain_ethernet_nodes[] = +{ + "adj-l2-midchain", + NULL, +}; const static char* const * const midchain_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = midchain_ip4_nodes, [DPO_PROTO_IP6] = midchain_ip6_nodes, [DPO_PROTO_MPLS] = midchain_mpls_nodes, + [DPO_PROTO_ETHERNET] = midchain_ethernet_nodes, }; void diff --git a/vnet/vnet/adj/adj_midchain.h b/vnet/vnet/adj/adj_midchain.h index adf86f1d007..ae414aea6dc 100644 --- a/vnet/vnet/adj/adj_midchain.h +++ b/vnet/vnet/adj/adj_midchain.h @@ -25,6 +25,26 
@@ #include <vnet/adj/adj.h> /** + * @brief Flags controlling the midchain adjacency + */ +typedef enum adj_midchain_flag_t_ +{ + /** + * No flags + */ + ADJ_MIDCHAIN_FLAG_NONE = 0, + + /** + * Packets TX through the midchain do not increment the interface + * counters. This should be used when the adj is associated with an L2 + * interface and that L2 interface is in a bridege domain. In that case + * the packet will have traversed the interface's TX node, and hence have + * been counted, before it traverses ths midchain + */ + ADJ_MIDCHAIN_FLAG_NO_COUNT = (1 << 0), +} adj_midchain_flag_t; + +/** * @brief * Convert an existing neighbour adjacency into a midchain * @@ -39,7 +59,8 @@ * The rewrite. */ extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index, - u32 post_rewrite_node, + adj_midchain_fixup_t fixup, + adj_midchain_flag_t flags, u8 *rewrite); /** @@ -58,6 +79,16 @@ extern void adj_nbr_midchain_stack(adj_index_t adj_index, /** * @brief + * unstack a midchain. This will break the chain between the midchain and + * the next graph section. This is a implemented as stack-on-drop + * + * @param adj_index + * The index of the midchain to stack + */ +extern void adj_nbr_midchain_unstack(adj_index_t adj_index); + +/** + * @brief * Module initialisation */ extern void adj_midchain_module_init(void); diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c index de137d1dac0..0913cfd791c 100644 --- a/vnet/vnet/adj/adj_nbr.c +++ b/vnet/vnet/adj/adj_nbr.c @@ -793,11 +793,17 @@ const static char* const nbr_mpls_nodes[] = "mpls-output", NULL, }; +const static char* const nbr_ethernet_nodes[] = +{ + "adj-l2-rewrite", + NULL, +}; const static char* const * const nbr_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = nbr_ip4_nodes, [DPO_PROTO_IP6] = nbr_ip6_nodes, [DPO_PROTO_MPLS] = nbr_mpls_nodes, + [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes, }; const static char* const nbr_incomplete_ip4_nodes[] = diff --git a/vnet/vnet/dpo/dpo.h b/vnet/vnet/dpo/dpo.h index 8c22f00b091..452a07e3104 100644 --- a/vnet/vnet/dpo/dpo.h +++ b/vnet/vnet/dpo/dpo.h @@ -65,6 +65,7 @@ typedef enum dpo_proto_t_ DPO_PROTO_IP4 = 0, #endif DPO_PROTO_IP6, + DPO_PROTO_ETHERNET, DPO_PROTO_MPLS, } __attribute__((packed)) dpo_proto_t; @@ -74,9 +75,15 @@ typedef enum dpo_proto_t_ #define DPO_PROTOS { \ [DPO_PROTO_IP4] = "ip4", \ [DPO_PROTO_IP6] = "ip6", \ + [DPO_PROTO_ETHERNET] = "ethernet", \ [DPO_PROTO_MPLS] = "mpls", \ } +#define FOR_EACH_DPO_PROTO(_proto) \ + for (_proto = DPO_PROTO_IP4; \ + _proto <= DPO_PROTO_MPLS; \ + _proto++) + /** * @brief Common types of data-path objects * New types can be dynamically added using dpo_register_new_type() diff --git a/vnet/vnet/dpo/drop_dpo.c b/vnet/vnet/dpo/drop_dpo.c index 62f56488a01..048518c918b 100644 --- a/vnet/vnet/dpo/drop_dpo.c +++ b/vnet/vnet/dpo/drop_dpo.c @@ -55,7 +55,7 @@ format_drop_dpo (u8 *s, va_list *ap) CLIB_UNUSED(index_t index) = va_arg(ap, index_t); CLIB_UNUSED(u32 indent) = va_arg(ap, u32); - return (format(s, "dpo-drop")); + return (format(s, "dpo-drop %U", format_dpo_proto, index)); } const static dpo_vft_t drop_vft = { @@ -86,11 +86,17 @@ const static char* const drop_mpls_nodes[] = "mpls-drop", NULL, }; +const static char* const drop_ethernet_nodes[] = +{ + "error-drop", + NULL, +}; const static char* const * const drop_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = drop_ip4_nodes, [DPO_PROTO_IP6] = drop_ip6_nodes, [DPO_PROTO_MPLS] = drop_mpls_nodes, + [DPO_PROTO_ETHERNET] = drop_ethernet_nodes, }; void diff --git a/vnet/vnet/dpo/load_balance.c 
b/vnet/vnet/dpo/load_balance.c index 963ff0ba160..1250694a9e3 100644 --- a/vnet/vnet/dpo/load_balance.c +++ b/vnet/vnet/dpo/load_balance.c @@ -703,11 +703,17 @@ const static char* const load_balance_mpls_nodes[] = "mpls-load-balance", NULL, }; +const static char* const load_balance_l2_nodes[] = +{ + "l2-load-balance", + NULL, +}; const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP4] = load_balance_ip4_nodes, [DPO_PROTO_IP6] = load_balance_ip6_nodes, [DPO_PROTO_MPLS] = load_balance_mpls_nodes, + [DPO_PROTO_ETHERNET] = load_balance_l2_nodes, }; void @@ -758,3 +764,142 @@ VLIB_CLI_COMMAND (load_balance_show_command, static) = { .short_help = "show load-balance [<index>]", .function = load_balance_show, }; + + +always_inline u32 +ip_flow_hash (void *data) +{ + ip4_header_t *iph = (ip4_header_t *) data; + + if ((iph->ip_version_and_header_length & 0xF0) == 0x40) + return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); + else + return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); +} + +always_inline u64 +mac_to_u64 (u8 * m) +{ + return (*((u64 *) m) & 0xffffffffffff); +} + +always_inline u32 +l2_flow_hash (vlib_buffer_t * b0) +{ + ethernet_header_t *eh; + u64 a, b, c; + uword is_ip, eh_size; + u16 eh_type; + + eh = vlib_buffer_get_current (b0); + eh_type = clib_net_to_host_u16 (eh->type); + eh_size = ethernet_buffer_header_size (b0); + + is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); + + /* since we have 2 cache lines, use them */ + if (is_ip) + a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); + else + a = eh->type; + + b = mac_to_u64 ((u8 *) eh->dst_address); + c = mac_to_u64 ((u8 *) eh->src_address); + hash_mix64 (a, b, c); + + return (u32) c; +} + +typedef struct load_balance_trace_t_ +{ + index_t lb_index; +} load_balance_trace_t; + +static uword +l2_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + u32 bi0, lbi0, next0; + const dpo_id_t *dpo0; + const load_balance_t *lb0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* lookup dst + src mac */ + lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + lb0 = load_balance_get(lbi0); + + vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(b0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + load_balance_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof (*tr)); + tr->lb_index = lbi0; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static u8 * +format_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + load_balance_trace_t *t = va_arg (*args, 
load_balance_trace_t *); + + s = format (s, "L2-load-balance: index %d", t->lb_index); + return s; +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (l2_load_balance_node) = { + .function = l2_load_balance, + .name = "l2-load-balance", + .vector_size = sizeof (u32), + + .format_trace = format_load_balance_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; diff --git a/vnet/vnet/dpo/lookup_dpo.c b/vnet/vnet/dpo/lookup_dpo.c index 0bfc0651a63..6e3f0792d07 100644 --- a/vnet/vnet/dpo/lookup_dpo.c +++ b/vnet/vnet/dpo/lookup_dpo.c @@ -292,12 +292,10 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - /* while (n_left_from >= 4 && n_left_to_next >= 2) */ - /* } */ - while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, lkdi0, lbi0, fib_index0, next0; + u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0; + flow_hash_config_t flow_hash_config0; const ip4_address_t *input_addr; const load_balance_t *lb0; const lookup_dpo_t * lkd0; @@ -349,7 +347,20 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, /* do lookup */ ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0); lb0 = load_balance_get(lbi0); - dpo0 = load_balance_get_bucket_i(lb0, 0); + + /* Use flow hash to compute multipath adjacency. */ + hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + flow_hash_config0 = lb0->lb_hash_config; + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + } + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; @@ -462,13 +473,10 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - /* while (n_left_from >= 4 && n_left_to_next >= 2) */ - /* { */ - /* } */ - while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, lkdi0, lbi0, fib_index0, next0; + u32 bi0, lkdi0, lbi0, fib_index0, next0, hash_c0; + flow_hash_config_t flow_hash_config0; const ip6_address_t *input_addr0; const load_balance_t *lb0; const lookup_dpo_t * lkd0; @@ -508,7 +516,20 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, fib_index0, input_addr0); lb0 = load_balance_get(lbi0); - dpo0 = load_balance_get_bucket_i(lb0, 0); + + /* Use flow hash to compute multipath adjacency. 
*/ + hash_c0 = vnet_buffer (b0)->ip.flow_hash = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + flow_hash_config0 = lb0->lb_hash_config; + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + ip6_compute_flow_hash (ip0, flow_hash_config0); + } + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); next0 = dpo0->dpoi_next_node; vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h index 1855b9144db..728da522e3a 100644 --- a/vnet/vnet/ethernet/ethernet.h +++ b/vnet/vnet/ethernet/ethernet.h @@ -43,6 +43,7 @@ #include <vnet/vnet.h> #include <vnet/ethernet/packet.h> #include <vnet/pg/pg.h> +#include <vnet/ip/ip_feature_registration.h> always_inline u64 ethernet_mac_address_u64 (u8 * a) @@ -224,7 +225,6 @@ typedef struct u32 input_next_mpls; } next_by_ethertype_t; - typedef struct { vlib_main_t *vlib_main; @@ -263,10 +263,34 @@ typedef struct /* debug: make sure we don't wipe out an ethernet registration by mistake */ u8 next_by_ethertype_register_called; + /** per-interface features */ + ip_config_main_t feature_config_mains[VNET_N_IP_FEAT]; + + /** Feature path configuration lists */ + vnet_ip_feature_registration_t *next_feature[VNET_N_IP_FEAT]; + + /** Save results for show command */ + char **feature_nodes[VNET_N_IP_FEAT]; + + /** feature node indicies */ + u32 ethernet_tx_feature_drop; } ethernet_main_t; ethernet_main_t ethernet_main; +#define VNET_ETHERNET_TX_FEATURE_INIT(x,...) \ + __VA_ARGS__ vnet_ip_feature_registration_t tx_##x; \ +static void __vnet_add_feature_registration_tx_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_feature_registration_tx_##x (void) \ +{ \ + ethernet_main_t * im = ðernet_main; \ + tx_##x.next = im->next_feature[VNET_IP_TX_FEAT]; \ + im->next_feature[VNET_IP_TX_FEAT] = &tx_##x; \ +} \ +__VA_ARGS__ vnet_ip_feature_registration_t tx_##x + + always_inline ethernet_type_info_t * ethernet_get_type_info (ethernet_main_t * em, ethernet_type_t type) { diff --git a/vnet/vnet/ethernet/init.c b/vnet/vnet/ethernet/init.c index 86597c4f66c..513d9c66c18 100644 --- a/vnet/vnet/ethernet/init.c +++ b/vnet/vnet/ethernet/init.c @@ -39,6 +39,7 @@ #include <vlib/vlib.h> #include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> // for feature registration /* Global main structure. 
*/ ethernet_main_t ethernet_main; @@ -60,6 +61,34 @@ add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name) hash_set_mem (em->type_info_by_name, ti->name, i); } +static char *feature_start_nodes[] = { + "adj-midchain-tx-no-count", +}; + +/* Built-in ip4 tx feature path definition */ +/* *INDENT-OFF* */ +VNET_ETHERNET_TX_FEATURE_INIT (ethernet_tx_drop, static) = +{ + .node_name = "error-drop", + .runs_before = 0, /* not before any other features */ + .feature_index = ðernet_main.ethernet_tx_feature_drop, +}; +/* *INDENT-ON* */ + +static clib_error_t * +ethernet_feature_init (vlib_main_t * vm) +{ + ip_config_main_t *cm = ðernet_main.feature_config_mains[VNET_IP_TX_FEAT]; + vnet_config_main_t *vcm = &cm->config_main; + + return (ip_feature_init_cast (vm, cm, vcm, + feature_start_nodes, + ARRAY_LEN (feature_start_nodes), + ethernet_main.next_feature[VNET_IP_TX_FEAT], + ðernet_main.feature_nodes + [VNET_IP_TX_FEAT])); +} + static clib_error_t * ethernet_init (vlib_main_t * vm) { @@ -87,7 +116,7 @@ ethernet_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, ethernet_input_init))) return error; - return error; + return (ethernet_feature_init (vm)); } VLIB_INIT_FUNCTION (ethernet_init); @@ -99,7 +128,6 @@ ethernet_get_main (vlib_main_t * vm) return ðernet_main; } - /* * fd.io coding-style-patch-verification: ON * diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c index 91d5a3c6aa3..43f1cd4af44 100644 --- a/vnet/vnet/ethernet/interface.c +++ b/vnet/vnet/ethernet/interface.c @@ -46,6 +46,13 @@ #include <vnet/lisp-gpe/lisp_gpe.h> #include <vnet/devices/af_packet/af_packet.h> +/** + * @file + * @brief Loopback Interfaces. + * + * This file contains code to manage loopback interfaces. + */ + int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) { @@ -57,13 +64,6 @@ vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) hw->hw_class_index == srp_hw_interface_class.index)); } -/** - * @file - * @brief Loopback Interfaces. - * - * This file contains code to manage loopback interfaces. 
- */ - static uword ethernet_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, @@ -691,6 +691,33 @@ VLIB_CLI_COMMAND (delete_sub_interface_command, static) = { }; /* *INDENT-ON* */ +static clib_error_t * +show_ethernet_interface_features_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + u32 sw_if_index; + + if (!unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + return clib_error_return (0, "Interface not specified..."); + + vlib_cli_output (vm, "Ethernet feature paths configured on %U...", + format_vnet_sw_if_index_name, vnm, sw_if_index); + + ip_interface_features_show (vm, "Ethernet", + em->feature_config_mains, sw_if_index); + + return 0; +} + +VLIB_CLI_COMMAND (show_ethernet_interface_features_command, static) = +{ +.path = "show ethernet interface features",.short_help = + "show ethernet interface features <intfc>",.function = + show_ethernet_interface_features_command_fn,}; + /* * fd.io coding-style-patch-verification: ON * diff --git a/vnet/vnet/fib/fib_entry.c b/vnet/vnet/fib/fib_entry.c index 8b275c61912..a75d5c9cf8c 100644 --- a/vnet/vnet/fib/fib_entry.c +++ b/vnet/vnet/fib/fib_entry.c @@ -90,7 +90,8 @@ fib_entry_chain_type_fixup (const fib_entry_t *entry, * then use the payload-protocol field, that we stashed there * for just this purpose */ - return (fib_proto_to_forw_chain_type(entry->fe_prefix.fp_payload_proto)); + return (fib_forw_chain_type_from_dpo_proto( + entry->fe_prefix.fp_payload_proto)); } /* * else give them what this entry would be by default. i.e. if it's a v6 @@ -120,7 +121,8 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) * then use the payload-protocol field, that we stashed there * for just this purpose */ - return (fib_proto_to_forw_chain_type(fib_entry->fe_prefix.fp_payload_proto)); + return (fib_forw_chain_type_from_dpo_proto( + fib_entry->fe_prefix.fp_payload_proto)); else return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); } diff --git a/vnet/vnet/fib/fib_entry_src.c b/vnet/vnet/fib/fib_entry_src.c index 70550069356..0bca17dd884 100644 --- a/vnet/vnet/fib/fib_entry_src.c +++ b/vnet/vnet/fib/fib_entry_src.c @@ -298,6 +298,7 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, } break; case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: ASSERT(0); break; } diff --git a/vnet/vnet/fib/fib_node.h b/vnet/vnet/fib/fib_node.h index a05b6f1b61a..6a54c6f565b 100644 --- a/vnet/vnet/fib/fib_node.h +++ b/vnet/vnet/fib/fib_node.h @@ -35,7 +35,7 @@ typedef enum fib_node_type_t_ { FIB_NODE_TYPE_PATH, FIB_NODE_TYPE_ADJ, FIB_NODE_TYPE_MPLS_ENTRY, - FIB_NODE_TYPE_LISP_GPE_TUNNEL, + FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, FIB_NODE_TYPE_LISP_ADJ, FIB_NODE_TYPE_MPLS_GRE_TUNNEL, FIB_NODE_TYPE_GRE_TUNNEL, @@ -55,7 +55,7 @@ typedef enum fib_node_type_t_ { [FIB_NODE_TYPE_PATH] = "path", \ [FIB_NODE_TYPE_MPLS_ENTRY] = "mpls-entry", \ [FIB_NODE_TYPE_ADJ] = "adj", \ - [FIB_NODE_TYPE_LISP_GPE_TUNNEL] = "lisp-gpe-tunnel", \ + [FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY] = "lisp-gpe-fwd-entry", \ [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \ [FIB_NODE_TYPE_MPLS_GRE_TUNNEL] = "mpls-gre-tunnel", \ [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \ diff --git a/vnet/vnet/fib/fib_path.c b/vnet/vnet/fib/fib_path.c index d2e5e319afd..d5453fde256 100644 --- a/vnet/vnet/fib/fib_path.c +++ b/vnet/vnet/fib/fib_path.c @@ -1564,7 +1564,8 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, { dpo_copy(dpo, &path->fp_dpo); } - else { + 
else + { switch (path->fp_type) { case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: @@ -1574,6 +1575,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: case FIB_FORW_CHAIN_TYPE_MPLS_EOS: case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: { adj_index_t ai; @@ -1606,6 +1608,9 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: fib_path_recursive_adj_update(path, fct, dpo); break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; } break; case FIB_PATH_TYPE_DEAG: @@ -1623,6 +1628,9 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_MPLS_EOS: dpo_copy(dpo, &path->fp_dpo); break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; } break; case FIB_PATH_TYPE_EXCLUSIVE: @@ -1656,7 +1664,7 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index, mnh->path_weight = path->fp_weight; mnh->path_index = path_index; - dpo_copy(&mnh->path_dpo, &path->fp_dpo); + fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo); } return (hash_key); diff --git a/vnet/vnet/fib/fib_path_list.c b/vnet/vnet/fib/fib_path_list.c index 1df73968614..3523d93a7bb 100644 --- a/vnet/vnet/fib/fib_path_list.c +++ b/vnet/vnet/fib/fib_path_list.c @@ -353,7 +353,7 @@ fib_path_list_last_lock_gone (fib_node_t *node) */ static void fib_path_list_mk_lb (fib_path_list_t *path_list, - fib_forward_chain_type_t type, + fib_forward_chain_type_t fct, dpo_id_t *dpo) { load_balance_path_t *hash_key; @@ -361,6 +361,19 @@ fib_path_list_mk_lb (fib_path_list_t *path_list, hash_key = NULL; + if (!dpo_id_is_valid(dpo)) + { + /* + * first time create + */ + dpo_set(dpo, + DPO_LOAD_BALANCE, + fib_forw_chain_type_to_dpo_proto(fct), + load_balance_create(0, + fib_forw_chain_type_to_dpo_proto(fct), + 0 /* FIXME FLOW HASH */)); + } + /* * We gather the DPOs from resolved paths. 
*/ @@ -368,7 +381,7 @@ fib_path_list_mk_lb (fib_path_list_t *path_list, { hash_key = fib_path_append_nh_for_multipath_hash( *path_index, - type, + fct, hash_key); } diff --git a/vnet/vnet/fib/fib_types.c b/vnet/vnet/fib/fib_types.c index bf76c5536e6..f881a8f3ff2 100644 --- a/vnet/vnet/fib/fib_types.c +++ b/vnet/vnet/fib/fib_types.c @@ -214,6 +214,24 @@ fib_proto_to_dpo (fib_protocol_t fib_proto) return (0); } +dpo_proto_t +fib_link_to_dpo_proto (fib_link_t linkt) +{ + switch (linkt) + { + case FIB_LINK_IP6: + return (DPO_PROTO_IP6); + case FIB_LINK_IP4: + return (DPO_PROTO_IP4); + case FIB_LINK_MPLS: + return (DPO_PROTO_MPLS); + case FIB_LINK_ETHERNET: + return (DPO_PROTO_ETHERNET); + } + ASSERT(0); + return (0); +} + fib_protocol_t dpo_proto_to_fib (dpo_proto_t dpo_proto) { @@ -225,6 +243,8 @@ dpo_proto_to_fib (dpo_proto_t dpo_proto) return (FIB_PROTOCOL_IP4); case DPO_PROTO_MPLS: return (FIB_PROTOCOL_MPLS); + default: + break; } ASSERT(0); return (0); @@ -247,16 +267,18 @@ fib_proto_to_link (fib_protocol_t proto) } fib_forward_chain_type_t -fib_proto_to_forw_chain_type (fib_protocol_t proto) +fib_forw_chain_type_from_dpo_proto (dpo_proto_t proto) { switch (proto) { - case FIB_PROTOCOL_IP4: + case DPO_PROTO_IP4: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - case FIB_PROTOCOL_IP6: + case DPO_PROTO_IP6: return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - case FIB_PROTOCOL_MPLS: + case DPO_PROTO_MPLS: return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + case DPO_PROTO_ETHERNET: + return (FIB_FORW_CHAIN_TYPE_ETHERNET); } ASSERT(0); return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); @@ -271,6 +293,8 @@ fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct) return (FIB_LINK_IP4); case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: return (FIB_LINK_IP6); + case FIB_FORW_CHAIN_TYPE_ETHERNET: + return (FIB_LINK_ETHERNET); case FIB_FORW_CHAIN_TYPE_MPLS_EOS: /* * insufficient information to to convert @@ -292,6 +316,8 @@ fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct) return (DPO_PROTO_IP4); case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: return (DPO_PROTO_IP6); + case FIB_FORW_CHAIN_TYPE_ETHERNET: + return (DPO_PROTO_ETHERNET); case FIB_FORW_CHAIN_TYPE_MPLS_EOS: /* * insufficient information to to convert diff --git a/vnet/vnet/fib/fib_types.h b/vnet/vnet/fib/fib_types.h index 4ebd68d1450..232a2301606 100644 --- a/vnet/vnet/fib/fib_types.h +++ b/vnet/vnet/fib/fib_types.h @@ -1,4 +1,4 @@ -/* + /* * Copyright (c) 2016 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,6 +58,17 @@ typedef enum fib_protocol_t_ { */ #define FIB_PROTOCOL_NONE (FIB_PROTOCOL_MAX+1) +#define FOR_EACH_FIB_PROTOCOL(_item) \ + for (_item = FIB_PROTOCOL_IP4; \ + _item <= FIB_PROTOCOL_MPLS; \ + _item++) + +#define FOR_EACH_FIB_IP_PROTOCOL(_item) \ + for (_item = FIB_PROTOCOL_IP4; \ + _item <= FIB_PROTOCOL_IP6; \ + _item++) + + /** * Link Type. This maps directly into the ethertype. 
*/ @@ -68,6 +79,7 @@ typedef enum fib_link_t_ { FIB_LINK_IP4 = 0, #endif FIB_LINK_IP6, + FIB_LINK_ETHERNET, FIB_LINK_MPLS, } __attribute__ ((packed)) fib_link_t; @@ -77,15 +89,16 @@ typedef enum fib_link_t_ { */ #define FIB_LINK_NUM (FIB_LINK_MPLS+1) -#define FIB_LINKS { \ - [FIB_LINK_IP4] = "ipv4", \ - [FIB_LINK_IP6] = "ipv6", \ - [FIB_LINK_MPLS] = "mpls", \ +#define FIB_LINKS { \ + [FIB_LINK_ETHERNET] = "ethernet", \ + [FIB_LINK_IP4] = "ipv4", \ + [FIB_LINK_IP6] = "ipv6", \ + [FIB_LINK_MPLS] = "mpls", \ } -#define FOR_EACH_FIB_LINK(_item) \ - for (_item = FIB_LINK_IP4; \ - _item <= FIB_LINK_MPLS; \ +#define FOR_EACH_FIB_LINK(_item) \ + for (_item = FIB_LINK_IP4; \ + _item <= FIB_LINK_MPLS; \ _item++) #define FOR_EACH_FIB_IP_LINK(_item) \ @@ -105,6 +118,10 @@ fib_link_t fib_proto_to_link (fib_protocol_t proto); */ typedef enum fib_forward_chain_type_t_ { /** + * Contribute an object that is to be used to forward Ethernet packets + */ + FIB_FORW_CHAIN_TYPE_ETHERNET, + /** * Contribute an object that is to be used to forward IP4 packets */ FIB_FORW_CHAIN_TYPE_UNICAST_IP4, @@ -127,6 +144,7 @@ typedef enum fib_forward_chain_type_t_ { } __attribute__ ((packed)) fib_forward_chain_type_t; #define FIB_FORW_CHAINS { \ + [FIB_FORW_CHAIN_TYPE_ETHERNET] = "ehternet", \ [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \ [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \ [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \ @@ -136,7 +154,7 @@ typedef enum fib_forward_chain_type_t_ { #define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1) #define FOR_EACH_FIB_FORW_CHAIN(_item) \ - for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \ + for (_item = FIB_FORW_CHAIN_TYPE_ETHERNET; \ _item <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; \ _item++) @@ -148,7 +166,7 @@ extern fib_link_t fib_forw_chain_type_to_link_type(fib_forward_chain_type_t fct) /** * @brief Convert from a payload-protocol to a chain type. 
*/ -extern fib_forward_chain_type_t fib_proto_to_forw_chain_type(fib_protocol_t proto); +extern fib_forward_chain_type_t fib_forw_chain_type_from_dpo_proto(dpo_proto_t proto); /** * @brief Convert from a chain type to the DPO proto it will install @@ -222,6 +240,7 @@ extern u8 * format_fib_prefix(u8 * s, va_list * args); extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args); extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto); +extern dpo_proto_t fib_link_to_dpo_proto(fib_link_t linkt); extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto); /** diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c index 9f8adc79ff0..0028118df94 100644 --- a/vnet/vnet/gre/gre.c +++ b/vnet/vnet/gre/gre.c @@ -47,11 +47,11 @@ u8 * format_gre_tx_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); gre_tx_trace_t * t = va_arg (*args, gre_tx_trace_t *); - + s = format (s, "GRE: tunnel %d len %d src %U dst %U", - t->tunnel_id, clib_net_to_host_u16 (t->length), - format_ip4_address, &t->src.as_u8, - format_ip4_address, &t->dst.as_u8); + t->tunnel_id, clib_net_to_host_u16 (t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); return s; } @@ -158,7 +158,7 @@ unformat_gre_header (unformat_input_t * input, va_list * args) vec_add2 (*result, p, n_bytes); clib_memcpy (p, h, n_bytes); } - + return 1; } @@ -198,15 +198,15 @@ static uword gre_set_rewrite (vnet_main_t * vnm, h->ip4.ttl = 64; h->ip4.protocol = IP_PROTOCOL_GRE; h->gre.protocol = clib_host_to_net_u16 (protocol); - + return sizeof (h[0]); -#endif +#endif } static uword gre_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) + vlib_node_runtime_t * node, + vlib_frame_t * frame) { gre_main_t * gm = &gre_main; u32 next_index; @@ -218,71 +218,140 @@ gre_interface_tx (vlib_main_t * vm, from = vlib_frame_vector_args (frame); /* Number of buffers / pkts */ - n_left_from = frame->n_vectors; + n_left_from = frame->n_vectors; /* Speculatively send the first buffer to the last disposition we used */ next_index = node->cached_next_index; - + while (n_left_from > 0) { /* set up to enqueue to our disposition with index = next_index */ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - /* + /* * FIXME DUAL LOOP */ while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, adj_index0, next0; + u32 bi0, adj_index0, next0; const ip_adjacency_t * adj0; - const dpo_id_t *dpo0; - ip4_header_t * ip0; - vlib_buffer_t * b0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer(vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* Fixup the checksum and len fields in the LISP tunnel encap - * that was applied at the midchain node */ - ip0->length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip0->checksum = ip4_header_checksum (ip0); - - /* Follow the DPO on which the midchain is stacked */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + const dpo_id_t *dpo0; + ip4_header_t * ip0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Fixup the checksum and len fields in the GRE tunnel encap + * that was applied at the midchain node */ + ip0->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain 
(vm, b0)); + ip0->checksum = ip4_header_checksum (ip0); + + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; adj0 = adj_get(adj_index0); - dpo0 = &adj0->sub_type.midchain.next_dpo; - next0 = dpo0->dpoi_next_node; - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - gre_tx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_id = t - gm->tunnels; - tr->length = ip0->length; - tr->src.as_u32 = ip0->src_address.as_u32; - tr->dst.as_u32 = ip0->dst_address.as_u32; - } + dpo0 = &adj0->sub_type.midchain.next_dpo; + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t - gm->tunnels; + tr->length = ip0->length; + tr->src.as_u32 = ip0->src_address.as_u32; + tr->dst.as_u32 = ip0->dst_address.as_u32; + } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } - + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static uword +gre_l2_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + gre_main_t * gm = &gre_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + const gre_tunnel_t *gt = pool_elt_at_index (gm->tunnels, rd->dev_instance); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * FIXME DUAL LOOP + */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->adj_index[FIB_LINK_ETHERNET]; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = gt - gm->tunnels; + tr->length = vlib_buffer_length_in_chain (vm, b0); + tr->src.as_u32 = gt->tunnel_src.as_u32; + tr->dst.as_u32 = gt->tunnel_src.as_u32; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, gt->l2_tx_arc); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); } vlib_node_increment_counter (vm, gre_input_node.index, - GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); return frame->n_vectors; } @@ -290,11 +359,27 @@ gre_interface_tx (vlib_main_t * vm, static clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { + gre_main_t * gm = &gre_main; + vnet_hw_interface_t * hi; + gre_tunnel_t *t; + u32 ti; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index]; + + if (~0 == ti) + /* not 
one of ours */ + return (NULL); + + t = pool_elt_at_index(gm->tunnels, ti); + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); else vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + gre_tunnel_stack(t); + return /* no error */ 0; } @@ -313,6 +398,15 @@ static u8 * format_gre_device (u8 * s, va_list * args) return s; } +static u8 * format_gre_l2_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "GRE L2-tunnel: id %d\n", dev_instance); + return s; +} + VNET_DEVICE_CLASS (gre_device_class) = { .name = "GRE tunnel device", .format_device_name = format_gre_tunnel_name, @@ -328,6 +422,21 @@ VNET_DEVICE_CLASS (gre_device_class) = { VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class, gre_interface_tx) +VNET_DEVICE_CLASS (gre_l2_device_class) = { + .name = "GRE L2 tunnel device", + .format_device_name = format_gre_tunnel_name, + .format_device = format_gre_l2_device, + .format_tx_trace = format_gre_tx_trace, + .tx_function = gre_l2_interface_tx, + .admin_up_down_function = gre_interface_admin_up_down, +#ifdef SOON + .clear counter = 0; +#endif +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_l2_device_class, + gre_l2_interface_tx) + VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { .name = "GRE", diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h index beb13d989ee..d1a6f319bac 100644 --- a/vnet/vnet/gre/gre.h +++ b/vnet/vnet/gre/gre.h @@ -89,6 +89,11 @@ typedef struct { * The index of the midchain adjacency created for this tunnel */ adj_index_t adj_index[FIB_LINK_NUM]; + + /** + * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + */ + u32 l2_tx_arc; } gre_tunnel_t; typedef struct { @@ -137,9 +142,7 @@ gre_register_input_type (vlib_main_t * vm, gre_protocol_t protocol, u32 node_index); -void gre_set_adjacency (vnet_rewrite_header_t * rw, - uword max_data_bytes, - gre_protocol_t protocol); +extern void gre_tunnel_stack (gre_tunnel_t *gt); format_function_t format_gre_protocol; format_function_t format_gre_header; @@ -147,6 +150,7 @@ format_function_t format_gre_header_with_length; extern vlib_node_registration_t gre_input_node; extern vnet_device_class_t gre_device_class; +extern vnet_device_class_t gre_l2_device_class; /* Parse gre protocol as 0xXXXX or protocol name. In either host or network byte order. 
*/ diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c index 10e9ff9be8c..0550c0bdab1 100644 --- a/vnet/vnet/gre/interface.c +++ b/vnet/vnet/gre/interface.c @@ -107,12 +107,12 @@ gre_tunnel_from_fib_node (fib_node_t *node) STRUCT_OFFSET_OF(gre_tunnel_t, node))); } -/* +/** * gre_tunnel_stack * * 'stack' (resolve the recursion for) the tunnel's midchain adjacency */ -static void +void gre_tunnel_stack (gre_tunnel_t *gt) { fib_link_t linkt; @@ -126,9 +126,18 @@ gre_tunnel_stack (gre_tunnel_t *gt) { if (ADJ_INDEX_INVALID != gt->adj_index[linkt]) { - adj_nbr_midchain_stack( - gt->adj_index[linkt], - fib_entry_contribute_ip_forwarding(gt->fib_entry_index)); + if (vnet_hw_interface_get_flags(vnet_get_main(), + gt->hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + adj_nbr_midchain_stack( + gt->adj_index[linkt], + fib_entry_contribute_ip_forwarding(gt->fib_entry_index)); + } + else + { + adj_nbr_midchain_unstack(gt->adj_index[linkt]); + } } } } @@ -194,6 +203,8 @@ gre_proto_from_fib_link (fib_link_t link) return (GRE_PROTOCOL_ip6); case FIB_LINK_MPLS: return (GRE_PROTOCOL_mpls_unicast); + case FIB_LINK_ETHERNET: + return (GRE_PROTOCOL_teb); } ASSERT(0); return (GRE_PROTOCOL_ip4); @@ -210,14 +221,7 @@ gre_rewrite (gre_tunnel_t * t, h0 = (ip4_and_gre_header_t *) rewrite_data; - if (t->teb) - { - h0->gre.protocol = clib_net_to_host_u16(GRE_PROTOCOL_teb); - } - else - { - h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link)); - } + h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link)); h0->ip4.ip_version_and_header_length = 0x45; h0->ip4.ttl = 254; @@ -230,6 +234,21 @@ gre_rewrite (gre_tunnel_t * t, return (rewrite_data); } +static void +gre_fixup (vlib_main_t *vm, + ip_adjacency_t *adj, + vlib_buffer_t *b0) +{ + ip4_header_t * ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Fixup the checksum and len fields in the GRE tunnel encap + * that was applied at the midchain node */ + ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + ip0->checksum = ip4_header_checksum (ip0); +} + static int vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, u32 * sw_if_indexp) @@ -259,6 +278,10 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); memset (t, 0, sizeof (*t)); fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL); + FOR_EACH_FIB_LINK(linkt) + { + t->adj_index[linkt] = ADJ_INDEX_INVALID; + } if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) { vnet_interface_main_t * im = &vnm->interface_main; @@ -281,6 +304,12 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, vlib_zero_simple_counter (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index); vnet_interface_counter_unlock(im); + if (a->teb) + { + t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); + } } else { if (a->teb) { @@ -294,7 +323,7 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, error = ethernet_register_interface (vnm, - gre_device_class.index, t - gm->tunnels, address, &hw_if_index, + gre_l2_device_class.index, t - gm->tunnels, address, &hw_if_index, 0); if (error) @@ -302,6 +331,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, clib_error_report (error); return VNET_API_ERROR_INVALID_REGISTRATION; } + hi = vnet_get_hw_interface (vnm, hw_if_index); + + t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); } else { hw_if_index = vnet_register_interface (vnm, 
gre_device_class.index, t - gm->tunnels, @@ -315,6 +349,7 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, t->hw_if_index = hw_if_index; t->outer_fib_index = outer_fib_index; t->sw_if_index = sw_if_index; + t->teb = a->teb; vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels; @@ -365,22 +400,40 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, * We could be smarter here and trigger this on an interface proto enable, * like we do for MPLS. */ - for (linkt = FIB_LINK_IP4; linkt <= FIB_LINK_IP6; linkt++) + if (t->teb) { - t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, - linkt, - &zero_addr, - sw_if_index); - - rewrite = gre_rewrite(t, linkt); - adj_nbr_midchain_update_rewrite(t->adj_index[linkt], - hi->tx_node_index, - rewrite); + t->adj_index[FIB_LINK_ETHERNET] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_ETHERNET, + &zero_addr, + sw_if_index); + + rewrite = gre_rewrite(t, FIB_LINK_ETHERNET); + adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_ETHERNET], + gre_fixup, + ADJ_MIDCHAIN_FLAG_NO_COUNT, + rewrite); vec_free(rewrite); } + else + { + FOR_EACH_FIB_IP_LINK (linkt) + { + t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + linkt, + &zero_addr, + sw_if_index); + + rewrite = gre_rewrite(t, linkt); + adj_nbr_midchain_update_rewrite(t->adj_index[linkt], + gre_fixup, + ADJ_MIDCHAIN_FLAG_NONE, + rewrite); + vec_free(rewrite); + } + } + t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; - t->teb = a->teb; clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); gre_tunnel_stack(t); @@ -449,7 +502,6 @@ gre_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable) { gre_main_t *gm = &gre_main; - vnet_hw_interface_t * hi; gre_tunnel_t *t; u8 *rewrite; @@ -462,7 +514,6 @@ gre_sw_interface_mpls_state_change (u32 sw_if_index, if (is_enable) { - hi = vnet_get_hw_interface (vnet_get_main(), t->hw_if_index); t->adj_index[FIB_LINK_MPLS] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, FIB_LINK_MPLS, @@ -471,7 +522,8 @@ gre_sw_interface_mpls_state_change (u32 sw_if_index, rewrite = gre_rewrite(t, FIB_LINK_MPLS); adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS], - hi->tx_node_index, + gre_fixup, + ADJ_MIDCHAIN_FLAG_NONE, rewrite); vec_free(rewrite); } diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c index ca0df9d1073..08db68324c6 100644 --- a/vnet/vnet/interface.c +++ b/vnet/vnet/interface.c @@ -639,6 +639,7 @@ setup_tx_node (vlib_main_t * vm, n->function = dev_class->tx_function; n->format_trace = dev_class->format_tx_trace; + vlib_register_errors (vm, node_index, dev_class->tx_function_n_errors, dev_class->tx_function_error_strings); diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h index 5fbf830c01c..245b86f5830 100644 --- a/vnet/vnet/interface.h +++ b/vnet/vnet/interface.h @@ -157,7 +157,6 @@ typedef struct _vnet_device_class /* Function to set mac address. */ vnet_interface_set_mac_address_function_t *mac_addr_change_function; - } vnet_device_class_t; #define VNET_DEVICE_CLASS(x,...) 
\ diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h index f9fe48687c1..d3db4debd8c 100644 --- a/vnet/vnet/ip/ip4.h +++ b/vnet/vnet/ip/ip4.h @@ -117,9 +117,7 @@ typedef struct ip4_main_t { vlib_packet_template_t ip4_arp_request_packet_template; /** Feature path configuration lists */ - vnet_ip_feature_registration_t * next_uc_feature; - vnet_ip_feature_registration_t * next_mc_feature; - vnet_ip_feature_registration_t * next_tx_feature; + vnet_ip_feature_registration_t * next_feature[VNET_N_IP_FEAT]; /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */ u32 ip4_unicast_rx_feature_check_access; @@ -181,8 +179,8 @@ static void __vnet_add_feature_registration_uc_##x (void) \ static void __vnet_add_feature_registration_uc_##x (void) \ { \ ip4_main_t * im = &ip4_main; \ - uc_##x.next = im->next_uc_feature; \ - im->next_uc_feature = &uc_##x; \ + uc_##x.next = im->next_feature[VNET_IP_RX_UNICAST_FEAT]; \ + im->next_feature[VNET_IP_RX_UNICAST_FEAT] = &uc_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t uc_##x @@ -193,8 +191,8 @@ static void __vnet_add_feature_registration_mc_##x (void) \ static void __vnet_add_feature_registration_mc_##x (void) \ { \ ip4_main_t * im = &ip4_main; \ - mc_##x.next = im->next_mc_feature; \ - im->next_mc_feature = &mc_##x; \ + mc_##x.next = im->next_feature[VNET_IP_RX_MULTICAST_FEAT]; \ + im->next_feature[VNET_IP_RX_MULTICAST_FEAT] = &mc_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t mc_##x @@ -205,8 +203,8 @@ static void __vnet_add_feature_registration_tx_##x (void) \ static void __vnet_add_feature_registration_tx_##x (void) \ { \ ip4_main_t * im = &ip4_main; \ - tx_##x.next = im->next_tx_feature; \ - im->next_tx_feature = &tx_##x; \ + tx_##x.next = im->next_feature[VNET_IP_TX_FEAT]; \ + im->next_feature[VNET_IP_TX_FEAT] = &tx_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t tx_##x diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 08fe8162260..a44afb4a529 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -878,7 +878,10 @@ static char * rx_feature_start_nodes[] = { "ip4-input", "ip4-input-no-checksum"}; static char * tx_feature_start_nodes[] = -{ "ip4-rewrite-transit"}; +{ + "ip4-rewrite-transit", + "ip4-midchain", +}; /* Source and port-range check ip4 tx feature path definition */ VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = { @@ -926,8 +929,8 @@ ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) if ((error = ip_feature_init_cast (vm, cm, vcm, feature_start_nodes, feature_start_len, - cast, - VNET_L3_PACKET_TYPE_IP4))) + im->next_feature[cast], + &im->feature_nodes[cast]))) return error; } @@ -2093,7 +2096,8 @@ always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int rewrite_for_locally_received_packets) + int rewrite_for_locally_received_packets, + int is_midchain) { ip_lookup_main_t * lm = &ip4_main.lookup_main; u32 * from = vlib_frame_vector_args (frame); @@ -2318,6 +2322,12 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ethernet_header_t)); + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + adj1->sub_type.midchain.fixup_func(vm, adj1, p1); + } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, @@ -2430,6 +2440,11 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; next0 = adj0[0].rewrite_header.next_index; + if (is_midchain) + { + 
adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + if (PREDICT_FALSE (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, tx_sw_if_index0))) @@ -2505,7 +2520,7 @@ ip4_rewrite_transit (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0); + /* rewrite_for_locally_received_packets */ 0, 0); } /** @brief IPv4 local rewrite node. @@ -2547,7 +2562,7 @@ ip4_rewrite_local (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 1); + /* rewrite_for_locally_received_packets */ 1, 0); } static uword @@ -2556,7 +2571,7 @@ ip4_midchain (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0); + /* rewrite_for_locally_received_packets */ 0, 1); } VLIB_REGISTER_NODE (ip4_rewrite_node) = { @@ -2583,11 +2598,7 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { .format_trace = format_ip4_forward_next_trace, - .n_next_nodes = 2, - .next_nodes = { - [IP4_REWRITE_NEXT_DROP] = "error-drop", - [IP4_REWRITE_NEXT_ARP] = "ip4-arp", - }, + .sibling_of = "ip4-rewrite-transit", }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain) diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index 36be64948c9..58bc7f825c3 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -161,9 +161,7 @@ typedef struct ip6_main_t { uword lookup_table_size; /* feature path configuration lists */ - vnet_ip_feature_registration_t * next_uc_feature; - vnet_ip_feature_registration_t * next_mc_feature; - vnet_ip_feature_registration_t * next_tx_feature; + vnet_ip_feature_registration_t * next_feature[VNET_N_IP_FEAT]; /* Built-in unicast feature path indices, see ip_feature_init_cast(...) 
*/ u32 ip6_unicast_rx_feature_check_access; @@ -209,8 +207,8 @@ static void __vnet_add_feature_registration_uc_##x (void) \ static void __vnet_add_feature_registration_uc_##x (void) \ { \ ip6_main_t * im = &ip6_main; \ - uc_##x.next = im->next_uc_feature; \ - im->next_uc_feature = &uc_##x; \ + uc_##x.next = im->next_feature[VNET_IP_RX_UNICAST_FEAT]; \ + im->next_feature[VNET_IP_RX_UNICAST_FEAT] = &uc_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t uc_##x @@ -221,8 +219,8 @@ static void __vnet_add_feature_registration_mc_##x (void) \ static void __vnet_add_feature_registration_mc_##x (void) \ { \ ip6_main_t * im = &ip6_main; \ - mc_##x.next = im->next_mc_feature; \ - im->next_mc_feature = &mc_##x; \ + mc_##x.next = im->next_feature[VNET_IP_RX_MULTICAST_FEAT]; \ + im->next_feature[VNET_IP_RX_MULTICAST_FEAT] = &mc_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t mc_##x @@ -233,8 +231,8 @@ static void __vnet_add_feature_registration_tx_##x (void) \ static void __vnet_add_feature_registration_tx_##x (void) \ { \ ip6_main_t * im = &ip6_main; \ - tx_##x.next = im->next_tx_feature; \ - im->next_tx_feature = &tx_##x; \ + tx_##x.next = im->next_feature[VNET_IP_TX_FEAT]; \ + im->next_feature[VNET_IP_TX_FEAT] = &tx_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t tx_##x diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index 65e87595d06..4833c11711f 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -641,7 +641,10 @@ static char * rx_feature_start_nodes[] = {"ip6-input"}; static char * tx_feature_start_nodes[] = - {"ip6-rewrite"}; +{ + "ip6-rewrite", + "ip6-midchain", +}; /* Built-in ip4 tx feature path definition */ VNET_IP6_TX_FEATURE_INIT (interface_output, static) = { @@ -680,8 +683,8 @@ ip6_feature_init (vlib_main_t * vm, ip6_main_t * im) if ((error = ip_feature_init_cast (vm, cm, vcm, feature_start_nodes, feature_start_len, - cast, - VNET_L3_PACKET_TYPE_IP6))) + im->next_feature[cast], + &im->feature_nodes[cast]))) return error; } return 0; @@ -1797,7 +1800,8 @@ always_inline uword ip6_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int rewrite_for_locally_received_packets) + int rewrite_for_locally_received_packets, + int is_midchain) { ip_lookup_main_t * lm = &ip6_main.lookup_main; u32 * from = vlib_frame_vector_args (frame); @@ -1980,6 +1984,12 @@ ip6_rewrite_inline (vlib_main_t * vm, ip0, ip1, sizeof (ethernet_header_t)); + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + adj1->sub_type.midchain.fixup_func(vm, adj1, p1); + } + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); @@ -2079,6 +2089,11 @@ ip6_rewrite_inline (vlib_main_t * vm, } } + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + p0->error = error_node->errors[error0]; from += 1; @@ -2107,7 +2122,8 @@ ip6_rewrite_transit (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0); + /* rewrite_for_locally_received_packets */ 0, + /* midchain */ 0); } static uword @@ -2116,7 +2132,8 @@ ip6_rewrite_local (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 1); + /* rewrite_for_locally_received_packets */ 1, + /* midchain */ 0); } static uword @@ -2125,7 +2142,8 @@ ip6_midchain (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* 
rewrite_for_locally_received_packets */ 0); + /* rewrite_for_locally_received_packets */ 0, + /* midchain */ 1); } VLIB_REGISTER_NODE (ip6_midchain_node) = { @@ -2135,6 +2153,8 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = { .format_trace = format_ip6_forward_next_trace, + .sibling_of = "ip6-rewrite", + .next_nodes = { [IP6_REWRITE_NEXT_DROP] = "error-drop", }, diff --git a/vnet/vnet/ip/ip_feature_registration.c b/vnet/vnet/ip/ip_feature_registration.c index b96f81bd58d..f09a894ca03 100644 --- a/vnet/vnet/ip/ip_feature_registration.c +++ b/vnet/vnet/ip/ip_feature_registration.c @@ -109,6 +109,8 @@ vnet_get_config_data. */ +static const char *vnet_cast_names[] = VNET_CAST_NAMES; + static int comma_split (u8 * s, u8 ** a, u8 ** b) { @@ -132,7 +134,8 @@ ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, vnet_l3_packet_type_t proto) + vnet_ip_feature_registration_t * first_reg, + char ***in_feature_nodes) { uword *index_by_name; uword *reg_by_index; @@ -150,49 +153,14 @@ ip_feature_init_cast (vlib_main_t * vm, int a_index, b_index; int n_features; u32 *result = 0; - vnet_ip_feature_registration_t *this_reg, *first_reg = 0; + vnet_ip_feature_registration_t *this_reg = 0; char **feature_nodes = 0; hash_pair_t *hp; u8 **keys_to_delete = 0; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; - mpls_main_t *mm = &mpls_main; index_by_name = hash_create_string (0, sizeof (uword)); reg_by_index = hash_create (0, sizeof (uword)); - if (cast == VNET_IP_RX_UNICAST_FEAT) - { - if (proto == VNET_L3_PACKET_TYPE_IP4) - first_reg = im4->next_uc_feature; - else if (proto == VNET_L3_PACKET_TYPE_IP6) - first_reg = im6->next_uc_feature; - else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) - first_reg = mm->next_feature; - else - return clib_error_return (0, - "protocol %d cast %d unsupport for features", - proto, cast); - } - else if (cast == VNET_IP_RX_MULTICAST_FEAT) - { - if (proto == VNET_L3_PACKET_TYPE_IP4) - first_reg = im4->next_mc_feature; - else if (proto == VNET_L3_PACKET_TYPE_IP6) - first_reg = im6->next_mc_feature; - else - return clib_error_return (0, - "protocol %d cast %d unsupport for features", - proto, cast); - } - else if (cast == VNET_IP_TX_FEAT) - { - if (proto == VNET_L3_PACKET_TYPE_IP4) - first_reg = im4->next_tx_feature; - else - first_reg = im6->next_tx_feature; - } - this_reg = first_reg; /* pass 1, collect feature node names, construct a before b pairs */ @@ -291,8 +259,7 @@ again: /* see if we got a partial order... */ if (vec_len (result) != n_features) - return clib_error_return - (0, "%d feature_init_cast (cast=%d), no partial order!", proto, cast); + return clib_error_return (0, "%d feature_init_cast no partial order!"); /* * We win. 
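For illustration only, not part of this patch: with the per-cast registration lists consolidated into next_feature[] and the protocol switch removed from ip_feature_init_cast(), a feature simply registers itself on the arc it wants and the ordering pass above resolves the constraints. The node name and constraint target below are hypothetical.

    /* hypothetical registration on the ip4 TX arc; ip_feature_init_cast()
     * orders it against the other registered features at init time */
    VNET_IP4_TX_FEATURE_INIT (sample_tx_feature, static) = {
      .node_name = "sample-ip4-tx-feature",
      .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
    };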
@@ -318,12 +285,7 @@ again: feature_nodes, vec_len (feature_nodes)); /* Save a copy for show command */ - if (proto == VNET_L3_PACKET_TYPE_IP4) - im4->feature_nodes[cast] = feature_nodes; - else if (proto == VNET_L3_PACKET_TYPE_IP6) - im6->feature_nodes[cast] = feature_nodes; - else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) - mm->feature_nodes = feature_nodes; + *in_feature_nodes = feature_nodes; /* Finally, clean up all the shit we allocated */ /* *INDENT-OFF* */ @@ -391,6 +353,56 @@ VLIB_CLI_COMMAND (show_ip_features_command, static) = { /** Display the set of IP features configured on a specific interface */ +void +ip_interface_features_show (vlib_main_t * vm, + const char *pname, + ip_config_main_t * cm, u32 sw_if_index) +{ + u32 node_index, current_config_index; + vnet_cast_t cast; + vnet_config_main_t *vcm; + vnet_config_t *cfg; + u32 cfg_index; + vnet_config_feature_t *feat; + vlib_node_t *n; + int i; + + vlib_cli_output (vm, "%s feature paths configured on %U...", + pname, format_vnet_sw_if_index_name, + vnet_get_main (), sw_if_index); + + for (cast = VNET_IP_RX_UNICAST_FEAT; cast < VNET_N_IP_FEAT; cast++) + { + vcm = &(cm[cast].config_main); + + vlib_cli_output (vm, "\n%s %s:", pname, vnet_cast_names[cast]); + + if (NULL == cm[cast].config_index_by_sw_if_index || + vec_len (cm[cast].config_index_by_sw_if_index) < sw_if_index) + { + vlib_cli_output (vm, "none configured"); + continue; + } + + current_config_index = vec_elt (cm[cast].config_index_by_sw_if_index, + sw_if_index); + + ASSERT (current_config_index + < vec_len (vcm->config_pool_index_by_user_index)); + + cfg_index = vcm->config_pool_index_by_user_index[current_config_index]; + cfg = pool_elt_at_index (vcm->config_pool, cfg_index); + + for (i = 0; i < vec_len (cfg->features); i++) + { + feat = cfg->features + i; + node_index = feat->node_index; + n = vlib_get_node (vm, node_index); + vlib_cli_output (vm, " %v", n->name); + } + } +} + static clib_error_t * show_ip_interface_features_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -403,17 +415,7 @@ show_ip_interface_features_command_fn (vlib_main_t * vm, ip_lookup_main_t *lm6 = &im6->lookup_main; ip_lookup_main_t *lm; - ip_config_main_t *cm; - vnet_config_main_t *vcm; - vnet_config_t *cfg; - u32 cfg_index; - vnet_config_feature_t *feat; - vlib_node_t *n; - u32 sw_if_index; - u32 node_index; - u32 current_config_index; - int i, af; - u32 cast; + u32 sw_if_index, af; if (!unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) return clib_error_return (0, "Interface not specified..."); @@ -421,7 +423,6 @@ show_ip_interface_features_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "IP feature paths configured on %U...", format_vnet_sw_if_index_name, vnm, sw_if_index); - for (af = 0; af < 2; af++) { if (af == 0) @@ -429,33 +430,8 @@ show_ip_interface_features_command_fn (vlib_main_t * vm, else lm = lm6; - for (cast = VNET_IP_RX_UNICAST_FEAT; cast < VNET_N_IP_FEAT; cast++) - { - cm = lm->feature_config_mains + cast; - vcm = &cm->config_main; - - vlib_cli_output (vm, "\nipv%s %scast:", - (af == 0) ? "4" : "6", - cast == VNET_IP_RX_UNICAST_FEAT ? 
"uni" : "multi"); - - current_config_index = vec_elt (cm->config_index_by_sw_if_index, - sw_if_index); - - ASSERT (current_config_index - < vec_len (vcm->config_pool_index_by_user_index)); - - cfg_index = - vcm->config_pool_index_by_user_index[current_config_index]; - cfg = pool_elt_at_index (vcm->config_pool, cfg_index); - - for (i = 0; i < vec_len (cfg->features); i++) - { - feat = cfg->features + i; - node_index = feat->node_index; - n = vlib_get_node (vm, node_index); - vlib_cli_output (vm, " %v", n->name); - } - } + ip_interface_features_show (vm, (af == 0) ? "ip4" : "ip6", + lm->feature_config_mains, sw_if_index); } return 0; diff --git a/vnet/vnet/ip/ip_feature_registration.h b/vnet/vnet/ip/ip_feature_registration.h index 95ee78ad8fe..b86e3a626e1 100644 --- a/vnet/vnet/ip/ip_feature_registration.h +++ b/vnet/vnet/ip/ip_feature_registration.h @@ -31,6 +31,12 @@ typedef struct _vnet_ip_feature_registration char **runs_after; } vnet_ip_feature_registration_t; +typedef struct ip_config_main_t_ +{ + vnet_config_main_t config_main; + u32 *config_index_by_sw_if_index; +} ip_config_main_t; + /** Syntactic sugar, the c-compiler won't initialize registrations without it */ #define ORDER_CONSTRAINTS (char*[]) @@ -39,8 +45,12 @@ clib_error_t *ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, - vnet_l3_packet_type_t proto); + vnet_ip_feature_registration_t * + first_reg, char ***feature_nodes); + +void ip_interface_features_show (vlib_main_t * vm, + const char *pname, + ip_config_main_t * cm, u32 sw_if_index); #endif /* included_ip_feature_registration_h */ diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index c8dcc141430..a21e1810983 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -54,6 +54,7 @@ #include <vnet/ip/ip6_packet.h> #include <vnet/fib/fib_node.h> #include <vnet/dpo/dpo.h> +#include <vnet/ip/ip_feature_registration.h> /** @brief Common (IP4/IP6) next index stored in adjacency. */ typedef enum { @@ -155,11 +156,22 @@ _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) */ typedef u32 flow_hash_config_t; -#define IP_ADJACENCY_OPAQUE_SZ 16 +/** + * Forward delcartion + */ +struct ip_adjacency_t_; + +/** + * @brief A function type for post-rewrite fixups on midchain adjacency + */ +typedef void (*adj_midchain_fixup_t)(vlib_main_t * vm, + struct ip_adjacency_t_ *adj, + vlib_buffer_t * b0); + /** @brief IP unicast adjacency. @note cache aligned. */ -typedef struct { +typedef struct ip_adjacency_t_ { CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); /* Handle for this adjacency in adjacency heap. */ u32 heap_handle; @@ -220,6 +232,11 @@ typedef struct { * The next DPO to use */ dpo_id_t next_dpo; + + /** + * A function to perform the post-rewrite fixup + */ + adj_midchain_fixup_t fixup_func; } midchain; /** * IP_LOOKUP_NEXT_GLEAN @@ -230,26 +247,18 @@ typedef struct { ip46_address_t receive_addr; } glean; } sub_type; - u16 opaque[IP_ADJACENCY_OPAQUE_SZ]; }; - /** @brief Special format function for this adjacency. - * Specifically good for cases which use the entire rewrite - * for their own purposes. Can easily reduce to a u16 or a u8 if/when - * the first cache line reads "full" on the free space gas gauge. 
- */ - u32 special_adjacency_format_function_index; /* 0 is invalid */ - CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); /* Rewrite in second/third cache lines */ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); - /* - * member not accessed in the data plane are relgated to the - * remaining cachelines - */ - fib_node_t ia_node; + /* + * member not accessed in the data plane are relgated to the + * remaining cachelines + */ + fib_node_t ia_node; } ip_adjacency_t; _Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0), @@ -261,32 +270,6 @@ _Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == /* An all zeros address */ extern const ip46_address_t zero_addr; -/* Index into adjacency table. */ -typedef u32 ip_adjacency_index_t; - -typedef struct { - /* Adjacency index of first index in block. */ - u32 adj_index; - - /* Power of 2 size of adjacency block. */ - u32 n_adj_in_block; - - /* Number of prefixes that point to this adjacency. */ - u32 reference_count; - - /* Normalized next hops are saved for stats/display purposes */ - struct { - /* Number of hops in the multipath. */ - u32 count; - - /* Offset into next hop heap for this block. */ - u32 heap_offset; - - /* Heap handle used to for example free block when we're done with it. */ - u32 heap_handle; - } normalized_next_hops; -} ip_multipath_adjacency_t; - /* IP multicast adjacency. */ typedef struct { /* Handle for this adjacency in adjacency heap. */ @@ -357,39 +340,6 @@ typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm, ip_adjacency_t * adj, u32 is_del); -typedef struct { - vnet_config_main_t config_main; - - u32 * config_index_by_sw_if_index; -} ip_config_main_t; - -/** - * This structure is used to dynamically register a custom adjacency - * for ip lookup. - * Typically used with - * VNET_IP4_REGISTER_ADJACENCY or - * VNET_IP6_REGISTER_ADJACENCY macros. - */ -typedef struct ip_adj_register_struct { - /** Name of the node for this registered adjacency. */ - char *node_name; - - /** Formatting function for the adjacency. - * Variadic arguments given to the function are: - * - struct ip_lookup_main_t * - * - ip_adjacency_t *adj - */ - format_function_t *fn; - - /** - * When the adjacency is registered, the ip-lookup next index will - * be written where this pointer points. - */ - u32 *next_index; - - struct ip_adj_register_struct *next; -} ip_adj_register_t; - typedef struct ip_lookup_main_t { /* Adjacency heap. */ ip_adjacency_t * adjacency_heap; @@ -439,9 +389,6 @@ typedef struct ip_lookup_main_t { /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. 
*/ u8 builtin_protocol_by_ip_protocol[256]; - - /** Registered adjacencies */ - ip_adj_register_t *registered_adjacencies; } ip_lookup_main_t; always_inline ip_adjacency_t * diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c index 4ca30f95479..82fcb4f9b95 100644 --- a/vnet/vnet/lisp-cp/control.c +++ b/vnet/vnet/lisp-cp/control.c @@ -18,6 +18,8 @@ #include <vnet/lisp-cp/packets.h> #include <vnet/lisp-cp/lisp_msg_serdes.h> #include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h> +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> #include <vnet/fib/fib_entry.h> #include <vnet/fib/fib_table.h> @@ -167,8 +169,7 @@ ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst, static int dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_l2, u8 is_add) { - uword *dp_table, *intf; - vnet_lisp_gpe_add_del_iface_args_t _ai, *ai = &_ai; + uword *dp_table; if (!is_l2) { @@ -190,38 +191,20 @@ dp_add_del_iface (lisp_cp_main_t * lcm, u32 vni, u8 is_l2, u8 is_add) } } - intf = hash_get (is_l2 ? lcm->l2_dp_intf_by_vni : lcm->dp_intf_by_vni, vni); - /* enable/disable data-plane interface */ if (is_add) { - /* create interface */ - if (!intf) - { - ai->is_add = 1; - ai->vni = vni; - ai->is_l2 = is_l2; - ai->dp_table = dp_table[0]; - - vnet_lisp_gpe_add_del_iface (ai, 0); - - /* keep track of vnis for which interfaces have been created */ - hash_set (lcm->dp_intf_by_vni, vni, 1); - } + if (is_l2) + lisp_gpe_tenant_l2_iface_add_or_lock (vni, dp_table[0]); + else + lisp_gpe_tenant_l3_iface_add_or_lock (vni, dp_table[0]); } else { - if (intf == 0) - { - clib_warning ("interface for vni %d doesn't exist!", vni); - return VNET_API_ERROR_INVALID_VALUE; - } - - ai->is_add = 0; - ai->vni = vni; - ai->dp_table = dp_table[0]; - vnet_lisp_gpe_add_del_iface (ai, 0); - hash_unset (lcm->dp_intf_by_vni, vni); + if (is_l2) + lisp_gpe_tenant_l2_iface_unlock (vni); + else + lisp_gpe_tenant_l3_iface_unlock (vni); } return 0; @@ -2041,7 +2024,6 @@ vnet_lisp_enable_disable (u8 is_enable) else { /* clear interface table */ - hash_free (lcm->dp_intf_by_vni); hash_free (lcm->fwd_entry_by_mapping_index); pool_free (lcm->fwd_entry_pool); } @@ -2647,8 +2629,6 @@ typedef enum typedef enum { LISP_CP_LOOKUP_NEXT_DROP, - LISP_CP_LOOKUP_NEXT_IP4_LOOKUP, - LISP_CP_LOOKUP_NEXT_IP6_LOOKUP, LISP_CP_LOOKUP_N_NEXT, } lisp_cp_lookup_next_t; @@ -3207,6 +3187,13 @@ lisp_cp_lookup_ip6 (vlib_main_t * vm, return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP6)); } +static uword +lisp_cp_lookup_l2 (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_MAC)); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = { .function = lisp_cp_lookup_ip4, @@ -3222,8 +3209,6 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = { .next_nodes = { [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop", - [LISP_CP_LOOKUP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [LISP_CP_LOOKUP_NEXT_IP6_LOOKUP] = "ip6-lookup", }, }; /* *INDENT-ON* */ @@ -3243,8 +3228,25 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = { .next_nodes = { [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop", - [LISP_CP_LOOKUP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [LISP_CP_LOOKUP_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = { + .function = lisp_cp_lookup_l2, + .name = "lisp-cp-lookup-l2", + .vector_size = sizeof (u32), + .format_trace = format_lisp_cp_lookup_trace, + .type = 
VLIB_NODE_TYPE_INTERNAL, + + .n_errors = LISP_CP_LOOKUP_N_ERROR, + .error_strings = lisp_cp_lookup_error_strings, + + .n_next_nodes = LISP_CP_LOOKUP_N_NEXT, + + .next_nodes = { + [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/lisp-cp/control.h b/vnet/vnet/lisp-cp/control.h index d9dc81d1a78..a65ec4cbab2 100644 --- a/vnet/vnet/lisp-cp/control.h +++ b/vnet/vnet/lisp-cp/control.h @@ -136,7 +136,6 @@ typedef struct uword *vni_by_bd_id; /* track l2 and l3 interfaces that have been created for vni */ - uword *dp_intf_by_vni; uword *l2_dp_intf_by_vni; /* Proxy ETR map index */ diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.c b/vnet/vnet/lisp-cp/lisp_cp_dpo.c index 0bb8098d6fc..185b07a2c1b 100644 --- a/vnet/vnet/lisp-cp/lisp_cp_dpo.c +++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.c @@ -17,40 +17,44 @@ #include <vnet/lisp-gpe/lisp_gpe.h> #include <vnet/lisp-cp/control.h> -index_t -lisp_cp_dpo_get (fib_protocol_t proto) +/** + * The static array of LISP punt DPOs + */ +static dpo_id_t lisp_cp_dpos[DPO_PROTO_NUM]; + +const dpo_id_t * +lisp_cp_dpo_get (dpo_proto_t proto) { - /* - * there are only two instances of this DPO type. - * we can use the protocol as the index - */ - return (proto); + /* + * there are only two instances of this DPO type. + * we can use the protocol as the index + */ + return (&lisp_cp_dpos[proto]); } -static u8* -format_lisp_cp_dpo (u8 *s, va_list *args) +static u8 * +format_lisp_cp_dpo (u8 * s, va_list * args) { - index_t index = va_arg (*args, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); - return (format(s, "lisp-cp-punt-%U", - format_fib_protocol, index)); + return (format (s, "lisp-cp-punt-%U", format_dpo_proto, index)); } static void -lisp_cp_dpo_lock (dpo_id_t *dpo) +lisp_cp_dpo_lock (dpo_id_t * dpo) { } static void -lisp_cp_dpo_unlock (dpo_id_t *dpo) +lisp_cp_dpo_unlock (dpo_id_t * dpo) { } const static dpo_vft_t lisp_cp_vft = { - .dv_lock = lisp_cp_dpo_lock, - .dv_unlock = lisp_cp_dpo_unlock, - .dv_format = format_lisp_cp_dpo, + .dv_lock = lisp_cp_dpo_lock, + .dv_unlock = lisp_cp_dpo_unlock, + .dv_format = format_lisp_cp_dpo, }; /** @@ -60,34 +64,54 @@ const static dpo_vft_t lisp_cp_vft = { * this means that these graph nodes are ones from which a LISP-CP is the * parent object in the DPO-graph. */ -const static char* const lisp_cp_ip4_nodes[] = -{ - "lisp-cp-lookup-ip4", - NULL, +const static char *const lisp_cp_ip4_nodes[] = { + "lisp-cp-lookup-ip4", + NULL, }; -const static char* const lisp_cp_ip6_nodes[] = -{ - "lisp-cp-lookup-ip6", - NULL, + +const static char *const lisp_cp_ip6_nodes[] = { + "lisp-cp-lookup-ip6", + NULL, }; -const static char* const * const lisp_cp_nodes[DPO_PROTO_NUM] = -{ - [DPO_PROTO_IP4] = lisp_cp_ip4_nodes, - [DPO_PROTO_IP6] = lisp_cp_ip6_nodes, - [DPO_PROTO_MPLS] = NULL, +const static char *const lisp_cp_ethernet_nodes[] = { + "lisp-cp-lookup-l2", + NULL, +}; + + +const static char *const *const lisp_cp_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = lisp_cp_ip4_nodes, + [DPO_PROTO_IP6] = lisp_cp_ip6_nodes, + [DPO_PROTO_ETHERNET] = lisp_cp_ethernet_nodes, + [DPO_PROTO_MPLS] = NULL, }; clib_error_t * lisp_cp_dpo_module_init (vlib_main_t * vm) { - /* - * there are no exit arcs from the LIS-CP VLIB node, so we - * pass NULL as said node array. 
- */ - dpo_register(DPO_LISP_CP, &lisp_cp_vft, lisp_cp_nodes); + dpo_proto_t dproto; + + /* + * there are no exit arcs from the LIS-CP VLIB node, so we + * pass NULL as said node array. + */ + dpo_register (DPO_LISP_CP, &lisp_cp_vft, lisp_cp_nodes); + + FOR_EACH_DPO_PROTO (dproto) + { + dpo_set (&lisp_cp_dpos[dproto], DPO_LISP_CP, dproto, dproto); + } - return (NULL); + return (NULL); } -VLIB_INIT_FUNCTION(lisp_cp_dpo_module_init); +VLIB_INIT_FUNCTION (lisp_cp_dpo_module_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.h b/vnet/vnet/lisp-cp/lisp_cp_dpo.h index ea97711a8de..f0f3fae81a4 100644 --- a/vnet/vnet/lisp-cp/lisp_cp_dpo.h +++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.h @@ -17,7 +17,6 @@ #define __LISP_CP_DPO_H__ #include <vnet/vnet.h> -#include <vnet/fib/fib_types.h> #include <vnet/dpo/dpo.h> /** @@ -25,14 +24,22 @@ */ typedef struct lisp_cp_dpo_t { - /** - * The transport payload type. - */ - fib_protocol_t lcd_proto; + /** + * The transport payload type. + */ + dpo_proto_t lcd_proto; } lisp_cp_dpo_t; -extern index_t lisp_cp_dpo_get(fib_protocol_t proto); +extern const dpo_id_t *lisp_cp_dpo_get (dpo_proto_t proto); -extern void lisp_cp_dpo_module_init(void); +extern void lisp_cp_dpo_module_init (void); #endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h index e6811f0187e..b37315edf8d 100644 --- a/vnet/vnet/lisp-cp/lisp_types.h +++ b/vnet/vnet/lisp-cp/lisp_types.h @@ -90,11 +90,11 @@ typedef enum LCAF_TYPES } lcaf_type_t; -typedef enum +typedef enum fid_addr_type_t_ { FID_ADDR_IP_PREF, FID_ADDR_MAC -} fid_addr_type_t; +} __attribute__ ((packed)) fid_addr_type_t; /* flat address type */ typedef struct @@ -104,7 +104,7 @@ typedef struct ip_prefix_t ippref; u8 mac[6]; }; - u8 type; /* fid_addr_type_t */ + fid_addr_type_t type; } fid_address_t; typedef fid_address_t dp_address_t; diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c index 52db1eb3628..0b4f7ed92ba 100644 --- a/vnet/vnet/lisp-gpe/interface.c +++ b/vnet/vnet/lisp-gpe/interface.c @@ -26,10 +26,19 @@ #include <vnet/ip/udp.h> #include <vnet/ethernet/ethernet.h> #include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h> +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> #include <vnet/adj/adj.h> #include <vnet/fib/fib_table.h> #include <vnet/fib/ip4_fib.h> #include <vnet/fib/ip6_fib.h> +#include <vnet/lisp-cp/lisp_cp_dpo.h> + +/** + * @brief The VLIB node arc/edge from the interface's TX node, to the L2 + * load-balanceing node. 
Which is where all packets go + */ +static uword l2_arc_to_lb; #define foreach_lisp_gpe_tx_next \ _(DROP, "error-drop") \ @@ -195,201 +204,23 @@ VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = { }; /* *INDENT-ON* */ -static void -add_del_lisp_gpe_default_route (u32 table_id, fib_protocol_t proto, u8 is_add) -{ - fib_prefix_t prefix = { - .fp_proto = proto, - }; - u32 fib_index; - - if (is_add) - { - /* - * Add a deafult route that results in a control plane punt DPO - */ - dpo_id_t cp_punt = DPO_NULL; - - dpo_set (&cp_punt, DPO_LISP_CP, fib_proto_to_dpo (proto), proto); - - fib_index = - fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); - fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, - FIB_ENTRY_FLAG_EXCLUSIVE, &cp_punt); - dpo_unlock (&cp_punt); - } - else - { - fib_index = fib_table_find (prefix.fp_proto, table_id); - fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); - fib_table_unlock (fib_index, prefix.fp_proto); - } -} - -void -lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) -{ - fib_node_index_t fib_index; - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); - ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - ip4_sw_interface_enable_disable (sw_if_index, 1); - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); - vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - ip6_sw_interface_enable_disable (sw_if_index, 1); -} - -#define foreach_l2_lisp_gpe_tx_next \ - _(DROP, "error-drop") \ - _(IP4_LOOKUP, "ip4-lookup") \ - _(IP6_LOOKUP, "ip6-lookup") \ - _(LISP_CP_LOOKUP, "lisp-cp-lookup") - -typedef enum -{ -#define _(sym,str) L2_LISP_GPE_TX_NEXT_##sym, - foreach_l2_lisp_gpe_tx_next -#undef _ - L2_LISP_GPE_TX_N_NEXT, -} l2_lisp_gpe_tx_next_t; typedef struct { - u32 tunnel_index; + u32 lb_index; } l2_lisp_gpe_tx_trace_t; -u8 * +static u8 * format_l2_lisp_gpe_tx_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); l2_lisp_gpe_tx_trace_t *t = va_arg (*args, l2_lisp_gpe_tx_trace_t *); - s = format (s, "L2-LISP-GPE-TX: tunnel %d", t->tunnel_index); + s = format (s, "L2-LISP-GPE-TX: load-balance %d", t->lb_index); return s; } -always_inline void -l2_process_tunnel_action (vlib_buffer_t * b0, u8 action, u32 * next0) -{ - if (LISP_SEND_MAP_REQUEST == action) - { - next0[0] = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; - } - else - { - next0[0] = L2_LISP_GPE_TX_NEXT_DROP; - } -} - -always_inline u32 -ip_flow_hash (void *data) -{ - ip4_header_t *iph = (ip4_header_t *) data; - - if ((iph->ip_version_and_header_length & 0xF0) == 0x40) - return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); - else - return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); -} - -always_inline u32 -l2_flow_hash (vlib_buffer_t * b0) -{ - ethernet_header_t *eh; - u64 a, b, c; - uword is_ip, eh_size; - u16 eh_type; - - eh = vlib_buffer_get_current (b0); - eh_type = clib_net_to_host_u16 (eh->type); - eh_size = ethernet_buffer_header_size (b0); - - is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); - - /* since we have 2 cache lines, use them */ - if (is_ip) - a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); - else - a = eh->type; - - b = mac_to_u64 
((u8 *) eh->dst_address); - c = mac_to_u64 ((u8 *) eh->src_address); - hash_mix64 (a, b, c); - - return (u32) c; -} - -/* always_inline void */ -/* l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, */ -/* u32 * next0) */ -/* { */ -/* lisp_gpe_tunnel_t *t0; */ - -/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */ -/* ASSERT (0 != t0); */ - -/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action)) */ -/* { */ -/* /\* compute 'flow' hash *\/ */ -/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ -/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ -/* encap_one_inline (lgm, b0, t0, next0); */ -/* } */ -/* else */ -/* { */ -/* l2_process_tunnel_action (b0, t0->action, next0); */ -/* } */ -/* } */ - -/* always_inline void */ -/* l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, */ -/* u32 ti0, u32 ti1, u32 * next0, u32 * next1) */ -/* { */ -/* lisp_gpe_tunnel_t *t0, *t1; */ - -/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */ -/* t1 = pool_elt_at_index (lgm->tunnels, ti1); */ - -/* ASSERT (0 != t0 && 0 != t1); */ - -/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action */ -/* && LISP_NO_ACTION == t1->action)) */ -/* { */ -/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ -/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ -/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */ -/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */ -/* encap_two_inline (lgm, b0, b1, t0, t1, next0, next1); */ -/* } */ -/* else */ -/* { */ -/* if (LISP_NO_ACTION == t0->action) */ -/* { */ -/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ -/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ -/* encap_one_inline (lgm, b0, t0, next0); */ -/* l2_process_tunnel_action (b1, t1->action, next1); */ -/* } */ -/* else if (LISP_NO_ACTION == t1->action) */ -/* { */ -/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */ -/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */ -/* encap_one_inline (lgm, b1, t1, next1); */ -/* l2_process_tunnel_action (b0, t0->action, next0); */ -/* } */ -/* else */ -/* { */ -/* l2_process_tunnel_action (b0, t0->action, next0); */ -/* l2_process_tunnel_action (b1, t1->action, next1); */ -/* } */ -/* } */ -/* } */ - /** * @brief LISP-GPE interface TX (encap) function for L2 overlays. * @node l2_lisp_gpe_interface_tx @@ -425,108 +256,10 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - u32 next0, next1; - lisp_gpe_tunnel_t *t0 = 0, *t1 = 0; - // ethernet_header_t *e0, *e1; - - next0 = next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* e0 = vlib_buffer_get_current (b0); */ - /* e1 = vlib_buffer_get_current (b1); */ - - /* lookup dst + src mac */ - /* ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, */ - /* e0->src_address, e0->dst_address); */ - /* ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer (b1)->l2.bd_index, */ - /* e1->src_address, e1->dst_address); */ - - /* if (PREDICT_TRUE ((u32) ~ 0 != ti0) && (u32) ~ 0 != ti1) */ - /* { */ - /* /\* process both tunnels *\/ */ - /* l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1); */ - /* } */ - /* else */ - /* { */ - /* if ((u32) ~ 0 != ti0) */ - /* { */ - /* /\* process tunnel for b0 *\/ */ - /* l2_process_one (lgm, b0, ti0, &next0); */ - - /* /\* no tunnel found for b1, send to control plane *\/ */ - /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ - /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */ - /* } */ - /* else if ((u32) ~ 0 != ti1) */ - /* { */ - /* /\* process tunnel for b1 *\/ */ - /* l2_process_one (lgm, b1, ti1, &next1); */ - - /* /\* no tunnel found b0, send to control plane *\/ */ - /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ - /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ - /* } */ - /* else */ - /* { */ - /* /\* no tunnels found *\/ */ - /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ - /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ - /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ - /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */ - /* } */ - /* } */ - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, - sizeof (*tr)); - tr->tunnel_index = t0 - lgm->tunnels; - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b1, - sizeof (*tr)); - tr->tunnel_index = t1 - lgm->tunnels; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { vlib_buffer_t *b0; - u32 bi0, ti0, next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; + u32 bi0, lbi0; ethernet_header_t *e0; bi0 = from[0]; @@ -539,29 +272,22 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); e0 = vlib_buffer_get_current (b0); + vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; + /* lookup dst + src mac */ - ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, - e0->src_address, e0->dst_address); - - /* if (PREDICT_TRUE ((u32) ~ 0 != ti0)) */ - /* { */ - /* l2_process_one (lgm, b0, ti0, &next0); */ - /* } */ - /* else */ - /* { */ - /* /\* no tunnel found send to control plane *\/ */ - /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ - /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ - /* } */ + lbi0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, + e0->src_address, e0->dst_address); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = lbi0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { 
l2_lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_index = ti0 ? ti0 : ~0; + tr->lb_index = lbi0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); + n_left_to_next, bi0, l2_arc_to_lb); } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -581,14 +307,14 @@ format_l2_lisp_gpe_name (u8 * s, va_list * args) VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = { .name = "L2_LISP_GPE", .format_device_name = format_l2_lisp_gpe_name, - .format_tx_trace = format_lisp_gpe_tx_trace, + .format_tx_trace = format_l2_lisp_gpe_tx_trace, .tx_function = l2_lisp_gpe_interface_tx, .no_flatten_output_chains = 1, }; /* *INDENT-ON* */ static vnet_hw_interface_t * -create_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table, +lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table, vnet_device_class_t * dev_class, tunnel_lookup_t * tuns) { @@ -646,7 +372,7 @@ create_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table, } static void -remove_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table, +lisp_gpe_remove_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table, tunnel_lookup_t * tuns) { vnet_main_t *vnm = lgm->vnet_main; @@ -672,6 +398,64 @@ remove_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table, hash_unset (tuns->vni_by_sw_if_index, hi->sw_if_index); } +static void +lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) +{ + fib_node_index_t fib_index; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip4_sw_interface_enable_disable (sw_if_index, 1); + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip6_sw_interface_enable_disable (sw_if_index, 1); +} + +static void +lisp_gpe_tenant_del_default_routes (u32 table_id) +{ + fib_protocol_t proto; + + FOR_EACH_FIB_IP_PROTOCOL (proto) + { + fib_prefix_t prefix = { + .fp_proto = proto, + }; + u32 fib_index; + + fib_index = fib_table_find (prefix.fp_proto, table_id); + fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); + fib_table_unlock (fib_index, prefix.fp_proto); + } +} + +static void +lisp_gpe_tenant_add_default_routes (u32 table_id) +{ + fib_protocol_t proto; + + FOR_EACH_FIB_IP_PROTOCOL (proto) + { + fib_prefix_t prefix = { + .fp_proto = proto, + }; + u32 fib_index; + + /* + * Add a deafult route that results in a control plane punt DPO + */ + fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); + fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, + lisp_cp_dpo_get (fib_proto_to_dpo + (proto))); + } +} + + /** * @brief Add/del LISP-GPE L3 interface. * @@ -685,68 +469,70 @@ remove_lisp_gpe_iface (lisp_gpe_main_t * lgm, u32 hi_index, u32 dp_table, * * @return number of vectors in frame. 
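For illustration only, not part of this patch: a minimal sketch of the split add/del L3 interface API below, assuming the caller (e.g. the new tenant module) tracks vni and table_id itself.

    u32 sw_if_index;

    sw_if_index = lisp_gpe_add_l3_iface (&lisp_gpe_main, vni, table_id);
    if (~0 != sw_if_index)
      {
        /* interface is admin/link up with punt default routes installed;
         * use it, then release it */
        lisp_gpe_del_l3_iface (&lisp_gpe_main, vni, table_id);
      }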
*/ -static int -lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_iface_args_t * a) +u32 +lisp_gpe_add_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id) { vnet_main_t *vnm = lgm->vnet_main; tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces; vnet_hw_interface_t *hi; uword *hip, *si; - hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, a->table_id); + hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, table_id); - if (a->is_add) + if (hip) { - if (hip) - { - clib_warning ("vrf %d already mapped to a vni", a->table_id); - return -1; - } + clib_warning ("vrf %d already mapped to a vni", table_id); + return ~0; + } - si = hash_get (l3_ifaces->sw_if_index_by_vni, a->vni); - if (si) - { - clib_warning ("Interface for vni %d already exists", a->vni); - return -1; - } + si = hash_get (l3_ifaces->sw_if_index_by_vni, vni); - /* create lisp iface and populate tunnel tables */ - hi = create_lisp_gpe_iface (lgm, a->vni, a->table_id, - &lisp_gpe_device_class, l3_ifaces); + if (si) + { + clib_warning ("Interface for vni %d already exists", vni); + } - /* insert default routes that point to lisp-cp lookup */ - lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id); - add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 1); - add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 1); + /* create lisp iface and populate tunnel tables */ + hi = lisp_gpe_create_iface (lgm, vni, table_id, + &lisp_gpe_device_class, l3_ifaces); - /* enable interface */ - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); - vnet_hw_interface_set_flags (vnm, hi->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - } - else - { - if (hip == 0) - { - clib_warning ("The interface for vrf %d doesn't exist", - a->table_id); - return -1; - } + /* insert default routes that point to lisp-cp lookup */ + lisp_gpe_iface_set_table (hi->sw_if_index, table_id); + lisp_gpe_tenant_add_default_routes (table_id); + + /* enable interface */ + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vnet_hw_interface_set_flags (vnm, hi->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); - hi = vnet_get_hw_interface (vnm, hip[0]); + return (hi->sw_if_index); +} + +void +lisp_gpe_del_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 table_id) +{ + vnet_main_t *vnm = lgm->vnet_main; + tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces; + vnet_hw_interface_t *hi; + uword *hip; - remove_lisp_gpe_iface (lgm, hip[0], a->table_id, &lgm->l3_ifaces); + hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, table_id); - /* unset default routes */ - ip4_sw_interface_enable_disable (hi->sw_if_index, 0); - ip6_sw_interface_enable_disable (hi->sw_if_index, 0); - add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 0); - add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 0); + if (hip == 0) + { + clib_warning ("The interface for vrf %d doesn't exist", table_id); + return; } - return 0; + hi = vnet_get_hw_interface (vnm, hip[0]); + + lisp_gpe_remove_iface (lgm, hip[0], table_id, &lgm->l3_ifaces); + + /* unset default routes */ + ip4_sw_interface_enable_disable (hi->sw_if_index, 0); + ip6_sw_interface_enable_disable (hi->sw_if_index, 0); + lisp_gpe_tenant_del_default_routes (table_id); } /** @@ -760,9 +546,8 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, * * @return number of vectors in frame. 
*/ -static int -lisp_gpe_add_del_l2_iface (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_iface_args_t * a) +u32 +lisp_gpe_add_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id) { vnet_main_t *vnm = lgm->vnet_main; tunnel_lookup_t *l2_ifaces = &lgm->l2_ifaces; @@ -770,74 +555,71 @@ lisp_gpe_add_del_l2_iface (lisp_gpe_main_t * lgm, uword *hip, *si; u16 bd_index; - bd_index = bd_find_or_add_bd_index (&bd_main, a->bd_id); + bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index); - if (a->is_add) + if (hip) { - if (hip) - { - clib_warning ("bridge domain %d already mapped to a vni", a->bd_id); - return -1; - } + clib_warning ("bridge domain %d already mapped to a vni", bd_id); + return ~0; + } - si = hash_get (l2_ifaces->sw_if_index_by_vni, a->vni); - if (si) - { - clib_warning ("Interface for vni %d already exists", a->vni); - return -1; - } + si = hash_get (l2_ifaces->sw_if_index_by_vni, vni); + if (si) + { + clib_warning ("Interface for vni %d already exists", vni); + return ~0; + } - /* create lisp iface and populate tunnel tables */ - hi = create_lisp_gpe_iface (lgm, a->vni, bd_index, - &l2_lisp_gpe_device_class, &lgm->l2_ifaces); + /* create lisp iface and populate tunnel tables */ + hi = lisp_gpe_create_iface (lgm, vni, bd_index, + &l2_lisp_gpe_device_class, &lgm->l2_ifaces); - /* add iface to l2 bridge domain */ - set_int_l2_mode (lgm->vlib_main, vnm, MODE_L2_BRIDGE, hi->sw_if_index, - bd_index, 0, 0, 0); + /* enable interface */ + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vnet_hw_interface_set_flags (vnm, hi->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); - /* set egress arcs */ -#define _(sym,str) vlib_node_add_named_next_with_slot (vnm->vlib_main, \ - hi->tx_node_index, str, L2_LISP_GPE_TX_NEXT_##sym); - foreach_l2_lisp_gpe_tx_next -#undef _ - /* enable interface */ - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); - vnet_hw_interface_set_flags (vnm, hi->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - } - else - { - if (hip == 0) - { - clib_warning ("The interface for bridge domain %d doesn't exist", - a->bd_id); - return -1; - } - remove_lisp_gpe_iface (lgm, hip[0], bd_index, &lgm->l2_ifaces); - } + l2_arc_to_lb = vlib_node_add_named_next (vlib_get_main (), + hi->tx_node_index, + "l2-load-balance"); - return 0; + /* we're ready. add iface to l2 bridge domain */ + set_int_l2_mode (lgm->vlib_main, vnm, MODE_L2_BRIDGE, hi->sw_if_index, + bd_index, 0, 0, 0); + + return (hi->sw_if_index); } -/** Add/del L2 or L3 LISP-GPE interface. */ -int -vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, - u32 * hw_if_indexp) +/** + * @brief Add/del LISP-GPE L2 interface. + * + * Creates LISP-GPE interface, sets it in L2 mode in the appropriate + * bridge domain, sets egress arcs and enables it. + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters to create interface. + * + * @return number of vectors in frame. 
+ */ +void +lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id) { - lisp_gpe_main_t *lgm = &lisp_gpe_main; + tunnel_lookup_t *l2_ifaces = &lgm->l2_ifaces; + u16 bd_index; + uword *hip; - if (vnet_lisp_gpe_enable_disable_status () == 0) + bd_index = bd_find_or_add_bd_index (&bd_main, bd_id); + hip = hash_get (l2_ifaces->hw_if_index_by_dp_table, bd_index); + + if (hip == 0) { - clib_warning ("LISP is disabled!"); - return VNET_API_ERROR_LISP_DISABLED; + clib_warning ("The interface for bridge domain %d doesn't exist", + bd_id); + return; } - - if (!a->is_l2) - return lisp_gpe_add_del_l3_iface (lgm, a); - else - return lisp_gpe_add_del_l2_iface (lgm, a); + lisp_gpe_remove_iface (lgm, hip[0], bd_index, &lgm->l2_ifaces); } static clib_error_t * @@ -846,12 +628,13 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; u8 is_add = 1; - clib_error_t *error = 0; - int rv = 0; u32 table_id, vni, bd_id; u8 vni_is_set = 0, vrf_is_set = 0, bd_index_is_set = 0; - vnet_lisp_gpe_add_del_iface_args_t _a, *a = &_a; + if (vnet_lisp_gpe_enable_disable_status () == 0) + { + return clib_error_return (0, "LISP is disabled"); + } /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -892,19 +675,28 @@ lisp_gpe_add_del_iface_command_fn (vlib_main_t * vm, unformat_input_t * input, if (!vrf_is_set && !bd_index_is_set) return clib_error_return (0, "vrf or bridge domain index must be set!"); - a->is_add = is_add; - a->dp_table = vrf_is_set ? table_id : bd_id; - a->vni = vni; - a->is_l2 = bd_index_is_set; - - rv = vnet_lisp_gpe_add_del_iface (a, 0); - if (0 != rv) + if (bd_index_is_set) { - error = clib_error_return (0, "failed to %s gpe iface!", - is_add ? "add" : "delete"); + if (is_add) + { + if (~0 == lisp_gpe_tenant_l2_iface_add_or_lock (vni, bd_id)) + return clib_error_return (0, "L2 interface not created"); + } + else + lisp_gpe_tenant_l2_iface_unlock (vni); + } + else + { + if (is_add) + { + if (~0 == lisp_gpe_tenant_l3_iface_add_or_lock (vni, table_id)) + return clib_error_return (0, "L3 interface not created"); + } + else + lisp_gpe_tenant_l3_iface_unlock (vni); } - return error; + return (NULL); } /* *INDENT-OFF* */ diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c deleted file mode 100644 index 8a24ec0322c..00000000000 --- a/vnet/vnet/lisp-gpe/ip_forward.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> -#include <vnet/fib/fib_table.h> -#include <vnet/fib/fib_entry.h> -#include <vnet/fib/ip6_fib.h> -#include <vnet/fib/ip4_fib.h> -#include <vnet/dpo/lookup_dpo.h> -#include <vnet/dpo/load_balance.h> - -/** - * @brief Add route to IP4 or IP6 Destination FIB. - * - * Add a route to the destination FIB that results in the lookup - * in the SRC FIB. The SRC FIB is created is it does not yet exist. 
- * - * @param[in] dst_table_id Destination FIB Table-ID - * @param[in] dst_prefix Destination IP prefix. - * @param[out] src_fib_index The index/ID of the SRC FIB created. - */ -u32 -ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) -{ - fib_node_index_t src_fib_index; - fib_prefix_t dst_fib_prefix; - fib_node_index_t dst_fei; - - ASSERT (NULL != dst_prefix); - - ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); - - /* - * lookup the destination prefix in the VRF table and retrieve the - * LISP associated data - */ - dst_fei = fib_table_lookup_exact_match (dst_fib_index, &dst_fib_prefix); - - /* - * If the FIB entry is not present, or not LISP sourced, add it - */ - if (dst_fei == FIB_NODE_INDEX_INVALID || - NULL == fib_entry_get_source_data (dst_fei, FIB_SOURCE_LISP)) - { - dpo_id_t src_lkup_dpo = DPO_NULL; - - /* create a new src FIB. */ - src_fib_index = - fib_table_create_and_lock (dst_fib_prefix.fp_proto, - "LISP-src for [%d,%U]", - dst_fib_index, - format_fib_prefix, &dst_fib_prefix); - - /* - * create a data-path object to perform the source address lookup - * in the SRC FIB - */ - lookup_dpo_add_or_lock_w_fib_index (src_fib_index, - (ip_prefix_version (dst_prefix) == - IP6 ? DPO_PROTO_IP6 : - DPO_PROTO_IP4), - LOOKUP_INPUT_SRC_ADDR, - LOOKUP_TABLE_FROM_CONFIG, - &src_lkup_dpo); - - /* - * add the entry to the destination FIB that uses the lookup DPO - */ - dst_fei = fib_table_entry_special_dpo_add (dst_fib_index, - &dst_fib_prefix, - FIB_SOURCE_LISP, - FIB_ENTRY_FLAG_EXCLUSIVE, - &src_lkup_dpo); - - /* - * the DPO is locked by the FIB entry, and we have no further - * need for it. - */ - dpo_unlock (&src_lkup_dpo); - - /* - * save the SRC FIB index on the entry so we can retrieve it for - * subsequent routes. - */ - fib_entry_set_source_data (dst_fei, FIB_SOURCE_LISP, &src_fib_index); - } - else - { - /* - * destination FIB entry already present - */ - src_fib_index = *(u32 *) fib_entry_get_source_data (dst_fei, - FIB_SOURCE_LISP); - } - - return (src_fib_index); -} - -/** - * @brief Del route to IP4 or IP6 SD FIB. - * - * Remove routes from both destination and source FIBs. - * - * @param[in] src_fib_index The index/ID of the SRC FIB - * @param[in] src_prefix Source IP prefix. - * @param[in] dst_fib_index The index/ID of the DST FIB - * @param[in] dst_prefix Destination IP prefix. - */ -void -ip_src_dst_fib_del_route (u32 src_fib_index, - const ip_prefix_t * src_prefix, - u32 dst_fib_index, const ip_prefix_t * dst_prefix) -{ - fib_prefix_t dst_fib_prefix, src_fib_prefix; - - ASSERT (NULL != dst_prefix); - ASSERT (NULL != src_prefix); - - ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); - ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - - fib_table_entry_delete (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP); - - if (0 == fib_table_get_num_entries (src_fib_index, - src_fib_prefix.fp_proto, - FIB_SOURCE_LISP)) - { - /* - * there's nothing left, unlock the source FIB and the - * destination route - */ - fib_table_entry_special_remove (dst_fib_index, - &dst_fib_prefix, FIB_SOURCE_LISP); - fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); - } -} - -/** - * @brief Add route to IP4 or IP6 SRC FIB. - * - * Adds a route to in the LISP SRC FIB with the result of the route - * being the DPO passed. - * - * @param[in] src_fib_index The index/ID of the SRC FIB - * @param[in] src_prefix Source IP prefix. - * @param[in] src_dpo The DPO the route will link to. 
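For illustration only, not part of this patch (these helpers are being removed from this file): the source/destination lookup chain the routines here construct, assuming an ip_prefix_t pair (rmt, lcl) and a vector of lisp_fwd_path_t paths.

    /* dst-FIB entry for 'rmt' resolves via a source-address lookup DPO
     * into a per-destination SRC FIB... */
    src_fib_index = ip_dst_fib_add_route (dst_fib_index, &rmt);
    /* ...where the entry for 'lcl' load-balances over the tunnel paths */
    ip_src_fib_add_route (src_fib_index, &lcl, paths);
    /* removal undoes both legs and unlocks the SRC FIB once it is empty */
    ip_src_dst_fib_del_route (src_fib_index, &lcl, dst_fib_index, &rmt);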
- */ -void -ip_src_fib_add_route_w_dpo (u32 src_fib_index, - const ip_prefix_t * src_prefix, - const dpo_id_t * src_dpo) -{ - fib_prefix_t src_fib_prefix; - - ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - - /* - * add the entry into the source fib. - */ - fib_node_index_t src_fei; - - src_fei = fib_table_lookup_exact_match (src_fib_index, &src_fib_prefix); - - if (FIB_NODE_INDEX_INVALID == src_fei || - !fib_entry_is_sourced (src_fei, FIB_SOURCE_LISP)) - { - fib_table_entry_special_dpo_add (src_fib_index, - &src_fib_prefix, - FIB_SOURCE_LISP, - FIB_ENTRY_FLAG_EXCLUSIVE, src_dpo); - } -} - -static void -ip_address_to_46 (const ip_address_t * addr, - ip46_address_t * a, fib_protocol_t * proto) -{ - *proto = (IP4 == ip_addr_version (addr) ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - switch (*proto) - { - case FIB_PROTOCOL_IP4: - a->ip4 = addr->ip.v4; - break; - case FIB_PROTOCOL_IP6: - a->ip6 = addr->ip.v6; - break; - default: - ASSERT (0); - break; - } -} - -static fib_route_path_t * -ip_src_fib_mk_paths (const lisp_fwd_path_t * paths) -{ - const lisp_gpe_adjacency_t *ladj; - fib_route_path_t *rpaths = NULL; - u8 best_priority; - u32 ii; - - vec_validate (rpaths, vec_len (paths) - 1); - - best_priority = paths[0].priority; - - vec_foreach_index (ii, paths) - { - if (paths[0].priority != best_priority) - break; - - ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj); - - ip_address_to_46 (&ladj->remote_rloc, - &rpaths[ii].frp_addr, &rpaths[ii].frp_proto); - - rpaths[ii].frp_sw_if_index = ladj->sw_if_index; - rpaths[ii].frp_weight = (paths[ii].weight ? paths[ii].weight : 1); - rpaths[ii].frp_label = MPLS_LABEL_INVALID; - } - - ASSERT (0 != vec_len (rpaths)); - - return (rpaths); -} - -/** - * @brief Add route to IP4 or IP6 SRC FIB. - * - * Adds a route to in the LISP SRC FIB for the tunnel. - * - * @param[in] src_fib_index The index/ID of the SRC FIB - * @param[in] src_prefix Source IP prefix. - * @param[in] paths The paths from which to construct the - * load balance - */ -void -ip_src_fib_add_route (u32 src_fib_index, - const ip_prefix_t * src_prefix, - const lisp_fwd_path_t * paths) -{ - fib_prefix_t src_fib_prefix; - fib_route_path_t *rpaths; - - ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - - rpaths = ip_src_fib_mk_paths (paths); - - fib_table_entry_update (src_fib_index, - &src_fib_prefix, - FIB_SOURCE_LISP, FIB_ENTRY_FLAG_NONE, rpaths); - vec_free (rpaths); -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c index d0ab37e9637..fbda8687c3b 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe.c @@ -19,475 +19,13 @@ */ #include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h> #include <vnet/lisp-gpe/lisp_gpe_adjacency.h> -#include <vnet/adj/adj_midchain.h> -#include <vnet/fib/fib_table.h> -#include <vnet/fib/fib_entry.h> -#include <vnet/fib/fib_path_list.h> -#include <vnet/dpo/drop_dpo.h> -#include <vnet/dpo/load_balance.h> +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> /** LISP-GPE global state */ lisp_gpe_main_t lisp_gpe_main; -/** - * @brief A Pool of all LISP forwarding entries - */ -static lisp_fwd_entry_t *lisp_fwd_entry_pool; - -/** - * DB of all forwarding entries. 
The Key is:{l-EID,r-EID,vni} - * where the EID encodes L2 or L3 - */ -static uword *lisp_gpe_fwd_entries; - -static void -create_fib_entries (lisp_fwd_entry_t * lfe) -{ - dpo_proto_t dproto; - - dproto = (ip_prefix_version (&lfe->key->rmt.ippref) == IP4 ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - - lfe->src_fib_index = ip_dst_fib_add_route (lfe->eid_fib_index, - &lfe->key->rmt.ippref); - - if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) - { - dpo_id_t dpo = DPO_NULL; - - switch (lfe->action) - { - case LISP_NO_ACTION: - /* TODO update timers? */ - case LISP_FORWARD_NATIVE: - /* TODO check if route/next-hop for eid exists in fib and add - * more specific for the eid with the next-hop found */ - case LISP_SEND_MAP_REQUEST: - /* insert tunnel that always sends map-request */ - dpo_set (&dpo, DPO_LISP_CP, 0, dproto); - break; - case LISP_DROP: - /* for drop fwd entries, just add route, no need to add encap tunnel */ - dpo_copy (&dpo, drop_dpo_get (dproto)); - break; - } - ip_src_fib_add_route_w_dpo (lfe->src_fib_index, - &lfe->key->lcl.ippref, &dpo); - dpo_reset (&dpo); - } - else - { - ip_src_fib_add_route (lfe->src_fib_index, - &lfe->key->lcl.ippref, lfe->paths); - } -} - -static void -delete_fib_entries (lisp_fwd_entry_t * lfe) -{ - ip_src_dst_fib_del_route (lfe->src_fib_index, - &lfe->key->lcl.ippref, - lfe->eid_fib_index, &lfe->key->rmt.ippref); -} - -static void -gid_to_dp_address (gid_address_t * g, dp_address_t * d) -{ - switch (gid_address_type (g)) - { - case GID_ADDR_IP_PREFIX: - case GID_ADDR_SRC_DST: - ip_prefix_copy (&d->ippref, &gid_address_ippref (g)); - d->type = FID_ADDR_IP_PREF; - break; - case GID_ADDR_MAC: - default: - mac_copy (&d->mac, &gid_address_mac (g)); - d->type = FID_ADDR_MAC; - break; - } -} - -static lisp_fwd_entry_t * -find_fwd_entry (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_fwd_entry_args_t * a, - lisp_gpe_fwd_entry_key_t * key) -{ - uword *p; - - memset (key, 0, sizeof (*key)); - - if (GID_ADDR_IP_PREFIX == gid_address_type (&a->rmt_eid)) - { - /* - * the ip version of the source is not set to ip6 when the - * source is all zeros. force it. - */ - ip_prefix_version (&gid_address_ippref (&a->lcl_eid)) = - ip_prefix_version (&gid_address_ippref (&a->rmt_eid)); - } - - gid_to_dp_address (&a->rmt_eid, &key->rmt); - gid_to_dp_address (&a->lcl_eid, &key->lcl); - key->vni = a->vni; - - p = hash_get_mem (lisp_gpe_fwd_entries, key); - - if (NULL != p) - { - return (pool_elt_at_index (lisp_fwd_entry_pool, p[0])); - } - return (NULL); -} - -static int -lisp_gpe_fwd_entry_path_sort (void *a1, void *a2) -{ - lisp_fwd_path_t *p1 = a1, *p2 = a2; - - return (p1->priority - p2->priority); -} - -/** - * @brief Add/Delete LISP IP forwarding entry. - * - * creation of forwarding entries for IP LISP overlay: - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[in] a Parameters for building the forwarding entry. - * - * @return 0 on success. 
- */ -static int -add_ip_fwd_entry (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_fwd_entry_args_t * a) -{ - lisp_gpe_fwd_entry_key_t key; - lisp_fwd_entry_t *lfe; - fib_protocol_t fproto; - - lfe = find_fwd_entry (lgm, a, &key); - - if (NULL != lfe) - /* don't support updates */ - return VNET_API_ERROR_INVALID_VALUE; - - pool_get (lisp_fwd_entry_pool, lfe); - memset (lfe, 0, sizeof (*lfe)); - lfe->key = clib_mem_alloc (sizeof (key)); - memcpy (lfe->key, &key, sizeof (key)); - - hash_set_mem (lisp_gpe_fwd_entries, lfe->key, lfe - lisp_fwd_entry_pool); - - fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - - lfe->type = (a->is_negative ? - LISP_FWD_ENTRY_TYPE_NEGATIVE : LISP_FWD_ENTRY_TYPE_NORMAL); - lfe->eid_table_id = a->table_id; - lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, - lfe->eid_table_id); - - if (LISP_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) - { - lisp_fwd_path_t *path; - u32 index; - - vec_validate (lfe->paths, vec_len (a->locator_pairs) - 1); - - vec_foreach_index (index, a->locator_pairs) - { - path = &lfe->paths[index]; - - path->priority = a->locator_pairs[index].priority; - path->weight = a->locator_pairs[index].weight; - - path->lisp_adj = - lisp_gpe_adjacency_find_or_create_and_lock (&a->locator_pairs - [index], - lfe->eid_table_id, - lfe->key->vni); - } - vec_sort_with_function (lfe->paths, lisp_gpe_fwd_entry_path_sort); - } - - create_fib_entries (lfe); - - return (0); -} - -static void -del_ip_fwd_entry_i (lisp_fwd_entry_t * lfe) -{ - lisp_fwd_path_t *path; - fib_protocol_t fproto; - - vec_foreach (path, lfe->paths) - { - lisp_gpe_adjacency_unlock (path->lisp_adj); - } - - delete_fib_entries (lfe); - - fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? - FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - fib_table_unlock (lfe->eid_fib_index, fproto); - - hash_unset_mem (lisp_gpe_fwd_entries, lfe->key); - clib_mem_free (lfe->key); - pool_put (lisp_fwd_entry_pool, lfe); -} - -/** - * @brief Add/Delete LISP IP forwarding entry. - * - * removal of forwarding entries for IP LISP overlay: - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[in] a Parameters for building the forwarding entry. - * - * @return 0 on success. - */ -static int -del_ip_fwd_entry (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_fwd_entry_args_t * a) -{ - lisp_gpe_fwd_entry_key_t key; - lisp_fwd_entry_t *lfe; - - lfe = find_fwd_entry (lgm, a, &key); - - if (NULL == lfe) - /* no such entry */ - return VNET_API_ERROR_INVALID_VALUE; - - del_ip_fwd_entry_i (lfe); - - return (0); -} - -static void -make_mac_fib_key (BVT (clib_bihash_kv) * kv, u16 bd_index, u8 src_mac[6], - u8 dst_mac[6]) -{ - kv->key[0] = (((u64) bd_index) << 48) | mac_to_u64 (dst_mac); - kv->key[1] = mac_to_u64 (src_mac); - kv->key[2] = 0; -} - -/** - * @brief Lookup L2 SD FIB entry - * - * Does a vni + dest + source lookup in the L2 LISP FIB. If the lookup fails - * it tries a second time with source set to 0 (i.e., a simple dest lookup). - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[in] bd_index Bridge domain index. - * @param[in] src_mac Source mac address. - * @param[in] dst_mac Destination mac address. - * - * @return index of mapping matching the lookup key. 
- */ -index_t -lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], - u8 dst_mac[6]) -{ - int rv; - BVT (clib_bihash_kv) kv, value; - - make_mac_fib_key (&kv, bd_index, src_mac, dst_mac); - rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); - - /* no match, try with src 0, catch all for dst */ - if (rv != 0) - { - kv.key[1] = 0; - rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); - if (rv == 0) - return value.value; - } - - return lisp_gpe_main.l2_lb_miss; -} - -/** - * @brief Add/del L2 SD FIB entry - * - * Inserts value in L2 FIB keyed by vni + dest + source. If entry is - * overwritten the associated value is returned. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[in] bd_index Bridge domain index. - * @param[in] src_mac Source mac address. - * @param[in] dst_mac Destination mac address. - * @param[in] val Value to add. - * @param[in] is_add Add/del flag. - * - * @return ~0 or value of overwritten entry. - */ -u32 -lisp_l2_fib_add_del_entry (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], - u8 dst_mac[6], u32 val, u8 is_add) -{ - BVT (clib_bihash_kv) kv, value; - u32 old_val = ~0; - - make_mac_fib_key (&kv, bd_index, src_mac, dst_mac); - - if (BV (clib_bihash_search) (&lgm->l2_fib, &kv, &value) == 0) - old_val = value.value; - - if (!is_add) - BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 0 /* is_add */ ); - else - { - kv.value = val; - BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 1 /* is_add */ ); - } - return old_val; -} - -static void -l2_fib_init (lisp_gpe_main_t * lgm) -{ - BV (clib_bihash_init) (&lgm->l2_fib, "l2 fib", - 1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS), - L2_FIB_DEFAULT_HASH_MEMORY_SIZE); - - /* - * the result from a 'miss' in a L2 Table - */ - lgm->l2_lb_miss = load_balance_create (1, DPO_PROTO_IP4, 0); - load_balance_set_bucket (lgm->l2_lb_miss, 0, drop_dpo_get (DPO_PROTO_IP4)); -} - -/** - * @brief Add/Delete LISP L2 forwarding entry. - * - * Coordinates the creation/removal of forwarding entries for L2 LISP overlay: - * creates lisp-gpe tunnel and injects new entry in Source/Dest L2 FIB. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[in] a Parameters for building the forwarding entry. - * - * @return 0 on success. 
- */ -static int -add_del_l2_fwd_entry (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_fwd_entry_args_t * a) -{ - /* lisp_gpe_fwd_entry_key_t key; */ - /* lisp_fwd_entry_t *lfe; */ - /* fib_protocol_t fproto; */ - /* uword *bd_indexp; */ - - /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */ - /* if (!bd_indexp) */ - /* { */ - /* clib_warning ("bridge domain %d doesn't exist", a->bd_id); */ - /* return -1; */ - /* } */ - - /* lfe = find_fwd_entry(lgm, a, &key); */ - - /* if (NULL != lfe) */ - /* /\* don't support updates *\/ */ - /* return VNET_API_ERROR_INVALID_VALUE; */ - - /* int rv; */ - /* u32 tun_index; */ - /* fib_node_index_t old_path_list; */ - /* bd_main_t *bdm = &bd_main; */ - /* fib_route_path_t *rpaths; */ - /* lisp_gpe_tunnel_t *t; */ - /* const dpo_id_t *dpo; */ - /* index_t lbi; */ - - /* /\* create tunnel *\/ */ - /* rv = add_del_ip_tunnel (a, 1 /\* is_l2 *\/ , &tun_index, NULL); */ - /* if (rv) */ - /* return rv; */ - - /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */ - /* if (!bd_indexp) */ - /* { */ - /* clib_warning ("bridge domain %d doesn't exist", a->bd_id); */ - /* return -1; */ - /* } */ - - /* t = pool_elt_at_index (lgm->tunnels, tun_index); */ - /* old_path_list = t->l2_path_list; */ - - /* if (LISP_NO_ACTION == t->action) */ - /* { */ - /* rpaths = lisp_gpe_mk_paths_for_sub_tunnels (t); */ - - /* t->l2_path_list = fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, */ - /* rpaths); */ - - /* vec_free (rpaths); */ - /* fib_path_list_lock (t->l2_path_list); */ - - /* dpo = fib_path_list_contribute_forwarding (t->l2_path_list, */ - /* FIB_FORW_CHAIN_TYPE_UNICAST_IP); */ - /* lbi = dpo->dpoi_index; */ - /* } */ - /* else if (LISP_SEND_MAP_REQUEST == t->action) */ - /* { */ - /* lbi = lgm->l2_lb_cp_lkup; */ - /* } */ - /* else */ - /* { */ - /* lbi = lgm->l2_lb_miss; */ - /* } */ - /* fib_path_list_unlock (old_path_list); */ - - /* /\* add entry to l2 lisp fib *\/ */ - /* lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac (&a->lcl_eid), */ - /* gid_address_mac (&a->rmt_eid), lbi, a->is_add); */ - return 0; -} - -/** - * @brief Forwarding entry create/remove dispatcher. - * - * Calls l2 or l3 forwarding entry add/del function based on input data. - * - * @param[in] a Forwarding entry parameters. - * @param[out] hw_if_indexp NOT USED - * - * @return 0 on success. - */ -int -vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, - u32 * hw_if_indexp) -{ - lisp_gpe_main_t *lgm = &lisp_gpe_main; - u8 type; - - if (vnet_lisp_gpe_enable_disable_status () == 0) - { - clib_warning ("LISP is disabled!"); - return VNET_API_ERROR_LISP_DISABLED; - } - - type = gid_address_type (&a->rmt_eid); - switch (type) - { - case GID_ADDR_IP_PREFIX: - if (a->is_add) - return add_ip_fwd_entry (lgm, a); - else - return del_ip_fwd_entry (lgm, a); - break; - case GID_ADDR_MAC: - return add_del_l2_fwd_entry (lgm, a); - default: - clib_warning ("Forwarding entries for type %d not supported!", type); - return -1; - } -} /** CLI command to add/del forwarding entry. 
*/ static clib_error_t * @@ -533,6 +71,10 @@ lisp_gpe_add_del_fwd_entry_command_fn (vlib_main_t * vm, { vrf_set = 1; } + else if (unformat (line_input, "bd %u", &vrf)) + { + vrf_set = 1; + } else if (unformat (line_input, "negative action %U", unformat_negative_mapping_action, &action)) { @@ -627,72 +169,6 @@ VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = { }; /* *INDENT-ON* */ -static u8 * -format_lisp_fwd_path (u8 * s, va_list ap) -{ - lisp_fwd_path_t *lfp = va_arg (ap, lisp_fwd_path_t *); - - s = format (s, "pirority:%d weight:%d ", lfp->priority, lfp->weight); - s = format (s, "adj:[%U]\n", - format_lisp_gpe_adjacency, - lisp_gpe_adjacency_get (lfp->lisp_adj), - LISP_GPE_ADJ_FORMAT_FLAG_NONE); - - return (s); -} - -static u8 * -format_lisp_gpe_fwd_entry (u8 * s, va_list ap) -{ - lisp_fwd_entry_t *lfe = va_arg (ap, lisp_fwd_entry_t *); - - s = format (s, "VNI:%d VRF:%d EID: %U -> %U", - lfe->key->vni, lfe->eid_table_id, - format_fid_address, &lfe->key->lcl, - format_fid_address, &lfe->key->rmt); - if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) - { - s = format (s, "\n Negative - action:%U", - format_negative_mapping_action, lfe->action); - } - else - { - lisp_fwd_path_t *path; - - s = format (s, "\n via:"); - vec_foreach (path, lfe->paths) - { - s = format (s, "\n %U", format_lisp_fwd_path, path); - } - } - - return (s); -} - -static clib_error_t * -lisp_gpe_fwd_entry_show (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - lisp_fwd_entry_t *lfe; - -/* *INDENT-OFF* */ - pool_foreach (lfe, lisp_fwd_entry_pool, - ({ - vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe); - })); -/* *INDENT-ON* */ - - return (NULL); -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = { - .path = "show lisp gpe entry", - .short_help = "show lisp gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>", - .function = lisp_gpe_fwd_entry_show, -}; -/* *INDENT-ON* */ - /** Check if LISP-GPE is enabled. 
*/ u8 vnet_lisp_gpe_enable_disable_status (void) @@ -714,58 +190,12 @@ vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) } else { - CLIB_UNUSED (uword * val); - hash_pair_t *p; - u32 *dp_tables = 0, *dp_table; - vnet_lisp_gpe_add_del_iface_args_t _ai, *ai = &_ai; - lisp_fwd_entry_t *lfe; - /* remove all entries */ - /* *INDENT-OFF* */ - pool_foreach (lfe, lisp_fwd_entry_pool, - ({ - del_ip_fwd_entry_i (lfe); - })); - /* *INDENT-ON* */ + vnet_lisp_gpe_fwd_entry_flush (); /* disable all l3 ifaces */ + lisp_gpe_tenant_flush (); - /* *INDENT-OFF* */ - hash_foreach_pair(p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({ - vec_add1(dp_tables, p->key); - })); - /* *INDENT-ON* */ - - vec_foreach (dp_table, dp_tables) - { - ai->is_add = 0; - ai->table_id = dp_table[0]; - ai->is_l2 = 0; - - /* disables interface and removes defaults */ - vnet_lisp_gpe_add_del_iface (ai, 0); - } - - /* disable all l2 ifaces */ - _vec_len (dp_tables) = 0; - - /* *INDENT-OFF* */ - hash_foreach_pair(p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({ - vec_add1(dp_tables, p->key); - })); - /* *INDENT-ON* */ - - vec_foreach (dp_table, dp_tables) - { - ai->is_add = 0; - ai->bd_id = dp_table[0]; - ai->is_l2 = 1; - - /* disables interface and removes defaults */ - vnet_lisp_gpe_add_del_iface (ai, 0); - } - - vec_free (dp_tables); lgm->is_en = 0; } @@ -876,11 +306,8 @@ lisp_gpe_init (vlib_main_t * vm) lgm->lm4 = &ip4_main.lookup_main; lgm->lm6 = &ip6_main.lookup_main; - lisp_gpe_fwd_entries = hash_create_mem (0, - sizeof (lisp_gpe_fwd_entry_key_t), - sizeof (uword)); - - l2_fib_init (lgm); + lgm->lisp_gpe_fwd_entries = + hash_create_mem (0, sizeof (lisp_gpe_fwd_entry_key_t), sizeof (uword)); udp_register_dst_port (vm, UDP_DST_PORT_lisp_gpe, lisp_gpe_ip4_input_node.index, 1 /* is_ip4 */ ); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h index 66009cc1947..bb0f788b197 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe.h @@ -50,152 +50,6 @@ typedef CLIB_PACKED (struct { }) ip6_udp_lisp_gpe_header_t; /* *INDENT-ON* */ -/** LISP-GPE tunnel structure */ -typedef struct -{ - /** tunnel src and dst addresses */ - locator_pair_t *locator_pairs; - - /** locator-pairs with best priority become sub-tunnels */ - u32 *sub_tunnels; - - /** decap next index */ - u32 decap_next_index; - - /* TODO remove */ - ip_address_t src, dst; - - /** FIB indices */ - u32 encap_fib_index; /* tunnel partner lookup here */ - u32 decap_fib_index; /* inner IP lookup here */ - - /** index of the source address lookup FIB */ - u32 src_fib_index; - - /** vnet intfc hw/sw_if_index */ - u32 hw_if_index; - u32 sw_if_index; - - /** L2 path-list */ - fib_node_index_t l2_path_list; - - /** action for 'negative' tunnels */ - u8 action; - - /** LISP header fields in HOST byte order */ - u8 flags; - u8 ver_res; - u8 res; - u8 next_protocol; - u32 vni; -} lisp_gpe_tunnel_t; - -/** - * @brief A path on which to forward lisp traffic - */ -typedef struct lisp_fwd_path_t_ -{ - /** - * The adjacency constructed for the locator pair - */ - index_t lisp_adj; - - /** - * Priority. Only the paths with the best priority will be installed in FIB - */ - u8 priority; - - /** - * [UE]CMP weigt for the path - */ - u8 weight; - -} lisp_fwd_path_t; - -/** - * @brief A Forwarding entry can be 'normal' or 'negative' - * Negative implies we deliberately want to add a FIB entry for an EID - * that results in 'spcial' behaviour determined by an 'action'. - * @normal' means send it down some tunnels. 
- */ -typedef enum lisp_fwd_entry_type_t_ -{ - LISP_FWD_ENTRY_TYPE_NORMAL, - LISP_FWD_ENTRY_TYPE_NEGATIVE, -} lisp_fwd_entry_type_t; - -typedef enum -{ - NO_ACTION, - FORWARD_NATIVE, - SEND_MAP_REQUEST, - DROP -} negative_fwd_actions_e; - -/** - * LISP-GPE fwd entry key - */ -typedef struct lisp_gpe_fwd_entry_key_t_ -{ - dp_address_t rmt; - dp_address_t lcl; - u32 vni; -} lisp_gpe_fwd_entry_key_t; - -/** - * @brief A LISP Forwarding Entry - * - * A forwarding entry is from a locai EID to a remote EID over a set of rloc pairs - */ -typedef struct lisp_fwd_entry_t_ -{ - /** - * The Entry's key: {lEID,r-EID,vni} - */ - lisp_gpe_fwd_entry_key_t *key; - - /** - * The VRF (in the case of L3) or Bridge-Domain (for L2) index - */ - union - { - u32 eid_table_id; - u32 eid_bd_index; - }; - - /** - * The forwarding entry type - */ - lisp_fwd_entry_type_t type; - - union - { - /** - * @brief When the type is 'normal' - * The RLOC pair that form the route's paths. i.e. where to send - * packets for this route. - */ - lisp_fwd_path_t *paths; - - /** - * @brief When the type is negative. The action to take. - */ - negative_fwd_actions_e action; - }; - - /** - * The FIB index for the overlay, i.e. the FIB in which the EIDs - * are present - */ - u32 eid_fib_index; - - /** - * The SRC-FIB index for created for anding source-route entries - */ - u32 src_fib_index; -} lisp_fwd_entry_t; - - #define foreach_lisp_gpe_ip_input_next \ _(DROP, "error-drop") \ _(IP4_INPUT, "ip4-input") \ @@ -234,8 +88,16 @@ typedef struct tunnel_lookup /** LISP-GPE global state*/ typedef struct lisp_gpe_main { - /** pool of encap tunnel instances */ - lisp_gpe_tunnel_t *tunnels; + /** + * @brief DB of all forwarding entries. The Key is:{l-EID,r-EID,vni} + * where the EID encodes L2 or L3 + */ + uword *lisp_gpe_fwd_entries; + + /** + * @brief A Pool of all LISP forwarding entries + */ + struct lisp_gpe_fwd_entry_t_ *lisp_fwd_entry_pool; /** Free vlib hw_if_indices */ u32 *free_tunnel_hw_if_indices; @@ -255,8 +117,7 @@ typedef struct lisp_gpe_main tunnel_lookup_t l2_ifaces; /** Load-balance for a miss in the table */ - index_t l2_lb_miss; - index_t l2_lb_cp_lkup; + dpo_id_t l2_lb_cp_lkup; /** convenience */ vlib_main_t *vlib_main; @@ -283,34 +144,19 @@ extern vnet_hw_interface_class_t lisp_gpe_hw_class; u8 *format_lisp_gpe_header_with_length (u8 * s, va_list * args); -/** Arguments to add an L2/L3 LISP-GPE interface*/ -typedef struct -{ - u8 is_add; - union - { - /** vrf */ - u32 table_id; - - /** bridge domain */ - u16 bd_id; - - /** generic access */ - u32 dp_table; - }; - u8 is_l2; - - /** virtual network identifier in host byte order */ - u32 vni; -} vnet_lisp_gpe_add_del_iface_args_t; - /** Read LISP-GPE status */ u8 vnet_lisp_gpe_enable_disable_status (void); +u32 +lisp_gpe_l3_iface_find_or_create (lisp_gpe_main_t * lgm, + u32 overlay_table_id, u32 vni); + /** Add/del LISP-GPE interface. 
*/ -int -vnet_lisp_gpe_add_del_iface (vnet_lisp_gpe_add_del_iface_args_t * a, - u32 * hw_if_indexp); +extern void lisp_gpe_del_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id); +extern u32 lisp_gpe_add_l2_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id); +extern void lisp_gpe_del_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id); +extern u32 lisp_gpe_add_l3_iface (lisp_gpe_main_t * lgm, u32 vni, u32 bd_id); + typedef struct { @@ -320,6 +166,14 @@ typedef struct clib_error_t * vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a); +typedef enum +{ + NO_ACTION, + FORWARD_NATIVE, + SEND_MAP_REQUEST, + DROP +} negative_fwd_actions_e; + /** */ typedef struct { @@ -366,28 +220,6 @@ typedef struct }; } vnet_lisp_gpe_add_del_fwd_entry_args_t; -int -vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, - u32 * hw_if_indexp); - -extern void -ip_src_fib_add_route (u32 src_fib_index, - const ip_prefix_t * src_prefix, - const lisp_fwd_path_t * paths); -extern void -ip_src_dst_fib_del_route (u32 src_fib_index, - const ip_prefix_t * src_prefix, - u32 dst_table_id, const ip_prefix_t * dst_prefix); -extern void -ip_src_fib_add_route_w_dpo (u32 src_fib_index, - const ip_prefix_t * src_prefix, - const dpo_id_t * src_dpo); -extern u32 -ip_dst_fib_add_route (u32 dst_table_id, const ip_prefix_t * dst_prefix); - -extern fib_route_path_t *lisp_gpe_mk_paths_for_sub_tunnels (lisp_gpe_tunnel_t - * t); - #define foreach_lgpe_ip4_lookup_next \ _(DROP, "error-drop") \ _(LISP_CP_LOOKUP, "lisp-cp-lookup") @@ -414,13 +246,6 @@ typedef enum lgpe_ip6_lookup_next u8 *format_vnet_lisp_gpe_status (u8 * s, va_list * args); -#define L2_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) -#define L2_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20) - -u32 -lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[8], - u8 dst_mac[8]); - #endif /* included_vnet_lisp_gpe_h */ /* diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c index 861f0dd38c0..d042f116dea 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -112,7 +112,7 @@ lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj) static void lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj) { - const lisp_gpe_tunnel_2_t *lgt; + const lisp_gpe_tunnel_t *lgt; dpo_id_t tmp = DPO_NULL; fib_link_t linkt; @@ -121,8 +121,10 @@ lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj) lisp_gpe_adj_get_fib_chain_type (ladj), &tmp); - FOR_EACH_FIB_IP_LINK (linkt) + FOR_EACH_FIB_LINK (linkt) { + if (FIB_LINK_MPLS == linkt) + continue; adj_nbr_midchain_stack (ladj->adjs[linkt], &tmp); } dpo_reset (&tmp); @@ -134,20 +136,32 @@ lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt) switch (linkt) { case FIB_LINK_IP4: - return (LISP_GPE_INPUT_NEXT_IP4_INPUT); + return (LISP_GPE_NEXT_PROTO_IP4); case FIB_LINK_IP6: - return (LISP_GPE_INPUT_NEXT_IP6_INPUT); + return (LISP_GPE_NEXT_PROTO_IP6); + case FIB_LINK_ETHERNET: + return (LISP_GPE_NEXT_PROTO_ETHERNET); default: ASSERT (0); } - return (LISP_GPE_INPUT_NEXT_DROP); + return (LISP_GPE_NEXT_PROTO_IP4); +} + +#define is_v4_packet(_h) ((*(u8*) _h) & 0xF0) == 0x40 + +static void +lisp_gpe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b) +{ + /* Fixup the checksum and len fields in the LISP tunnel encap + * that was applied at the midchain node */ + ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b))); } index_t lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, u32 
overlay_table_id, u32 vni) { - const lisp_gpe_tunnel_2_t *lgt; + const lisp_gpe_tunnel_t *lgt; lisp_gpe_adjacency_t *ladj; index_t lai, l3si; @@ -210,8 +224,11 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, /* * construct and stack the FIB midchain adjacencies */ - FOR_EACH_FIB_IP_LINK (linkt) + FOR_EACH_FIB_LINK (linkt) { + if (FIB_LINK_MPLS == linkt) + continue; + ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto, linkt, &nh.fp_addr, @@ -223,10 +240,10 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, (linkt)); adj_nbr_midchain_update_rewrite (ladj->adjs[linkt], - vnet_get_sup_hw_interface - (vnet_get_main (), - ladj->sw_if_index)->tx_node_index, - rewrite); + lisp_gpe_fixup, + (FIB_LINK_ETHERNET == linkt ? + ADJ_MIDCHAIN_FLAG_NO_COUNT : + ADJ_MIDCHAIN_FLAG_NONE), rewrite); vec_free (rewrite); } @@ -358,8 +375,9 @@ format_lisp_gpe_adjacency (u8 * s, va_list * args) s = format (s, " %U\n", format_lisp_gpe_tunnel, lisp_gpe_tunnel_get (ladj->tunnel_index)); - s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d\n", - ladj->adjs[FIB_LINK_IP4], ladj->adjs[FIB_LINK_IP6]); + s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n", + ladj->adjs[FIB_LINK_IP4], + ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]); } else { diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c new file mode 100644 index 00000000000..80710cdd62d --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -0,0 +1,1043 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h> +#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> +#include <vnet/lisp-cp/lisp_cp_dpo.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/adj/adj_midchain.h> + +/** + * @brief Add route to IP4 or IP6 Destination FIB. + * + * Add a route to the destination FIB that results in the lookup + * in the SRC FIB. The SRC FIB is created is it does not yet exist. + * + * @param[in] dst_table_id Destination FIB Table-ID + * @param[in] dst_prefix Destination IP prefix. + * + * @return src_fib_index The index/ID of the SRC FIB created. 
+ */ +static u32 +ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) +{ + fib_node_index_t src_fib_index; + fib_prefix_t dst_fib_prefix; + fib_node_index_t dst_fei; + + ASSERT (NULL != dst_prefix); + + ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); + + /* + * lookup the destination prefix in the VRF table and retrieve the + * LISP associated data + */ + dst_fei = fib_table_lookup_exact_match (dst_fib_index, &dst_fib_prefix); + + /* + * If the FIB entry is not present, or not LISP sourced, add it + */ + if (dst_fei == FIB_NODE_INDEX_INVALID || + NULL == fib_entry_get_source_data (dst_fei, FIB_SOURCE_LISP)) + { + dpo_id_t src_lkup_dpo = DPO_NULL; + + /* create a new src FIB. */ + src_fib_index = + fib_table_create_and_lock (dst_fib_prefix.fp_proto, + "LISP-src for [%d,%U]", + dst_fib_index, + format_fib_prefix, &dst_fib_prefix); + + /* + * create a data-path object to perform the source address lookup + * in the SRC FIB + */ + lookup_dpo_add_or_lock_w_fib_index (src_fib_index, + (ip_prefix_version (dst_prefix) == + IP6 ? DPO_PROTO_IP6 : + DPO_PROTO_IP4), + LOOKUP_INPUT_SRC_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &src_lkup_dpo); + + /* + * add the entry to the destination FIB that uses the lookup DPO + */ + dst_fei = fib_table_entry_special_dpo_add (dst_fib_index, + &dst_fib_prefix, + FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, + &src_lkup_dpo); + + /* + * the DPO is locked by the FIB entry, and we have no further + * need for it. + */ + dpo_unlock (&src_lkup_dpo); + + /* + * save the SRC FIB index on the entry so we can retrieve it for + * subsequent routes. + */ + fib_entry_set_source_data (dst_fei, FIB_SOURCE_LISP, &src_fib_index); + } + else + { + /* + * destination FIB entry already present + */ + src_fib_index = *(u32 *) fib_entry_get_source_data (dst_fei, + FIB_SOURCE_LISP); + } + + return (src_fib_index); +} + +/** + * @brief Del route to IP4 or IP6 SD FIB. + * + * Remove routes from both destination and source FIBs. + * + * @param[in] src_fib_index The index/ID of the SRC FIB + * @param[in] src_prefix Source IP prefix. + * @param[in] dst_fib_index The index/ID of the DST FIB + * @param[in] dst_prefix Destination IP prefix. + */ +static void +ip_src_dst_fib_del_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + u32 dst_fib_index, const ip_prefix_t * dst_prefix) +{ + fib_prefix_t dst_fib_prefix, src_fib_prefix; + + ASSERT (NULL != dst_prefix); + ASSERT (NULL != src_prefix); + + ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); + + fib_table_entry_delete (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP); + + if (0 == fib_table_get_num_entries (src_fib_index, + src_fib_prefix.fp_proto, + FIB_SOURCE_LISP)) + { + /* + * there's nothing left, unlock the source FIB and the + * destination route + */ + fib_table_entry_special_remove (dst_fib_index, + &dst_fib_prefix, FIB_SOURCE_LISP); + fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); + } +} + +/** + * @brief Add route to IP4 or IP6 SRC FIB. + * + * Adds a route to in the LISP SRC FIB with the result of the route + * being the DPO passed. + * + * @param[in] src_fib_index The index/ID of the SRC FIB + * @param[in] src_prefix Source IP prefix. + * @param[in] src_dpo The DPO the route will link to. 
+ */ +static void +ip_src_fib_add_route_w_dpo (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const dpo_id_t * src_dpo) +{ + fib_prefix_t src_fib_prefix; + + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); + + /* + * add the entry into the source fib. + */ + fib_node_index_t src_fei; + + src_fei = fib_table_lookup_exact_match (src_fib_index, &src_fib_prefix); + + if (FIB_NODE_INDEX_INVALID == src_fei || + !fib_entry_is_sourced (src_fei, FIB_SOURCE_LISP)) + { + fib_table_entry_special_dpo_add (src_fib_index, + &src_fib_prefix, + FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, src_dpo); + } +} + +static void +ip_address_to_46 (const ip_address_t * addr, + ip46_address_t * a, fib_protocol_t * proto) +{ + *proto = (IP4 == ip_addr_version (addr) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + switch (*proto) + { + case FIB_PROTOCOL_IP4: + a->ip4 = addr->ip.v4; + break; + case FIB_PROTOCOL_IP6: + a->ip6 = addr->ip.v6; + break; + default: + ASSERT (0); + break; + } +} + +static fib_route_path_t * +lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths) +{ + const lisp_gpe_adjacency_t *ladj; + fib_route_path_t *rpaths = NULL; + u8 best_priority; + u32 ii; + + vec_validate (rpaths, vec_len (paths) - 1); + + best_priority = paths[0].priority; + + vec_foreach_index (ii, paths) + { + if (paths[0].priority != best_priority) + break; + + ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj); + + ip_address_to_46 (&ladj->remote_rloc, + &rpaths[ii].frp_addr, &rpaths[ii].frp_proto); + + rpaths[ii].frp_sw_if_index = ladj->sw_if_index; + rpaths[ii].frp_weight = (paths[ii].weight ? paths[ii].weight : 1); + rpaths[ii].frp_label = MPLS_LABEL_INVALID; + } + + ASSERT (0 != vec_len (rpaths)); + + return (rpaths); +} + +/** + * @brief Add route to IP4 or IP6 SRC FIB. + * + * Adds a route to in the LISP SRC FIB for the tunnel. + * + * @param[in] src_fib_index The index/ID of the SRC FIB + * @param[in] src_prefix Source IP prefix. + * @param[in] paths The paths from which to construct the + * load balance + */ +static void +ip_src_fib_add_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const lisp_fwd_path_t * paths) +{ + fib_prefix_t src_fib_prefix; + fib_route_path_t *rpaths; + + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); + + rpaths = lisp_gpe_mk_fib_paths (paths); + + fib_table_entry_update (src_fib_index, + &src_fib_prefix, + FIB_SOURCE_LISP, FIB_ENTRY_FLAG_NONE, rpaths); + vec_free (rpaths); +} + + +static void +create_fib_entries (lisp_gpe_fwd_entry_t * lfe) +{ + dpo_proto_t dproto; + + dproto = (ip_prefix_version (&lfe->key->rmt.ippref) == IP4 ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + + lfe->src_fib_index = ip_dst_fib_add_route (lfe->eid_fib_index, + &lfe->key->rmt.ippref); + + if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) + { + dpo_id_t dpo = DPO_NULL; + + switch (lfe->action) + { + case LISP_NO_ACTION: + /* TODO update timers? 
*/ + case LISP_FORWARD_NATIVE: + /* TODO check if route/next-hop for eid exists in fib and add + * more specific for the eid with the next-hop found */ + case LISP_SEND_MAP_REQUEST: + /* insert tunnel that always sends map-request */ + dpo_copy (&dpo, lisp_cp_dpo_get (dproto)); + break; + case LISP_DROP: + /* for drop fwd entries, just add route, no need to add encap tunnel */ + dpo_copy (&dpo, drop_dpo_get (dproto)); + break; + } + ip_src_fib_add_route_w_dpo (lfe->src_fib_index, + &lfe->key->lcl.ippref, &dpo); + dpo_reset (&dpo); + } + else + { + ip_src_fib_add_route (lfe->src_fib_index, + &lfe->key->lcl.ippref, lfe->paths); + } +} + +static void +delete_fib_entries (lisp_gpe_fwd_entry_t * lfe) +{ + ip_src_dst_fib_del_route (lfe->src_fib_index, + &lfe->key->lcl.ippref, + lfe->eid_fib_index, &lfe->key->rmt.ippref); +} + +static void +gid_to_dp_address (gid_address_t * g, dp_address_t * d) +{ + switch (gid_address_type (g)) + { + case GID_ADDR_IP_PREFIX: + case GID_ADDR_SRC_DST: + ip_prefix_copy (&d->ippref, &gid_address_ippref (g)); + d->type = FID_ADDR_IP_PREF; + break; + case GID_ADDR_MAC: + default: + mac_copy (&d->mac, &gid_address_mac (g)); + d->type = FID_ADDR_MAC; + break; + } +} + +static lisp_gpe_fwd_entry_t * +find_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a, + lisp_gpe_fwd_entry_key_t * key) +{ + uword *p; + + memset (key, 0, sizeof (*key)); + + if (GID_ADDR_IP_PREFIX == gid_address_type (&a->rmt_eid)) + { + /* + * the ip version of the source is not set to ip6 when the + * source is all zeros. force it. + */ + ip_prefix_version (&gid_address_ippref (&a->lcl_eid)) = + ip_prefix_version (&gid_address_ippref (&a->rmt_eid)); + } + + gid_to_dp_address (&a->rmt_eid, &key->rmt); + gid_to_dp_address (&a->lcl_eid, &key->lcl); + key->vni = a->vni; + + p = hash_get_mem (lgm->lisp_gpe_fwd_entries, key); + + if (NULL != p) + { + return (pool_elt_at_index (lgm->lisp_fwd_entry_pool, p[0])); + } + return (NULL); +} + +static int +lisp_gpe_fwd_entry_path_sort (void *a1, void *a2) +{ + lisp_fwd_path_t *p1 = a1, *p2 = a2; + + return (p1->priority - p2->priority); +} + +static void +lisp_gpe_fwd_entry_mk_paths (lisp_gpe_fwd_entry_t * lfe, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + const lisp_gpe_tenant_t *lt; + lisp_fwd_path_t *path; + u32 index; + + lt = lisp_gpe_tenant_get (lfe->tenant); + vec_validate (lfe->paths, vec_len (a->locator_pairs) - 1); + + vec_foreach_index (index, a->locator_pairs) + { + path = &lfe->paths[index]; + + path->priority = a->locator_pairs[index].priority; + path->weight = a->locator_pairs[index].weight; + + path->lisp_adj = + lisp_gpe_adjacency_find_or_create_and_lock (&a->locator_pairs + [index], + lt->lt_table_id, + lfe->key->vni); + } + vec_sort_with_function (lfe->paths, lisp_gpe_fwd_entry_path_sort); +} + +/** + * @brief Add/Delete LISP IP forwarding entry. + * + * creation of forwarding entries for IP LISP overlay: + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters for building the forwarding entry. + * + * @return 0 on success. 
+ */ +static int +add_ip_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + lisp_gpe_fwd_entry_key_t key; + lisp_gpe_fwd_entry_t *lfe; + fib_protocol_t fproto; + + lfe = find_fwd_entry (lgm, a, &key); + + if (NULL != lfe) + /* don't support updates */ + return VNET_API_ERROR_INVALID_VALUE; + + pool_get (lgm->lisp_fwd_entry_pool, lfe); + memset (lfe, 0, sizeof (*lfe)); + lfe->key = clib_mem_alloc (sizeof (key)); + memcpy (lfe->key, &key, sizeof (key)); + + hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key, + lfe - lgm->lisp_fwd_entry_pool); + + fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + + lfe->type = (a->is_negative ? + LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE : + LISP_GPE_FWD_ENTRY_TYPE_NORMAL); + lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni); + lfe->eid_table_id = a->table_id; + lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, + lfe->eid_table_id); + + if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) + { + lisp_gpe_fwd_entry_mk_paths (lfe, a); + } + + create_fib_entries (lfe); + + return (0); +} + +static void +del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe) +{ + lisp_fwd_path_t *path; + fib_protocol_t fproto; + + vec_foreach (path, lfe->paths) + { + lisp_gpe_adjacency_unlock (path->lisp_adj); + } + + delete_fib_entries (lfe); + + fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + fib_table_unlock (lfe->eid_fib_index, fproto); + + hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key); + clib_mem_free (lfe->key); + pool_put (lgm->lisp_fwd_entry_pool, lfe); +} + +/** + * @brief Add/Delete LISP IP forwarding entry. + * + * removal of forwarding entries for IP LISP overlay: + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters for building the forwarding entry. + * + * @return 0 on success. + */ +static int +del_ip_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + lisp_gpe_fwd_entry_key_t key; + lisp_gpe_fwd_entry_t *lfe; + + lfe = find_fwd_entry (lgm, a, &key); + + if (NULL == lfe) + /* no such entry */ + return VNET_API_ERROR_INVALID_VALUE; + + del_ip_fwd_entry_i (lgm, lfe); + + return (0); +} + +static void +make_mac_fib_key (BVT (clib_bihash_kv) * kv, u16 bd_index, u8 src_mac[6], + u8 dst_mac[6]) +{ + kv->key[0] = (((u64) bd_index) << 48) | mac_to_u64 (dst_mac); + kv->key[1] = mac_to_u64 (src_mac); + kv->key[2] = 0; +} + +/** + * @brief Lookup L2 SD FIB entry + * + * Does a vni + dest + source lookup in the L2 LISP FIB. If the lookup fails + * it tries a second time with source set to 0 (i.e., a simple dest lookup). + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] bd_index Bridge domain index. + * @param[in] src_mac Source mac address. + * @param[in] dst_mac Destination mac address. + * + * @return index of mapping matching the lookup key. 
+ */ +index_t +lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], + u8 dst_mac[6]) +{ + int rv; + BVT (clib_bihash_kv) kv, value; + + make_mac_fib_key (&kv, bd_index, src_mac, dst_mac); + rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); + + /* no match, try with src 0, catch all for dst */ + if (rv != 0) + { + kv.key[1] = 0; + rv = BV (clib_bihash_search_inline_2) (&lgm->l2_fib, &kv, &value); + if (rv == 0) + return value.value; + } + + return lisp_gpe_main.l2_lb_cp_lkup.dpoi_index; +} + +/** + * @brief Add/del L2 SD FIB entry + * + * Inserts value in L2 FIB keyed by vni + dest + source. If entry is + * overwritten the associated value is returned. + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] bd_index Bridge domain index. + * @param[in] src_mac Source mac address. + * @param[in] dst_mac Destination mac address. + * @param[in] val Value to add. + * @param[in] is_add Add/del flag. + * + * @return ~0 or value of overwritten entry. + */ +static u32 +lisp_l2_fib_add_del_entry (u16 bd_index, u8 src_mac[6], + u8 dst_mac[6], const dpo_id_t * dpo, u8 is_add) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + BVT (clib_bihash_kv) kv, value; + u32 old_val = ~0; + + make_mac_fib_key (&kv, bd_index, src_mac, dst_mac); + + if (BV (clib_bihash_search) (&lgm->l2_fib, &kv, &value) == 0) + old_val = value.value; + + if (!is_add) + BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 0 /* is_add */ ); + else + { + kv.value = dpo->dpoi_index; + BV (clib_bihash_add_del) (&lgm->l2_fib, &kv, 1 /* is_add */ ); + } + return old_val; +} + +#define L2_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define L2_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20) + +static void +l2_fib_init (lisp_gpe_main_t * lgm) +{ + index_t lbi; + + BV (clib_bihash_init) (&lgm->l2_fib, "l2 fib", + 1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS), + L2_FIB_DEFAULT_HASH_MEMORY_SIZE); + + /* + * the result from a 'miss' in a L2 Table + */ + lbi = load_balance_create (1, DPO_PROTO_ETHERNET, 0); + load_balance_set_bucket (lbi, 0, lisp_cp_dpo_get (DPO_PROTO_ETHERNET)); + + dpo_set (&lgm->l2_lb_cp_lkup, DPO_LOAD_BALANCE, DPO_PROTO_ETHERNET, lbi); +} + +static void +del_l2_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe) +{ + lisp_fwd_path_t *path; + + vec_foreach (path, lfe->paths) + { + lisp_gpe_adjacency_unlock (path->lisp_adj); + } + + lisp_l2_fib_add_del_entry (lfe->l2.eid_bd_index, + fid_addr_mac (&lfe->key->lcl), + fid_addr_mac (&lfe->key->rmt), NULL, 0); + + fib_path_list_child_remove (lfe->l2.path_list_index, lfe->l2.child_index); + + hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key); + clib_mem_free (lfe->key); + pool_put (lgm->lisp_fwd_entry_pool, lfe); +} + +/** + * @brief Delete LISP L2 forwarding entry. + * + * Coordinates the removal of forwarding entries for L2 LISP overlay: + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters for building the forwarding entry. + * + * @return 0 on success. 
+ */ +static int +del_l2_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + lisp_gpe_fwd_entry_key_t key; + lisp_gpe_fwd_entry_t *lfe; + + lfe = find_fwd_entry (lgm, a, &key); + + if (NULL != lfe) + return VNET_API_ERROR_INVALID_VALUE; + + del_l2_fwd_entry_i (lgm, lfe); + + return (0); +} + +/** + * @brief Construct and insert the forwarding information used by a L2 entry + */ +static void +lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_t * lfe) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + dpo_id_t dpo = DPO_NULL; + + if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) + { + fib_path_list_contribute_forwarding (lfe->l2.path_list_index, + FIB_FORW_CHAIN_TYPE_ETHERNET, + &lfe->l2.dpo); + dpo_copy (&dpo, &lfe->l2.dpo); + } + else + { + dpo_copy (&dpo, &lgm->l2_lb_cp_lkup); + } + + /* add entry to l2 lisp fib */ + lisp_l2_fib_add_del_entry (lfe->l2.eid_bd_index, + fid_addr_mac (&lfe->key->lcl), + fid_addr_mac (&lfe->key->rmt), &dpo, 1); + + dpo_reset (&dpo); +} + +/** + * @brief Add LISP L2 forwarding entry. + * + * Coordinates the creation of forwarding entries for L2 LISP overlay: + * creates lisp-gpe tunnel and injects new entry in Source/Dest L2 FIB. + * + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters for building the forwarding entry. + * + * @return 0 on success. + */ +static int +add_l2_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +{ + lisp_gpe_fwd_entry_key_t key; + bd_main_t *bdm = &bd_main; + lisp_gpe_fwd_entry_t *lfe; + uword *bd_indexp; + + bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); + if (!bd_indexp) + { + clib_warning ("bridge domain %d doesn't exist", a->bd_id); + return -1; + } + + lfe = find_fwd_entry (lgm, a, &key); + + if (NULL != lfe) + /* don't support updates */ + return VNET_API_ERROR_INVALID_VALUE; + + pool_get (lgm->lisp_fwd_entry_pool, lfe); + memset (lfe, 0, sizeof (*lfe)); + lfe->key = clib_mem_alloc (sizeof (key)); + memcpy (lfe->key, &key, sizeof (key)); + + hash_set_mem (lgm->lisp_gpe_fwd_entries, lfe->key, + lfe - lgm->lisp_fwd_entry_pool); + + lfe->type = (a->is_negative ? + LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE : + LISP_GPE_FWD_ENTRY_TYPE_NORMAL); + lfe->l2.eid_bd_id = a->bd_id; + lfe->l2.eid_bd_index = bd_indexp[0]; + lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni); + + if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) + { + fib_route_path_t *rpaths; + + /* + * Make the sorted array of LISP paths with their resp. adjacency + */ + lisp_gpe_fwd_entry_mk_paths (lfe, a); + + /* + * From the LISP paths, construct a FIB path list that will + * contribute a load-balance. + */ + rpaths = lisp_gpe_mk_fib_paths (lfe->paths); + + lfe->l2.path_list_index = + fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, rpaths); + + /* + * become a child of the path-list so we receive updates when + * its forwarding state changes. this includes an implicit lock. 
+ */ + lfe->l2.child_index = + fib_path_list_child_add (lfe->l2.path_list_index, + FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, + lfe - lgm->lisp_fwd_entry_pool); + } + else + { + lfe->action = a->action; + } + + lisp_gpe_l2_update_fwding (lfe); + + return 0; +} + +/** + * @brief convert from the embedded fib_node_t struct to the LISP entry + */ +static lisp_gpe_fwd_entry_t * +lisp_gpe_fwd_entry_from_fib_node (fib_node_t * node) +{ + return ((lisp_gpe_fwd_entry_t *) (((char *) node) - + STRUCT_OFFSET_OF (lisp_gpe_fwd_entry_t, + node))); +} + +/** + * @brief Function invoked during a backwalk of the FIB graph + */ +static fib_node_back_walk_rc_t +lisp_gpe_fib_node_back_walk (fib_node_t * node, + fib_node_back_walk_ctx_t * ctx) +{ + lisp_gpe_l2_update_fwding (lisp_gpe_fwd_entry_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * @brief Get a fib_node_t struct from the index of a LISP fwd entry + */ +static fib_node_t * +lisp_gpe_fwd_entry_get_fib_node (fib_node_index_t index) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + lisp_gpe_fwd_entry_t *lfe; + + lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, index); + + return (&(lfe->node)); +} + +/** + * @brief An indication from the graph that the last lock has gone + */ +static void +lisp_gpe_fwd_entry_fib_node_last_lock_gone (fib_node_t * node) +{ + /* We don't manage the locks of the LISP objects via the graph, since + * this object has no children, so this is a no-op. */ +} + +/** + * @brief Virtual function table to register with FIB for the LISP type + */ +const static fib_node_vft_t lisp_fwd_vft = { + .fnv_get = lisp_gpe_fwd_entry_get_fib_node, + .fnv_last_lock = lisp_gpe_fwd_entry_fib_node_last_lock_gone, + .fnv_back_walk = lisp_gpe_fib_node_back_walk, +}; + +/** + * @brief Forwarding entry create/remove dispatcher. + * + * Calls l2 or l3 forwarding entry add/del function based on input data. + * + * @param[in] a Forwarding entry parameters. + * @param[out] hw_if_indexp NOT USED + * + * @return 0 on success.
+ */ +int +vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, + u32 * hw_if_indexp) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + u8 type; + + if (vnet_lisp_gpe_enable_disable_status () == 0) + { + clib_warning ("LISP is disabled!"); + return VNET_API_ERROR_LISP_DISABLED; + } + + type = gid_address_type (&a->rmt_eid); + switch (type) + { + case GID_ADDR_IP_PREFIX: + if (a->is_add) + return add_ip_fwd_entry (lgm, a); + else + return del_ip_fwd_entry (lgm, a); + break; + case GID_ADDR_MAC: + if (a->is_add) + return add_l2_fwd_entry (lgm, a); + else + return del_l2_fwd_entry (lgm, a); + default: + clib_warning ("Forwarding entries for type %d not supported!", type); + return -1; + } +} + +/** + * @brief Flush all the forwarding entries + */ +void +vnet_lisp_gpe_fwd_entry_flush (void) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + lisp_gpe_fwd_entry_t *lfe; + + /* *INDENT-OFF* */ + pool_foreach (lfe, lgm->lisp_fwd_entry_pool, + ({ + switch (fid_addr_type(&lfe->key->rmt)) + { + case FID_ADDR_MAC: + del_l2_fwd_entry_i (lgm, lfe); + break; + case FID_ADDR_IP_PREF: + del_ip_fwd_entry_i (lgm, lfe); + break; + } + })); + /* *INDENT-ON* */ +} + +static u8 * +format_lisp_fwd_path (u8 * s, va_list ap) +{ + lisp_fwd_path_t *lfp = va_arg (ap, lisp_fwd_path_t *); + + s = format (s, "priority:%d weight:%d ", lfp->priority, lfp->weight); + s = format (s, "adj:[%U]\n", + format_lisp_gpe_adjacency, + lisp_gpe_adjacency_get (lfp->lisp_adj), + LISP_GPE_ADJ_FORMAT_FLAG_NONE); + + return (s); +} + +typedef enum lisp_gpe_fwd_entry_format_flag_t_ +{ + LISP_GPE_FWD_ENTRY_FORMAT_NONE = (0 << 0), + LISP_GPE_FWD_ENTRY_FORMAT_DETAIL = (1 << 1), +} lisp_gpe_fwd_entry_format_flag_t; + + +static u8 * +format_lisp_gpe_fwd_entry (u8 * s, va_list ap) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + lisp_gpe_fwd_entry_t *lfe = va_arg (ap, lisp_gpe_fwd_entry_t *); + lisp_gpe_fwd_entry_format_flag_t flags = + va_arg (ap, lisp_gpe_fwd_entry_format_flag_t); + + s = format (s, "VNI:%d VRF:%d EID: %U -> %U [index:%d]", + lfe->key->vni, lfe->eid_table_id, + format_fid_address, &lfe->key->lcl, + format_fid_address, &lfe->key->rmt, + lfe - lgm->lisp_fwd_entry_pool); + + if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) + { + s = format (s, "\n Negative - action:%U", + format_negative_mapping_action, lfe->action); + } + else + { + lisp_fwd_path_t *path; + + s = format (s, "\n via:"); + vec_foreach (path, lfe->paths) + { + s = format (s, "\n %U", format_lisp_fwd_path, path); + } + } + + if (flags & LISP_GPE_FWD_ENTRY_FORMAT_DETAIL) + { + switch (fid_addr_type (&lfe->key->rmt)) + { + case FID_ADDR_MAC: + s = format (s, " fib-path-list:%d\n", lfe->l2.path_list_index); + s = format (s, " dpo:%U\n", format_dpo_id, &lfe->l2.dpo, 0); + break; + case FID_ADDR_IP_PREF: + break; + } + } + + return (s); +} + +static clib_error_t * +lisp_gpe_fwd_entry_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + lisp_gpe_fwd_entry_t *lfe; + index_t index; + u32 vni = ~0; + + if (unformat (input, "vni %d", &vni)) + ; + else if (unformat (input, "%d", &index)) + { + if (!pool_is_free_index (lgm->lisp_fwd_entry_pool, index)) + { + lfe = pool_elt_at_index (lgm->lisp_fwd_entry_pool, index); + + vlib_cli_output (vm, "[%d@] %U", + index, + format_lisp_gpe_fwd_entry, lfe, + LISP_GPE_FWD_ENTRY_FORMAT_DETAIL); + } + else + { + vlib_cli_output (vm, "entry %d invalid", index); + } + + return (NULL); + } + + /* *INDENT-OFF* */ + pool_foreach (lfe, lgm->lisp_fwd_entry_pool, + ({
+ if ((vni == ~0) || + (lfe->key->vni == vni)) + vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe, + LISP_GPE_FWD_ENTRY_FORMAT_NONE); + })); + /* *INDENT-ON* */ + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = { + .path = "show lisp gpe entry", + .short_help = "show lisp gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>", + .function = lisp_gpe_fwd_entry_show, +}; +/* *INDENT-ON* */ + +clib_error_t * +lisp_gpe_fwd_entry_init (vlib_main_t * vm) +{ + lisp_gpe_main_t *lgm = &lisp_gpe_main; + clib_error_t *error = NULL; + + if ((error = vlib_call_init_function (vm, lisp_cp_dpo_module_init))) + return (error); + + l2_fib_init (lgm); + + fib_node_register_type (FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY, &lisp_fwd_vft); + + return (error); +} + +VLIB_INIT_FUNCTION (lisp_gpe_fwd_entry_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.h b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.h new file mode 100644 index 00000000000..f79236711ea --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief LISP-GPE definitions. + */ + +#ifndef __LISP_GPE_FWD_ENTRY_H__ +#define __LISP_GPE_FWD_ENTRY_H__ + +#include <vnet/lisp-gpe/lisp_gpe.h> + +/** + * @brief A path on which to forward lisp traffic + */ +typedef struct lisp_fwd_path_t_ +{ + /** + * The adjacency constructed for the locator pair + */ + index_t lisp_adj; + + /** + * Priority. Only the paths with the best priority will be installed in FIB + */ + u8 priority; + + /** + * [UE]CMP weight for the path + */ + u8 weight; + +} lisp_fwd_path_t; + +/** + * @brief A Forwarding entry can be 'normal' or 'negative' + * Negative implies we deliberately want to add a FIB entry for an EID + * that results in 'special' behaviour determined by an 'action'. + * 'normal' means send it down some tunnels. + */ +typedef enum lisp_gpe_fwd_entry_type_t_ +{ + LISP_GPE_FWD_ENTRY_TYPE_NORMAL, + LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE, +} lisp_gpe_fwd_entry_type_t; + + +/** + * LISP-GPE fwd entry key + */ +typedef struct lisp_gpe_fwd_entry_key_t_ +{ + dp_address_t rmt; + dp_address_t lcl; + u32 vni; +} lisp_gpe_fwd_entry_key_t; + +/** + * @brief A LISP Forwarding Entry + * + * A forwarding entry is from a local EID to a remote EID over a set of rloc pairs + */ +typedef struct lisp_gpe_fwd_entry_t_ +{ + /** + * This object joins the FIB control plane graph to receive updates + * for changes to the graph.
+ */ + fib_node_t node; + + /** + * The Entry's key: {lEID,r-EID,vni} + */ + lisp_gpe_fwd_entry_key_t *key; + + /** + * The forwarding entry type + */ + lisp_gpe_fwd_entry_type_t type; + + /** + * The tenant the entry belongs to + */ + u32 tenant; + + /** + * The VRF (in the case of L3) or Bridge-Domain (for L2) index + */ + union + { + /** + * Fields relevant to an L2 entry + */ + struct + { + /** + * The VRF ID + */ + u32 eid_table_id; + + /** + * The FIB index for the overlay, i.e. the FIB in which the EIDs + * are present + */ + u32 eid_fib_index; + /** + * The SRC-FIB index for created for anding source-route entries + */ + u32 src_fib_index; + }; + /** + * Fields relevant to an L2 entry + */ + struct + { + /** + * The Bridge-Domain (for L2) index + */ + u32 eid_bd_id; + + /** + * The Bridge-domain index for the overlay EIDs + */ + u32 eid_bd_index; + + /** + * The path-list created for the forwarding + */ + fib_node_index_t path_list_index; + + /** + * Child index of this entry on the path-list + */ + u32 child_index; + + /** + * The DPO used to forward + */ + dpo_id_t dpo; + } l2; + }; + + union + { + /** + * @brief When the type is 'normal' + * The RLOC pair that form the route's paths. i.e. where to send + * packets for this route. + */ + lisp_fwd_path_t *paths; + + /** + * @brief When the type is negative. The action to take. + */ + negative_fwd_actions_e action; + }; +} lisp_gpe_fwd_entry_t; + +extern int +vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, + u32 * hw_if_indexp); + +extern void vnet_lisp_gpe_fwd_entry_flush (void); + +extern u32 lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, + u16 bd_index, u8 src_mac[8], u8 dst_mac[8]); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c index 220802b17c7..b7802ff2b47 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c @@ -17,6 +17,7 @@ * @brief LISP sub-interfaces. * */ +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> #include <vnet/lisp-gpe/lisp_gpe_sub_interface.h> #include <vnet/fib/fib_table.h> #include <vnet/interface.h> @@ -93,16 +94,12 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - // FIXME. enable When we get an adj - ip4_sw_interface_enable_disable (sw_if_index, 1); fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - // FIXME. 
enable When we get an adj - ip6_sw_interface_enable_disable (sw_if_index, 1); } static void @@ -120,41 +117,29 @@ lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc, u32 overlay_table_id, u32 vni) { lisp_gpe_sub_interface_t *l3s; - lisp_gpe_main_t *lgm = &lisp_gpe_main; index_t l3si; - l3si = lisp_gpe_sub_interface_db_find (lrloc, vni); + l3si = lisp_gpe_sub_interface_db_find (lrloc, clib_host_to_net_u32 (vni)); if (INDEX_INVALID == l3si) { - vnet_hw_interface_t *hi; - clib_error_t *error; - u32 sub_sw_if_index; - uword *p; + u32 main_sw_if_index, sub_sw_if_index; /* * find the main interface from the VNI */ - p = hash_get (lgm->l3_ifaces.sw_if_index_by_vni, vni); - - if (NULL == p) - return (INDEX_INVALID); - - hi = vnet_get_hw_interface (vnet_get_main (), p[0]); - - if (NULL == hi) - return (INDEX_INVALID); + main_sw_if_index = + lisp_gpe_tenant_l3_iface_add_or_lock (vni, overlay_table_id); vnet_sw_interface_t sub_itf_template = { .type = VNET_SW_INTERFACE_TYPE_SUB, - .sup_sw_if_index = hi->sw_if_index, + .sup_sw_if_index = main_sw_if_index, .sub.id = lisp_gpe_sub_interface_id++, }; - error = vnet_create_sw_interface (vnet_get_main (), - &sub_itf_template, &sub_sw_if_index); - - if (NULL != error) + if (NULL != vnet_create_sw_interface (vnet_get_main (), + &sub_itf_template, + &sub_sw_if_index)) return (INDEX_INVALID); pool_get (lisp_gpe_sub_interface_pool, l3s); @@ -164,13 +149,16 @@ lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc, l3s->key->local_rloc = *lrloc; l3s->key->vni = clib_host_to_net_u32 (vni); - l3s->main_sw_if_index = hi->sw_if_index; + l3s->main_sw_if_index = main_sw_if_index; l3s->sw_if_index = sub_sw_if_index; l3s->eid_table_id = overlay_table_id; l3si = (l3s - lisp_gpe_sub_interface_pool); - lisp_gpe_sub_interface_set_table (l3s->sw_if_index, l3s->eid_table_id); + // FIXME. enable When we get an adj + ip6_sw_interface_enable_disable (l3s->sw_if_index, 1); + ip4_sw_interface_enable_disable (l3s->sw_if_index, 1); + vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, VNET_SW_INTERFACE_FLAG_ADMIN_UP); @@ -180,8 +168,10 @@ lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc, else { l3s = lisp_gpe_sub_interface_get_i (l3si); + l3s->eid_table_id = overlay_table_id; } + lisp_gpe_sub_interface_set_table (l3s->sw_if_index, l3s->eid_table_id); l3s->locks++; return (l3si); @@ -201,6 +191,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si) lisp_gpe_sub_interface_unset_table (l3s->sw_if_index, l3s->eid_table_id); + lisp_gpe_tenant_l3_iface_unlock (clib_net_to_host_u32 (l3s->key->vni)); vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0); vnet_delete_sub_interface (l3s->sw_if_index); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tenant.c b/vnet/vnet/lisp-gpe/lisp_gpe_tenant.c new file mode 100644 index 00000000000..a7b0dd24b99 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tenant.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/lisp-gpe/lisp_gpe_tenant.h> + +/** + * The pool of all tenants + */ +static lisp_gpe_tenant_t *lisp_gpe_tenant_pool; + +/** + * The hash table of all tenants: key:{VNI}. + */ +uword *lisp_gpe_tenant_db; + +static lisp_gpe_tenant_t * +lisp_gpe_tenant_find (u32 vni) +{ + uword *p; + + p = hash_get (lisp_gpe_tenant_db, vni); + + if (NULL == p) + return (NULL); + + return (pool_elt_at_index (lisp_gpe_tenant_pool, p[0])); +} + +static lisp_gpe_tenant_t * +lisp_gpe_tenant_find_or_create_i (u32 vni) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find (vni); + + if (NULL == lt) + { + pool_get (lisp_gpe_tenant_pool, lt); + memset (lt, 0, sizeof (*lt)); + + lt->lt_vni = vni; + lt->lt_table_id = ~0; + lt->lt_bd_id = ~0; + + hash_set (lisp_gpe_tenant_db, vni, lt - lisp_gpe_tenant_pool); + } + + return (lt); +} + +/** + * @brief Find or create a tenant for the given VNI + */ +u32 +lisp_gpe_tenant_find_or_create (u32 vni) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find (vni); + + if (NULL == lt) + { + lt = lisp_gpe_tenant_find_or_create_i (vni); + } + + return (lt - lisp_gpe_tenant_pool); +} + +/** + * @brief If there are no more locks/users of te tenant, then delete it + */ +static void +lisp_gpe_tenant_delete_if_empty (lisp_gpe_tenant_t * lt) +{ + int i; + + for (i = 0; i < LISP_GPE_TENANT_LOCK_NUM; i++) + { + if (lt->lt_locks[i]) + return; + } + + hash_unset (lisp_gpe_tenant_db, lt->lt_vni); + pool_put (lisp_gpe_tenant_pool, lt); +} + +/** + * @brief Add/create and lock a new or find and lock the existing L3 + * interface for the tenant + * + * @paran vni The tenant's VNI + * @param table_id the Tenant's L3 table ID. + * + * @return the SW IF index of the L3 interface + */ +u32 +lisp_gpe_tenant_l3_iface_add_or_lock (u32 vni, u32 table_id) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find_or_create_i (vni); + + if (~0 == lt->lt_table_id) + lt->lt_table_id = table_id; + + ASSERT (lt->lt_table_id == table_id); + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]) + { + /* create the l3 interface since there are currently no users of it */ + lt->lt_l3_sw_if_index = + lisp_gpe_add_l3_iface (&lisp_gpe_main, vni, table_id); + } + + lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]++; + + return (lt->lt_l3_sw_if_index); +} + +/** + * @brief Release the lock held on the tenant's L3 interface + */ +void +lisp_gpe_tenant_l3_iface_unlock (u32 vni) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find (vni); + + if (NULL == lt) + { + clib_warning ("No tenant for VNI %d", vni); + } + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]) + { + clib_warning ("No L3 interface for tenant VNI %d", vni); + return; + } + + lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]--; + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L3_IFACE]) + { + /* the last user has gone, so delete the l3 interface */ + lisp_gpe_del_l3_iface (&lisp_gpe_main, vni, lt->lt_table_id); + } + + /* + * If there are no more locks on any tenant managed resource, then + * this tenant is toast. + */ + lisp_gpe_tenant_delete_if_empty (lt); +} + +/** + * @brief Add/create and lock a new or find and lock the existing L2 + * interface for the tenant + * + * @paran vni The tenant's VNI + * @param table_id the Tenant's L2 Bridge Domain ID. 
+ * + * @return the SW IF index of the L2 interface + */ +u32 +lisp_gpe_tenant_l2_iface_add_or_lock (u32 vni, u32 bd_id) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find_or_create_i (vni); + + if (NULL == lt) + { + clib_warning ("No tenant for VNI %d", vni); + return ~0; + } + + if (~0 == lt->lt_bd_id) + lt->lt_bd_id = bd_id; + + ASSERT (lt->lt_bd_id == bd_id); + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]) + { + /* create the l2 interface since there are currently no users of it */ + lt->lt_l2_sw_if_index = + lisp_gpe_add_l2_iface (&lisp_gpe_main, vni, bd_id); + } + + lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]++; + + return (lt->lt_l2_sw_if_index); +} + +/** + * @brief Release the lock held on the tenant's L3 interface + */ +void +lisp_gpe_tenant_l2_iface_unlock (u32 vni) +{ + lisp_gpe_tenant_t *lt; + + lt = lisp_gpe_tenant_find (vni); + + if (NULL == lt) + { + clib_warning ("No tenant for VNI %d", vni); + return; + } + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]) + { + clib_warning ("No L2 interface for tenant VNI %d", vni); + return; + } + + lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]--; + + if (0 == lt->lt_locks[LISP_GPE_TENANT_LOCK_L2_IFACE]) + { + /* the last user has gone, so delete the l2 interface */ + lisp_gpe_del_l2_iface (&lisp_gpe_main, vni, lt->lt_bd_id); + } + + /* + * If there are no more locks on any tenant managed resource, then + * this tenant is toast. + */ + lisp_gpe_tenant_delete_if_empty (lt); +} + +/** + * @brief get a const pointer to the tenant object + */ +const lisp_gpe_tenant_t * +lisp_gpe_tenant_get (u32 index) +{ + return (pool_elt_at_index (lisp_gpe_tenant_pool, index)); +} + +/** + * @brief Flush/delete ALL the tenants + */ +void +lisp_gpe_tenant_flush (void) +{ + lisp_gpe_tenant_t *lt; + + /* *INDENT-OFF* */ + pool_foreach(lt, lisp_gpe_tenant_pool, + ({ + lisp_gpe_tenant_l2_iface_unlock(lt->lt_vni); + lisp_gpe_tenant_l3_iface_unlock(lt->lt_vni); + })); + /* *INDENT-ON* */ +} + +/** + * @brif Show/display one tenant + */ +static u8 * +format_lisp_gpe_tenant (u8 * s, va_list ap) +{ + const lisp_gpe_tenant_t *lt = va_arg (ap, lisp_gpe_tenant_t *); + + s = format (s, "VNI:%d ", lt->lt_vni); + + if (lt->lt_table_id != ~0) + { + s = format (s, "VRF:%d ", lt->lt_table_id); + s = format (s, "L3-SW-IF:%d ", lt->lt_l3_sw_if_index); + } + + if (lt->lt_bd_id != ~0) + { + s = format (s, "BD-ID:%d ", lt->lt_bd_id); + s = format (s, "L2-SW-IF:%d ", lt->lt_l2_sw_if_index); + } + + return (s); +} + +/** + * @brief CLI command to show LISP-GPE tenant. + */ +static clib_error_t * +lisp_gpe_tenant_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + lisp_gpe_tenant_t *lt; + + /* *INDENT-OFF* */ + pool_foreach (lt, lisp_gpe_tenant_pool, + ({ + vlib_cli_output (vm, "%U", format_lisp_gpe_tenant, lt); + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lisp_gpe_tenant_command) = { + .path = "show lisp gpe tenant", + .short_help = "show lisp gpe tenant", + .function = lisp_gpe_tenant_show, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tenant.h b/vnet/vnet/lisp-gpe/lisp_gpe_tenant.h new file mode 100644 index 00000000000..5db7dde833b --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tenant.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LISP_GPE_TENANT_H__ +#define __LISP_GPE_TENANT_H__ + +#include <vnet/lisp-gpe/lisp_gpe.h> + +/** + * Refernece counting lock types on the tenant. + * When all of these counters drop to zero, we no longer need the tenant. + */ +typedef enum lisp_gpe_tenant_lock_t_ +{ + LISP_GPE_TENANT_LOCK_L2_IFACE, + LISP_GPE_TENANT_LOCK_L3_IFACE, + LISP_GPE_TENANT_LOCK_NUM, +} lisp_gpe_tenant_lock_t; + +/** + * @brief Representation of the data associated with a LISP overlay tenant + * + * This object exists to manage the shared resources of the L2 and L3 interface + * of a given tenant. + */ +typedef struct lisp_gpe_tenant_t_ +{ + /** + * The VNI is the identifier of the tenant + */ + u32 lt_vni; + + /** + * The tenant can have both L2 and L3 services enabled. + */ + u32 lt_table_id; + u32 lt_bd_id; + + /** + * The number of locks on the tenant's L3 interface. + */ + u32 lt_locks[LISP_GPE_TENANT_LOCK_NUM]; + + /** + * The L3 SW interface index + */ + u32 lt_l3_sw_if_index; + + /** + * The L2 SW interface index + */ + u32 lt_l2_sw_if_index; + +} lisp_gpe_tenant_t; + +extern u32 lisp_gpe_tenant_find_or_create (u32 vni); + +extern u32 lisp_gpe_tenant_l3_iface_add_or_lock (u32 vni, u32 vrf); +extern void lisp_gpe_tenant_l3_iface_unlock (u32 vni); + +extern u32 lisp_gpe_tenant_l2_iface_add_or_lock (u32 vni, u32 vrf); +extern void lisp_gpe_tenant_l2_iface_unlock (u32 vni); + +extern const lisp_gpe_tenant_t *lisp_gpe_tenant_get (u32 index); + +extern void lisp_gpe_tenant_flush (void); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c index 0aecc0a1aa4..4d5587b7d3e 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c @@ -26,7 +26,7 @@ /** * @brief Pool of all LISP tunnels */ -static lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_pool; +static lisp_gpe_tunnel_t *lisp_gpe_tunnel_pool; /** * @brief a DB of all tunnels @@ -43,7 +43,7 @@ static uword *lisp_gpe_tunnel_db; * @return 0 on success. 
*/ u8 * -lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt, +lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_t * lgt, const lisp_gpe_adjacency_t * ladj, lisp_gpe_next_protocol_e payload_proto) { @@ -119,7 +119,7 @@ lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt, return (rw); } -static lisp_gpe_tunnel_2_t * +static lisp_gpe_tunnel_t * lisp_gpe_tunnel_db_find (const lisp_gpe_tunnel_key_t * key) { uword *p; @@ -133,7 +133,7 @@ lisp_gpe_tunnel_db_find (const lisp_gpe_tunnel_key_t * key) return (NULL); } -lisp_gpe_tunnel_2_t * +lisp_gpe_tunnel_t * lisp_gpe_tunnel_get_i (index_t lgti) { return (pool_elt_at_index (lisp_gpe_tunnel_pool, lgti)); @@ -148,7 +148,7 @@ lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair, .rmt = pair->rmt_loc, .fib_index = rloc_fib_index, }; - lisp_gpe_tunnel_2_t *lgt; + lisp_gpe_tunnel_t *lgt; fib_prefix_t pfx; lgt = lisp_gpe_tunnel_db_find (&key); @@ -189,7 +189,7 @@ lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair, void lisp_gpe_tunnel_unlock (index_t lgti) { - lisp_gpe_tunnel_2_t *lgt; + lisp_gpe_tunnel_t *lgt; lgt = lisp_gpe_tunnel_get_i (lgti); lgt->locks--; @@ -202,7 +202,7 @@ lisp_gpe_tunnel_unlock (index_t lgti) } } -const lisp_gpe_tunnel_2_t * +const lisp_gpe_tunnel_t * lisp_gpe_tunnel_get (index_t lgti) { return (lisp_gpe_tunnel_get_i (lgti)); @@ -212,7 +212,7 @@ lisp_gpe_tunnel_get (index_t lgti) u8 * format_lisp_gpe_tunnel (u8 * s, va_list * args) { - lisp_gpe_tunnel_2_t *lgt = va_arg (*args, lisp_gpe_tunnel_2_t *); + lisp_gpe_tunnel_t *lgt = va_arg (*args, lisp_gpe_tunnel_t *); s = format (s, "tunnel %d\n", lgt - lisp_gpe_tunnel_pool); s = format (s, " fib-index: %d, locks:%d \n", @@ -236,7 +236,7 @@ show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - lisp_gpe_tunnel_2_t *lgt; + lisp_gpe_tunnel_t *lgt; index_t index; if (pool_elts (lisp_gpe_tunnel_pool) == 0) @@ -272,7 +272,7 @@ static clib_error_t * lisp_gpe_tunnel_module_init (vlib_main_t * vm) { lisp_gpe_tunnel_db = hash_create_mem (0, - sizeof (lisp_gpe_fwd_entry_key_t), + sizeof (lisp_gpe_tunnel_key_t), sizeof (uword)); return (NULL); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h index d417fa991a8..333d2882883 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h @@ -45,7 +45,7 @@ typedef struct lisp_gpe_tunnel_key_t_ * A tunnel represents an associatation between a local and remote RLOC. * As such it represents a unique LISP rewrite. */ -typedef struct lisp_gpe_tunnel_2_t_ +typedef struct lisp_gpe_tunnel_t_ { /** * RLOC pair and rloc fib_index. This is the tunnel's key. 
@@ -61,7 +61,7 @@ typedef struct lisp_gpe_tunnel_2_t_ * the FIB entry through which the remote rloc is reachable s */ fib_node_index_t fib_entry_index; -} lisp_gpe_tunnel_2_t; +} lisp_gpe_tunnel_t; extern index_t lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair, @@ -69,9 +69,9 @@ extern index_t lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * extern void lisp_gpe_tunnel_unlock (index_t lgti); -extern const lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_get (index_t lgti); +extern const lisp_gpe_tunnel_t *lisp_gpe_tunnel_get (index_t lgti); -extern u8 *lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt, +extern u8 *lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_t * lgt, const struct lisp_gpe_adjacency_t_ *ladj, lisp_gpe_next_protocol_e diff --git a/vnet/vnet/mpls/interface.c b/vnet/vnet/mpls/interface.c index 9ef4c293494..dd40fc29e25 100644 --- a/vnet/vnet/mpls/interface.c +++ b/vnet/vnet/mpls/interface.c @@ -536,190 +536,21 @@ static const int dpo_next_2_mpls_post_rewrite[DPO_LAST] = { [DPO_LOAD_BALANCE] = IP_LOOKUP_NEXT_LOAD_BALANCE, }; -static uword -mpls_post_rewrite (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +static void +mpls_gre_fixup (vlib_main_t *vm, + ip_adjacency_t *adj, + vlib_buffer_t * b0) { - ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - u32 n_left_from, next_index, * from, * to_next; - u16 old_l0 = 0; //, old_l1 = 0; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - /* while (n_left_from >= 4 && n_left_to_next >= 2) */ - /* { */ - /* u32 bi0, bi1; */ - /* vlib_buffer_t * b0, * b1; */ - /* ip4_header_t * ip0, * ip1; */ - /* u32 next0; */ - /* u32 next1; */ - /* u16 new_l0, new_l1, adj_index0, adj_index1; */ - /* ip_csum_t sum0, sum1; */ - /* ip_adjacency_t *adj0, *adj1; */ - - /* /\* Prefetch next iteration. 
*\/ */ - /* { */ - /* vlib_buffer_t * p2, * p3; */ - - /* p2 = vlib_get_buffer (vm, from[2]); */ - /* p3 = vlib_get_buffer (vm, from[3]); */ - - /* vlib_prefetch_buffer_header (p2, LOAD); */ - /* vlib_prefetch_buffer_header (p3, LOAD); */ - - /* CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ - /* CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ - /* } */ - - /* bi0 = from[0]; */ - /* bi1 = from[1]; */ - /* to_next[0] = bi0; */ - /* to_next[1] = bi1; */ - /* from += 2; */ - /* to_next += 2; */ - /* n_left_to_next -= 2; */ - /* n_left_from -= 2; */ - - - /* b0 = vlib_get_buffer (vm, bi0); */ - /* b1 = vlib_get_buffer (vm, bi1); */ - /* ip0 = vlib_buffer_get_current (b0); */ - /* ip1 = vlib_buffer_get_current (b1); */ - - /* /\* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] *\/ */ - - /* /\* set the GRE (outer) ip packet length, fix the bloody checksum *\/ */ - /* sum0 = ip0->checksum; */ - /* sum1 = ip1->checksum; */ - - /* /\* old_l0, old_l1 always 0, see the rewrite setup *\/ */ - /* new_l0 = */ - /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); */ - /* new_l1 = */ - /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); */ - - /* sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, */ - /* length /\* changed member *\/); */ - /* sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, */ - /* length /\* changed member *\/); */ - /* ip0->checksum = ip_csum_fold (sum0); */ - /* ip1->checksum = ip_csum_fold (sum1); */ - /* ip0->length = new_l0; */ - /* ip1->length = new_l1; */ + ip4_header_t * ip0; - /* /\* replace the TX adj in the packet with the next in the chain *\/ */ - /* adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; */ - /* adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; */ + ip0 = vlib_buffer_get_current (b0); - /* adj0 = ip_get_adjacency (lm, adj_index0); */ - /* adj1 = ip_get_adjacency (lm, adj_index1); */ - - /* ASSERT(adj0->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ - /* ASSERT(adj1->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ - - /* adj_index0 = adj0->sub_type.midchain.adj_index; */ - /* adj_index1 = adj1->sub_type.midchain.adj_index; */ - - /* vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; */ - /* vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1; */ - - /* /\* get the next adj in the chain to determine the next graph node *\/ */ - /* adj0 = ip_get_adjacency (lm, adj_index0); */ - /* adj1 = ip_get_adjacency (lm, adj_index1); */ - - /* next0 = adj0->lookup_next_index; */ - /* next1 = adj1->lookup_next_index; */ - - /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ - /* to_next, n_left_to_next, */ - /* bi0, bi1, next0, next1); */ - /* } */ - - while (n_left_from > 0 && n_left_to_next > 0) - { - ip_adjacency_t * adj0; - u32 bi0; - vlib_buffer_t * b0; - ip4_header_t * ip0; - u32 next0; - u16 new_l0, adj_index0; - ip_csum_t sum0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ - - /* set the GRE (outer) ip packet length, fix the bloody checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* replace the TX 
adj in the packet with the next in the chain */ - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - - ASSERT(adj_index0); - - adj0 = ip_get_adjacency (lm, adj_index0); - ASSERT(adj0->sub_type.midchain.next_dpo.dpoi_index != ADJ_INDEX_INVALID); - adj_index0 = adj0->sub_type.midchain.next_dpo.dpoi_index; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; - - /* get the next adj in the chain to determine the next graph node */ - ASSERT(0); - next0 = 0; //adj0->sub_type.midchain.next_dpo.dpoi_next; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, mpls_input_node.index, - MPLS_ERROR_PKTS_ENCAP, from_frame->n_vectors); - return from_frame->n_vectors; + /* Fixup the checksum and len fields in the GRE tunnel encap + * that was applied at the midchain node */ + ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + ip0->checksum = ip4_header_checksum (ip0); } -VLIB_REGISTER_NODE (mpls_post_rewrite_node) = { - .function = mpls_post_rewrite, - .name = "mpls-post-rewrite", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = 0, - - .n_next_nodes = IP_LOOKUP_N_NEXT, - .next_nodes = IP4_LOOKUP_NEXT_NODES, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (mpls_post_rewrite_node, mpls_post_rewrite) - static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) { ip4_header_t * ip0; @@ -780,7 +611,7 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, { mpls_interface_state_change_callback_t *callback; vlib_main_t * vm = vlib_get_main(); - ip_config_main_t * cm = &mm->rx_config_mains; + ip_config_main_t * cm = &mm->feature_config_mains[VNET_IP_RX_UNICAST_FEAT]; vnet_config_main_t * vcm = &cm->config_main; u32 lookup_feature_index; fib_node_index_t lfib_index; @@ -1101,7 +932,8 @@ int mpls_gre_tunnel_add (ip4_address_t *src, hi->sw_if_index); adj_nbr_midchain_update_rewrite(tp->adj_index, - mpls_post_rewrite_node.index, + mpls_gre_fixup, + ADJ_MIDCHAIN_FLAG_NONE, rewrite_data); mpls_gre_tunnel_stack(tp); diff --git a/vnet/vnet/mpls/mpls.h b/vnet/vnet/mpls/mpls.h index 2aeae49df43..e7fed192e57 100644 --- a/vnet/vnet/mpls/mpls.h +++ b/vnet/vnet/mpls/mpls.h @@ -127,11 +127,12 @@ typedef struct { uword *fib_index_by_table_id; /* rx/tx interface/feature configuration. */ - ip_config_main_t rx_config_mains, tx_config_main; + ip_config_main_t feature_config_mains[VNET_N_IP_FEAT]; /* Built-in unicast feature path indices, see ip_feature_init_cast(...) 
*/ u32 mpls_rx_feature_lookup; u32 mpls_rx_feature_not_enabled; + u32 mpls_tx_feature_interface_output; /* pool of gre tunnel instances */ mpls_gre_tunnel_t *gre_tunnels; @@ -154,10 +155,10 @@ typedef struct { u32 ip6_classify_mpls_policy_encap_next_index; /* feature path configuration lists */ - vnet_ip_feature_registration_t * next_feature; + vnet_ip_feature_registration_t * next_feature[VNET_N_IP_FEAT]; /* Save feature results for show command */ - char **feature_nodes; + char **feature_nodes[VNET_N_IP_FEAT]; /* IP4 enabled count by software interface */ u8 * mpls_enabled_by_sw_if_index; @@ -179,11 +180,23 @@ static void __vnet_add_feature_registration_uc_##x (void) \ static void __vnet_add_feature_registration_uc_##x (void) \ { \ mpls_main_t * mm = &mpls_main; \ - uc_##x.next = mm->next_feature; \ - mm->next_feature = &uc_##x; \ + uc_##x.next = mm->next_feature[VNET_IP_RX_UNICAST_FEAT]; \ + mm->next_feature[VNET_IP_RX_UNICAST_FEAT] = &uc_##x; \ } \ __VA_ARGS__ vnet_ip_feature_registration_t uc_##x +#define VNET_MPLS_TX_FEATURE_INIT(x,...) \ + __VA_ARGS__ vnet_ip_feature_registration_t tx_##x; \ +static void __vnet_add_feature_registration_tx_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_feature_registration_tx_##x (void) \ +{ \ + mpls_main_t * mm = &mpls_main; \ + tx_##x.next = mm->next_feature[VNET_IP_TX_FEAT]; \ + mm->next_feature[VNET_IP_TX_FEAT] = &tx_##x; \ +} \ +__VA_ARGS__ vnet_ip_feature_registration_t tx_##x + extern clib_error_t * mpls_feature_init(vlib_main_t * vm); format_function_t format_mpls_protocol; diff --git a/vnet/vnet/mpls/mpls_features.c b/vnet/vnet/mpls/mpls_features.c index d3a726afd04..80002fcb6f9 100644 --- a/vnet/vnet/mpls/mpls_features.c +++ b/vnet/vnet/mpls/mpls_features.c @@ -112,22 +112,49 @@ VNET_MPLS_FEATURE_INIT (mpls_not_enabled, static) = { .feature_index = &mpls_main.mpls_rx_feature_not_enabled, }; -static char * feature_start_nodes[] = +/* Built-in ip4 tx feature path definition */ +VNET_MPLS_TX_FEATURE_INIT (interface_output, static) = { + .node_name = "interface-output", + .runs_before = 0, /* not before any other features */ + .feature_index = &mpls_main.mpls_tx_feature_interface_output, +}; + + +static char * rx_feature_start_nodes[] = { "mpls-input", }; +static char * tx_feature_start_nodes[] = +{ + "mpls-output", + "mpls-midchain", +}; clib_error_t * mpls_feature_init (vlib_main_t * vm) { - ip_config_main_t * cm = &mpls_main.rx_config_mains; + ip_config_main_t * cm = &mpls_main.feature_config_mains[VNET_IP_RX_UNICAST_FEAT]; vnet_config_main_t * vcm = &cm->config_main; + clib_error_t *error; + + if ((error = ip_feature_init_cast (vm, cm, vcm, + rx_feature_start_nodes, + ARRAY_LEN(rx_feature_start_nodes), + mpls_main.next_feature[VNET_IP_RX_UNICAST_FEAT], + &mpls_main.feature_nodes[VNET_IP_RX_UNICAST_FEAT]))) + return error; - return (ip_feature_init_cast (vm, cm, vcm, - feature_start_nodes, - ARRAY_LEN(feature_start_nodes), - VNET_IP_RX_UNICAST_FEAT, - VNET_L3_PACKET_TYPE_MPLS_UNICAST)); + cm = &mpls_main.feature_config_mains[VNET_IP_TX_FEAT]; + vcm = &cm->config_main; + + if ((error = ip_feature_init_cast (vm, cm, vcm, + tx_feature_start_nodes, + ARRAY_LEN(tx_feature_start_nodes), + mpls_main.next_feature[VNET_IP_TX_FEAT], + &mpls_main.feature_nodes[VNET_IP_TX_FEAT]))) + return error; + + return error; } static clib_error_t * @@ -137,39 +164,53 @@ mpls_sw_interface_add_del (vnet_main_t * vnm, { vlib_main_t * vm = vnm->vlib_main; mpls_main_t * mm = &mpls_main; - ip_config_main_t * cm = &mm->rx_config_mains; - 
vnet_config_main_t * vcm = &cm->config_main; - u32 drop_feature_index; - u32 ci; - - vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); - vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0); - vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); - ci = cm->config_index_by_sw_if_index[sw_if_index]; - - drop_feature_index = mm->mpls_rx_feature_not_enabled; - - if (is_add) - ci = vnet_config_add_feature (vm, vcm, ci, - drop_feature_index, - /* config data */ 0, - /* # bytes of config data */ 0); - else - { - ci = vnet_config_del_feature (vm, vcm, ci, - drop_feature_index, - /* config data */ 0, - /* # bytes of config data */ 0); - mm->mpls_enabled_by_sw_if_index[sw_if_index] = 0;; - } - - cm->config_index_by_sw_if_index[sw_if_index] = ci; + u32 feature_index; + u32 ci, cast; + + for (cast = 0; cast < VNET_N_IP_FEAT; cast++) + { + ip_config_main_t * cm = &mm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + if (VNET_IP_RX_MULTICAST_FEAT == cast) + continue; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + if (cast == VNET_IP_RX_UNICAST_FEAT) + feature_index = mm->mpls_rx_feature_not_enabled; + else + feature_index = mm->mpls_tx_feature_interface_output; + + + if (is_add) + ci = vnet_config_add_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + mm->mpls_enabled_by_sw_if_index[sw_if_index] = 0;; + } + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } return /* no error */ 0; } VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del); +#define foreach_af_cast \ +_(VNET_IP_RX_UNICAST_FEAT, "mpls input") \ +_(VNET_IP_TX_FEAT, "mpls output") \ + static clib_error_t * show_mpls_features_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -181,11 +222,15 @@ show_mpls_features_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "Available MPLS feature nodes"); - do { - features = mm->feature_nodes; - for (i = 0; i < vec_len(features); i++) - vlib_cli_output (vm, " %s\n", features[i]); +#define _(c,s) \ + do { \ + features = mm->feature_nodes[c]; \ + vlib_cli_output (vm, "%s:", s); \ + for (i = 0; i < vec_len(features); i++) \ + vlib_cli_output (vm, " %s\n", features[i]); \ } while(0); + foreach_af_cast; +#undef _ return 0; } @@ -202,18 +247,7 @@ show_mpls_interface_features_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { vnet_main_t * vnm = vnet_get_main(); - mpls_main_t * mm = &mpls_main; - - ip_config_main_t * cm; - vnet_config_main_t * vcm; - vnet_config_t * cfg; - u32 cfg_index; - vnet_config_feature_t * feat; - vlib_node_t * n; u32 sw_if_index; - u32 node_index; - u32 current_config_index; - int i; if (! 
unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) @@ -222,26 +256,9 @@ show_mpls_interface_features_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "MPLS feature paths configured on %U...", format_vnet_sw_if_index_name, vnm, sw_if_index); - cm = &mm->rx_config_mains; - vcm = &cm->config_main; - - current_config_index = vec_elt (cm->config_index_by_sw_if_index, - sw_if_index); - - ASSERT(current_config_index - < vec_len (vcm->config_pool_index_by_user_index)); - - cfg_index = - vcm->config_pool_index_by_user_index[current_config_index]; - cfg = pool_elt_at_index (vcm->config_pool, cfg_index); - - for (i = 0; i < vec_len(cfg->features); i++) - { - feat = cfg->features + i; - node_index = feat->node_index; - n = vlib_get_node (vm, node_index); - vlib_cli_output (vm, " %v", n->name); - } + ip_interface_features_show (vm, "MPLS", + mpls_main.feature_config_mains, + sw_if_index); return 0; } diff --git a/vnet/vnet/mpls/node.c b/vnet/vnet/mpls/node.c index 6801cc7b3ae..1b435f31f0b 100644 --- a/vnet/vnet/mpls/node.c +++ b/vnet/vnet/mpls/node.c @@ -110,7 +110,7 @@ mpls_input_inline (vlib_main_t * vm, h0 = vlib_buffer_get_current (b0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; - cm0 = &mm->rx_config_mains; + cm0 = &mm->feature_config_mains[VNET_IP_RX_UNICAST_FEAT]; b0->current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0); diff --git a/vnet/vnet/vnet.h b/vnet/vnet/vnet.h index 31faef7cbb9..a3cbd37903c 100644 --- a/vnet/vnet/vnet.h +++ b/vnet/vnet/vnet.h @@ -50,6 +50,13 @@ typedef enum VNET_N_IP_FEAT, } vnet_cast_t; +#define VNET_CAST_NAMES { \ + [VNET_IP_RX_UNICAST_FEAT] = "rx-unicast", \ + [VNET_IP_RX_MULTICAST_FEAT] = "rx-multicast", \ + [VNET_IP_TX_FEAT] = "tx", \ +} + + #include <vnet/unix/pcap.h> #include <vnet/buffer.h> #include <vnet/config.h> |
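Two stand-alone sketches follow. They are not part of this patch; the names, numbers and types are hypothetical, and VPP's pool/hash/vnet plumbing is replaced with plain C so that each example compiles and runs on its own.

First, the per-tenant reference-counting scheme introduced in lisp_gpe_tenant.c above: each tenant (keyed by VNI) owns at most one shared L3 and one shared L2 interface, each guarded by its own lock count; the interface is created on the first lock, destroyed on the last unlock, and the tenant itself is deleted once every lock count has dropped to zero.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { LOCK_L2_IFACE, LOCK_L3_IFACE, LOCK_NUM };

typedef struct {
  uint32_t vni;               /* tenant identifier */
  uint32_t table_id;          /* L3 VRF, ~0 while unset */
  uint32_t locks[LOCK_NUM];   /* per-resource user counts */
  int in_use;
  int l3_sw_if_index;         /* stand-in for the created interface */
} tenant_t;

#define MAX_TENANTS 16
static tenant_t tenants[MAX_TENANTS];

static tenant_t *
tenant_find_or_create (uint32_t vni)
{
  int free_slot = -1;
  for (int i = 0; i < MAX_TENANTS; i++)
    {
      if (tenants[i].in_use && tenants[i].vni == vni)
        return &tenants[i];
      if (!tenants[i].in_use && free_slot < 0)
        free_slot = i;
    }
  assert (free_slot >= 0);
  tenants[free_slot] = (tenant_t) { .vni = vni, .table_id = ~0u, .in_use = 1 };
  return &tenants[free_slot];
}

/* Create the shared L3 interface for the first user, otherwise just
 * take another lock on it. */
static int
tenant_l3_iface_add_or_lock (uint32_t vni, uint32_t table_id)
{
  tenant_t *t = tenant_find_or_create (vni);

  if (~0u == t->table_id)
    t->table_id = table_id;
  assert (t->table_id == table_id);           /* one VRF per tenant */

  if (0 == t->locks[LOCK_L3_IFACE]++)
    t->l3_sw_if_index = (int) (1000 + vni);   /* pretend interface create */

  return t->l3_sw_if_index;
}

/* Drop a lock; delete the interface when it has no more users, and the
 * tenant when none of its resources do. */
static void
tenant_l3_iface_unlock (uint32_t vni)
{
  tenant_t *t = tenant_find_or_create (vni);

  assert (t->locks[LOCK_L3_IFACE] > 0);
  if (0 == --t->locks[LOCK_L3_IFACE])
    t->l3_sw_if_index = -1;                   /* pretend interface delete */

  for (int i = 0; i < LOCK_NUM; i++)
    if (t->locks[i])
      return;
  t->in_use = 0;                              /* last user gone: delete tenant */
}

int
main (void)
{
  int a = tenant_l3_iface_add_or_lock (9, 1);
  int b = tenant_l3_iface_add_or_lock (9, 1);
  assert (a == b);                            /* second user shares the interface */
  tenant_l3_iface_unlock (9);
  tenant_l3_iface_unlock (9);
  printf ("tenant 9 deleted: %s\n", tenants[0].in_use ? "no" : "yes");
  return 0;
}

Second, the midchain fixup idea that replaces the old mpls-post-rewrite node: the midchain adjacency prepends a pre-built IPv4/GRE encap whose length field is written as zero, and a per-packet fixup callback (mpls_gre_fixup above) patches in the real length and recomputes the header checksum. A minimal stand-alone version of that per-packet work, again with hypothetical types:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
  uint8_t  ver_ihl, tos;
  uint16_t length;
  uint16_t id, frag;
  uint8_t  ttl, protocol;
  uint16_t checksum;
  uint32_t src, dst;
} __attribute__ ((packed)) ip4_hdr_t;

static uint16_t
ip4_checksum (const ip4_hdr_t * h)
{
  const uint16_t *p = (const uint16_t *) h;
  uint32_t sum = 0;

  for (int i = 0; i < 10; i++)    /* 20-byte header, checksum field zeroed */
    sum += ntohs (p[i]);
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return htons ((uint16_t) ~sum);
}

/* Called once the encap has been prepended; total_len counts from the
 * outer IP header to the end of the packet. */
static void
gre_encap_fixup (ip4_hdr_t * outer_ip, uint16_t total_len)
{
  outer_ip->length = htons (total_len);
  outer_ip->checksum = 0;
  outer_ip->checksum = ip4_checksum (outer_ip);
}

int
main (void)
{
  ip4_hdr_t h = { .ver_ihl = 0x45, .ttl = 64, .protocol = 47 /* GRE */,
    .src = htonl (0x0a000001), .dst = htonl (0x0a000002)
  };
  gre_encap_fixup (&h, 124);      /* e.g. 20B IP + 4B GRE + 100B payload */
  printf ("len=%u csum=0x%04x\n", ntohs (h.length), ntohs (h.checksum));
  return 0;
}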