From 4ec36c5535849a4e456ed99b57968d54d5e03b62 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 31 Mar 2020 09:21:29 -0400 Subject: fib: midchain adjacency optimisations Type: improvement - inline some common encap fixup functions into the midchain rewrite node so we don't incur the cost of the virtual function call - change the copy 'guess' from ethernet_header (which will never happen) to an ip4 header - add adj-midchain-tx to multiarch sources - don't run adj-midchain-tx as a feature, instead put this node as the adj's next and at the end of the feature arc. - cache the feature arc config index (to save the cache miss going to fetch it) - don't check if features are enabled when taking the arc (since we know they are) the last two changes will also benefit normal adjacencies taking the arc (i.e. for NAT, ACLs, etc) for IPSec: - don't run esp_encrypt as a feature, instead when required insert this node into the adj's next and into the end of the feature arc. this implies that encrypt is always 'the last feature' run, which is symmetric with decrypt always being the first. 
- esp_encrypt for tunnels has adj-midchain-tx as next node Change-Id: Ida0af56a704302cf2d7797ded5f118a781e8acb7 Signed-off-by: Neale Ranns --- src/vnet/CMakeLists.txt | 1 + src/vnet/adj/adj.c | 9 + src/vnet/adj/adj.h | 84 ++++--- src/vnet/adj/adj_delegate.c | 14 ++ src/vnet/adj/adj_delegate.h | 7 + src/vnet/adj/adj_dp.h | 66 ++++++ src/vnet/adj/adj_internal.h | 3 +- src/vnet/adj/adj_l2.c | 44 ++-- src/vnet/adj/adj_midchain.c | 546 ++++++++++++++++++++----------------------- src/vnet/adj/adj_midchain.h | 23 ++ src/vnet/adj/adj_nbr.c | 11 +- src/vnet/adj/rewrite.c | 2 +- src/vnet/adj/rewrite.h | 19 +- src/vnet/config.c | 56 +++++ src/vnet/config.h | 5 + src/vnet/feature/feature.c | 29 +++ src/vnet/feature/feature.h | 20 ++ src/vnet/ip/ip4_forward.c | 86 ++++--- src/vnet/ip/ip6_forward.c | 37 +-- src/vnet/ipip/ipip.c | 38 ++- src/vnet/ipsec/esp_encrypt.c | 79 +------ src/vnet/ipsec/ipsec.c | 30 +-- src/vnet/ipsec/ipsec.h | 28 +-- src/vnet/ipsec/ipsec_tun.c | 188 ++++++++------- src/vnet/tunnel/tunnel_dp.h | 2 + 25 files changed, 822 insertions(+), 605 deletions(-) create mode 100644 src/vnet/adj/adj_dp.h (limited to 'src/vnet') diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index eb59ab07108..bd1b75b54a1 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -1346,6 +1346,7 @@ list(APPEND VNET_SOURCES list(APPEND VNET_MULTIARCH_SOURCES adj/adj_nsh.c adj/adj_l2.c + adj/adj_midchain.c ) list(APPEND VNET_HEADERS diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index c601e6bd19d..2fb77fe78d3 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -79,6 +79,7 @@ adj_alloc (fib_protocol_t proto) adj->ia_nh_proto = proto; adj->ia_flags = 0; + adj->ia_cfg_index = 0; adj->rewrite_header.sw_if_index = ~0; adj->rewrite_header.flags = 0; adj->lookup_next_index = 0; @@ -399,10 +400,18 @@ adj_feature_update_walk_cb (adj_index_t ai, ((ctx->arc == mpls_main.output_feature_arc_index) && (VNET_LINK_MPLS == adj->ia_link))) { + vnet_feature_main_t 
*fm = &feature_main; + vnet_feature_config_main_t *cm; + + cm = &fm->feature_config_mains[ctx->arc]; + if (ctx->enable) adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES; else adj->rewrite_header.flags &= ~VNET_REWRITE_HAS_FEATURES; + + adj->ia_cfg_index = vec_elt (cm->config_index_by_sw_if_index, + adj->rewrite_header.sw_if_index); } return (ADJ_WALK_RC_CONTINUE); } diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index e22e7ecd58a..a53122711a8 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -181,6 +181,10 @@ typedef enum adj_attr_t_ * If the midchain were to stack on its FIB entry a loop would form. */ ADJ_ATTR_MIDCHAIN_LOOPED, + /** + * the fixup function is standard IP4o4 header + */ + ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR, } adj_attr_t; #define ADJ_ATTR_NAMES { \ @@ -188,11 +192,12 @@ typedef enum adj_attr_t_ [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count", \ [ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack", \ [ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped", \ + [ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup", \ } -#define FOR_EACH_ADJ_ATTR(_attr) \ - for (_attr = ADJ_ATTR_SYNC_WALK_ACTIVE; \ - _attr <= ADJ_ATTR_MIDCHAIN_LOOPED; \ +#define FOR_EACH_ADJ_ATTR(_attr) \ + for (_attr = ADJ_ATTR_SYNC_WALK_ACTIVE; \ + _attr <= ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR; \ _attr++) /** @@ -205,6 +210,7 @@ typedef enum adj_flags_t_ ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT), ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK), ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED), + ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR), } __attribute__ ((packed)) adj_flags_t; /** @@ -227,32 +233,10 @@ typedef struct ip_adjacency_t_ * has 8 byte alignment requirements. */ fib_node_t ia_node; - - /** - * Next hop after ip4-lookup. - * This is not accessed in the rewrite nodes. 
- * 1-bytes - */ - ip_lookup_next_t lookup_next_index; - - /** - * link/ether-type - * 1 bytes - */ - vnet_link_t ia_link; - /** - * The protocol of the neighbor/peer. i.e. the protocol with - * which to interpret the 'next-hop' attributes of the sub-types. - * 1-bytes + * feature [arc] config index */ - fib_protocol_t ia_nh_proto; - - /** - * Flags on the adjacency - * 1-bytes - */ - adj_flags_t ia_flags; + u32 ia_cfg_index; union { @@ -298,6 +282,10 @@ typedef struct ip_adjacency_t_ * loop detection. */ fib_node_index_t fei; + + /** spare space */ + u8 __ia_midchain_pad[4]; + } midchain; /** * IP_LOOKUP_NEXT_GLEAN @@ -315,12 +303,14 @@ typedef struct ip_adjacency_t_ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); - /* Rewrite in second/third cache lines */ + /** Rewrite in second and third cache lines */ VNET_DECLARE_REWRITE; /** * more control plane members that do not fit on the first cacheline */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline3); + /** * A sorted vector of delegates */ @@ -330,6 +320,37 @@ typedef struct ip_adjacency_t_ * The VLIB node in which this adj is used to forward packets */ u32 ia_node_index; + + /** + * Next hop after ip4-lookup. + * This is not accessed in the rewrite nodes. + * 1-bytes + */ + ip_lookup_next_t lookup_next_index; + + /** + * link/ether-type + * 1 bytes + */ + vnet_link_t ia_link; + + /** + * The protocol of the neighbor/peer. i.e. the protocol with + * which to interpret the 'next-hop' attributes of the sub-types. 
+ * 1-bytes + */ + fib_protocol_t ia_nh_proto; + + /** + * Flags on the adjacency + * 1-bytes + */ + adj_flags_t ia_flags; + + /** + * Free space on the fourth cacheline (not used in the DP) + */ + u8 __ia_pad[48]; } ip_adjacency_t; STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0), @@ -337,6 +358,13 @@ STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0), STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES), "IP adjacency cacheline 1 is more than one cacheline size offset"); +#if defined __x86_64__ +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline3) == + 3 * CLIB_CACHE_LINE_BYTES), + "IP adjacency cacheline 3 is more than one cacheline size offset"); +/* An adj fits into 4 cachelines on your average machine */ +STATIC_ASSERT_SIZEOF (ip_adjacency_t, 4 * 64); +#endif /** * @brief diff --git a/src/vnet/adj/adj_delegate.c b/src/vnet/adj/adj_delegate.c index 87a83fb4239..8f590461b20 100644 --- a/src/vnet/adj/adj_delegate.c +++ b/src/vnet/adj/adj_delegate.c @@ -124,6 +124,20 @@ adj_delegate_add (ip_adjacency_t *adj, return (0); } +void +adj_delegate_adj_modified (ip_adjacency_t *adj) +{ + adj_delegate_t *aed; + + vec_foreach(aed, adj->ia_delegates) + { + if (ad_vfts[aed->ad_type].adv_adj_modified) + { + ad_vfts[aed->ad_type].adv_adj_modified(aed); + } + } +} + void adj_delegate_adj_deleted (ip_adjacency_t *adj) { diff --git a/src/vnet/adj/adj_delegate.h b/src/vnet/adj/adj_delegate.h index d49c6661c19..c83a00cd271 100644 --- a/src/vnet/adj/adj_delegate.h +++ b/src/vnet/adj/adj_delegate.h @@ -82,12 +82,19 @@ typedef u8 * (*adj_delegate_format_t)(const adj_delegate_t *aed, u8 *s); */ typedef void (*adj_delegate_adj_created_t)(adj_index_t ai); +/** + * Indication that the adjacency has been modified. + * @param aed the delegate attached to the modified adjacency. 
+ */ +typedef void (*adj_delegate_adj_modified_t)(adj_delegate_t *aed); + /** * An ADJ delegate virtual function table */ typedef struct adj_delegate_vft_t_ { adj_delegate_format_t adv_format; adj_delegate_adj_deleted_t adv_adj_deleted; + adj_delegate_adj_modified_t adv_adj_modified; adj_delegate_adj_created_t adv_adj_created; } adj_delegate_vft_t; diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h new file mode 100644 index 00000000000..27c0581fcfb --- /dev/null +++ b/src/vnet/adj/adj_dp.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ADJ_DP_H__ +#define __ADJ_DP_H__ + +#include +#include + +static_always_inline void +adj_midchain_ipip44_fixup (vlib_main_t * vm, + const ip_adjacency_t * adj, + vlib_buffer_t * b) +{ + tunnel_encap_decap_flags_t flags; + ip4_header_t *ip4; + + flags = pointer_to_uword (adj->sub_type.midchain.fixup_data); + + ip4 = vlib_buffer_get_current (b); + ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); + + if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags)) + { + ip_csum_t sum; + u16 old,new; + + old = 0; + new = ip4->length; + + sum = ip4->checksum; + sum = ip_csum_update (sum, old, new, ip4_header_t, length); + ip4->checksum = ip_csum_fold (sum); + } + else + { + tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4); + ip4->checksum = ip4_header_checksum (ip4); + } +} + +static_always_inline void +adj_midchain_fixup (vlib_main_t *vm, + const ip_adjacency_t *adj, + vlib_buffer_t * b) +{ + if (PREDICT_TRUE(adj->rewrite_header.flags & VNET_REWRITE_FIXUP_IP4_O_4)) + adj_midchain_ipip44_fixup (vm, adj, b); + else if (adj->sub_type.midchain.fixup_func) + adj->sub_type.midchain.fixup_func + (vm, adj, b, adj->sub_type.midchain.fixup_data); +} + +#endif diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index c4dda51324c..11214932a3a 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -53,7 +53,7 @@ adj_get_rewrite_node (vnet_link_t linkt) case VNET_LINK_MPLS: return (mpls_output_node.index); case VNET_LINK_ETHERNET: - return (adj_l2_rewrite_node.index); + return (adj_l2_rewrite_node.index); case VNET_LINK_NSH: return (adj_nsh_rewrite_node.index); case VNET_LINK_ARP: @@ -138,6 +138,7 @@ extern int adj_bfd_is_up (adj_index_t ai); */ extern void adj_delegate_adj_deleted(ip_adjacency_t *adj); extern void adj_delegate_adj_created(ip_adjacency_t *adj); +extern void adj_delegate_adj_modified(ip_adjacency_t *adj); extern u8* adj_delegate_format(u8* s, ip_adjacency_t *adj); #endif diff --git 
a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c index a289158c7f3..5413eca6212 100644 --- a/src/vnet/adj/adj_l2.c +++ b/src/vnet/adj/adj_l2.c @@ -48,7 +48,8 @@ always_inline uword adj_l2_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int is_midchain) + int is_midchain, + int do_counters) { u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; @@ -67,7 +68,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm, ip_adjacency_t * adj0; vlib_buffer_t * p0; char *h0; - u32 pi0, rw_len0, adj_index0, next0 = 0; + u32 pi0, rw_len0, len0, adj_index0, next0 = 0; u32 tx_sw_if_index0; pi0 = to_next[0] = from[0]; @@ -83,29 +84,30 @@ adj_l2_rewrite_inline (vlib_main_t * vm, adj0 = adj_get (adj_index0); - /* Guess we are only writing on simple Ethernet header. */ + /* Guess we are writing on ip4 header. */ vnet_rewrite_one_header (adj0[0], h0, - sizeof (ethernet_header_t)); + //sizeof (gre_header_t) + + sizeof (ip4_header_t)); /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index; + len0 = vlib_buffer_length_in_chain (vm, p0); /* since we are coming out of the L2 world, where the vlib_buffer * union is used for other things, make sure it is clean for * MPLS from now on. */ vnet_buffer(p0)->mpls.first = 0; - vlib_increment_combined_counter(&adjacency_counters, - thread_index, - adj_index0, - /* packet increment */ 0, - /* byte increment */ rw_len0); + if (do_counters) + vlib_increment_combined_counter(&adjacency_counters, + thread_index, + adj_index0, + 0, len0); /* Check MTU of outgoing interface. 
*/ - if (PREDICT_TRUE((vlib_buffer_length_in_chain (vm, p0) <= - adj0[0].rewrite_header.max_l3_packet_bytes))) + if (PREDICT_TRUE(len0 <= adj0[0].rewrite_header.max_l3_packet_bytes)) { /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP header */ @@ -126,7 +128,15 @@ adj_l2_rewrite_inline (vlib_main_t * vm, * Follow the feature ARC. this will result eventually in * the midchain-tx node */ - vnet_feature_arc_start(em->output_feature_arc_index, tx_sw_if_index0, &next0, p0); + if (PREDICT_FALSE (adj0->rewrite_header.flags & + VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start_w_cfg_index ( + em->output_feature_arc_index, + tx_sw_if_index0, + &next0, p0, + adj0->ia_cfg_index); + else + next0 = adj0[0].rewrite_header.next_index; } else { @@ -156,14 +166,20 @@ VLIB_NODE_FN (adj_l2_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return adj_l2_rewrite_inline (vm, node, frame, 0); + if (adj_are_counters_enabled ()) + return adj_l2_rewrite_inline (vm, node, frame, 0, 1); + else + return adj_l2_rewrite_inline (vm, node, frame, 0, 0); } VLIB_NODE_FN (adj_l2_midchain_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return adj_l2_rewrite_inline (vm, node, frame, 1); + if (adj_are_counters_enabled ()) + return adj_l2_rewrite_inline (vm, node, frame, 1, 1); + else + return adj_l2_rewrite_inline (vm, node, frame, 1, 0); } VLIB_REGISTER_NODE (adj_l2_rewrite_node) = { diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index 88648fea0a9..4741ec9a953 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -24,14 +24,6 @@ #include #include -/** - * The two midchain tx feature node indices - */ -static u32 adj_midchain_tx_feature_node[VNET_LINK_NUM]; -static u32 adj_midchain_tx_no_count_feature_node[VNET_LINK_NUM]; - -static u32 *adj_midchain_feat_count_per_sw_if_index[VNET_LINK_NUM]; - /** * @brief Trace data for packets traversing the midchain tx 
node */ @@ -49,203 +41,158 @@ adj_midchain_tx_inline (vlib_main_t * vm, vlib_frame_t * frame, int interface_count) { - u32 * from, * to_next, n_left_from, n_left_to_next; - u32 next_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 nexts[VLIB_FRAME_SIZE], *next; + u32 * from, n_left, thread_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; - u32 thread_index = vm->thread_index; - /* Vector of buffer / pkt indices we're supposed to process */ + thread_index = vm->thread_index; + n_left = frame->n_vectors; from = vlib_frame_vector_args (frame); - /* Number of buffers / pkts */ - n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left); - /* Speculatively send the first buffer to the last disposition we used */ - next_index = node->cached_next_index; + next = nexts; + b = bufs; - while (n_left_from > 0) + while (n_left > 8) { - /* set up to enqueue to our disposition with index = next_index */ - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + u32 adj_index0, adj_index1, adj_index2, adj_index3; + const ip_adjacency_t *adj0, *adj1, *adj2, *adj3; + const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; - while (n_left_from >= 8 && n_left_to_next > 4) - { - const ip_adjacency_t *adj0, *adj1, *adj2, *adj3; - const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; - vlib_buffer_t * b0, *b1, *b2, *b3; - u32 bi0, adj_index0, next0; - u32 bi1, adj_index1, next1; - u32 bi2, adj_index2, next2; - u32 bi3, adj_index3, next3; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t * p4, * p5; - vlib_buffer_t * p6, * p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - } - - bi0 = from[0]; - to_next[0] = bi0; - bi1 = from[1]; - to_next[1] = bi1; - bi2 = from[2]; - to_next[2] = bi2; - bi3 = from[3]; - to_next[3] = bi3; - - from += 4; - to_next += 4; - n_left_from -= 4; - n_left_to_next -= 4; - - b0 = vlib_get_buffer(vm, bi0); - b1 = vlib_get_buffer(vm, bi1); - b2 = vlib_get_buffer(vm, bi2); - b3 = vlib_get_buffer(vm, bi3); - - /* Follow the DPO on which the midchain is stacked */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX]; - adj_index2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX]; - adj_index3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX]; - - adj0 = adj_get(adj_index0); - adj1 = adj_get(adj_index1); - adj2 = adj_get(adj_index2); - adj3 = adj_get(adj_index3); - - dpo0 = &adj0->sub_type.midchain.next_dpo; - dpo1 = &adj1->sub_type.midchain.next_dpo; - dpo2 = &adj2->sub_type.midchain.next_dpo; - dpo3 = &adj3->sub_type.midchain.next_dpo; - - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - next2 = dpo2->dpoi_next_node; - next3 = dpo3->dpoi_next_node; - - vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer(b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - vnet_buffer(b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; - - if (interface_count) - { - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_TX, - thread_index, - adj0->rewrite_header.sw_if_index, - 1, - vlib_buffer_length_in_chain (vm, b0)); - vlib_increment_combined_counter (im->combined_sw_if_counters - + 
VNET_INTERFACE_COUNTER_TX, - thread_index, - adj1->rewrite_header.sw_if_index, - 1, - vlib_buffer_length_in_chain (vm, b1)); - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_TX, - thread_index, - adj2->rewrite_header.sw_if_index, - 1, - vlib_buffer_length_in_chain (vm, b2)); - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_TX, - thread_index, - adj3->rewrite_header.sw_if_index, - 1, - vlib_buffer_length_in_chain (vm, b3)); - } - - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->ai = adj_index0; - } - if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) - { - adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - tr->ai = adj_index1; - } - if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) - { - adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, - b2, sizeof (*tr)); - tr->ai = adj_index2; - } - if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) - { - adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, - b3, sizeof (*tr)); - tr->ai = adj_index3; - } - - vlib_validate_buffer_enqueue_x4 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, bi2, bi3, - next0, next1, next2, next3); - } - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, adj_index0, next0; - const ip_adjacency_t * adj0; - const dpo_id_t *dpo0; - vlib_buffer_t * b0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer(vm, bi0); - - /* Follow the DPO on which the midchain is stacked */ - adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; - adj0 = adj_get(adj_index0); - dpo0 = &adj0->sub_type.midchain.next_dpo; - next0 = dpo0->dpoi_next_node; - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - if (interface_count) - { - vlib_increment_combined_counter (im->combined_sw_if_counters 
- + VNET_INTERFACE_COUNTER_TX, - thread_index, - adj0->rewrite_header.sw_if_index, - 1, - vlib_buffer_length_in_chain (vm, b0)); - } - - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->ai = adj_index0; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } + /* Prefetch next iteration. */ + { + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer(b[1])->ip.adj_index[VLIB_TX]; + adj_index2 = vnet_buffer(b[2])->ip.adj_index[VLIB_TX]; + adj_index3 = vnet_buffer(b[3])->ip.adj_index[VLIB_TX]; + + adj0 = adj_get(adj_index0); + adj1 = adj_get(adj_index1); + adj2 = adj_get(adj_index2); + adj3 = adj_get(adj_index3); + + dpo0 = &adj0->sub_type.midchain.next_dpo; + dpo1 = &adj1->sub_type.midchain.next_dpo; + dpo2 = &adj2->sub_type.midchain.next_dpo; + dpo3 = &adj3->sub_type.midchain.next_dpo; + + next[0] = dpo0->dpoi_next_node; + next[1] = dpo1->dpoi_next_node; + next[2] = dpo2->dpoi_next_node; + next[3] = dpo3->dpoi_next_node; + + vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer(b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + vnet_buffer(b[2])->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + vnet_buffer(b[3])->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + if (interface_count) + { + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, + adj0->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b[0])); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, + 
adj1->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b[1])); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, + adj2->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b[2])); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, + adj3->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b[3])); + } + + if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE)) + { + if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b[0], sizeof (*tr)); + tr->ai = adj_index0; + } + if (PREDICT_FALSE(b[1]->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b[1], sizeof (*tr)); + tr->ai = adj_index1; + } + if (PREDICT_FALSE(b[2]->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b[2], sizeof (*tr)); + tr->ai = adj_index2; + } + if (PREDICT_FALSE(b[3]->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b[3], sizeof (*tr)); + tr->ai = adj_index3; + } + } + n_left -= 4; + b += 4; + next += 4; } + while (n_left) + { + const ip_adjacency_t * adj0; + const dpo_id_t *dpo0; + u32 adj_index0; + + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer(b[0])->ip.adj_index[VLIB_TX]; + adj0 = adj_get(adj_index0); + dpo0 = &adj0->sub_type.midchain.next_dpo; + next[0] = dpo0->dpoi_next_node; + vnet_buffer(b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (interface_count) + { + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, + adj0->rewrite_header.sw_if_index, + 1, + vlib_buffer_length_in_chain (vm, b[0])); + } + + if (PREDICT_FALSE(b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + adj_midchain_tx_trace_t *tr = vlib_add_trace (vm, node, + b[0], 
sizeof (*tr)); + tr->ai = adj_index0; + } + + n_left -= 1; + b += 1; + next += 1; + } + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return frame->n_vectors; } @@ -271,7 +218,7 @@ adj_midchain_tx (vlib_main_t * vm, return (adj_midchain_tx_inline(vm, node, frame, 1)); } -VLIB_REGISTER_NODE (adj_midchain_tx_node, static) = { +VLIB_REGISTER_NODE (adj_midchain_tx_node) = { .function = adj_midchain_tx, .name = "adj-midchain-tx", .vector_size = sizeof (u32), @@ -292,79 +239,44 @@ adj_midchain_tx_no_count (vlib_main_t * vm, return (adj_midchain_tx_inline(vm, node, frame, 0)); } -VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node, static) = { +VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = { .function = adj_midchain_tx_no_count, .name = "adj-midchain-tx-no-count", .vector_size = sizeof (u32), .format_trace = format_adj_midchain_tx_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, + .sibling_of = "adj-midchain-tx", }; -VNET_FEATURE_INIT (adj_midchain_tx_ip4, static) = { - .arc_name = "ip4-output", - .node_name = "adj-midchain-tx", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP4], -}; -VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip4, static) = { - .arc_name = "ip4-output", - .node_name = "adj-midchain-tx-no-count", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP4], -}; -VNET_FEATURE_INIT (adj_midchain_tx_ip6, static) = { - .arc_name = "ip6-output", - .node_name = "adj-midchain-tx", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_IP6], -}; -VNET_FEATURE_INIT (adj_midchain_tx_no_count_ip6, static) = { - .arc_name = "ip6-output", - .node_name = "adj-midchain-tx-no-count", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_IP6], 
-}; -VNET_FEATURE_INIT (adj_midchain_tx_mpls, static) = { - .arc_name = "mpls-output", - .node_name = "adj-midchain-tx", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_MPLS], -}; -VNET_FEATURE_INIT (adj_midchain_tx_no_count_mpls, static) = { - .arc_name = "mpls-output", - .node_name = "adj-midchain-tx-no-count", - .runs_before = VNET_FEATURES ("interface-output"), - .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_MPLS], -}; -VNET_FEATURE_INIT (adj_midchain_tx_ethernet, static) = { - .arc_name = "ethernet-output", - .node_name = "adj-midchain-tx", - .runs_before = VNET_FEATURES ("error-drop"), - .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_ETHERNET], -}; -VNET_FEATURE_INIT (adj_midchain_tx_no_count_ethernet, static) = { - .arc_name = "ethernet-output", - .node_name = "adj-midchain-tx-no-count", - .runs_before = VNET_FEATURES ("error-drop"), - .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_ETHERNET], -}; -VNET_FEATURE_INIT (adj_midchain_tx_nsh, static) = { - .arc_name = "nsh-output", - .node_name = "adj-midchain-tx", - .runs_before = VNET_FEATURES ("error-drop"), - .feature_index_ptr = &adj_midchain_tx_feature_node[VNET_LINK_NSH], -}; -VNET_FEATURE_INIT (adj_midchain_tx_no_count_nsh, static) = { - .arc_name = "nsh-output", - .node_name = "adj-midchain-tx-no-count", - .runs_before = VNET_FEATURES ("error-drop"), - .feature_index_ptr = &adj_midchain_tx_no_count_feature_node[VNET_LINK_NSH], -}; +#ifndef CLIB_MARCH_VARIANT + +u8 +adj_is_midchain (adj_index_t ai) +{ + ip_adjacency_t *adj; + + adj = adj_get(ai); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + return (1); + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + case IP_LOOKUP_NEXT_BCAST: + case IP_LOOKUP_NEXT_MCAST: + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case 
IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + return (0); + } + + return (0); +} static inline u32 adj_get_midchain_node (vnet_link_t link) @@ -436,17 +348,6 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj) adj_midchain_tx_node.index); } -static u32 -adj_nbr_midchain_get_feature_node (ip_adjacency_t *adj) -{ - if (adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) - { - return (adj_midchain_tx_no_count_feature_node[adj->ia_link]); - } - - return (adj_midchain_tx_feature_node[adj->ia_link]); -} - /** * adj_midchain_setup * @@ -455,20 +356,17 @@ adj_nbr_midchain_get_feature_node (ip_adjacency_t *adj) void adj_midchain_teardown (ip_adjacency_t *adj) { - u32 feature_index; - u8 arc_index; + vlib_main_t *vm = vlib_get_main(); dpo_reset(&adj->sub_type.midchain.next_dpo); - arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj); - feature_index = adj_nbr_midchain_get_feature_node(adj); - - if (0 == --adj_midchain_feat_count_per_sw_if_index[adj->ia_link][adj->rewrite_header.sw_if_index]) - { - vnet_feature_enable_disable_with_index (arc_index, feature_index, - adj->rewrite_header.sw_if_index, - 0, 0, 0); - } + vlib_worker_thread_barrier_sync(vm); + vnet_feature_modify_end_node( + adj_midchain_get_feature_arc_index_for_link_type (adj), + adj->rewrite_header.sw_if_index, + vlib_get_node_by_name (vlib_get_main(), + (u8*) "interface-output")->index); + vlib_worker_thread_barrier_release(vm); } /** @@ -482,9 +380,9 @@ adj_midchain_setup (adj_index_t adj_index, const void *data, adj_flags_t flags) { - u32 feature_index, tx_node; + vlib_main_t *vm = vlib_get_main(); ip_adjacency_t *adj; - u8 arc_index; + u32 tx_node; ASSERT(ADJ_INDEX_INVALID != adj_index); @@ -495,19 +393,23 @@ adj_midchain_setup (adj_index_t adj_index, adj->sub_type.midchain.fei = FIB_NODE_INDEX_INVALID; adj->ia_flags |= flags; - arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj); - feature_index = adj_nbr_midchain_get_feature_node(adj); - tx_node 
= adj_nbr_midchain_get_tx_node(adj); - - vec_validate (adj_midchain_feat_count_per_sw_if_index[adj->ia_link], - adj->rewrite_header.sw_if_index); - - if (0 == adj_midchain_feat_count_per_sw_if_index[adj->ia_link][adj->rewrite_header.sw_if_index]++) + if (flags & ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR) { - vnet_feature_enable_disable_with_index (arc_index, feature_index, - adj->rewrite_header.sw_if_index, - 1 /* enable */, 0, 0); + adj->rewrite_header.flags |= VNET_REWRITE_FIXUP_IP4_O_4; } + else + { + adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_IP4_O_4; + } + + tx_node = adj_nbr_midchain_get_tx_node(adj); + + vlib_worker_thread_barrier_sync(vm); + vnet_feature_modify_end_node( + adj_midchain_get_feature_arc_index_for_link_type (adj), + adj->rewrite_header.sw_if_index, + tx_node); + vlib_worker_thread_barrier_release(vm); /* * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx. @@ -561,6 +463,58 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, rewrite); } +void +adj_nbr_midchain_update_next_node (adj_index_t adj_index, + u32 next_node) +{ + ip_adjacency_t *adj; + vlib_main_t * vm; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + vm = vlib_get_main(); + + vlib_worker_thread_barrier_sync(vm); + + adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(), + adj->ia_node_index, + next_node); + + vnet_feature_modify_end_node( + adj_midchain_get_feature_arc_index_for_link_type (adj), + adj->rewrite_header.sw_if_index, + next_node); + + vlib_worker_thread_barrier_release(vm); +} + +void +adj_nbr_midchain_reset_next_node(adj_index_t adj_index) +{ + ip_adjacency_t *adj; + vlib_main_t * vm; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + vm = vlib_get_main(); + + vlib_worker_thread_barrier_sync(vm); + + adj->rewrite_header.next_index = + vlib_node_add_next(vlib_get_main(), + adj->ia_node_index, + adj_nbr_midchain_get_tx_node(adj)); + + vnet_feature_modify_end_node( + 
adj_midchain_get_feature_arc_index_for_link_type (adj), + adj->rewrite_header.sw_if_index, + adj_nbr_midchain_get_tx_node(adj)); + + vlib_worker_thread_barrier_release(vm); +} + /** * adj_nbr_midchain_unstack * @@ -810,3 +764,5 @@ adj_midchain_module_init (void) { dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes); } + +#endif diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h index 1f5deaecd95..5fb0ee8efb3 100644 --- a/src/vnet/adj/adj_midchain.h +++ b/src/vnet/adj/adj_midchain.h @@ -50,6 +50,27 @@ extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index, adj_flags_t flags, u8 *rewrite); +/** + * @brief + * Return the adjacency's next node to its default value + * + * @param adj_index + * The index of the neighbour adjacency. + */ +extern void adj_nbr_midchain_reset_next_node(adj_index_t adj_index); + +/** + * @brief + * Update the VLIB node to which packets are sent post processing + * + * @param adj_index + * The index of the neighbour adjacency. + * + * @param node node-index to send to + */ +extern void adj_nbr_midchain_update_next_node(adj_index_t adj_index, + u32 node_index); + /** * @brief * [re]stack a midchain. 
'Stacking' is the act of forming parent-child @@ -139,4 +160,6 @@ extern void adj_midchain_delegate_restack(adj_index_t ai); */ extern void adj_midchain_delegate_unstack(adj_index_t ai); +extern u8 adj_is_midchain (adj_index_t ai); + #endif diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index 7acdccc72b0..8604bf73dd6 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -168,9 +168,16 @@ adj_nbr_evaluate_feature (adj_index_t ai) { feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index]; if (feature_count > 0) + { + vnet_feature_config_main_t *cm; + adj->rewrite_header.flags |= VNET_REWRITE_HAS_FEATURES; - } + cm = &fm->feature_config_mains[arc_index]; + adj->ia_cfg_index = vec_elt (cm->config_index_by_sw_if_index, + sw_if_index); + } + } return; } @@ -521,6 +528,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE; } + adj_delegate_adj_modified(adj); adj_unlock(ai); adj_unlock(walk_ai); } @@ -578,7 +586,6 @@ adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp, { adj_walk_ctx_t *ctx = arg; - // FIXME: can't stop early... if (ADJ_WALK_RC_STOP == ctx->awc_cb(kvp->value, ctx->awc_ctx)) return (BIHASH_WALK_STOP); return (BIHASH_WALK_CONTINUE); diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c index c8508c4b37a..fc2df9af109 100644 --- a/src/vnet/adj/rewrite.c +++ b/src/vnet/adj/rewrite.c @@ -60,7 +60,7 @@ format_vnet_rewrite (u8 * s, va_list * args) s = format (s, "DELETED:%d", rw->sw_if_index); } - s = format (s, " mtu:%d", rw->max_l3_packet_bytes); + s = format (s, " mtu:%d next:%d", rw->max_l3_packet_bytes, rw->next_index); /* Format rewrite string. 
*/ if (rw->data_bytes > 0) diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h index 5c1d24ea890..c23edbe1f60 100644 --- a/src/vnet/adj/rewrite.h +++ b/src/vnet/adj/rewrite.h @@ -55,10 +55,15 @@ typedef enum vnet_rewrite_flags_t_ * This adjacency/interface has output features configured */ VNET_REWRITE_HAS_FEATURES = (1 << 0), + + /** + * this adj performs IP4 over IP4 fixup + */ + VNET_REWRITE_FIXUP_IP4_O_4 = (1 << 1), } __attribute__ ((packed)) vnet_rewrite_flags_t; -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct { +typedef struct vnet_rewrite_header_t_ +{ /* Interface to mark re-written packets with. */ u32 sw_if_index; @@ -83,8 +88,7 @@ typedef CLIB_PACKED (struct { /* Rewrite string starting at end and going backwards. */ u8 data[0]; -}) vnet_rewrite_header_t; -/* *INDENT-ON* */ +} __clib_packed vnet_rewrite_header_t; /** * At 16 bytes of rewrite herader we have enought space left for a IPv6 @@ -114,6 +118,13 @@ STATIC_ASSERT (sizeof (vnet_rewrite_header_t) <= 16, sizeof (vnet_rewrite_header_t)]; \ } +typedef struct __rewrite_unused_t__ +{ + VNET_DECLARE_REWRITE; +} __rewrite_unused_t; + +STATIC_ASSERT_SIZEOF (__rewrite_unused_t, 128); + always_inline void vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size) { diff --git a/src/vnet/config.c b/src/vnet/config.c index 9beda4a5706..e341c697044 100644 --- a/src/vnet/config.c +++ b/src/vnet/config.c @@ -234,6 +234,62 @@ vnet_get_config_heap (vnet_config_main_t * cm, u32 ci) return heap_elt_at_index (cm->config_string_heap, ci); } +u32 +vnet_config_modify_end_node (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_string_heap_index, u32 end_node_index) +{ + vnet_config_feature_t *new_features; + vnet_config_t *old, *new; + + if (end_node_index == ~0) // feature node does not exist + return ~0; + + if (config_string_heap_index == ~0) + { + old = 0; + new_features = 0; + } + else + { + u32 *p = vnet_get_config_heap (cm, config_string_heap_index); + old = pool_elt_at_index 
(cm->config_pool, p[-1]); + new_features = old->features; + if (new_features) + new_features = duplicate_feature_vector (new_features); + } + + if (vec_len (new_features)) + { + /* is the last feature the current end node */ + u32 last = vec_len (new_features) - 1; + if (new_features[last].node_index == cm->end_node_index) + { + vec_free (new_features->feature_config); + _vec_len (new_features) = last; + } + } + + if (old) + remove_reference (cm, old); + + cm->end_node_index = end_node_index; + + new = find_config_with_features (vm, cm, new_features); + new->reference_count += 1; + + /* + * User gets pointer to config string first element + * (which defines the pool index + * this config string comes from). + */ + vec_validate (cm->config_pool_index_by_user_index, + new->config_string_heap_index + 1); + cm->config_pool_index_by_user_index[new->config_string_heap_index + 1] + = new - cm->config_pool; + return new->config_string_heap_index + 1; +} + u32 vnet_config_add_feature (vlib_main_t * vm, vnet_config_main_t * cm, diff --git a/src/vnet/config.h b/src/vnet/config.h index b77a7794a6e..ab9e4b19886 100644 --- a/src/vnet/config.h +++ b/src/vnet/config.h @@ -161,6 +161,11 @@ u32 vnet_config_del_feature (vlib_main_t * vm, void *feature_config, u32 n_feature_config_bytes); +u32 vnet_config_modify_end_node (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_string_heap_index, + u32 end_node_index); + u8 *vnet_config_format_features (vlib_main_t * vm, vnet_config_main_t * cm, u32 config_index, u8 * s); diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c index 0f0bfeb11d2..4a5127db6ac 100644 --- a/src/vnet/feature/feature.c +++ b/src/vnet/feature/feature.c @@ -321,6 +321,35 @@ vnet_feature_enable_disable (const char *arc_name, const char *node_name, n_feature_config_bytes); } +int +vnet_feature_modify_end_node (u8 arc_index, + u32 sw_if_index, u32 end_node_index) +{ + vnet_feature_main_t *fm = &feature_main; + vnet_feature_config_main_t *cm; + u32
ci; + + if (arc_index == (u8) ~ 0) + return VNET_API_ERROR_INVALID_VALUE; + + if (end_node_index == ~0) + return VNET_API_ERROR_INVALID_VALUE_2; + + cm = &fm->feature_config_mains[arc_index]; + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + ci = vnet_config_modify_end_node (vlib_get_main (), &cm->config_main, + ci, end_node_index); + + if (ci == ~0) + return 0; + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + + return 0; +} + static int feature_cmp (void *a1, void *a2) { diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h index cd016735aca..4d568a512a8 100644 --- a/src/vnet/feature/feature.h +++ b/src/vnet/feature/feature.h @@ -219,6 +219,9 @@ vnet_feature_enable_disable (const char *arc_name, const char *node_name, void *feature_config, u32 n_feature_config_bytes); +int +vnet_feature_modify_end_node (u8 arc_index, u32 sw_if_index, u32 node_index); + static_always_inline u32 vnet_get_feature_count (u8 arc, u32 sw_if_index) { @@ -278,6 +281,23 @@ vnet_feature_arc_start_with_data (u8 arc, u32 sw_if_index, u32 * next, return 0; } +static_always_inline void * +vnet_feature_arc_start_w_cfg_index (u8 arc, + u32 sw_if_index, + u32 * next, + vlib_buffer_t * b, u32 cfg_index) +{ + vnet_feature_main_t *fm = &feature_main; + vnet_feature_config_main_t *cm; + cm = &fm->feature_config_mains[arc]; + + vnet_buffer (b)->feature_arc_index = arc; + b->current_config_index = cfg_index; + + return vnet_get_config_data (&cm->config_main, &b->current_config_index, + next, 0); +} + static_always_inline void vnet_feature_arc_start (u8 arc, u32 sw_if_index, u32 * next0, vlib_buffer_t * b0) diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index acff66d994b..ea78d550789 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -54,6 +54,7 @@ #include #include #include /* for mFIB table and entry creation */ +#include #include #include @@ -2222,8 
+2223,11 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next_index, b[0]); + vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index, + tx_sw_if_index0, + &next_index, b[0], + adj0->ia_cfg_index); + next[0] = next_index; if (is_midchain) vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , @@ -2246,8 +2250,10 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index1, &next_index, b[1]); + vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index, + tx_sw_if_index1, + &next_index, b[1], + adj1->ia_cfg_index); next[1] = next_index; if (is_midchain) vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , @@ -2261,9 +2267,14 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, ip4_ttl_inc (b[1], ip1); } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_two_headers (adj0[0], adj1[0], - ip0, ip1, sizeof (ethernet_header_t)); + if (is_midchain) + /* Guess we are only writing on ipv4 header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ip4_header_t)); + else + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); if (do_counters) { @@ -2284,12 +2295,10 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, if (is_midchain) { - if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func) - adj0->sub_type.midchain.fixup_func - (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); - if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func) - adj1->sub_type.midchain.fixup_func - (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data); + if (error0 == IP4_ERROR_NONE) + adj_midchain_fixup (vm, adj0, b[0]); + if (error1 == IP4_ERROR_NONE) + adj_midchain_fixup (vm, adj1, b[1]); } if (is_mcast) @@ -2391,17 +2400,25 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next_index, b[0]); + vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index, + tx_sw_if_index0, + &next_index, b[0], + adj0->ia_cfg_index); next[0] = next_index; if (is_midchain) - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ , - 0 /* with gso */ ); + { + vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , + 0 /* is_ip6 */ , + 0 /* with gso */ ); - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + /* Guess we are only writing on ipv4 header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_one_header (adj0[0], ip0, + sizeof (ethernet_header_t)); /* * Bump the per-adjacency counters @@ -2413,9 +2430,8 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); - if (is_midchain && adj0->sub_type.midchain.fixup_func) - adj0->sub_type.midchain.fixup_func - (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); + if (is_midchain) + adj_midchain_fixup (vm, adj0, b[0]); if (is_mcast) /* copy bytes from the IP address into the MAC rewrite */ @@ -2491,18 +2507,26 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next_index, b[0]); + vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index, + tx_sw_if_index0, + &next_index, b[0], + adj0->ia_cfg_index); next[0] = next_index; if (is_midchain) - /* this acts on the packet that is about to be encapped */ - vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , - 0 /* is_ip6 */ , - 0 /* with gso */ ); + { + /* this acts on the packet that is about to be encapped */ + vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ , + 0 /* is_ip6 */ , + 0 /* with gso */ ); - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + /* Guess we are only writing on ipv4 header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_one_header (adj0[0], ip0, + sizeof (ethernet_header_t)); if (do_counters) vlib_increment_combined_counter diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 0a455351ad2..1d6c1b7f105 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1893,8 +1893,9 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next0, p0); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0, + adj0->ia_cfg_index); } else { @@ -1911,8 +1912,9 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index1, &next1, p1); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1, + adj1->ia_cfg_index); } else { @@ -1929,11 +1931,15 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ , 1 /* is_ip6 */ , 0 /* with gso */ ); - } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_two_headers (adj0[0], adj1[0], - ip0, ip1, sizeof (ethernet_header_t)); + /* Guess we are only writing on ipv6 header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ip6_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); if (is_midchain) { @@ -2022,10 +2028,14 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ , 1 /* is_ip6 */ , 0 /* with gso */ ); - } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + /* Guess we are only writing on ip6 header. 
*/ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t)); + } + else + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, + sizeof (ethernet_header_t)); /* Update packet buffer attributes/set output interface. */ rw_len0 = adj0[0].rewrite_header.data_bytes; @@ -2065,8 +2075,9 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm, if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) - vnet_feature_arc_start (lm->output_feature_arc_index, - tx_sw_if_index0, &next0, p0); + vnet_feature_arc_start_w_cfg_index + (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0, + adj0->ia_cfg_index); } else { diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c index 047a2df811e..089e2b659c7 100644 --- a/src/vnet/ipip/ipip.c +++ b/src/vnet/ipip/ipip.c @@ -84,7 +84,6 @@ ipip_build_rewrite (vnet_main_t * vnm, u32 sw_if_index, ip4_header_set_dscp (ip4, t->dscp); if (t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_SET_DF) ip4_header_set_df (ip4); - ip4->checksum = ip4_header_checksum (ip4); switch (link_type) { @@ -97,6 +96,7 @@ ipip_build_rewrite (vnet_main_t * vnm, u32 sw_if_index, default: break; } + ip4->checksum = ip4_header_checksum (ip4); break; case IPIP_TRANSPORT_IP6: @@ -259,7 +259,7 @@ ipip_tunnel_restack (ipip_tunnel_t * gt) } static adj_midchain_fixup_t -ipip_get_fixup (const ipip_tunnel_t * t, vnet_link_t lt) +ipip_get_fixup (const ipip_tunnel_t * t, vnet_link_t lt, adj_flags_t * aflags) { if (t->transport == IPIP_TRANSPORT_IP6 && lt == VNET_LINK_IP6) return (ipip66_fixup); @@ -268,7 +268,10 @@ ipip_get_fixup (const ipip_tunnel_t * t, vnet_link_t lt) if (t->transport == IPIP_TRANSPORT_IP4 && lt == VNET_LINK_IP6) return (ipip64_fixup); if (t->transport == IPIP_TRANSPORT_IP4 && lt == VNET_LINK_IP4) - return (ipip44_fixup); + { + *aflags = *aflags | ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR; + return (ipip44_fixup); + } ASSERT (0); return (ipip44_fixup); @@ -277,6 +280,7 @@ ipip_get_fixup (const 
ipip_tunnel_t * t, vnet_link_t lt) void ipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) { + adj_midchain_fixup_t fixup; ipip_tunnel_t *t; adj_flags_t af; @@ -288,8 +292,9 @@ ipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) if (VNET_LINK_ETHERNET == adj_get_link_type (ai)) af |= ADJ_FLAG_MIDCHAIN_NO_COUNT; + fixup = ipip_get_fixup (t, adj_get_link_type (ai), &af); adj_nbr_midchain_update_rewrite - (ai, ipip_get_fixup (t, adj_get_link_type (ai)), + (ai, fixup, uword_to_pointer (t->flags, void *), af, ipip_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai), &t->tunnel_dst)); @@ -305,10 +310,15 @@ typedef struct mipip_walk_ctx_t_ static adj_walk_rc_t mipip_mk_complete_walk (adj_index_t ai, void *data) { + adj_midchain_fixup_t fixup; mipip_walk_ctx_t *ctx = data; + adj_flags_t af; + + af = ADJ_FLAG_NONE; + fixup = ipip_get_fixup (ctx->t, adj_get_link_type (ai), &af); adj_nbr_midchain_update_rewrite - (ai, ipip_get_fixup (ctx->t, adj_get_link_type (ai)), + (ai, fixup, uword_to_pointer (ctx->t->flags, void *), ADJ_FLAG_MIDCHAIN_IP_STACK, ipip_build_rewrite (vnet_get_main (), ctx->t->sw_if_index, @@ -324,11 +334,14 @@ mipip_mk_complete_walk (adj_index_t ai, void *data) static adj_walk_rc_t mipip_mk_incomplete_walk (adj_index_t ai, void *data) { + adj_midchain_fixup_t fixup; ipip_tunnel_t *t = data; + adj_flags_t af; - adj_nbr_midchain_update_rewrite - (ai, ipip_get_fixup (t, adj_get_link_type (ai)), - NULL, ADJ_FLAG_NONE, NULL); + af = ADJ_FLAG_NONE; + fixup = ipip_get_fixup (t, adj_get_link_type (ai), &af); + + adj_nbr_midchain_update_rewrite (ai, fixup, NULL, ADJ_FLAG_NONE, NULL); adj_midchain_delegate_unstack (ai); @@ -339,11 +352,14 @@ void mipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) { ipip_main_t *gm = &ipip_main; + adj_midchain_fixup_t fixup; ip_adjacency_t *adj; teib_entry_t *ne; ipip_tunnel_t *t; + adj_flags_t af; u32 ti; + af = ADJ_FLAG_NONE; adj = adj_get (ai); ti = 
gm->tunnel_index_by_sw_if_index[sw_if_index]; t = pool_elt_at_index (gm->tunnels, ti); @@ -352,10 +368,10 @@ mipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai) if (NULL == ne) { - // no NHRP entry to provide the next-hop + // no TEIB entry to provide the next-hop + fixup = ipip_get_fixup (t, adj_get_link_type (ai), &af); adj_nbr_midchain_update_rewrite - (ai, ipip_get_fixup (t, adj_get_link_type (ai)), - uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL); + (ai, fixup, uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL); return; } diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index dce887f6fa8..15f590acbdb 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -608,7 +608,7 @@ esp_encrypt_inline (vlib_main_t * vm, vlib_node_runtime_t * node, esp_header_t *esp; u8 *payload, *next_hdr_ptr; u16 payload_len, payload_len_total, n_bufs; - u32 hdr_len, config_index; + u32 hdr_len; if (n_left > 2) { @@ -623,8 +623,6 @@ esp_encrypt_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (is_tun) { /* we are on a ipsec tunnel's feature arc */ - config_index = b[0]->current_config_index; - vnet_feature_next_u16 (&next[0], b[0]); vnet_buffer (b[0])->ipsec.sad_index = sa_index0 = ipsec_tun_protect_get_sa_out (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); @@ -675,10 +673,6 @@ esp_encrypt_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (thread_index != sa0->encrypt_thread_index)) { next[0] = ESP_ENCRYPT_NEXT_HANDOFF; - if (is_tun) - { - b[0]->current_config_index = config_index; - } goto trace; } @@ -778,6 +772,8 @@ esp_encrypt_inline (vlib_main_t * vm, vlib_node_runtime_t * node, next[0] = dpo->dpoi_next_node; vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo->dpoi_index; } + else + next[0] = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; } else /* transport mode */ { @@ -872,8 +868,7 @@ esp_encrypt_inline (vlib_main_t * vm, vlib_node_runtime_t * node, esp_fill_udp_hdr (sa0, udp, 
udp_len); } - if (!is_tun) - next[0] = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; + next[0] = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT; } esp->spi = spi; @@ -1154,7 +1149,7 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_node) = { .next_nodes = { [ESP_ENCRYPT_NEXT_DROP] = "ip4-drop", [ESP_ENCRYPT_NEXT_HANDOFF] = "esp4-encrypt-tun-handoff", - [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "error-drop", + [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx", [ESP_ENCRYPT_NEXT_PENDING] = "esp-encrypt-pending", }, }; @@ -1177,27 +1172,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = { .n_errors = ARRAY_LEN(esp_encrypt_error_strings), .error_strings = esp_encrypt_error_strings, }; - -VNET_FEATURE_INIT (esp4_encrypt_tun_feat_node, static) = -{ - .arc_name = "ip4-output", - .node_name = "esp4-encrypt-tun", - .runs_before = VNET_FEATURES ("adj-midchain-tx"), -}; - -VNET_FEATURE_INIT (esp6o4_encrypt_tun_feat_node, static) = -{ - .arc_name = "ip6-output", - .node_name = "esp4-encrypt-tun", - .runs_before = VNET_FEATURES ("adj-midchain-tx"), -}; - -VNET_FEATURE_INIT (esp4_ethernet_encrypt_tun_feat_node, static) = -{ - .arc_name = "ethernet-output", - .node_name = "esp4-encrypt-tun", - .runs_before = VNET_FEATURES ("adj-midchain-tx", "adj-midchain-tx-no-count"), -}; /* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm, @@ -1222,25 +1196,11 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = { .next_nodes = { [ESP_ENCRYPT_NEXT_DROP] = "ip6-drop", [ESP_ENCRYPT_NEXT_HANDOFF] = "esp6-encrypt-tun-handoff", - [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "error-drop", [ESP_ENCRYPT_NEXT_PENDING] = "esp-encrypt-pending", + [ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx", }, }; -VNET_FEATURE_INIT (esp6_encrypt_tun_feat_node, static) = -{ - .arc_name = "ip6-output", - .node_name = "esp6-encrypt-tun", - .runs_before = VNET_FEATURES ("adj-midchain-tx"), -}; - -VNET_FEATURE_INIT (esp4o6_encrypt_tun_feat_node, static) = -{ - .arc_name = "ip4-output", - .node_name = "esp6-encrypt-tun", - 
.runs_before = VNET_FEATURES ("adj-midchain-tx"), -}; - /* *INDENT-ON* */ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm, @@ -1300,7 +1260,6 @@ esp_no_crypto_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - u16 nexts[VLIB_FRAME_SIZE], *next = nexts; u32 *from = vlib_frame_vector_args (frame); u32 n_left = frame->n_vectors; @@ -1308,14 +1267,11 @@ esp_no_crypto_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (n_left > 0) { - u32 next0; u32 sa_index0; /* packets are always going to be dropped, but get the sa_index */ - sa_index0 = *(u32 *) vnet_feature_next_with_data (&next0, b[0], - sizeof (sa_index0)); - - next[0] = ESP_NO_CRYPTO_NEXT_DROP; + sa_index0 = ipsec_tun_protect_get_sa_out + (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]); if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { @@ -1325,14 +1281,15 @@ esp_no_crypto_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } n_left -= 1; - next += 1; b += 1; } vlib_node_increment_counter (vm, node->node_index, ESP_NO_CRYPTO_ERROR_RX_PKTS, frame->n_vectors); - vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + vlib_buffer_enqueue_to_single_next (vm, node, from, + ESP_NO_CRYPTO_NEXT_DROP, + frame->n_vectors); return frame->n_vectors; } @@ -1358,13 +1315,6 @@ VLIB_REGISTER_NODE (esp4_no_crypto_tun_node) = }, }; -VNET_FEATURE_INIT (esp4_no_crypto_tun_feat_node, static) = -{ - .arc_name = "ip4-output", - .node_name = "esp4-no-crypto", - .runs_before = VNET_FEATURES ("adj-midchain-tx"), -}; - VLIB_NODE_FN (esp6_no_crypto_tun_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) @@ -1385,13 +1335,6 @@ VLIB_REGISTER_NODE (esp6_no_crypto_tun_node) = [ESP_NO_CRYPTO_NEXT_DROP] = "ip6-drop", }, }; - -VNET_FEATURE_INIT (esp6_no_crypto_tun_feat_node, static) = -{ - .arc_name = "ip6-output", - .node_name = "esp6-no-crypto", - .runs_before = VNET_FEATURES ("adj-midchain-tx"), 
-}; /* *INDENT-ON* */ VLIB_NODE_FN (esp_encrypt_pending_node) (vlib_main_t * vm, diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index 6b31926be4f..95e322e87a6 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -124,17 +124,6 @@ ipsec_add_node (vlib_main_t * vm, const char *node_name, *out_next_index = vlib_node_add_next (vm, prev_node->index, node->index); } -void -ipsec_add_feature (const char *arc_name, - const char *node_name, u32 * out_feature_index) -{ - u8 arc; - - arc = vnet_get_feature_arc_index (arc_name); - ASSERT (arc != (u8) ~ 0); - *out_feature_index = vnet_get_feature_index (arc, node_name); -} - u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im, const char *name, @@ -198,14 +187,10 @@ ipsec_register_esp_backend (vlib_main_t * vm, ipsec_main_t * im, &b->esp6_decrypt_tun_node_index, &b->esp6_decrypt_tun_next_index); - ipsec_add_feature ("ip4-output", esp4_encrypt_node_tun_name, - &b->esp44_encrypt_tun_feature_index); - ipsec_add_feature ("ip4-output", esp6_encrypt_node_tun_name, - &b->esp46_encrypt_tun_feature_index); - ipsec_add_feature ("ip6-output", esp6_encrypt_node_tun_name, - &b->esp66_encrypt_tun_feature_index); - ipsec_add_feature ("ip6-output", esp4_encrypt_node_tun_name, - &b->esp64_encrypt_tun_feature_index); + b->esp6_encrypt_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) esp6_encrypt_node_tun_name)->index; + b->esp4_encrypt_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) esp4_encrypt_node_tun_name)->index; b->check_support_cb = esp_check_support_cb; b->add_del_sa_sess_cb = esp_add_del_sa_sess_cb; @@ -284,11 +269,8 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx) im->esp4_decrypt_tun_next_index = b->esp4_decrypt_tun_next_index; im->esp6_decrypt_tun_node_index = b->esp6_decrypt_tun_node_index; im->esp6_decrypt_tun_next_index = b->esp6_decrypt_tun_next_index; - - im->esp44_encrypt_tun_feature_index = b->esp44_encrypt_tun_feature_index; - 
im->esp64_encrypt_tun_feature_index = b->esp64_encrypt_tun_feature_index; - im->esp46_encrypt_tun_feature_index = b->esp46_encrypt_tun_feature_index; - im->esp66_encrypt_tun_feature_index = b->esp66_encrypt_tun_feature_index; + im->esp4_encrypt_tun_node_index = b->esp4_encrypt_tun_node_index; + im->esp6_encrypt_tun_node_index = b->esp6_encrypt_tun_node_index; if (b->enable_disable_cb) { diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 712e16dab4c..7646ffb2ddd 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -66,12 +66,10 @@ typedef struct u32 esp6_decrypt_next_index; u32 esp4_decrypt_tun_node_index; u32 esp4_decrypt_tun_next_index; + u32 esp4_encrypt_tun_node_index; u32 esp6_decrypt_tun_node_index; u32 esp6_decrypt_tun_next_index; - u32 esp44_encrypt_tun_feature_index; - u32 esp46_encrypt_tun_feature_index; - u32 esp66_encrypt_tun_feature_index; - u32 esp64_encrypt_tun_feature_index; + u32 esp6_encrypt_tun_node_index; } ipsec_esp_backend_t; typedef struct @@ -132,11 +130,13 @@ typedef struct u32 esp4_encrypt_node_index; u32 esp4_decrypt_node_index; u32 esp4_decrypt_tun_node_index; + u32 esp4_encrypt_tun_node_index; u32 ah4_encrypt_node_index; u32 ah4_decrypt_node_index; u32 esp6_encrypt_node_index; u32 esp6_decrypt_node_index; u32 esp6_decrypt_tun_node_index; + u32 esp6_encrypt_tun_node_index; u32 ah6_encrypt_node_index; u32 ah6_decrypt_node_index; /* next node indices */ @@ -151,15 +151,13 @@ typedef struct u32 ah6_encrypt_next_index; u32 ah6_decrypt_next_index; - /* tun encrypt arcs and feature nodes */ - u32 esp44_encrypt_tun_feature_index; - u32 esp64_encrypt_tun_feature_index; - u32 esp46_encrypt_tun_feature_index; - u32 esp66_encrypt_tun_feature_index; - /* tun nodes to drop packets when no crypto alg set on outbound SA */ - u32 esp4_no_crypto_tun_feature_index; - u32 esp6_no_crypto_tun_feature_index; + u32 esp4_no_crypto_tun_node_index; + u32 esp6_no_crypto_tun_node_index; + + /* tun nodes for encrypt on L2 interfaces */ 
+ u32 esp4_encrypt_l2_tun_node_index; + u32 esp6_encrypt_l2_tun_node_index; /* pool of ah backends */ ipsec_ah_backend_t *ah_backends; @@ -278,6 +276,7 @@ int ipsec_select_ah_backend (ipsec_main_t * im, u32 ah_backend_idx); int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx); clib_error_t *ipsec_rsc_in_use (ipsec_main_t * im); +void ipsec_set_async_mode (u32 is_enabled); always_inline ipsec_sa_t * ipsec_sa_get (u32 sa_index) @@ -285,11 +284,6 @@ ipsec_sa_get (u32 sa_index) return (pool_elt_at_index (ipsec_main.sad, sa_index)); } -void ipsec_add_feature (const char *arc_name, const char *node_name, - u32 * out_feature_index); - -void ipsec_set_async_mode (u32 is_enabled); - #endif /* __IPSEC_H__ */ /* diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c index 23598b7b085..a93e66a8775 100644 --- a/src/vnet/ipsec/ipsec_tun.c +++ b/src/vnet/ipsec/ipsec_tun.c @@ -19,6 +19,7 @@ #include #include #include +#include #include /** @@ -118,15 +119,6 @@ ipsec_tun_unregister_nodes (ip_address_family_t af) } } -static void -ipsec_tun_protect_add_adj (adj_index_t ai, index_t sai) -{ - vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai, - INDEX_INVALID); - - ipsec_tun_protect_sa_by_adj_index[ai] = sai; -} - static inline const ipsec_tun_protect_t * ipsec_tun_protect_from_const_base (const adj_delegate_t * ad) { @@ -135,60 +127,51 @@ ipsec_tun_protect_from_const_base (const adj_delegate_t * ad) return (pool_elt_at_index (ipsec_tun_protect_pool, ad->ad_index)); } +static u32 +ipsec_tun_protect_get_adj_next (const ipsec_tun_protect_t * itp) +{ + ipsec_main_t *im; + ipsec_sa_t *sa; + bool is_ip4; + u32 next; + + is_ip4 = ip46_address_is_ip4 (&itp->itp_tun.src); + sa = ipsec_sa_get (itp->itp_out_sa); + im = &ipsec_main; + + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE && + sa->integ_alg == IPSEC_INTEG_ALG_NONE) + next = (is_ip4 ? 
+ im->esp4_no_crypto_tun_node_index : + im->esp6_no_crypto_tun_node_index); + else if (itp->itp_flags & IPSEC_PROTECT_L2) + next = (is_ip4 ? + im->esp4_encrypt_l2_tun_node_index : + im->esp6_encrypt_l2_tun_node_index); + else + next = (is_ip4 ? + im->esp4_encrypt_tun_node_index : + im->esp6_encrypt_tun_node_index); + + return (next); +} + static void -ipsec_tun_protect_feature_set (ipsec_tun_protect_t * itp, u8 enable) +ipsec_tun_protect_add_adj (adj_index_t ai, const ipsec_tun_protect_t * itp) { - ITP_DBG2 ("%s on %U", (enable ? "enable" : "disable"), - format_vnet_sw_if_index_name, vnet_get_main (), - itp->itp_sw_if_index); + vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai, + INDEX_INVALID); - if (itp->itp_flags & IPSEC_PROTECT_L2) + if (NULL == itp) { - /* l2-GRE only supported by the vnet ipsec code */ - vnet_feature_enable_disable ("ethernet-output", - (ip46_address_is_ip4 (&itp->itp_tun.src) ? - "esp4-encrypt-tun" : - "esp6-encrypt-tun"), - itp->itp_sw_if_index, enable, NULL, 0); + ipsec_tun_protect_sa_by_adj_index[ai] = INDEX_INVALID; + adj_nbr_midchain_reset_next_node (ai); } else { - u32 fi4, fi6, sai; - ipsec_main_t *im; - ipsec_sa_t *sa; - - im = &ipsec_main; - sai = itp->itp_out_sa; - sa = ipsec_sa_get (sai); - - if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE && - sa->integ_alg == IPSEC_INTEG_ALG_NONE) - { - fi4 = im->esp4_no_crypto_tun_feature_index; - fi6 = im->esp6_no_crypto_tun_feature_index; - } - else - { - if (ip46_address_is_ip4 (&itp->itp_tun.src)) - { - /* tunnel destination is v4 so we need the Xo4 indexes */ - fi4 = im->esp44_encrypt_tun_feature_index; - fi6 = im->esp64_encrypt_tun_feature_index; - } - else - { - /* tunnel destination is v6 so we need the Xo6 indexes */ - fi4 = im->esp46_encrypt_tun_feature_index; - fi6 = im->esp66_encrypt_tun_feature_index; - } - } - - vnet_feature_enable_disable_with_index - (vnet_get_feature_arc_index ("ip4-output"), - fi4, itp->itp_sw_if_index, enable, NULL, 0); - 
vnet_feature_enable_disable_with_index - (vnet_get_feature_arc_index ("ip6-output"), - fi6, itp->itp_sw_if_index, enable, NULL, 0); + ipsec_tun_protect_sa_by_adj_index[ai] = itp->itp_out_sa; + adj_nbr_midchain_update_next_node + (ai, ipsec_tun_protect_get_adj_next (itp)); } } @@ -266,7 +249,7 @@ ipsec_tun_protect_adj_add (adj_index_t ai, void *arg) ipsec_tun_protect_t *itp = arg; adj_delegate_add (adj_get (ai), ipsec_tun_adj_delegate_type, itp - ipsec_tun_protect_pool); - ipsec_tun_protect_add_adj (ai, itp->itp_out_sa); + ipsec_tun_protect_add_adj (ai, itp); return (ADJ_WALK_RC_CONTINUE); } @@ -291,7 +274,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp) { if (INDEX_INVALID == idi->id_itp) { - ipsec_tun_protect_feature_set (itp, 1); + // ipsec_tun_protect_feature_set (itp, 1); } idi->id_itp = itp - ipsec_tun_protect_pool; @@ -309,7 +292,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp) * enable the encrypt feature for egress if this is the first addition * on this interface */ - ipsec_tun_protect_feature_set (itp, 1); + // ipsec_tun_protect_feature_set (itp, 1); } hash_set_mem (idi->id_hash, itp->itp_key, itp - ipsec_tun_protect_pool); @@ -337,31 +320,31 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im, /* *INDENT-OFF* */ FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa, ({ - if (ip46_address_is_ip4 (&itp->itp_crypto.dst)) - { - ipsec4_tunnel_key_t key = { - .remote_ip = itp->itp_crypto.dst.ip4, - .spi = clib_host_to_net_u32 (sa->spi), - }; - if (hash_get(im->tun4_protect_by_key, key.as_u64)) - { - hash_unset (im->tun4_protect_by_key, key.as_u64); - ipsec_tun_unregister_nodes(AF_IP4); - } - } - else - { - ipsec6_tunnel_key_t key = { - .remote_ip = itp->itp_crypto.dst.ip6, - .spi = clib_host_to_net_u32 (sa->spi), - }; - if (hash_get_mem(im->tun6_protect_by_key, &key)) - { - hash_unset_mem_free (&im->tun6_protect_by_key, &key); - ipsec_tun_unregister_nodes(AF_IP6); - } - } - })) + if (ip46_address_is_ip4 (&itp->itp_crypto.dst)) + { + 
ipsec4_tunnel_key_t key = { + .remote_ip = itp->itp_crypto.dst.ip4, + .spi = clib_host_to_net_u32 (sa->spi), + }; + if (hash_get(im->tun4_protect_by_key, key.as_u64)) + { + hash_unset (im->tun4_protect_by_key, key.as_u64); + ipsec_tun_unregister_nodes(AF_IP4); + } + } + else + { + ipsec6_tunnel_key_t key = { + .remote_ip = itp->itp_crypto.dst.ip6, + .spi = clib_host_to_net_u32 (sa->spi), + }; + if (hash_get_mem(im->tun6_protect_by_key, &key)) + { + hash_unset_mem_free (&im->tun6_protect_by_key, &key); + ipsec_tun_unregister_nodes(AF_IP6); + } + } + })); /* *INDENT-ON* */ } @@ -369,7 +352,7 @@ static adj_walk_rc_t ipsec_tun_protect_adj_remove (adj_index_t ai, void *arg) { adj_delegate_remove (ai, ipsec_tun_adj_delegate_type); - ipsec_tun_protect_add_adj (ai, INDEX_INVALID); + ipsec_tun_protect_add_adj (ai, NULL); return (ADJ_WALK_RC_CONTINUE); } @@ -386,7 +369,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp) if (vnet_sw_interface_is_p2p (vnet_get_main (), itp->itp_sw_if_index)) { - ipsec_tun_protect_feature_set (itp, 0); + // ipsec_tun_protect_feature_set (itp, 0); idi->id_itp = INDEX_INVALID; FOR_EACH_FIB_IP_PROTOCOL (nh_proto) @@ -402,7 +385,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp) if (0 == hash_elts (idi->id_hash)) { - ipsec_tun_protect_feature_set (itp, 0); + // ipsec_tun_protect_feature_set (itp, 0); hash_free (idi->id_hash); idi->id_hash = NULL; } @@ -817,10 +800,17 @@ static void ipsec_tun_protect_adj_delegate_adj_deleted (adj_delegate_t * ad) { /* remove our delegate */ - ipsec_tun_protect_add_adj (ad->ad_adj_index, INDEX_INVALID); + ipsec_tun_protect_add_adj (ad->ad_adj_index, NULL); adj_delegate_remove (ad->ad_adj_index, ipsec_tun_adj_delegate_type); } +static void +ipsec_tun_protect_adj_delegate_adj_modified (adj_delegate_t * ad) +{ + ipsec_tun_protect_add_adj (ad->ad_adj_index, + ipsec_tun_protect_get (ad->ad_index)); +} + static void ipsec_tun_protect_adj_delegate_adj_created (adj_index_t ai) { @@ -829,11 +819,11 @@ 
ipsec_tun_protect_adj_delegate_adj_created (adj_index_t ai) ip_adjacency_t *adj; index_t itpi; - adj = adj_get (ai); - - if (adj->lookup_next_index != IP_LOOKUP_NEXT_MIDCHAIN) + if (!adj_is_midchain (ai)) return; + adj = adj_get (ai); + ip_address_from_46 (&adj->sub_type.midchain.next_hop, adj->ia_nh_proto, &ip); @@ -845,7 +835,7 @@ ipsec_tun_protect_adj_delegate_adj_created (adj_index_t ai) itp = ipsec_tun_protect_get (itpi); adj_delegate_add (adj_get (ai), ipsec_tun_adj_delegate_type, itpi); - ipsec_tun_protect_add_adj (ai, itp->itp_out_sa); + ipsec_tun_protect_add_adj (ai, itp); } } @@ -919,6 +909,7 @@ ipsec_tun_teib_entry_deleted (const teib_entry_t * ne) const static adj_delegate_vft_t ipsec_tun_adj_delegate_vft = { .adv_adj_deleted = ipsec_tun_protect_adj_delegate_adj_deleted, .adv_adj_created = ipsec_tun_protect_adj_delegate_adj_created, + .adv_adj_modified = ipsec_tun_protect_adj_delegate_adj_modified, .adv_format = ipsec_tun_protect_adj_delegate_format, }; @@ -927,6 +918,7 @@ const static teib_vft_t ipsec_tun_teib_vft = { .nv_deleted = ipsec_tun_teib_entry_deleted, }; + clib_error_t * ipsec_tunnel_protect_init (vlib_main_t * vm) { @@ -939,10 +931,14 @@ ipsec_tunnel_protect_init (vlib_main_t * vm) im->tun4_protect_by_key = hash_create (0, sizeof (u64)); /* set up feature nodes to drop outbound packets with no crypto alg set */ - ipsec_add_feature ("ip4-output", "esp4-no-crypto", - &im->esp4_no_crypto_tun_feature_index); - ipsec_add_feature ("ip6-output", "esp6-no-crypto", - &im->esp6_no_crypto_tun_feature_index); + im->esp4_no_crypto_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) "esp4-no-crypto")->index; + im->esp6_no_crypto_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) "esp6-no-crypto")->index; + im->esp6_encrypt_l2_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) "esp6-encrypt-tun")->index; + im->esp4_encrypt_l2_tun_node_index = + vlib_get_node_by_name (vm, (u8 *) "esp4-encrypt-tun")->index; ipsec_tun_adj_delegate_type = 
adj_delegate_register_new_type (&ipsec_tun_adj_delegate_vft); diff --git a/src/vnet/tunnel/tunnel_dp.h b/src/vnet/tunnel/tunnel_dp.h index 3e3a81307f1..a00a3b3e222 100644 --- a/src/vnet/tunnel/tunnel_dp.h +++ b/src/vnet/tunnel/tunnel_dp.h @@ -18,6 +18,8 @@ #ifndef __TUNNEL_DP_H__ #define __TUNNEL_DP_H__ +#include + static_always_inline void tunnel_encap_fixup_4o4 (tunnel_encap_decap_flags_t flags, const ip4_header_t * inner, ip4_header_t * outer) -- cgit 1.2.3-korg