From 8c1fd9479513cd466f643f9f0fb132e5da91651c Mon Sep 17 00:00:00 2001 From: Hongjun Ni Date: Tue, 23 May 2017 22:33:08 +0800 Subject: Add Eth as NSH transport. NSHSFC-31 Change-Id: Ie0bac56af68ba0b1b45baf9d9a5822fbc1b2c0ac Signed-off-by: Hongjun Ni --- nsh-plugin/Makefile.am | 1 + nsh-plugin/nsh/nsh.c | 12 +- nsh-plugin/nsh/nsh.h | 9 +- nsh-plugin/nsh/nsh_output.c | 495 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 514 insertions(+), 3 deletions(-) create mode 100644 nsh-plugin/nsh/nsh_output.c diff --git a/nsh-plugin/Makefile.am b/nsh-plugin/Makefile.am index 64be6b6..3f73fda 100644 --- a/nsh-plugin/Makefile.am +++ b/nsh-plugin/Makefile.am @@ -23,6 +23,7 @@ CPPFLAGS += -DDEBUG -g lib_LTLIBRARIES = nsh_plugin.la nsh_test_plugin.la nsh_plugin_la_SOURCES = nsh/nsh.c \ nsh/nsh_pop.c \ + nsh/nsh_output.c \ vpp-api/nsh.api.h \ nsh-md2-ioam/nsh_md2_ioam.c \ nsh-md2-ioam/nsh_md2_ioam_trace.c \ diff --git a/nsh-plugin/nsh/nsh.c b/nsh-plugin/nsh/nsh.c index 2f500b2..5d4ad41 100644 --- a/nsh-plugin/nsh/nsh.c +++ b/nsh-plugin/nsh/nsh.c @@ -377,6 +377,11 @@ u8 * format_nsh_map (u8 * s, va_list * args) s = format (s, "encapped by LISP GPE intf: %d", map->sw_if_index); break; } + case NSH_NODE_NEXT_ENCAP_ETHERNET: + { + s = format (s, "encapped by Ethernet intf: %d", map->sw_if_index); + break; + } default: s = format (s, "only GRE and VXLANGPE support in this rev"); } @@ -703,6 +708,8 @@ nsh_add_del_map_command_fn (vlib_main_t * vm, next_node = NSH_NODE_NEXT_ENCAP_VXLAN4; else if (unformat (line_input, "encap-vxlan6-intf %d", &sw_if_index)) next_node = NSH_NODE_NEXT_ENCAP_VXLAN6; + else if (unformat (line_input, "encap-eth-intf %d", &sw_if_index)) + next_node = NSH_NODE_NEXT_ENCAP_ETHERNET; else if (unformat (line_input, "encap-none %d %d", &sw_if_index, &rx_sw_if_index)) next_node = NSH_NODE_NEXT_DECAP_ETH_INPUT; else @@ -781,7 +788,7 @@ VLIB_CLI_COMMAND (create_nsh_map_command, static) = { .short_help = "create nsh map nsp nsi [del] mapped-nsp mapped-nsi 
nsh_action [swap|push|pop] " "[encap-gre4-intf | encap-gre4-intf | encap-vxlan-gpe-intf | encap-lisp-gpe-intf " - " encap-vxlan4-intf | encap-vxlan6-intf | encap-none]\n", + " encap-vxlan4-intf | encap-vxlan6-intf | encap-eth-intf | encap-none]\n", .function = nsh_add_del_map_command_fn, }; @@ -2400,6 +2407,9 @@ clib_error_t *nsh_init (vlib_main_t *vm) vlib_node_add_next (vm, ip6_classify_node.index, nsh_classifier_node.index); vlib_node_add_next (vm, l2_input_classify_node.index, nsh_classifier_node.index); + /* Add Ethernet+NSH support */ + ethernet_register_input_type (vm, ETHERNET_TYPE_NSH, nsh_input_node.index); + vec_free(name); return error; diff --git a/nsh-plugin/nsh/nsh.h b/nsh-plugin/nsh/nsh.h index 4efad8d..3d48b94 100644 --- a/nsh-plugin/nsh/nsh.h +++ b/nsh-plugin/nsh/nsh.h @@ -148,6 +148,10 @@ typedef struct { u8 *(*trace[MAX_MD2_OPTIONS]) (u8 * s, nsh_tlv_header_t * opt); uword decap_v4_next_override; + /* Feature arc indices */ + u8 input_feature_arc_index; + u8 output_feature_arc_index; + /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; @@ -156,6 +160,8 @@ typedef struct { nsh_main_t nsh_main; extern vlib_node_registration_t nsh_aware_vnf_proxy_node; +extern vlib_node_registration_t nsh_output_node; + typedef struct { u8 trace_data[256]; } nsh_input_trace_t; @@ -197,8 +203,7 @@ typedef enum { _(ENCAP_VXLAN6, "vxlan6-encap" ) \ _(DECAP_ETH_INPUT, "ethernet-input" ) \ _(ENCAP_LISP_GPE, "interface-output" ) \ -/* /\* TODO once moved to Project:NSH_SFC *\/ */ - /* _(ENCAP_ETHERNET, "*** TX TO ETHERNET ***") \ */ + _(ENCAP_ETHERNET, "nsh-eth-output") \ /* _(DECAP_IP4_INPUT, "ip4-input") \ */ /* _(DECAP_IP6_INPUT, "ip6-input" ) \ */ diff --git a/nsh-plugin/nsh/nsh_output.c b/nsh-plugin/nsh/nsh_output.c new file mode 100644 index 0000000..1e1b772 --- /dev/null +++ b/nsh-plugin/nsh/nsh_output.c @@ -0,0 +1,495 @@ +/* + * nsh_output.c: NSH Adj rewrite + * + * Copyright (c) 2017-2019 Intel and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE(review): the header names of these four includes were lost in this
+ * copy of the patch. The names below are a best-effort restoration and must
+ * be confirmed against the original commit.
+ */
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <nsh/nsh.h>
+
+/*
+ * Trace record written by the NSH output nodes.
+ *
+ * NOTE(review): the nodes in this file only fill adj_index and flow_hash;
+ * packet_data is printed by format_nsh_output_trace() but is never written
+ * by any code visible here.
+ */
+typedef struct {
+  /* Adjacency taken. */
+  u32 adj_index;
+  u32 flow_hash;
+
+  /* Packet data, possibly *after* rewrite. */
+  u8 packet_data[64 - 1*sizeof(u32)];
+} nsh_output_trace_t;
+
+/* Next nodes shared by nsh-eth-output and its sibling nsh-midchain. */
+#define foreach_nsh_output_next \
+_(DROP, "error-drop") \
+_(INTERFACE, "interface-output" )
+
+typedef enum {
+#define _(s,n) NSH_OUTPUT_NEXT_##s,
+  foreach_nsh_output_next
+#undef _
+  NSH_OUTPUT_N_NEXT,
+} nsh_output_next_t;
+
+/*
+ * Forward declaration: nsh_output_inline() needs the node index before the
+ * VLIB_REGISTER_NODE definition further down in this file.
+ */
+extern vlib_node_registration_t nsh_eth_output_node;
+
+/*
+ * Format one nsh_output_trace_t.
+ *
+ * Consumes (vm, node, trace) from args; vm and node are unused. Prints the
+ * adjacency index, the adjacency itself and the flow hash on the first line,
+ * then the recorded packet data on a second line indented to match.
+ */
+static u8 *
+format_nsh_output_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nsh_output_trace_t * t = va_arg (*args, nsh_output_trace_t *);
+  uword indent = format_get_indent (s);
+
+  s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
+              t->adj_index,
+              format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+              t->flow_hash);
+  s = format (s, "\n%U%U",
+              format_white_space, indent,
+              format_ip_adjacency_packet_data,
+              t->adj_index, t->packet_data, sizeof (t->packet_data));
+  return s;
+}
+
+/*
+ * Common worker for the nsh-eth-output and nsh-midchain nodes.
+ *
+ * For every buffer in from_frame:
+ *   - look up the adjacency stored in vnet_buffer(b)->ip.adj_index[VLIB_TX];
+ *   - apply the adjacency rewrite in front of the NSH header and force the
+ *     ethertype of the ethernet_header_t located immediately before the NSH
+ *     header to ETHERNET_TYPE_NSH;
+ *   - bump the per-adjacency combined counter (this happens before the MTU
+ *     check, so packets that are subsequently dropped are counted too);
+ *   - if the buffer chain length fits max_l3_packet_bytes, expose the
+ *     rewrite bytes, set the TX sw_if_index and send the buffer to
+ *     interface-output (or into the output feature arc when the adjacency
+ *     has VNET_REWRITE_HAS_FEATURES set); otherwise send it to error-drop;
+ *   - when is_midchain is non-zero, call the adjacency's midchain fixup
+ *     function (called for dropped packets as well).
+ *
+ * @param vm          vlib main for the calling thread
+ * @param node        runtime of the node being dispatched
+ * @param from_frame  frame of buffer indices to process
+ * @param is_midchain non-zero when called from the nsh-midchain node
+ * @return number of buffers in from_frame
+ *
+ * NOTE(review): the ethertype is written at a fixed
+ * sizeof (ethernet_header_t) offset while the buffer is advanced by the
+ * adjacency's data_bytes; this presumably assumes an untagged Ethernet
+ * rewrite - confirm.
+ * NOTE(review): error0/error1 use IP4_ERROR_* values as indices into the
+ * error table of nsh-eth-output, but that node registers no n_errors or
+ * error_strings in this file - confirm the table is large enough.
+ * NOTE(review): nm->output_feature_arc_index is not assigned anywhere in
+ * this patch - confirm it is initialised before
+ * VNET_REWRITE_HAS_FEATURES can be set.
+ */
+static inline uword
+nsh_output_inline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame,
+                   int is_midchain)
+{
+  u32 n_left_from, next_index, * from, * to_next, thread_index;
+  vlib_node_runtime_t * error_node;
+  u32 n_left_to_next;
+  nsh_main_t *nm;
+
+  thread_index = vlib_get_thread_index();
+  /*
+   * Fix: the original referenced nsh_output_node, which is only declared
+   * extern in nsh.h and never defined. The node registered in this file is
+   * nsh_eth_output_node, so take the error table from its runtime.
+   */
+  error_node = vlib_node_get_runtime (vm, nsh_eth_output_node.index);
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+  nm = &nsh_main;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* Dual loop: two buffers per iteration, prefetching the next two. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_adjacency_t * adj0;
+          nsh_base_header_t *hdr0;
+          ethernet_header_t * eth_hdr0;
+          vlib_buffer_t * p0;
+          u32 pi0, rw_len0, adj_index0, next0, error0;
+
+          ip_adjacency_t * adj1;
+          nsh_base_header_t *hdr1;
+          ethernet_header_t * eth_hdr1;
+          vlib_buffer_t * p1;
+          u32 pi1, rw_len1, adj_index1, next1, error1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          adj0 = adj_get(adj_index0);
+          adj1 = adj_get(adj_index1);
+          hdr0 = vlib_buffer_get_current (p0);
+          hdr1 = vlib_buffer_get_current (p1);
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
+                                    sizeof (ethernet_header_t));
+
+          /* The Ethernet header sits directly in front of the NSH header. */
+          eth_hdr0 = (ethernet_header_t*)((u8 *)hdr0-sizeof(ethernet_header_t));
+          eth_hdr0->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
+          eth_hdr1 = (ethernet_header_t*)((u8 *)hdr1-sizeof(ethernet_header_t));
+          eth_hdr1->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+          /* Bump the adj counters for packet and bytes */
+          vlib_increment_combined_counter
+            (&adjacency_counters,
+             thread_index,
+             adj_index0,
+             1,
+             vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+          vlib_increment_combined_counter
+            (&adjacency_counters,
+             thread_index,
+             adj_index1,
+             1,
+             vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+
+          /* Check MTU of outgoing interface. */
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                           adj0[0].rewrite_header.max_l3_packet_bytes))
+            {
+              /* Make the rewrite bytes part of the packet. */
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                adj0[0].rewrite_header.sw_if_index;
+              next0 = NSH_OUTPUT_NEXT_INTERFACE;
+              error0 = IP4_ERROR_NONE;
+
+              if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (nm->output_feature_arc_index,
+                                        adj0[0].rewrite_header.sw_if_index,
+                                        &next0, p0);
+            }
+          else
+            {
+              error0 = IP4_ERROR_MTU_EXCEEDED;
+              next0 = NSH_OUTPUT_NEXT_DROP;
+            }
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
+                           adj1[0].rewrite_header.max_l3_packet_bytes))
+            {
+              /* Make the rewrite bytes part of the packet. */
+              p1->current_data -= rw_len1;
+              p1->current_length += rw_len1;
+
+              vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+                adj1[0].rewrite_header.sw_if_index;
+              next1 = NSH_OUTPUT_NEXT_INTERFACE;
+              error1 = IP4_ERROR_NONE;
+
+              if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (nm->output_feature_arc_index,
+                                        adj1[0].rewrite_header.sw_if_index,
+                                        &next1, p1);
+            }
+          else
+            {
+              error1 = IP4_ERROR_MTU_EXCEEDED;
+              next1 = NSH_OUTPUT_NEXT_DROP;
+            }
+          if (is_midchain)
+            {
+              adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+              adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
+            }
+
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                       p0, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+            }
+          if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                       p1, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      /* Single loop: same processing, one buffer at a time. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t * adj0;
+          nsh_base_header_t *hdr0;
+          ethernet_header_t * eth_hdr0;
+          vlib_buffer_t * p0;
+          u32 pi0, rw_len0, adj_index0, next0, error0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+          adj0 = adj_get(adj_index0);
+          hdr0 = vlib_buffer_get_current (p0);
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_one_header (adj0[0], hdr0,
+                                   sizeof (ethernet_header_t));
+
+          /* The Ethernet header sits directly in front of the NSH header. */
+          eth_hdr0 = (ethernet_header_t*)((u8 *)hdr0-sizeof(ethernet_header_t));
+          eth_hdr0->type = clib_host_to_net_u16(ETHERNET_TYPE_NSH);
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+          vlib_increment_combined_counter
+            (&adjacency_counters,
+             thread_index,
+             adj_index0,
+             1,
+             vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+          /* Check MTU of outgoing interface. */
+          if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+                           adj0[0].rewrite_header.max_l3_packet_bytes))
+            {
+              /* Make the rewrite bytes part of the packet. */
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+                adj0[0].rewrite_header.sw_if_index;
+              next0 = NSH_OUTPUT_NEXT_INTERFACE;
+              error0 = IP4_ERROR_NONE;
+
+              if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (nm->output_feature_arc_index,
+                                        adj0[0].rewrite_header.sw_if_index,
+                                        &next0, p0);
+            }
+          else
+            {
+              error0 = IP4_ERROR_MTU_EXCEEDED;
+              next0 = NSH_OUTPUT_NEXT_DROP;
+            }
+          if (is_midchain)
+            {
+              adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+            }
+
+          p0->error = error_node->errors[error0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              nsh_output_trace_t *tr = vlib_add_trace (vm, node,
+                                                       p0, sizeof (*tr));
+              tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+              tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+
+
+/* Node function for nsh-eth-output: the common worker with is_midchain = 0. */
+static inline uword
+nsh_eth_output (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+  return (nsh_output_inline(vm, node, from_frame, /* is_midchain */ 0));
+}
+
+VLIB_REGISTER_NODE (nsh_eth_output_node) = {
+  .function = nsh_eth_output,
+  .name = "nsh-eth-output",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_next_nodes = NSH_OUTPUT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [NSH_OUTPUT_NEXT_##s] = n,
+    foreach_nsh_output_next
+#undef _
+  },
+
+  .format_trace = format_nsh_output_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nsh_eth_output_node, nsh_eth_output)
+
+/* Node function for nsh-midchain: the common worker with is_midchain = 1. */
+static inline uword
+nsh_midchain (vlib_main_t * vm,
+              vlib_node_runtime_t * node,
+              vlib_frame_t * from_frame)
+{
+  return (nsh_output_inline(vm, node, from_frame, /* is_midchain */ 1));
+}
+
+/* Registered as a sibling of nsh-eth-output, so it declares no next nodes. */
+VLIB_REGISTER_NODE (nsh_midchain_node) = {
+  .function = nsh_midchain,
+  .name = "nsh-midchain",
+  .vector_size = sizeof (u32),
+  .format_trace = format_nsh_output_trace,
+  .sibling_of = "nsh-eth-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nsh_midchain_node, nsh_midchain)
+
+/*
+ * Built-in nsh tx feature path definition
+ *
+ * NOTE(review): no VNET_FEATURE_ARC_INIT for the "nsh-eth-output" arc is
+ * visible in this patch - confirm the arc is declared elsewhere.
+ */
+VNET_FEATURE_INIT (nsh_interface_output, static) = {
+  .arc_name = "nsh-eth-output",
+  .node_name = "interface-output",
+  .runs_before = 0, /* not before any other features */
+};
+
+/**
+ * @brief Next index values from the NSH incomplete adj node
+ */
+#define foreach_nsh_adj_incomplete_next \
+_(DROP, "error-drop") \
+_(IP4, "ip4-arp") \
+_(IP6, "ip6-discover-neighbor")
+
+typedef enum {
+#define _(s,n) NSH_ADJ_INCOMPLETE_NEXT_##s,
+  foreach_nsh_adj_incomplete_next
+#undef _
+  NSH_ADJ_INCOMPLETE_N_NEXT,
+} nsh_adj_incomplete_next_t;
+
+/**
+ * @brief A struct to hold tracing information for the NSH incomplete
+ * adjacency node.
+ */
+typedef struct nsh_adj_incomplete_trace_t_
+{
+  /* Next index chosen for the traced buffer. */
+  u32 next;
+} nsh_adj_incomplete_trace_t;
+
+
+/**
+ * @brief Graph node for incomplete NSH adjacency.
+ * This node will push traffic to either the v4-arp or v6-nd node
+ * based on the next-hop proto of the adj.
+ * We pay a cost for this 'routing' node, but an incomplete adj is the
+ * exception case.
+ */
+static inline uword
+nsh_adj_incomplete (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame)
+{
+  u32 *from = vlib_frame_vector_args (from_frame);
+  u32 n_left_from = from_frame->n_vectors;
+  u32 next_index = node->cached_next_index;
+  u32 *to_next;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* One buffer per pass until either frame runs out. */
+      for (; n_left_from > 0 && n_left_to_next > 0;)
+        {
+          u32 bi = from[0];
+          vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+          ip_adjacency_t *adj;
+          u32 next;
+
+          /* Speculatively enqueue to the current next frame. */
+          to_next[0] = bi;
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          adj = adj_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
+
+          /* IPv4 next-hop goes to ip4-arp, anything else to ip6 ND. */
+          next = PREDICT_TRUE (FIB_PROTOCOL_IP4 == adj->ia_nh_proto)
+            ? NSH_ADJ_INCOMPLETE_NEXT_IP4
+            : NSH_ADJ_INCOMPLETE_NEXT_IP6;
+
+          if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              nsh_adj_incomplete_trace_t *rec =
+                vlib_add_trace (vm, node, b, sizeof (*rec));
+              rec->next = next;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi, next);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+
+/*
+ * Format one nsh_adj_incomplete_trace_t: the recorded next index, indented
+ * to the current indent of s. The vm and node arguments are consumed from
+ * args but not used.
+ */
+static u8 *
+format_nsh_adj_incomplete_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nsh_adj_incomplete_trace_t *rec =
+    va_arg (*args, nsh_adj_incomplete_trace_t *);
+  uword indent = format_get_indent (s);
+
+  return format (s, "%Unext:%d",
+                 format_white_space, indent,
+                 rec->next);
+}
+
+VLIB_REGISTER_NODE (nsh_adj_incomplete_node) = {
+  .name = "nsh-adj-incomplete",
+  .function = nsh_adj_incomplete,
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .format_trace = format_nsh_adj_incomplete_trace,
+  .n_next_nodes = NSH_ADJ_INCOMPLETE_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [NSH_ADJ_INCOMPLETE_NEXT_##s] = n,
+    foreach_nsh_adj_incomplete_next
+#undef _
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (nsh_adj_incomplete_node,
+                              nsh_adj_incomplete)
-- 
cgit 1.2.3-korg