From f06aea5c7267cffabc1cc2438510a8fe7784079c Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 29 Nov 2016 06:51:37 -0800 Subject: Locally generated packet go through lookup/load-balance - locally-generated is an attribute of the packet and checked where necessary Add a flag to the vnet_buffer to indicate a packet is locally originated. Then in the rewrite nodes we can check this flag and not perform the TTL decrement. The switch path cost is expected to be fractions of a clock - the flags will be hot in the cache. The checks are necessary due to the requirement that VPP must be able to emit an IP packet with TTL=255. Change-Id: Ieb9cf06e34df54fd5c950293de8b665016295c51 Signed-off-by: Neale Ranns --- vnet/vnet/adj/adj_nbr.c | 2 +- vnet/vnet/buffer.h | 4 +- vnet/vnet/fib/fib_types.c | 14 +- vnet/vnet/ip/icmp4.c | 7 +- vnet/vnet/ip/ip4_forward.c | 183 ++++++++----------------- vnet/vnet/ip/ip4_source_and_port_range_check.c | 4 +- vnet/vnet/ip/ip6_forward.c | 86 ++++++------ vnet/vnet/ip/ip6_neighbor.c | 12 +- vnet/vnet/ip/lookup.h | 2 +- vnet/vnet/ip/ping.c | 167 ++++++++++++---------- vnet/vnet/ip/ping.h | 1 + vnet/vnet/map/ip6_map.c | 2 +- 12 files changed, 218 insertions(+), 266 deletions(-) diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c index 003e18e8d66..95d1254a8c4 100644 --- a/vnet/vnet/adj/adj_nbr.c +++ b/vnet/vnet/adj/adj_nbr.c @@ -979,7 +979,7 @@ const static dpo_vft_t adj_nbr_incompl_dpo_vft = { */ const static char* const nbr_ip4_nodes[] = { - "ip4-rewrite-transit", + "ip4-rewrite", NULL, }; const static char* const nbr_ip6_nodes[] = diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h index b3c71c127cc..6da699369f8 100644 --- a/vnet/vnet/buffer.h +++ b/vnet/vnet/buffer.h @@ -67,8 +67,8 @@ #define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6) #define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID) -#define LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID LOG2_VLIB_BUFFER_FLAG_USER(7) -#define 
VNET_BUFFER_RTE_MBUF_IS_VALID (1 << LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID) +#define LOG2_VNET_BUFFER_LOCALLY_ORIGINATED LOG2_VLIB_BUFFER_FLAG_USER(7) +#define VNET_BUFFER_LOCALLY_ORIGINATED (1 << LOG2_VNET_BUFFER_LOCALLY_ORIGINATED) #define foreach_buffer_opaque_union_subtype \ _(ethernet) \ diff --git a/vnet/vnet/fib/fib_types.c b/vnet/vnet/fib/fib_types.c index d25a7731c64..b66e71940a5 100644 --- a/vnet/vnet/fib/fib_types.c +++ b/vnet/vnet/fib/fib_types.c @@ -194,9 +194,17 @@ fib_route_path_cmp (const fib_route_path_t *rpath1, if (0 != res) return (res); - res = vnet_sw_interface_compare(vnet_get_main(), - rpath1->frp_sw_if_index, - rpath2->frp_sw_if_index); + if (~0 != rpath1->frp_sw_if_index && + ~0 != rpath2->frp_sw_if_index) + { + res = vnet_sw_interface_compare(vnet_get_main(), + rpath1->frp_sw_if_index, + rpath2->frp_sw_if_index); + } + else + { + res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index; + } if (0 != res) return (res); diff --git a/vnet/vnet/ip/icmp4.c b/vnet/vnet/ip/icmp4.c index c160f88a600..b1834ac70da 100644 --- a/vnet/vnet/ip/icmp4.c +++ b/vnet/vnet/ip/icmp4.c @@ -328,6 +328,9 @@ ip4_icmp_echo_request (vlib_main_t * vm, ASSERT (ip0->checksum == ip4_header_checksum (ip0)); ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + + p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; } while (n_left_from > 0 && n_left_to_next > 0) @@ -380,6 +383,8 @@ ip4_icmp_echo_request (vlib_main_t * vm, ip0->checksum = ip_csum_fold (sum0); ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + + p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; } vlib_put_next_frame (vm, node, next, n_left_to_next); @@ -402,7 +407,7 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = { .n_next_nodes = 1, .next_nodes = { - [0] = "ip4-rewrite-local", + [0] = "ip4-load-balance", }, }; diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 2a6791e5055..fc7b34963fe 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ 
b/vnet/vnet/ip/ip4_forward.c @@ -1025,7 +1025,7 @@ VNET_FEATURE_INIT (ip4_mc_drop, static) = { VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", - .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"), + .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; @@ -1749,8 +1749,7 @@ ip4_local (vlib_main_t * vm, dpo0 = load_balance_get_bucket_i(lb0, 0); vnet_buffer (p0)->ip.adj_index[VLIB_TX] = - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = - dpo0->dpoi_index; + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0; error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && dpo0->dpoi_type == DPO_RECEIVE) ? @@ -2186,7 +2185,6 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) typedef enum { IP4_REWRITE_NEXT_DROP, - IP4_REWRITE_NEXT_ARP, IP4_REWRITE_NEXT_ICMP_ERROR, } ip4_rewrite_next_t; @@ -2194,14 +2192,12 @@ always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int rewrite_for_locally_received_packets, int is_midchain) { ip_lookup_main_t * lm = &ip4_main.lookup_main; u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index); - vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX; n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -2218,12 +2214,8 @@ ip4_rewrite_inline (vlib_main_t * vm, ip4_header_t * ip0, * ip1; u32 pi0, rw_len0, next0, error0, checksum0, adj_index0; u32 pi1, rw_len1, next1, error1, checksum1, adj_index1; - u32 next0_override, next1_override; u32 tx_sw_if_index0, tx_sw_if_index1; - if (rewrite_for_locally_received_packets) - next0_override = next1_override = 0; - /* Prefetch next iteration. 
*/ { vlib_buffer_t * p2, * p3; @@ -2249,8 +2241,8 @@ ip4_rewrite_inline (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); - adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; - adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx]; + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; /* We should never rewrite a pkt using the MISS adjacency */ ASSERT(adj_index0 && adj_index1); @@ -2263,28 +2255,19 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Decrement TTL & update checksum. Works either endian, so no need for byte swap. */ - if (! rewrite_for_locally_received_packets) + if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) { - i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl; + i32 ttl0 = ip0->ttl; /* Input node should have reject packets with ttl 0. */ ASSERT (ip0->ttl > 0); - ASSERT (ip1->ttl > 0); checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); - checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); - checksum0 += checksum0 >= 0xffff; - checksum1 += checksum1 >= 0xffff; ip0->checksum = checksum0; - ip1->checksum = checksum1; - ttl0 -= 1; - ttl1 -= 1; - ip0->ttl = ttl0; - ip1->ttl = ttl1; /* * If the ttl drops below 1 when forwarding, generate @@ -2298,6 +2281,32 @@ ip4_rewrite_inline (vlib_main_t * vm, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); next0 = IP4_REWRITE_NEXT_ICMP_ERROR; } + + /* Verify checksum. */ + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + } + else + { + p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + } + if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + { + i32 ttl1 = ip1->ttl; + + /* Input node should have reject packets with ttl 0. */ + ASSERT (ip1->ttl > 0); + + checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); + checksum1 += checksum1 >= 0xffff; + + ip1->checksum = checksum1; + ttl1 -= 1; + ip1->ttl = ttl1; + + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. 
+ */ if (PREDICT_FALSE(ttl1 <= 0)) { error1 = IP4_ERROR_TIME_EXPIRED; @@ -2311,21 +2320,15 @@ ip4_rewrite_inline (vlib_main_t * vm, ASSERT (ip0->checksum == ip4_header_checksum (ip0)); ASSERT (ip1->checksum == ip4_header_checksum (ip1)); } + else + { + p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + } /* Rewrite packet header and updates lengths. */ adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); - if (rewrite_for_locally_received_packets) - { - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_ARP)) - next0_override = IP4_REWRITE_NEXT_ARP; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_ARP)) - next1_override = IP4_REWRITE_NEXT_ARP; - } - /* Worth pipelining. No guarantee that adj0,1 are hot... */ rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; @@ -2343,15 +2346,9 @@ ip4_rewrite_inline (vlib_main_t * vm, next0 = (error0 == IP4_ERROR_NONE) ? adj0[0].rewrite_header.next_index : next0; - if (rewrite_for_locally_received_packets) - next0 = next0 && next0_override ? next0_override : next0; - next1 = (error1 == IP4_ERROR_NONE) ? adj1[0].rewrite_header.next_index : next1; - if (rewrite_for_locally_received_packets) - next1 = next1 && next1_override ? 
next1_override : next1; - /* * We've already accounted for an ethernet_header_t elsewhere */ @@ -2417,17 +2414,13 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_t * p0; ip4_header_t * ip0; u32 pi0, rw_len0, adj_index0, next0, error0, checksum0; - u32 next0_override; u32 tx_sw_if_index0; - if (rewrite_for_locally_received_packets) - next0_override = 0; - pi0 = to_next[0] = from[0]; p0 = vlib_get_buffer (vm, pi0); - adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; /* We should never rewrite a pkt using the MISS adjacency */ ASSERT(adj_index0); @@ -2440,7 +2433,7 @@ ip4_rewrite_inline (vlib_main_t * vm, next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */ /* Decrement TTL & update checksum. */ - if (! rewrite_for_locally_received_packets) + if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) { i32 ttl0 = ip0->ttl; @@ -2471,16 +2464,9 @@ ip4_rewrite_inline (vlib_main_t * vm, ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); } } - - if (rewrite_for_locally_received_packets) + else { - /* - * We have to override the next_index in ARP adjacencies, - * because they're set up for ip4-arp, not this node... - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_ARP)) - next0_override = IP4_REWRITE_NEXT_ARP; + p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; } /* Guess we are only writing on simple Ethernet header. */ @@ -2527,9 +2513,6 @@ ip4_rewrite_inline (vlib_main_t * vm, } - if (rewrite_for_locally_received_packets) - next0 = next0 && next0_override ? next0_override : next0; - from += 1; n_left_from -= 1; to_next += 1; @@ -2545,14 +2528,14 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Need to do trace after rewrites to pick up new packet data. */ if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, adj_rx_tx); + ip4_forward_next_trace (vm, node, frame, VLIB_TX); return frame->n_vectors; } -/** @brief IPv4 transit rewrite node. 
- @node ip4-rewrite-transit +/** @brief IPv4 rewrite node. + @node ip4-rewrite This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4 header checksum, fetch the ip adjacency, check the outbound mtu, @@ -2583,54 +2566,11 @@ ip4_rewrite_inline (vlib_main_t * vm, or @c error-drop */ static uword -ip4_rewrite_transit (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip4_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0, 0); -} - -/** @brief IPv4 local rewrite node. - @node ip4-rewrite-local - - This is the IPv4 local rewrite node. Fetch the ip adjacency, check - the outbound interface mtu, apply the adjacency rewrite, and send - pkts to the adjacency rewrite header's rewrite_next_index. Deal - with hemorrhoids of the form "some clown sends an icmp4 w/ src = - dst = interface addr." - - @param vm vlib_main_t corresponding to the current thread - @param node vlib_node_runtime_t - @param frame vlib_frame_t whose contents should be dispatched - - @par Graph mechanics: buffer metadata, next index usage - - @em Uses: - - vnet_buffer(b)->ip.adj_index[VLIB_RX] - - the rewrite adjacency index - - adj->lookup_next_index - - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise - the packet will be dropped. 
- - adj->rewrite_header - - Rewrite string length, rewrite string, next_index - - @em Sets: - - b->current_data, b->current_length - - Updated net of applying the rewrite string - - Next Indices: - - adj->rewrite_header.next_index - or @c error-drop -*/ - -static uword -ip4_rewrite_local (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 1, 0); + return ip4_rewrite_inline (vm, node, frame, 0); } static uword @@ -2638,26 +2578,25 @@ ip4_midchain (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0, 1); + return ip4_rewrite_inline (vm, node, frame, 1); } + VLIB_REGISTER_NODE (ip4_rewrite_node) = { - .function = ip4_rewrite_transit, - .name = "ip4-rewrite-transit", + .function = ip4_rewrite, + .name = "ip4-rewrite", .vector_size = sizeof (u32), .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 3, + .n_next_nodes = 2, .next_nodes = { [IP4_REWRITE_NEXT_DROP] = "error-drop", - [IP4_REWRITE_NEXT_ARP] = "ip4-arp", [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit) +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) VLIB_REGISTER_NODE (ip4_midchain_node) = { .function = ip4_midchain, @@ -2666,25 +2605,11 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { .format_trace = format_ip4_forward_next_trace, - .sibling_of = "ip4-rewrite-transit", + .sibling_of = "ip4-rewrite", }; VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain) -VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { - .function = ip4_rewrite_local, - .name = "ip4-rewrite-local", - .vector_size = sizeof (u32), - - .sibling_of = "ip4-rewrite-transit", - - .format_trace = format_ip4_rewrite_trace, - - .n_next_nodes = 0, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local) - 
static clib_error_t * add_del_interface_table (vlib_main_t * vm, unformat_input_t * input, diff --git a/vnet/vnet/ip/ip4_source_and_port_range_check.c b/vnet/vnet/ip/ip4_source_and_port_range_check.c index 28dabeb3f41..ae836a113a5 100644 --- a/vnet/vnet/ip/ip4_source_and_port_range_check.c +++ b/vnet/vnet/ip/ip4_source_and_port_range_check.c @@ -784,8 +784,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm, * Example of graph node after range checking is enabled: * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx} * Name Next Previous - * ip4-source-and-port-range- error-drop [0] ip4-rewrite-local - * interface-output [1] ip4-rewrite-transit + * ip4-source-and-port-range- error-drop [0] ip4-rewrite + * interface-output [1] * @cliexend * * Example of how to display the features enabed on an interface: diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index 1f40c429310..a4ce65a6396 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -1831,14 +1831,12 @@ always_inline uword ip6_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int rewrite_for_locally_received_packets, int is_midchain) { ip_lookup_main_t * lm = &ip6_main.lookup_main; u32 * from = vlib_frame_vector_args (frame); u32 n_left_from, n_left_to_next, * to_next, next_index; vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index); - vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? 
VLIB_RX : VLIB_TX; n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -1885,8 +1883,8 @@ ip6_rewrite_inline (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); - adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; - adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx]; + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; /* We should never rewrite a pkt using the MISS adjacency */ ASSERT(adj_index0 && adj_index1); @@ -1897,19 +1895,16 @@ ip6_rewrite_inline (vlib_main_t * vm, error0 = error1 = IP6_ERROR_NONE; next0 = next1 = IP6_REWRITE_NEXT_DROP; - if (! rewrite_for_locally_received_packets) + if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) { - i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit; + i32 hop_limit0 = ip0->hop_limit; /* Input node should have reject packets with hop limit 0. */ ASSERT (ip0->hop_limit > 0); - ASSERT (ip1->hop_limit > 0); hop_limit0 -= 1; - hop_limit1 -= 1; ip0->hop_limit = hop_limit0; - ip1->hop_limit = hop_limit1; /* * If the hop count drops below 1 when forwarding, generate @@ -1923,6 +1918,26 @@ ip6_rewrite_inline (vlib_main_t * vm, icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded, ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); } + } + else + { + p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + } + if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + { + i32 hop_limit1 = ip1->hop_limit; + + /* Input node should have reject packets with hop limit 0. */ + ASSERT (ip1->hop_limit > 0); + + hop_limit1 -= 1; + + ip1->hop_limit = hop_limit1; + + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. 
+ */ if (PREDICT_FALSE(hop_limit1 <= 0)) { error1 = IP6_ERROR_TIME_EXPIRED; @@ -1931,8 +1946,11 @@ ip6_rewrite_inline (vlib_main_t * vm, icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded, ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); } - } - + } + else + { + p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + } adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); @@ -2018,7 +2036,7 @@ ip6_rewrite_inline (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); - adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx]; + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; /* We should never rewrite a pkt using the MISS adjacency */ ASSERT(adj_index0); @@ -2031,7 +2049,7 @@ ip6_rewrite_inline (vlib_main_t * vm, next0 = IP6_REWRITE_NEXT_DROP; /* Check hop limit */ - if (! rewrite_for_locally_received_packets) + if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) { i32 hop_limit0 = ip0->hop_limit; @@ -2054,6 +2072,10 @@ ip6_rewrite_inline (vlib_main_t * vm, ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); } } + else + { + p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + } /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2111,28 +2133,17 @@ ip6_rewrite_inline (vlib_main_t * vm, /* Need to do trace after rewrites to pick up new packet data. 
*/ if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace (vm, node, frame, adj_rx_tx); + ip6_forward_next_trace (vm, node, frame, VLIB_TX); return frame->n_vectors; } static uword -ip6_rewrite_transit (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip6_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0, - /* midchain */ 0); -} - -static uword -ip6_rewrite_local (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 1, /* midchain */ 0); } @@ -2142,7 +2153,6 @@ ip6_midchain (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_rewrite_inline (vm, node, frame, - /* rewrite_for_locally_received_packets */ 0, /* midchain */ 1); } @@ -2159,7 +2169,7 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain) VLIB_REGISTER_NODE (ip6_rewrite_node) = { - .function = ip6_rewrite_transit, + .function = ip6_rewrite, .name = "ip6-rewrite", .vector_size = sizeof (u32), @@ -2172,21 +2182,7 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit); - -VLIB_REGISTER_NODE (ip6_rewrite_local_node) = { - .function = ip6_rewrite_local, - .name = "ip6-rewrite-local", - .vector_size = sizeof (u32), - - .sibling_of = "ip6-rewrite", - - .format_trace = format_ip6_rewrite_trace, - - .n_next_nodes = 0, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite); /* * Hop-by-Hop handling diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c index a407978b3fa..5380950ae6b 100644 --- a/vnet/vnet/ip/ip6_neighbor.c +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -1469,9 +1469,10 @@ icmp6_router_solicitation(vlib_main_t * vm, : 
error0); next0 = is_dropped ? next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW; - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; } } + p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; radv_info->n_solicitations_dropped += is_dropped; radv_info->n_solicitations_rcvd += is_solicitation; @@ -2130,15 +2131,16 @@ ip6_neighbor_send_mldpv2_report(u32 sw_if_index) /* * OK to override w/ no regard for actual FIB, because - * ip6-rewrite-local only looks at the adjacency. + * ip6-rewrite only looks at the adjacency. */ vnet_buffer (b0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index; - vnet_buffer (b0)->ip.adj_index[VLIB_RX] = + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->all_mldv2_routers_adj_index; + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; - vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-local"); + vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); f = vlib_get_frame_to_node (vm, node->index); to_next = vlib_frame_vector_args (f); @@ -2160,7 +2162,7 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = { .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT, .next_nodes = { [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop", - [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-local", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite", [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output", }, }; diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index 7f9b9846cd9..a609e2fe7c0 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -111,7 +111,7 @@ typedef enum { [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ - [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \ + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \ 
[IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ diff --git a/vnet/vnet/ip/ping.c b/vnet/vnet/ip/ping.c index 0bf83e9ea0f..08e770132f0 100644 --- a/vnet/vnet/ip/ping.c +++ b/vnet/vnet/ip/ping.c @@ -256,81 +256,65 @@ init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo, return data_len; } -/* - * Given adj index, return sw_if_index, possibly overwritten - * by a parameter. There is mostly debug outputs here, - * but it turned out handy to have these. - */ - -static u32 -adj_index_to_sw_if_index (vlib_main_t * vm, ip_lookup_main_t * lm, - char *lookup_next_nodes[], u32 adj_index0, - u32 sw_if_index, u8 verbose) -{ - ip_adjacency_t *adj0 = ip_get_adjacency (lm, adj_index0); - u32 sw_if_index0 = adj0->rewrite_header.sw_if_index; - if (verbose) - { - vlib_cli_output (vm, "Adjacency index: %u, sw_if_index: %u\n", - adj_index0, sw_if_index0); - vlib_cli_output (vm, "Adj: %s\n", - lookup_next_nodes[adj0->lookup_next_index]); - vlib_cli_output (vm, "Adj Interface: %d\n", adj0->if_address_index); - } - - if (~0 != sw_if_index) - { - sw_if_index0 = sw_if_index; - if (verbose) - { - vlib_cli_output (vm, "Forced set interface: %d\n", sw_if_index0); - } - } - return sw_if_index0; -} - static send_ip46_ping_result_t -send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6, +send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, + u32 table_id, ip6_address_t * pa6, u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len, u8 verbose) { icmp6_echo_request_header_t *h0; u32 bi0 = 0; - u32 sw_if_index0; - ip_lookup_main_t *lm = &im->lookup_main; int bogus_length = 0; - u32 adj_index0; vlib_buffer_t *p0; vlib_frame_t *f; u32 *to_next; - u32 fib_index0; if (vlib_buffer_alloc (vm, &bi0, 1) != 1) return SEND_PING_ALLOC_FAIL; p0 = vlib_get_buffer (vm, bi0); - /* Determine sw_if_index0 of source intf, may be force-set via sw_if_index. 
*/ - vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ - fib_index0 = 0; - adj_index0 = fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128)); + /* + * if the user did not provide a source interface, use any interface + * that the destination resolves via. + */ + if (~0 == sw_if_index) + { + fib_node_index_t fib_entry_index; + u32 fib_index; + + fib_index = ip6_fib_index_from_table_id(table_id); - if (ADJ_INDEX_INVALID == adj_index0) + if (~0 == fib_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_TABLE; + } + + fib_entry_index = ip6_fib_table_lookup(fib_index, pa6, 128); + sw_if_index = fib_entry_get_resolving_interface(fib_entry_index); + /* + * Set the TX interface to force ip-lookup to use its table ID + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index; + } + else { - vlib_buffer_free (vm, &bi0, 1); - return SEND_PING_NO_INTERFACE; + /* + * force an IP lookup in the table bound to the user's chosen + * source interface. 
+ */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + ip6_fib_table_get_index_for_sw_if_index(sw_if_index); } - sw_if_index0 = - adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0, - sw_if_index, verbose); - if ((~0 == sw_if_index0) && (~0 == sw_if_index)) + if (~0 == sw_if_index) { vlib_buffer_free (vm, &bi0, 1); return SEND_PING_NO_INTERFACE; } - vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0; + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index; h0 = vlib_buffer_get_current (p0); @@ -344,7 +328,7 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6, h0->ip6.src_address = *pa6; /* Fill in the correct source now */ - ip6_address_t *a = ip6_interface_first_address (im, sw_if_index0); + ip6_address_t *a = ip6_interface_first_address (im, sw_if_index); h0->ip6.src_address = a[0]; /* Fill in icmp fields */ @@ -381,19 +365,17 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6, static send_ip46_ping_result_t send_ip4_ping (vlib_main_t * vm, ip4_main_t * im, + u32 table_id, ip4_address_t * pa4, u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len, u8 verbose) { icmp4_echo_request_header_t *h0; u32 bi0 = 0; - u32 sw_if_index0; ip_lookup_main_t *lm = &im->lookup_main; - u32 adj_index0; vlib_buffer_t *p0; vlib_frame_t *f; u32 *to_next; - u32 fib_index0; u32 if_add_index0; if (vlib_buffer_alloc (vm, &bi0, 1) != 1) @@ -401,28 +383,47 @@ send_ip4_ping (vlib_main_t * vm, p0 = vlib_get_buffer (vm, bi0); - /* Determine sw_if_index0 of the source intf, may be force-set via sw_if_index. */ - vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ - fib_index0 = 0; - adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup( - ip4_fib_get(fib_index0), pa4, 32)); + /* + * if the user did not provide a source interface, use any interface + * that the destination resolves via. 
+ */ + if (~0 == sw_if_index) + { + fib_node_index_t fib_entry_index; + u32 fib_index; + + fib_index = ip4_fib_index_from_table_id(table_id); - if (ADJ_INDEX_INVALID == adj_index0) + if (~0 == fib_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_TABLE; + } + + fib_entry_index = ip4_fib_table_lookup(ip4_fib_get(fib_index), pa4, 32); + sw_if_index = fib_entry_get_resolving_interface(fib_entry_index); + /* + * Set the TX interface to force ip-lookup to use the user's table ID + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index; + } + else { - vlib_buffer_free (vm, &bi0, 1); - return SEND_PING_NO_INTERFACE; + /* + * force an IP lookup in the table bound to the user's chosen + * source interface. + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + ip4_fib_table_get_index_for_sw_if_index(sw_if_index); } - sw_if_index0 = - adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0, - sw_if_index, verbose); - if ((~0 == sw_if_index0) && (~0 == sw_if_index)) + if (~0 == sw_if_index) { vlib_buffer_free (vm, &bi0, 1); return SEND_PING_NO_INTERFACE; } - vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0; + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index; h0 = vlib_buffer_get_current (p0); @@ -439,7 +440,7 @@ send_ip4_ping (vlib_main_t * vm, h0->ip4.src_address = *pa4; /* Fill in the correct source now */ - if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; + if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; if (PREDICT_TRUE (if_add_index0 != ~0)) { ip_interface_address_t *if_add = @@ -532,7 +533,7 @@ print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0) */ static void -run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4, +run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4, ip6_address_t * pa6, u32 sw_if_index, f64 ping_interval, u32 ping_repeat, u32 data_len, u32 verbose) @@ -571,14 +572,14 @@ run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4, pr = 
vec_elt_at_index (pm->ping_runs, ping_run_index); pr->curr_seq = i; if (pa6 && - (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, pa6, + (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6, sw_if_index, i, icmp_id, data_len, verbose))) { n_requests++; } if (pa4 && - (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, pa4, + (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4, sw_if_index, i, icmp_id, data_len, verbose))) { @@ -667,9 +668,12 @@ ping_ip_address (vlib_main_t * vm, u32 data_len = PING_DEFAULT_DATA_LEN; u32 verbose = 0; f64 ping_interval = PING_DEFAULT_INTERVAL; + u32 sw_if_index, table_id; + + table_id = 0; ping_ip4 = ping_ip6 = 0; - u32 sw_if_index; sw_if_index = ~0; + if (unformat (input, "%U", unformat_ip4_address, &a4)) { ping_ip4 = 1; @@ -757,6 +761,17 @@ ping_ip_address (vlib_main_t * vm, goto done; } } + else if (unformat (input, "table-id")) + { + if (!unformat (input, "du", &table_id)) + { + error = + clib_error_return (0, + "expecting table-id but got `%U'", + format_unformat_error, input); + goto done; + } + } else if (unformat (input, "interval")) { if (!unformat (input, "%f", &ping_interval)) @@ -791,7 +806,7 @@ ping_ip_address (vlib_main_t * vm, } } - run_ping_ip46_address (vm, ping_ip4 ? &a4 : NULL, ping_ip6 ? &a6 : NULL, + run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL, ping_ip6 ? 
&a6 : NULL, sw_if_index, ping_interval, ping_repeat, data_len, verbose); done: @@ -844,7 +859,7 @@ VLIB_CLI_COMMAND (ping_command, static) = { .path = "ping", .function = ping_ip_address, - .short_help = "ping { | ipv4 | ipv6 } [ipv4 | ipv6 ] [source ] [size ] [interval ] [repeat ] [verbose]", + .short_help = "ping { | ipv4 | ipv6 } [ipv4 | ipv6 ] [source ] [size ] [interval ] [repeat ] [table-id ] [verbose]", }; /* *INDENT-ON* */ diff --git a/vnet/vnet/ip/ping.h b/vnet/vnet/ip/ping.h index 579638cf4c3..58c6f4b4b6b 100644 --- a/vnet/vnet/ip/ping.h +++ b/vnet/vnet/ip/ping.h @@ -29,6 +29,7 @@ typedef enum { SEND_PING_OK = 0, SEND_PING_ALLOC_FAIL, SEND_PING_NO_INTERFACE, + SEND_PING_NO_TABLE, } send_ip46_ping_result_t; /* diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c index 2e38b0db626..d2945059df7 100644 --- a/vnet/vnet/map/ip6_map.c +++ b/vnet/vnet/map/ip6_map.c @@ -1195,7 +1195,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = { .next_nodes = { [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", #ifdef MAP_SKIP_IP6_LOOKUP - [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit", + [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite", #endif [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", -- cgit 1.2.3-korg