summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2016-11-29 06:51:37 -0800
committerDamjan Marion <dmarion.lists@gmail.com>2016-12-05 09:20:26 +0000
commitf06aea5c7267cffabc1cc2438510a8fe7784079c (patch)
treeb0ded9cd5d10d96b5c416dfc93935c5baca1d475
parent072f8debf21c786ab785ed623229935e0a6cddb6 (diff)
Locally generated packets go through lookup/load-balance - locally-generated is an attribute of the packet and checked where necessary
Add a flag to the vnet_buffer to indicate a packet is locally originated. Then in the rewrite nodes we can check this flag and not perform the TTL decrement. The switch path cost is expected to be fractions of a clock - the flags will be hot in the cache. The checks are necessary due to the requirement that VPP must be able to emit an IP packet with TTL=255. Change-Id: Ieb9cf06e34df54fd5c950293de8b665016295c51 Signed-off-by: Neale Ranns <nranns@cisco.com>
-rw-r--r--vnet/vnet/adj/adj_nbr.c2
-rw-r--r--vnet/vnet/buffer.h4
-rw-r--r--vnet/vnet/fib/fib_types.c14
-rw-r--r--vnet/vnet/ip/icmp4.c7
-rw-r--r--vnet/vnet/ip/ip4_forward.c183
-rw-r--r--vnet/vnet/ip/ip4_source_and_port_range_check.c4
-rw-r--r--vnet/vnet/ip/ip6_forward.c86
-rw-r--r--vnet/vnet/ip/ip6_neighbor.c12
-rw-r--r--vnet/vnet/ip/lookup.h2
-rw-r--r--vnet/vnet/ip/ping.c167
-rw-r--r--vnet/vnet/ip/ping.h1
-rw-r--r--vnet/vnet/map/ip6_map.c2
12 files changed, 218 insertions, 266 deletions
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c
index 003e18e8d66..95d1254a8c4 100644
--- a/vnet/vnet/adj/adj_nbr.c
+++ b/vnet/vnet/adj/adj_nbr.c
@@ -979,7 +979,7 @@ const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
*/
const static char* const nbr_ip4_nodes[] =
{
- "ip4-rewrite-transit",
+ "ip4-rewrite",
NULL,
};
const static char* const nbr_ip6_nodes[] =
diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h
index b3c71c127cc..6da699369f8 100644
--- a/vnet/vnet/buffer.h
+++ b/vnet/vnet/buffer.h
@@ -67,8 +67,8 @@
#define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6)
#define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID)
-#define LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID LOG2_VLIB_BUFFER_FLAG_USER(7)
-#define VNET_BUFFER_RTE_MBUF_IS_VALID (1 << LOG2_VNET_BUFFER_RTE_MBUF_IS_VALID)
+#define LOG2_VNET_BUFFER_LOCALLY_ORIGINATED LOG2_VLIB_BUFFER_FLAG_USER(7)
+#define VNET_BUFFER_LOCALLY_ORIGINATED (1 << LOG2_VNET_BUFFER_LOCALLY_ORIGINATED)
#define foreach_buffer_opaque_union_subtype \
_(ethernet) \
diff --git a/vnet/vnet/fib/fib_types.c b/vnet/vnet/fib/fib_types.c
index d25a7731c64..b66e71940a5 100644
--- a/vnet/vnet/fib/fib_types.c
+++ b/vnet/vnet/fib/fib_types.c
@@ -194,9 +194,17 @@ fib_route_path_cmp (const fib_route_path_t *rpath1,
if (0 != res) return (res);
- res = vnet_sw_interface_compare(vnet_get_main(),
- rpath1->frp_sw_if_index,
- rpath2->frp_sw_if_index);
+ if (~0 != rpath1->frp_sw_if_index &&
+ ~0 != rpath2->frp_sw_if_index)
+ {
+ res = vnet_sw_interface_compare(vnet_get_main(),
+ rpath1->frp_sw_if_index,
+ rpath2->frp_sw_if_index);
+ }
+ else
+ {
+ res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index;
+ }
if (0 != res) return (res);
diff --git a/vnet/vnet/ip/icmp4.c b/vnet/vnet/ip/icmp4.c
index c160f88a600..b1834ac70da 100644
--- a/vnet/vnet/ip/icmp4.c
+++ b/vnet/vnet/ip/icmp4.c
@@ -328,6 +328,9 @@ ip4_icmp_echo_request (vlib_main_t * vm,
ASSERT (ip0->checksum == ip4_header_checksum (ip0));
ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -380,6 +383,8 @@ ip4_icmp_echo_request (vlib_main_t * vm,
ip0->checksum = ip_csum_fold (sum0);
ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
}
vlib_put_next_frame (vm, node, next, n_left_to_next);
@@ -402,7 +407,7 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
.n_next_nodes = 1,
.next_nodes = {
- [0] = "ip4-rewrite-local",
+ [0] = "ip4-load-balance",
},
};
diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c
index 2a6791e5055..fc7b34963fe 100644
--- a/vnet/vnet/ip/ip4_forward.c
+++ b/vnet/vnet/ip/ip4_forward.c
@@ -1025,7 +1025,7 @@ VNET_FEATURE_INIT (ip4_mc_drop, static) = {
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
.arc_name = "ip4-output",
- .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"),
+ .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
.arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};
@@ -1749,8 +1749,7 @@ ip4_local (vlib_main_t * vm,
dpo0 = load_balance_get_bucket_i(lb0, 0);
vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
- vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
- dpo0->dpoi_index;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
dpo0->dpoi_type == DPO_RECEIVE) ?
@@ -2186,7 +2185,6 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
typedef enum {
IP4_REWRITE_NEXT_DROP,
- IP4_REWRITE_NEXT_ARP,
IP4_REWRITE_NEXT_ICMP_ERROR,
} ip4_rewrite_next_t;
@@ -2194,14 +2192,12 @@ always_inline uword
ip4_rewrite_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame,
- int rewrite_for_locally_received_packets,
int is_midchain)
{
ip_lookup_main_t * lm = &ip4_main.lookup_main;
u32 * from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, * to_next, next_index;
vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
- vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -2218,12 +2214,8 @@ ip4_rewrite_inline (vlib_main_t * vm,
ip4_header_t * ip0, * ip1;
u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
- u32 next0_override, next1_override;
u32 tx_sw_if_index0, tx_sw_if_index1;
- if (rewrite_for_locally_received_packets)
- next0_override = next1_override = 0;
-
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3;
@@ -2249,8 +2241,8 @@ ip4_rewrite_inline (vlib_main_t * vm,
p0 = vlib_get_buffer (vm, pi0);
p1 = vlib_get_buffer (vm, pi1);
- adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
- adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
/* We should never rewrite a pkt using the MISS adjacency */
ASSERT(adj_index0 && adj_index1);
@@ -2263,28 +2255,19 @@ ip4_rewrite_inline (vlib_main_t * vm,
/* Decrement TTL & update checksum.
Works either endian, so no need for byte swap. */
- if (! rewrite_for_locally_received_packets)
+ if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
{
- i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
+ i32 ttl0 = ip0->ttl;
/* Input node should have reject packets with ttl 0. */
ASSERT (ip0->ttl > 0);
- ASSERT (ip1->ttl > 0);
checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
- checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
-
checksum0 += checksum0 >= 0xffff;
- checksum1 += checksum1 >= 0xffff;
ip0->checksum = checksum0;
- ip1->checksum = checksum1;
-
ttl0 -= 1;
- ttl1 -= 1;
-
ip0->ttl = ttl0;
- ip1->ttl = ttl1;
/*
* If the ttl drops below 1 when forwarding, generate
@@ -2298,6 +2281,32 @@ ip4_rewrite_inline (vlib_main_t * vm,
ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
}
+
+ /* Verify checksum. */
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl1 = ip1->ttl;
+
+ /* Input node should have reject packets with ttl 0. */
+ ASSERT (ip1->ttl > 0);
+
+ checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+ checksum1 += checksum1 >= 0xffff;
+
+ ip1->checksum = checksum1;
+ ttl1 -= 1;
+ ip1->ttl = ttl1;
+
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
if (PREDICT_FALSE(ttl1 <= 0))
{
error1 = IP4_ERROR_TIME_EXPIRED;
@@ -2311,21 +2320,15 @@ ip4_rewrite_inline (vlib_main_t * vm,
ASSERT (ip0->checksum == ip4_header_checksum (ip0));
ASSERT (ip1->checksum == ip4_header_checksum (ip1));
}
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
/* Rewrite packet header and updates lengths. */
adj0 = ip_get_adjacency (lm, adj_index0);
adj1 = ip_get_adjacency (lm, adj_index1);
- if (rewrite_for_locally_received_packets)
- {
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_ARP))
- next0_override = IP4_REWRITE_NEXT_ARP;
- if (PREDICT_FALSE(adj1->lookup_next_index
- == IP_LOOKUP_NEXT_ARP))
- next1_override = IP4_REWRITE_NEXT_ARP;
- }
-
/* Worth pipelining. No guarantee that adj0,1 are hot... */
rw_len0 = adj0[0].rewrite_header.data_bytes;
rw_len1 = adj1[0].rewrite_header.data_bytes;
@@ -2343,15 +2346,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
next0 = (error0 == IP4_ERROR_NONE)
? adj0[0].rewrite_header.next_index : next0;
- if (rewrite_for_locally_received_packets)
- next0 = next0 && next0_override ? next0_override : next0;
-
next1 = (error1 == IP4_ERROR_NONE)
? adj1[0].rewrite_header.next_index : next1;
- if (rewrite_for_locally_received_packets)
- next1 = next1 && next1_override ? next1_override : next1;
-
/*
* We've already accounted for an ethernet_header_t elsewhere
*/
@@ -2417,17 +2414,13 @@ ip4_rewrite_inline (vlib_main_t * vm,
vlib_buffer_t * p0;
ip4_header_t * ip0;
u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
- u32 next0_override;
u32 tx_sw_if_index0;
- if (rewrite_for_locally_received_packets)
- next0_override = 0;
-
pi0 = to_next[0] = from[0];
p0 = vlib_get_buffer (vm, pi0);
- adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
/* We should never rewrite a pkt using the MISS adjacency */
ASSERT(adj_index0);
@@ -2440,7 +2433,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
/* Decrement TTL & update checksum. */
- if (! rewrite_for_locally_received_packets)
+ if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
{
i32 ttl0 = ip0->ttl;
@@ -2471,16 +2464,9 @@ ip4_rewrite_inline (vlib_main_t * vm,
ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
}
}
-
- if (rewrite_for_locally_received_packets)
+ else
{
- /*
- * We have to override the next_index in ARP adjacencies,
- * because they're set up for ip4-arp, not this node...
- */
- if (PREDICT_FALSE(adj0->lookup_next_index
- == IP_LOOKUP_NEXT_ARP))
- next0_override = IP4_REWRITE_NEXT_ARP;
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
}
/* Guess we are only writing on simple Ethernet header. */
@@ -2527,9 +2513,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
}
- if (rewrite_for_locally_received_packets)
- next0 = next0 && next0_override ? next0_override : next0;
-
from += 1;
n_left_from -= 1;
to_next += 1;
@@ -2545,14 +2528,14 @@ ip4_rewrite_inline (vlib_main_t * vm,
/* Need to do trace after rewrites to pick up new packet data. */
if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
return frame->n_vectors;
}
-/** @brief IPv4 transit rewrite node.
- @node ip4-rewrite-transit
+/** @brief IPv4 rewrite node.
+ @node ip4-rewrite
This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
header checksum, fetch the ip adjacency, check the outbound mtu,
@@ -2583,54 +2566,11 @@ ip4_rewrite_inline (vlib_main_t * vm,
or @c error-drop
*/
static uword
-ip4_rewrite_transit (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ip4_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- return ip4_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 0, 0);
-}
-
-/** @brief IPv4 local rewrite node.
- @node ip4-rewrite-local
-
- This is the IPv4 local rewrite node. Fetch the ip adjacency, check
- the outbound interface mtu, apply the adjacency rewrite, and send
- pkts to the adjacency rewrite header's rewrite_next_index. Deal
- with hemorrhoids of the form "some clown sends an icmp4 w/ src =
- dst = interface addr."
-
- @param vm vlib_main_t corresponding to the current thread
- @param node vlib_node_runtime_t
- @param frame vlib_frame_t whose contents should be dispatched
-
- @par Graph mechanics: buffer metadata, next index usage
-
- @em Uses:
- - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
- - the rewrite adjacency index
- - <code>adj->lookup_next_index</code>
- - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
- the packet will be dropped.
- - <code>adj->rewrite_header</code>
- - Rewrite string length, rewrite string, next_index
-
- @em Sets:
- - <code>b->current_data, b->current_length</code>
- - Updated net of applying the rewrite string
-
- <em>Next Indices:</em>
- - <code> adj->rewrite_header.next_index </code>
- or @c error-drop
-*/
-
-static uword
-ip4_rewrite_local (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip4_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 1, 0);
+ return ip4_rewrite_inline (vm, node, frame, 0);
}
static uword
@@ -2638,26 +2578,25 @@ ip4_midchain (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 0, 1);
+ return ip4_rewrite_inline (vm, node, frame, 1);
}
+
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
- .function = ip4_rewrite_transit,
- .name = "ip4-rewrite-transit",
+ .function = ip4_rewrite,
+ .name = "ip4-rewrite",
.vector_size = sizeof (u32),
.format_trace = format_ip4_rewrite_trace,
- .n_next_nodes = 3,
+ .n_next_nodes = 2,
.next_nodes = {
[IP4_REWRITE_NEXT_DROP] = "error-drop",
- [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
[IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
VLIB_REGISTER_NODE (ip4_midchain_node) = {
.function = ip4_midchain,
@@ -2666,25 +2605,11 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
.format_trace = format_ip4_forward_next_trace,
- .sibling_of = "ip4-rewrite-transit",
+ .sibling_of = "ip4-rewrite",
};
VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
-VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
- .function = ip4_rewrite_local,
- .name = "ip4-rewrite-local",
- .vector_size = sizeof (u32),
-
- .sibling_of = "ip4-rewrite-transit",
-
- .format_trace = format_ip4_rewrite_trace,
-
- .n_next_nodes = 0,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
-
static clib_error_t *
add_del_interface_table (vlib_main_t * vm,
unformat_input_t * input,
diff --git a/vnet/vnet/ip/ip4_source_and_port_range_check.c b/vnet/vnet/ip/ip4_source_and_port_range_check.c
index 28dabeb3f41..ae836a113a5 100644
--- a/vnet/vnet/ip/ip4_source_and_port_range_check.c
+++ b/vnet/vnet/ip/ip4_source_and_port_range_check.c
@@ -784,8 +784,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* Example of graph node after range checking is enabled:
* @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
* Name Next Previous
- * ip4-source-and-port-range- error-drop [0] ip4-rewrite-local
- * interface-output [1] ip4-rewrite-transit
+ * ip4-source-and-port-range- error-drop [0] ip4-rewrite
+ * interface-output [1]
* @cliexend
*
* Example of how to display the features enabed on an interface:
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
index 1f40c429310..a4ce65a6396 100644
--- a/vnet/vnet/ip/ip6_forward.c
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -1831,14 +1831,12 @@ always_inline uword
ip6_rewrite_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame,
- int rewrite_for_locally_received_packets,
int is_midchain)
{
ip_lookup_main_t * lm = &ip6_main.lookup_main;
u32 * from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, * to_next, next_index;
vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
- vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -1885,8 +1883,8 @@ ip6_rewrite_inline (vlib_main_t * vm,
p0 = vlib_get_buffer (vm, pi0);
p1 = vlib_get_buffer (vm, pi1);
- adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
- adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
/* We should never rewrite a pkt using the MISS adjacency */
ASSERT(adj_index0 && adj_index1);
@@ -1897,19 +1895,16 @@ ip6_rewrite_inline (vlib_main_t * vm,
error0 = error1 = IP6_ERROR_NONE;
next0 = next1 = IP6_REWRITE_NEXT_DROP;
- if (! rewrite_for_locally_received_packets)
+ if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
{
- i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
+ i32 hop_limit0 = ip0->hop_limit;
/* Input node should have reject packets with hop limit 0. */
ASSERT (ip0->hop_limit > 0);
- ASSERT (ip1->hop_limit > 0);
hop_limit0 -= 1;
- hop_limit1 -= 1;
ip0->hop_limit = hop_limit0;
- ip1->hop_limit = hop_limit1;
/*
* If the hop count drops below 1 when forwarding, generate
@@ -1923,6 +1918,26 @@ ip6_rewrite_inline (vlib_main_t * vm,
icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded,
ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
}
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit1 = ip1->hop_limit;
+
+ /* Input node should have reject packets with hop limit 0. */
+ ASSERT (ip1->hop_limit > 0);
+
+ hop_limit1 -= 1;
+
+ ip1->hop_limit = hop_limit1;
+
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
if (PREDICT_FALSE(hop_limit1 <= 0))
{
error1 = IP6_ERROR_TIME_EXPIRED;
@@ -1931,8 +1946,11 @@ ip6_rewrite_inline (vlib_main_t * vm,
icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded,
ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
}
- }
-
+ }
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
adj0 = ip_get_adjacency (lm, adj_index0);
adj1 = ip_get_adjacency (lm, adj_index1);
@@ -2018,7 +2036,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
p0 = vlib_get_buffer (vm, pi0);
- adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
/* We should never rewrite a pkt using the MISS adjacency */
ASSERT(adj_index0);
@@ -2031,7 +2049,7 @@ ip6_rewrite_inline (vlib_main_t * vm,
next0 = IP6_REWRITE_NEXT_DROP;
/* Check hop limit */
- if (! rewrite_for_locally_received_packets)
+ if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
{
i32 hop_limit0 = ip0->hop_limit;
@@ -2054,6 +2072,10 @@ ip6_rewrite_inline (vlib_main_t * vm,
ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
}
}
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2111,28 +2133,17 @@ ip6_rewrite_inline (vlib_main_t * vm,
/* Need to do trace after rewrites to pick up new packet data. */
if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
return frame->n_vectors;
}
static uword
-ip6_rewrite_transit (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ip6_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
return ip6_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 0,
- /* midchain */ 0);
-}
-
-static uword
-ip6_rewrite_local (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip6_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 1,
/* midchain */ 0);
}
@@ -2142,7 +2153,6 @@ ip6_midchain (vlib_main_t * vm,
vlib_frame_t * frame)
{
return ip6_rewrite_inline (vm, node, frame,
- /* rewrite_for_locally_received_packets */ 0,
/* midchain */ 1);
}
@@ -2159,7 +2169,7 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = {
VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
VLIB_REGISTER_NODE (ip6_rewrite_node) = {
- .function = ip6_rewrite_transit,
+ .function = ip6_rewrite,
.name = "ip6-rewrite",
.vector_size = sizeof (u32),
@@ -2172,21 +2182,7 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = {
},
};
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite_transit);
-
-VLIB_REGISTER_NODE (ip6_rewrite_local_node) = {
- .function = ip6_rewrite_local,
- .name = "ip6-rewrite-local",
- .vector_size = sizeof (u32),
-
- .sibling_of = "ip6-rewrite",
-
- .format_trace = format_ip6_rewrite_trace,
-
- .n_next_nodes = 0,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_local_node, ip6_rewrite_local);
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
/*
* Hop-by-Hop handling
diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c
index a407978b3fa..5380950ae6b 100644
--- a/vnet/vnet/ip/ip6_neighbor.c
+++ b/vnet/vnet/ip/ip6_neighbor.c
@@ -1469,9 +1469,10 @@ icmp6_router_solicitation(vlib_main_t * vm,
: error0);
next0 = is_dropped ?
next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
- vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
}
}
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
radv_info->n_solicitations_dropped += is_dropped;
radv_info->n_solicitations_rcvd += is_solicitation;
@@ -2130,15 +2131,16 @@ ip6_neighbor_send_mldpv2_report(u32 sw_if_index)
/*
* OK to override w/ no regard for actual FIB, because
- * ip6-rewrite-local only looks at the adjacency.
+ * ip6-rewrite only looks at the adjacency.
*/
vnet_buffer (b0)->sw_if_index[VLIB_RX] =
vnet_main.local_interface_sw_if_index;
- vnet_buffer (b0)->ip.adj_index[VLIB_RX] =
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
radv_info->all_mldv2_routers_adj_index;
+ b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
- vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-local");
+ vlib_node_t * node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
f = vlib_get_frame_to_node (vm, node->index);
to_next = vlib_frame_vector_args (f);
@@ -2160,7 +2162,7 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = {
.n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
.next_nodes = {
[ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
- [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-local",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite",
[ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
},
};
diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h
index 7f9b9846cd9..a609e2fe7c0 100644
--- a/vnet/vnet/ip/lookup.h
+++ b/vnet/vnet/ip/lookup.h
@@ -111,7 +111,7 @@ typedef enum {
[IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
[IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
[IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
- [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \
[IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
[IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \
[IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
diff --git a/vnet/vnet/ip/ping.c b/vnet/vnet/ip/ping.c
index 0bf83e9ea0f..08e770132f0 100644
--- a/vnet/vnet/ip/ping.c
+++ b/vnet/vnet/ip/ping.c
@@ -256,81 +256,65 @@ init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo,
return data_len;
}
-/*
- * Given adj index, return sw_if_index, possibly overwritten
- * by a parameter. There is mostly debug outputs here,
- * but it turned out handy to have these.
- */
-
-static u32
-adj_index_to_sw_if_index (vlib_main_t * vm, ip_lookup_main_t * lm,
- char *lookup_next_nodes[], u32 adj_index0,
- u32 sw_if_index, u8 verbose)
-{
- ip_adjacency_t *adj0 = ip_get_adjacency (lm, adj_index0);
- u32 sw_if_index0 = adj0->rewrite_header.sw_if_index;
- if (verbose)
- {
- vlib_cli_output (vm, "Adjacency index: %u, sw_if_index: %u\n",
- adj_index0, sw_if_index0);
- vlib_cli_output (vm, "Adj: %s\n",
- lookup_next_nodes[adj0->lookup_next_index]);
- vlib_cli_output (vm, "Adj Interface: %d\n", adj0->if_address_index);
- }
-
- if (~0 != sw_if_index)
- {
- sw_if_index0 = sw_if_index;
- if (verbose)
- {
- vlib_cli_output (vm, "Forced set interface: %d\n", sw_if_index0);
- }
- }
- return sw_if_index0;
-}
-
static send_ip46_ping_result_t
-send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
+send_ip6_ping (vlib_main_t * vm, ip6_main_t * im,
+ u32 table_id, ip6_address_t * pa6,
u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
u8 verbose)
{
icmp6_echo_request_header_t *h0;
u32 bi0 = 0;
- u32 sw_if_index0;
- ip_lookup_main_t *lm = &im->lookup_main;
int bogus_length = 0;
- u32 adj_index0;
vlib_buffer_t *p0;
vlib_frame_t *f;
u32 *to_next;
- u32 fib_index0;
if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
return SEND_PING_ALLOC_FAIL;
p0 = vlib_get_buffer (vm, bi0);
- /* Determine sw_if_index0 of source intf, may be force-set via sw_if_index. */
- vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */
- fib_index0 = 0;
- adj_index0 = fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128));
+ /*
+ * if the user did not provide a source interface, use the any interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip6_fib_index_from_table_id(table_id);
- if (ADJ_INDEX_INVALID == adj_index0)
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index = ip6_fib_table_lookup(fib_index, pa6, 128);
+ sw_if_index = fib_entry_get_resolving_interface(fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use its table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
{
- vlib_buffer_free (vm, &bi0, 1);
- return SEND_PING_NO_INTERFACE;
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip6_fib_table_get_index_for_sw_if_index(sw_if_index);
}
- sw_if_index0 =
- adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0,
- sw_if_index, verbose);
- if ((~0 == sw_if_index0) && (~0 == sw_if_index))
+ if (~0 == sw_if_index)
{
vlib_buffer_free (vm, &bi0, 1);
return SEND_PING_NO_INTERFACE;
}
- vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
h0 = vlib_buffer_get_current (p0);
@@ -344,7 +328,7 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
h0->ip6.src_address = *pa6;
/* Fill in the correct source now */
- ip6_address_t *a = ip6_interface_first_address (im, sw_if_index0);
+ ip6_address_t *a = ip6_interface_first_address (im, sw_if_index);
h0->ip6.src_address = a[0];
/* Fill in icmp fields */
@@ -381,19 +365,17 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6,
static send_ip46_ping_result_t
send_ip4_ping (vlib_main_t * vm,
ip4_main_t * im,
+ u32 table_id,
ip4_address_t * pa4,
u32 sw_if_index,
u16 seq_host, u16 id_host, u16 data_len, u8 verbose)
{
icmp4_echo_request_header_t *h0;
u32 bi0 = 0;
- u32 sw_if_index0;
ip_lookup_main_t *lm = &im->lookup_main;
- u32 adj_index0;
vlib_buffer_t *p0;
vlib_frame_t *f;
u32 *to_next;
- u32 fib_index0;
u32 if_add_index0;
if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
@@ -401,28 +383,47 @@ send_ip4_ping (vlib_main_t * vm,
p0 = vlib_get_buffer (vm, bi0);
- /* Determine sw_if_index0 of the source intf, may be force-set via sw_if_index. */
- vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */
- fib_index0 = 0;
- adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup(
- ip4_fib_get(fib_index0), pa4, 32));
+ /*
+ * if the user did not provide a source interface, use the any interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip4_fib_index_from_table_id(table_id);
- if (ADJ_INDEX_INVALID == adj_index0)
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index = ip4_fib_table_lookup(ip4_fib_get(fib_index), pa4, 32);
+ sw_if_index = fib_entry_get_resolving_interface(fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use the user's table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
{
- vlib_buffer_free (vm, &bi0, 1);
- return SEND_PING_NO_INTERFACE;
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip4_fib_table_get_index_for_sw_if_index(sw_if_index);
}
- sw_if_index0 =
- adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0,
- sw_if_index, verbose);
- if ((~0 == sw_if_index0) && (~0 == sw_if_index))
+ if (~0 == sw_if_index)
{
vlib_buffer_free (vm, &bi0, 1);
return SEND_PING_NO_INTERFACE;
}
- vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
h0 = vlib_buffer_get_current (p0);
@@ -439,7 +440,7 @@ send_ip4_ping (vlib_main_t * vm,
h0->ip4.src_address = *pa4;
/* Fill in the correct source now */
- if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
if (PREDICT_TRUE (if_add_index0 != ~0))
{
ip_interface_address_t *if_add =
@@ -532,7 +533,7 @@ print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0)
*/
static void
-run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4,
+run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
ip6_address_t * pa6, u32 sw_if_index,
f64 ping_interval, u32 ping_repeat, u32 data_len,
u32 verbose)
@@ -571,14 +572,14 @@ run_ping_ip46_address (vlib_main_t * vm, ip4_address_t * pa4,
pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
pr->curr_seq = i;
if (pa6 &&
- (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, pa6,
+ (SEND_PING_OK == send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6,
sw_if_index, i, icmp_id, data_len,
verbose)))
{
n_requests++;
}
if (pa4 &&
- (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, pa4,
+ (SEND_PING_OK == send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4,
sw_if_index, i, icmp_id, data_len,
verbose)))
{
@@ -667,9 +668,12 @@ ping_ip_address (vlib_main_t * vm,
u32 data_len = PING_DEFAULT_DATA_LEN;
u32 verbose = 0;
f64 ping_interval = PING_DEFAULT_INTERVAL;
+ u32 sw_if_index, table_id;
+
+ table_id = 0;
ping_ip4 = ping_ip6 = 0;
- u32 sw_if_index;
sw_if_index = ~0;
+
if (unformat (input, "%U", unformat_ip4_address, &a4))
{
ping_ip4 = 1;
@@ -757,6 +761,17 @@ ping_ip_address (vlib_main_t * vm,
goto done;
}
}
+ else if (unformat (input, "table-id"))
+ {
+ if (!unformat (input, "du", &table_id))
+ {
+ error =
+ clib_error_return (0,
+ "expecting table-id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
else if (unformat (input, "interval"))
{
if (!unformat (input, "%f", &ping_interval))
@@ -791,7 +806,7 @@ ping_ip_address (vlib_main_t * vm,
}
}
- run_ping_ip46_address (vm, ping_ip4 ? &a4 : NULL, ping_ip6 ? &a6 : NULL,
+ run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL, ping_ip6 ? &a6 : NULL,
sw_if_index, ping_interval, ping_repeat, data_len,
verbose);
done:
@@ -844,7 +859,7 @@ VLIB_CLI_COMMAND (ping_command, static) =
{
.path = "ping",
.function = ping_ip_address,
- .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>} [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>] [size <pktsize>] [interval <sec>] [repeat <cnt>] [verbose]",
+ .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>} [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>] [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>] [verbose]",
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/ip/ping.h b/vnet/vnet/ip/ping.h
index 579638cf4c3..58c6f4b4b6b 100644
--- a/vnet/vnet/ip/ping.h
+++ b/vnet/vnet/ip/ping.h
@@ -29,6 +29,7 @@ typedef enum {
SEND_PING_OK = 0,
SEND_PING_ALLOC_FAIL,
SEND_PING_NO_INTERFACE,
+ SEND_PING_NO_TABLE,
} send_ip46_ping_result_t;
/*
diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c
index 2e38b0db626..d2945059df7 100644
--- a/vnet/vnet/map/ip6_map.c
+++ b/vnet/vnet/map/ip6_map.c
@@ -1195,7 +1195,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
.next_nodes = {
[IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
- [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit",
+ [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite",
#endif
[IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
[IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",