aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/ip4_forward.c217
-rw-r--r--src/vnet/ip/ip6_forward.c2
-rw-r--r--src/vnet/ip/ip_frag.c30
-rw-r--r--src/vnet/ip/ip_frag.h2
-rw-r--r--src/vnet/ip/lookup.c19
5 files changed, 155 insertions, 115 deletions
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 40c396c4f3b..1550b313915 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -1203,7 +1203,7 @@ format_ip4_rewrite_trace (u8 * s, va_list * args)
s = format (s, "\n%U%U",
format_white_space, indent,
format_ip_adjacency_packet_data,
- t->dpo_index, t->packet_data, sizeof (t->packet_data));
+ t->packet_data, sizeof (t->packet_data));
return s;
}
@@ -2293,7 +2293,8 @@ typedef enum
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
- u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
+ u16 adj_packet_bytes, bool df, u16 * next, u32 * error,
+ u8 is_midchain)
{
if (packet_len > adj_packet_bytes)
{
@@ -2310,12 +2311,39 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
{
/* IP fragmentation */
ip_frag_set_vnet_buffer (b, adj_packet_bytes,
- IP4_FRAG_NEXT_IP4_REWRITE, 0);
+ (is_midchain ?
+ IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN :
+ IP4_FRAG_NEXT_IP4_REWRITE), 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
}
}
+/* increment TTL & update checksum.
+ Works either endian, so no need for byte swap. */
+static_always_inline void
+ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
+{
+ i32 ttl;
+ u32 checksum;
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
+ {
+ b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ return;
+ }
+
+ ttl = ip->ttl;
+
+ checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
+ checksum += checksum >= 0xffff;
+
+ ip->checksum = checksum;
+ ttl += 1;
+ ip->ttl = ttl;
+
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+}
+
/* Decrement TTL & update checksum.
Works either endian, so no need for byte swap. */
static_always_inline void
@@ -2458,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0);
+ next + 0, &error0, is_midchain);
ip4_mtu_check (b[1], ip1_len,
adj1[0].rewrite_header.max_l3_packet_bytes,
ip1->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 1, &error1);
+ next + 1, &error1, is_midchain);
if (is_mcast)
{
@@ -2481,6 +2509,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
{
u32 next_index = adj0[0].rewrite_header.next_index;
vlib_buffer_advance (b[0], -(word) rw_len0);
+
tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
@@ -2489,10 +2518,14 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next_index, b[0]);
next[0] = next_index;
+ if (is_midchain)
+ calc_checksums (vm, b[0]);
}
else
{
b[0]->error = error_node->errors[error0];
+ if (error0 == IP4_ERROR_MTU_EXCEEDED)
+ ip4_ttl_inc (b[0], ip0);
}
if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
{
@@ -2507,57 +2540,58 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index1, &next_index, b[1]);
next[1] = next_index;
+ if (is_midchain)
+ calc_checksums (vm, b[1]);
}
else
{
b[1]->error = error_node->errors[error1];
+ if (error1 == IP4_ERROR_MTU_EXCEEDED)
+ ip4_ttl_inc (b[1], ip1);
}
- if (is_midchain)
- {
- calc_checksums (vm, b[0]);
- calc_checksums (vm, b[1]);
- }
+
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ethernet_header_t));
- /*
- * Bump the per-adjacency counters
- */
if (do_counters)
{
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index,
- adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
-
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index,
- adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
+ if (error0 == IP4_ERROR_NONE)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+
+ if (error1 == IP4_ERROR_NONE)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
}
if (is_midchain)
{
- if (adj0->sub_type.midchain.fixup_func)
+ if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
adj0->sub_type.midchain.fixup_func
(vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
- if (adj1->sub_type.midchain.fixup_func)
+ if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
adj1->sub_type.midchain.fixup_func
(vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
}
if (is_mcast)
{
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32, (u8 *) ip0);
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj1->rewrite_header.dst_mcast_offset,
- &ip1->dst_address.as_u32, (u8 *) ip1);
+ /* copy bytes from the IP address into the MAC rewrite */
+ if (error0 == IP4_ERROR_NONE)
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ if (error1 == IP4_ERROR_NONE)
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj1->rewrite_header.dst_mcast_offset,
+ &ip1->dst_address.as_u32, (u8 *) ip1);
}
next += 2;
@@ -2626,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0);
+ next + 0, &error0, is_midchain);
if (is_mcast)
{
@@ -2649,44 +2683,38 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next_index, b[0]);
next[0] = next_index;
- }
- else
- {
- b[0]->error = error_node->errors[error0];
- }
- if (is_midchain)
- {
- calc_checksums (vm, b[0]);
- }
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
- /*
- * Bump the per-adjacency counters
- */
- if (do_counters)
- {
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index,
- adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
- }
+ if (is_midchain)
+ calc_checksums (vm, b[0]);
- if (is_midchain)
- {
- if (adj0->sub_type.midchain.fixup_func)
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ /*
+ * Bump the per-adjacency counters
+ */
+ if (do_counters)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0, 1, vlib_buffer_length_in_chain (vm,
+ b[0]) + rw_len0);
+
+ if (is_midchain && adj0->sub_type.midchain.fixup_func)
adj0->sub_type.midchain.fixup_func
(vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
+ if (is_mcast)
+ /* copy bytes from the IP address into the MAC rewrite */
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ }
+ else
{
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32, (u8 *) ip0);
+ b[0]->error = error_node->errors[error0];
+ if (error0 == IP4_ERROR_MTU_EXCEEDED)
+ ip4_ttl_inc (b[0], ip0);
}
next += 1;
@@ -2730,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
adj0[0].rewrite_header.max_l3_packet_bytes,
ip0->flags_and_fragment_offset &
clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
- next + 0, &error0);
+ next + 0, &error0, is_midchain);
if (is_mcast)
{
@@ -2753,39 +2781,36 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm,
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next_index, b[0]);
next[0] = next_index;
- }
- else
- {
- b[0]->error = error_node->errors[error0];
- }
- if (is_midchain)
- {
- calc_checksums (vm, b[0]);
- }
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
- if (do_counters)
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+ if (is_midchain)
+ /* this acts on the packet that is about to be encapped */
+ calc_checksums (vm, b[0]);
- if (is_midchain)
- {
- if (adj0->sub_type.midchain.fixup_func)
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ if (do_counters)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
+
+ if (is_midchain && adj0->sub_type.midchain.fixup_func)
adj0->sub_type.midchain.fixup_func
(vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
+ if (is_mcast)
+ /* copy bytes from the IP address into the MAC rewrite */
+ vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
+ adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32, (u8 *) ip0);
+ }
+ else
{
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
- adj0->rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32, (u8 *) ip0);
+ b[0]->error = error_node->errors[error0];
+ /* undo the TTL decrement - we'll be back to do it again */
+ if (error0 == IP4_ERROR_MTU_EXCEEDED)
+ ip4_ttl_inc (b[0], ip0);
}
next += 1;
@@ -2943,8 +2968,8 @@ VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
VLIB_REGISTER_NODE (ip4_midchain_node) = {
.name = "ip4-midchain",
.vector_size = sizeof (u32),
- .format_trace = format_ip4_forward_next_trace,
- .sibling_of = "ip4-rewrite",
+ .format_trace = format_ip4_rewrite_trace,
+ .sibling_of = "ip4-rewrite",
};
/* *INDENT-ON */
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 47fb57ae201..50de501fe0d 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -908,7 +908,7 @@ format_ip6_rewrite_trace (u8 * s, va_list * args)
s = format (s, "\n%U%U",
format_white_space, indent,
format_ip_adjacency_packet_data,
- t->adj_index, t->packet_data, sizeof (t->packet_data));
+ t->packet_data, sizeof (t->packet_data));
return s;
}
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index 230722c45db..54efb63c986 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -200,6 +200,17 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t));
to_ip4 = vlib_buffer_get_current (to_b);
to_data = (void *) (to_ip4 + 1);
+ vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
+ to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
+
+ if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
+ {
+ vnet_buffer (to_b)->l4_hdr_offset =
+ (vnet_buffer (to_b)->l3_hdr_offset +
+ (vnet_buffer (from_b)->l4_hdr_offset -
+ vnet_buffer (from_b)->l3_hdr_offset));
+ to_b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+ }
/* Spin through from buffers filling up the to buffer */
u16 left_in_to_buffer = len, to_ptr = 0;
@@ -232,6 +243,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
}
to_b->current_length = len + sizeof (ip4_header_t);
+ to_b->flags |= VNET_BUFFER_F_IS_IP4;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -241,6 +253,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t));
to_ip4->checksum = ip4_header_checksum (to_ip4);
+ /* we've just done the IP checksum .. */
+ to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM;
+
if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
{
/* Encapsulating ipv4 header */
@@ -482,6 +497,19 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
+ vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
+ to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
+
+ if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID)
+ {
+ vnet_buffer (to_b)->l4_hdr_offset =
+ (vnet_buffer (to_b)->l3_hdr_offset +
+ (vnet_buffer (from_b)->l4_hdr_offset -
+ vnet_buffer (from_b)->l3_hdr_offset));
+ to_b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+ }
+ to_b->flags |= VNET_BUFFER_F_IS_IP6;
+
/* Spin through from buffers filling up the to buffer */
u16 left_in_to_buffer = len, to_ptr = 0;
while (1)
@@ -551,6 +579,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.n_next_nodes = IP4_FRAG_N_NEXT,
.next_nodes = {
[IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite",
+ [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain",
[IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
[IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
[IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
@@ -574,6 +603,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.n_next_nodes = IP6_FRAG_N_NEXT,
.next_nodes = {
[IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite",
+ [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain",
[IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
[IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
[IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output",
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index b66db416129..ce4236b8465 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -50,6 +50,7 @@ extern vlib_node_registration_t ip6_frag_node;
typedef enum
{
IP4_FRAG_NEXT_IP4_REWRITE,
+ IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN,
IP4_FRAG_NEXT_IP4_LOOKUP,
IP4_FRAG_NEXT_IP6_LOOKUP,
IP4_FRAG_NEXT_MPLS_OUTPUT,
@@ -63,6 +64,7 @@ typedef enum
IP6_FRAG_NEXT_IP4_LOOKUP,
IP6_FRAG_NEXT_IP6_LOOKUP,
IP6_FRAG_NEXT_IP6_REWRITE,
+ IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN,
IP6_FRAG_NEXT_MPLS_OUTPUT,
IP6_FRAG_NEXT_DROP,
IP6_FRAG_N_NEXT
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 4db7660eea9..c1fbc429b97 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -258,27 +258,10 @@ format_ip_flow_hash_config (u8 * s, va_list * args)
u8 *
format_ip_adjacency_packet_data (u8 * s, va_list * args)
{
- u32 adj_index = va_arg (*args, u32);
u8 *packet_data = va_arg (*args, u8 *);
u32 n_packet_data_bytes = va_arg (*args, u32);
- ip_adjacency_t *adj;
- if (!adj_is_valid (adj_index))
- return format (s, "<invalid adjacency>");
-
- adj = adj_get (adj_index);
-
- switch (adj->lookup_next_index)
- {
- case IP_LOOKUP_NEXT_REWRITE:
- case IP_LOOKUP_NEXT_MCAST:
- s =
- format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes);
- break;
-
- default:
- break;
- }
+ s = format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes);
return s;
}