diff options
Diffstat (limited to 'src/vnet/ip')
-rw-r--r-- | src/vnet/ip/ip4_forward.c | 217 | ||||
-rw-r--r-- | src/vnet/ip/ip6_forward.c | 2 | ||||
-rw-r--r-- | src/vnet/ip/ip_frag.c | 30 | ||||
-rw-r--r-- | src/vnet/ip/ip_frag.h | 2 | ||||
-rw-r--r-- | src/vnet/ip/lookup.c | 19 |
5 files changed, 155 insertions, 115 deletions
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 40c396c4f3b..1550b313915 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1203,7 +1203,7 @@ format_ip4_rewrite_trace (u8 * s, va_list * args) s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - t->dpo_index, t->packet_data, sizeof (t->packet_data)); + t->packet_data, sizeof (t->packet_data)); return s; } @@ -2293,7 +2293,8 @@ typedef enum always_inline void ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, - u16 adj_packet_bytes, bool df, u16 * next, u32 * error) + u16 adj_packet_bytes, bool df, u16 * next, u32 * error, + u8 is_midchain) { if (packet_len > adj_packet_bytes) { @@ -2310,12 +2311,39 @@ ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, { /* IP fragmentation */ ip_frag_set_vnet_buffer (b, adj_packet_bytes, - IP4_FRAG_NEXT_IP4_REWRITE, 0); + (is_midchain ? + IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN : + IP4_FRAG_NEXT_IP4_REWRITE), 0); *next = IP4_REWRITE_NEXT_FRAGMENT; } } } +/* increment TTL & update checksum. + Works either endian, so no need for byte swap. */ +static_always_inline void +ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip) +{ + i32 ttl; + u32 checksum; + if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)) + { + b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + return; + } + + ttl = ip->ttl; + + checksum = ip->checksum - clib_host_to_net_u16 (0x0100); + checksum += checksum >= 0xffff; + + ip->checksum = checksum; + ttl += 1; + ip->ttl = ttl; + + ASSERT (ip->checksum == ip4_header_checksum (ip)); +} + /* Decrement TTL & update checksum. Works either endian, so no need for byte swap. */ static_always_inline void @@ -2458,12 +2486,12 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0); + next + 0, &error0, is_midchain); ip4_mtu_check (b[1], ip1_len, adj1[0].rewrite_header.max_l3_packet_bytes, ip1->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 1, &error1); + next + 1, &error1, is_midchain); if (is_mcast) { @@ -2481,6 +2509,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, { u32 next_index = adj0[0].rewrite_header.next_index; vlib_buffer_advance (b[0], -(word) rw_len0); + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0; @@ -2489,10 +2518,14 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next_index, b[0]); next[0] = next_index; + if (is_midchain) + calc_checksums (vm, b[0]); } else { b[0]->error = error_node->errors[error0]; + if (error0 == IP4_ERROR_MTU_EXCEEDED) + ip4_ttl_inc (b[0], ip0); } if (PREDICT_TRUE (error1 == IP4_ERROR_NONE)) { @@ -2507,57 +2540,58 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index1, &next_index, b[1]); next[1] = next_index; + if (is_midchain) + calc_checksums (vm, b[1]); } else { b[1]->error = error_node->errors[error1]; + if (error1 == IP4_ERROR_MTU_EXCEEDED) + ip4_ttl_inc (b[1], ip1); } - if (is_midchain) - { - calc_checksums (vm, b[0]); - calc_checksums (vm, b[1]); - } + /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1, sizeof (ethernet_header_t)); - /* - * Bump the per-adjacency counters - */ if (do_counters) { - vlib_increment_combined_counter - (&adjacency_counters, - thread_index, - adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); - - vlib_increment_combined_counter - (&adjacency_counters, - thread_index, - adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1); + if (error0 == IP4_ERROR_NONE) + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index0, 1, + vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); + + if (error1 == IP4_ERROR_NONE) + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index1, 1, + vlib_buffer_length_in_chain (vm, b[1]) + rw_len1); } if (is_midchain) { - if (adj0->sub_type.midchain.fixup_func) + if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func) adj0->sub_type.midchain.fixup_func (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); - if (adj1->sub_type.midchain.fixup_func) + if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func) adj1->sub_type.midchain.fixup_func (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data); } if (is_mcast) { - /* - * copy bytes from the IP address into the MAC rewrite - */ - vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, - adj0->rewrite_header.dst_mcast_offset, - &ip0->dst_address.as_u32, (u8 *) ip0); - vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, - adj1->rewrite_header.dst_mcast_offset, - &ip1->dst_address.as_u32, (u8 *) ip1); + /* copy bytes from the IP address into the MAC rewrite */ + if (error0 == IP4_ERROR_NONE) + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0->rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, (u8 *) ip0); + if (error1 == IP4_ERROR_NONE) + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj1->rewrite_header.dst_mcast_offset, + &ip1->dst_address.as_u32, (u8 *) ip1); } next += 2; @@ -2626,7 +2660,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0); + next + 0, &error0, is_midchain); if (is_mcast) { @@ -2649,44 +2683,38 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next_index, b[0]); next[0] = next_index; - } - else - { - b[0]->error = error_node->errors[error0]; - } - if (is_midchain) - { - calc_checksums (vm, b[0]); - } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); - /* - * Bump the per-adjacency counters - */ - if (do_counters) - { - vlib_increment_combined_counter - (&adjacency_counters, - thread_index, - adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); - } + if (is_midchain) + calc_checksums (vm, b[0]); - if (is_midchain) - { - if (adj0->sub_type.midchain.fixup_func) + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + + /* + * Bump the per-adjacency counters + */ + if (do_counters) + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index0, 1, vlib_buffer_length_in_chain (vm, + b[0]) + rw_len0); + + if (is_midchain && adj0->sub_type.midchain.fixup_func) adj0->sub_type.midchain.fixup_func (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); - } - if (is_mcast) + if (is_mcast) + /* copy bytes from the IP address into the MAC rewrite */ + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0->rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, (u8 *) ip0); + } + else { - /* - * copy bytes from the IP address into the MAC rewrite - */ - vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, - adj0->rewrite_header.dst_mcast_offset, - &ip0->dst_address.as_u32, (u8 *) ip0); + b[0]->error = error_node->errors[error0]; + if (error0 == IP4_ERROR_MTU_EXCEEDED) + ip4_ttl_inc (b[0], ip0); } next += 1; @@ -2730,7 +2758,7 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, adj0[0].rewrite_header.max_l3_packet_bytes, ip0->flags_and_fragment_offset & clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), - next + 0, &error0); + next + 0, &error0, is_midchain); if (is_mcast) { @@ -2753,39 +2781,36 @@ ip4_rewrite_inline_with_gso (vlib_main_t * vm, vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next_index, b[0]); next[0] = next_index; - } - else - { - b[0]->error = error_node->errors[error0]; - } - if (is_midchain) - { - calc_checksums (vm, b[0]); - } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); - if (do_counters) - vlib_increment_combined_counter - (&adjacency_counters, - thread_index, adj_index0, 1, - vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); + if (is_midchain) + /* this acts on the packet that is about to be encapped */ + calc_checksums (vm, b[0]); - if (is_midchain) - { - if (adj0->sub_type.midchain.fixup_func) + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + + if (do_counters) + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, b[0]) + rw_len0); + + if (is_midchain && adj0->sub_type.midchain.fixup_func) adj0->sub_type.midchain.fixup_func (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data); - } - if (is_mcast) + if (is_mcast) + /* copy bytes from the IP address into the MAC rewrite */ + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0->rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, (u8 *) ip0); + } + else { - /* - * copy bytes from the IP address into the MAC rewrite - */ - vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, - adj0->rewrite_header.dst_mcast_offset, - &ip0->dst_address.as_u32, (u8 *) ip0); + b[0]->error = error_node->errors[error0]; + /* undo the TTL decrement - we'll be back to do it again */ + if (error0 == IP4_ERROR_MTU_EXCEEDED) + ip4_ttl_inc (b[0], ip0); } next += 1; @@ -2943,8 +2968,8 @@ VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = { VLIB_REGISTER_NODE (ip4_midchain_node) = { .name = "ip4-midchain", .vector_size = sizeof (u32), - .format_trace = format_ip4_forward_next_trace, - .sibling_of = "ip4-rewrite", + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", }; /* *INDENT-ON */ diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 47fb57ae201..50de501fe0d 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -908,7 +908,7 @@ format_ip6_rewrite_trace (u8 * s, va_list * args) s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - t->adj_index, t->packet_data, sizeof (t->packet_data)); + t->packet_data, sizeof (t->packet_data)); return s; } diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c index 230722c45db..54efb63c986 100644 --- a/src/vnet/ip/ip_frag.c +++ b/src/vnet/ip/ip_frag.c @@ -200,6 +200,17 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, clib_memcpy_fast (to_b->data, org_from_packet, sizeof (ip4_header_t)); to_ip4 = vlib_buffer_get_current (to_b); to_data = (void *) (to_ip4 + 1); + vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data; + to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + + if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) + { + vnet_buffer (to_b)->l4_hdr_offset = + (vnet_buffer (to_b)->l3_hdr_offset + + (vnet_buffer (from_b)->l4_hdr_offset - + vnet_buffer (from_b)->l3_hdr_offset)); + to_b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; + } /* Spin through from buffers filling up the to buffer */ u16 left_in_to_buffer = len, to_ptr = 0; @@ -232,6 +243,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, } to_b->current_length = len + sizeof (ip4_header_t); + to_b->flags |= VNET_BUFFER_F_IS_IP4; to_ip4->fragment_id = ip_frag_id; to_ip4->flags_and_fragment_offset = @@ -241,6 +253,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_ip4->length = clib_host_to_net_u16 (len + sizeof (ip4_header_t)); to_ip4->checksum = ip4_header_checksum (to_ip4); + /* we've just done the IP checksum .. */ + to_b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM; + if (vnet_buffer (org_from_b)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) { /* Encapsulating ipv4 header */ @@ -482,6 +497,19 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer, to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1); to_data = (void *) (to_frag_hdr + 1); + vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data; + to_b->flags |= VNET_BUFFER_F_L3_HDR_OFFSET_VALID; + + if (from_b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) + { + vnet_buffer (to_b)->l4_hdr_offset = + (vnet_buffer (to_b)->l3_hdr_offset + + (vnet_buffer (from_b)->l4_hdr_offset - + vnet_buffer (from_b)->l3_hdr_offset)); + to_b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; + } + to_b->flags |= VNET_BUFFER_F_IS_IP6; + /* Spin through from buffers filling up the to buffer */ u16 left_in_to_buffer = len, to_ptr = 0; while (1) @@ -551,6 +579,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { .n_next_nodes = IP4_FRAG_N_NEXT, .next_nodes = { [IP4_FRAG_NEXT_IP4_REWRITE] = "ip4-rewrite", + [IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN] = "ip4-midchain", [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", [IP4_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", @@ -574,6 +603,7 @@ VLIB_REGISTER_NODE (ip6_frag_node) = { .n_next_nodes = IP6_FRAG_N_NEXT, .next_nodes = { [IP6_FRAG_NEXT_IP6_REWRITE] = "ip6-rewrite", + [IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN] = "ip6-midchain", [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", [IP6_FRAG_NEXT_MPLS_OUTPUT] = "mpls-output", diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h index b66db416129..ce4236b8465 100644 --- a/src/vnet/ip/ip_frag.h +++ b/src/vnet/ip/ip_frag.h @@ -50,6 +50,7 @@ extern vlib_node_registration_t ip6_frag_node; typedef enum { IP4_FRAG_NEXT_IP4_REWRITE, + IP4_FRAG_NEXT_IP4_REWRITE_MIDCHAIN, IP4_FRAG_NEXT_IP4_LOOKUP, IP4_FRAG_NEXT_IP6_LOOKUP, IP4_FRAG_NEXT_MPLS_OUTPUT, @@ -63,6 +64,7 @@ typedef enum IP6_FRAG_NEXT_IP4_LOOKUP, IP6_FRAG_NEXT_IP6_LOOKUP, IP6_FRAG_NEXT_IP6_REWRITE, + IP6_FRAG_NEXT_IP6_REWRITE_MIDCHAIN, IP6_FRAG_NEXT_MPLS_OUTPUT, IP6_FRAG_NEXT_DROP, IP6_FRAG_N_NEXT diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 4db7660eea9..c1fbc429b97 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -258,27 +258,10 @@ format_ip_flow_hash_config (u8 * s, va_list * args) u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) { - u32 adj_index = va_arg (*args, u32); u8 *packet_data = va_arg (*args, u8 *); u32 n_packet_data_bytes = va_arg (*args, u32); - ip_adjacency_t *adj; - if (!adj_is_valid (adj_index)) - return format (s, "<invalid adjacency>"); - - adj = adj_get (adj_index); - - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - case IP_LOOKUP_NEXT_MCAST: - s = - format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes); - break; - - default: - break; - } + s = format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes); return s; } |