aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/icmp4.c49
-rw-r--r--src/vnet/ip/icmp4.h23
-rw-r--r--src/vnet/ip/icmp46_packet.h4
-rw-r--r--src/vnet/ip/icmp6.c230
-rw-r--r--src/vnet/ip/icmp6.h42
-rw-r--r--src/vnet/ip/ip.api905
-rw-r--r--src/vnet/ip/ip.c19
-rw-r--r--src/vnet/ip/ip.h12
-rw-r--r--src/vnet/ip/ip4.h3
-rw-r--r--src/vnet/ip/ip46_address.h2
-rw-r--r--src/vnet/ip/ip46_cli.c10
-rw-r--r--src/vnet/ip/ip4_error.h108
-rw-r--r--src/vnet/ip/ip4_forward.c181
-rw-r--r--src/vnet/ip/ip4_inlines.h26
-rw-r--r--src/vnet/ip/ip4_input.c13
-rw-r--r--src/vnet/ip/ip4_input.h19
-rw-r--r--src/vnet/ip/ip4_mtrie.c92
-rw-r--r--src/vnet/ip/ip4_mtrie.h22
-rw-r--r--src/vnet/ip/ip4_options.c9
-rw-r--r--src/vnet/ip/ip4_packet.h50
-rw-r--r--src/vnet/ip/ip4_punt_drop.c23
-rw-r--r--src/vnet/ip/ip4_source_and_port_range_check.c30
-rw-r--r--src/vnet/ip/ip4_to_ip6.h2
-rw-r--r--src/vnet/ip/ip6.h2
-rw-r--r--src/vnet/ip/ip6_error.h106
-rw-r--r--src/vnet/ip/ip6_format.c4
-rw-r--r--src/vnet/ip/ip6_forward.c164
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.c18
-rw-r--r--src/vnet/ip/ip6_inlines.h103
-rw-r--r--src/vnet/ip/ip6_input.c12
-rw-r--r--src/vnet/ip/ip6_input.h2
-rw-r--r--src/vnet/ip/ip6_link.c25
-rw-r--r--src/vnet/ip/ip6_ll_table.c40
-rw-r--r--src/vnet/ip/ip6_ll_types.c6
-rw-r--r--src/vnet/ip/ip6_packet.h357
-rw-r--r--src/vnet/ip/ip6_punt_drop.c31
-rw-r--r--src/vnet/ip/ip6_to_ip4.h56
-rw-r--r--src/vnet/ip/ip_api.c160
-rw-r--r--src/vnet/ip/ip_checksum.c2
-rw-r--r--src/vnet/ip/ip_container_proxy.c6
-rw-r--r--src/vnet/ip/ip_flow_hash.h12
-rw-r--r--src/vnet/ip/ip_frag.c82
-rw-r--r--src/vnet/ip/ip_frag.h20
-rw-r--r--src/vnet/ip/ip_in_out_acl.c545
-rw-r--r--src/vnet/ip/ip_init.c2
-rw-r--r--src/vnet/ip/ip_interface.c18
-rw-r--r--src/vnet/ip/ip_interface.h5
-rwxr-xr-x[-rw-r--r--]src/vnet/ip/ip_packet.h108
-rw-r--r--src/vnet/ip/ip_path_mtu.c28
-rw-r--r--src/vnet/ip/ip_path_mtu.h3
-rw-r--r--src/vnet/ip/ip_path_mtu_node.c9
-rw-r--r--src/vnet/ip/ip_psh_cksum.h55
-rw-r--r--src/vnet/ip/ip_punt_drop.c5
-rw-r--r--src/vnet/ip/ip_sas.c7
-rw-r--r--src/vnet/ip/ip_test.c65
-rw-r--r--src/vnet/ip/ip_types.c32
-rw-r--r--src/vnet/ip/ip_types.h8
-rw-r--r--src/vnet/ip/lookup.c158
-rw-r--r--src/vnet/ip/lookup.h5
-rw-r--r--src/vnet/ip/punt.c77
-rw-r--r--src/vnet/ip/punt.h6
-rw-r--r--src/vnet/ip/punt_api.c2
-rw-r--r--src/vnet/ip/punt_node.c79
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.c687
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.h3
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.c486
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.h1
-rw-r--r--src/vnet/ip/reass/ip6_full_reass.c741
-rw-r--r--src/vnet/ip/reass/ip6_full_reass.h2
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.c309
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.h1
-rw-r--r--src/vnet/ip/reass/reassembly.rst221
-rw-r--r--src/vnet/ip/vtep.h6
73 files changed, 4081 insertions, 2675 deletions
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index 5f9ffa3b2b7..fa4a0e12276 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -41,12 +41,10 @@
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp4_error
-#undef _
-};
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip4_icmp_type_and_code (u8 * s, va_list * args)
@@ -206,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.function = ip4_icmp_input,
.name = "ip4-icmp-input",
@@ -215,15 +212,14 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.format_trace = format_icmp_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_ERROR] = "ip4-punt",
},
};
-/* *INDENT-ON* */
typedef enum
{
@@ -255,11 +251,14 @@ ip4_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip4_icmp_error_next_t next_index;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -289,6 +288,21 @@ ip4_icmp_error (vlib_main_t * vm,
ip_csum_t sum;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet
+ */
+ u64 r0 =
+ (u64) ip0->dst_address.as_u32 << 32 | ip0->src_address.as_u32;
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
@@ -300,14 +314,16 @@ ip4_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv4 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip4_header_t) -
sizeof (icmp46_header_t) - 4);
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 576 ? 576 : p0->current_length;
out_ip0 = vlib_buffer_get_current (p0);
@@ -325,7 +341,7 @@ ip4_icmp_error (vlib_main_t * vm,
/* Prefer a source address from "offending interface" */
if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address,
&out_ip0->src_address))
- { /* interface has no IP6 address - should not happen */
+ { /* interface has no IP4 address - should not happen */
next0 = IP4_ICMP_ERROR_NEXT_DROP;
error0 = ICMP4_ERROR_DROP;
}
@@ -370,14 +386,13 @@ ip4_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.function = ip4_icmp_error,
.name = "ip4-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -387,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.format_trace = format_icmp_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -570,6 +584,11 @@ icmp4_init (vlib_main_t * vm)
ICMP_INPUT_NEXT_ERROR,
sizeof (cm->ip4_input_next_index_by_type));
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5);
+
return 0;
}
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
index e2a95673fc7..22a4fc508e5 100644
--- a/src/vnet/ip/icmp4.h
+++ b/src/vnet/ip/icmp4.h
@@ -15,29 +15,6 @@
#ifndef included_vnet_icmp4_h
#define included_vnet_icmp4_h
-#define foreach_icmp4_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped")
-
-typedef enum
-{
-#define _(f,s) ICMP4_ERROR_##f,
- foreach_icmp4_error
-#undef _
-} icmp4_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
index 0545046fe60..08e73f6cd7d 100644
--- a/src/vnet/ip/icmp46_packet.h
+++ b/src/vnet/ip/icmp46_packet.h
@@ -187,7 +187,6 @@ typedef enum
#undef _
} icmp6_code_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 type;
@@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct
/* IP checksum of icmp header plus data which follows. */
u16 checksum;
}) icmp46_header_t;
-/* *INDENT-ON* */
/* ip6 neighbor discovery */
#define foreach_icmp6_neighbor_discovery_option \
@@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type
#undef _
} icmp6_neighbor_discovery_option_type_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Option type. */
@@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
link_layer_option;
}) icmp6_neighbor_solicitation_header_t;
-/* *INDENT-ON* */
#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index b6ed3ea0ec9..b095f679cc8 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -41,6 +41,10 @@
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
+
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip6_icmp_type_and_code (u8 * s, va_list * args)
@@ -123,12 +127,6 @@ format_icmp6_input_trace (u8 * s, va_list * va)
return s;
}
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp6_error
-#undef _
-};
-
typedef enum
{
ICMP_INPUT_NEXT_PUNT,
@@ -237,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.function = ip6_icmp_input,
.name = "ip6-icmp-input",
@@ -246,195 +243,14 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.format_trace = format_icmp6_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_PUNT] = "ip6-punt",
},
};
-/* *INDENT-ON* */
-
-typedef enum
-{
- ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
- ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
- ICMP6_ECHO_REQUEST_N_NEXT,
-} icmp6_echo_request_next_t;
-
-static uword
-ip6_icmp_echo_request (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 *from, *to_next;
- u32 n_left_from, n_left_to_next, next_index;
- ip6_main_t *im = &ip6_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
- /* stride */ 1,
- sizeof (icmp6_input_trace_t));
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 2 && n_left_to_next > 2)
- {
- vlib_buffer_t *p0, *p1;
- ip6_header_t *ip0, *ip1;
- icmp46_header_t *icmp0, *icmp1;
- ip6_address_t tmp0, tmp1;
- ip_csum_t sum0, sum1;
- u32 bi0, bi1;
- u32 fib_index0, fib_index1;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
- u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- p0 = vlib_get_buffer (vm, bi0);
- p1 = vlib_get_buffer (vm, bi1);
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
- icmp0 = ip6_next_header (ip0);
- icmp1 = ip6_next_header (ip1);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
- sum1 = icmp1->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- ASSERT (icmp1->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
- sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
- icmp1->checksum = ip_csum_fold (sum1);
-
- icmp0->type = ICMP6_echo_reply;
- icmp1->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- tmp1 = ip1->src_address;
-
- ip0->src_address = ip0->dst_address;
- ip1->src_address = ip1->dst_address;
-
- ip0->dst_address = tmp0;
- ip1->dst_address = tmp1;
-
- /* New hop count. */
- ip0->hop_limit = im->host_config.ttl;
- ip1->hop_limit = im->host_config.ttl;
-
- /* Determine the correct lookup fib indices... */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
- /* Determine the correct lookup fib indices... */
- fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p1)->sw_if_index[VLIB_RX]);
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
-
- /* verify speculative enqueues, maybe switch current next frame */
- /* if next0==next1==next_index then nothing special needs to be done */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *p0;
- ip6_header_t *ip0;
- icmp46_header_t *icmp0;
- u32 bi0;
- ip6_address_t tmp0;
- ip_csum_t sum0;
- u32 fib_index0;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
-
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- p0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (p0);
- icmp0 = ip6_next_header (ip0);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
-
- icmp0->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- ip0->src_address = ip0->dst_address;
- ip0->dst_address = tmp0;
-
- ip0->hop_limit = im->host_config.ttl;
-
- /* if the packet is link local, we'll bounce through the link-local
- * table with the RX interface correctly set */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
-
- /* Verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_error_count (vm, ip6_icmp_input_node.index,
- ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
- .function = ip6_icmp_echo_request,
- .name = "ip6-icmp-echo-request",
-
- .vector_size = sizeof (u32),
-
- .format_trace = format_icmp6_input_trace,
-
- .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
- .next_nodes = {
- [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
- [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
- },
-};
-/* *INDENT-ON* */
typedef enum
{
@@ -476,11 +292,14 @@ ip6_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip6_icmp_error_next_t next_index;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -510,6 +329,21 @@ ip6_icmp_error (vlib_main_t * vm,
int bogus_length;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet;
+ * throttled packets are counted under this node's ICMP6 drop counter */
+ u64 r0 = (ip6_address_hash_to_u64 (&ip0->dst_address) ^
+ ip6_address_hash_to_u64 (&ip0->src_address));
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
@@ -521,15 +355,15 @@ ip6_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv6 header including a 4 byte data field */
vlib_buffer_advance (p0,
-(sizeof (ip6_header_t) +
sizeof (icmp46_header_t) + 4));
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 1280 ? 1280 : p0->current_length;
@@ -590,14 +424,13 @@ ip6_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.function = ip6_icmp_error,
.name = "ip6-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -607,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.format_trace = format_icmp6_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -804,8 +636,10 @@ icmp6_init (vlib_main_t * vm)
cm->min_valid_length_by_type[ICMP6_redirect] =
sizeof (icmp6_redirect_header_t);
- icmp6_register_type (vm, ICMP6_echo_request,
- ip6_icmp_echo_request_node.index);
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
index 7a5eef5df18..119aaf0bae9 100644
--- a/src/vnet/ip/icmp6.h
+++ b/src/vnet/ip/icmp6.h
@@ -17,48 +17,6 @@
#include <vnet/ip/icmp46_packet.h>
-#define foreach_icmp6_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "neighbor solicitations from source not on link") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
- "neighbor solicitations for unknown targets") \
- _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
- _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
- _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "router solicitations from source not on link") \
- _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
- "neighbor discovery unsupported interface") \
- _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
- "neighbor discovery not configured") \
- _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
- "router advertisement source not link local") \
- _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
- _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped") \
- _ (ALLOC_FAILURE, "buffer allocation failure")
-
-
-typedef enum
-{
-#define _(f,s) ICMP6_ERROR_##f,
- foreach_icmp6_error
-#undef _
-} icmp6_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 28786fa5a90..967f56cf917 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "3.1.0";
+option version = "3.2.0";
import "vnet/interface_types.api";
import "vnet/fib/fib_types.api";
@@ -57,6 +57,35 @@ autoreply define ip_table_add_del
vl_api_ip_table_t table;
};
+/** \brief Allocate an unused table
+ A table can be added multiple times.
+ If a large number of tables are in use (millions), this API might
+ fail to find a free ID with very low probability, and will return
+ EAGAIN. A subsequent attempt may be successful.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table - if table.table_id == ~0, vpp allocates an unused table_id and
+ proceeds as in ip_table_add_del with is_add = true
+ if table.table_id != ~0, vpp uses the table.table_id and
+ proceeds as in ip_table_add_del with is_add = true
+ table.table_id should never be 0
+*/
+define ip_table_allocate
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_ip_table_t table;
+};
+
+define ip_table_allocate_reply
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ip_table_t table;
+};
+
/** \brief Dump IP all fib tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -337,6 +366,41 @@ autoreply define set_ip_flow_hash_v2
vl_api_ip_flow_hash_config_t flow_hash_config;
};
+/**
+ @brief flow hash settings for an IP table
+ @param src - include src in flow hash
+ @param dst - include dst in flow hash
+ @param sport - include sport in flow hash
+ @param dport - include dport in flow hash
+ @param proto - include proto in flow hash
+ @param reverse - include reverse in flow hash
+ @param symmetric - include symmetry in flow hash
+ @param flowlabel - include flowlabel in flow hash
+ @param gtpv1teid - include gtpv1teid in flow hash
+*/
+enumflag ip_flow_hash_config_v2
+{
+ IP_API_V2_FLOW_HASH_SRC_IP = 0x01,
+ IP_API_V2_FLOW_HASH_DST_IP = 0x02,
+ IP_API_V2_FLOW_HASH_SRC_PORT = 0x04,
+ IP_API_V2_FLOW_HASH_DST_PORT = 0x08,
+ IP_API_V2_FLOW_HASH_PROTO = 0x10,
+ IP_API_V2_FLOW_HASH_REVERSE = 0x20,
+ IP_API_V2_FLOW_HASH_SYMETRIC = 0x40,
+ IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80,
+ IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100,
+};
+
+autoreply define set_ip_flow_hash_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ vl_api_address_family_t af;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
+ option status="in_progress";
+};
+
/** \brief Set the ip flow hash router ID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -558,6 +622,7 @@ typedef punt_redirect
autoreply define ip_punt_redirect
{
option deprecated;
+
u32 client_index;
u32 context;
vl_api_punt_redirect_t punt;
@@ -566,6 +631,8 @@ autoreply define ip_punt_redirect
define ip_punt_redirect_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
@@ -574,6 +641,8 @@ define ip_punt_redirect_dump
define ip_punt_redirect_details
{
+ option deprecated;
+
u32 context;
vl_api_punt_redirect_t punt;
};
@@ -807,6 +876,30 @@ autoreply define ip_reassembly_enable_disable
vl_api_ip_reass_type_t type;
};
+/** enable/disable full reassembly of packets aimed at our addresses */
+autoreply define ip_local_reass_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ bool enable_ip4;
+ bool enable_ip6;
+};
+
+/** get status of local reassembly */
+define ip_local_reass_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+define ip_local_reass_get_reply
+{
+ u32 context;
+ i32 retval;
+ bool ip4_is_enabled;
+ bool ip6_is_enabled;
+};
+
/**
@brief Set a Path MTU value. i.e. a MTU value for a given neighbour.
The neighbour can be described as attached (w/ interface and next-hop)
@@ -864,6 +957,816 @@ autoreply define ip_path_mtu_replace_end
u32 context;
};
+counters ip_frag {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet fragmented";
+ };
+ small_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "packet smaller than MTU";
+ };
+ fragment_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "number of sent fragments";
+ };
+ cant_fragment_header {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment header";
+ };
+ dont_fragment_set {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment this packet";
+ };
+ malformed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packet";
+ };
+ memory {
+ severity error;
+ type counter64;
+ units "packets";
+ description "could not allocate buffer";
+ };
+ unknown {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown error";
+ };
+};
+
+counters ip4 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip4 packets";
+ };
+
+ /* Errors signalled by ip4-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length < 20 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length > l2 length";
+ };
+ bad_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad ip4 checksum";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 version != 4";
+ };
+ options {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ip4 options present";
+ };
+ fragment_offset_one {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 fragment offset == 1";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 ttl <= 1";
+ };
+ hdr_too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 IHL < 5";
+ };
+
+ /* Errors signalled by ip4-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 MTU exceeded and DF set";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 punt";
+ };
+ same_interface {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 egress interface same as ingress";
+ };
+
+ /* errors signalled by ip4-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ tcp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad tcp checksum";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+
+ /* spoofed packets in ip4-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip4-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors signalled by ip4-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip4-reassembly */
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate/overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_malformed_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packets";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_prot {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+/**
+ * IPv6 Error/info counters
+ */
+counters ip6 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip6 packets";
+ };
+
+ /* Errors signalled by ip6-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length < 40 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length > l2 length";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 version != 6";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 ttl <= 1";
+ };
+
+ /* Errors signalled by ip6-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 MTU exceeded";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 punt";
+ };
+
+ /* errors signalled by ip6-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ icmp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad icmp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+ /* Errors signalled by udp6-lookup. */
+ unknown_udp_port {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no listener for udp port";
+ };
+
+ /* spoofed packets in ip6-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip6-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors signalled by ip6-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip6-reassembly */
+ reass_missing_upper {
+ severity error;
+ type counter64;
+ units "packets";
+ description "missing-upper layer drops";
+ };
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate fragments";
+ };
+ reass_overlapping_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_invalid_frag_len {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid fragment length";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_no_frag_hdr {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no fragmentation header";
+ };
+ reass_invalid_frag_size {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drop due to invalid fragment size";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_proto {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+counters icmp4 { /* ICMPv4 counter set; the paths section binds it to the ip4-icmp-* error paths */
+ none { /* info severity: counts packets that passed validation */
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type { /* NOTE(review): wording mirrors the icmp6 set ("hop_limit") */
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent { /* info severity: a sent reply is not an error */
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ dst_lookup_miss { /* IPv4 counterpart of the icmp6 set's dst_lookup_miss */
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp4 dst address lookup misses";
+ };
+ dest_unreach_sent { /* the *_sent counters below are informational */
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop { /* error severity: the ICMP error message itself was dropped */
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+};
+
+counters icmp6 { /* ICMPv6 counter set; the paths section binds it to the ip6-icmp-* error paths */
+ none { /* info severity: counts packets that passed validation */
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent { /* info severity: a sent reply is not an error */
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ neighbor_solicitation_source_not_on_link { /* neighbor solicitation / advertisement counters start here */
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations from source not on link";
+ };
+ neighbor_solicitation_source_unknown { /* NOTE(review): name says "source", description says "targets" - confirm which is meant */
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations for unknown targets";
+ };
+ neighbor_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements sent";
+ };
+ neighbor_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements received";
+ };
+ router_solicitation_source_not_on_link { /* router solicitation / advertisement counters start here */
+ severity error;
+ type counter64;
+ units "packets";
+ description "router solicitations from source not on link";
+ };
+ router_solicitation_unsupported_intf { /* NOTE(review): description says "neighbor discovery", name says router solicitation */
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery unsupported interface";
+ };
+ router_solicitation_radv_not_config { /* NOTE(review): description says "neighbor discovery", name says radv - confirm */
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery not configured";
+ };
+ router_advertisement_source_not_link_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "router advertisement source not link local";
+ };
+ router_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements sent";
+ };
+ router_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements received";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp6 dst address lookup misses";
+ };
+ dest_unreach_sent { /* the *_sent counters below are informational */
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ packet_too_big_sent { /* present only in the icmp6 set; the icmp4 set has no equivalent entry */
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet too big response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop { /* error severity: the ICMP error message itself was dropped */
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+ alloc_failure { /* present only in the icmp6 set */
+ severity error;
+ type counter64;
+ units "packets";
+ description "buffer allocation failure";
+ };
+};
+
+paths { /* each entry pairs an "/err/<node-name>" path with the name of a counters set */
+ "/err/ip-frag" "ip_frag"; /* ip_frag set is declared outside this excerpt - presumably earlier in ip.api */
+ "/err/mpls-frag" "ip_frag";
+ "/err/ip4-mpls-label-disposition-pipe" "ip4"; /* ip4 set: disposition, local/input and reassembly paths */
+ "/err/ip4-mpls-label-disposition-uniform" "ip4";
+ "/err/ip4-local" "ip4";
+ "/err/ip4-input" "ip4";
+ "/err/ip4-full-reassembly" "ip4";
+ "/err/ip4-local-full-reassembly" "ip4";
+ "/err/ip4-full-reassembly-feature" "ip4";
+ "/err/ip4-full-reassembly-custom" "ip4";
+ "/err/ip4-full-reassembly-expire-walk" "ip4";
+ "/err/ip4-sv-reassembly" "ip4";
+ "/err/ip4-sv-reassembly-feature" "ip4";
+ "/err/ip4-sv-reassembly-output-feature" "ip4";
+ "/err/ip4-sv-reassembly-custom-next" "ip4";
+ "/err/ip4-sv-reassembly-expire-walk" "ip4";
+ "/err/ip6-mpls-label-disposition-pipe" "ip6"; /* ip6 set: same list of paths as the ip4 entries above */
+ "/err/ip6-mpls-label-disposition-uniform" "ip6";
+ "/err/ip6-local" "ip6";
+ "/err/ip6-input" "ip6";
+ "/err/ip6-full-reassembly" "ip6";
+ "/err/ip6-local-full-reassembly" "ip6";
+ "/err/ip6-full-reassembly-feature" "ip6";
+ "/err/ip6-full-reassembly-custom" "ip6";
+ "/err/ip6-full-reassembly-expire-walk" "ip6";
+ "/err/ip6-sv-reassembly" "ip6";
+ "/err/ip6-sv-reassembly-feature" "ip6";
+ "/err/ip6-sv-reassembly-output-feature" "ip6";
+ "/err/ip6-sv-reassembly-custom-next" "ip6";
+ "/err/ip6-sv-reassembly-expire-walk" "ip6";
+ "/err/ip4-icmp-input" "icmp4"; /* ICMP input and error paths use the dedicated icmp4 / icmp6 sets */
+ "/err/ip4-icmp-error" "icmp4";
+ "/err/ip6-icmp-input" "icmp6";
+ "/err/ip6-icmp-error" "icmp6";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c
index 5d0c7707dd3..586f7dfbc85 100644
--- a/src/vnet/ip/ip.c
+++ b/src/vnet/ip/ip.c
@@ -18,6 +18,20 @@
u32 ip_flow_hash_router_id;
+ethernet_type_t
+ip_address_family_to_ether_type (ip_address_family_t af)
+{ /* Map an IP address family (AF_IP4 / AF_IP6) to the matching ethernet_type_t. */
+ switch (af)
+ { /* no default label: each handled family returns from inside the switch */
+ case AF_IP4:
+ return (ETHERNET_TYPE_IP4);
+ case AF_IP6:
+ return (ETHERNET_TYPE_IP6);
+ }
+ ASSERT (0); /* reached only when af is neither AF_IP4 nor AF_IP6 */
+ return (ETHERNET_TYPE_IP4); /* fallback value for that path - presumably matters when ASSERT is compiled out */
+}
+
u8
ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4)
{
@@ -104,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4)
sizeof (ip6_address_t));
}
-/* *INDENT-OFF* */
static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
[IP_FEATURE_INPUT] = {
[AF_IP4] = {
@@ -157,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
},
},
};
-/* *INDENT-ON* */
void
ip_feature_enable_disable (ip_address_family_t af,
@@ -189,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af,
}
int
-ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config)
+ip_flow_hash_set (ip_address_family_t af, u32 table_id,
+ flow_hash_config_t flow_hash_config)
{
fib_protocol_t fproto;
u32 fib_index;
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 6d822d29dbe..9ebefa0cf5d 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -51,19 +51,18 @@
#include <vnet/ip/ip_packet.h>
#include <vnet/ip/lookup.h>
#include <vnet/ip/ip_interface.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>
-#include <vnet/ip/ip4_error.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/icmp4.h>
#include <vnet/ip/ip6.h>
#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip6_error.h>
#include <vnet/ip/icmp6.h>
/* Per protocol info. */
@@ -267,8 +266,11 @@ void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
-int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index,
- u32 table_id, u8 is_api);
+void fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index);
+void mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index);
+int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id);
+
+u32 ip_table_get_unused_id (fib_protocol_t fproto);
u8 ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4);
u8 ip_is_local_host (ip46_address_t * ip46_address, u8 is_ip4);
@@ -286,6 +288,8 @@ void ip_feature_enable_disable (ip_address_family_t af,
void *feature_config,
u32 n_feature_config_bytes);
+ethernet_type_t ip_address_family_to_ether_type (ip_address_family_t af);
+
always_inline u32 vlib_buffer_get_ip4_fib_index (vlib_buffer_t * b);
always_inline u32 vlib_buffer_get_ip6_fib_index (vlib_buffer_t * b);
always_inline u32
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index dde7b7b9de9..45d07c2e0f6 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -169,7 +169,6 @@ typedef struct ip4_main_t
/** Global ip4 main structure. */
extern ip4_main_t ip4_main;
-extern char *ip4_error_strings[];
/** Global ip4 input node. Errors get attached to ip4 input node. */
extern vlib_node_registration_t ip4_input_node;
@@ -212,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
ip_interface_address_t *ia;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -223,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h
index f726178ee63..90f766464f6 100644
--- a/src/vnet/ip/ip46_address.h
+++ b/src/vnet/ip/ip46_address.h
@@ -34,7 +34,6 @@ typedef enum
extern u8 *format_ip46_type (u8 * s, va_list * args);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (union ip46_address_t_ {
struct {
u32 pad[3];
@@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ {
u8 as_u8[16];
u64 as_u64[2];
}) ip46_address_t;
-/* *INDENT-ON* */
format_function_t format_ip46_address;
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
index f58be898d9b..e3da27914bd 100644
--- a/src/vnet/ip/ip46_cli.c
+++ b/src/vnet/ip/ip46_cli.c
@@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
.path = "set interface ip",
.short_help = "IP4/IP6 commands",
};
-/* *INDENT-ON* */
void
ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
@@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
ip_interface_address_t *ia;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip4_addrs, x[0]);
vec_add1 (ip4_masks, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip6_addrs, x[0]);
vec_add1 (ip6_masks, ia->address_length);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
@@ -212,13 +206,11 @@ done:
* @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
.path = "set interface ip address",
.function = add_del_ip_address,
.short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_reassembly_command_fn (vlib_main_t * vm,
@@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_reassembly_command, static) = {
.path = "set interface reassembly",
.short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]",
.function = set_reassembly_command_fn,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
deleted file mode 100644
index dce3dd4c1ab..00000000000
--- a/src/vnet/ip/ip4_error.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip4_error.h: ip4 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip4_error_h
-#define included_ip_ip4_error_h
-
-#define foreach_ip4_error \
- /* Must be first. */ \
- _ (NONE, "valid ip4 packets") \
- \
- /* Errors signalled by ip4-input */ \
- _ (TOO_SHORT, "ip4 length < 20 bytes") \
- _ (BAD_LENGTH, "ip4 length > l2 length") \
- _ (BAD_CHECKSUM, "bad ip4 checksum") \
- _ (VERSION, "ip4 version != 4") \
- _ (OPTIONS, "ip4 options present") \
- _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
- _ (TIME_EXPIRED, "ip4 ttl <= 1") \
- \
- /* Errors signalled by ip4-rewrite. */ \
- _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
- _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
- _ (DROP, "ip4 drop") \
- _ (PUNT, "ip4 punt") \
- _ (SAME_INTERFACE, "ip4 egress interface same as ingress") \
- \
- /* Errors signalled by ip4-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (TCP_CHECKSUM, "bad tcp checksum") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Spoofed packets in ip4-rewrite-local */ \
- _ (SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
- \
- /* Errors signalled by ip4-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Errors singalled by ip4-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors from mfib-forward */ \
- _ (RPF_FAILURE, "Multicast RPF check failed") \
- \
- /* Errors signalled by ip4-reassembly */ \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate/overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_MALFORMED_PACKET, "malformed packets") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROT, "unsupported ip protocol")
-
-typedef enum
-{
-#define _(sym,str) IP4_ERROR_##sym,
- foreach_ip4_error
-#undef _
- IP4_N_ERROR,
-} ip4_error_t;
-
-#endif /* included_ip_ip4_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 5cd5e418fd6..ff74b52eb18 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -52,6 +52,7 @@
#include <vnet/mfib/ip4_mfib.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
#include <vnet/adj/adj_dp.h>
@@ -60,6 +61,7 @@
#include <vnet/ip/ip4_forward.h>
#include <vnet/interface_output.h>
#include <vnet/classify/vnet_classify.h>
+#include <vnet/ip/reass/ip4_full_reass.h>
/** @brief IPv4 lookup node.
@node ip4-lookup
@@ -101,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
.name = "ip4-lookup",
@@ -110,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) =
.n_next_nodes = IP_LOOKUP_N_NEXT,
.next_nodes = IP4_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -266,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
.name = "ip4-load-balance",
@@ -274,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) =
.sibling_of = "ip4-lookup",
.format_trace = format_ip4_lookup_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* get first interface address */
@@ -286,7 +284,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
ip_interface_address_t *ia = 0;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address
(lm, ia, sw_if_index,
1 /* honor unnumbered */ ,
@@ -296,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
result = a;
break;
}));
- /* *INDENT-OFF* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -655,7 +651,10 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
if (error)
- return error;
+ {
+ vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
+ return error;
+ }
ip4_addr_fib_init (&ip4_af, address,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
@@ -666,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
* subnets on interfaces. Easy fix - disallow overlapping subnets, like
* most routers do.
*/
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -727,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -848,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
* when directed broadcast is enabled, the subnet braodcast route will forward
* packets using an adjacency with a broadcast MAC. otherwise it drops
*/
- /* *INDENT-OFF* */
foreach_ip_interface_address(&im->lookup_main, ia,
sw_if_index, 0,
({
@@ -872,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
&pfx, sw_if_index);
}
}));
- /* *INDENT-ON* */
}
#endif
@@ -892,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -906,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -914,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/* Built-in ip4 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
.arc_name = "ip4-unicast",
@@ -1053,7 +1045,6 @@ VNET_FEATURE_INIT (ip4_interface_output, static) =
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -1078,14 +1069,21 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm4, ia);
ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip4_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+ if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
@@ -1192,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%U%U",
- format_white_space, indent,
- format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
+ t->packet_data, sizeof (t->packet_data));
return s;
}
#endif
@@ -1383,14 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
}
#endif
-/* *INDENT-OFF* */
-VNET_FEATURE_ARC_INIT (ip4_local) =
-{
- .arc_name = "ip4-local",
- .start_nodes = VNET_FEATURES ("ip4-local"),
+VNET_FEATURE_ARC_INIT (ip4_local) = {
+ .arc_name = "ip4-local",
+ .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
.last_in_arc = "ip4-local-end-of-arc",
};
-/* *INDENT-ON* */
static inline void
ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
@@ -1466,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
|| ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
{
- if (is_tcp_udp[0])
+ if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
&good_tcp_udp[0]);
- if (is_tcp_udp[1])
+ if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
&good_tcp_udp[1]);
}
@@ -1495,9 +1492,8 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
next_index = *next;
if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
{
- vnet_feature_arc_start (arc_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX],
- &next_index, b);
+ vnet_feature_arc_start (
+ arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
*next = next_index;
}
}
@@ -1505,15 +1501,18 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
typedef struct
{
+ /* The src and fib-index together determine if packet n is the same as n-1 */
ip4_address_t src;
+ u32 fib_index;
u32 lbi;
u8 error;
u8 first;
} ip4_local_last_check_t;
static inline void
-ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
- ip4_local_last_check_t * last_check, u8 * error0)
+ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
+ ip4_local_last_check_t *last_check, u8 *error0,
+ int is_receive_dpo)
{
const dpo_id_t *dpo0;
load_balance_t *lb0;
@@ -1523,13 +1522,23 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
+ vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ receive_dpo_t *rd;
+ rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ }
+
/*
* vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
* adjacency for the destination address (the local interface address).
* vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
* adjacency for the source address (the remote sender's address)
*/
- if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
+ if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
+ (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
last_check->first)
{
lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
@@ -1565,6 +1574,7 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
last_check->lbi = lbi0;
last_check->error = *error0;
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b)->ip.fib_index;
}
else
{
@@ -1576,8 +1586,9 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
}
static inline void
-ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
- ip4_local_last_check_t * last_check, u8 * error)
+ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
+ ip4_local_last_check_t *last_check, u8 *error,
+ int is_receive_dpo)
{
const dpo_id_t *dpo[2];
load_balance_t *lb[2];
@@ -1598,6 +1609,24 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+ not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
+ not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ const receive_dpo_t *rd0, *rd1;
+ rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
+ }
+
/*
* vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
* adjacency for the destination address (the local interface address).
@@ -1644,6 +1673,7 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
last_check->lbi = lbi[1];
last_check->error = error[1];
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
}
else
{
@@ -1694,9 +1724,9 @@ ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
}
static inline uword
-ip4_local_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int head_of_feature_arc)
+ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int head_of_feature_arc,
+ int is_receive_dpo)
{
u32 *from, n_left_from;
vlib_node_runtime_t *error_node =
@@ -1713,10 +1743,11 @@ ip4_local_inline (vlib_main_t * vm,
* member to make sure the .lbi is initialised for the first
* packet.
*/
- .src = {.as_u32 = 0},
+ .src = { .as_u32 = 0 },
.lbi = ~0,
.error = IP4_ERROR_UNKNOWN_PROTOCOL,
.first = 1,
+ .fib_index = 0,
};
from = vlib_frame_vector_args (frame);
@@ -1761,19 +1792,21 @@ ip4_local_inline (vlib_main_t * vm,
if (PREDICT_TRUE (not_batch == 0))
{
ip4_local_check_l4_csum_x2 (vm, b, ip, error);
- ip4_local_check_src_x2 (b, ip, &last_check, error);
+ ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
}
else
{
if (!pt[0])
{
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
- ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
+ ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
+ is_receive_dpo);
}
if (!pt[1])
{
ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
- ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
+ ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
+ is_receive_dpo);
}
}
@@ -1801,7 +1834,8 @@ ip4_local_inline (vlib_main_t * vm,
goto skip_check;
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
- ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
+ ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
+ is_receive_dpo);
skip_check:
@@ -1820,17 +1854,17 @@ ip4_local_inline (vlib_main_t * vm,
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+ return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_node) =
{
.name = "ip4-local",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1838,20 +1872,32 @@ VLIB_REGISTER_NODE (ip4_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip4-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_receive_local_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 1 /* is_receive_dpo */);
+}
+
+VLIB_REGISTER_NODE (ip4_receive_local_node) = {
+ .name = "ip4-receive",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-local"
+};
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+ return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
.name = "ip4-local-end-of-arc",
.vector_size = sizeof (u32),
@@ -1865,7 +1911,6 @@ VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
.node_name = "ip4-local-end-of-arc",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -1928,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm,
* 47
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
.path = "show ip local",
.function = show_ip_local_command_fn,
.short_help = "show ip local",
};
-/* *INDENT-ON* */
typedef enum
{
@@ -2002,7 +2045,9 @@ ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
ttl += 1;
ip->ttl = ttl;
- ASSERT (ip4_header_checksum_is_valid (ip));
+ ASSERT (ip4_header_checksum_is_valid (ip) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
}
/* Decrement TTL & update checksum.
@@ -2180,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
adj0->ia_cfg_index);
next[0] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2205,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
&next_index, b[1],
adj1->ia_cfg_index);
next[1] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2357,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2463,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- /* this acts on the packet that is about to be encapped */
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2593,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
.name = "ip4-rewrite",
.vector_size = sizeof (u32),
@@ -2638,7 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
.format_trace = format_ip4_rewrite_trace,
.sibling_of = "ip4-rewrite",
};
-/* *INDENT-ON */
static clib_error_t *
set_ip_flow_hash_command_fn (vlib_main_t * vm,
@@ -2770,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm,
* [0] [@0]: dpo-drop ip6
* @cliexend
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
-{
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
.path = "set ip flow-hash",
- .short_help =
- "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
+ "[dport] [proto] [reverse] [gtpv1teid]",
.function = set_ip_flow_hash_command_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -2895,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_classify_command, static) =
{
.path = "set ip classify",
@@ -2903,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) =
"set ip classify intfc <interface> table-index <classify-idx>",
.function = set_ip_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h
index 00a47125b8a..b4fcebc9896 100644
--- a/src/vnet/ip/ip4_inlines.h
+++ b/src/vnet/ip/ip4_inlines.h
@@ -42,6 +42,8 @@
#include <vnet/ip/ip_flow_hash.h>
#include <vnet/ip/ip4_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
#define IP_DF 0x4000 /* don't fragment */
@@ -52,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
flow_hash_config_t flow_hash_config)
{
tcp_header_t *tcp = (void *) (ip + 1);
+ udp_header_t *udp = (void *) (ip + 1);
+ gtpv1u_header_t *gtpu = (void *) (udp + 1);
u32 a, b, c, t1, t2;
- uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
- || ip->protocol == IP_PROTOCOL_UDP);
+ uword is_udp = ip->protocol == IP_PROTOCOL_UDP;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp);
t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
? ip->src_address.data_u32 : 0;
@@ -89,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
(t1 << 16) | t2 : (t2 << 16) | t1;
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t1 = gtpu->teid;
+ c ^= t1;
+ }
a ^= ip_flow_hash_router_id;
hash_v3_mix32 (a, b, c);
@@ -98,9 +109,9 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
}
always_inline void *
-vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
- ip4_address_t * src, ip4_address_t * dst,
- int proto, u8 csum_offload, u8 is_df)
+vlib_buffer_push_ip4_custom (vlib_main_t *vm, vlib_buffer_t *b,
+ ip4_address_t *src, ip4_address_t *dst, int proto,
+ u8 csum_offload, u8 is_df, u8 dscp)
{
ip4_header_t *ih;
@@ -108,7 +119,8 @@ vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));
ih->ip_version_and_header_length = 0x45;
- ih->tos = 0;
+ ip4_header_set_dscp (ih, dscp);
+ ip4_header_set_ecn (ih, 0);
ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
/* No fragments */
@@ -152,7 +164,7 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
u8 csum_offload)
{
return vlib_buffer_push_ip4_custom (vm, b, src, dst, proto, csum_offload,
- 1 /* is_df */ );
+ 1 /* is_df */, 0);
}
#endif /* included_ip_ip4_inlines_h */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 3b3edf9fca7..106d17da3cb 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -374,22 +374,13 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm,
return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip4_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip4_error
-#undef _
-};
-#endif
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_input_node) = {
.name = "ip4-input",
.vector_size = sizeof (u32),
.protocol_hint = VLIB_NODE_PROTO_HINT_IP4,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_INPUT_N_NEXT,
.next_nodes = {
@@ -399,7 +390,6 @@ VLIB_REGISTER_NODE (ip4_input_node) = {
[IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
[IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
[IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_INPUT_NEXT_REASSEMBLY] = "ip4-full-reassembly",
},
.format_buffer = format_ip4_header,
@@ -414,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index 383ef31758c..d2ed13fa35f 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -42,6 +42,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/ip_csum.h>
typedef enum
{
@@ -51,7 +52,6 @@ typedef enum
IP4_INPUT_NEXT_LOOKUP,
IP4_INPUT_NEXT_LOOKUP_MULTICAST,
IP4_INPUT_NEXT_ICMP_ERROR,
- IP4_INPUT_NEXT_REASSEMBLY,
IP4_INPUT_N_NEXT,
} ip4_input_next_t;
@@ -60,18 +60,21 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
{
if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45))
{
- if ((ip->ip_version_and_header_length & 0xf) != 5)
+ if ((ip->ip_version_and_header_length & 0xf0) != 0x40)
+ *error = IP4_ERROR_VERSION;
+ else if ((ip->ip_version_and_header_length & 0x0f) < 5)
+ *error = IP4_ERROR_HDR_TOO_SHORT;
+ else
{
*error = IP4_ERROR_OPTIONS;
- if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
+ if (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
*error = IP4_ERROR_BAD_CHECKSUM;
}
- else
- *error = IP4_ERROR_VERSION;
}
- else
- if (PREDICT_FALSE (verify_checksum &&
- ip_csum (ip, sizeof (ip4_header_t)) != 0))
+ else if (PREDICT_FALSE (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
+ 0))
*error = IP4_ERROR_BAD_CHECKSUM;
}
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 0f4c47fe11a..00855f7db43 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -91,94 +91,48 @@ ip4_mtrie_leaf_set_next_ply_index (u32 i)
return l;
}
-#ifndef __ALTIVEC__
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
- init_x4 = u32x4_splat (init);
-#else
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
-{ \
- u32x4_union_t y; \
- y.as_u32[0] = init; \
- y.as_u32[1] = init; \
- y.as_u32[2] = init; \
- y.as_u32[3] = init; \
- init_x4 = y.as_u32x4; \
-}
-#endif
-
-#ifdef CLIB_HAVE_VEC128
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32x4 *l, init_x4; \
- \
- PLY_X4_SPLAT_INIT(init_x4, init); \
- for (l = p->leaves_as_u32x4; \
- l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
- l += 4) \
- { \
- l[0] = init_x4; \
- l[1] = init_x4; \
- l[2] = init_x4; \
- l[3] = init_x4; \
- } \
-}
-#else
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32 *l; \
- \
- for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
- { \
- l[0] = init; \
- l[1] = init; \
- l[2] = init; \
- l[3] = init; \
- } \
-}
-#endif
-
-#define PLY_INIT(p, init, prefix_len, ply_base_len) \
-{ \
- /* \
- * A leaf is 'empty' if it represents a leaf from the covering PLY \
- * i.e. if the prefix length of the leaf is less than or equal to \
- * the prefix length of the PLY \
- */ \
- p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
- ARRAY_LEN (p->leaves) : 0); \
- clib_memset (p->dst_address_bits_of_leaves, prefix_len, \
- sizeof (p->dst_address_bits_of_leaves)); \
- p->dst_address_bits_base = ply_base_len; \
- \
- /* Initialize leaves. */ \
- PLY_INIT_LEAVES(p); \
-}
-
static void
ply_8_init (ip4_mtrie_8_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len,
u32 ply_base_len)
{
- PLY_INIT (p, init, prefix_len, ply_base_len);
+ p->n_non_empty_leafs = prefix_len > ply_base_len ? ARRAY_LEN (p->leaves) : 0;
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ p->dst_address_bits_base = ply_base_len;
+
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
static void
ply_16_init (ip4_mtrie_16_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len)
{
- clib_memset (p->dst_address_bits_of_leaves, prefix_len,
- sizeof (p->dst_address_bits_of_leaves));
- PLY_INIT_LEAVES (p);
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
static ip4_mtrie_leaf_t
ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len)
{
ip4_mtrie_8_ply_t *p;
- /* Get cache aligned ply. */
+ ip4_mtrie_leaf_t l;
+ u8 need_barrier_sync = pool_get_will_expand (ip4_ply_pool);
+ vlib_main_t *vm = vlib_get_main ();
+ ASSERT (vm->thread_index == 0);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+ /* Get cache aligned ply. */
pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
- return ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+ l = ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
+ return l;
}
always_inline ip4_mtrie_8_ply_t *
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index ec417c9a9f7..16c524745be 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -65,14 +65,7 @@ typedef struct ip4_mtrie_16_ply_t_
/**
* The leaves/slots/buckets to be filled with leaves
*/
- union
- {
- ip4_mtrie_leaf_t leaves[PLY_16_SIZE];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[PLY_16_SIZE];
/**
* Prefix length for terminal leaves.
@@ -85,17 +78,11 @@ typedef struct ip4_mtrie_16_ply_t_
*/
typedef struct ip4_mtrie_8_ply_t_
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/**
* The leaves/slots/buckets to be filled with leaves
*/
- union
- {
- ip4_mtrie_leaf_t leaves[256];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[256 / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[256];
/**
* Prefix length for leaves/ply.
@@ -113,9 +100,6 @@ typedef struct ip4_mtrie_8_ply_t_
* 'non-empty'. Otherwise it is the value of the cover.
*/
i32 dst_address_bits_base;
-
- /* Pad to cache line boundary. */
- u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
} ip4_mtrie_8_ply_t;
STATIC_ASSERT (0 == sizeof (ip4_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c
index 1b5a7878512..bbe311ffb20 100644
--- a/src/vnet/ip/ip4_options.c
+++ b/src/vnet/ip/ip4_options.c
@@ -78,10 +78,17 @@ VLIB_NODE_FN (ip4_options_node) (vlib_main_t * vm,
{
case IP4_ROUTER_ALERT_OPTION:
/*
+ * check the option length
+ */
+ if (options[1] != 4)
+ break;
+ /*
* if it's an IGMP packet, pass up the local stack
*/
if (IP_PROTOCOL_IGMP == ip4->protocol)
{
+ ip_lookup_set_buffer_fib_index (
+ ip4_main.fib_index_by_sw_if_index, b);
next = IP4_OPTIONS_NEXT_LOCAL;
}
break;
@@ -120,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_options_node) = {
.name = "ip4-options",
.vector_size = sizeof (u32),
@@ -133,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_options_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index 513a7449b54..269049194e6 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -41,7 +41,6 @@
#define included_ip4_packet_h
#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
-#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */
#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
#include <vppinfra/warnings.h> /* for WARN_OFF/WARN_ON macro */
@@ -130,19 +129,15 @@ typedef union
/* For checksumming we'll want to access IP header in word sized chunks. */
/* For 64 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u64 checksum_data_64[2];
u32 checksum_data_64_32[1];
});
- /* *INDENT-ON* */
/* For 32 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u32 checksum_data_32[5];
});
- /* *INDENT-ON* */
} ip4_header_t;
/* Value of ip_version_and_header_length for packets w/o options. */
@@ -201,9 +196,7 @@ ip4_next_header (ip4_header_t * i)
/* Turn off array bounds check due to ip4_header_t
option field operations. */
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
static_always_inline u16
ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
@@ -306,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
return ~((u16) sum);
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
always_inline u16
ip4_header_checksum (ip4_header_t * i)
@@ -476,47 +467,6 @@ ip4_multicast_ethernet_address (u8 * ethernet_address,
ethernet_address[5] = d[3];
}
-always_inline void
-ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
-{
- u32 src0, dst0;
-
- src0 = ip0->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip0->dst_address.data_u32 = src0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
-}
-
-always_inline void
-ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- u32 src0, dst0, src1, dst1;
-
- src0 = ip0->src_address.data_u32;
- src1 = ip1->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- dst1 = ip1->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip1->src_address.data_u32 = dst1;
- ip0->dst_address.data_u32 = src0;
- ip1->dst_address.data_u32 = src1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
-}
-
#endif /* included_ip4_packet_h */
/*
diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c
index 89803afb9dd..b8cc3304437 100644
--- a/src/vnet/ip/ip4_punt_drop.c
+++ b/src/vnet/ip/ip4_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_punt) =
{
.arc_name = "ip4-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) =
.arc_name = "ip4-drop",
.start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip4_punt_policer_cfg;
@@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm,
ip4_punt_policer_cfg.policer_index));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_policer_node) = {
.name = "ip4-punt-policer",
.vector_size = sizeof (u32),
@@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = {
.node_name = "ip4-punt-policer",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/* *INDENT-ON* */
#define foreach_ip4_punt_redirect_error \
@@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP4));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_redirect_node) = {
.name = "ip4-punt-redirect",
.vector_size = sizeof (u32),
@@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = {
.node_name = "ip4-punt-redirect",
.runs_before = VNET_FEATURES("error-punt"),
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_feat_arc_ip4_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_drop_node) =
{
.name = "ip4-drop",
@@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -301,17 +293,17 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_policer_command, static) =
{
.path = "ip punt policer",
.function = ip4_punt_police_cmd,
.short_help = "ip punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
+static u32 ip4_punt_redirect_enable_counts;
+
void
ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
const fib_route_path_t *rpaths)
@@ -320,13 +312,16 @@ ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP4, rpaths);
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
}
void
ip4_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip4_punt_redirect_enable_counts);
+ if (0 == --ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP4, rx_sw_if_index);
}
@@ -399,14 +394,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) =
{
.path = "ip punt redirect",
.function = ip4_punt_redirect_cmd,
.short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_punt_redirect_show_cmd (vlib_main_t * vm,
@@ -423,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt redirect}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
{
.path = "show ip punt redirect",
@@ -431,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
.short_help = "show ip punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
index 00ab51e2440..27b2d549ea7 100644
--- a/src/vnet/ip/ip4_source_and_port_range_check.c
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -99,7 +99,9 @@ static inline u32
check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
u16 dst_port, u32 next)
{
+#ifdef CLIB_HAVE_VEC128
u16x8 key = u16x8_splat (dst_port);
+#endif
int i;
if (NULL == ppr_dpo || dst_port == 0)
@@ -107,9 +109,20 @@ check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+#ifdef CLIB_HAVE_VEC128
if (!u16x8_is_all_zero ((ppr_dpo->blocks[i].low.as_u16x8 <= key) &
(ppr_dpo->blocks[i].hi.as_u16x8 >= key)))
return next;
+#else
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ if ((ppr_dpo->blocks[i].low.as_u16[j] <= dst_port) &&
+ (ppr_dpo->blocks[i].hi.as_u16[j] >= dst_port))
+ return next;
+ }
+ };
+#endif
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
}
@@ -550,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm,
if this changes can easily make new function
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.function = ip4_source_and_port_range_check_rx,
.name = "ip4-source-and-port-range-check-rx",
@@ -567,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.function = ip4_source_and_port_range_check_tx,
.name = "ip4-source-and-port-range-check-tx",
@@ -586,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
int
set_ip_source_and_port_range_check (vlib_main_t * vm,
@@ -749,7 +758,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
*
* Example of how to enable range checking on TX:
- * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7}
+ * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0
+ * udp-in-vrf 7}
*
* Example of graph node after range checking is enabled:
* @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
@@ -758,7 +768,7 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* interface-output [1]
* @cliexend
*
- * Example of how to display the features enabed on an interface:
+ * Example of how to display the features enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
*
@@ -783,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
.path = "set interface ip source-and-port-range-check",
.function = set_ip_source_and_port_range_check_fn,
.short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
};
-/* *INDENT-ON* */
static u8 *
format_ppr_dpo (u8 * s, va_list * args)
@@ -1250,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
* Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
* @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
.path = "set ip source-and-port-range-check",
.function = ip_source_and_port_range_check_command_fn,
.short_help =
"set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1367,7 +1373,7 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0}
* 172.16.2.0: 23 - 101
* @cliexend
- * Example of how to test to determine of a given Pv4 address and port
+ * Example of how to test to determine if a given IPv4 address and port
* are being validated:
* @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23}
* 172.16.2.2 port 23 PASS
@@ -1376,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* 172.16.2.2 port 250 FAIL
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
.path = "show ip source-and-port-range-check",
.function = show_source_and_port_range_check_fn,
.short_help =
"show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index a6d87f1f962..57c2b6ff78b 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -28,14 +28,12 @@
typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4,
ip6_header_t * ip6, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp_to_icmp6_updater_pointer_table[] =
{ 0, 1, 4, 4, ~0,
~0, ~0, ~0, 7, 6,
~0, ~0, 8, 8, 8,
8, 24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_4to6(id) (id)
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index f33780f1a98..56eec523d5b 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
ip_interface_address_t *ia;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
deleted file mode 100644
index a6fb16570b6..00000000000
--- a/src/vnet/ip/ip6_error.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip6_error.h: ip6 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip6_error_h
-#define included_ip_ip6_error_h
-
-#define foreach_ip6_error \
- /* Must be first. */ \
- _ (NONE, "valid ip6 packets") \
- \
- /* Errors signalled by ip6-input */ \
- _ (TOO_SHORT, "ip6 length < 40 bytes") \
- _ (BAD_LENGTH, "ip6 length > l2 length") \
- _ (VERSION, "ip6 version != 6") \
- _ (TIME_EXPIRED, "ip6 ttl <= 1") \
- \
- /* Errors signalled by ip6-rewrite. */ \
- _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
- _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
- _ (DROP, "ip6 drop") \
- _ (PUNT, "ip6 punt") \
- \
- /* Errors signalled by ip6-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (ICMP_CHECKSUM, "bad icmp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Errors signalled by udp6-lookup. */ \
- _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
- \
- /* Spoofed packets in ip6-rewrite-local */ \
- _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
- \
- /* Erros singalled by ip6-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Erros singalled by ip6-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors signalled by ip6-reassembly */ \
- _ (REASS_MISSING_UPPER, "missing-upper layer drops") \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate fragments") \
- _ (REASS_OVERLAPPING_FRAGMENT, "overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_TIMEOUT, "fragments dropped due to reassembly timeout") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROTO, "unsupported ip protocol")
-
-typedef enum
-{
-#define _(sym,str) IP6_ERROR_##sym,
- foreach_ip6_error
-#undef _
- IP6_N_ERROR,
-} ip6_error_t;
-
-#endif /* included_ip_ip6_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
index 1b8ff1e0ab0..1a1bef26aa6 100644
--- a/src/vnet/ip/ip6_format.c
+++ b/src/vnet/ip/ip6_format.c
@@ -288,7 +288,7 @@ format_ip6_header (u8 * s, va_list * args)
"\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
format_white_space, indent, traffic_class, flow_label,
ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
-
+#if 0
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes)
{
@@ -301,7 +301,7 @@ format_ip6_header (u8 * s, va_list * args)
/* next protocol header */ (void *) (ip + 1),
max_header_bytes - sizeof (ip[0]));
}
-
+#endif
return s;
}
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 8daf2614c15..48fb633fd32 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -48,6 +48,7 @@
#include <vnet/fib/ip6_fib.h>
#include <vnet/mfib/ip6_mfib.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/pg/pg.h>
@@ -70,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -84,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
/* If prefix already set on interface, just increment ref count & return */
if_prefix = ip_get_interface_prefix (lm, &key);
@@ -177,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -191,13 +189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
if_prefix = ip_get_interface_prefix (lm, &key);
if (!if_prefix)
{
clib_warning ("Prefix not found while deleting %U",
- format_ip4_address_and_length, address, address_length);
+ format_ip6_address_and_length, address, address_length);
return;
}
@@ -282,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -290,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
result = a;
break;
}));
- /* *INDENT-ON* */
return result;
}
@@ -310,7 +305,10 @@ ip6_add_del_interface_address (vlib_main_t * vm,
error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
if (error)
- return error;
+ {
+ vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
+ return error;
+ }
if (ip6_address_is_link_local_unicast (address))
{
@@ -355,7 +353,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip6_af);
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -413,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -533,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -546,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip6_del_interface_routes (sw_if_index, im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -554,7 +548,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
/* Built-in ip6 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
.arc_name = "ip6-unicast",
@@ -679,7 +672,6 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = {
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
@@ -705,14 +697,21 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm6, ia);
ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip6_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+ if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
@@ -735,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
.name = "ip6-lookup",
@@ -744,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) =
.n_next_nodes = IP6_LOOKUP_N_NEXT,
.next_nodes = IP6_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -922,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
.name = "ip6-load-balance",
@@ -930,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) =
.sibling_of = "ip6-lookup",
.format_trace = format_ip6_lookup_trace,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -953,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args)
ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ufib:%d adj:%d flow:%d",
- format_white_space, indent,
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
t->fib_index, t->adj_index, t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1214,23 +1208,17 @@ always_inline u8
ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
u32 * udp_offset0)
{
- u32 proto0;
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
- if (proto0 != IP_PROTOCOL_UDP)
- {
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
- proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
- }
- return proto0;
+ int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
+ if (nh > 0)
+ if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
+ return nh;
+ return 0;
}
-/* *INDENT-OFF* */
-VNET_FEATURE_ARC_INIT (ip6_local) =
-{
- .arc_name = "ip6-local",
- .start_nodes = VNET_FEATURES ("ip6-local"),
+VNET_FEATURE_ARC_INIT (ip6_local) = {
+ .arc_name = "ip6-local",
+ .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
};
-/* *INDENT-ON* */
static_always_inline u8
ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
@@ -1267,7 +1255,7 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
}
n_bytes_left -= n_this_buffer;
- n_bytes_left -= p0->total_length_not_including_first_buffer;
+ n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
if (n_bytes_left == 0)
return 0;
@@ -1275,10 +1263,10 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
return 1;
}
-
always_inline uword
-ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, int head_of_feature_arc)
+ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int head_of_feature_arc,
+ int is_receive_dpo)
{
ip6_main_t *im = &ip6_main;
ip_lookup_main_t *lm = &im->lookup_main;
@@ -1310,7 +1298,7 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_prefetch_buffer_data (b[3], LOAD);
}
- ip6_error_t error[2];
+ vl_counter_ip6_enum_t error[2];
error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
@@ -1466,6 +1454,23 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ const receive_dpo_t *rd0, *rd1;
+ rd0 =
+ receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ rd1 =
+ receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
+ }
} /* head_of_feature_arc */
next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
@@ -1487,16 +1492,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
if (PREDICT_TRUE (ip6_unknown[1]))
{
u32 next32 = next[1];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[1])->sw_if_index
- [VLIB_RX], &next32, b[1]);
+ vnet_buffer (b[1])->ip.rx_sw_if_index,
+ &next32, b[1]);
next[1] = next32;
}
}
@@ -1593,6 +1598,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
vnet_buffer (b[0])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ receive_dpo_t *rd;
+ rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ }
} /* head_of_feature_arc */
next[0] = lm->local_next_by_ip_protocol[ip->protocol];
@@ -1607,8 +1622,8 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
}
@@ -1626,15 +1641,17 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+ return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_node) =
{
.name = "ip6-local",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1642,19 +1659,32 @@ VLIB_REGISTER_NODE (ip6_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip6-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_receive_local_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 1 /* is_receive_dpo */);
+}
+
+VLIB_REGISTER_NODE (ip6_receive_local_node) = {
+ .name = "ip6-receive",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-local"
+};
VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+ return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
.name = "ip6-local-end-of-arc",
.vector_size = sizeof (u32),
@@ -1668,7 +1698,6 @@ VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
.node_name = "ip6-local-end-of-arc",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifdef CLIB_MARCH_VARIANT
extern vlib_node_registration_t ip6_local_node;
@@ -1941,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- /* before we paint on the next header, update the L4
- * checksums if required, since there's no offload on a tunnel */
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
- vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ipv6 header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ip6_header_t));
@@ -2041,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ip6 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
}
@@ -2193,14 +2212,12 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_midchain_node) =
-{
+VLIB_REGISTER_NODE (ip6_midchain_node) = {
.name = "ip6-midchain",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
.sibling_of = "ip6-rewrite",
- };
+};
VLIB_REGISTER_NODE (ip6_rewrite_node) =
{
@@ -2241,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
.sibling_of = "ip6-rewrite",
};
-/* *INDENT-ON* */
/*
* Hop-by-Hop handling
@@ -2255,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
-/* *INDENT-OFF* */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
@@ -2263,7 +2278,6 @@ typedef enum
#undef _
IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
-/* *INDENT-ON* */
/*
* Primary h-b-h handler trace support
@@ -2690,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
{
.name = "ip6-hop-by-hop",
@@ -2702,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
.error_strings = ip6_hop_by_hop_error_strings,
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_init (vlib_main_t * vm)
@@ -2954,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
.path = "set ip6 flow-hash",
.short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
"[dport] [proto] [reverse] [flowlabel]",
.function = set_ip6_flow_hash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ip6_local_command_fn (vlib_main_t * vm,
@@ -3002,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm,
* 115
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_local, static) =
{
.path = "show ip6 local",
.function = show_ip6_local_command_fn,
.short_help = "show ip6 local",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -3121,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
{
.path = "set ip6 classify",
@@ -3129,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
"set ip6 classify intfc <interface> table-index <classify-idx>",
.function = set_ip6_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
index e66084c2c4d..412741abcf8 100644
--- a/src/vnet/ip/ip6_hop_by_hop.c
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
{
.name = "ip6-add-hop-by-hop",
.vector_size = sizeof (u32),
@@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
#undef _
},
};
-/* *INDENT-ON* */
/* The main h-b-h tracer was already invoked, no need to do much here */
typedef struct
@@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
{
.name = "ip6-pop-hop-by-hop",
@@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
/* See ip/lookup.h */
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
{
.name = "ip6-local-hop-by-hop",
@@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
[IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
show_ip6_hbh_command_fn (vlib_main_t * vm,
@@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm,
* Display ip6 local hop-by-hop next protocol handler nodes
* @cliexcmd{show ip6 hbh}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_hbh, static) = {
.path = "show ip6 hbh",
.short_help = "show ip6 hbh",
.function = show_ip6_hbh_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"),
};
-/* *INDENT-ON* */
void
ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index)
@@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to clear iOAM features:
* @cliexcmd{clear ioam rewrite}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
.path = "clear ioam rewrite",
.short_help = "clear ioam rewrite",
.function = clear_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
ip6_ioam_enable (int has_trace_option, int has_pot_option,
@@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to enable trace and pot with ppc set to encap:
* @cliexcmd{set ioam rewrite trace pot ppc encap}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
.path = "set ioam rewrite",
.short_help = "set ioam [trace] [pot] [seqno] [analyse]",
.function = ip6_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
@@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
* EDGE TO EDGE - PPC OPTION - 1 (Encap)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
.path = "show ioam summary",
.short_help = "show ioam summary",
.function = ip6_show_ioam_summary_cmd_fn,
};
-/* *INDENT-ON* */
void
vnet_register_ioam_end_of_path_callback (void *cb)
diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h
index 2a4bb70573b..9bd475224eb 100644
--- a/src/vnet/ip/ip6_inlines.h
+++ b/src/vnet/ip/ip6_inlines.h
@@ -49,29 +49,40 @@ always_inline u32
ip6_compute_flow_hash (const ip6_header_t * ip,
flow_hash_config_t flow_hash_config)
{
- tcp_header_t *tcp;
+ const tcp_header_t *tcp;
+ const udp_header_t *udp = (void *) (ip + 1);
+ const gtpv1u_header_t *gtpu = (void *) (udp + 1);
u64 a, b, c;
u64 t1, t2;
+ u32 t3;
uword is_tcp_udp = 0;
u8 protocol = ip->protocol;
+ uword is_udp = protocol == IP_PROTOCOL_UDP;
- if (PREDICT_TRUE
- ((ip->protocol == IP_PROTOCOL_TCP)
- || (ip->protocol == IP_PROTOCOL_UDP)))
+ if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp))
{
is_tcp_udp = 1;
tcp = (void *) (ip + 1);
}
- else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ else
{
- ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP) ||
- (hbh->protocol == IP_PROTOCOL_UDP))
+ const void *cur = ip + 1;
+ if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ const ip6_hop_by_hop_header_t *hbh = cur;
+ protocol = hbh->protocol;
+ cur += (hbh->length + 1) * 8;
+ }
+ if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ const ip6_fragment_ext_header_t *frag = cur;
+ protocol = frag->protocol;
+ }
+ else if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP)
{
is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ tcp = cur;
}
- protocol = hbh->protocol;
}
t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
@@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
((flow_hash_config & IP_FLOW_HASH_FL) ? ip6_flow_label_network_order (ip) :
0);
c ^= t1;
-
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t3 = gtpu->teid;
+ a ^= t3;
+ }
hash_mix64 (a, b, c);
return (u32) c;
}
@@ -134,65 +151,17 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
* it is a non-first fragment -1 is returned.
*/
always_inline int
-ip6_locate_header (vlib_buffer_t * p0,
- ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+ip6_locate_header (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ u32 *offset)
{
- u8 next_proto = ip0->protocol;
- u8 *next_header;
- u8 done = 0;
- u32 cur_offset;
- u8 *temp_nxthdr = 0;
- u32 exthdr_len = 0;
-
- next_header = ip6_next_header (ip0);
- cur_offset = sizeof (ip6_header_t);
- while (1)
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res >= 0)
{
- done = (next_proto == find_hdr_type);
- if (PREDICT_FALSE
- (next_header >=
- (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
- {
- //A malicious packet could set an extension header with a too big size
- return (-1);
- }
- if (done)
- break;
- if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
- {
- if (find_hdr_type < 0)
- break;
- return -1;
- }
- if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
- {
- ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
- u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
- /* Non first fragment return -1 */
- if (frag_off)
- return (-1);
- exthdr_len = sizeof (ip6_frag_hdr_t);
- temp_nxthdr = next_header + exthdr_len;
- }
- else if (next_proto == IP_PROTOCOL_IPSEC_AH)
- {
- exthdr_len =
- ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- else
- {
- exthdr_len =
- ip6_ext_header_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
- next_header = temp_nxthdr;
- cur_offset += exthdr_len;
+ *offset = hdr_chain.eh[res].offset;
+ return hdr_chain.eh[res].protocol;
}
-
- *offset = cur_offset;
- return (next_proto);
+ return -1;
}
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 01b8f46b4d8..64c9d76ebaa 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -219,21 +219,12 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip6_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip6_error
-#undef _
-};
-#endif /* CLIB_MARCH_VARIANT */
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_input_node) = {
.name = "ip6-input",
.vector_size = sizeof (u32),
.n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_INPUT_N_NEXT,
.next_nodes = {
@@ -246,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_input.h b/src/vnet/ip/ip6_input.h
index fe993caa889..49e37ec1808 100644
--- a/src/vnet/ip/ip6_input.h
+++ b/src/vnet/ip/ip6_input.h
@@ -43,8 +43,6 @@
#include <vnet/ip/ip.h>
#include <vnet/ip/icmp6.h>
-extern char *ip6_error_strings[];
-
typedef enum
{
IP6_INPUT_NEXT_DROP,
diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c
index afa9d8e3ea9..c2a7ccacbc1 100644
--- a/src/vnet/ip/ip6_link.c
+++ b/src/vnet/ip/ip6_link.c
@@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il)
{
ip6_link_delegate_t *ild;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index);
}));
- /* *INDENT-ON* */
vec_free (il->il_delegates);
il->il_delegates = NULL;
@@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address)
ip6_address_copy (&ilp.ilp_addr, address);
ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change)
il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index,
&il->il_ll_addr);
}));
- /* *INDENT-ON* */
return (0);
}
@@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im,
if (NULL == il)
return;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (is_delete)
@@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im,
address, address_length);
}
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm,
* Original MAC address: 16:d9:e0:91:79:86
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_link_command, static) =
{
.path = "test ip6 link",
.function = test_ip6_link_command_fn,
.short_help = "test ip6 link <mac-address>",
};
-/* *INDENT-ON* */
static u8 *
ip6_print_addrs (u8 * s, u32 * addrs)
@@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg)
if (!ip6_link_is_enabled_i (il))
return (s);
- s = format (s, "%U is admin %s\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, il->il_sw_if_index),
- (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ?
- "up" : "down"));
+ s = format (
+ s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm,
+ il->il_sw_if_index,
+ (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? "up" : "down"));
u32 ai;
u32 *link_scope = 0, *global_scope = 0;
@@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg)
s = format (s, "%U%U\n",
format_white_space, 4, format_ip6_address, &il->il_ll_addr);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE(ild, il,
({
s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format,
ild->ild_index, 2);
}));
- /* *INDENT-ON* */
return (s);
}
@@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm,
* show ip6 interface: IPv6 not enabled on interface
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_link_show_command, static) =
{
.path = "show ip6 interface",
.function = ip6_link_show,
.short_help = "show ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
enable_ip6_interface_cmd (vlib_main_t * vm,
@@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how enable IPv6 on a given interface:
* @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
{
.path = "enable ip6 interface",
.function = enable_ip6_interface_cmd,
.short_help = "enable ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
disable_ip6_interface_cmd (vlib_main_t * vm,
@@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how disable IPv6 on a given interface:
* @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
{
.path = "disable ip6 interface",
.function = disable_ip6_interface_cmd,
.short_help = "disable ip6 interface <interface>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c
index e4010bc43c4..f9172f6c50c 100644
--- a/src/vnet/ip/ip6_ll_table.c
+++ b/src/vnet/ip/ip6_ll_table.c
@@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index)
vnet_main_t *vnm = vnet_get_main ();
u8 *desc;
- desc = format (NULL, "IP6-link-local:%U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, sw_if_index));
+ desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
ip6_ll_table.ilt_fibs[sw_if_index] =
ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND,
@@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index)
* leave the default route as a drop, but fix fe::/10 to be a glean
* via the interface.
*/
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = 10,
@@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index)
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
static void
@@ -111,12 +108,17 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp,
.frp_flags = flags,
.frp_sw_if_index = ilp->ilp_sw_if_index,
.frp_proto = DPO_PROTO_IP6,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
};
- fib_prefix_t fp;
+ fib_prefix_t fp = { 0 };
- vec_validate (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index);
+ if (flags & FIB_ROUTE_PATH_LOCAL)
+ rpath.frp_addr.ip6 = ilp->ilp_addr;
- if (0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0);
+
+ if (~0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
{
ip6_ll_fib_create (ilp->ilp_sw_if_index);
}
@@ -151,11 +153,12 @@ ip6_ll_table_entry_delete (const ip6_ll_prefix_t * ilp)
* if there are no ND sourced prefixes left, then we can clean up this FIB
*/
fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index);
- if (0 == fib_table_get_num_entries (fib_index,
- FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND))
+ if (~0 != fib_index &&
+ 0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6,
+ FIB_SOURCE_IP6_ND))
{
fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND);
- ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = 0;
+ ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = ~0;
}
}
@@ -273,8 +276,7 @@ ip6_ll_show_fib (vlib_main_t * vm,
u8 *s = NULL;
fib_index = ip6_ll_table.ilt_fibs[sw_if_index];
-
- if (0 == fib_index)
+ if (~0 == fib_index)
continue;
fib_table = fib_table_get (fib_index, FIB_PROTOCOL_IP6);
@@ -345,13 +347,21 @@ ip6_ll_show_fib (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6-ll",
.short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_ll_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, sw_if_index, ~0);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_ll_sw_interface_add_del);
static clib_error_t *
ip6_ll_module_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c
index a7ac164b05a..b074b6e991c 100644
--- a/src/vnet/ip/ip6_ll_types.c
+++ b/src/vnet/ip/ip6_ll_types.c
@@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args)
ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "(%U, %U)",
- format_ip6_address, &ilp->ilp_addr,
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index));
+ s = format (s, "(%U, %U)", format_ip6_address, &ilp->ilp_addr,
+ format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index);
return (s);
}
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
index 7a8c31cee48..c506792ddcf 100644
--- a/src/vnet/ip/ip6_packet.h
+++ b/src/vnet/ip/ip6_packet.h
@@ -40,8 +40,9 @@
#ifndef included_ip6_packet_h
#define included_ip6_packet_h
-#include <vnet/tcp/tcp_packet.h>
+#include <vlib/vlib.h>
#include <vnet/ip/ip4_packet.h>
+#include <stdbool.h>
typedef union
{
@@ -62,13 +63,11 @@ typedef struct
} ip6_address_and_mask_t;
/* Packed so that the mhash key doesn't include uninitialized pad bytes */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/* IP address must be first for ip_interface_address_get_address() to work */
ip6_address_t ip6_addr;
u32 fib_index;
}) ip6_address_fib_t;
-/* *INDENT-ON* */
always_inline void
ip6_addr_fib_init (ip6_address_fib_t * addr_fib,
@@ -424,97 +423,39 @@ ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
}
-always_inline void
-ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
-{
- {
- ip6_address_t src0, dst0;
-
- src0 = ip0->src_address;
- dst0 = ip0->dst_address;
- ip0->src_address = dst0;
- ip0->dst_address = src0;
- }
-
- {
- u16 src0, dst0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
- }
-}
-
-always_inline void
-ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- {
- ip6_address_t src0, dst0, src1, dst1;
-
- src0 = ip0->src_address;
- src1 = ip1->src_address;
- dst0 = ip0->dst_address;
- dst1 = ip1->dst_address;
- ip0->src_address = dst0;
- ip1->src_address = dst1;
- ip0->dst_address = src0;
- ip1->dst_address = src1;
- }
-
- {
- u16 src0, dst0, src1, dst1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
- }
-}
-
-
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 data;
}) ip6_pad1_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
u8 data[0];
}) ip6_padN_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
#define IP6_MLDP_ALERT_TYPE 0x5
u8 type;
u8 len;
u16 value;
}) ip6_router_alert_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
+/*
+ * Packed 8-byte view of an IPv6 fragment extension header: one byte of
+ * protocol, one reserved byte, a 16-bit fragoff field and a 32-bit id.
+ * NOTE(review): the field sizes and order are the same as ip6_frag_hdr_t in
+ * this header; presumably protocol is the next-header value and fragoff/id
+ * are in network byte order - confirm against the users of this type.
+ */
+typedef CLIB_PACKED (struct {
+ u8 protocol;
+ u8 reserved;
+ u16 fragoff;
+ u32 id;
+}) ip6_fragment_ext_header_t;
+
typedef CLIB_PACKED (struct {
u8 next_hdr;
/* Length of this header plus option data in 8 byte units. */
u8 n_data_u64s;
}) ip6_ext_header_t;
-/* *INDENT-ON* */
#define foreach_ext_hdr_type \
_(IP6_HOP_BY_HOP_OPTIONS) \
_(IPV6_ROUTE) \
- _(IPV6_FRAGMENTATION) \
- _(IPSEC_ESP) \
- _(IPSEC_AH) \
_(IP6_DESTINATION_OPTIONS) \
_(MOBILITY) \
_(HIP) \
@@ -542,15 +483,70 @@ ip6_ext_hdr (u8 nexthdr)
#endif
}
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more;
+ u32 identification;
+}) ip6_frag_hdr_t;
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) >> 3)
+
+#define ip6_frag_hdr_offset_bytes(hdr) (8 * ip6_frag_hdr_offset (hdr))
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) & 0x1)
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16 (((offset) << 3) + !!(more))
+
#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3)
#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2)
+/*
+ * Returns the length in bytes to account for a header of type nh located
+ * at p.
+ *
+ * - Headers recognised by ip6_ext_hdr(): length is read from the header
+ *   itself via ip6_ext_header_len() (8-byte units).
+ * - IPsec AH: length is read via ip6_ext_authhdr_len() (4-byte units).
+ * - Fragment header: fixed sizeof (ip6_frag_hdr_t).
+ * - ICMP6 / UDP / TCP: fixed 4 / 8 / 20 bytes (presumably the minimum bytes
+ *   of each header that callers need - TODO confirm).
+ * - Anything else: 0.
+ *
+ * p is only dereferenced in the first two cases; the caller must make sure
+ * those bytes are readable.
+ */
+static inline int
+ip6_ext_header_len_s (ip_protocol_t nh, void *p)
+{
+ if (ip6_ext_hdr (nh))
+ return ip6_ext_header_len (p);
+ switch (nh)
+ {
+ case IP_PROTOCOL_IPSEC_AH:
+ return ip6_ext_authhdr_len (p);
+ case IP_PROTOCOL_IPV6_FRAGMENTATION:
+ return sizeof (ip6_frag_hdr_t);
+ case IP_PROTOCOL_ICMP6:
+ return 4;
+ case IP_PROTOCOL_UDP:
+ return 8;
+ case IP_PROTOCOL_TCP:
+ return 20;
+ default: /* Caller is responsible for validating the length of terminating
+ protocols */
+ ;
+ }
+ return 0;
+}
+
always_inline void *
ip6_ext_next_header (ip6_ext_header_t * ext_hdr)
{
return (void *) ((u8 *) ext_hdr + ip6_ext_header_len (ext_hdr));
}
+/*
+ * Returns the address offset bytes past hdr. No bounds checking is done
+ * here. The addition is performed directly on a void pointer, which relies
+ * on the GNU C extension that treats void-pointer arithmetic as byte
+ * arithmetic.
+ */
+always_inline void *
+ip6_ext_next_header_offset (void *hdr, u16 offset)
+{
+ return (hdr + offset);
+}
+
always_inline int
vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
void *obj, size_t len)
@@ -562,153 +558,144 @@ vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
return 1;
}
-/*
- * find ipv6 extension header within ipv6 header within buffer b
- *
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param header_type extension header type to search for
- * @param[out] prev_ext_header address of header preceding found header
- */
+/*
+ * Returns the number of bytes from obj to the end of the current data of
+ * buffer b, i.e. b->current_length minus the offset of obj from the start
+ * of the current data (b->data + b->current_data).
+ *
+ * obj is expected to point into the current data of b; the subtraction is
+ * not range-checked, so a pointer outside that region gives a meaningless
+ * (wrapped) u32 result.
+ */
+static inline u32
+vlib_bytes_left_in_buffer (vlib_buffer_t *b, void *obj)
+{
+ return b->current_length - (((u8 *) obj - b->data) - b->current_data);
+}
+
always_inline void *
-ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b,
- ip6_header_t * ip6_header, u8 header_type,
- ip6_ext_header_t ** prev_ext_header)
+ip6_ext_next_header_s (ip_protocol_t cur_nh, void *hdr, u32 max_offset,
+ u32 *offset, int *res_nh, bool *last)
{
- ip6_ext_header_t *prev = NULL;
- ip6_ext_header_t *result = NULL;
- if ((ip6_header)->protocol == header_type)
+ u16 hdrlen = 0;
+ int new_nh = -1;
+ void *res = 0;
+ if (ip6_ext_hdr (cur_nh))
{
- result = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
+ hdrlen = ip6_ext_header_len (hdr);
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) hdr;
+ if (ip6_frag_hdr_offset (frag_hdr) > 0)
+ *last = true;
+ new_nh = frag_hdr->next_hdr;
+ hdrlen = sizeof (ip6_frag_hdr_t);
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPSEC_AH)
+ {
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ hdrlen = ip6_ext_authhdr_len (hdr);
+ res = hdr + hdrlen;
}
else
{
- result = NULL;
- prev = (void *) (ip6_header + 1);
- while (ip6_ext_hdr (prev->next_hdr) && prev->next_hdr != header_type)
- {
- prev = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, prev,
- ip6_ext_header_len (prev)))
- {
- prev = NULL;
- break;
- }
- }
- if (prev && (prev->next_hdr == header_type))
- {
- result = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
- }
+ ;
}
- if (prev_ext_header)
+
+ if (res && (*offset + hdrlen) >= max_offset)
{
- *prev_ext_header = prev;
+ return 0;
}
- return result;
+ *res_nh = new_nh;
+ *offset += hdrlen;
+ return res;
}
+/* Maximum number of headers recorded in an ip6_ext_hdr_chain_t */
+#define IP6_EXT_HDR_MAX (4)
+/* Maximum depth, in bytes from the start of the IPv6 header, that a header
+ * chain walk will look into the buffer */
+#define IP6_EXT_HDR_MAX_DEPTH (256)
+/*
+ * Result of walking an IPv6 header chain.
+ *
+ * length is the number of valid entries in eh[]. Each entry records the
+ * protocol number of one header in the chain and its byte offset measured
+ * from the start of the IPv6 header (the first entry therefore sits at
+ * sizeof (ip6_header_t)).
+ */
+typedef struct
+{
+ int length;
+ struct
+ {
+ u16 protocol;
+ u16 offset;
+ } eh[IP6_EXT_HDR_MAX];
+} ip6_ext_hdr_chain_t;
+
/*
- * walk extension headers, looking for a specific extension header and last
- * extension header, calculating length of all extension headers
+ * Find ipv6 extension header within ipv6 header within
+ * whichever is smallest of buffer or IP6_EXT_HDR_MAX_DEPTH.
+ * The complete header chain must be in first buffer.
*
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param find_hdr extension header to look for (ignored if ext_hdr is NULL)
- * @param length[out] length of all extension headers
- * @param ext_hdr[out] extension header of type find_hdr (may be NULL)
- * @param last_ext_hdr[out] last extension header (may be NULL)
- *
- * @return 0 on success, -1 on failure (ext headers crossing buffer boundary)
+ * The complete header chain (up to the terminating header) is
+ * returned in res.
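+ * Returns the index of find_hdr_type within res if it was found. Otherwise
+ * it returns the index of the last header recorded in res, or -1 if the
+ * first header does not fit within the search bounds.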
*/
always_inline int
-ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b,
- const ip6_header_t * ip6_header, u8 find_hdr, u32 * length,
- ip6_ext_header_t ** ext_hdr,
- ip6_ext_header_t ** last_ext_hdr)
-{
- if (!ip6_ext_hdr (ip6_header->protocol))
- {
- *length = 0;
- *ext_hdr = NULL;
- *last_ext_hdr = NULL;
- return 0;
- }
- *length = 0;
- ip6_ext_header_t *h = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
+ip6_ext_header_walk (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ ip6_ext_hdr_chain_t *res)
+{
+ int i = 0;
+ int found = -1;
+ void *next_header = ip6_next_header (ip);
+ int next_proto = ip->protocol;
+ res->length = 0;
+ u32 n_bytes_this_buffer =
+ clib_min (vlib_bytes_left_in_buffer (b, ip), IP6_EXT_HDR_MAX_DEPTH);
+ u32 max_offset = clib_min (n_bytes_this_buffer,
+ sizeof (ip6_header_t) +
+ clib_net_to_host_u16 (ip->payload_length));
+ u32 offset = sizeof (ip6_header_t);
+ if ((ip6_ext_header_len_s (ip->protocol, next_header) + offset) > max_offset)
{
return -1;
}
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
- *ext_hdr = NULL;
- if (ip6_header->protocol == find_hdr)
+ bool last = false;
+ while (next_header)
{
- *ext_hdr = h;
+ /* Move on to next header */
+ res->eh[i].offset = offset;
+ res->eh[i].protocol = next_proto;
+ if (next_proto == find_hdr_type)
+ found = i;
+ i++;
+ if (last)
+ break;
+ if (i >= IP6_EXT_HDR_MAX)
+ break;
+ next_header = ip6_ext_next_header_s (next_proto, next_header, max_offset,
+ &offset, &next_proto, &last);
}
- while (ip6_ext_hdr (h->next_hdr))
+ res->length = i;
+ if (find_hdr_type < 0)
+ return i - 1;
+ return found != -1 ? found : i - 1;
+}
+
+always_inline void *
+ip6_ext_header_find (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip,
+ int find_hdr_type, ip6_ext_header_t **prev_ext_header)
+{
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res < 0)
+ return 0;
+
+ if (prev_ext_header)
{
- if (h->next_hdr == find_hdr)
+ if (res > 0)
{
- h = ip6_ext_next_header (h);
- *ext_hdr = h;
+ *prev_ext_header =
+ ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
}
else
{
- h = ip6_ext_next_header (h);
+ *prev_ext_header = 0;
}
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
- {
- return -1;
- }
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
}
+ if (find_hdr_type == hdr_chain.eh[res].protocol)
+ return ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
return 0;
}
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- /* Length of this header plus option data in 8 byte units. */
- u8 n_data_u64s;
- u8 data[0];
-}) ip6_hop_by_hop_ext_t;
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- u8 rsv;
- u16 fragment_offset_and_more;
- u32 identification;
-}) ip6_frag_hdr_t;
-/* *INDENT-ON* */
-
-#define ip6_frag_hdr_offset(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
-
-#define ip6_frag_hdr_offset_bytes(hdr) \
- (8 * ip6_frag_hdr_offset(hdr))
-
-#define ip6_frag_hdr_more(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
-
-#define ip6_frag_hdr_offset_and_more(offset, more) \
- clib_host_to_net_u16(((offset) << 3) + !!(more))
-
#endif /* included_ip6_packet_h */
/*
diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c
index 4edb673c3fa..78ca9521f53 100644
--- a/src/vnet/ip/ip6_punt_drop.c
+++ b/src/vnet/ip/ip6_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_punt) =
{
.arc_name = "ip6-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) =
.arc_name = "ip6-drop",
.start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip6_punt_policer_cfg;
@@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_policer_node) = {
.name = "ip6-punt-policer",
@@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = {
.node_name = "ip6-punt-policer",
.runs_before = VNET_FEATURES("ip6-punt-redirect")
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_feat_arc_ip6_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_drop_node) =
{
.name = "ip6-drop",
@@ -146,15 +141,11 @@ VLIB_REGISTER_NODE (ip6_drop_node) =
},
};
-VLIB_REGISTER_NODE (ip6_not_enabled_node) =
-{
+VLIB_REGISTER_NODE (ip6_not_enabled_node) = {
.name = "ip6-not-enabled",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "error-drop",
- },
+ .sibling_of = "ip6-drop",
};
VLIB_REGISTER_NODE (ip6_punt_node) =
@@ -179,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -243,7 +233,6 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
{
.path = "ip6 punt policer",
@@ -251,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
.short_help = "ip6 punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#define foreach_ip6_punt_redirect_error \
_(DROP, "ip6 punt redirect drop")
@@ -279,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_redirect_node) = {
.name = "ip6-punt-redirect",
.vector_size = sizeof (u32),
@@ -301,10 +288,11 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = {
.node_name = "ip6-punt-redirect",
.runs_before = VNET_FEATURES("error-punt")
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
+static u32 ip6_punt_redirect_enable_counts;
+
void
ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
const fib_route_path_t *rpaths)
@@ -313,13 +301,16 @@ ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP6, rpaths);
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
}
void
ip6_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip6_punt_redirect_enable_counts);
+ if (0 == --ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP6, rx_sw_if_index);
}
@@ -392,14 +383,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) =
{
.path = "ip6 punt redirect",
.function = ip6_punt_redirect_cmd,
.short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -420,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
{
.path = "show ip6 punt redirect",
@@ -428,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
.short_help = "show ip6 punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index 6a533e3b54e..29d5718d4da 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b,
ip6_header_t * ip6,
ip4_header_t * ip4, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp6_to_icmp_updater_pointer_table[] =
{ 0, 1, ~0, ~0,
2, 2, 9, 8,
@@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
24, 24, 24, 24,
24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
@@ -62,41 +60,25 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
* @returns 0 on success, non-zero value otherwise.
*/
static_always_inline int
-ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6,
- u32 buff_len, u8 * l4_protocol, u16 * l4_offset,
- u16 * frag_hdr_offset)
+ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len,
+ u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset)
{
- ip6_ext_header_t *last_hdr, *frag_hdr;
- u32 length;
- if (ip6_walk_ext_hdr
- (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr,
- &last_hdr))
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res =
+ ip6_ext_header_walk (b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0)
{
return -1;
}
-
- if (length > 0)
- {
- if (frag_hdr)
- {
- *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6;
- }
- else
- {
- *frag_hdr_offset = 0;
- }
- *l4_protocol = last_hdr->next_hdr;
- }
+ if (hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ *frag_hdr_offset = hdr_chain.eh[res].offset;
else
- {
- *frag_hdr_offset = 0;
- *l4_protocol = ip6->protocol;
- }
- *l4_offset = sizeof (*ip6) + length;
+ *frag_hdr_offset = 0;
- return (buff_len < (*l4_offset + 4)) ||
- (clib_net_to_host_u16 (ip6->payload_length) <
- (*l4_offset + 4 - sizeof (*ip6)));
+ *l4_protocol = hdr_chain.eh[hdr_chain.length - 1].protocol;
+ *l4_offset = hdr_chain.eh[hdr_chain.length - 1].offset;
+
+ return 0;
}
/**
@@ -124,13 +106,13 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
u16 frag_offset;
u8 *l4;
- if (ip6_parse
- (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
- return 0;
-
+ if (ip6_parse (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset,
+ &frag_offset))
+ {
+ return 0;
+ }
if (frag_offset &&
- ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
- u8_ptr_add (ip6, frag_offset))))
+ ip6_frag_hdr_offset (((ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset))))
return 0; //Can't deal with non-first fragment for now
if (ip_protocol)
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index f9f9ac783d9..644b4988abc 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, ip4_main.fibs)
{
send_ip_table_details(am, reg, mp->context, fib_table);
@@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
continue;
send_ip_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_fib_dump_walk_ctx_t_
@@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mfib_table, ip4_main.mfibs)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
@@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_mfib_dump_ctx_t_
@@ -514,7 +510,9 @@ vl_api_add_del_ip_punt_redirect_v2_t_handler (
goto out;
if (0 != n_paths)
- vec_validate (rpaths, n_paths - 1);
+ {
+ vec_validate (rpaths, n_paths - 1);
+ }
for (ii = 0; ii < n_paths; ii++)
{
@@ -601,6 +599,32 @@ ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api)
}
}
+/*
+ * Returns an unused table id, and ~0 if it can't find one.
+ *
+ * Candidates are probed at random: up to 1 << 10 random base values are
+ * drawn, and for each one up to 1 << 10 ids around it are tested with
+ * fib_table_find(). A candidate is accepted when fib_table_find() returns
+ * ~0 for it. Only the unicast FIB table is consulted here.
+ *
+ * The seed is a function-local static, so the sequence continues from one
+ * call to the next rather than restarting.
+ */
+u32
+ip_table_get_unused_id (fib_protocol_t fproto)
+{
+ int i, j;
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
+ {
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
+ {
+ /* look around randomly generated id: the step is -i for even i and
+ * +i for odd i, so successive candidates are base, base + 1,
+ * base - 1, base + 2, base - 2, ... (u32 arithmetic, wraps) */
+ seed += (2 * (i % 2) - 1) * i;
+ /* ~0 is the "none found" return value, never hand it out as an id */
+ if (seed == ~0)
+ continue;
+ if (fib_table_find (fproto, seed) == ~0)
+ return seed;
+ }
+ }
+
+ return ~0;
+}
+
void
vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
{
@@ -622,6 +646,29 @@ vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY);
}
+/*
+ * API handler for ip_table_allocate.
+ *
+ * If the request carries table id ~0 an unused id is picked with
+ * ip_table_get_unused_id(); otherwise the requested id is used as is. The
+ * table is then created through ip_table_create() with is_api set.
+ *
+ * The reply echoes the request's table description with table_id replaced
+ * by the id actually used. If no unused id could be found, rv is
+ * VNET_API_ERROR_EAGAIN, nothing is created and the replied table_id
+ * is ~0.
+ */
+void
+vl_api_ip_table_allocate_t_handler (vl_api_ip_table_allocate_t *mp)
+{
+ vl_api_ip_table_allocate_reply_t *rmp;
+ fib_protocol_t fproto =
+ (mp->table.is_ip6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+ u32 table_id = ntohl (mp->table.table_id);
+ int rv = 0;
+
+ /* ~0 in the request means "allocate an id for me" */
+ if (~0 == table_id)
+ table_id = ip_table_get_unused_id (fproto);
+
+ /* still ~0: the search for a free id gave up */
+ if (~0 == table_id)
+ rv = VNET_API_ERROR_EAGAIN;
+ else
+ ip_table_create (fproto, table_id, 1, mp->table.name);
+
+ REPLY_MACRO2 (VL_API_IP_TABLE_ALLOCATE_REPLY, {
+ clib_memcpy_fast (&rmp->table, &mp->table, sizeof (mp->table));
+ rmp->table.table_id = htonl (table_id);
+ })
+}
+
static int
ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp, u32 * stats_index)
{
@@ -731,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp)
rv = ip_route_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}))
- /* *INDENT-ON* */
}
void
@@ -788,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY,
npaths * sizeof (*fp),
({
@@ -808,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
}));
- /* *INDENT-ON* */
vec_free (rpaths);
}
@@ -895,20 +938,14 @@ ip_table_create (fib_protocol_t fproto,
fib_index = fib_table_find (fproto, table_id);
mfib_index = mfib_table_find (fproto, table_id);
- if (~0 == fib_index)
- {
- fib_table_find_or_create_and_lock_w_name (fproto, table_id,
- (is_api ?
- FIB_SOURCE_API :
- FIB_SOURCE_CLI), name);
- }
- if (~0 == mfib_index)
- {
- mfib_table_find_or_create_and_lock_w_name (fproto, table_id,
- (is_api ?
- MFIB_SOURCE_API :
- MFIB_SOURCE_CLI), name);
- }
+ /*
+ * Always try to re-lock in case the fib was deleted by an API call
+ * but was not yet freed because some other locks were held
+ */
+ fib_table_find_or_create_and_lock_w_name (
+ fproto, table_id, (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI), name);
+ mfib_table_find_or_create_and_lock_w_name (
+ fproto, table_id, (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI), name);
if ((~0 == fib_index) || (~0 == mfib_index))
call_elf_section_ip_table_callbacks (vnm, table_id, 1 /* is_add */ ,
@@ -936,9 +973,8 @@ mroute_add_del_handler (u8 is_add,
{
if (is_add)
{
- mfib_entry_index =
- mfib_table_entry_paths_update (fib_index, prefix,
- MFIB_SOURCE_API, rpaths);
+ mfib_entry_index = mfib_table_entry_paths_update (
+ fib_index, prefix, MFIB_SOURCE_API, entry_flags, rpaths);
}
else
{
@@ -1005,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
rv = api_mroute_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1073,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
if (mp->is_ipv6)
{
- /* *INDENT-OFF* */
/* Do not send subnet details of the IP-interface for
* unnumbered interfaces. otherwise listening clients
* will be confused that the subnet is applied on more
@@ -1087,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
};
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
fib_prefix_t pfx = {
@@ -1102,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1159,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
@@ -1170,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
mp->context);
}
}
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1193,13 +1221,11 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
/* Gather interfaces. */
sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
vec_foreach (si, sorted_sis)
{
@@ -1254,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp)
}
+/*
+ * API handler for set_ip_flow_hash_v3.
+ *
+ * Decodes the address family from the request and, if that succeeds, calls
+ * ip_flow_hash_set() with the table id and flow hash config converted from
+ * network to host byte order. The reply carries rv from whichever step
+ * failed, or from ip_flow_hash_set().
+ */
static void
+vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp)
+{
+ vl_api_set_ip_flow_hash_v3_reply_t *rmp;
+ ip_address_family_t af;
+ int rv;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ /* request fields arrive in network byte order: decode with ntohl */
+ if (!rv)
+ rv = ip_flow_hash_set (af, ntohl (mp->table_id),
+ ntohl (mp->flow_hash_config));
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY);
+}
+
+static void
vl_api_set_ip_flow_hash_router_id_t_handler (
vl_api_set_ip_flow_hash_router_id_t *mp)
{
@@ -1663,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_t *si;
/* Shut down interfaces in this FIB / clean out intfc routes */
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (fib_index == fib_table_get_index_for_sw_if_index (fproto,
@@ -1674,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags);
}
}
- /* *INDENT-ON* */
fib_table_flush (fib_index, fproto, FIB_SOURCE_API);
mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)),
@@ -1831,6 +1871,30 @@ void
REPLY_MACRO (VL_API_IP_REASSEMBLY_ENABLE_DISABLE_REPLY);
}
+/*
+ * API handler for ip_local_reass_enable_disable.
+ *
+ * Passes the request's enable_ip4 and enable_ip6 values to the IPv4 and
+ * IPv6 local full-reassembly enable/disable functions respectively. Both
+ * calls are always made, and the reply always carries rv == 0.
+ */
+void
+vl_api_ip_local_reass_enable_disable_t_handler (
+ vl_api_ip_local_reass_enable_disable_t *mp)
+{
+ vl_api_ip_local_reass_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ ip4_local_full_reass_enable_disable (mp->enable_ip4);
+ ip6_local_full_reass_enable_disable (mp->enable_ip6);
+
+ REPLY_MACRO (VL_API_IP_LOCAL_REASS_ENABLE_DISABLE_REPLY);
+}
+
+/*
+ * API handler for ip_local_reass_get.
+ *
+ * Replies with the current enabled state of IPv4 and IPv6 local full
+ * reassembly. rv is always 0.
+ */
+void
+vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp)
+{
+ vl_api_ip_local_reass_get_reply_t *rmp;
+ int rv = 0;
+ /* the id given to the reply macro must be that of the reply message
+ * (rmp is a vl_api_ip_local_reass_get_reply_t), not of the request */
+ REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET_REPLY, {
+ rmp->ip4_is_enabled = ip4_local_full_reass_enabled ();
+ rmp->ip6_is_enabled = ip6_local_full_reass_enabled ();
+ });
+}
+
static walk_rc_t
send_ip_punt_redirect_details (u32 rx_sw_if_index,
const ip_punt_redirect_rx_t * ipr, void *arg)
@@ -2049,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm)
api_main_t *am = vlibapi_get_main ();
/*
- * Mark the route add/del API as MP safe
+ * Set up the (msg_name, crc, message-id) table
*/
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_REPLY] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2_REPLY] = 1;
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
/*
- * Set up the (msg_name, crc, message-id) table
+ * Mark the route add/del API as MP safe
*/
- REPLY_MSG_ID_BASE = setup_message_id_table ();
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
return 0;
}
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
index 1ac7248ea05..4fbf1fb74fa 100644
--- a/src/vnet/ip/ip_checksum.c
+++ b/src/vnet/ip/ip_checksum.c
@@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_checksum, static) =
{
.path = "test ip checksum",
.short_help = "test ip checksum",
.function = test_ip_checksum_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_DEBUG */
diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c
index 18d07ba6082..1618704e804 100644
--- a/src/vnet/ip/ip_container_proxy.c
+++ b/src/vnet/ip/ip_container_proxy.c
@@ -138,7 +138,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
};
u32 fib_index;
- /* *INDENT-OFF* */
pool_foreach_index (fib_index, ip4_main.fibs)
{
fib_table_walk (fib_index, FIB_PROTOCOL_IP4,
@@ -149,7 +148,6 @@ ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
ip_container_proxy_fib_table_walk, &wctx);
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -216,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_container_command_node, static) = {
.path = "ip container",
.function = ip_container_cmd,
.short_help = "ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
@@ -275,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_container_command, static) = {
.path = "show ip container",
.function = show_ip_container_cmd_fn,
.short_help = "show ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h
index bd37ef7307b..30dfcd70a1b 100644
--- a/src/vnet/ip/ip_flow_hash.h
+++ b/src/vnet/ip/ip_flow_hash.h
@@ -38,7 +38,17 @@
_ (proto, 4, IP_FLOW_HASH_PROTO) \
_ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \
_ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \
- _ (flowlabel, 7, IP_FLOW_HASH_FL)
+ _ (flowlabel, 7, IP_FLOW_HASH_FL) \
+ _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID)
+
+/*
+ * Packed 8-byte header layout: one byte of version/flags, one byte of
+ * message type, a 16-bit length and a 32-bit teid.
+ * NOTE(review): going by the names this is the fixed part of a GTPv1-U
+ * header with multi-byte fields in network byte order - confirm against
+ * the code that parses it.
+ */
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} __attribute__ ((packed)) gtpv1u_header_t;
+/*
+ * NOTE(review): 0x6808 is 0x0868 (decimal 2152) with its two bytes swapped,
+ * presumably the GTP-U UDP port as it reads when a network-order u16 is
+ * loaded on a little-endian host. As a plain literal it would not match on
+ * a big-endian host - confirm that is acceptable for the users of this
+ * macro.
+ */
+#define GTPV1_PORT_BE 0x6808
/**
* A flow hash configuration is a mask of the flow hash options
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index cafa9a66d6b..934e40a5d18 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -25,10 +25,10 @@
typedef struct
{
- u8 ipv6;
u16 mtu;
u8 next;
u16 n_fragments;
+ u16 pkt_size;
} ip_frag_trace_t;
static u8 *
@@ -37,8 +37,8 @@ format_ip_frag_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
- s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
- t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
+ s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu,
+ t->pkt_size, t->n_fragments, t->next);
return s;
}
@@ -95,7 +95,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset, head_bytes;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
@@ -103,9 +103,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
- max =
- (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) -
- sizeof (ip4_header_t)) & ~0x7;
+ head_bytes = sizeof (ip4_header_t) + l2unfragmentablesize;
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
@@ -142,8 +142,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer =
- from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
+ u16 left_in_from_buffer = from_b->current_length - head_bytes;
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -166,8 +165,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip4 header */
to_data = vlib_buffer_get_current (to_b);
- clib_memcpy_fast (to_data, org_from_packet,
- l2unfragmentablesize + sizeof (ip4_header_t));
+ clib_memcpy_fast (to_data, org_from_packet, head_bytes);
to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
@@ -213,8 +211,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
}
to_b->flags |= VNET_BUFFER_F_IS_IP4;
- to_b->current_length =
- len + sizeof (ip4_header_t) + l2unfragmentablesize;
+ to_b->current_length = len + head_bytes;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -286,7 +283,7 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
tr->mtu = mtu;
- tr->ipv6 = is_ip6 ? 1 : 0;
+ tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
}
@@ -385,13 +382,17 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_header_t *ip6;
u16 len, max, rem, ip_frag_id;
u8 *org_from_packet;
+ u16 head_bytes;
from_b = vlib_get_buffer (vm, from_bi);
org_from_packet = vlib_buffer_get_current (from_b);
ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
+ head_bytes =
+ (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize);
rem = clib_net_to_host_u16 (ip6->payload_length);
- max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
@@ -423,9 +424,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_frag_hdr_t *to_frag_hdr;
u8 *to_data;
- len =
- (rem >
- (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+ len = (rem > max ? max : rem);
if (len != rem) /* Last fragment does not need to divisible by 8 */
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
@@ -438,7 +437,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip6 header */
clib_memcpy_fast (to_b->data, org_from_packet,
l2unfragmentablesize + sizeof (ip6_header_t));
- to_ip6 = vlib_buffer_get_current (to_b);
+ to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize;
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -484,8 +483,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
to_ptr += bytes_to_copy;
}
- to_b->current_length =
- len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+ to_b->current_length = len + head_bytes;
to_ip6->payload_length =
clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
@@ -502,13 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
return IP_FRAG_ERROR_NONE;
}
-static char *ip4_frag_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip_frag_error
-#undef _
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
.function = ip4_frag,
.name = IP4_FRAG_NODE_NAME,
@@ -517,21 +508,17 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP_FRAG_NEXT_DROP] = "ip4-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop" },
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
.function = ip6_frag,
.name = IP6_FRAG_NODE_NAME,
@@ -540,19 +527,16 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
- [IP_FRAG_NEXT_DROP] = "ip6-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop" },
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index 86462e6c7d2..4ddd62b89e6 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -36,6 +36,7 @@
#define IP_FRAG_H
#include <vnet/vnet.h>
+#include <vnet/ip/ip.api_enum.h>
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
@@ -57,24 +58,7 @@ typedef enum
IP_FRAG_N_NEXT
} ip_frag_next_t;
-#define foreach_ip_frag_error \
- /* Must be first. */ \
- _(NONE, "packet fragmented") \
- _(SMALL_PACKET, "packet smaller than MTU") \
- _(FRAGMENT_SENT, "number of sent fragments") \
- _(CANT_FRAGMENT_HEADER, "can't fragment header") \
- _(DONT_FRAGMENT_SET, "can't fragment this packet") \
- _(MALFORMED, "malformed packet") \
- _(MEMORY, "could not allocate buffer") \
- _(UNKNOWN, "unknown error")
-
-typedef enum
-{
-#define _(sym,str) IP_FRAG_ERROR_##sym,
- foreach_ip_frag_error
-#undef _
- IP_FRAG_N_ERROR,
-} ip_frag_error_t;
+typedef vl_counter_ip_frag_enum_t ip_frag_error_t;
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c
index a5e652e1ee8..eb3c94a188a 100644
--- a/src/vnet/ip/ip_in_out_acl.c
+++ b/src/vnet/ip/ip_in_out_acl.c
@@ -32,11 +32,26 @@ format_ip_in_out_acl_trace (u8 * s, u32 is_output, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_in_out_acl_trace_t *t = va_arg (*args, ip_in_out_acl_trace_t *);
-
- s = format (s, "%s: sw_if_index %d, next_index %d, table %d, offset %d",
- is_output ? "OUTACL" : "INACL",
- t->sw_if_index, t->next_index, t->table_index, t->offset);
- return s;
+ const vnet_classify_main_t *vcm = &vnet_classify_main;
+ const u32 indent = format_get_indent (s);
+ vnet_classify_table_t *table;
+ vnet_classify_entry_t *e;
+
+ s =
+ format (s, "%s: sw_if_index %d, next_index %d, table_index %d, offset %d",
+ is_output ? "OUTACL" : "INACL", t->sw_if_index, t->next_index,
+ t->table_index, t->offset);
+
+ if (pool_is_free_index (vcm->tables, t->table_index))
+ return format (s, "\n%Uno table", format_white_space, indent + 4);
+
+ if (~0 == t->offset)
+ return format (s, "\n%Uno match", format_white_space, indent + 4);
+
+ table = vnet_classify_table_get (t->table_index);
+ e = vnet_classify_get_entry (table, t->offset);
+ return format (s, "\n%U%U", format_white_space, indent + 4,
+ format_classify_entry, table, e);
}
static u8 *
@@ -97,57 +112,40 @@ static char *ip_outacl_error_strings[] = {
};
static_always_inline void
-ip_in_out_acl_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_buffer_t ** b,
- u16 * next, u32 n_left, int is_ip4, int is_output,
- int do_trace)
+ip_in_out_acl_inline_trace (
+ vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ vlib_buffer_t **b, u16 *next, u32 n_left, u32 *hits__, u32 *misses__,
+ u32 *chain_hits__, const vlib_error_t error_none,
+ const vlib_error_t error_deny, const vlib_error_t error_miss,
+ vnet_classify_table_t *tables, const u32 *table_index_by_sw_if_index,
+ u32 *fib_index_by_sw_if_index, vnet_config_main_t *cm,
+ const vlib_rx_or_tx_t way, const int is_output, const int do_trace)
{
- in_out_acl_main_t *am = &in_out_acl_main;
- vnet_classify_main_t *vcm = am->vnet_classify_main;
f64 now = vlib_time_now (vm);
u32 hits = 0;
u32 misses = 0;
u32 chain_hits = 0;
- in_out_acl_table_id_t tid;
- vlib_node_runtime_t *error_node;
- u32 n_next_nodes;
-
+ u32 n_next_nodes = node->n_next_nodes;
u8 *h[4];
u32 sw_if_index[4];
u32 table_index[4];
vnet_classify_table_t *t[4] = { 0, 0 };
- u64 hash[4];
-
- n_next_nodes = node->n_next_nodes;
-
- if (is_ip4)
- {
- tid = IN_OUT_ACL_TABLE_IP4;
- error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
- }
- else
- {
- tid = IN_OUT_ACL_TABLE_IP6;
- error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
- }
+ u32 hash[4];
/* calculate hashes for b[0] & b[1] */
if (n_left >= 2)
{
- sw_if_index[2] =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
- sw_if_index[3] =
- vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ /* ~0 is used as a wildcard to say 'always use sw_if_index 0'
+ * aka local0. It is used when we do not care about the sw_if_index, as
+ * when punting */
+ sw_if_index[2] = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
+ sw_if_index[3] = ~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way];
- table_index[2] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[2]];
- table_index[3] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[3]];
+ table_index[2] = table_index_by_sw_if_index[sw_if_index[2]];
+ table_index[3] = table_index_by_sw_if_index[sw_if_index[3]];
- t[2] = pool_elt_at_index (vcm->tables, table_index[2]);
- t[3] = pool_elt_at_index (vcm->tables, table_index[3]);
+ t[2] = pool_elt_at_index (tables, table_index[2]);
+ t[3] = pool_elt_at_index (tables, table_index[3]);
if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h[2] =
@@ -164,16 +162,16 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[0])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[1])->l2_classify.pad.l2_len =
+ vnet_buffer (b[1])->l2.l2_len =
vnet_buffer (b[1])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[1])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -198,7 +196,6 @@ ip_in_out_acl_inline (vlib_main_t * vm,
{
vnet_classify_entry_t *e[2] = { 0, 0 };
u32 _next[2] = { ACL_NEXT_INDEX_DENY, ACL_NEXT_INDEX_DENY };
- u8 error[2];
h[0] = h[2];
h[1] = h[3];
@@ -228,19 +225,15 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (n_left >= 4)
{
sw_if_index[2] =
- vnet_buffer (b[2])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[2])->sw_if_index[way];
sw_if_index[3] =
- vnet_buffer (b[3])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[3])->sw_if_index[way];
- table_index[2] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[2]];
- table_index[3] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[3]];
+ table_index[2] = table_index_by_sw_if_index[sw_if_index[2]];
+ table_index[3] = table_index_by_sw_if_index[sw_if_index[3]];
- t[2] = pool_elt_at_index (vcm->tables, table_index[2]);
- t[3] = pool_elt_at_index (vcm->tables, table_index[3]);
+ t[2] = pool_elt_at_index (tables, table_index[2]);
+ t[3] = pool_elt_at_index (tables, table_index[3]);
if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h[2] =
@@ -259,16 +252,16 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[2])->l2_classify.pad.l2_len =
+ vnet_buffer (b[2])->l2.l2_len =
vnet_buffer (b[2])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[2])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[2])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[3])->l2_classify.pad.l2_len =
+ vnet_buffer (b[3])->l2.l2_len =
vnet_buffer (b[3])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[3])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[3])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -292,11 +285,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
}
/* find entry for b[0] & b[1] */
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[0]->current_config_index, &_next[0],
+ vnet_get_config_data (cm, &b[0]->current_config_index, &_next[0],
/* # bytes of config data */ 0);
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[1]->current_config_index, &_next[1],
+ vnet_get_config_data (cm, &b[1]->current_config_index, &_next[1],
/* # bytes of config data */ 0);
if (PREDICT_TRUE (table_index[0] != ~0))
@@ -314,15 +305,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error =
+ (_next[0] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -330,17 +314,22 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[0]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e[0]->metadata;
else if (e[0]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[0]->next_table_index != ~0))
- t[0] = pool_elt_at_index (vcm->tables,
- t[0]->next_table_index);
+ table_index[0] = t[0]->next_table_index;
+ if (PREDICT_TRUE (table_index[0] != ~0))
+ t[0] = pool_elt_at_index (tables, table_index[0]);
else
{
_next[0] = (t[0]->miss_next_index < n_next_nodes) ?
@@ -348,15 +337,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -369,7 +352,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[0] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[0] += vnet_buffer (b[0])->l2.l2_len;
hash[0] =
vnet_classify_hash_packet_inline (t[0], (u8 *) h[0]);
@@ -386,15 +369,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
chain_hits++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -406,8 +383,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[0]->metadata;
else if (e[0]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -430,15 +413,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error =
+ (_next[1] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -446,17 +422,22 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[1]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[1])->sw_if_index[VLIB_TX] = e[1]->metadata;
else if (e[1]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[1]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[1]->next_table_index != ~0))
- t[1] = pool_elt_at_index (vcm->tables,
- t[1]->next_table_index);
+ table_index[1] = t[1]->next_table_index;
+ if (PREDICT_TRUE (table_index[1] != ~0))
+ t[1] = pool_elt_at_index (tables, table_index[1]);
else
{
_next[1] = (t[1]->miss_next_index < n_next_nodes) ?
@@ -464,15 +445,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -485,7 +460,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[1] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[1] += vnet_buffer (b[1])->l2.l2_len;
hash[1] =
vnet_classify_hash_packet_inline (t[1], (u8 *) h[1]);
@@ -502,15 +477,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
chain_hits++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -522,8 +491,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[1]->metadata;
else if (e[1]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[1]);
+ }
}
break;
}
@@ -536,9 +511,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *_t =
vlib_add_trace (vm, node, b[0], sizeof (*_t));
_t->sw_if_index =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
_t->next_index = _next[0];
- _t->table_index = t[0] ? t[0] - vcm->tables : ~0;
+ _t->table_index = table_index[0];
_t->offset = (e[0]
&& t[0]) ? vnet_classify_get_offset (t[0], e[0]) : ~0;
}
@@ -548,9 +523,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *_t =
vlib_add_trace (vm, node, b[1], sizeof (*_t));
_t->sw_if_index =
- vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way];
_t->next_index = _next[1];
- _t->table_index = t[1] ? t[1] - vcm->tables : ~0;
+ _t->table_index = table_index[1];
_t->offset = (e[1]
&& t[1]) ? vnet_classify_get_offset (t[1], e[1]) : ~0;
}
@@ -584,15 +559,12 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_classify_table_t *t0 = 0;
vnet_classify_entry_t *e0 = 0;
u32 next0 = ACL_NEXT_INDEX_DENY;
- u64 hash0;
- u8 error0;
+ u32 hash0;
- sw_if_index0 =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
- table_index0 =
- am->classify_table_index_by_sw_if_index[is_output][tid][sw_if_index0];
+ sw_if_index0 = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
+ table_index0 = table_index_by_sw_if_index[sw_if_index0];
- t0 = pool_elt_at_index (vcm->tables, table_index0);
+ t0 = pool_elt_at_index (tables, table_index0);
if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h0 =
@@ -603,10 +575,10 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
}
vnet_buffer (b[0])->l2_classify.hash =
@@ -615,14 +587,13 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_buffer (b[0])->l2_classify.table_index = table_index0;
vnet_buffer (b[0])->l2_classify.opaque_index = ~0;
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[0]->current_config_index, &next0,
+ vnet_get_config_data (cm, &b[0]->current_config_index, &next0,
/* # bytes of config data */ 0);
if (PREDICT_TRUE (table_index0 != ~0))
{
hash0 = vnet_buffer (b[0])->l2_classify.hash;
- t0 = pool_elt_at_index (vcm->tables, table_index0);
+ t0 = pool_elt_at_index (tables, table_index0);
if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h0 =
@@ -633,7 +604,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
e0 = vnet_classify_find_entry_inline (t0, (u8 *) h0, hash0, now);
if (e0)
@@ -646,15 +617,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error =
+ (next0 == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -662,16 +626,21 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t0->next_table_index != ~0))
- t0 =
- pool_elt_at_index (vcm->tables, t0->next_table_index);
+ table_index0 = t0->next_table_index;
+ if (PREDICT_TRUE (table_index0 != ~0))
+ t0 = pool_elt_at_index (tables, table_index0);
else
{
next0 = (t0->miss_next_index < n_next_nodes) ?
@@ -679,15 +648,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -700,7 +663,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
hash0 = vnet_classify_hash_packet_inline (t0, (u8 *) h0);
e0 = vnet_classify_find_entry_inline
@@ -714,15 +677,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e0->next_index : next0;
hits++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -733,8 +690,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e0->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -747,9 +710,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
t->sw_if_index =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
t->next_index = next0;
- t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->table_index = table_index0;
t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
}
@@ -767,69 +730,92 @@ ip_in_out_acl_inline (vlib_main_t * vm,
n_left--;
}
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_MISS :
- IP_INACL_ERROR_MISS, misses);
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_HIT :
- IP_INACL_ERROR_HIT, hits);
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_CHAIN_HIT :
- IP_INACL_ERROR_CHAIN_HIT, chain_hits);
+ *hits__ = hits;
+ *misses__ = misses;
+ *chain_hits__ = chain_hits;
}
-VLIB_NODE_FN (ip4_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static_always_inline uword
+ip_in_out_acl_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, const in_out_acl_table_id_t tid,
+ u32 *fib_index_by_sw_if_index,
+ const vlib_node_registration_t *parent_error_node,
+ const u32 error_none_index, const u32 error_deny_index,
+ const u32 error_miss_index, const vlib_rx_or_tx_t way,
+ const int is_output)
{
-
- u32 *from;
+ const in_out_acl_main_t *am = &in_out_acl_main;
+ vnet_classify_table_t *tables = am->vnet_classify_main->tables;
+ u32 *from = vlib_frame_vector_args (frame);
+ const u32 *table_index_by_sw_if_index =
+ am->classify_table_index_by_sw_if_index[is_output][tid];
+ vnet_config_main_t *cm = am->vnet_config_main[is_output][tid];
+ const vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, parent_error_node->index);
+ const vlib_error_t error_none = error_node->errors[error_none_index];
+ const vlib_error_t error_deny = error_node->errors[error_deny_index];
+ const vlib_error_t error_miss = error_node->errors[error_miss_index];
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
+ u32 hits, misses, chain_hits;
vlib_get_buffers (vm, from, bufs, frame->n_vectors);
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 0 /* is_output */ , 1 /* is_trace */ );
+#define ip_in_out_acl_inline_trace__(do_trace) \
+ ip_in_out_acl_inline_trace ( \
+ vm, node, frame, bufs, nexts, frame->n_vectors, &hits, &misses, \
+ &chain_hits, error_none, error_deny, error_miss, tables, \
+ table_index_by_sw_if_index, fib_index_by_sw_if_index, cm, way, is_output, \
+ do_trace)
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ ip_in_out_acl_inline_trace__ (1 /* do_trace */);
else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 0 /* is_output */ , 0 /* is_trace */ );
+ ip_in_out_acl_inline_trace__ (0 /* do_trace */);
+
+ vlib_node_increment_counter (
+ vm, node->node_index,
+ is_output ? IP_OUTACL_ERROR_MISS : IP_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (
+ vm, node->node_index, is_output ? IP_OUTACL_ERROR_HIT : IP_INACL_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ is_output ? IP_OUTACL_ERROR_CHAIN_HIT :
+ IP_INACL_ERROR_CHAIN_HIT,
+ chain_hits);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
return frame->n_vectors;
}
-VLIB_NODE_FN (ip4_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_inacl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 1 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 1 /* is_output */ , 0 /* is_trace */ );
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, ip4_main.fib_index_by_sw_if_index,
+ &ip4_input_node, IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY,
+ IP4_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
+}
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+VLIB_NODE_FN (ip4_punt_acl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4_PUNT,
+ ip4_main.fib_index_by_sw_if_index, &ip4_input_node, IP4_ERROR_NONE,
+ IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
+}
- return frame->n_vectors;
+VLIB_NODE_FN (ip4_outacl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, NULL, &ip4_input_node,
+ IP4_ERROR_NONE, IP4_ERROR_OUTACL_SESSION_DENY, IP4_ERROR_OUTACL_TABLE_MISS,
+ VLIB_TX, 1 /* is_output */);
+}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_inacl_node) = {
.name = "ip4-inacl",
.vector_size = sizeof (u32),
@@ -843,6 +829,19 @@ VLIB_REGISTER_NODE (ip4_inacl_node) = {
},
};
+VLIB_REGISTER_NODE (ip4_punt_acl_node) = {
+ .name = "ip4-punt-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "ip4-drop",
+ },
+};
+
VLIB_REGISTER_NODE (ip4_outacl_node) = {
.name = "ip4-outacl",
.vector_size = sizeof (u32),
@@ -855,59 +854,41 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip4-drop",
},
};
-/* *INDENT-ON* */
+
+VNET_FEATURE_INIT (ip4_punt_acl_feature) = {
+ .arc_name = "ip4-punt", /* arc the feature is declared on */
+ .node_name = "ip4-punt-acl", /* same string as .name in the ip4_punt_acl_node registration */
+ .runs_after = VNET_FEATURES ("ip4-punt-policer"), /* ordering relative to the policer feature */
+};
VLIB_NODE_FN (ip6_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{ /* New form: a single call that takes the frame itself and whose result is returned. */
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 0 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 0 /* is_output */ , 0 /* is_trace */ );
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, ip6_main.fib_index_by_sw_if_index, /* IPv6 table type with the IPv6 FIB-index map */
+ &ip6_input_node, IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY,
+ IP6_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
+}
- return frame->n_vectors;
+/* IPv6 punt ACL node function: passes the frame to the shared ACL inline
+ * with the IP6_PUNT table type and returns its result.  The per-interface
+ * FIB-index map is taken from ip6_main, as in ip6_inacl_node; way is ~0 and
+ * is_output is 0. */
+VLIB_NODE_FN (ip6_punt_acl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6_PUNT,
+ ip6_main.fib_index_by_sw_if_index, &ip6_input_node, IP6_ERROR_NONE,
+ IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
}
VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{ /* Output-side variant: is_output = 1, way = VLIB_TX, NULL FIB-index map. */
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 1 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 1 /* is_output */ , 0 /* is_trace */ );
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
- return frame->n_vectors;
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, NULL, &ip6_input_node,
+ IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, /* NOTE(review): INACL_* codes and ip6_input_node on the output node -- confirm intended */
+ VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_inacl_node) = {
.name = "ip6-inacl",
.vector_size = sizeof (u32),
@@ -921,6 +902,19 @@ VLIB_REGISTER_NODE (ip6_inacl_node) = {
},
};
+VLIB_REGISTER_NODE (ip6_punt_acl_node) = {
+ .name = "ip6-punt-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace, /* reuses the inacl-named trace formatter */
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings, /* reuses the inacl-named error strings */
+ /* Only the DENY next index is named in this initializer. */
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "ip6-drop",
+ },
+};
+
VLIB_REGISTER_NODE (ip6_outacl_node) = {
.name = "ip6-outacl",
.vector_size = sizeof (u32),
@@ -933,7 +927,12 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip6-drop",
},
};
-/* *INDENT-ON* */
+
+VNET_FEATURE_INIT (ip6_punt_acl_feature) = {
+ .arc_name = "ip6-punt", /* arc the feature is declared on */
+ .node_name = "ip6-punt-acl", /* same string as .name in the ip6_punt_acl_node registration */
+ .runs_after = VNET_FEATURES ("ip6-punt-policer"), /* ordering relative to the policer feature */
+};
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index 8894a878881..c2490f196ef 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -104,7 +104,6 @@ do { \
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_main_init) = {
.init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init",
"icmp4_init", "icmp6_init", "ip6_hop_by_hop_init",
@@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"in_out_acl_init", "policer_classify_init",
"flow_classify_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c
index 48c20a6cf34..ca1938f651a 100644
--- a/src/vnet/ip/ip_interface.c
+++ b/src/vnet/ip/ip_interface.c
@@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip4_address_t *ip4;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip4 = ip_interface_address_get_address (lm4, ia);
if (ip4_address_compare (ip4, &ip->ip4) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
else
{
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip6_address_t *ip6;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6 = ip_interface_address_get_address (lm6, ia);
if (ip6_address_compare (ip6, &ip->ip6) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
return 0;
}
@@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (is_ip4)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
return ip_interface_address_get_address (lm4, ia);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6_address_t *rv;
@@ -197,21 +190,19 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (!ip6_address_is_link_local_unicast (rv))
return rv;
}));
- /* *INDENT-ON* */
}
return 0;
}
-static walk_rc_t
-ip_interface_address_mark_one_interface (vnet_main_t * vnm,
- vnet_sw_interface_t * si, void *ctx)
+walk_rc_t
+ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si, void *ctx)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ ,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
@@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t * vnm,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
}));
- /* *INDENT-ON* */
return (WALK_CONTINUE);
}
@@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
u32 *ip4_masks = 0;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1,
({
if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
@@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
vec_add1 (ip6_masks, ia->address_length);
}
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i],
diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h
index b48eebdbc90..f0034ed0314 100644
--- a/src/vnet/ip/ip_interface.h
+++ b/src/vnet/ip/ip_interface.h
@@ -38,6 +38,9 @@ void ip_interface_address_sweep (void);
u32 ip_interface_address_find (ip_lookup_main_t * lm,
void *addr_fib, u32 address_length);
u8 ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4);
+walk_rc_t ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si,
+ void *ctx);
always_inline void *
ip_interface_address_get_address (ip_lookup_main_t * lm,
@@ -53,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k)
return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0;
}
-/* *INDENT-OFF* */
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
do { \
vnet_main_t *_vnm = vnet_get_main(); \
@@ -87,7 +89,6 @@ do { \
body; \
} \
} while (0)
-/* *INDENT-ON* */
#endif /* included_ip_interface_h */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
index b0b5f41260c..04cf9f11d70 100644..100755
--- a/src/vnet/ip/ip_packet.h
+++ b/src/vnet/ip/ip_packet.h
@@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1);
extern u8 *format_ip_ecn (u8 * s, va_list * va);
-/* IP checksum support. */
-
-static_always_inline u16
-ip_csum (void *data, u16 n_left)
-{
- u32 sum;
-#ifdef CLIB_HAVE_VEC256
- u16x16 v1, v2;
- u32x8 zero = { 0 };
- u32x8 sum8 = { 0 };
- u32x4 sum4;
-#endif
-
- /* if there is odd number of bytes, pad by zero and store in sum */
- sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
-
- /* we deal with words */
- n_left >>= 1;
-
-#ifdef CLIB_HAVE_VEC256
- while (n_left >= 32)
- {
- v1 = u16x16_load_unaligned (data);
- v2 = u16x16_load_unaligned (data + 32);
-
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
- v2 = u16x16_byte_swap (v2);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
- n_left -= 32;
- data += 64;
- }
-
- if (n_left >= 16)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- n_left -= 16;
- data += 32;
- }
-
- if (n_left)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- v1 = u16x16_mask_last (v1, 16 - n_left);
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- }
-
- sum8 = u32x8_hadd (sum8, zero);
- sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
- sum += sum4[0] + sum4[1];
-
-#else
- /* scalar version */
- while (n_left >= 8)
- {
- sum += clib_net_to_host_u16 (*((u16 *) data + 0));
- sum += clib_net_to_host_u16 (*((u16 *) data + 1));
- sum += clib_net_to_host_u16 (*((u16 *) data + 2));
- sum += clib_net_to_host_u16 (*((u16 *) data + 3));
- sum += clib_net_to_host_u16 (*((u16 *) data + 4));
- sum += clib_net_to_host_u16 (*((u16 *) data + 5));
- sum += clib_net_to_host_u16 (*((u16 *) data + 6));
- sum += clib_net_to_host_u16 (*((u16 *) data + 7));
- n_left -= 8;
- data += 16;
- }
- while (n_left)
- {
- sum += clib_net_to_host_u16 (*(u16 *) data);
- n_left -= 1;
- data += 2;
- }
-#endif
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return ~((u16) sum);
-}
-
/* Incremental checksum update. */
typedef uword ip_csum_t;
@@ -301,6 +209,20 @@ always_inline u16
ip_csum_fold (ip_csum_t c)
{
/* Reduce to 16 bits. */
+#if defined(__x86_64__) && defined(__BMI2__)
+ u64 tmp;
+ asm volatile(
+ /* using ADC is much faster than mov, shift, add sequence
+ * compiler produces */
+ "mov %k[sum], %k[tmp] \n\t"
+ "shr $32, %[sum] \n\t"
+ "add %k[tmp], %k[sum] \n\t"
+ "mov $16, %k[tmp] \n\t"
+ "shrx %k[tmp], %k[sum], %k[tmp] \n\t"
+ "adc %w[tmp], %w[sum] \n\t"
+ "adc $0, %w[sum] \n\t"
+ : [ sum ] "+&r"(c), [ tmp ] "=&r"(tmp));
+#else
#if uword_bits == 64
c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
c = (c & 0xffff) + (c >> 16);
@@ -308,7 +230,7 @@ ip_csum_fold (ip_csum_t c)
c = (c & 0xffff) + (c >> 16);
c = (c & 0xffff) + (c >> 16);
-
+#endif
return c;
}
diff --git a/src/vnet/ip/ip_path_mtu.c b/src/vnet/ip/ip_path_mtu.c
index 38adb44065b..ccb57e1e352 100644
--- a/src/vnet/ip/ip_path_mtu.c
+++ b/src/vnet/ip/ip_path_mtu.c
@@ -297,10 +297,19 @@ ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
static ip_pmtu_dpo_t *
ip_pmtu_dpo_alloc (void)
{ /* Returns a zeroed element taken from ip_pmtu_dpo_pool. */
+ vlib_main_t *vm = vlib_get_main ();
+ u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool); /* sampled before the get below */
ip_pmtu_dpo_t *ipm;
+ /* Barrier is taken only when the get is expected to expand the pool -- presumably to keep workers off it while it grows; confirm. */
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm); /* paired with the sync above */
+
return (ipm);
}
@@ -353,18 +362,16 @@ ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
}
void
-ip_pmtu_dpo_add_or_lock (fib_protocol_t fproto, u16 pmtu, dpo_id_t *dpo)
+ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
{ /* Despite the name, every call allocates a new object; no lookup of an existing one is visible here. */
ip_pmtu_dpo_t *ipm;
- dpo_id_t parent = DPO_INVALID;
ipm = ip_pmtu_dpo_alloc ();
- ipm->ipm_proto = fib_proto_to_dpo (fproto);
+ ipm->ipm_proto = parent->dpoi_proto; /* protocol now comes from the caller's parent DPO */
ipm->ipm_pmtu = pmtu;
- dpo_copy (&parent, drop_dpo_get (ipm->ipm_proto));
- dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, &parent);
+ dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent); /* parent is caller-supplied; the drop DPO is no longer fetched here */
dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm)); /* result written to the caller's dpo */
}
@@ -516,7 +523,9 @@ ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
/*
* interpose a policy DPO from the nh so that MTU is applied
*/
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
+ &ip_dpo);
fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -587,7 +596,9 @@ ip_pmtu_stack (ip_pmtu_t *ipt)
{
dpo_id_t ip_dpo = DPO_INVALID;
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (
+ ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);
fib_table_entry_special_dpo_update (
fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -826,7 +837,8 @@ ip_path_module_init (vlib_main_t *vm)
adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
FIB_SOURCE_BH_SIMPLE);
- ip_pmtu_fib_type = fib_node_register_new_type (&ip_ptmu_fib_node_vft);
+ ip_pmtu_fib_type =
+ fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
diff --git a/src/vnet/ip/ip_path_mtu.h b/src/vnet/ip/ip_path_mtu.h
index 2c54fcd7401..96a5227237a 100644
--- a/src/vnet/ip/ip_path_mtu.h
+++ b/src/vnet/ip/ip_path_mtu.h
@@ -100,6 +100,9 @@ extern int ip_path_mtu_replace_end (void);
extern u32 ip_pmtu_get_table_id (const ip_pmtu_t *ipt);
extern void ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip);
+extern void ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent,
+ dpo_id_t *dpo);
+
/**
* Data-plane accessor functions
*/
diff --git a/src/vnet/ip/ip_path_mtu_node.c b/src/vnet/ip/ip_path_mtu_node.c
index b13f9de849c..cadf1cbe137 100644
--- a/src/vnet/ip/ip_path_mtu_node.c
+++ b/src/vnet/ip/ip_path_mtu_node.c
@@ -49,7 +49,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, ip_address_family_t af)
{
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- u32 frag_sent = 0, small_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -114,8 +113,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (error0 == IP_FRAG_ERROR_NONE)
{
/* Free original buffer chain */
- frag_sent += vec_len (buffer);
- small_packets += (vec_len (buffer) == 1);
vlib_buffer_free_one (vm, pi0); /* Free original packet */
}
else
@@ -176,7 +173,8 @@ VLIB_REGISTER_NODE (ip4_ip_pmtu_dpo_node) = {
.name = "ip4-pmtu-dpo",
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
- .n_errors = 0,
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
@@ -187,7 +185,8 @@ VLIB_REGISTER_NODE (ip6_ip_pmtu_dpo_node) = {
.name = "ip6-pmtu-dpo",
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
- .n_errors = 0,
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h
new file mode 100644
index 00000000000..a80211561b7
--- /dev/null
+++ b/src/vnet/ip/ip_psh_cksum.h
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_ip_psh_cksum_h
+#define included_ip_psh_cksum_h
+
+#include <vnet/ip/ip.h>
+#include <vppinfra/vector/ip_csum.h>
+/* IPv4 pseudo header as filled by ip4_pseudo_header_cksum; asserted to be 12 bytes in this header. */
+typedef struct _ip4_psh
+{
+ ip4_address_t src;
+ ip4_address_t dst;
+ u8 zero; /* not assigned by ip4_pseudo_header_cksum; stays 0 from its { 0 } initializer */
+ u8 proto;
+ u16 l4len; /* written with clib_host_to_net_u16 by ip4_pseudo_header_cksum */
+} ip4_psh_t;
+/* IPv6 pseudo header as filled by ip6_pseudo_header_cksum; asserted to be 40 bytes in this header. */
+typedef struct _ip6_psh
+{
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 l4len; /* receives ip6->payload_length unchanged */
+ u32 proto; /* written with clib_host_to_net_u32 by ip6_pseudo_header_cksum */
+} ip6_psh_t;
+
+STATIC_ASSERT (sizeof (ip4_psh_t) == 12, "ipv4 pseudo header is 12B");
+STATIC_ASSERT (sizeof (ip6_psh_t) == 40, "ipv6 pseudo header is 40B");
+/* Fills an ip4_psh_t from ip4 and returns the bitwise complement of clib_ip_csum over it. */
+static_always_inline u16
+ip4_pseudo_header_cksum (ip4_header_t *ip4)
+{
+ ip4_psh_t psh = { 0 };
+ psh.src = ip4->src_address;
+ psh.dst = ip4->dst_address;
+ psh.proto = ip4->protocol;
+ psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - /* total length converted to host order first */
+ sizeof (ip4_header_t)); /* NOTE(review): fixed header size is subtracted; a header carrying options would yield a larger l4len -- confirm callers */
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
+}
+/* Fills an ip6_psh_t from ip6 and returns the bitwise complement of clib_ip_csum over it. */
+static_always_inline u16
+ip6_pseudo_header_cksum (ip6_header_t *ip6)
+{
+ ip6_psh_t psh = { 0 };
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.l4len = ip6->payload_length; /* copied without a byte swap into the 32-bit field */
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol); /* widened to 32 bits, then host-to-net */
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
+}
+
+#endif /* included_ip_psh_cksum_h */
diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c
index bf01adadb10..dc113f51386 100644
--- a/src/vnet/ip/ip_punt_drop.c
+++ b/src/vnet/ip/ip_punt_drop.c
@@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args)
rx = ip_punt_redirect_get (rxs[rx_sw_if_index]);
- s = format (s, " rx %U via:\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, rx_sw_if_index));
+ s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm,
+ rx_sw_if_index);
s = format (s, " %U", format_fib_path_list, rx->pl, 2);
s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0);
s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0);
diff --git a/src/vnet/ip/ip_sas.c b/src/vnet/ip/ip_sas.c
index 7d3632d95ed..0fc261724f1 100644
--- a/src/vnet/ip/ip_sas.c
+++ b/src/vnet/ip/ip_sas.c
@@ -80,7 +80,12 @@ ip6_sas_by_sw_if_index (u32 sw_if_index, const ip6_address_t *dst,
if (ip6_address_is_link_local_unicast (dst) ||
dst->as_u32[0] == clib_host_to_net_u32 (0xff020000))
{
- ip6_address_copy (src, ip6_get_link_local_address (sw_if_index));
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index);
+ if (NULL == ll)
+ {
+ return false;
+ }
+ ip6_address_copy (src, ll);
return true;
}
diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c
index 2de8235288d..727afba67f4 100644
--- a/src/vnet/ip/ip_test.c
+++ b/src/vnet/ip/ip_test.c
@@ -30,16 +30,20 @@
#include <vnet/format_fns.h>
#include <vnet/ip/ip.api_enum.h>
#include <vnet/ip/ip.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
#define vl_endianfun /* define message structures */
#include <vnet/ip/ip.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/ip/ip.api.h>
+#undef vl_calcsizefun
+
typedef struct
{
/* API message ID base */
u16 msg_id_base;
- u32 ping_id;
vat_main_t *vat_main;
} ip_test_main_t;
@@ -99,7 +103,7 @@ increment_address (vl_api_address_t *a)
static uword
unformat_fib_path (unformat_input_t *input, va_list *args)
{
- vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
vl_api_fib_path_t *path = va_arg (*args, vl_api_fib_path_t *);
u32 weight, preference;
mpls_label_t out_label;
@@ -113,14 +117,14 @@ unformat_fib_path (unformat_input_t *input, va_list *args)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "%U %U", unformat_vl_api_ip4_address,
- &path->nh.address.ip4, unformat_vnet_sw_interface, vnm,
+ &path->nh.address.ip4, api_unformat_sw_if_index, vam,
&path->sw_if_index))
{
path->proto = FIB_API_PATH_NH_PROTO_IP4;
}
else if (unformat (input, "%U %U", unformat_vl_api_ip6_address,
- &path->nh.address.ip6, unformat_vnet_sw_interface,
- vnm, &path->sw_if_index))
+ &path->nh.address.ip6, api_unformat_sw_if_index, vam,
+ &path->sw_if_index))
{
path->proto = FIB_API_PATH_NH_PROTO_IP6;
}
@@ -237,7 +241,6 @@ unformat_fib_path (unformat_input_t *input, va_list *args)
static int
api_ip_route_add_del (vat_main_t *vam)
{
- vnet_main_t *vnm = vnet_get_main ();
unformat_input_t *i = vam->input;
vl_api_ip_route_add_del_t *mp;
u32 vrf_id = 0;
@@ -273,7 +276,7 @@ api_ip_route_add_del (vat_main_t *vam)
is_multipath = 1;
else if (unformat (i, "seed %d", &random_seed))
;
- else if (unformat (i, "via %U", unformat_fib_path, vnm,
+ else if (unformat (i, "via %U", unformat_fib_path, vam,
&paths[path_count]))
{
path_count++;
@@ -524,6 +527,17 @@ api_ip_table_flush (vat_main_t *vam)
return ret;
}
+static int
+api_ip_table_allocate (vat_main_t *vam)
+{
+ return -1; /* stub: vam is unused and -1 is always returned */
+}
+
+static void
+vl_api_ip_table_allocate_reply_t_handler (vl_api_ip_table_allocate_reply_t *mp)
+{ /* empty body: mp is not examined */
+}
+
static void
vl_api_ip_route_add_del_v2_reply_t_handler (
vl_api_ip_route_add_del_v2_reply_t *mp)
@@ -692,7 +706,6 @@ vl_api_ip_punt_redirect_v2_details_t_handler (
static int
api_ip_address_dump (vat_main_t *vam)
{
- vnet_main_t *vnm = vnet_get_main ();
unformat_input_t *i = vam->input;
vl_api_ip_address_dump_t *mp;
vl_api_control_ping_t *mp_ping;
@@ -706,8 +719,7 @@ api_ip_address_dump (vat_main_t *vam)
{
if (unformat (i, "sw_if_index %d", &sw_if_index))
sw_if_index_set = 1;
- else if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
sw_if_index_set = 1;
else if (unformat (i, "ipv4"))
ipv4_set = 1;
@@ -1002,6 +1014,24 @@ api_ip_reassembly_enable_disable (vat_main_t *vat)
return -1;
}
+static int
+api_ip_local_reass_enable_disable (vat_main_t *vat)
+{
+ return -1; /* stub: vat is unused and -1 is always returned */
+}
+
+static int
+api_ip_local_reass_get (vat_main_t *vat)
+{
+ return -1; /* stub: vat is unused and -1 is always returned */
+}
+
+static void
+vl_api_ip_local_reass_get_reply_t_handler (
+ vl_api_ip_local_reass_get_reply_t *mp)
+{ /* empty body: mp is not examined */
+}
+
static void
vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp)
{
@@ -1010,7 +1040,6 @@ vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp)
int
api_ip_source_and_port_range_check_interface_add_del (vat_main_t *vam)
{
- vnet_main_t *vnm = vnet_get_main ();
unformat_input_t *input = vam->input;
vl_api_ip_source_and_port_range_check_interface_add_del_t *mp;
u32 sw_if_index = ~0;
@@ -1022,8 +1051,7 @@ api_ip_source_and_port_range_check_interface_add_del (vat_main_t *vam)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
+ if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
;
else if (unformat (input, "sw_if_index %d", &sw_if_index))
;
@@ -1202,7 +1230,6 @@ api_ip_mroute_dump (vat_main_t *vam)
static int
api_sw_interface_ip6_enable_disable (vat_main_t *vam)
{
- vnet_main_t *vnm = vnet_get_main ();
unformat_input_t *i = vam->input;
vl_api_sw_interface_ip6_enable_disable_t *mp;
u32 sw_if_index;
@@ -1212,7 +1239,7 @@ api_sw_interface_ip6_enable_disable (vat_main_t *vam)
while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
sw_if_index_set = 1;
else if (unformat (i, "sw_if_index %d", &sw_if_index))
sw_if_index_set = 1;
@@ -1250,6 +1277,12 @@ api_set_ip_flow_hash_v2 (vat_main_t *vat)
}
static int
+api_set_ip_flow_hash_v3 (vat_main_t *vat)
+{
+ return -1; /* stub: vat is unused and -1 is always returned */
+}
+
+static int
api_ip_mroute_add_del (vat_main_t *vam)
{
unformat_input_t *i = vam->input;
@@ -1536,8 +1569,6 @@ vl_api_ip_details_t_handler (vl_api_ip_details_t *mp)
#include <vnet/ip/ip.api_test.c>
-VAT_REGISTER_FEATURE_FUNCTION (vat_ip_plugin_register);
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c
index 3e5ecebf142..ec80a96f15c 100644
--- a/src/vnet/ip/ip_types.c
+++ b/src/vnet/ip/ip_types.c
@@ -41,14 +41,16 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{ /* Tries an IPv4 address first, then IPv6; returns 1 on a match, 0 otherwise. */
ip_address_t *a = va_arg (*args, ip_address_t *);
+ ip_address_t tmp, *p_tmp = &tmp; /* parse into a local so *a is only written on success */
- clib_memset (a, 0, sizeof (*a));
- if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
- ip_addr_version (a) = AF_IP4;
- else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
- ip_addr_version (a) = AF_IP6;
+ clib_memset (p_tmp, 0, sizeof (*p_tmp));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP6;
else
return 0; /* neither form matched; *a has not been touched */
+ *a = *p_tmp; /* publish the parsed address */
return 1;
}
@@ -288,6 +290,13 @@ ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
}
void
+ip_address_to_prefix (const ip_address_t *addr, ip_prefix_t *prefix)
+{ /* Builds a prefix from a single address: length 32 for AF_IP4, 128 for anything else. */
+ prefix->len = (addr->version == AF_IP4 ? 32 : 128);
+ clib_memcpy (&prefix->addr, addr, sizeof (prefix->addr)); /* copies the whole ip_address_t, version included */
+}
+
+void
ip_address_increment (ip_address_t * ip)
{
ip46_address_increment ((ip_addr_version (ip) == AF_IP4 ?
@@ -380,23 +389,24 @@ ip_prefix_copy (void *dst, void *src)
}
int
-ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
+ip_prefix_cmp (const ip_prefix_t *ipp1, const ip_prefix_t *ipp2)
{
+ ip_prefix_t p1 = *ipp1, p2 = *ipp2;
int cmp = 0;
- ip_prefix_normalize (p1);
- ip_prefix_normalize (p2);
+ ip_prefix_normalize (&p1);
+ ip_prefix_normalize (&p2);
- cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2));
+ cmp = ip_address_cmp (&ip_prefix_addr (&p1), &ip_prefix_addr (&p2));
if (cmp == 0)
{
- if (ip_prefix_len (p1) < ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) < ip_prefix_len (&p2))
{
cmp = 1;
}
else
{
- if (ip_prefix_len (p1) > ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) > ip_prefix_len (&p2))
cmp = 2;
}
}
diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h
index 83a0f6adc72..f1b387df194 100644
--- a/src/vnet/ip/ip_types.h
+++ b/src/vnet/ip/ip_types.h
@@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_
#define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1)
-/* *INDENT-OFF* */
typedef struct ip_address
{
ip46_address_t ip;
ip_address_family_t version;
} __clib_packed ip_address_t;
-/* *INDENT-ON* */
#define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4}
#define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6}
@@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a,
extern void ip_address_increment (ip_address_t * ip);
extern void ip_address_reset (ip_address_t * ip);
-/* *INDENT-OFF* */
typedef struct ip_prefix
{
ip_address_t addr;
u8 len;
} __clib_packed ip_prefix_t;
-/* *INDENT-ON* */
#define ip_prefix_addr(_a) (_a)->addr
#define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a))
@@ -126,11 +122,13 @@ typedef struct ip_prefix
#define ip_prefix_v4(_a) ip_addr_v4(&ip_prefix_addr(_a))
#define ip_prefix_v6(_a) ip_addr_v6(&ip_prefix_addr(_a))
-extern int ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2);
+extern int ip_prefix_cmp (const ip_prefix_t *p1, const ip_prefix_t *p2);
extern void ip_prefix_normalize (ip_prefix_t * a);
extern void ip_address_to_fib_prefix (const ip_address_t * addr,
fib_prefix_t * prefix);
+extern void ip_address_to_prefix (const ip_address_t *addr,
+ ip_prefix_t *prefix);
extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp,
fib_prefix_t * fibp);
extern u8 *format_ip_prefix (u8 * s, va_list * args);
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 1753ffd9232..c225c222a38 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -128,6 +128,42 @@ format_ip_flow_hash_config (u8 * s, va_list * args)
return s;
}
+uword
+unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args)
+{ /* unformat helper: args carries a flow_hash_config_t * that receives the parsed bits. */
+ flow_hash_config_t *flow_hash_config = va_arg (*args, flow_hash_config_t *);
+ uword start_index = unformat_check_input (input); /* saved so input->index can be restored on failure */
+ int matched_once = 0;
+ /* "default" overwrites the config; names matched in the loop are OR-ed into it instead. */
+ if (unformat (input, "default"))
+ {
+ *flow_hash_config = IP_FLOW_HASH_DEFAULT;
+ return 1;
+ }
+ while (!unformat_is_eof (input) &&
+ !is_white_space (unformat_peek_input (input))) /* stop at end of input or at the next white space */
+ {
+ if (unformat (input, "%_,")) /* a comma is consumed and otherwise ignored */
+ ;
+#define _(a, b, c) \
+ else if (unformat (input, "%_" #a)) \
+ { \
+ *flow_hash_config |= c; \
+ matched_once = 1; \
+ }
+ foreach_flow_hash_bit /* expands to one else-if per flow-hash bit name */
+#undef _
+ else
+ {
+ /* Roll back to our start */
+ input->index = start_index;
+ return 0; /* NOTE(review): bits OR-ed in before this point remain set in *flow_hash_config */
+ }
+ }
+ /* Non-zero only when at least one bit name matched. */
+ return matched_once;
+}
+
u8 *
format_ip_adjacency_packet_data (u8 * s, va_list * args)
{
@@ -184,6 +220,27 @@ const ip46_address_t zero_addr = {
0, 0},
};
+bool
+fib_prefix_validate (const fib_prefix_t *prefix)
+{ /* Returns false when fp_len exceeds 32 (IPv4) or 128 (IPv6); true in every other case. */
+ if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 32)
+ {
+ return false;
+ }
+ }
+ /* Any protocol other than IP4/IP6 falls through both checks and is accepted. */
+ if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 128)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
static clib_error_t *
vnet_ip_route_cmd (vlib_main_t * vm,
unformat_input_t * main_input, vlib_cli_command_t * cmd)
@@ -304,22 +361,25 @@ vnet_ip_route_cmd (vlib_main_t * vm,
}
else if (0 < vec_len (rpaths))
{
- u32 k, n, incr;
- ip46_address_t dst = prefixs[i].fp_addr;
+ u32 k, n;
f64 t[2];
n = count;
t[0] = vlib_time_now (vm);
- incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
- prefixs[i].fp_len);
for (k = 0; k < n; k++)
{
fib_prefix_t rpfx = {
.fp_len = prefixs[i].fp_len,
.fp_proto = prefixs[i].fp_proto,
- .fp_addr = dst,
+ .fp_addr = prefixs[i].fp_addr,
};
+ if (!fib_prefix_validate (&rpfx))
+ {
+ vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len);
+ continue;
+ }
+
if (is_del)
fib_table_entry_path_remove2 (fib_index,
&rpfx, FIB_SOURCE_CLI, rpaths);
@@ -329,21 +389,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
FIB_SOURCE_CLI,
FIB_ENTRY_FLAG_NONE, rpaths);
- if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
- {
- dst.ip4.as_u32 =
- clib_host_to_net_u32 (incr +
- clib_net_to_host_u32 (dst.
- ip4.as_u32));
- }
- else
- {
- int bucket = (incr < 64 ? 0 : 1);
- dst.ip6.as_u64[bucket] =
- clib_host_to_net_u64 (incr +
- clib_net_to_host_u64 (dst.ip6.as_u64
- [bucket]));
- }
+ fib_prefix_increment (&prefixs[i]);
}
t[1] = vlib_time_now (vm);
@@ -399,29 +445,35 @@ vnet_ip_table_cmd (vlib_main_t * vm,
}
}
- if (~0 == table_id)
- {
- error = clib_error_return (0, "No table id");
- goto done;
- }
- else if (0 == table_id)
+ if (0 == table_id)
{
error = clib_error_return (0, "Can't change the default table");
goto done;
}
else
- {
- if (is_add)
- {
- ip_table_create (fproto, table_id, 0, name);
- }
- else
{
- ip_table_delete (fproto, table_id, 0);
+ if (is_add)
+ {
+ if (~0 == table_id)
+ {
+ table_id = ip_table_get_unused_id (fproto);
+ vlib_cli_output (vm, "%u\n", table_id);
+ }
+ ip_table_create (fproto, table_id, 0, name);
+ }
+ else
+ {
+ if (~0 == table_id)
+ {
+ error = clib_error_return (0, "No table id");
+ goto done;
+ }
+ ip_table_delete (fproto, table_id, 0);
+ }
}
- }
done:
+ vec_free (name);
unformat_free (line_input);
return error;
}
@@ -477,13 +529,13 @@ vnet_show_ip_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
}
fib = fib_table_get (fib_index, fproto);
- vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index,
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
fib->ft_table_id, fib->ft_desc);
}
else
{
pool_foreach (fib, fibs)
- vlib_cli_output (vm, "[%3u] table_id:%3u %v", fib->ft_index,
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
fib->ft_table_id, fib->ft_desc);
}
@@ -505,33 +557,25 @@ vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
.path = "ip",
.short_help = "Internet protocol (IP) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
.path = "ip6",
.short_help = "Internet protocol version 6 (IPv6) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
.path = "show ip",
.short_help = "Internet protocol (IP) show commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
.path = "show ip6",
.short_help = "Internet protocol version 6 (IPv6) show commands",
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 or IPv6 routes. All
@@ -560,37 +604,37 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
* To add a route to a particular FIB table (VRF), use:
* @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_route_command, static) = {
.path = "ip route",
- .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table "
+ "<table-id>] via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference "
+ "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 "
+ "<interface>] [out-labels <value value value>]",
.function = vnet_ip_route_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before they can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_table_command, static) = {
.path = "ip table",
.short_help = "ip table [add|del] <table-id>",
.function = vnet_ip4_table_cmd,
};
-/* *INDENT-ON* */
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv6 Tables. All
* Tables must be explicitly added before they can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_table_command, static) = {
.path = "ip6 table",
.short_help = "ip6 table [add|del] <table-id>",
@@ -638,7 +682,7 @@ ip_table_bind_cmd (vlib_main_t * vm,
goto done;
}
- rv = ip_table_bind (fproto, sw_if_index, table_id, 0);
+ rv = ip_table_bind (fproto, sw_if_index, table_id);
if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
{
@@ -695,14 +739,12 @@ ip6_table_bind_cmd (vlib_main_t * vm,
* Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
{
.path = "set interface ip table",
.function = ip4_table_bind_cmd,
.short_help = "set interface ip table <interface> <table-id>",
};
-/* *INDENT-ON* */
/*?
* Place the indicated interface into the supplied IPv6 FIB table (also known
@@ -723,14 +765,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
* Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
{
.path = "set interface ip6 table",
.function = ip6_table_bind_cmd,
.short_help = "set interface ip6 table <interface> <table-id>"
};
-/* *INDENT-ON* */
clib_error_t *
vnet_ip_mroute_cmd (vlib_main_t * vm,
@@ -894,8 +934,8 @@ vnet_ip_mroute_cmd (vlib_main_t * vm,
mfib_table_entry_path_remove (fib_index,
&pfx, MFIB_SOURCE_CLI, rpaths);
else
- mfib_table_entry_path_update (fib_index,
- &pfx, MFIB_SOURCE_CLI, rpaths);
+ mfib_table_entry_path_update (fib_index, &pfx, MFIB_SOURCE_CLI,
+ MFIB_ENTRY_FLAG_NONE, rpaths);
}
if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
@@ -967,7 +1007,6 @@ done:
* @cliexcmd{ip mroute add 232.1.1.1 Signal}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_mroute_command, static) =
{
.path = "ip mroute",
@@ -975,7 +1014,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) =
.function = vnet_ip_mroute_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
index 48ba468d7c2..8083d974df6 100644
--- a/src/vnet/ip/lookup.h
+++ b/src/vnet/ip/lookup.h
@@ -162,23 +162,22 @@ typedef struct ip_lookup_main_t
} ip_lookup_main_t;
u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
-
+uword unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args);
always_inline void
ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index,
vlib_buffer_t * b)
{
- /* *INDENT-OFF* */
vnet_buffer (b)->ip.fib_index =
vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]);
vnet_buffer (b)->ip.fib_index =
((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
vnet_buffer (b)->ip.fib_index :
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- /* *INDENT-ON* */
}
void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+bool fib_prefix_validate (const fib_prefix_t *prefix);
#endif /* included_ip_lookup_h */
/*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index fb0cc221950..3c46549634a 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_main_t *pm = &punt_main;
punt_client_t *c;
- /* For now we only support UDP punt */
- if (protocol != IP_PROTOCOL_UDP)
- return clib_error_return (0,
- "only UDP protocol (%d) is supported, got %d",
- IP_PROTOCOL_UDP, protocol);
-
if (port == (u16) ~ 0)
- return clib_error_return (0, "UDP port number required");
+ return clib_error_return (0, "Port number required");
+
+ u32 node_index;
+ switch (protocol)
+ {
+ case IP_PROTOCOL_UDP:
+ node_index = (af == AF_IP4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index);
+ udp_register_dst_port (vm, port, node_index, af == AF_IP4);
+ break;
+ case IP_PROTOCOL_ICMP6:
+ if (af != AF_IP6)
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d",
+ IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol);
+
+ node_index = icmp6_punt_socket_node.index;
+ icmp6_register_type (vm, port, node_index);
+ break;
+ default:
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d) is supported, got %d",
+ IP_PROTOCOL_UDP, protocol);
+ }
c = punt_client_l4_get (af, port);
@@ -165,19 +182,14 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_client_l4_db_add (af, port, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_L4;
c->reg.punt.l4.port = port;
c->reg.punt.l4.protocol = protocol;
c->reg.punt.l4.af = af;
- u32 node_index = (af == AF_IP4 ?
- udp4_punt_socket_node.index :
- udp6_punt_socket_node.index);
-
- udp_register_dst_port (vm, port, node_index, af == AF_IP4);
-
return (NULL);
}
@@ -197,7 +209,8 @@ punt_socket_register_ip_proto (vlib_main_t * vm,
punt_client_ip_proto_db_add (af, proto, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_IP_PROTO;
c->reg.punt.ip_proto.protocol = proto;
@@ -227,7 +240,8 @@ punt_socket_register_exception (vlib_main_t * vm,
punt_client_exception_db_add (reason, pc - pm->punt_client_pool);
}
- memcpy (pc->caddr.sun_path, client_pathname, sizeof (pc->caddr.sun_path));
+ snprintf (pc->caddr.sun_path, sizeof (pc->caddr.sun_path), "%s",
+ client_pathname);
pc->caddr.sun_family = AF_UNIX;
pc->reg.type = PUNT_TYPE_EXCEPTION;
pc->reg.punt.exception.reason = reason;
@@ -369,6 +383,8 @@ punt_l4_add_del (vlib_main_t * vm,
ip_address_family_t af,
ip_protocol_t protocol, u16 port, bool is_add)
{
+ int is_ip4 = af == AF_IP4;
+
/* For now we only support TCP and UDP punt */
if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP)
return clib_error_return (0,
@@ -378,19 +394,22 @@ punt_l4_add_del (vlib_main_t * vm,
if (port == (u16) ~ 0)
{
if (protocol == IP_PROTOCOL_UDP)
- udp_punt_unknown (vm, af == AF_IP4, is_add);
+ udp_punt_unknown (vm, is_ip4, is_add);
else if (protocol == IP_PROTOCOL_TCP)
- tcp_punt_unknown (vm, af == AF_IP4, is_add);
+ tcp_punt_unknown (vm, is_ip4, is_add);
return 0;
}
else if (is_add)
{
+ const vlib_node_registration_t *punt_node =
+ is_ip4 ? &udp4_punt_node : &udp6_punt_node;
+
if (protocol == IP_PROTOCOL_TCP)
return clib_error_return (0, "punt TCP ports is not supported yet");
- udp_register_dst_port (vm, port, udp4_punt_node.index, af == AF_IP4);
+ udp_register_dst_port (vm, port, punt_node->index, is_ip4);
return 0;
}
@@ -399,7 +418,7 @@ punt_l4_add_del (vlib_main_t * vm,
if (protocol == IP_PROTOCOL_TCP)
return clib_error_return (0, "punt TCP ports is not supported yet");
- udp_unregister_dst_port (vm, port, af == AF_IP4);
+ udp_unregister_dst_port (vm, port, is_ip4);
return 0;
}
@@ -455,7 +474,6 @@ punt_cli (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
bool is_add = true;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -467,7 +485,6 @@ punt_cli (vlib_main_t * vm,
.type = PUNT_TYPE_L4,
};
u32 port;
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -533,13 +550,11 @@ done:
* @cliexcmd{set punt udp del all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_command, static) = {
.path = "set punt",
.short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]",
.function = punt_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_register_cmd (vlib_main_t * vm,
@@ -549,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
u8 *socket_name = 0;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -560,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -608,7 +621,6 @@ done:
* @cliexcmd{punt socket register socket punt_l4_foo.sock}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_register_command, static) =
{
.path = "punt socket register",
@@ -616,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) =
.short_help = "punt socket register [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_deregister_cmd (vlib_main_t * vm,
@@ -625,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
{
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -636,7 +646,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -677,7 +686,6 @@ done:
* @cliexpar
* @cliexcmd{punt socket deregister}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
{
.path = "punt socket deregister",
@@ -685,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
.short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
@@ -698,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_l4_port,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_IP_PROTO:
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_ip_proto,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_EXCEPTION:
@@ -813,7 +816,6 @@ done:
* @cliexpar
* @cliexcmd{show punt socket ipv4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
{
.path = "show punt socket registrations",
@@ -821,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
.short_help = "show punt socket registrations [l4|exception]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
ip_punt_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
index a2612d60f07..e8495caad61 100644
--- a/src/vnet/ip/punt.h
+++ b/src/vnet/ip/punt.h
@@ -20,7 +20,12 @@
#ifndef included_punt_h
#define included_punt_h
+#ifdef __linux__
#include <linux/un.h>
+#elif __FreeBSD__
+#include <sys/un.h>
+#define UNIX_PATH_MAX SUNPATHLEN
+#endif /* __linux__ */
#include <stdbool.h>
#include <vnet/ip/ip.h>
@@ -239,6 +244,7 @@ extern vlib_node_registration_t udp4_punt_node;
extern vlib_node_registration_t udp6_punt_node;
extern vlib_node_registration_t udp4_punt_socket_node;
extern vlib_node_registration_t udp6_punt_socket_node;
+extern vlib_node_registration_t icmp6_punt_socket_node;
extern vlib_node_registration_t ip4_proto_punt_socket_node;
extern vlib_node_registration_t ip6_proto_punt_socket_node;
extern vlib_node_registration_t punt_socket_rx_node;
diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c
index bcbf939f69d..20297af2e75 100644
--- a/src/vnet/ip/punt_api.c
+++ b/src/vnet/ip/punt_api.c
@@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp)
char *p = vnet_punt_get_server_pathname ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY,
({
memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname));
}));
- /* *INDENT-ON* */
}
typedef struct punt_socket_send_ctx_t_
diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c
index 7f9beef0ffe..6400e49c626 100644
--- a/src/vnet/ip/punt_node.c
+++ b/src/vnet/ip/punt_node.c
@@ -23,6 +23,7 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
#include <vlib/vlib.h>
#include <vnet/ip/punt.h>
#include <vlib/unix/unix.h>
@@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_node) = {
.name = "ip4-udp-punt",
/* Takes a vector of packets. */
@@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = {
#undef _
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args)
}
always_inline uword
-punt_socket_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- punt_type_t pt, ip_address_family_t af)
+punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af, ip_protocol_t protocol)
{
u32 *buffers = vlib_frame_vector_args (frame);
u32 thread_index = vm->thread_index;
@@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm,
uword l;
punt_packetdesc_t packetdesc;
punt_client_t *c;
-
+ u16 port = 0;
b = vlib_get_buffer (vm, buffers[i]);
if (PUNT_TYPE_L4 == pt)
{
- /* Reverse UDP Punt advance */
- udp_header_t *udp;
- if (AF_IP4 == af)
+ if (protocol == IP_PROTOCOL_UDP)
{
- vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
- sizeof (udp_header_t)));
- ip4_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ /* Reverse UDP Punt advance */
+ udp_header_t *udp;
+ if (AF_IP4 == af)
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip4_header_t) + sizeof (udp_header_t)));
+ ip4_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ else
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip6_header_t) + sizeof (udp_header_t)));
+ ip6_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ port = clib_net_to_host_u16 (udp->dst_port);
}
- else
+ else if (protocol == IP_PROTOCOL_ICMP6)
{
- vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
- sizeof (udp_header_t)));
ip6_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ icmp46_header_t *icmp = ip6_next_header (ip);
+ port = icmp->type;
}
-
/*
* Find registered client
* If no registered client, drop packet and count
*/
- c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
+ c = punt_client_l4_get (af, port);
}
else if (PUNT_TYPE_IP_PROTO == pt)
{
@@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm,
iov->iov_len = sizeof (packetdesc);
/** VLIB buffer chain -> Unix iovec(s). */
- vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+ vlib_buffer_advance (b, -ethernet_buffer_header_size (b));
vec_add2 (ptd->iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = l = b->current_length;
@@ -396,6 +403,14 @@ error:
return n_packets;
}
+always_inline uword
+punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af)
+{
+ return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP);
+}
+
static uword
udp4_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
@@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm,
}
static uword
+icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame)
+{
+ return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6,
+ IP_PROTOCOL_ICMP6);
+}
+
+static uword
exception_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
@@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
.function = udp4_punt_socket,
.name = "ip4-udp-punt-socket",
@@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = {
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (icmp6_punt_socket_node) = {
+ .function = icmp6_punt_socket,
+ .name = "ip6-icmp-punt-socket",
+ .format_trace = format_udp_punt_trace,
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+
typedef struct
{
@@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm,
return total_count;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_socket_rx_node) =
{
.function = punt_socket_rx,
@@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) =
},
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c
index d2069c0876c..bab7d479dcf 100644
--- a/src/vnet/ip/reass/ip4_full_reass.c
+++ b/src/vnet/ip/reass/ip4_full_reass.c
@@ -23,16 +23,21 @@
#include <vppinfra/vec.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vppinfra/fifo.h>
#include <vppinfra/bihash_16_8.h>
#include <vnet/ip/reass/ip4_full_reass.h>
#include <stddef.h>
#define MSEC_PER_SEC 1000
-#define IP4_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP4_REASS_TIMEOUT_DEFAULT_MS 200
+
+/* As there are only 1024 reassembly contexts per thread, either DDoS attacks
+ * or a fraction of real timeouts could consume these contexts quickly,
+ * exhausting the context space and making reassembly impossible */
+#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
-#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
+#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_REASS_HT_LOAD_FACTOR (0.75)
#define IP4_REASS_DEBUG_BUFFERS 0
@@ -68,21 +73,19 @@ typedef enum
typedef struct
{
- union
+ struct
{
- struct
- {
- u32 xx_id;
- ip4_address_t src;
- ip4_address_t dst;
- u16 frag_id;
- u8 proto;
- u8 unused;
- };
- u64 as_u64[2];
+ u16 frag_id;
+ u8 proto;
+ u8 unused;
+ u32 fib_index;
+ ip4_address_t src;
+ ip4_address_t dst;
};
} ip4_full_reass_key_t;
+STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16);
+
typedef union
{
struct
@@ -155,6 +158,8 @@ typedef struct
ip4_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip4_full_reass_per_thread_t;
@@ -177,17 +182,19 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip4-drop node
- u32 ip4_drop_idx;
u32 ip4_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip4_full_reass_main_t;
extern ip4_full_reass_main_t ip4_full_reass_main;
@@ -219,6 +226,7 @@ typedef enum
RANGE_OVERLAP,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip4_full_reass_trace_operation_e;
typedef struct
@@ -329,6 +337,9 @@ format_ip4_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
@@ -404,13 +415,16 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm,
ip4_full_reass_per_thread_t * rt,
ip4_full_reass_t * reass)
{
- clib_bihash_kv_16_8_t kv;
- kv.key[0] = reass->key.as_u64[0];
- kv.key[1] = reass->key.as_u64[1];
+ clib_bihash_kv_16_8_t kv = {};
+ clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key));
clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
return ip4_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next and to_next are taken as input params, as this function
+ * could be called from a graph node that manages its own local copy of
+ * these variables; ignoring those and still trying to enqueue the buffers
+ * with local variables would cause either a buffer leak or corruption */
always_inline void
ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_full_reass_t *reass)
@@ -419,58 +433,103 @@ ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
+
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 n_free = vec_len (to_free);
+
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, n_free);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ for (u32 i = 0; i < n_free; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ ip4_full_reass_add_trace (vm, node, reass, to_free[i],
+ RANGE_DISCARD, 0, ~0);
+ }
- next_index = reass->error_next_index;
- u32 bi = ~0;
+ vlib_buffer_enqueue_to_single_next (vm, node, to_free,
+ reass->error_next_index, n_free);
+ }
+ else
+ {
+ vlib_buffer_free (vm, to_free, vec_len (to_free));
+ }
+ vec_free (to_free);
+}
- while (vec_len (to_free) > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+always_inline void
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip4_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- bi = vec_pop (to_free);
-
- if (~0 != bi)
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ range_bi = range_vnb->ip.reass.next_range_bi;
}
}
- else
+ if (*bi0 != ~0)
{
- vlib_buffer_free (vm, to_free, vec_len (to_free));
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
}
- vec_free (to_free);
}
always_inline void
@@ -484,10 +543,10 @@ ip4_full_reass_init (ip4_full_reass_t * reass)
}
always_inline ip4_full_reass_t *
-ip4_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
- ip4_full_reass_per_thread_t * rt,
- ip4_full_reass_kv_t * kv, u8 * do_handoff)
+ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_main_t *rm,
+ ip4_full_reass_per_thread_t *rt,
+ ip4_full_reass_kv_t *kv, u8 *do_handoff)
{
ip4_full_reass_t *reass;
f64 now;
@@ -510,6 +569,8 @@ again:
if (now > reass->last_heard + rm->timeout)
{
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT, 1);
ip4_full_reass_drop_all (vm, node, reass);
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
@@ -538,8 +599,7 @@ again:
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
+ clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key));
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
@@ -568,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
do
{
u32 tmp_bi = sub_chain_bi;
@@ -605,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
@@ -755,6 +813,16 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
*error0 = IP4_ERROR_NONE;
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
@@ -1090,199 +1158,216 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline uword
-ip4_full_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, ip4_full_reass_node_type_t type)
+ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip4_full_reass_node_type_t type,
+ bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE];
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u16 nexts[VLIB_FRAME_SIZE];
+
clib_spinlock_lock (&rt->lock);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- while (n_left_from > 0)
+ n_left = frame->n_vectors;
+ while (n_left > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- u32 error0 = IP4_ERROR_NONE;
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 error0 = IP4_ERROR_NONE;
- bi0 = from[0];
- b0 = vlib_get_buffer (vm, bi0);
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
- ip4_header_t *ip0 = vlib_buffer_get_current (b0);
- if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ ip4_header_t *ip0 = vlib_buffer_get_current (b0);
+ if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ {
+ // this is a whole packet - no fragmentation
+ if (CUSTOM != type)
{
- // this is a whole packet - no fragmentation
- if (CUSTOM != type)
- {
- next0 = IP4_FULL_REASS_NEXT_INPUT;
- }
- else
- {
- next0 = vnet_buffer (b0)->ip.reass.next_index;
- }
- goto packet_enqueue;
+ next0 = IP4_FULL_REASS_NEXT_INPUT;
}
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- const u32 fragment_length =
- clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
- const u32 fragment_last = fragment_first + fragment_length - 1;
- if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
+ else
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
- goto packet_enqueue;
+ next0 = vnet_buffer (b0)->ip.reass.next_index;
}
- ip4_full_reass_kv_t kv;
- u8 do_handoff = 0;
-
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
-
- ip4_full_reass_t *reass =
- ip4_full_reass_find_or_create (vm, node, rm, rt, &kv,
- &do_handoff);
-
- if (reass)
+ ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0);
+ goto packet_enqueue;
+ }
+
+ if (is_local && !rm->is_local_reass_enabled)
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ goto packet_enqueue;
+ }
+
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ const u32 fragment_length =
+ clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ const u32 fragment_last = fragment_first + fragment_length - 1;
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
+
+ if (fragment_first > fragment_last ||
+ fragment_first + fragment_length > UINT16_MAX - 20 ||
+ (fragment_length < 8 && // 8 is minimum frag length per RFC 791
+ ip4_get_fragment_more (ip0)))
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
+ goto packet_enqueue;
+ }
+
+ u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
+ .k.src.as_u32 = ip0->src_address.as_u32,
+ .k.dst.as_u32 = ip0->dst_address.as_u32,
+ .k.frag_id = ip0->fragment_id,
+ .k.proto = ip0->protocol
+
+ };
+ u8 do_handoff = 0;
+
+ ip4_full_reass_t *reass =
+ ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff);
+
+ if (reass)
+ {
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ if (0 == fragment_first)
{
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- if (0 == fragment_first)
- {
- reass->sendout_thread_index = vm->thread_index;
- }
+ reass->sendout_thread_index = vm->thread_index;
}
+ }
- if (PREDICT_FALSE (do_handoff))
+ if (PREDICT_FALSE (do_handoff))
+ {
+ next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ vnet_buffer (b0)->ip.reass.owner_thread_index =
+ kv.v.memory_owner_thread_index;
+ }
+ else if (reass)
+ {
+ u32 handoff_thread_idx;
+ u32 counter = ~0;
+ switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
+ &error0, CUSTOM == type,
+ &handoff_thread_idx))
{
+ case IP4_REASS_RC_OK:
+ /* nothing to do here */
+ break;
+ case IP4_REASS_RC_HANDOFF:
next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ b0 = vlib_get_buffer (vm, bi0);
vnet_buffer (b0)->ip.reass.owner_thread_index =
- kv.v.memory_owner_thread_index;
- }
- else if (reass)
- {
- u32 handoff_thread_idx;
- switch (ip4_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0,
- &error0, CUSTOM == type, &handoff_thread_idx))
- {
- case IP4_REASS_RC_OK:
- /* nothing to do here */
- break;
- case IP4_REASS_RC_HANDOFF:
- next0 = IP4_FULL_REASS_NEXT_HANDOFF;
- b0 = vlib_get_buffer (vm, bi0);
- vnet_buffer (b0)->ip.reass.owner_thread_index =
- handoff_thread_idx;
- break;
- case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_NO_BUF, 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_INTERNAL_ERROR:
- /* drop everything and start with a clean slate */
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_INTERNAL_ERROR,
- 1);
- ip4_full_reass_drop_all (vm, node, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- }
+ handoff_thread_idx;
+ break;
+ case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
+ break;
+ case IP4_REASS_RC_NO_BUF:
+ counter = IP4_ERROR_REASS_NO_BUF;
+ break;
+ case IP4_REASS_RC_INTERNAL_ERROR:
+ counter = IP4_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed only for internal errors, as the
+ * incoming packet has already been dropped in the other cases.
+ * Adding bi0 back to the reassembly list also fixes the buffer
+ * leak during internal errors.
+ *
+ * It also doesn't make sense to send these buffers to the custom
+ * app, since these fragments hit internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
}
- else
+
+ if (~0 != counter)
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
+ ip4_full_reass_drop_all (vm, node, reass);
+ ip4_full_reass_free (rm, rt, reass);
+ goto next_packet;
}
+ }
+ else
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ }
+ packet_enqueue:
- packet_enqueue:
-
- if (bi0 != ~0)
+ if (bi0 != ~0)
+ {
+ /* bi0 might have been updated by reass_finalize, reload */
+ b0 = vlib_get_buffer (vm, bi0);
+ if (IP4_ERROR_NONE != error0)
{
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
+ b0->error = node->errors[error0];
+ }
- /* bi0 might have been updated by reass_finalize, reload */
- b0 = vlib_get_buffer (vm, bi0);
- if (IP4_ERROR_NONE != error0)
+ if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- b0->error = node->errors[error0];
+ ip4_full_reass_add_trace (
+ vm, node, NULL, bi0, HANDOFF, 0,
+ vnet_buffer (b0)->ip.reass.owner_thread_index);
}
+ }
+ else if (FEATURE == type && IP4_ERROR_NONE == error0)
+ {
+ vnet_feature_next (&next0, b0);
+ }
- if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
- {
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_full_reass_add_trace (
- vm, node, NULL, bi0, HANDOFF, 0,
- vnet_buffer (b0)->ip.reass.owner_thread_index);
- }
- }
- else if (FEATURE == type && IP4_ERROR_NONE == error0)
- {
- vnet_feature_next (&next0, b0);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
+ /* Also increment the to-custom-app counter, as this fragment is
+ * going to the application as well */
+ if (CUSTOM == type)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
}
- next_packet:
- from += 1;
- n_left_from -= 1;
+ to_next[n_next] = bi0;
+ nexts[n_next] = next0;
+ n_next++;
+ IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next_packet:
+ from += 1;
+ n_left -= 1;
}
clib_spinlock_unlock (&rt->lock);
+
+ vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next);
return frame->n_vectors;
}
-static char *ip4_full_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node) = {
.name = "ip4-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1293,19 +1378,42 @@ VLIB_REGISTER_NODE (ip4_full_reass_node) = {
},
};
+VLIB_NODE_FN (ip4_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_node) = {
+ .name = "ip4-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_FULL_REASS_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
+ [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
+ [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff",
+
+ },
+};
+
VLIB_NODE_FN (ip4_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
.name = "ip4-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1316,26 +1424,26 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
};
VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-full-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature",
+ "ip4-sv-reassembly-feature"),
+ .runs_after = 0,
};
VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */);
}
VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
.name = "ip4-full-reassembly-custom",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1345,15 +1453,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
},
};
-VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
-};
-
-
#ifndef CLIB_MARCH_VARIANT
uword
ip4_full_reass_custom_register_next_node (uword node_index)
@@ -1369,7 +1468,9 @@ ip4_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP4_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
@@ -1495,17 +1596,17 @@ ip4_full_reass_init_function (vlib_main_t * vm)
nbuckets = ip4_full_reass_get_nbuckets ();
clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
- ASSERT (node);
- rm->ip4_drop_idx = node->index;
-
rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0);
rm->fq_custom_index =
vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
+ rm->is_local_reass_enabled = 1;
+
return error;
}
@@ -1547,6 +1648,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
uword thread_index = 0;
int index;
const uword nthreads = vlib_num_workers () + 1;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip4_full_reass_per_thread_t *rt =
@@ -1554,13 +1656,39 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+
+ /* Pace the number of timeouts handled per thread, to avoid barrier
+ * sync issues in real-world scenarios */
+
+ u32 beg = rt->last_id;
+ /* to ensure we walk at least once per sec per context */
+ u32 end =
+ beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
+
+ if (vec_len (pool_indexes_to_free))
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT,
+ vec_len (pool_indexes_to_free));
int *i;
vec_foreach (i, pool_indexes_to_free)
{
@@ -1575,7 +1703,7 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1583,13 +1711,12 @@ ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
- .function = ip4_full_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-full-reassembly-expire-walk",
- .format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
-
+ .function = ip4_full_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-full-reassembly-expire-walk",
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
static u8 *
@@ -1597,9 +1724,8 @@ format_ip4_full_reass_key (u8 * s, va_list * args)
{
ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1750,10 +1876,10 @@ format_ip4_full_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- ip4_full_reass_node_type_t type)
+ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip4_full_reass_node_type_t type,
+ bool is_local)
{
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
@@ -1772,7 +1898,14 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
switch (type)
{
case NORMAL:
- fq_index = rm->fq_index;
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
break;
case FEATURE:
fq_index = rm->fq_feature_index;
@@ -1782,7 +1915,6 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
break;
default:
clib_warning ("Unexpected `type' (%d)!", type);
- ASSERT (0);
}
while (n_left_from > 0)
@@ -1816,7 +1948,8 @@ VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
@@ -1834,16 +1967,36 @@ VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
},
};
+VLIB_NODE_FN (ip4_local_full_reass_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = {
+ .name = "ip4-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
+ .error_strings = ip4_full_reass_handoff_error_strings,
+ .format_trace = format_ip4_full_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
.name = "ip4-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1863,10 +2016,10 @@ VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm,
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
.name = "ip4-full-reass-custom-hoff",
.vector_size = sizeof (u32),
@@ -1906,8 +2059,28 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip4-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
+
+void
+ip4_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 1;
+ }
+ else
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 0;
+ }
+}
+
+int
+ip4_local_full_reass_enabled ()
+{
+ return ip4_full_reass_main.is_local_reass_enabled;
+}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip4_full_reass.h b/src/vnet/ip/reass/ip4_full_reass.h
index 000c80c5906..5df8107ca48 100644
--- a/src/vnet/ip/reass/ip4_full_reass.h
+++ b/src/vnet/ip/reass/ip4_full_reass.h
@@ -47,6 +47,9 @@ int ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_full_reass_custom_register_next_node (uword node_index);
+
+void ip4_local_full_reass_enable_disable (int enable);
+int ip4_local_full_reass_enabled ();
#endif /* __included_ip4_full_reass_h__ */
/*
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index cd5e19b65d3..7c3c2fff217 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -48,7 +48,7 @@ typedef struct
{
struct
{
- u32 xx_id;
+ u32 fib_index;
ip4_address_t src;
ip4_address_t dst;
u16 frag_id;
@@ -150,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -189,6 +190,7 @@ typedef struct
u8 ip_proto;
u16 l4_src_port;
u16 l4_dst_port;
+ int l4_layer_truncated;
} ip4_sv_reass_trace_t;
extern vlib_node_registration_t ip4_sv_reass_node;
@@ -225,6 +227,10 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
s = format (s, "[not-fragmented]");
break;
}
+ if (t->l4_layer_truncated)
+ {
+ s = format (s, " [l4-layer-truncated]");
+ }
return s;
}
@@ -232,7 +238,8 @@ static void
ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_sv_reass_t *reass, u32 bi,
ip4_sv_reass_trace_operation_e action, u32 ip_proto,
- u16 l4_src_port, u16 l4_dst_port)
+ u16 l4_src_port, u16 l4_dst_port,
+ int l4_layer_truncated)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (pool_is_free_index
@@ -253,6 +260,7 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
t->ip_proto = ip_proto;
t->l4_src_port = l4_src_port;
t->l4_dst_port = l4_dst_port;
+ t->l4_layer_truncated = l4_layer_truncated;
#if 0
static u8 *s = NULL;
s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
@@ -314,6 +322,8 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
ip4_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -368,10 +378,14 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip4_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if another worker already created a context, work with that copy
+ if (-2 == rv)
+ goto again;
}
return reass;
@@ -407,9 +421,10 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH,
- reass->ip_proto, reass->l4_src_port,
- reass->l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FINISH, reass->ip_proto,
+ reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
}
vec_add1 (reass->cached_buffers, bi0);
@@ -417,8 +432,9 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_FRAGMENT_CACHE,
- ~0, ~0, ~0);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (vec_len (reass->cached_buffers) > rm->max_reass_len)
{
@@ -428,15 +444,33 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
return rc;
}
+always_inline int
+l4_layer_truncated (ip4_header_t *ip)
+{
+ static const int l4_layer_length[256] = {
+ [IP_PROTOCOL_TCP] = sizeof (tcp_header_t),
+ [IP_PROTOCOL_UDP] = sizeof (udp_header_t),
+ [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t),
+ };
+
+ return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] >
+ (u8 *) ip + clib_net_to_host_u16 (ip->length));
+}
+
always_inline uword
-ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_output_feature, bool is_custom)
+ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_output_feature, bool is_custom,
+ bool with_custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (with_custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -482,6 +516,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
(is_output_feature ? 1 : 0) *
vnet_buffer (b1)->
ip.save_rewrite_length);
+
if (PREDICT_FALSE
(ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))
|| (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
@@ -506,29 +541,40 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 2) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 2) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (is_feature)
{
@@ -541,35 +587,48 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol;
- if (IP_PROTOCOL_TCP == ip1->protocol)
+ if (l4_layer_truncated (ip1))
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip1 + 1))->flags;
- vnet_buffer (b1)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip1 + 1))->ack_number;
- vnet_buffer (b1)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip1 + 1))->seq_number;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b1)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b1)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ else
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip1 + 1))->type;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip1 + 1))->flags;
+ vnet_buffer (b1)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip1 + 1))->ack_number;
+ vnet_buffer (b1)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip1 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip1 + 1))->type;
+ }
+ vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
+ vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
}
- vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
- vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b1)->ip.reass.ip_proto,
- vnet_buffer (b1)->ip.reass.l4_src_port,
- vnet_buffer (b1)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b1)->ip.reass.ip_proto,
+ vnet_buffer (b1)->ip.reass.l4_src_port,
+ vnet_buffer (b1)->ip.reass.l4_dst_port,
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated);
}
n_left_from -= 2;
next[0] = next0;
next[1] = next1;
next += 2;
+ if (with_custom_context)
+ context += 2;
}
while (n_left_from > 0)
@@ -608,34 +667,45 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
n_left_from -= 1;
next[0] = next0;
next += 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
@@ -649,7 +719,11 @@ slow_path:
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (with_custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -657,6 +731,7 @@ slow_path:
vlib_buffer_t *b0;
u32 next0;
u32 error0 = IP4_ERROR_NONE;
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -679,29 +754,42 @@ slow_path:
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port =
+ ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port =
+ ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_sv_reass_add_trace (
vm, node, NULL, bi0, REASS_PASSTHROUGH,
vnet_buffer (b0)->ip.reass.ip_proto,
vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
@@ -719,13 +807,17 @@ slow_path:
ip4_sv_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
+ if (with_custom_context)
+ kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
+ << 32;
+ else
+ kv.k.as_u64[0] =
+ (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
+ (u64) ip0->src_address.as_u32 << 32;
+ kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 |
+ (u64) ip0->fragment_id << 32 |
+ (u64) ip0->protocol << 48;
ip4_sv_reass_t *reass =
ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
@@ -735,6 +827,8 @@ slow_path:
next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (with_custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -771,31 +865,32 @@ slow_path:
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
ip4_sv_reass_rc_t rc =
ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0);
+ u32 counter = ~0;
switch (rc)
{
case IP4_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_UNSUPP_IP_PROT, 1);
+ counter = IP4_ERROR_REASS_UNSUPP_IP_PROT;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip4_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
{
@@ -843,13 +938,15 @@ slow_path:
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -862,13 +959,26 @@ slow_path:
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ if (with_custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
n_left_from -= 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -879,28 +989,21 @@ done:
return frame->n_vectors;
}
-static char *ip4_sv_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
.name = "ip4-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -910,24 +1013,22 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
.name = "ip4-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -936,34 +1037,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- true /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, true /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
.name = "ip4-sv-reassembly-output-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -972,24 +1069,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
.arc_name = "ip4-output",
.node_name = "ip4-sv-reassembly-output-feature",
.runs_before = 0,
.runs_after = 0,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
.name = "ip4-sv-reassembly-custom-next",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -999,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- true /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, false /* with_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
+ .name = "ip4-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof(u32),
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
+ [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
+ [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff",
+
+ },
+};
+
+VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, true /* with_custom_context */);
}
#ifndef CLIB_MARCH_VARIANT
@@ -1152,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
rm->output_feature_use_refcount_per_intf = NULL;
@@ -1204,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1212,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip4_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1228,33 +1343,29 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
- .function = ip4_sv_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-sv-reassembly-expire-walk",
- .format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
-
+ .function = ip4_sv_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-sv-reassembly-expire-walk",
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
{
ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1313,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1341,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
.path = "show ip4-sv-reassembly",
.short_help = "show ip4-sv-reassembly [details]",
.function = show_ip4_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1398,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_context)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (is_custom_context)
+ context = vlib_frame_aux_args (frame);
+
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ? rm->fq_feature_index :
+ (is_custom_context ? rm->fq_custom_context_index :
+ rm->fq_index);
while (n_left_from > 0)
{
@@ -1435,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (is_custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1449,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
.name = "ip4-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1468,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, true /* is_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
+ .name = "ip4-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
+ .error_strings = ip4_sv_reass_handoff_error_strings,
+ .format_trace = format_ip4_sv_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- true /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, true /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
.name = "ip4-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1497,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -1535,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index)
node_index);
}
+uword
+ip4_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index);
+}
+
int
ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable)
diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h
index e926dbeebcc..3a684eb9809 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.h
+++ b/src/vnet/ip/reass/ip4_sv_reass.h
@@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_sv_reass_custom_register_next_node (uword node_index);
+uword ip4_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip4_sv_reass_h__ */
diff --git a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c
index 9ec40cd347c..27647985877 100644
--- a/src/vnet/ip/reass/ip6_full_reass.c
+++ b/src/vnet/ip/reass/ip6_full_reass.c
@@ -25,10 +25,14 @@
#include <vnet/ip/ip.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_full_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
-#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 200
+/* As there are only 1024 reassembly contexts per thread, either DDoS attacks
+ * or a fraction of real timeouts could consume these contexts quickly,
+ * exhausting the context space and leaving reassembly unable to proceed */
+#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75)
@@ -40,6 +44,8 @@ typedef enum
IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_FULL_REASS_RC_NO_BUF,
IP6_FULL_REASS_RC_HANDOFF,
+ IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
+ IP6_FULL_REASS_RC_OVERLAP,
} ip6_full_reass_rc_t;
typedef struct
@@ -132,6 +138,8 @@ typedef struct
ip6_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip6_full_reass_per_thread_t;
@@ -155,17 +163,20 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip6-drop node
- u32 ip6_drop_idx;
u32 ip6_icmp_error_idx;
u32 ip6_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
+ u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip6_full_reass_main_t;
extern ip6_full_reass_main_t ip6_full_reass_main;
@@ -185,13 +196,22 @@ typedef enum
typedef enum
{
+ NORMAL,
+ FEATURE,
+ CUSTOM
+} ip6_full_reass_node_type_t;
+
+typedef enum
+{
RANGE_NEW,
+ RANGE_DISCARD,
RANGE_OVERLAP,
ICMP_ERROR_RT_EXCEEDED,
ICMP_ERROR_FL_TOO_BIG,
ICMP_ERROR_FL_NOT_MULT_8,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip6_full_reass_trace_operation_e;
typedef struct
@@ -278,6 +298,10 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
s = format (s, "\n%Unew %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
break;
+ case RANGE_DISCARD:
+ s = format (s, "\n%Udiscard %U", format_white_space, indent,
+ format_ip6_full_reass_range_trace, &t->trace_range);
+ break;
case RANGE_OVERLAP:
s = format (s, "\n%Uoverlap %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
@@ -304,6 +328,9 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
@@ -396,59 +423,69 @@ ip6_full_reass_free (ip6_full_reass_main_t * rm,
ip6_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next and to_next are taken as input params, as this function
+ * could be called from a graph node that is managing its own local copies
+ * of these variables; ignoring those and still trying to enqueue the buffers
+ * with separate local variables would cause a buffer leak or corruption */
always_inline void
ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
- ip6_full_reass_t *reass)
+ ip6_full_reass_t *reass, u32 *n_left_to_next,
+ u32 **to_next)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 next_index;
next_index = reass->error_next_index;
u32 bi = ~0;
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TO_CUSTOM_APP,
+ vec_len (to_free));
+
while (vec_len (to_free) > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, *to_next,
+ (*n_left_to_next));
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (vec_len (to_free) > 0 && (*n_left_to_next) > 0)
{
bi = vec_pop (to_free);
if (~0 != bi)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_full_reass_add_trace (vm, node, reass, bi, NULL,
+ RANGE_DISCARD, ~0);
+ }
+ *to_next[0] = bi;
+ (*to_next) += 1;
+ (*n_left_to_next) -= 1;
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_put_next_frame (vm, node, next_index, (*n_left_to_next));
}
}
else
@@ -459,8 +496,65 @@ ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
}
always_inline void
-ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_t * reass, u32 * icmp_bi)
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip6_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
+
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
+ }
+ }
+ range_bi = range_vnb->ip.reass.next_range_bi;
+ }
+ }
+ if (*bi0 != ~0)
+ {
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
+ }
+}
+
+always_inline void
+ip6_full_reass_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_t *reass, u32 *icmp_bi,
+ u32 *n_left_to_next, u32 **to_next)
{
if (~0 == reass->first_bi)
{
@@ -493,15 +587,16 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
0);
}
}
- ip6_full_reass_drop_all (vm, node, reass);
+ ip6_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next);
}
always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_kv_t * kv, u32 * icmp_bi,
- u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+ u8 *do_handoff, int skip_bihash,
+ u32 *n_left_to_next, u32 **to_next)
{
ip6_full_reass_t *reass;
f64 now;
@@ -511,7 +606,7 @@ again:
reass = NULL;
now = vlib_time_now (vm);
- if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+ if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.memory_owner_thread_index)
{
@@ -526,7 +621,10 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip6_full_reass_on_timeout (vm, node, reass, icmp_bi);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT, 1);
+ ip6_full_reass_on_timeout (vm, node, reass, icmp_bi, n_left_to_next,
+ to_next);
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
}
@@ -554,27 +652,41 @@ again:
reass->data_len = 0;
reass->next_index = ~0;
reass->error_next_index = ~0;
+ reass->memory_owner_thread_index = vm->thread_index;
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
- reass->key.as_u64[2] = kv->kv.key[2];
- reass->key.as_u64[3] = kv->kv.key[3];
- reass->key.as_u64[4] = kv->kv.key[4];
- reass->key.as_u64[5] = kv->kv.key[5];
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
- int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
- if (rv)
+ if (!skip_bihash)
{
- ip6_full_reass_free (rm, rt, reass);
- reass = NULL;
- // if other worker created a context already work with the other copy
- if (-2 == rv)
- goto again;
+ reass->key.as_u64[0] = kv->kv.key[0];
+ reass->key.as_u64[1] = kv->kv.key[1];
+ reass->key.as_u64[2] = kv->kv.key[2];
+ reass->key.as_u64[3] = kv->kv.key[3];
+ reass->key.as_u64[4] = kv->kv.key[4];
+ reass->key.as_u64[5] = kv->kv.key[5];
+
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
+ {
+ ip6_full_reass_free (rm, rt, reass);
+ reass = NULL;
+ // if another worker already created a context, work with that copy
+ if (-2 == rv)
+ goto again;
+ }
+ }
+ else
+ {
+ reass->key.as_u64[0] = ~0;
+ reass->key.as_u64[1] = ~0;
+ reass->key.as_u64[2] = ~0;
+ reass->key.as_u64[3] = ~0;
+ reass->key.as_u64[4] = ~0;
+ reass->key.as_u64[5] = ~0;
}
return reass;
@@ -593,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
- u32 dropped_cnt = 0;
u32 *vec_drop_compress = NULL;
ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
do
@@ -636,19 +746,18 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
{
/* drop whole buffer */
- vec_add1 (vec_drop_compress, tmp_bi);
- trim_front -= tmp->current_length;
if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
+ trim_front -= tmp->current_length;
+ vec_add1 (vec_drop_compress, tmp_bi);
tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
tmp_bi = tmp->next_buffer;
tmp = vlib_get_buffer (vm, tmp_bi);
@@ -686,13 +795,12 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- vec_add1 (vec_drop_compress, tmp_bi);
if (reass->first_bi == tmp_bi)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
- ++dropped_cnt;
+ vec_add1 (vec_drop_compress, tmp_bi);
}
if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -729,19 +837,27 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
ip6_header_t *ip = vlib_buffer_get_current (first_b);
u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
- ip6_ext_header_t *prev_hdr;
- frag_hdr =
- ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
- if (prev_hdr)
+ ip6_ext_hdr_chain_t hdr_chain;
+ ip6_ext_header_t *prev_hdr = 0;
+ int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
+ &hdr_chain);
+ if (res < 0 ||
+ (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
{
+ rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ goto free_buffers_and_return;
+ }
+ frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
+ if (res > 0)
+ {
+ prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
prev_hdr->next_hdr = frag_hdr->next_hdr;
}
else
{
ip->protocol = frag_hdr->next_hdr;
}
- if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset))
+ if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
@@ -799,6 +915,15 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP6_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
free_buffers_and_return:
@@ -834,12 +959,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
}
always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
- u32 * error0, ip6_frag_hdr_t * frag_hdr,
- bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+ u32 *error0, ip6_frag_hdr_t *frag_hdr,
+ bool is_custom_app, u32 *handoff_thread_idx,
+ int skip_bihash)
{
int consumed = 0;
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -865,6 +991,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
int more_fragments = ip6_frag_hdr_more (frag_hdr);
@@ -929,11 +1059,7 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr,
RANGE_OVERLAP, ~0);
}
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
- *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
- return IP6_FULL_REASS_RC_OK;
+ return IP6_FULL_REASS_RC_OVERLAP;
}
break;
}
@@ -947,6 +1073,12 @@ check_if_done_maybe:
~0);
}
}
+ else if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, then the packet must have been
+ // consumed
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (~0 != reass->last_packet_octet &&
reass->data_len == reass->last_packet_octet + 1)
{
@@ -964,6 +1096,12 @@ check_if_done_maybe:
}
else
{
+ if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, it should've been an atomic
+ // fragment and thus finalized
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (consumed)
{
*bi0 = ~0;
@@ -982,31 +1120,28 @@ check_if_done_maybe:
}
always_inline bool
-ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
- {
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
ip6_header_t *ip = vlib_buffer_get_current (b);
@@ -1019,15 +1154,17 @@ ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & ip->payload_length - (u8 *) ip);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
@@ -1041,16 +1178,16 @@ ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & frag_hdr->fragment_offset_and_more
- (u8 *) ip0);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline uword
-ip6_full_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_custom_app)
+ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_app, bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -1077,55 +1214,95 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_ext_hdr_chain_t hdr_chain;
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_NO_FRAG_HDR, 1);
+ // this is a mangled packet - no fragmentation
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_DROP;
+ ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
+ ~0);
+ goto skip_reass;
}
- if (!frag_hdr)
+ if (is_local && !rm->is_local_reass_enabled)
{
- // this is a regular packet - no fragmentation
- next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
+ next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
goto skip_reass;
}
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_RCVD, 1);
+ frag_hdr =
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_full_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_full_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
}
- if (!ip6_full_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
- !ip6_full_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
+
+ if (!ip6_full_reass_verify_fragment_multiple_8 (vm, node, b0,
+ frag_hdr) ||
+ !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0,
+ frag_hdr))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
+
+ int skip_bihash = 0;
ip6_full_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] = ip0->src_address.as_u64[0];
- kv.k.as_u64[1] = ip0->src_address.as_u64[1];
- kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
- kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
- kv.k.as_u64[5] = ip0->protocol;
+ if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr))
+ {
+ // this is atomic fragment and needs to be processed separately
+ skip_bihash = 1;
+ }
+ else
+ {
+ u32 fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+ kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+ kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+ kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+ kv.k.as_u64[4] =
+ ((u64) fib_index) << 32 | (u64) frag_hdr->identification;
+ /* RFC 8200: The Next Header values in the Fragment headers of
+ * different fragments of the same original packet may differ.
+ * Only the value from the Offset zero fragment packet is used
+ * for reassembly.
+ *
+ * Also, unlike IPv4, the IPv6 header doesn't contain the
+ * protocol value. */
+ kv.k.as_u64[5] = 0;
+ }
- ip6_full_reass_t *reass =
- ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
- &do_handoff);
+ ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+ vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash,
+ &n_left_to_next, &to_next);
if (reass)
{
@@ -1144,9 +1321,10 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
else if (reass)
{
u32 handoff_thread_idx;
- switch (ip6_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0, &error0,
- frag_hdr, is_custom_app, &handoff_thread_idx))
+ u32 counter = ~0;
+ switch (ip6_full_reass_update (
+ vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+ is_custom_app, &handoff_thread_idx, skip_bihash))
{
case IP6_FULL_REASS_RC_OK:
/* nothing to do here */
@@ -1158,25 +1336,36 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
handoff_thread_idx;
break;
case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_FULL_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_NO_BUF, 1);
- ip6_full_reass_drop_all (vm, node, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_NO_BUF;
+ break;
+ case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ case IP6_FULL_REASS_RC_OVERLAP:
+ counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
break;
case IP6_FULL_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR,
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed in internal error cases only, as
+ * the incoming packet is already dropped in other cases.
+ * Adding bi0 back to the reassembly list also fixes the
+ * leaking of buffers during internal errors.
+ *
+ * Also, it doesn't make sense to send these buffers to the
+ * custom app, as these fragments hit internal errors. */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter,
1);
- ip6_full_reass_drop_all (vm, node, reass);
+ ip6_full_reass_drop_all (vm, node, reass, &n_left_to_next,
+ &to_next);
ip6_full_reass_free (rm, rt, reass);
goto next_packet;
break;
@@ -1190,7 +1379,6 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
}
else
{
- vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
next0 = fvnb->ip.reass.error_next_index;
}
error0 = IP6_ERROR_REASS_LIMIT_REACHED;
@@ -1223,6 +1411,15 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
{
vnet_feature_next (&next0, b0);
}
+
+ /* Also increment the to-custom-app counter, as this fragment is
+ * also going to the application */
+ if (is_custom_app)
+ {
+ vlib_node_increment_counter (
+ vm, node->node_index, IP6_ERROR_REASS_TO_CUSTOM_APP, 1);
+ }
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -1249,26 +1446,21 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_full_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reass_node) = {
.name = "ip6-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1279,20 +1471,45 @@ VLIB_REGISTER_NODE (ip6_full_reass_node) = {
},
};
+VLIB_NODE_FN (ip6_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reass_node) = {
+ .name = "ip6-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff",
+ },
+};
+
VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
.name = "ip6-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1311,6 +1528,30 @@ VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
.runs_after = 0,
};
+VLIB_NODE_FN (ip6_full_reass_node_custom)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* is_custom_app */,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reass_node_custom) = {
+ .name = "ip6-full-reassembly-custom",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-custom-hoff",
+ },
+};
+
#ifndef CLIB_MARCH_VARIANT
static u32
ip6_full_reass_get_nbuckets ()
@@ -1319,7 +1560,9 @@ ip6_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP6_FULL_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP6_FULL_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
@@ -1446,9 +1689,6 @@ ip6_full_reass_init_function (vlib_main_t * vm)
clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets,
nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
- ASSERT (node);
- rm->ip6_drop_idx = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
ASSERT (node);
rm->ip6_icmp_error_idx = node->index;
@@ -1456,11 +1696,16 @@ ip6_full_reass_init_function (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, ip_main_init)))
return error;
ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
- ip6_full_reass_node.index);
+ ip6_local_full_reass_node.index);
+ rm->is_local_reass_enabled = 1;
rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_full_reass_node_feature.index, 0);
+ rm->fq_custom_index =
+ vlib_frame_queue_main_init (ip6_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
return error;
@@ -1504,26 +1749,53 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
int index;
const uword nthreads = vlib_num_workers () + 1;
u32 *vec_icmp_bi = NULL;
+ u32 n_left_to_next, *to_next;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip6_full_reass_per_thread_t *rt =
&rm->per_thread_data[thread_index];
+ u32 reass_timeout_cnt = 0;
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+ /* Pace the number of timeouts handled per thread, to avoid barrier
+ * sync issues in real world scenarios */
+
+ u32 beg = rt->last_id;
+ /* to ensure we walk at least once per sec per context */
+ u32 end = beg + (IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS /
+ MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
+
int *i;
vec_foreach (i, pool_indexes_to_free)
{
ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
u32 icmp_bi = ~0;
- ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi);
+
+ reass_timeout_cnt += reass->fragments_n;
+ ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi,
+ &n_left_to_next, &to_next);
if (~0 != icmp_bi)
vec_add1 (vec_icmp_bi, icmp_bi);
@@ -1531,6 +1803,10 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
clib_spinlock_unlock (&rt->lock);
+ if (reass_timeout_cnt)
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT,
+ reass_timeout_cnt);
}
while (vec_len (vec_icmp_bi) > 0)
@@ -1546,7 +1822,6 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
trace_frame = 1;
- b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
to_next[0] = bi;
++f->n_vectors;
to_next += 1;
@@ -1560,7 +1835,7 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_free (vec_icmp_bi);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1568,14 +1843,13 @@ ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
}
VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
- .function = ip6_full_reass_walk_expired,
- .format_trace = format_ip6_full_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-full-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .function = ip6_full_reass_walk_expired,
+ .format_trace = format_ip6_full_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-full-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
static u8 *
@@ -1733,9 +2007,10 @@ format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip6_full_reass_node_type_t type,
+ bool is_local)
{
ip6_full_reass_main_t *rm = &ip6_full_reass_main;
@@ -1751,8 +2026,28 @@ ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
-
+ switch (type)
+ {
+ case NORMAL:
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
+ break;
+ case FEATURE:
+ fq_index = rm->fq_feature_index;
+ break;
+ case CUSTOM:
+ fq_index = rm->fq_custom_index;
+ break;
+ default:
+ clib_warning ("Unexpected `type' (%d)!", type);
+ ASSERT (0);
+ }
while (n_left_from > 0)
{
ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
@@ -1784,8 +2079,8 @@ VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
@@ -1802,14 +2097,34 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = {
+ .name = "ip6-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-
VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
.name = "ip6-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1824,6 +2139,27 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_full_reassembly_custom_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reassembly_custom_handoff_node) = {
+ .name = "ip6-full-reass-custom-hoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
#ifndef CLIB_MARCH_VARIANT
int
ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
@@ -1849,8 +2185,37 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip6-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
+}
+
+void
+ip6_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ if (!ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 1;
+ ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
+ ip6_local_full_reass_node.index);
+ }
+ }
+ else
+ {
+ if (ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 0;
+ ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION);
+ }
+ }
+}
+
+int
+ip6_local_full_reass_enabled ()
+{
+ return ip6_full_reass_main.is_local_reass_enabled;
}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_full_reass.h b/src/vnet/ip/reass/ip6_full_reass.h
index 546075b04b4..f66cb67d796 100644
--- a/src/vnet/ip/reass/ip6_full_reass.h
+++ b/src/vnet/ip/reass/ip6_full_reass.h
@@ -46,6 +46,8 @@ vnet_api_error_t ip6_full_reass_enable_disable (u32 sw_if_index,
int ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
+void ip6_local_full_reass_enable_disable (int enable);
+int ip6_local_full_reass_enabled ();
#endif /* __included_ip6_full_reass_h */
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index 28941311f50..fe2ed05555c 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -26,6 +26,7 @@
#include <vnet/ip/ip6_to_ip4.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
#define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100
@@ -40,6 +41,7 @@ typedef enum
IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_SV_REASS_RC_INTERNAL_ERROR,
IP6_SV_REASS_RC_UNSUPP_IP_PROTO,
+ IP6_SV_REASS_RC_INVALID_FRAG_LEN,
} ip6_sv_reass_rc_t;
typedef struct
@@ -50,7 +52,7 @@ typedef struct
{
ip6_address_t src;
ip6_address_t dst;
- u32 xx_id;
+ u32 fib_index;
u32 frag_id;
u8 unused[7];
u8 proto;
@@ -148,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -214,7 +217,7 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
- s = format (s, "[not-fragmented]");
+ s = format (s, "[not fragmented or atomic fragment]");
break;
}
return s;
@@ -309,6 +312,8 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
ip6_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -368,10 +373,14 @@ ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip6_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if another worker already created a context, work with that copy
+ if (-2 == rv)
+ goto again;
}
return reass;
@@ -399,6 +408,10 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_SV_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
fvnb->ip.reass.range_first = fragment_first;
@@ -440,22 +453,18 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
}
always_inline bool
-ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
- {
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
@@ -505,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
}
always_inline uword
-ip6_sv_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_next, bool custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -520,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -528,23 +545,31 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
vlib_buffer_t *b0;
u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
u32 error0 = IP6_ERROR_NONE;
-
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
- ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_frag_hdr_t *frag_hdr;
+ ip6_ext_hdr_chain_t hdr_chain;
+ bool is_atomic_fragment = false;
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res >= 0 &&
+ hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+ is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr));
}
- if (!frag_hdr)
+
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
+ is_atomic_fragment)
{
- // this is a regular packet - no fragmentation
+ // this is a regular unfragmented packet or an atomic fragment
if (!ip6_get_port
(vm, b0, ip0, b0->current_length,
&(vnet_buffer (b0)->ip.reass.ip_proto),
@@ -560,7 +585,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -571,13 +597,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
}
goto packet_enqueue;
}
+
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
+
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_sv_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_sv_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
@@ -597,10 +625,15 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
+ if (custom_context)
+ kv.k.as_u64[4] =
+ (u64) *context << 32 | (u64) frag_hdr->identification;
+ else
+ kv.k.as_u64[4] =
+ ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+ << 32 |
+ (u64) frag_hdr->identification;
kv.k.as_u64[5] = ip0->protocol;
ip6_sv_reass_t *reass =
@@ -611,6 +644,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -635,7 +670,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -645,31 +681,30 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
+ u32 counter = ~0;
switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr))
{
case IP6_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_UNSUPP_IP_PROTO,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
break;
case IP6_SV_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR, 1);
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ break;
+ case IP6_SV_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip6_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
@@ -717,7 +752,8 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -730,11 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
+ if (custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
+ if (custom_context)
+ context += 1;
n_left_from -= 1;
}
@@ -745,26 +795,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_sv_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
.name = "ip6-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -774,22 +819,22 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
.name = "ip6-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -799,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* custom next */,
+ true /* custom context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = {
+ .name = "ip6-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .format_trace = format_ip6_sv_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
static u32
@@ -959,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
@@ -1009,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1017,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip6_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1033,33 +1098,31 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
- .function = ip6_sv_reass_walk_expired,
- .format_trace = format_ip6_sv_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-sv-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .function = ip6_sv_reass_walk_expired,
+ .format_trace = format_ip6_sv_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-sv-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_sv_reass_key (u8 * s, va_list * args)
{
ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *);
- s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip6_address, &key->src, format_ip6_address,
- &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
+ s =
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip6_address, &key->src, format_ip6_address,
+ &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1116,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1146,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
.path = "show ip6-sv-reassembly",
.short_help = "show ip6-sv-reassembly [details]",
.function = show_ip6_sv_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1202,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_context)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ?
+ rm->fq_feature_index :
+ (custom_context ? rm->fq_custom_context_index : rm->fq_index);
while (n_left_from > 0)
{
@@ -1239,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1253,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
.name = "ip6-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1276,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, true /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
.name = "ip6-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1294,7 +1360,28 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, true /* custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = {
+ .name = "ip6-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
+ .error_strings = ip6_sv_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_sv_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
int
@@ -1323,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
}
return 0;
}
+
+uword
+ip6_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index,
+ node_index);
+}
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h
index 81ac2312bdf..7dc9df132dd 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.h
+++ b/src/vnet/ip/reass/ip6_sv_reass.h
@@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index,
u8 enable_disable);
int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
+uword ip6_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip6_sv_reass_h */
diff --git a/src/vnet/ip/reass/reassembly.rst b/src/vnet/ip/reass/reassembly.rst
new file mode 100644
index 00000000000..49e0a8de6e6
--- /dev/null
+++ b/src/vnet/ip/reass/reassembly.rst
@@ -0,0 +1,221 @@
+.. _reassembly:
+
+IP Reassembly
+=============
+
+Some VPP functions need access to whole packet and/or stream
+classification based on L4 headers. Reassembly functionality allows
+both former and latter.
+
+Full reassembly vs shallow (virtual) reassembly
+-----------------------------------------------
+
+There are two kinds of reassembly available in VPP:
+
+1. Full reassembly changes a stream of packet fragments into one
+packet containing all data reassembled with fragment bits cleared
+and fragment header stripped (in case of ip6). Note that resulting
+packet may come out of reassembly as a buffer chain. Because it's
+impractical to parse headers which are split over multiple vnet
+buffers, vnet_buffer_chain_linearize() is called after reassembly so
+that L2/L3/L4 headers can be found in first buffer. Full reassembly
+is costly and shouldn't be used unless necessary. Full reassembly is by
+default enabled for both ipv4 and ipv6 "for us" traffic
+- that is packets aimed at VPP addresses. This can be disabled via API
+if desired, in which case "for us" fragments are dropped.
+
+2. Shallow (virtual) reassembly allows various classifying and/or
+translating features to work with fragments without having to
+understand fragmentation. It works by extracting L4 data and adding
+them to vnet_buffer for each packet/fragment passing through SVR
+nodes. This operation is performed for both fragments and regular
+packets, allowing consuming code to treat all packets in same way. SVR
+caches incoming packet fragments (buffers) until first fragment is
+seen. Then it extracts L4 data from that first fragment, fills it for
+any cached fragments and transmits them in the same order as they were
+received. From that point on, any other passing fragments get L4 data
+populated in vnet_buffer based on reassembly context.
+
+Multi-worker behaviour
+^^^^^^^^^^^^^^^^^^^^^^
+
+Both reassembly types deal with fragments arriving on different workers
+via handoff mechanism. All reassembly contexts are stored in pools.
+Bihash mapping 5-tuple key to a value containing pool index and thread
+index is used for lookups. When a lookup finds an existing reassembly on
+a different thread, it hands off the fragment to that thread. If lookup
+fails, a new reassembly context is created and current worker becomes
+owner of that context. Further fragments received on other worker
+threads are then handed off to the owner worker thread.
+
+Full reassembly also remembers thread index where first fragment (as in
+fragment with fragment offset 0) was seen and uses handoff mechanism to
+send the reassembled packet out on that thread even if pool owner is
+a different thread. This then requires an additional handoff to free
+reassembly context as only pool owner can do that in a thread-safe way.
+
+Limits
+^^^^^^
+
+Because reassembly could be an attack vector, there is a configurable
+limit on the number of concurrent reassemblies and also maximum
+fragments per packet.
+
+Custom applications
+^^^^^^^^^^^^^^^^^^^
+
+Both reassembly features can be used by custom applications which
+are not part of the VPP source tree. Be it patches or 3rd party plugins,
+they can build their own graph paths by using "-custom*" versions of
+nodes. Reassembly then reads next_index and error_next_index for each
+buffer from vnet_buffer, allowing custom application to steer
+both reassembled packets and any packets which are considered an error
+in a way the custom application requires.
+
+Full reassembly
+---------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) or CLI:
+
+``set interface reassembly <interface-name> [on|off|ip4|ip6]``
+
+here ``on`` means both ip4 and ip6.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-full-reassembly [details]``
+
+For ip6:
+
+``show ip6-full-reassembly [details]``
+
+Global full reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For default values, see #defines in
+
+`ip4_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_full_reass.c>`_
+
+========================================= ==========================================
+#define description
+----------------------------------------- ------------------------------------------
+IP4_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+========================================= ==========================================
+
+and
+
+`ip6_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_full_reass.c>`_
+
+========================================= ==========================================
+#define description
+----------------------------------------- ------------------------------------------
+IP6_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+========================================= ==========================================
+
+Finished/expired contexts
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reassembly contexts are freed either when reassembly is finished - when
+all data has been received - or in case of timeout. There is a process
+walking all reassemblies, freeing any expired ones.
+
+Shallow (virtual) reassembly
+----------------------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) only as
+there is no value in turning SVR on by hand without a feature consuming
+buffer metadata. SVR is designed to be turned on by a feature requiring
+it in a programmatic way.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-sv-reassembly [details]``
+
+For ip6:
+
+``show ip6-sv-reassembly [details]``
+
+Global shallow reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For default values, see #defines in
+
+`ip4_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_sv_reass.c>`_
+
+============================================ ==========================================
+#define description
+-------------------------------------------- ------------------------------------------
+IP4_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+============================================ ==========================================
+
+and
+
+`ip6_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_sv_reass.c>`_
+
+============================================ ==========================================
+#define description
+-------------------------------------------- ------------------------------------------
+IP6_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+============================================ ==========================================
+
+Expiring contexts
+^^^^^^^^^^^^^^^^^
+
+There is no way of knowing when a reassembly is finished without
+performing (an almost) full reassembly, so contexts in SVR cannot be
+freed in the same way as in full reassembly. Instead a different
+approach is taken. Least recently used (LRU) list is maintained where
+reassembly contexts are ordered based on last update. The oldest
+context is then freed whenever SVR hits limit on number of concurrent
+reassembly contexts. There is also a process reaping expired sessions,
+similar to the one in full reassembly.
+
+Truncated packets
+^^^^^^^^^^^^^^^^^
+
+When SVR detects that a packet has been truncated in a way where L4
+headers are not available, it will mark it as such in vnet_buffer,
+allowing downstream features to handle such packets as they deem fit.
+
+Fast path/slow path
+^^^^^^^^^^^^^^^^^^^
+
+SVR is implemented in a fast path/slow path way. By default, it assumes
+that any passing traffic doesn't contain fragments, processing buffers
+in a dual-loop. If it sees a fragment, it then jumps to single-loop
+processing.
+
+Feature enabled by other features/reference counting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SVR feature is enabled by some other features, like NAT, when those
+features are enabled. For this to work, it implements a reference
+counted API for enabling/disabling SVR.
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 92e8002e55a..97e74429e88 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -29,7 +29,6 @@
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
union {
@@ -40,7 +39,6 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vtep4_key_t;
-/* *INDENT-ON* */
/**
* @brief Tunnel endpoint key (IPv6)
@@ -51,13 +49,11 @@ typedef CLIB_PACKED
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
ip6_address_t addr;
u32 fib_index;
}) vtep6_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -111,13 +107,13 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
return VTEP_CHECK_PASS;
}
-#ifdef CLIB_HAVE_VEC512
typedef struct
{
vtep4_key_t vtep4_cache[8];
int idx;
} vtep4_cache_t;
+#ifdef CLIB_HAVE_VEC512
always_inline u8
vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512)