Diffstat (limited to 'src/vnet/ip')
 -rw-r--r--  src/vnet/ip/icmp6.c               |    2
 -rw-r--r--  src/vnet/ip/ip.api                |   17
 -rw-r--r--  src/vnet/ip/ip.h                  |    2
 -rw-r--r--  src/vnet/ip/ip4_to_ip6.h          |   15
 -rw-r--r--  src/vnet/ip/ip6.h                 |    5
 -rw-r--r--  src/vnet/ip/ip6_forward.c         |  147
 -rw-r--r--  src/vnet/ip/ip6_input.h           |   66
 -rw-r--r--  src/vnet/ip/ip6_ll_table.c        |   78
 -rw-r--r--  src/vnet/ip/ip6_to_ip4.h          |   24
 -rw-r--r--  src/vnet/ip/ip_api.c              |   69
 -rw-r--r--  src/vnet/ip/ip_sas.c              |    2
 -rw-r--r--  src/vnet/ip/ip_test.c             |   54
 -rw-r--r--  src/vnet/ip/lookup.c              |   11
 -rw-r--r--  src/vnet/ip/reass/ip4_sv_reass.c  | 1039
 -rw-r--r--  src/vnet/ip/reass/ip4_sv_reass.h  |   28
 -rw-r--r--  src/vnet/ip/reass/ip6_sv_reass.c  |  827
 -rw-r--r--  src/vnet/ip/reass/ip6_sv_reass.h  |   31
 17 files changed, 1549 insertions(+), 868 deletions(-)
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index b095f679cc8..f93ebce4bf1 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -338,7 +338,7 @@ ip6_icmp_error (vlib_main_t * vm,
if (throttle_check (&icmp_throttle, thread_index, r0, seed))
{
- vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1);
+ vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1);
from += 1;
n_left_from -= 1;
continue;
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 967f56cf917..fc7d7582dec 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -57,6 +57,23 @@ autoreply define ip_table_add_del
vl_api_ip_table_t table;
};
+/** \brief Add / del table request - version 2
+ A table can be added multiple times, but needs to be deleted only once.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table - the FIB table to add or del
+ @param create_mfib - whether to create mfib or not
+ @param is_add - add or del
+*/
+autoreply define ip_table_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip_table_t table;
+ bool create_mfib [default=true];
+ bool is_add [default=true];
+};
+
/** \brief Allocate an unused table
A table can be added multiple times.
If a large number of tables are in use (millions), this API might
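
The new ip_table_add_del_v2 message mirrors ip_table_add_del but adds a create_mfib flag, so a table can be created without its multicast (mfib) companion. A minimal client-side sketch, modeled on the VAT test added later in this patch (the M/S/W macros and the message struct come from the generated API code; plugin boilerplate and error handling are elided):

static int
create_unicast_only_table (vat_main_t *vam, u32 table_id, int is_ip6)
{
  vl_api_ip_table_add_del_v2_t *mp;
  int ret = 0;

  M (IP_TABLE_ADD_DEL_V2, mp);		 /* allocate + init message header */
  mp->table.table_id = htonl (table_id); /* API fields are network order */
  mp->table.is_ip6 = is_ip6;
  mp->is_add = 1;
  mp->create_mfib = 0;			 /* skip the mfib companion table */
  S (mp);				 /* send it... */
  W (ret);				 /* ...and wait for the autoreply */
  return ret;
}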
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 9ebefa0cf5d..084243dccfa 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -262,7 +262,7 @@ extern vlib_node_registration_t ip4_inacl_node;
extern vlib_node_registration_t ip6_inacl_node;
void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
- const u8 * name);
+ u8 create_mfib, const u8 *name);
void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index 57c2b6ff78b..d356fd5411c 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -46,10 +46,9 @@ static u8 icmp_to_icmp6_updater_pointer_table[] =
* @returns Port number on success, 0 otherwise.
*/
always_inline u16
-ip4_get_port (ip4_header_t * ip, u8 sender)
+ip4_get_port (ip4_header_t *ip, u8 sender)
{
- if (ip->ip_version_and_header_length != 0x45 ||
- ip4_get_fragment_offset (ip))
+ if (ip->ip_version_and_header_length != 0x45 || ip4_get_fragment_offset (ip))
return 0;
if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
@@ -65,7 +64,15 @@ ip4_get_port (ip4_header_t * ip, u8 sender)
{
return *((u16 *) (icmp + 1));
}
- else if (clib_net_to_host_u16 (ip->length) >= 64)
+ /*
+ * Minimum length here consists of:
+ * - outer IP header length
+ * - outer ICMP header length (2*sizeof (icmp46_header_t))
+ * - inner IP header length
+ * - first 8 bytes of payload of original packet in case of ICMP error
+ */
+ else if (clib_net_to_host_u16 (ip->length) >=
+ 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + 8)
{
ip = (ip4_header_t *) (icmp + 2);
if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
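
The replaced magic number can be checked by hand, assuming the standard VPP struct sizes (20-byte ip4_header_t, 4-byte icmp46_header_t): the expression works out to 2*20 + 2*4 + 8 = 56 bytes, so the new check is slightly more permissive than the old hard-coded 64. A standalone sketch of the same arithmetic (local copies of the struct layouts, not the real VPP headers):

#include <assert.h>
#include <stdint.h>

/* minimal local stand-ins with the same sizes as the VPP definitions */
typedef struct { uint8_t vl, tos; uint16_t len, id, fo; uint8_t ttl, proto;
                 uint16_t csum; uint32_t src, dst; } ip4_header_t;          /* 20 B */
typedef struct { uint8_t type, code; uint16_t checksum; } icmp46_header_t;  /*  4 B */

int main (void)
{
  unsigned min_len = 2 * sizeof (ip4_header_t)     /* outer + inner IP header */
                     + 2 * sizeof (icmp46_header_t) /* 8-byte ICMP header */
                     + 8;                          /* first 8 payload bytes */
  assert (sizeof (ip4_header_t) == 20 && sizeof (icmp46_header_t) == 4);
  assert (min_len == 56);  /* vs. the old literal 64 */
  return 0;
}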
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index 56eec523d5b..f8462a5cbff 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -68,6 +68,11 @@ typedef struct
/* Index into FIB vector. */
u32 index;
+
+ /**
+ * The hash table DB
+ */
+ uword *fib_entry_by_dst_address;
} ip6_fib_t;
typedef struct ip6_mfib_t
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 48fb633fd32..31adc90ecab 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -51,6 +51,7 @@
#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/classify/vnet_classify.h>
+#include <vnet/adj/adj_dp.h>
#include <vnet/pg/pg.h>
#ifndef CLIB_MARCH_VARIANT
@@ -1897,18 +1898,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
- if (do_counters)
- {
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0) + rw_len0);
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index, adj_index1, 1,
- vlib_buffer_length_in_chain (vm, p1) + rw_len1);
- }
-
/* Check MTU of outgoing interface. */
u16 ip0_len =
clib_net_to_host_u16 (ip0->payload_length) +
@@ -1933,16 +1922,15 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
* wants to see the IP header */
if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
{
- p0->current_data -= rw_len0;
- p0->current_length += rw_len0;
+ vlib_buffer_advance (p0, -(word) rw_len0);
tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
next0 = adj0[0].rewrite_header.next_index;
if (PREDICT_FALSE
(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start_w_cfg_index
- (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
- adj0->ia_cfg_index);
+ vnet_feature_arc_start_w_cfg_index (
+ lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
+ adj0->ia_cfg_index);
}
else
{
@@ -1950,18 +1938,16 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
}
if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
{
- p1->current_data -= rw_len1;
- p1->current_length += rw_len1;
-
+ vlib_buffer_advance (p1, -(word) rw_len1);
tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
next1 = adj1[0].rewrite_header.next_index;
if (PREDICT_FALSE
(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start_w_cfg_index
- (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
- adj1->ia_cfg_index);
+ vnet_feature_arc_start_w_cfg_index (
+ lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1,
+ adj1->ia_cfg_index);
}
else
{
@@ -1969,40 +1955,46 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
}
if (is_midchain)
- {
- /* Guess we are only writing on ipv6 header. */
- vnet_rewrite_two_headers (adj0[0], adj1[0],
- ip0, ip1, sizeof (ip6_header_t));
- }
+ /* Guess we are only writing on ipv6 header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0], ip0, ip1,
+ sizeof (ip6_header_t));
else
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ethernet_header_t));
+ if (do_counters)
+ {
+ if (error0 == IP6_ERROR_NONE)
+ vlib_increment_combined_counter (
+ &adjacency_counters, thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ if (error1 == IP6_ERROR_NONE)
+ vlib_increment_combined_counter (
+ &adjacency_counters, thread_index, adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+ }
+
if (is_midchain)
{
- if (adj0->sub_type.midchain.fixup_func)
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- if (adj1->sub_type.midchain.fixup_func)
- adj1->sub_type.midchain.fixup_func
- (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
+ if (error0 == IP6_ERROR_NONE)
+ adj_midchain_fixup (vm, adj0, p0, VNET_LINK_IP6);
+ if (error1 == IP6_ERROR_NONE)
+ adj_midchain_fixup (vm, adj1, p1, VNET_LINK_IP6);
}
if (is_mcast)
{
/*
* copy bytes from the IP address into the MAC rewrite
*/
- vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
- adj0->
- rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32[3],
- (u8 *) ip0);
- vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
- adj1->
- rewrite_header.dst_mcast_offset,
- &ip1->dst_address.as_u32[3],
- (u8 *) ip1);
+ if (error0 == IP6_ERROR_NONE)
+ vnet_ip_mcast_fixup_header (
+ IP6_MCAST_ADDR_MASK, adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32[3], (u8 *) ip0);
+ if (error1 == IP6_ERROR_NONE)
+ vnet_ip_mcast_fixup_header (
+ IP6_MCAST_ADDR_MASK, adj1->rewrite_header.dst_mcast_offset,
+ &ip1->dst_address.as_u32[3], (u8 *) ip1);
}
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -2061,28 +2053,10 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
}
}
- if (is_midchain)
- {
- /* Guess we are only writing on ip6 header. */
- vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
- }
- else
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_one_header (adj0[0], ip0,
- sizeof (ethernet_header_t));
-
/* Update packet buffer attributes/set output interface. */
rw_len0 = adj0[0].rewrite_header.data_bytes;
vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
- if (do_counters)
- {
- vlib_increment_combined_counter
- (&adjacency_counters,
- thread_index, adj_index0, 1,
- vlib_buffer_length_in_chain (vm, p0) + rw_len0);
- }
-
/* Check MTU of outgoing interface. */
u16 ip0_len =
clib_net_to_host_u16 (ip0->payload_length) +
@@ -2098,9 +2072,7 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
* wants to see the IP header */
if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
{
- p0->current_data -= rw_len0;
- p0->current_length += rw_len0;
-
+ vlib_buffer_advance (p0, -(word) rw_len0);
tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
@@ -2108,30 +2080,37 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (PREDICT_FALSE
(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
- vnet_feature_arc_start_w_cfg_index
- (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
- adj0->ia_cfg_index);
+ vnet_feature_arc_start_w_cfg_index (
+ lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0,
+ adj0->ia_cfg_index);
+
+ if (is_midchain)
+ /* Guess we are only writing on ip6 header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
+ else
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0,
+ sizeof (ethernet_header_t));
+
+ if (do_counters)
+ {
+ vlib_increment_combined_counter (
+ &adjacency_counters, thread_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ }
+
+ if (is_midchain && adj0->sub_type.midchain.fixup_func)
+ adj_midchain_fixup (vm, adj0, p0, VNET_LINK_IP6);
+ if (is_mcast)
+ vnet_ip_mcast_fixup_header (
+ IP6_MCAST_ADDR_MASK, adj0->rewrite_header.dst_mcast_offset,
+ &ip0->dst_address.as_u32[3], (u8 *) ip0);
}
else
{
p0->error = error_node->errors[error0];
}
- if (is_midchain)
- {
- if (adj0->sub_type.midchain.fixup_func)
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
- {
- vnet_ip_mcast_fixup_header (IP6_MCAST_ADDR_MASK,
- adj0->
- rewrite_header.dst_mcast_offset,
- &ip0->dst_address.as_u32[3],
- (u8 *) ip0);
- }
-
from += 1;
n_left_from -= 1;
to_next += 1;
@@ -2215,7 +2194,7 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
VLIB_REGISTER_NODE (ip6_midchain_node) = {
.name = "ip6-midchain",
.vector_size = sizeof (u32),
- .format_trace = format_ip6_forward_next_trace,
+ .format_trace = format_ip6_rewrite_trace,
.sibling_of = "ip6-rewrite",
};
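
Two behavioral changes hide in the churn above: adjacency counters, midchain fixups and mcast header fixups now run only when the per-packet error is IP6_ERROR_NONE (previously a packet that failed, e.g., the MTU check still bumped the counters), and the open-coded current_data/current_length arithmetic is replaced by vlib_buffer_advance. A minimal model of what that call does (the real function lives in vlib and additionally asserts bounds):

/* sketch: advancing by a negative amount "un-hides" bytes in front of
 * current_data -- here, the rewrite string prepended by ip6-rewrite */
typedef struct { int16_t current_data; uint16_t current_length; } buf_t;

static void
buf_advance (buf_t *b, int l)
{
  b->current_data += l;    /* l = -(word) rw_len0 moves the head back... */
  b->current_length -= l;  /* ...and grows the visible length by rw_len0 */
}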
diff --git a/src/vnet/ip/ip6_input.h b/src/vnet/ip/ip6_input.h
index 49e37ec1808..25eae62723d 100644
--- a/src/vnet/ip/ip6_input.h
+++ b/src/vnet/ip/ip6_input.h
@@ -53,11 +53,9 @@ typedef enum
} ip6_input_next_t;
always_inline void
-ip6_input_check_x2 (vlib_main_t * vm,
- vlib_node_runtime_t * error_node,
- vlib_buffer_t * p0, vlib_buffer_t * p1,
- ip6_header_t * ip0, ip6_header_t * ip1,
- u32 * next0, u32 * next1)
+ip6_input_check_x2 (vlib_main_t *vm, vlib_node_runtime_t *error_node,
+ vlib_buffer_t *p0, vlib_buffer_t *p1, ip6_header_t *ip0,
+ ip6_header_t *ip1, u32 *next0, u32 *next1)
{
u8 error0, error1;
@@ -65,13 +63,15 @@ ip6_input_check_x2 (vlib_main_t * vm,
/* Version != 6? Drop it. */
error0 =
- (clib_net_to_host_u32
- (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
- 6 ? IP6_ERROR_VERSION : error0;
+ (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6 ?
+ IP6_ERROR_VERSION :
+ error0;
error1 =
- (clib_net_to_host_u32
- (ip1->ip_version_traffic_class_and_flow_label) >> 28) !=
- 6 ? IP6_ERROR_VERSION : error1;
+ (clib_net_to_host_u32 (ip1->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6 ?
+ IP6_ERROR_VERSION :
+ error1;
/* hop limit < 1? Drop it. for link-local broadcast packets,
* like dhcpv6 packets from client has hop-limit 1, which should not
@@ -81,18 +81,18 @@ ip6_input_check_x2 (vlib_main_t * vm,
error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
/* L2 length must be at least minimal IP header. */
- error0 =
- p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
- error1 =
- p1->current_length < sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+ error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+ error1 = p1->current_length < sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
{
+ p0->error = error_node->errors[error0];
+
if (error0 == IP6_ERROR_TIME_EXPIRED)
{
- icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
- ICMP6_time_exceeded_ttl_exceeded_in_transit,
- 0);
+ icmp6_error_set_vnet_buffer (
+ p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
*next0 = IP6_INPUT_NEXT_ICMP_ERROR;
}
else
@@ -102,11 +102,13 @@ ip6_input_check_x2 (vlib_main_t * vm,
}
if (PREDICT_FALSE (error1 != IP6_ERROR_NONE))
{
+ p1->error = error_node->errors[error1];
+
if (error1 == IP6_ERROR_TIME_EXPIRED)
{
- icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
- ICMP6_time_exceeded_ttl_exceeded_in_transit,
- 0);
+ icmp6_error_set_vnet_buffer (
+ p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
*next1 = IP6_INPUT_NEXT_ICMP_ERROR;
}
else
@@ -117,9 +119,8 @@ ip6_input_check_x2 (vlib_main_t * vm,
}
always_inline void
-ip6_input_check_x1 (vlib_main_t * vm,
- vlib_node_runtime_t * error_node,
- vlib_buffer_t * p0, ip6_header_t * ip0, u32 * next0)
+ip6_input_check_x1 (vlib_main_t *vm, vlib_node_runtime_t *error_node,
+ vlib_buffer_t *p0, ip6_header_t *ip0, u32 *next0)
{
u8 error0;
@@ -127,9 +128,10 @@ ip6_input_check_x1 (vlib_main_t * vm,
/* Version != 6? Drop it. */
error0 =
- (clib_net_to_host_u32
- (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
- 6 ? IP6_ERROR_VERSION : error0;
+ (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6 ?
+ IP6_ERROR_VERSION :
+ error0;
/* hop limit < 1? Drop it. for link-local broadcast packets,
* like dhcpv6 packets from client has hop-limit 1, which should not
@@ -138,16 +140,16 @@ ip6_input_check_x1 (vlib_main_t * vm,
error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
/* L2 length must be at least minimal IP header. */
- error0 =
- p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+ error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
{
+ p0->error = error_node->errors[error0];
if (error0 == IP6_ERROR_TIME_EXPIRED)
{
- icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
- ICMP6_time_exceeded_ttl_exceeded_in_transit,
- 0);
+ icmp6_error_set_vnet_buffer (
+ p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit, 0);
*next0 = IP6_INPUT_NEXT_ICMP_ERROR;
}
else
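
Beyond the formatting churn, the x1/x2 check functions now record p->error up front for every errored packet, including the time-exceeded path that gets punted to icmp-error. The version test itself only inspects the top nibble of the first header word; a standalone illustration of the field layout (version 4 bits | traffic class 8 bits | flow label 20 bits):

#include <arpa/inet.h>
#include <stdint.h>

static int
ip6_version_ok (uint32_t ip_version_traffic_class_and_flow_label)
{
  uint32_t w = ntohl (ip_version_traffic_class_and_flow_label);
  return (w >> 28) == 6;  /* top nibble must be 6 */
}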
diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c
index f9172f6c50c..2234ea9df37 100644
--- a/src/vnet/ip/ip6_ll_table.c
+++ b/src/vnet/ip/ip6_ll_table.c
@@ -144,17 +144,20 @@ ip6_ll_table_entry_delete (const ip6_ll_prefix_t * ilp)
fib_node_index_t ip6_ll_entry_index;
u32 fib_index;
+ fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index);
+ if (~0 == fib_index)
+ return;
+
ip6_ll_entry_index = ip6_ll_table_lookup_exact_match (ilp);
+ if (FIB_NODE_INDEX_INVALID == ip6_ll_entry_index)
+ return;
- if (FIB_NODE_INDEX_INVALID != ip6_ll_entry_index)
- fib_table_entry_delete_index (ip6_ll_entry_index, FIB_SOURCE_IP6_ND);
+ fib_table_entry_delete_index (ip6_ll_entry_index, FIB_SOURCE_IP6_ND);
/*
* if there are no ND sourced prefixes left, then we can clean up this FIB
*/
- fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index);
- if (~0 != fib_index &&
- 0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6,
+ if (0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6,
FIB_SOURCE_IP6_ND))
{
fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND);
@@ -208,33 +211,10 @@ ip6_ll_table_show_all (vlib_main_t * vm, u32 fib_index)
vec_free (ctx.entries);
}
-typedef struct
-{
- u32 fib_index;
- u64 count_by_prefix_length[129];
-} count_routes_in_fib_at_prefix_length_arg_t;
-
-static int
-count_routes_in_fib_at_prefix_length (clib_bihash_kv_24_8_t * kvp, void *arg)
-{
- count_routes_in_fib_at_prefix_length_arg_t *ap = arg;
- int mask_width;
-
- if ((kvp->key[2] >> 32) != ap->fib_index)
- return (BIHASH_WALK_CONTINUE);
-
- mask_width = kvp->key[2] & 0xFF;
-
- ap->count_by_prefix_length[mask_width]++;
-
- return (BIHASH_WALK_CONTINUE);
-}
-
static clib_error_t *
ip6_ll_show_fib (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
fib_table_t *fib_table;
int verbose, matching;
ip6_address_t matching_address;
@@ -272,9 +252,6 @@ ip6_ll_show_fib (vlib_main_t * vm,
vec_foreach_index (sw_if_index, ip6_ll_table.ilt_fibs)
{
- fib_source_t source;
- u8 *s = NULL;
-
fib_index = ip6_ll_table.ilt_fibs[sw_if_index];
if (~0 == fib_index)
continue;
@@ -284,44 +261,9 @@ ip6_ll_show_fib (vlib_main_t * vm,
if (!(fib_table->ft_flags & FIB_TABLE_FLAG_IP6_LL))
continue;
- s = format (s, "%U, fib_index:%d, locks:[",
- format_fib_table_name, fib_index,
- FIB_PROTOCOL_IP6, fib_index);
- vec_foreach_index (source, fib_table->ft_locks)
- {
- if (0 != fib_table->ft_locks[source])
- {
- s = format (s, "%U:%d, ",
- format_fib_source, source, fib_table->ft_locks[source]);
- }
- }
- s = format (s, "]");
- vlib_cli_output (vm, "%v", s);
- vec_free (s);
-
- /* Show summary? */
+ ip6_fib_table_show (vm, fib_table, !verbose);
if (!verbose)
- {
- clib_bihash_24_8_t *h =
- &ip6_fib_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash;
- int len;
-
- vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
-
- clib_memset (ca, 0, sizeof (*ca));
- ca->fib_index = fib_index;
-
- clib_bihash_foreach_key_value_pair_24_8
- (h, count_routes_in_fib_at_prefix_length, ca);
-
- for (len = 128; len >= 0; len--)
- {
- if (ca->count_by_prefix_length[len])
- vlib_cli_output (vm, "%=20d%=16lld",
- len, ca->count_by_prefix_length[len]);
- }
- continue;
- }
+ continue;
if (!matching)
{
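
Net effect of the ip6_ll_table.c hunks: ip6_ll_table_entry_delete() now returns early when the interface has no link-local FIB or the entry is absent (previously the fib_index lookup happened only after the delete), and the per-prefix-length summary printing is delegated to ip6_fib_table_show(), which lets the local count_routes_in_fib_at_prefix_length bihash-walk helper and the hand-rolled lock printing be deleted.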
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index 29d5718d4da..ebabcd0b797 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -96,10 +96,10 @@ ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len,
* @returns 1 on success, 0 otherwise.
*/
always_inline u16
-ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
- u16 buffer_len, u8 * ip_protocol, u16 * src_port,
- u16 * dst_port, u8 * icmp_type_or_tcp_flags,
- u32 * tcp_ack_number, u32 * tcp_seq_number)
+ip6_get_port (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6,
+ u16 buffer_len, u8 *ip_protocol, u16 *src_port, u16 *dst_port,
+ u8 *icmp_type_or_tcp_flags, u32 *tcp_ack_number,
+ u32 *tcp_seq_number, void **l4_hdr)
{
u8 l4_protocol;
u16 l4_offset;
@@ -120,8 +120,19 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
*ip_protocol = l4_protocol;
}
l4 = u8_ptr_add (ip6, l4_offset);
+ if (l4_hdr)
+ *l4_hdr = l4;
if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP)
{
+ if ((IP_PROTOCOL_UDP == l4_protocol &&
+ u8_ptr_add (l4, sizeof (udp_header_t)) >
+ u8_ptr_add (vlib_buffer_get_current (b), b->current_length)) ||
+ (IP_PROTOCOL_TCP == l4_protocol &&
+ u8_ptr_add (l4, sizeof (tcp_header_t)) >
+ u8_ptr_add (vlib_buffer_get_current (b), b->current_length)))
+ {
+ return 0;
+ }
if (src_port)
*src_port = ((udp_header_t *) (l4))->src_port;
if (dst_port)
@@ -135,6 +146,11 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
}
else if (l4_protocol == IP_PROTOCOL_ICMP6)
{
+ if (u8_ptr_add (l4, sizeof (icmp46_header_t)) >
+ u8_ptr_add (vlib_buffer_get_current (b), b->current_length))
+ {
+ return 0;
+ }
icmp46_header_t *icmp = (icmp46_header_t *) (l4);
if (icmp_type_or_tcp_flags)
*icmp_type_or_tcp_flags = ((icmp46_header_t *) (l4))->type;
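
ip6_get_port() previously trusted that the L4 header was inside the buffer; the new checks refuse to read past current_length, and the new l4_hdr out-parameter lets callers reuse the parsed L4 offset. The guard pattern, reduced to its essentials (hypothetical standalone helper mirroring the u8_ptr_add comparisons):

#include <stdint.h>

/* nonzero if [l4, l4 + hdr_len) fits inside the visible buffer */
static int
l4_hdr_in_bounds (const uint8_t *buf_start, uint32_t buf_len,
                  const uint8_t *l4, uint32_t hdr_len)
{
  return l4 + hdr_len <= buf_start + buf_len;
}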
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index 644b4988abc..1f025fa1113 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -636,7 +636,8 @@ vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
if (mp->is_add)
{
- ip_table_create (fproto, table_id, 1, mp->table.name);
+ ip_table_create (fproto, table_id, 1 /* is_api */, 1 /* create_mfib */,
+ mp->table.name);
}
else
{
@@ -647,6 +648,28 @@ vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
}
void
+vl_api_ip_table_add_del_v2_t_handler (vl_api_ip_table_add_del_v2_t *mp)
+{
+ vl_api_ip_table_add_del_v2_reply_t *rmp;
+ fib_protocol_t fproto =
+ (mp->table.is_ip6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+ u32 table_id = ntohl (mp->table.table_id);
+ int rv = 0;
+
+ if (mp->is_add)
+ {
+ ip_table_create (fproto, table_id, 1 /* is_api */, mp->create_mfib,
+ mp->table.name);
+ }
+ else
+ {
+ ip_table_delete (fproto, table_id, 1);
+ }
+
+ REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_V2_REPLY);
+}
+
+void
vl_api_ip_table_allocate_t_handler (vl_api_ip_table_allocate_t *mp)
{
vl_api_ip_table_allocate_reply_t *rmp;
@@ -661,7 +684,8 @@ vl_api_ip_table_allocate_t_handler (vl_api_ip_table_allocate_t *mp)
if (~0 == table_id)
rv = VNET_API_ERROR_EAGAIN;
else
- ip_table_create (fproto, table_id, 1, mp->table.name);
+ ip_table_create (fproto, table_id, 1 /* is_api */, 1 /* create_mfib */,
+ mp->table.name);
REPLY_MACRO2 (VL_API_IP_TABLE_ALLOCATE_REPLY, {
clib_memcpy_fast (&rmp->table, &mp->table, sizeof (mp->table));
@@ -915,8 +939,8 @@ vl_api_ip_route_lookup_v2_t_handler (vl_api_ip_route_lookup_v2_t *mp)
}
void
-ip_table_create (fib_protocol_t fproto,
- u32 table_id, u8 is_api, const u8 * name)
+ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
+ u8 create_mfib, const u8 *name)
{
u32 fib_index, mfib_index;
vnet_main_t *vnm = vnet_get_main ();
@@ -936,16 +960,23 @@ ip_table_create (fib_protocol_t fproto,
* their own unicast tables.
*/
fib_index = fib_table_find (fproto, table_id);
- mfib_index = mfib_table_find (fproto, table_id);
-
/*
* Always try to re-lock in case the fib was deleted by an API call
* but was not yet freed because some other locks were held
*/
fib_table_find_or_create_and_lock_w_name (
fproto, table_id, (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI), name);
- mfib_table_find_or_create_and_lock_w_name (
- fproto, table_id, (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI), name);
+
+ if (create_mfib)
+ {
+ /* same for mfib, if need be */
+ mfib_index = mfib_table_find (fproto, table_id);
+ mfib_table_find_or_create_and_lock_w_name (
+ fproto, table_id, (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI),
+ name);
+ }
+ else
+ mfib_index = 0;
if ((~0 == fib_index) || (~0 == mfib_index))
call_elf_section_ip_table_callbacks (vnm, table_id, 1 /* is_add */ ,
@@ -1655,9 +1686,10 @@ vl_api_ip_table_replace_begin_t_handler (vl_api_ip_table_replace_begin_t * mp)
rv = VNET_API_ERROR_NO_SUCH_FIB;
else
{
+ u32 mfib_index = mfib_table_find (fproto, ntohl (mp->table.table_id));
fib_table_mark (fib_index, fproto, FIB_SOURCE_API);
- mfib_table_mark (mfib_table_find (fproto, ntohl (mp->table.table_id)),
- fproto, MFIB_SOURCE_API);
+ if (mfib_index != INDEX_INVALID)
+ mfib_table_mark (mfib_index, fproto, MFIB_SOURCE_API);
}
REPLY_MACRO (VL_API_IP_TABLE_REPLACE_BEGIN_REPLY);
}
@@ -1677,10 +1709,10 @@ vl_api_ip_table_replace_end_t_handler (vl_api_ip_table_replace_end_t * mp)
rv = VNET_API_ERROR_NO_SUCH_FIB;
else
{
+ u32 mfib_index = mfib_table_find (fproto, ntohl (mp->table.table_id));
fib_table_sweep (fib_index, fproto, FIB_SOURCE_API);
- mfib_table_sweep (mfib_table_find
- (fproto, ntohl (mp->table.table_id)), fproto,
- MFIB_SOURCE_API);
+ if (mfib_index != INDEX_INVALID)
+ mfib_table_sweep (mfib_index, fproto, MFIB_SOURCE_API);
}
REPLY_MACRO (VL_API_IP_TABLE_REPLACE_END_REPLY);
}
@@ -1703,6 +1735,7 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
vnet_sw_interface_t *si;
+ u32 mfib_index;
/* Shut down interfaces in this FIB / clean out intfc routes */
pool_foreach (si, im->sw_interfaces)
@@ -1717,8 +1750,10 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
}
fib_table_flush (fib_index, fproto, FIB_SOURCE_API);
- mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)),
- fproto, MFIB_SOURCE_API);
+
+ mfib_index = mfib_table_find (fproto, ntohl (mp->table.table_id));
+ if (mfib_index != INDEX_INVALID)
+ mfib_table_flush (mfib_index, fproto, MFIB_SOURCE_API);
}
REPLY_MACRO (VL_API_IP_TABLE_FLUSH_REPLY);
@@ -1889,7 +1924,7 @@ vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp)
{
vl_api_ip_local_reass_get_reply_t *rmp;
int rv = 0;
- REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET, {
+ REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET_REPLY, {
rmp->ip4_is_enabled = ip4_local_full_reass_enabled ();
rmp->ip6_is_enabled = ip6_local_full_reass_enabled ();
});
@@ -2128,6 +2163,8 @@ ip_api_hookup (vlib_main_t * vm)
am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1);
vl_api_set_msg_thread_safe (
am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ADDRESS_DUMP,
+ 1);
return 0;
}
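
One subtlety in the reworked ip_table_create(): when create_mfib is false, mfib_index is set to 0 rather than left as ~0. Since the follow-up test is (~0 == fib_index) || (~0 == mfib_index), a zero mfib_index can never trigger the ELF-section table callbacks by itself; they fire only if the unicast FIB lookup came back empty, i.e. the table is genuinely new. Condensed (hypothetical sketch, not the literal code):

/* hedged reading of the new control flow in ip_table_create() */
mfib_index = create_mfib ? mfib_table_find (fproto, table_id) /* ~0 if new */
                         : 0;  /* a "valid" index, so it never matches ~0 */
if ((~0 == fib_index) || (~0 == mfib_index))
  run_table_add_callbacks ();  /* placeholder for the elf-section callbacks */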
diff --git a/src/vnet/ip/ip_sas.c b/src/vnet/ip/ip_sas.c
index 0fc261724f1..01f6c90baf8 100644
--- a/src/vnet/ip/ip_sas.c
+++ b/src/vnet/ip/ip_sas.c
@@ -54,6 +54,8 @@ ip6_sas_commonlen (const ip6_address_t *a1, const ip6_address_t *a2)
static int
ip4_sas_commonlen (const ip4_address_t *a1, const ip4_address_t *a2)
{
+ if (!a1 || !a2)
+ return 0;
u64 a =
clib_net_to_host_u32 (a1->as_u32) ^ clib_net_to_host_u32 (a2->as_u32);
if (a == 0)
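
ip4_sas_commonlen() computes how many leading address bits two candidates share -- the XOR of the addresses has its first set bit at the first differing position -- and the new NULL guard just makes the zero-length answer explicit for missing addresses. The core computation as a standalone function (hypothetical helper, host-byte-order inputs):

#include <stdint.h>

static int
ip4_common_prefix_len (uint32_t a, uint32_t b)  /* host byte order */
{
  uint32_t x = a ^ b;
  return x ? __builtin_clz (x) : 32;  /* count of leading equal bits */
}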
diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c
index 727afba67f4..0d1c71063ae 100644
--- a/src/vnet/ip/ip_test.c
+++ b/src/vnet/ip/ip_test.c
@@ -464,6 +464,60 @@ api_ip_table_add_del (vat_main_t *vam)
}
static int
+api_ip_table_add_del_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_add_del_v2_t *mp;
+ u8 create_mfib = 1;
+ u32 table_id = ~0;
+ u8 is_ipv6 = 0;
+ u8 is_add = 1;
+ int ret = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "table %d", &table_id))
+ ;
+ else if (unformat (i, "no-mfib"))
+ create_mfib = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ errmsg ("missing table-ID");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_TABLE_ADD_DEL_V2, mp);
+
+ mp->table.table_id = ntohl (table_id);
+ mp->table.is_ip6 = is_ipv6;
+ mp->is_add = is_add;
+ mp->create_mfib = create_mfib;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
api_ip_table_replace_begin (vat_main_t *vam)
{
unformat_input_t *i = vam->input;
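
Driven from vpp_api_test, the new exerciser above accepts invocations assembled from its unformat arms, e.g. ip_table_add_del_v2 table 100 no-mfib, or with del instead of add (hypothetical example inputs); omitting the table argument fails with "missing table-ID" and return code -99.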
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index c225c222a38..b978bd79742 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -419,10 +419,12 @@ vnet_ip_table_cmd (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
u32 table_id, is_add;
+ u8 create_mfib;
u8 *name = NULL;
is_add = 1;
table_id = ~0;
+ create_mfib = 1;
/* Get a line of input. */
if (!unformat_user (main_input, unformat_line_input, line_input))
@@ -438,6 +440,8 @@ vnet_ip_table_cmd (vlib_main_t * vm,
is_add = 1;
else if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "no-mfib"))
+ create_mfib = 0;
else
{
error = unformat_parse_error (line_input);
@@ -459,7 +463,8 @@ vnet_ip_table_cmd (vlib_main_t * vm,
table_id = ip_table_get_unused_id (fproto);
vlib_cli_output (vm, "%u\n", table_id);
}
- ip_table_create (fproto, table_id, 0, name);
+ ip_table_create (fproto, table_id, 0 /* is_api */, create_mfib,
+ name);
}
else
{
@@ -603,6 +608,8 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
* @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3}
* To add a route to a particular FIB table (VRF), use:
* @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
+ * To add a route that drops the traffic:
+ * @cliexcmd{ip route add 172.16.24.0/24 table 100 via 127.0.0.1 drop}
?*/
VLIB_CLI_COMMAND (ip_route_command, static) = {
.path = "ip route",
@@ -612,7 +619,7 @@ VLIB_CLI_COMMAND (ip_route_command, static) = {
"<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] "
"[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
"[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 "
- "<interface>] [out-labels <value value value>]",
+ "<interface>] [out-labels <value value value>] [drop]",
.function = vnet_ip_route_cmd,
.is_mp_safe = 1,
};
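
On the debug CLI the same knob becomes a no-mfib token, e.g. ip table add 100 no-mfib (assuming the usual "ip table" command path served by vnet_ip_table_cmd), and the ip route short help now also advertises the drop keyword shown in the new @cliexcmd example.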
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index 7c3c2fff217..50b4b22eb60 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -28,12 +28,13 @@
#include <vppinfra/bihash_16_8.h>
#include <vnet/ip/reass/ip4_sv_reass.h>
-#define MSEC_PER_SEC 1000
+#define MSEC_PER_SEC 1000
#define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
-#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
+#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \
+ 10000 // 10 seconds default
+#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
-#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
+#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
typedef enum
{
@@ -94,17 +95,23 @@ typedef struct
// buffer indexes of buffers in this reassembly in chronological order -
// including overlaps and duplicate fragments
u32 *cached_buffers;
- // set to true when this reassembly is completed
- bool is_complete;
- // ip protocol
+
+ bool first_fragment_seen;
+ bool last_fragment_seen;
+
+ // vnet_buffer data
u8 ip_proto;
u8 icmp_type_or_tcp_flags;
u32 tcp_ack_number;
u32 tcp_seq_number;
- // l4 src port
u16 l4_src_port;
- // l4 dst port
u16 l4_dst_port;
+
+ // vnet_buffer2 data
+ u32 total_ip_payload_length;
+ u32 first_fragment_total_ip_header_length;
+ u32 first_fragment_clone_bi;
+
u32 next_index;
// lru indexes
u32 lru_prev;
@@ -114,13 +121,11 @@ typedef struct
typedef struct
{
ip4_sv_reass_t *pool;
- u32 reass_n;
u32 id_counter;
clib_spinlock_t lock;
// lru indexes
u32 lru_first;
u32 lru_last;
-
} ip4_sv_reass_per_thread_t;
typedef struct
@@ -143,13 +148,12 @@ typedef struct
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
- // node index of ip4-drop node
- u32 ip4_drop_idx;
u32 ip4_sv_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_output_feature_index;
u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
@@ -158,6 +162,8 @@ typedef struct
// reference count for enabling/disabling feature - per interface
u32 *output_feature_use_refcount_per_intf;
+ // extended reassembly refcount - see ip4_sv_reass_enable_disable_extended()
+ u32 extended_refcount;
} ip4_sv_reass_main_t;
extern ip4_sv_reass_main_t ip4_sv_reass_main;
@@ -177,9 +183,15 @@ typedef enum
typedef enum
{
REASS_FRAGMENT_CACHE,
- REASS_FINISH,
+ REASS_FIRST_FRAG,
+ REASS_LAST_FRAG,
REASS_FRAGMENT_FORWARD,
REASS_PASSTHROUGH,
+ REASS_HANDOFF,
+ REASS_KEY,
+ REASS_FREE_TIMEOUT,
+ REASS_FREE_LRU,
+ REASS_FREE_ERROR,
} ip4_sv_reass_trace_operation_e;
typedef struct
@@ -190,19 +202,23 @@ typedef struct
u8 ip_proto;
u16 l4_src_port;
u16 l4_dst_port;
- int l4_layer_truncated;
+ int l4_hdr_truncated;
+ u32 handoff_thread_index;
+ clib_bihash_kv_16_8_t kv;
} ip4_sv_reass_trace_t;
extern vlib_node_registration_t ip4_sv_reass_node;
extern vlib_node_registration_t ip4_sv_reass_node_feature;
static u8 *
-format_ip4_sv_reass_trace (u8 * s, va_list * args)
+format_ip4_sv_reass_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
- if (REASS_PASSTHROUGH != t->action)
+ if (REASS_PASSTHROUGH != t->action && REASS_HANDOFF != t->action &&
+ REASS_KEY != t->action && REASS_FREE_TIMEOUT != t->action &&
+ REASS_FREE_LRU != t->action && REASS_FREE_ERROR != t->action)
{
s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
}
@@ -211,25 +227,42 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
case REASS_FRAGMENT_CACHE:
s = format (s, "[cached]");
break;
- case REASS_FINISH:
+ case REASS_FIRST_FRAG:
s =
- format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
+ format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]",
t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
clib_net_to_host_u16 (t->l4_dst_port));
break;
+ case REASS_LAST_FRAG:
+ s = format (s, "[last-frag-seen]");
+ break;
+ case REASS_HANDOFF:
+ s = format (s, "[handoff, thread index: %u]", t->handoff_thread_index);
+ break;
+ case REASS_KEY:
+ s = format (s, "[lookup, key: %U]", format_bihash_kvp_16_8, &t->kv);
+ break;
+ case REASS_FREE_LRU:
+ s = format (s, "[free, LRU pressure]");
+ break;
+ case REASS_FREE_TIMEOUT:
+ s = format (s, "[free, timed out]");
+ break;
+ case REASS_FREE_ERROR:
+ s = format (s, "[free, error occurred]");
+ break;
case REASS_FRAGMENT_FORWARD:
- s =
- format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
- t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
- clib_net_to_host_u16 (t->l4_dst_port));
+ s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
+ t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
+ clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
s = format (s, "[not-fragmented]");
break;
}
- if (t->l4_layer_truncated)
+ if (t->l4_hdr_truncated)
{
- s = format (s, " [l4-layer-truncated]");
+ s = format (s, " [l4-hdr-truncated]");
}
return s;
}
@@ -238,12 +271,12 @@ static void
ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_sv_reass_t *reass, u32 bi,
ip4_sv_reass_trace_operation_e action, u32 ip_proto,
- u16 l4_src_port, u16 l4_dst_port,
- int l4_layer_truncated)
+ u16 l4_src_port, u16 l4_dst_port, int l4_hdr_truncated,
+ u32 handoff_thread_index)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (pool_is_free_index
- (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
+ if (pool_is_free_index (vm->trace_main.trace_buffer_pool,
+ vlib_buffer_get_trace_index (b)))
{
// this buffer's trace is gone
b->flags &= ~VLIB_BUFFER_IS_TRACED;
@@ -260,7 +293,8 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
t->ip_proto = ip_proto;
t->l4_src_port = l4_src_port;
t->l4_dst_port = l4_dst_port;
- t->l4_layer_truncated = l4_layer_truncated;
+ t->l4_hdr_truncated = l4_hdr_truncated;
+ t->handoff_thread_index = handoff_thread_index;
#if 0
static u8 *s = NULL;
s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
@@ -270,29 +304,56 @@ ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
#endif
}
+static void
+ip4_sv_reass_trace_timeout (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_sv_reass_t *reass, u32 bi)
+{
+ return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_TIMEOUT, ~0,
+ ~0, ~0, 0, ~0);
+}
+
+static void
+ip4_sv_reass_trace_lru_free (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_sv_reass_t *reass, u32 bi)
+{
+ return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_LRU, ~0, ~0,
+ ~0, 0, ~0);
+}
+
+static void
+ip4_sv_reass_trace_error_free (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_sv_reass_t *reass, u32 bi)
+{
+ return ip4_sv_reass_add_trace (vm, node, reass, bi, REASS_FREE_ERROR, ~0, ~0,
+ ~0, 0, ~0);
+}
always_inline void
-ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
- ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
+ip4_sv_reass_free (vlib_main_t *vm, ip4_sv_reass_main_t *rm,
+ ip4_sv_reass_per_thread_t *rt, ip4_sv_reass_t *reass,
+ bool del_bihash)
{
- clib_bihash_kv_16_8_t kv;
- kv.key[0] = reass->key.as_u64[0];
- kv.key[1] = reass->key.as_u64[1];
- clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
+ if (del_bihash)
+ {
+ clib_bihash_kv_16_8_t kv;
+ kv.key[0] = reass->key.as_u64[0];
+ kv.key[1] = reass->key.as_u64[1];
+ clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
+ }
vlib_buffer_free (vm, reass->cached_buffers,
vec_len (reass->cached_buffers));
vec_free (reass->cached_buffers);
reass->cached_buffers = NULL;
+ if (~0 != reass->first_fragment_clone_bi)
+ vlib_buffer_free_one (vm, reass->first_fragment_clone_bi);
if (~0 != reass->lru_prev)
{
- ip4_sv_reass_t *lru_prev =
- pool_elt_at_index (rt->pool, reass->lru_prev);
+ ip4_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev);
lru_prev->lru_next = reass->lru_next;
}
if (~0 != reass->lru_next)
{
- ip4_sv_reass_t *lru_next =
- pool_elt_at_index (rt->pool, reass->lru_next);
+ ip4_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next);
lru_next->lru_prev = reass->lru_prev;
}
if (rt->lru_first == reass - rt->pool)
@@ -304,20 +365,13 @@ ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
rt->lru_last = reass->lru_prev;
}
pool_put (rt->pool, reass);
- --rt->reass_n;
-}
-
-always_inline void
-ip4_sv_reass_init (ip4_sv_reass_t * reass)
-{
- reass->cached_buffers = NULL;
- reass->is_complete = false;
}
always_inline ip4_sv_reass_t *
-ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
- ip4_sv_reass_per_thread_t * rt,
- ip4_sv_reass_kv_t * kv, u8 * do_handoff)
+ip4_sv_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 bi, ip4_sv_reass_main_t *rm,
+ ip4_sv_reass_per_thread_t *rt,
+ ip4_sv_reass_kv_t *kv, u8 *do_handoff)
{
ip4_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
@@ -335,7 +389,8 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip4_sv_reass_free (vm, rm, rt, reass);
+ ip4_sv_reass_trace_timeout (vm, node, reass, bi);
+ ip4_sv_reass_free (vm, rm, rt, reass, true);
reass = NULL;
}
}
@@ -346,18 +401,17 @@ again:
return reass;
}
- if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
+ if (pool_elts (rt->pool) >= rm->max_reass_n && rm->max_reass_n)
{
reass = pool_elt_at_index (rt->pool, rt->lru_first);
- ip4_sv_reass_free (vm, rm, rt, reass);
+ ip4_sv_reass_trace_lru_free (vm, node, reass, bi);
+ ip4_sv_reass_free (vm, rm, rt, reass, true);
}
- pool_get (rt->pool, reass);
- clib_memset (reass, 0, sizeof (*reass));
+ pool_get_zero (rt->pool, reass);
+ reass->first_fragment_clone_bi = ~0;
reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
++rt->id_counter;
- ip4_sv_reass_init (reass);
- ++rt->reass_n;
reass->lru_prev = reass->lru_next = ~0;
if (~0 != rt->lru_last)
@@ -381,7 +435,7 @@ again:
int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
if (rv)
{
- ip4_sv_reass_free (vm, rm, rt, reass);
+ ip4_sv_reass_free (vm, rm, rt, reass, false);
reass = NULL;
// if other worker created a context already work with the other copy
if (-2 == rv)
@@ -391,10 +445,23 @@ again:
return reass;
}
+always_inline bool
+ip4_sv_reass_is_complete (ip4_sv_reass_t *reass, bool extended)
+{
+ /*
+ * Both first and last fragments have to be seen for extended reassembly to
+ * be complete. Otherwise, the first fragment is enough.
+ */
+ if (extended)
+ return reass->first_fragment_seen && reass->last_fragment_seen;
+
+ return reass->first_fragment_seen;
+}
+
always_inline ip4_sv_reass_rc_t
ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
ip4_sv_reass_main_t *rm, ip4_header_t *ip0,
- ip4_sv_reass_t *reass, u32 bi0)
+ ip4_sv_reass_t *reass, u32 bi0, bool extended)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
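
The extended flavour introduced by the predicate above is deliberately stricter about completeness: it must also publish the total IP payload length (known only once the last fragment has been seen) and a clone of the first fragment through vnet_buffer2, so seeing the first fragment alone is no longer enough to finish the shallow-virtual reassembly.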
@@ -408,33 +475,59 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
if (IP_PROTOCOL_TCP == reass->ip_proto)
{
- reass->icmp_type_or_tcp_flags = ((tcp_header_t *) (ip0 + 1))->flags;
- reass->tcp_ack_number = ((tcp_header_t *) (ip0 + 1))->ack_number;
- reass->tcp_seq_number = ((tcp_header_t *) (ip0 + 1))->seq_number;
+ tcp_header_t *th = ip4_next_header (ip0);
+ reass->icmp_type_or_tcp_flags = th->flags;
+ reass->tcp_ack_number = th->ack_number;
+ reass->tcp_seq_number = th->seq_number;
}
else if (IP_PROTOCOL_ICMP == reass->ip_proto)
{
reass->icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ ((icmp46_header_t *) (ip4_next_header (ip0)))->type;
+ }
+ reass->first_fragment_seen = true;
+ if (extended)
+ {
+ reass->first_fragment_total_ip_header_length =
+ ip4_header_bytes (ip0);
+ vlib_buffer_t *clone = vlib_buffer_copy_no_chain (
+ vm, b0, &reass->first_fragment_clone_bi);
+ if (!clone)
+ reass->first_fragment_clone_bi = ~0;
}
- reass->is_complete = true;
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_sv_reass_add_trace (
- vm, node, reass, bi0, REASS_FINISH, reass->ip_proto,
+ vm, node, reass, bi0, REASS_FIRST_FRAG, reass->ip_proto,
reass->l4_src_port, reass->l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
}
+ if (!ip4_get_fragment_more (ip0))
+ {
+ const u32 fragment_length =
+ clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ reass->last_fragment_seen = true;
+ reass->total_ip_payload_length = fragment_first + fragment_length;
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_LAST_FRAG, ~0, ~0, ~0,
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
+ }
+ }
+
vec_add1 (reass->cached_buffers, bi0);
- if (!reass->is_complete)
+
+ if (!ip4_sv_reass_is_complete (reass, extended))
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
if (vec_len (reass->cached_buffers) > rm->max_reass_len)
{
@@ -445,30 +538,63 @@ ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
}
always_inline int
-l4_layer_truncated (ip4_header_t *ip)
+l4_hdr_truncated (ip4_header_t *ip)
{
- static const int l4_layer_length[256] = {
- [IP_PROTOCOL_TCP] = sizeof (tcp_header_t),
- [IP_PROTOCOL_UDP] = sizeof (udp_header_t),
- [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t),
- };
+ if (IP_PROTOCOL_UDP == ip->protocol)
+ return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (udp_header_t) >
+ (u8 *) ip + clib_net_to_host_u16 (ip->length));
+ if (IP_PROTOCOL_ICMP == ip->protocol)
+ return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (icmp46_header_t) >
+ (u8 *) ip + clib_net_to_host_u16 (ip->length));
+
+ if (IP_PROTOCOL_TCP != ip->protocol)
+ return false;
+
+ tcp_header_t *th = ip4_next_header (ip);
+ const u32 tcp_opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
- return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] >
+ return ((u8 *) ip + ip4_header_bytes (ip) + sizeof (tcp_header_t) +
+ tcp_opts_len >
(u8 *) ip + clib_net_to_host_u16 (ip->length));
}
+always_inline void
+ip4_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b)
+{
+ vnet_buffer2 (b)->ip.reass.pool_index = ~0;
+ vnet_buffer2 (b)->ip.reass.thread_index = ~0;
+ vnet_buffer2 (b)->ip.reass.id = ~0;
+}
+
+always_inline void
+ip4_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b,
+ ip4_sv_reass_t *reass)
+{
+ vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index;
+ vnet_buffer2 (b)->ip.reass.id = reass->id;
+ vnet_buffer2 (b)->ip.reass.pool_index =
+ reass - ip4_sv_reass_main.per_thread_data[vm->thread_index].pool;
+}
+
+struct ip4_sv_reass_args
+{
+ bool is_feature;
+ bool is_output_feature;
+ bool is_custom;
+ bool with_custom_context;
+ bool extended;
+};
+
always_inline uword
ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, bool is_feature,
- bool is_output_feature, bool is_custom,
- bool with_custom_context)
+ vlib_frame_t *frame, struct ip4_sv_reass_args a)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
u32 *context;
- if (with_custom_context)
+ if (a.with_custom_context)
context = vlib_frame_aux_args (frame);
clib_spinlock_lock (&rt->lock);
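
The rewritten l4_hdr_truncated() in the hunk above drops the fixed per-protocol length table for TCP and reads the data-offset field instead, so fragments that cut into TCP options are now also flagged. A standalone model of the TCP branch (assuming doff is the raw 4-bit data-offset field):

#include <stdint.h>

/* truncated if IP header + full TCP header (incl. options) overruns ip->length */
static int
tcp_hdr_truncated (uint32_t ip_hdr_bytes, uint32_t ip_total_len, uint8_t doff)
{
  uint32_t tcp_hdr_bytes = (uint32_t) doff << 2;  /* doff counts 32-bit words */
  return ip_hdr_bytes + tcp_hdr_bytes > ip_total_len;
}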
@@ -506,20 +632,18 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
clib_prefetch_load (p3->data);
}
- ip4_header_t *ip0 =
- (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
- (is_output_feature ? 1 : 0) *
- vnet_buffer (b0)->
- ip.save_rewrite_length);
- ip4_header_t *ip1 =
- (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b1),
- (is_output_feature ? 1 : 0) *
- vnet_buffer (b1)->
- ip.save_rewrite_length);
-
- if (PREDICT_FALSE
- (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))
- || (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
+ ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b0),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b0)->ip.save_rewrite_length);
+ ip4_header_t *ip1 = (ip4_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b1),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b1)->ip.save_rewrite_length);
+
+ if (PREDICT_FALSE (ip4_get_fragment_more (ip0) ||
+ ip4_get_fragment_offset (ip0)) ||
+ (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
{
// fragment found, go slow path
b -= 2;
@@ -530,39 +654,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
goto slow_path;
}
- if (is_feature)
+ if (a.is_feature)
{
vnet_feature_next (&next0, b0);
}
else
{
- next0 = is_custom ? vnet_buffer (b0)->ip.reass.next_index :
- IP4_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = a.is_custom ? vnet_buffer (b0)->ip.reass.next_index :
+ IP4_SV_REASSEMBLY_NEXT_INPUT;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (l4_layer_truncated (ip0))
+
+ if (a.extended)
+ ip4_sv_reass_reset_vnet_buffer2 (b0);
+
+ if (l4_hdr_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
- vnet_buffer (b0)->ip.reass.l4_src_port = 0;
- vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1;
}
else
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
if (IP_PROTOCOL_TCP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->flags;
vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number;
vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number;
}
else if (IP_PROTOCOL_ICMP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ ((icmp46_header_t *) (ip4_next_header (ip0)))->type;
}
vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
@@ -574,41 +700,43 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b0)->ip.reass.ip_proto,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
- if (is_feature)
+ if (a.is_feature)
{
vnet_feature_next (&next1, b1);
}
else
{
- next1 = is_custom ? vnet_buffer (b1)->ip.reass.next_index :
- IP4_SV_REASSEMBLY_NEXT_INPUT;
+ next1 = a.is_custom ? vnet_buffer (b1)->ip.reass.next_index :
+ IP4_SV_REASSEMBLY_NEXT_INPUT;
}
vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol;
- if (l4_layer_truncated (ip1))
+
+ if (a.extended)
+ ip4_sv_reass_reset_vnet_buffer2 (b1);
+
+ if (l4_hdr_truncated (ip1))
{
- vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1;
- vnet_buffer (b1)->ip.reass.l4_src_port = 0;
- vnet_buffer (b1)->ip.reass.l4_dst_port = 0;
+ vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 1;
}
else
{
- vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0;
+ vnet_buffer (b1)->ip.reass.l4_hdr_truncated = 0;
if (IP_PROTOCOL_TCP == ip1->protocol)
{
vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip1 + 1))->flags;
+ ((tcp_header_t *) (ip4_next_header (ip1)))->flags;
vnet_buffer (b1)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip1 + 1))->ack_number;
+ ((tcp_header_t *) (ip4_next_header (ip1)))->ack_number;
vnet_buffer (b1)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip1 + 1))->seq_number;
+ ((tcp_header_t *) (ip4_next_header (ip1)))->seq_number;
}
else if (IP_PROTOCOL_ICMP == ip1->protocol)
{
vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip1 + 1))->type;
+ ((icmp46_header_t *) (ip4_next_header (ip1)))->type;
}
vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
@@ -620,14 +748,14 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b1)->ip.reass.ip_proto,
vnet_buffer (b1)->ip.reass.l4_src_port,
vnet_buffer (b1)->ip.reass.l4_dst_port,
- vnet_buffer (b1)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b1)->ip.reass.l4_hdr_truncated, ~0);
}
n_left_from -= 2;
next[0] = next0;
next[1] = next1;
next += 2;
- if (with_custom_context)
+ if (a.with_custom_context)
context += 2;
}
@@ -638,13 +766,12 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b0 = *b;
b++;
- ip4_header_t *ip0 =
- (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
- (is_output_feature ? 1 : 0) *
- vnet_buffer (b0)->
- ip.save_rewrite_length);
- if (PREDICT_FALSE
- (ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0)))
+ ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b0),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b0)->ip.save_rewrite_length);
+ if (PREDICT_FALSE (ip4_get_fragment_more (ip0) ||
+ ip4_get_fragment_offset (ip0)))
{
// fragment found, go slow path
b -= 1;
@@ -655,38 +782,41 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
goto slow_path;
}
- if (is_feature)
+ if (a.is_feature)
{
vnet_feature_next (&next0, b0);
}
else
{
- next0 =
- is_custom ? vnet_buffer (b0)->ip.
- reass.next_index : IP4_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = a.is_custom ? vnet_buffer (b0)->ip.reass.next_index :
+ IP4_SV_REASSEMBLY_NEXT_INPUT;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (l4_layer_truncated (ip0))
+
+ if (a.extended)
+ ip4_sv_reass_reset_vnet_buffer2 (b0);
+
+ if (l4_hdr_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1;
}
else
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
if (IP_PROTOCOL_TCP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->flags;
vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number;
vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number;
}
else if (IP_PROTOCOL_ICMP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ ((icmp46_header_t *) (ip4_next_header (ip0)))->type;
}
vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
@@ -698,13 +828,13 @@ ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_buffer (b0)->ip.reass.ip_proto,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
n_left_from -= 1;
next[0] = next0;
next += 1;
- if (with_custom_context)
+ if (a.with_custom_context)
context += 1;
}
@@ -719,7 +849,7 @@ slow_path:
while (n_left_from > 0)
{
- if (with_custom_context)
+ if (a.with_custom_context)
vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
to_next_aux, n_left_to_next);
else
@@ -736,15 +866,14 @@ slow_path:
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
- ip4_header_t *ip0 =
- (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
- (is_output_feature ? 1 : 0) *
- vnet_buffer (b0)->
- ip.save_rewrite_length);
+ ip4_header_t *ip0 = (ip4_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b0),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b0)->ip.save_rewrite_length);
if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
{
// this is a regular packet - no fragmentation
- if (is_custom)
+ if (a.is_custom)
{
next0 = vnet_buffer (b0)->ip.reass.next_index;
}
@@ -754,28 +883,28 @@ slow_path:
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (l4_layer_truncated (ip0))
+ if (l4_hdr_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 1;
vnet_buffer (b0)->ip.reass.l4_src_port = 0;
vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
else
{
- vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
if (IP_PROTOCOL_TCP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->flags;
vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->ack_number;
vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ ((tcp_header_t *) (ip4_next_header (ip0)))->seq_number;
}
else if (IP_PROTOCOL_ICMP == ip0->protocol)
{
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ ((icmp46_header_t *) (ip4_next_header (ip0)))->type;
}
vnet_buffer (b0)->ip.reass.l4_src_port =
ip4_get_port (ip0, 1);
@@ -789,7 +918,7 @@ slow_path:
vnet_buffer (b0)->ip.reass.ip_proto,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
goto packet_enqueue;
}
@@ -797,7 +926,11 @@ slow_path:
const u32 fragment_length =
clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
const u32 fragment_last = fragment_first + fragment_length - 1;
- if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
+ if (fragment_first > fragment_last ||
+ fragment_first + fragment_length > UINT16_MAX - 20 ||
+ (fragment_length < 8 &&
+ ip4_get_fragment_more (
+ ip0))) // 8 is minimum frag length per RFC 791
{
next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
@@ -807,7 +940,7 @@ slow_path:
ip4_sv_reass_kv_t kv;
u8 do_handoff = 0;
- if (with_custom_context)
+ if (a.with_custom_context)
kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
<< 32;
else
@@ -819,15 +952,29 @@ slow_path:
(u64) ip0->fragment_id << 32 |
(u64) ip0->protocol << 48;
- ip4_sv_reass_t *reass =
- ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip4_sv_reass_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->action = REASS_KEY;
+ STATIC_ASSERT_SIZEOF (t->kv, sizeof (kv));
+ clib_memcpy (&t->kv, &kv, sizeof (kv));
+ }
+
+ ip4_sv_reass_t *reass = ip4_sv_reass_find_or_create (
+ vm, node, bi0, rm, rt, &kv, &do_handoff);
if (PREDICT_FALSE (do_handoff))
{
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip4_sv_reass_add_trace (vm, node, reass, bi0, REASS_HANDOFF,
+ ~0, ~0, ~0, 0, kv.v.thread_index);
+ }
next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
- if (with_custom_context)
+ if (a.with_custom_context)
forward_context = 1;
goto packet_enqueue;
}
@@ -840,9 +987,9 @@ slow_path:
goto packet_enqueue;
}
- if (reass->is_complete)
+ if (ip4_sv_reass_is_complete (reass, a.extended))
{
- if (is_custom)
+ if (a.is_custom)
{
next0 = vnet_buffer (b0)->ip.reass.next_index;
}
@@ -851,7 +998,7 @@ slow_path:
next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment =
- ! !fragment_first;
+ !!fragment_first;
vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
reass->icmp_type_or_tcp_flags;
@@ -861,18 +1008,20 @@ slow_path:
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
+ if (a.extended)
+ ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip4_sv_reass_add_trace (
vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
}
goto packet_enqueue;
}
ip4_sv_reass_rc_t rc =
- ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0);
+ ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0, a.extended);
u32 counter = ~0;
switch (rc)
{
@@ -889,62 +1038,64 @@ slow_path:
if (~0 != counter)
{
vlib_node_increment_counter (vm, node->node_index, counter, 1);
- ip4_sv_reass_free (vm, rm, rt, reass);
+ ip4_sv_reass_trace_error_free (vm, node, reass, bi0);
+ ip4_sv_reass_free (vm, rm, rt, reass, true);
goto next_packet;
}
- if (reass->is_complete)
+ if (ip4_sv_reass_is_complete (reass, a.extended))
{
u32 idx;
vec_foreach_index (idx, reass->cached_buffers)
- {
- u32 bi0 = vec_elt (reass->cached_buffers, idx);
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- ip0 =
- (ip4_header_t *) u8_ptr_add (vlib_buffer_get_current (b0),
- (is_output_feature ? 1 : 0) *
- vnet_buffer (b0)->
- ip.save_rewrite_length);
- u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
- if (is_feature)
- {
- vnet_feature_next (&next0, b0);
- }
- if (is_custom)
- {
- next0 = vnet_buffer (b0)->ip.reass.next_index;
- }
- if (0 == n_left_to_next)
- {
- vlib_put_next_frame (vm, node, next_index,
- n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next,
- n_left_to_next);
- }
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- vnet_buffer (b0)->ip.reass.is_non_first_fragment =
- ! !ip4_get_fragment_offset (ip0);
- vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- reass->icmp_type_or_tcp_flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- reass->tcp_ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- reass->tcp_seq_number;
- vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
- vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_sv_reass_add_trace (
- vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
- vnet_buffer (b0)->ip.reass.l4_layer_truncated);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, bi0,
- next0);
- }
+ {
+ u32 bi0 = vec_elt (reass->cached_buffers, idx);
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ ip0 = (ip4_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b0),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b0)->ip.save_rewrite_length);
+ u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
+ if (a.is_feature)
+ {
+ vnet_feature_next (&next0, b0);
+ }
+ if (a.is_custom)
+ {
+ next0 = vnet_buffer (b0)->ip.reass.next_index;
+ }
+ if (0 == n_left_to_next)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+ !!ip4_get_fragment_offset (ip0);
+ vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ reass->icmp_type_or_tcp_flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ reass->tcp_ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ reass->tcp_seq_number;
+ vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
+ if (a.extended)
+ ip4_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port,
+ reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated, ~0);
+ }
+ vlib_validate_buffer_enqueue_x1 (
+ vm, node, next_index, to_next, n_left_to_next, bi0, next0);
+ }
vec_set_len (reass->cached_buffers,
0); // buffers are owned by frame now
}
@@ -954,12 +1105,13 @@ slow_path:
to_next[0] = bi0;
to_next += 1;
n_left_to_next -= 1;
- if (is_feature && IP4_ERROR_NONE == error0)
+ if (a.is_feature && IP4_ERROR_NONE == error0 &&
+ IP4_SV_REASSEMBLY_NEXT_HANDOFF != next0)
{
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- if (with_custom_context && forward_context)
+ if (a.with_custom_context && forward_context)
{
if (to_next_aux)
{
@@ -977,7 +1129,7 @@ slow_path:
next_packet:
from += 1;
n_left_from -= 1;
- if (with_custom_context)
+ if (a.with_custom_context)
context += 1;
}
@@ -989,13 +1141,20 @@ done:
return frame->n_vectors;
}
-VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip4_sv_reass_inline (
- vm, node, frame, false /* is_feature */, false /* is_output_feature */,
- false /* is_custom */, false /* with_custom_context */);
+ /*
+ * Extended reassembly is not supported for non-feature nodes.
+ */
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = false,
+ .is_custom = false,
+ .with_custom_context = false,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
@@ -1014,13 +1173,27 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
},
};
-VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_node_feature)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip4_sv_reass_inline (
- vm, node, frame, true /* is_feature */, false /* is_output_feature */,
- false /* is_custom */, false /* with_custom_context */);
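+  /*
+   * Branch on the refcount once per frame: each call below passes a
+   * compile-time-constant args struct, so the compiler can specialize the
+   * always_inline body for the extended and non-extended cases.
+   */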
+ if (ip4_sv_reass_main.extended_refcount > 0)
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = false,
+ .is_custom = false,
+ .with_custom_context = false,
+ .extended = true,
+ });
+
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = false,
+ .is_custom = false,
+ .with_custom_context = false,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
@@ -1039,22 +1212,35 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
};
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-sv-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup"),
- .runs_after = 0,
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-sv-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+ .runs_after = 0,
};
-VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_node_output_feature)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip4_sv_reass_inline (
- vm, node, frame, true /* is_feature */, true /* is_output_feature */,
- false /* is_custom */, false /* with_custom_context */);
+ if (ip4_sv_reass_main.extended_refcount > 0)
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = true,
+ .is_custom = false,
+ .with_custom_context = false,
+ .extended = true,
+ });
+
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = true,
+ .is_custom = false,
+ .with_custom_context = false,
+ .extended = false,
+ });
}
-
VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
.name = "ip4-sv-reassembly-output-feature",
.vector_size = sizeof (u32),
@@ -1066,15 +1252,15 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
{
[IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
[IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
- [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
+ [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-output-feature-hoff",
},
};
VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
- .arc_name = "ip4-output",
- .node_name = "ip4-sv-reassembly-output-feature",
- .runs_before = 0,
- .runs_after = 0,
+ .arc_name = "ip4-output",
+ .node_name = "ip4-sv-reassembly-output-feature",
+ .runs_before = 0,
+ .runs_after = 0,
};
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
@@ -1093,13 +1279,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
},
};
-VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_custom_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip4_sv_reass_inline (
- vm, node, frame, false /* is_feature */, false /* is_output_feature */,
- true /* is_custom */, false /* with_custom_context */);
+ /*
+ * Extended reassembly is not supported for non-feature nodes.
+ */
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = false,
+ .is_custom = true,
+ .with_custom_context = false,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
@@ -1122,9 +1315,17 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip4_sv_reass_inline (
- vm, node, frame, false /* is_feature */, false /* is_output_feature */,
- true /* is_custom */, true /* with_custom_context */);
+ /*
+ * Extended reassembly is not supported for non-feature nodes.
+ */
+ return ip4_sv_reass_inline (vm, node, frame,
+ (struct ip4_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = false,
+ .is_custom = true,
+ .with_custom_context = true,
+ .extended = false,
+ });
}
#ifndef CLIB_MARCH_VARIANT
@@ -1159,7 +1360,7 @@ typedef struct
#ifndef CLIB_MARCH_VARIANT
static int
-ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
+ip4_rehash_cb (clib_bihash_kv_16_8_t *kv, void *_ctx)
{
ip4_rehash_cb_ctx *ctx = _ctx;
if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
@@ -1186,8 +1387,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
- ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
- max_reassembly_length, expire_walk_interval_ms);
+ ip4_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length,
+ expire_walk_interval_ms);
vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
IP4_EVENT_CONFIG_CHANGED, 0);
@@ -1200,7 +1401,7 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
ctx.failure = 0;
ctx.new_hash = &new_hash;
clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
- new_nbuckets * 1024);
+ (uword) new_nbuckets * 1024);
clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
ip4_rehash_cb, &ctx);
if (ctx.failure)
@@ -1220,8 +1421,8 @@ ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
}
vnet_api_error_t
-ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
- u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
+ip4_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies,
+ u32 *max_reassembly_length, u32 *expire_walk_interval_ms)
{
*timeout_ms = ip4_sv_reass_main.timeout_ms;
*max_reassemblies = ip4_sv_reass_main.max_reass_n;
@@ -1231,7 +1432,7 @@ ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
}
static clib_error_t *
-ip4_sv_reass_init_function (vlib_main_t * vm)
+ip4_sv_reass_init_function (vlib_main_t *vm)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
clib_error_t *error = 0;
@@ -1244,11 +1445,11 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
vec_validate (rm->per_thread_data, vlib_num_workers ());
ip4_sv_reass_per_thread_t *rt;
vec_foreach (rt, rm->per_thread_data)
- {
- clib_spinlock_init (&rt->lock);
- pool_alloc (rt->pool, rm->max_reass_n);
- rt->lru_first = rt->lru_last = ~0;
- }
+ {
+ clib_spinlock_init (&rt->lock);
+ pool_alloc (rt->pool, rm->max_reass_n);
+ rt->lru_first = rt->lru_last = ~0;
+ }
node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
ASSERT (node);
@@ -1260,15 +1461,14 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);
nbuckets = ip4_sv_reass_get_nbuckets ();
- clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
-
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
- ASSERT (node);
- rm->ip4_drop_idx = node->index;
+ clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets,
+ (uword) nbuckets * 1024);
rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
+ rm->fq_output_feature_index =
+ vlib_frame_queue_main_init (ip4_sv_reass_node_output_feature.index, 0);
rm->fq_custom_context_index =
vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
@@ -1291,10 +1491,8 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
while (true)
{
- vlib_process_wait_for_event_or_clock (vm,
- (f64)
- rm->expire_walk_interval_ms /
- (f64) MSEC_PER_SEC);
+ vlib_process_wait_for_event_or_clock (
+ vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
@@ -1323,19 +1521,20 @@ ip4_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+ pool_foreach_index (index, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
int *i;
- vec_foreach (i, pool_indexes_to_free)
- {
- ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
- ip4_sv_reass_free (vm, rm, rt, reass);
- }
+ vec_foreach (i, pool_indexes_to_free)
+ {
+ ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
+ ip4_sv_reass_free (vm, rm, rt, reass, true);
+ }
clib_spinlock_unlock (&rt->lock);
}
@@ -1360,7 +1559,7 @@ VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
};
static u8 *
-format_ip4_sv_reass_key (u8 * s, va_list * args)
+format_ip4_sv_reass_key (u8 *s, va_list *args)
{
ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
s =
@@ -1371,37 +1570,35 @@ format_ip4_sv_reass_key (u8 * s, va_list * args)
}
static u8 *
-format_ip4_sv_reass (u8 * s, va_list * args)
+format_ip4_sv_reass (u8 *s, va_list *args)
{
vlib_main_t *vm = va_arg (*args, vlib_main_t *);
ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);
- s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
- reass->id, format_ip4_sv_reass_key, &reass->key,
- reass->trace_op_counter);
+ s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n", reass->id,
+ format_ip4_sv_reass_key, &reass->key, reass->trace_op_counter);
vlib_buffer_t *b;
u32 *bip;
u32 counter = 0;
vec_foreach (bip, reass->cached_buffers)
- {
- u32 bi = *bip;
- do
- {
- b = vlib_get_buffer (vm, bi);
- s = format (s, " #%03u: bi: %u, ", counter, bi);
- ++counter;
- bi = b->next_buffer;
- }
- while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
- }
+ {
+ u32 bi = *bip;
+ do
+ {
+ b = vlib_get_buffer (vm, bi);
+ s = format (s, " #%03u: bi: %u, ", counter, bi);
+ ++counter;
+ bi = b->next_buffer;
+ }
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ }
return s;
}
static clib_error_t *
-show_ip4_reass (vlib_main_t * vm,
- unformat_input_t * input,
- CLIB_UNUSED (vlib_cli_command_t * lmd))
+show_ip4_reass (vlib_main_t *vm, unformat_input_t *input,
+ CLIB_UNUSED (vlib_cli_command_t *lmd))
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
@@ -1424,100 +1621,105 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- pool_foreach (reass, rt->pool) {
- vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
- }
+ pool_foreach (reass, rt->pool)
+ {
+ vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
+ }
}
- sum_reass_n += rt->reass_n;
+ sum_reass_n += pool_elts (rt->pool);
clib_spinlock_unlock (&rt->lock);
}
vlib_cli_output (vm, "---------------------");
vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
(long unsigned) sum_reass_n);
vlib_cli_output (vm,
- "Maximum configured concurrent shallow virtual IP4 reassemblies per worker-thread: %lu\n",
+ "Maximum configured concurrent shallow virtual IP4 "
+ "reassemblies per worker-thread: %lu\n",
(long unsigned) rm->max_reass_n);
vlib_cli_output (vm,
"Maximum configured amount of fragments per shallow "
"virtual IP4 reassembly: %lu\n",
(long unsigned) rm->max_reass_len);
+ vlib_cli_output (
+ vm, "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n",
+ (long unsigned) rm->timeout_ms);
vlib_cli_output (vm,
- "Maximum configured shallow virtual IP4 reassembly timeout: %lums\n",
- (long unsigned) rm->timeout_ms);
- vlib_cli_output (vm,
- "Maximum configured shallow virtual IP4 reassembly expire walk interval: %lums\n",
+ "Maximum configured shallow virtual IP4 reassembly expire "
+ "walk interval: %lums\n",
(long unsigned) rm->expire_walk_interval_ms);
+
return 0;
}
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
- .path = "show ip4-sv-reassembly",
- .short_help = "show ip4-sv-reassembly [details]",
- .function = show_ip4_reass,
+ .path = "show ip4-sv-reassembly",
+ .short_help = "show ip4-sv-reassembly [details]",
+ .function = show_ip4_reass,
};
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
- return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index,
- enable_disable);
+ return ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable);
}
#endif /* CLIB_MARCH_VARIANT */
-
-#define foreach_ip4_sv_reass_handoff_error \
-_(CONGESTION_DROP, "congestion drop")
-
+#define foreach_ip4_sv_reass_handoff_error \
+ _ (CONGESTION_DROP, "congestion drop")
typedef enum
{
-#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
+#define _(sym, str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
foreach_ip4_sv_reass_handoff_error
#undef _
IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;
static char *ip4_sv_reass_handoff_error_strings[] = {
-#define _(sym,string) string,
+#define _(sym, string) string,
foreach_ip4_sv_reass_handoff_error
#undef _
};
typedef struct
{
- u32 next_worker_index;
+ u32 thread_index;
} ip4_sv_reass_handoff_trace_t;
static u8 *
-format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
+format_ip4_sv_reass_handoff_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_sv_reass_handoff_trace_t *t =
va_arg (*args, ip4_sv_reass_handoff_trace_t *);
- s =
- format (s, "ip4-sv-reassembly-handoff: next-worker %d",
- t->next_worker_index);
+ s = format (s, "to thread-index: %u", t->thread_index);
return s;
}
+struct ip4_sv_reass_hoff_args
+{
+ bool is_feature;
+ bool is_output_feature;
+ bool is_custom_context;
+};
+
always_inline uword
ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, bool is_feature,
- bool is_custom_context)
+ vlib_frame_t *frame,
+ struct ip4_sv_reass_hoff_args a)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
- u32 fq_index;
from = vlib_frame_vector_args (frame);
- if (is_custom_context)
+ if (a.is_custom_context)
context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
@@ -1526,28 +1728,28 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index :
- (is_custom_context ? rm->fq_custom_context_index :
- rm->fq_index);
+ const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index :
+ a.is_feature ? rm->fq_feature_index :
+ a.is_custom_context ? rm->fq_custom_context_index :
+ rm->fq_index;
while (n_left_from > 0)
{
ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
- if (PREDICT_FALSE
- ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
{
ip4_sv_reass_handoff_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->next_worker_index = ti[0];
+ t->thread_index = ti[0];
}
n_left_from -= 1;
ti += 1;
b += 1;
}
- if (is_custom_context)
+ if (a.is_custom_context)
n_enq = vlib_buffer_enqueue_to_thread_with_aux (
vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
else
@@ -1555,21 +1757,22 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
- vlib_node_increment_counter (vm, node->node_index,
- IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
- frame->n_vectors - n_enq);
+ vlib_node_increment_counter (
+ vm, node->node_index, IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
return frame->n_vectors;
}
-VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip4_sv_reass_handoff_node_inline (
- vm, node, frame, false /* is_feature */, false /* is_custom_context */);
+ vm, node, frame,
+ (struct ip4_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = false,
+ .is_custom_context = false });
}
-
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
.name = "ip4-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1588,7 +1791,10 @@ VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip4_sv_reass_handoff_node_inline (
- vm, node, frame, false /* is_feature */, true /* is_custom_context */);
+ vm, node, frame,
+ (struct ip4_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = false,
+ .is_custom_context = true });
}
VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
@@ -1606,16 +1812,16 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
},
};
-VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
- vlib_node_runtime_t *
- node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip4_sv_reass_handoff_node_inline (
- vm, node, frame, true /* is_feature */, false /* is_custom_context */);
+ vm, node, frame,
+ (struct ip4_sv_reass_hoff_args){ .is_feature = true,
+ .is_output_feature = false,
+ .is_custom_context = false });
}
-
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
.name = "ip4-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1630,6 +1836,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
},
};
+VLIB_NODE_FN (ip4_sv_reass_output_feature_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame,
+ (struct ip4_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = true,
+ .is_custom_context = false });
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_output_feature_handoff_node) = {
+ .name = "ip4-sv-reass-output-feature-hoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
+ .error_strings = ip4_sv_reass_handoff_error_strings,
+ .format_trace = format_ip4_sv_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
#ifndef CLIB_MARCH_VARIANT
int
ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
@@ -1640,10 +1870,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
{
if (!rm->feature_use_refcount_per_intf[sw_if_index])
{
- ++rm->feature_use_refcount_per_intf[sw_if_index];
- return vnet_feature_enable_disable ("ip4-unicast",
- "ip4-sv-reassembly-feature",
- sw_if_index, 1, 0, 0);
+ int rv = vnet_feature_enable_disable (
+ "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 1, 0, 0);
+ if (0 != rv)
+ return rv;
}
++rm->feature_use_refcount_per_intf[sw_if_index];
}
@@ -1652,9 +1882,10 @@ ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
if (rm->feature_use_refcount_per_intf[sw_if_index])
--rm->feature_use_refcount_per_intf[sw_if_index];
if (!rm->feature_use_refcount_per_intf[sw_if_index])
- return vnet_feature_enable_disable ("ip4-unicast",
- "ip4-sv-reassembly-feature",
- sw_if_index, 0, 0, 0);
+ {
+ return vnet_feature_enable_disable (
+ "ip4-unicast", "ip4-sv-reassembly-feature", sw_if_index, 0, 0, 0);
+ }
}
return 0;
}
@@ -1674,8 +1905,7 @@ ip4_sv_reass_custom_context_register_next_node (uword node_index)
}
int
-ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
- int is_enable)
+ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index);
@@ -1683,10 +1913,11 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
{
if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
{
- ++rm->output_feature_use_refcount_per_intf[sw_if_index];
- return vnet_feature_enable_disable ("ip4-output",
- "ip4-sv-reassembly-output-feature",
- sw_if_index, 1, 0, 0);
+ int rv = vnet_feature_enable_disable (
+ "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 1,
+ 0, 0);
+ if (0 != rv)
+ return rv;
}
++rm->output_feature_use_refcount_per_intf[sw_if_index];
}
@@ -1695,12 +1926,66 @@ ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
if (rm->output_feature_use_refcount_per_intf[sw_if_index])
--rm->output_feature_use_refcount_per_intf[sw_if_index];
if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
- return vnet_feature_enable_disable ("ip4-output",
- "ip4-sv-reassembly-output-feature",
- sw_if_index, 0, 0, 0);
+ {
+ return vnet_feature_enable_disable (
+ "ip4-output", "ip4-sv-reassembly-output-feature", sw_if_index, 0,
+ 0, 0);
+ }
}
return 0;
}
+
+void
+ip4_sv_reass_enable_disable_extended (bool is_enable)
+{
+ if (is_enable)
+ ++ip4_sv_reass_main.extended_refcount;
+ else
+ --ip4_sv_reass_main.extended_refcount;
+}
+
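+/*
+ * On success (0) the per-thread spinlock is held and the caller must
+ * release it via ip4_sv_reass_extended_unlock (). On failure (-1) the lock
+ * is not held.
+ */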
+int
+ip4_sv_reass_extended_lock (vlib_buffer_t *b,
+ struct ip4_sv_lock_unlock_args *a)
+{
+ ip4_sv_reass_per_thread_t *per_thread =
+ &ip4_sv_reass_main
+ .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index];
+
+ if (!vec_is_member (ip4_sv_reass_main.per_thread_data, per_thread))
+ return -1;
+
+ clib_spinlock_lock (&per_thread->lock);
+ if (pool_is_free_index (per_thread->pool,
+ vnet_buffer2 (b)->ip.reass.pool_index))
+ goto fail;
+
+ ip4_sv_reass_t *reass = pool_elt_at_index (
+ per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index);
+ if (vnet_buffer2 (b)->ip.reass.id == reass->id)
+ {
+ *a->total_ip_payload_length = reass->total_ip_payload_length;
+
+ *a->first_fragment_buffer_index = reass->first_fragment_clone_bi;
+ *a->first_fragment_total_ip_header_length =
+ reass->first_fragment_total_ip_header_length;
+ return 0;
+ }
+
+fail:
+ clib_spinlock_unlock (&per_thread->lock);
+ return -1;
+}
+
+void
+ip4_sv_reass_extended_unlock (vlib_buffer_t *b)
+{
+ ip4_sv_reass_per_thread_t *per_thread =
+ &ip4_sv_reass_main
+ .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index];
+ clib_spinlock_unlock (&per_thread->lock);
+}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h
index 3a684eb9809..a1e5659a9f1 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.h
+++ b/src/vnet/ip/reass/ip4_sv_reass.h
@@ -23,6 +23,7 @@
#ifndef __included_ip4_sv_reass_h__
#define __included_ip4_sv_reass_h__
+#include <stdbool.h>
#include <vnet/api_errno.h>
#include <vnet/vnet.h>
@@ -48,6 +49,33 @@ int ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
+/*
+ * Enable or disable extended reassembly.
+ *
+ * With extended reassembly, fragments are cached until both the first and
+ * the last fragment have been seen. Furthermore, the first-fragment buffer
+ * is cloned and stored in the reassembly context for later retrieval.
+ */
+void ip4_sv_reass_enable_disable_extended (bool is_enable);
+
+struct ip4_sv_lock_unlock_args
+{
+ u32 *total_ip_payload_length;
+ u32 *first_fragment_buffer_index;
+ u32 *first_fragment_total_ip_header_length;
+};
+
+/*
+ * Take the per-thread lock and fetch information from the reassembly
+ * context, using the vnet_buffer2 data filled in by extended reassembly.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int ip4_sv_reass_extended_lock (vlib_buffer_t *b,
+ struct ip4_sv_lock_unlock_args *a);
+
+void ip4_sv_reass_extended_unlock (vlib_buffer_t *b);
+
uword ip4_sv_reass_custom_register_next_node (uword node_index);
uword ip4_sv_reass_custom_context_register_next_node (uword node_index);
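A minimal usage sketch for the locking API above (hypothetical consumer code,
not part of this change; the example_use_first_fragment name is an
assumption). It applies to a buffer whose vnet_buffer2 data was filled in by
the extended reassembly node; on success the per-thread lock is held, so the
caller must release it once done with the fetched data:

static int
example_use_first_fragment (vlib_main_t *vm, vlib_buffer_t *b)
{
  u32 payload_len, first_bi, first_ip_hdr_len;
  struct ip4_sv_lock_unlock_args args = {
    .total_ip_payload_length = &payload_len,
    .first_fragment_buffer_index = &first_bi,
    .first_fragment_total_ip_header_length = &first_ip_hdr_len,
  };
  if (0 != ip4_sv_reass_extended_lock (b, &args))
    return -1; /* stale or missing reassembly context, lock not held */
  if (~0 != first_bi)
    {
      /* the cloned first fragment is only valid while the lock is held */
      vlib_buffer_t *first = vlib_get_buffer (vm, first_bi);
      (void) first; /* e.g. parse L3/L4 headers of the first fragment */
    }
  ip4_sv_reass_extended_unlock (b);
  return 0;
}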
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index fe2ed05555c..69b27c5aa8e 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -28,12 +28,13 @@
#include <vnet/ip/reass/ip6_sv_reass.h>
#include <vnet/ip/ip6_inlines.h>
-#define MSEC_PER_SEC 1000
+#define MSEC_PER_SEC 1000
#define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
-#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
+#define IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS \
+ 10000 // 10 seconds default
+#define IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
-#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75)
+#define IP6_SV_REASS_HT_LOAD_FACTOR (0.75)
typedef enum
{
@@ -94,17 +95,23 @@ typedef struct
// buffer indexes of buffers in this reassembly in chronological order -
// including overlaps and duplicate fragments
u32 *cached_buffers;
- // set to true when this reassembly is completed
- bool is_complete;
- // ip protocol
+
+ bool first_fragment_seen;
+ bool last_fragment_seen;
+
+ // vnet_buffer data
u8 ip_proto;
u8 icmp_type_or_tcp_flags;
u32 tcp_ack_number;
u32 tcp_seq_number;
- // l4 src port
u16 l4_src_port;
- // l4 dst port
u16 l4_dst_port;
+
+ // vnet_buffer2 data
+ u32 total_ip_payload_length;
+ u32 first_fragment_total_ip_header_length;
+ u32 first_fragment_clone_bi;
+
// lru indexes
u32 lru_prev;
u32 lru_next;
@@ -142,18 +149,21 @@ typedef struct
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
- // node index of ip6-drop node
- u32 ip6_drop_idx;
- u32 ip6_icmp_error_idx;
u32 ip6_sv_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_output_feature_index;
u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+ // reference count for enabling/disabling output feature - per interface
+ u32 *output_feature_use_refcount_per_intf;
+
+ // extended reassembly refcount - see ip6_sv_reass_enable_disable_extended()
+ u32 extended_refcount;
} ip6_sv_reass_main_t;
extern ip6_sv_reass_main_t ip6_sv_reass_main;
@@ -174,7 +184,8 @@ typedef enum
typedef enum
{
REASS_FRAGMENT_CACHE,
- REASS_FINISH,
+ REASS_FIRST_FRAG,
+ REASS_LAST_FRAG,
REASS_FRAGMENT_FORWARD,
REASS_PASSTHROUGH,
} ip6_sv_reass_trace_operation_e;
@@ -190,7 +201,7 @@ typedef struct
} ip6_sv_reass_trace_t;
static u8 *
-format_ip6_sv_reass_trace (u8 * s, va_list * args)
+format_ip6_sv_reass_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
@@ -204,17 +215,19 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
case REASS_FRAGMENT_CACHE:
s = format (s, "[cached]");
break;
- case REASS_FINISH:
+ case REASS_FIRST_FRAG:
s =
- format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
+ format (s, "[first-frag-seen, ip proto=%u, src_port=%u, dst_port=%u]",
t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
clib_net_to_host_u16 (t->l4_dst_port));
break;
+ case REASS_LAST_FRAG:
+ s = format (s, "[last-frag-seen]");
+ break;
case REASS_FRAGMENT_FORWARD:
- s =
- format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
- t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
- clib_net_to_host_u16 (t->l4_dst_port));
+ s = format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
+ t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
+ clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
s = format (s, "[not fragmented or atomic fragment]");
@@ -224,14 +237,14 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
}
static void
-ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_sv_reass_t * reass, u32 bi,
- ip6_sv_reass_trace_operation_e action,
- u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
+ip6_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_sv_reass_t *reass, u32 bi,
+ ip6_sv_reass_trace_operation_e action, u32 ip_proto,
+ u16 l4_src_port, u16 l4_dst_port)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (pool_is_free_index
- (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b)))
+ if (pool_is_free_index (vm->trace_main.trace_buffer_pool,
+ vlib_buffer_get_trace_index (b)))
{
// this buffer's trace is gone
b->flags &= ~VLIB_BUFFER_IS_TRACED;
@@ -258,31 +271,35 @@ ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline void
-ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm,
- ip6_sv_reass_per_thread_t * rt, ip6_sv_reass_t * reass)
+ip6_sv_reass_free (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
+ ip6_sv_reass_per_thread_t *rt, ip6_sv_reass_t *reass,
+ bool del_bihash)
{
- clib_bihash_kv_48_8_t kv;
- kv.key[0] = reass->key.as_u64[0];
- kv.key[1] = reass->key.as_u64[1];
- kv.key[2] = reass->key.as_u64[2];
- kv.key[3] = reass->key.as_u64[3];
- kv.key[4] = reass->key.as_u64[4];
- kv.key[5] = reass->key.as_u64[5];
- clib_bihash_add_del_48_8 (&rm->hash, &kv, 0);
+ if (del_bihash)
+ {
+ clib_bihash_kv_48_8_t kv;
+ kv.key[0] = reass->key.as_u64[0];
+ kv.key[1] = reass->key.as_u64[1];
+ kv.key[2] = reass->key.as_u64[2];
+ kv.key[3] = reass->key.as_u64[3];
+ kv.key[4] = reass->key.as_u64[4];
+ kv.key[5] = reass->key.as_u64[5];
+ clib_bihash_add_del_48_8 (&rm->hash, &kv, 0);
+ }
vlib_buffer_free (vm, reass->cached_buffers,
vec_len (reass->cached_buffers));
vec_free (reass->cached_buffers);
reass->cached_buffers = NULL;
+ if (~0 != reass->first_fragment_clone_bi)
+ vlib_buffer_free_one (vm, reass->first_fragment_clone_bi);
if (~0 != reass->lru_prev)
{
- ip6_sv_reass_t *lru_prev =
- pool_elt_at_index (rt->pool, reass->lru_prev);
+ ip6_sv_reass_t *lru_prev = pool_elt_at_index (rt->pool, reass->lru_prev);
lru_prev->lru_next = reass->lru_next;
}
if (~0 != reass->lru_next)
{
- ip6_sv_reass_t *lru_next =
- pool_elt_at_index (rt->pool, reass->lru_next);
+ ip6_sv_reass_t *lru_next = pool_elt_at_index (rt->pool, reass->lru_next);
lru_next->lru_prev = reass->lru_prev;
}
if (rt->lru_first == reass - rt->pool)
@@ -297,13 +314,6 @@ ip6_sv_reass_free (vlib_main_t * vm, ip6_sv_reass_main_t * rm,
--rt->reass_n;
}
-always_inline void
-ip6_sv_reass_init (ip6_sv_reass_t * reass)
-{
- reass->cached_buffers = NULL;
- reass->is_complete = false;
-}
-
always_inline ip6_sv_reass_t *
ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
ip6_sv_reass_per_thread_t *rt,
@@ -325,7 +335,7 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip6_sv_reass_free (vm, rm, rt, reass);
+ ip6_sv_reass_free (vm, rm, rt, reass, true);
reass = NULL;
}
}
@@ -336,19 +346,17 @@ again:
return reass;
}
- if (rt->reass_n >= rm->max_reass_n)
+ if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
{
reass = pool_elt_at_index (rt->pool, rt->lru_first);
- ip6_sv_reass_free (vm, rm, rt, reass);
+ ip6_sv_reass_free (vm, rm, rt, reass, true);
}
- pool_get (rt->pool, reass);
- clib_memset (reass, 0, sizeof (*reass));
+ pool_get_zero (rt->pool, reass);
+ reass->first_fragment_clone_bi = ~0;
reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
++rt->id_counter;
- ip6_sv_reass_init (reass);
++rt->reass_n;
-
reass->lru_prev = reass->lru_next = ~0;
if (~0 != rt->lru_last)
@@ -376,7 +384,7 @@ again:
int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
if (rv)
{
- ip6_sv_reass_free (vm, rm, rt, reass);
+ ip6_sv_reass_free (vm, rm, rt, reass, false);
reass = NULL;
// if other worker created a context already work with the other copy
if (-2 == rv)
@@ -386,10 +394,23 @@ again:
return reass;
}
+always_inline bool
+ip6_sv_reass_is_complete (ip6_sv_reass_t *reass, bool extended)
+{
+ /*
+   * Both the first and the last fragment have to be seen for extended
+   * reassembly to be complete. Otherwise, the first fragment is enough.
+ */
+ if (extended)
+ return reass->first_fragment_seen && reass->last_fragment_seen;
+
+ return reass->first_fragment_seen;
+}
+
always_inline ip6_sv_reass_rc_t
ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass, u32 bi0,
- ip6_frag_hdr_t *frag_hdr)
+ ip6_frag_hdr_t *frag_hdr, bool extended)
{
vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
@@ -417,26 +438,51 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
fvnb->ip.reass.range_first = fragment_first;
fvnb->ip.reass.range_last = fragment_last;
fvnb->ip.reass.next_range_bi = ~0;
+ void *l4_hdr = NULL;
if (0 == fragment_first)
{
- if (!ip6_get_port
- (vm, fb, fip, fb->current_length, &reass->ip_proto,
- &reass->l4_src_port, &reass->l4_dst_port,
- &reass->icmp_type_or_tcp_flags, &reass->tcp_ack_number,
- &reass->tcp_seq_number))
+ if (!ip6_get_port (vm, fb, fip, fb->current_length, &reass->ip_proto,
+ &reass->l4_src_port, &reass->l4_dst_port,
+ &reass->icmp_type_or_tcp_flags,
+ &reass->tcp_ack_number, &reass->tcp_seq_number,
+ &l4_hdr))
return IP6_SV_REASS_RC_UNSUPP_IP_PROTO;
- reass->is_complete = true;
+ reass->first_fragment_seen = true;
+ if (extended)
+ {
+ reass->first_fragment_total_ip_header_length =
+ (u8 *) l4_hdr - (u8 *) fip;
+ vlib_buffer_t *clone = vlib_buffer_copy_no_chain (
+ vm, fb, &reass->first_fragment_clone_bi);
+ if (!clone)
+ reass->first_fragment_clone_bi = ~0;
+ }
+
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH,
+ ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FIRST_FRAG,
reass->ip_proto, reass->l4_src_port,
reass->l4_dst_port);
}
}
+
+ if (!ip6_frag_hdr_more (frag_hdr))
+ {
+ reass->last_fragment_seen = true;
+	      /* total payload length is the offset of the last byte plus one */
+	      reass->total_ip_payload_length = fragment_last + 1;
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_LAST_FRAG, ~0,
+ ~0, ~0);
+ }
+ }
+
vec_add1 (reass->cached_buffers, bi0);
- if (!reass->is_complete)
+
+ if (!ip6_sv_reass_is_complete (reass, extended))
{
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -453,8 +499,7 @@ ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
}
always_inline bool
-ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
- vlib_buffer_t *b,
+ip6_sv_reass_verify_upper_layer_present (vlib_buffer_t *b,
ip6_ext_hdr_chain_t *hc)
{
int nh = hc->eh[hc->length - 1].protocol;
@@ -464,16 +509,14 @@ ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
icmp6_error_set_vnet_buffer (
b, ICMP6_parameter_problem,
ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
- b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
return false;
}
return true;
}
always_inline bool
-ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t *vm, vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
ip6_header_t *ip = vlib_buffer_get_current (b);
@@ -483,18 +526,18 @@ ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
(vnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
if (more_fragments && 0 != fragment_length % 8)
{
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_erroneous_header_field,
- (u8 *) & ip->payload_length - (u8 *) ip);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_erroneous_header_field,
+ (u8 *) &ip->payload_length - (u8 *) ip);
return false;
}
return true;
}
always_inline bool
-ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t *vm, vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
@@ -504,26 +547,52 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
if (fragment_first + fragment_length > 65535)
{
ip6_header_t *ip0 = vlib_buffer_get_current (b);
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_erroneous_header_field,
- (u8 *) & frag_hdr->fragment_offset_and_more
- - (u8 *) ip0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_erroneous_header_field,
+ (u8 *) &frag_hdr->fragment_offset_and_more - (u8 *) ip0);
return false;
}
return true;
}
+always_inline void
+ip6_sv_reass_reset_vnet_buffer2 (vlib_buffer_t *b)
+{
+ vnet_buffer2 (b)->ip.reass.pool_index = ~0;
+ vnet_buffer2 (b)->ip.reass.thread_index = ~0;
+ vnet_buffer2 (b)->ip.reass.id = ~0;
+}
+
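+/*
+ * Stash the reassembly context identity (thread index, pool index and
+ * generation id) in vnet_buffer2 so that the extended-lock helper can later
+ * find and validate this context.
+ */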
+always_inline void
+ip6_sv_reass_set_vnet_buffer2_from_reass (vlib_main_t *vm, vlib_buffer_t *b,
+ ip6_sv_reass_t *reass)
+{
+ vnet_buffer2 (b)->ip.reass.thread_index = vm->thread_index;
+ vnet_buffer2 (b)->ip.reass.id = reass->id;
+ vnet_buffer2 (b)->ip.reass.pool_index =
+ reass - ip6_sv_reass_main.per_thread_data[vm->thread_index].pool;
+}
+
+struct ip6_sv_reass_args
+{
+ bool is_feature;
+ bool is_output_feature;
+ bool custom_next;
+ bool custom_context;
+ bool extended;
+};
+
always_inline uword
ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, bool is_feature,
- bool custom_next, bool custom_context)
+ vlib_frame_t *frame, struct ip6_sv_reass_args a)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
u32 *context;
- if (custom_context)
+ if (a.custom_context)
context = vlib_frame_aux_args (frame);
clib_spinlock_lock (&rt->lock);
@@ -533,7 +602,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
while (n_left_from > 0)
{
- if (custom_context)
+ if (a.custom_context)
vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
to_next_aux, n_left_to_next);
else
@@ -549,7 +618,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
- ip6_header_t *ip0 = vlib_buffer_get_current (b0);
+ ip6_header_t *ip0 = (ip6_header_t *) u8_ptr_add (
+ vlib_buffer_get_current (b0),
+ (ptrdiff_t) (a.is_output_feature ? 1 : 0) *
+ vnet_buffer (b0)->ip.save_rewrite_length);
+
ip6_frag_hdr_t *frag_hdr;
ip6_ext_hdr_chain_t hdr_chain;
bool is_atomic_fragment = false;
@@ -569,24 +642,29 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
is_atomic_fragment)
{
- // this is a regular unfragmented packet or an atomic fragment
- if (!ip6_get_port
- (vm, b0, ip0, b0->current_length,
- &(vnet_buffer (b0)->ip.reass.ip_proto),
- &(vnet_buffer (b0)->ip.reass.l4_src_port),
- &(vnet_buffer (b0)->ip.reass.l4_dst_port),
- &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags),
- &(vnet_buffer (b0)->ip.reass.tcp_ack_number),
- &(vnet_buffer (b0)->ip.reass.tcp_seq_number)))
+ void *l4_hdr;
+ // this is a regular unfragmented packet or an atomic
+ // fragment
+ if (!ip6_get_port (
+ vm, b0, ip0, b0->current_length,
+ &(vnet_buffer (b0)->ip.reass.ip_proto),
+ &(vnet_buffer (b0)->ip.reass.l4_src_port),
+ &(vnet_buffer (b0)->ip.reass.l4_dst_port),
+ &(vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags),
+ &(vnet_buffer (b0)->ip.reass.tcp_ack_number),
+ &(vnet_buffer (b0)->ip.reass.tcp_seq_number), &l4_hdr))
{
error0 = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
b0->error = node->errors[error0];
next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
goto packet_enqueue;
}
+ if (a.extended)
+ ip6_sv_reass_reset_vnet_buffer2 (b0);
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
- next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
- IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = a.custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -604,9 +682,10 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_sv_reass_verify_upper_layer_present (node, b0,
- &hdr_chain))
+ if (!ip6_sv_reass_verify_upper_layer_present (b0, &hdr_chain))
{
+ error0 = IP6_ERROR_REASS_MISSING_UPPER;
+ b0->error = node->errors[error0];
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
}
@@ -614,6 +693,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (!ip6_sv_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
!ip6_sv_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
{
+ error0 = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ b0->error = node->errors[error0];
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
}
@@ -625,7 +706,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- if (custom_context)
+ if (a.custom_context)
kv.k.as_u64[4] =
(u64) *context << 32 | (u64) frag_hdr->identification;
else
@@ -644,7 +725,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
- if (custom_context)
+ if (a.custom_context)
forward_context = 1;
goto packet_enqueue;
}
@@ -657,10 +738,11 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto packet_enqueue;
}
- if (reass->is_complete)
+ if (ip6_sv_reass_is_complete (reass, a.extended))
{
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
vnet_buffer (b0)->ip.reass.is_non_first_fragment =
- ! !ip6_frag_hdr_offset (frag_hdr);
+ !!ip6_frag_hdr_offset (frag_hdr);
vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
reass->icmp_type_or_tcp_flags;
@@ -670,8 +752,12 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
- IP6_SV_REASSEMBLY_NEXT_INPUT;
+
+ if (a.extended)
+ ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass);
+
+ next0 = a.custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_sv_reass_add_trace (
@@ -682,7 +768,8 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
u32 counter = ~0;
- switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr))
+ switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr,
+ a.extended))
{
case IP6_SV_REASS_RC_OK:
/* nothing to do here */
@@ -703,55 +790,57 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (~0 != counter)
{
vlib_node_increment_counter (vm, node->node_index, counter, 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
+ ip6_sv_reass_free (vm, rm, rt, reass, true);
goto next_packet;
}
- if (reass->is_complete)
+ if (ip6_sv_reass_is_complete (reass, a.extended))
{
u32 idx;
vec_foreach_index (idx, reass->cached_buffers)
- {
- u32 bi0 = vec_elt (reass->cached_buffers, idx);
- if (0 == n_left_to_next)
- {
- vlib_put_next_frame (vm, node, next_index,
- n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next,
- n_left_to_next);
- }
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- b0 = vlib_get_buffer (vm, bi0);
- if (is_feature)
- {
- vnet_feature_next (&next0, b0);
- }
- frag_hdr =
- vlib_buffer_get_current (b0) +
- vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset;
- vnet_buffer (b0)->ip.reass.is_non_first_fragment =
- ! !ip6_frag_hdr_offset (frag_hdr);
- vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- reass->icmp_type_or_tcp_flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- reass->tcp_ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- reass->tcp_seq_number;
- vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
- vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip6_sv_reass_add_trace (
- vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
- reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, bi0,
- next0);
- }
+ {
+ u32 bi0 = vec_elt (reass->cached_buffers, idx);
+ if (0 == n_left_to_next)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+ if (a.is_feature || a.is_output_feature)
+ {
+ vnet_feature_next (&next0, b0);
+ }
+ frag_hdr = vlib_buffer_get_current (b0) +
+ vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset;
+ vnet_buffer (b0)->ip.reass.l4_hdr_truncated = 0;
+ vnet_buffer (b0)->ip.reass.is_non_first_fragment =
+ !!ip6_frag_hdr_offset (frag_hdr);
+ vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ reass->icmp_type_or_tcp_flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ reass->tcp_ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ reass->tcp_seq_number;
+ vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
+ if (a.extended)
+ ip6_sv_reass_set_vnet_buffer2_from_reass (vm, b0, reass);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port,
+ reass->l4_dst_port);
+ }
+ vlib_validate_buffer_enqueue_x1 (
+ vm, node, next_index, to_next, n_left_to_next, bi0, next0);
+ }
vec_set_len (reass->cached_buffers,
0); // buffers are owned by frame now
}
@@ -761,12 +850,14 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
to_next[0] = bi0;
to_next += 1;
n_left_to_next -= 1;
- if (is_feature && IP6_ERROR_NONE == error0)
+ if ((a.is_feature || a.is_output_feature) &&
+ IP6_ERROR_NONE == error0 &&
+ IP6_SV_REASSEMBLY_NEXT_HANDOFF != next0)
{
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- if (custom_context && forward_context)
+ if (a.custom_context && forward_context)
{
if (to_next_aux)
{
@@ -783,7 +874,7 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
next_packet:
from += 1;
- if (custom_context)
+ if (a.custom_context)
context += 1;
n_left_from -= 1;
}
@@ -795,13 +886,20 @@ ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
return frame->n_vectors;
}
-VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_sv_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
- false /* custom next */,
- false /* custom context */);
+ /*
+   * Extended reassembly is not supported for non-feature nodes.
+ */
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = false,
+ .custom_context = false,
+ .custom_next = false,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
@@ -820,13 +918,26 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
},
};
-VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_sv_reass_node_feature)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */,
- false /* custom next */,
- false /* custom context */);
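+  /*
+   * As in the ip4 node: branch on the refcount once per frame so that
+   * "extended" stays a compile-time constant inside the inlined body.
+   */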
+ if (ip6_sv_reass_main.extended_refcount > 0)
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = false,
+ .custom_context = false,
+ .custom_next = false,
+ .extended = true,
+ });
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = true,
+ .is_output_feature = false,
+ .custom_context = false,
+ .custom_next = false,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
@@ -846,18 +957,70 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
};
VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
- .arc_name = "ip6-unicast",
- .node_name = "ip6-sv-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip6-lookup"),
- .runs_after = 0,
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-sv-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+ .runs_after = 0,
+};
+
+VLIB_NODE_FN (ip6_sv_reass_node_output_feature)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ if (ip6_sv_reass_main.extended_refcount > 0)
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = true,
+ .custom_context = false,
+ .custom_next = false,
+ .extended = true,
+ });
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = true,
+ .custom_context = false,
+ .custom_next = false,
+ .extended = false,
+ });
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reass_node_output_feature) = {
+ .name = "ip6-sv-reassembly-output-feature",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_sv_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-output-feature-hoff",
+ },
+};
+
+VNET_FEATURE_INIT (ip6_sv_reassembly_output_feature) = {
+ .arc_name = "ip6-output",
+ .node_name = "ip6-sv-reassembly-output-feature",
+ .runs_after = 0,
};
VLIB_NODE_FN (ip6_sv_reass_custom_context_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
- true /* custom next */,
- true /* custom context */);
+ /*
+   * Extended reassembly is not supported for non-feature nodes.
+ */
+ return ip6_sv_reassembly_inline (vm, node, frame,
+ (struct ip6_sv_reass_args){
+ .is_feature = false,
+ .is_output_feature = false,
+ .custom_context = true,
+ .custom_next = true,
+ .extended = false,
+ });
}
VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = {
@@ -909,7 +1072,7 @@ typedef struct
} ip6_rehash_cb_ctx;
static int
-ip6_rehash_cb (clib_bihash_kv_48_8_t * kv, void *_ctx)
+ip6_rehash_cb (clib_bihash_kv_48_8_t *kv, void *_ctx)
{
ip6_rehash_cb_ctx *ctx = _ctx;
if (clib_bihash_add_del_48_8 (ctx->new_hash, kv, 1))
@@ -936,8 +1099,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
u32 old_nbuckets = ip6_sv_reass_get_nbuckets ();
- ip6_sv_reass_set_params (timeout_ms, max_reassemblies,
- max_reassembly_length, expire_walk_interval_ms);
+ ip6_sv_reass_set_params (timeout_ms, max_reassemblies, max_reassembly_length,
+ expire_walk_interval_ms);
vlib_process_signal_event (ip6_sv_reass_main.vlib_main,
ip6_sv_reass_main.ip6_sv_reass_expire_node_idx,
IP6_EVENT_CONFIG_CHANGED, 0);
@@ -950,7 +1113,7 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
ctx.failure = 0;
ctx.new_hash = &new_hash;
clib_bihash_init_48_8 (&new_hash, "ip6-sv-reass", new_nbuckets,
- new_nbuckets * 1024);
+ (uword) new_nbuckets * 1024);
clib_bihash_foreach_key_value_pair_48_8 (&ip6_sv_reass_main.hash,
ip6_rehash_cb, &ctx);
if (ctx.failure)
@@ -970,8 +1133,8 @@ ip6_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
}
vnet_api_error_t
-ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
- u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
+ip6_sv_reass_get (u32 *timeout_ms, u32 *max_reassemblies,
+ u32 *max_reassembly_length, u32 *expire_walk_interval_ms)
{
*timeout_ms = ip6_sv_reass_main.timeout_ms;
*max_reassemblies = ip6_sv_reass_main.max_reass_n;
@@ -981,7 +1144,7 @@ ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
}
static clib_error_t *
-ip6_sv_reass_init_function (vlib_main_t * vm)
+ip6_sv_reass_init_function (vlib_main_t *vm)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
clib_error_t *error = 0;
@@ -994,11 +1157,11 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
vec_validate (rm->per_thread_data, vlib_num_workers ());
ip6_sv_reass_per_thread_t *rt;
vec_foreach (rt, rm->per_thread_data)
- {
- clib_spinlock_init (&rt->lock);
- pool_alloc (rt->pool, rm->max_reass_n);
- rt->lru_first = rt->lru_last = ~0;
- }
+ {
+ clib_spinlock_init (&rt->lock);
+ pool_alloc (rt->pool, rm->max_reass_n);
+ rt->lru_first = rt->lru_last = ~0;
+ }
node = vlib_get_node_by_name (vm, (u8 *) "ip6-sv-reassembly-expire-walk");
ASSERT (node);
@@ -1011,14 +1174,7 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
nbuckets = ip6_sv_reass_get_nbuckets ();
clib_bihash_init_48_8 (&rm->hash, "ip6-sv-reass", nbuckets,
- nbuckets * 1024);
-
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
- ASSERT (node);
- rm->ip6_drop_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
- ASSERT (node);
- rm->ip6_icmp_error_idx = node->index;
+ (uword) nbuckets * 1024);
if ((error = vlib_call_init_function (vm, ip_main_init)))
return error;
@@ -1026,6 +1182,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
+ rm->fq_output_feature_index =
+ vlib_frame_queue_main_init (ip6_sv_reass_node_output_feature.index, 0);
rm->fq_custom_context_index =
vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0);
@@ -1047,9 +1205,8 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
while (true)
{
- vlib_process_wait_for_event_or_clock (vm,
- (f64) rm->expire_walk_interval_ms
- / (f64) MSEC_PER_SEC);
+ vlib_process_wait_for_event_or_clock (
+ vm, (f64) rm->expire_walk_interval_ms / (f64) MSEC_PER_SEC);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
@@ -1078,19 +1235,20 @@ ip6_sv_reass_walk_expired (vlib_main_t *vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
+ pool_foreach_index (index, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
int *i;
- vec_foreach (i, pool_indexes_to_free)
- {
- ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
- ip6_sv_reass_free (vm, rm, rt, reass);
- }
+ vec_foreach (i, pool_indexes_to_free)
+ {
+ ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
+ ip6_sv_reass_free (vm, rm, rt, reass, true);
+ }
clib_spinlock_unlock (&rt->lock);
}
@@ -1116,7 +1274,7 @@ VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
};
static u8 *
-format_ip6_sv_reass_key (u8 * s, va_list * args)
+format_ip6_sv_reass_key (u8 *s, va_list *args)
{
ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *);
s =
@@ -1127,35 +1285,34 @@ format_ip6_sv_reass_key (u8 * s, va_list * args)
}
static u8 *
-format_ip6_sv_reass (u8 * s, va_list * args)
+format_ip6_sv_reass (u8 *s, va_list *args)
{
vlib_main_t *vm = va_arg (*args, vlib_main_t *);
ip6_sv_reass_t *reass = va_arg (*args, ip6_sv_reass_t *);
- s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n",
- reass->id, format_ip6_sv_reass_key, &reass->key,
- reass->trace_op_counter);
+ s = format (s, "ID: %lu, key: %U, trace_op_counter: %u\n", reass->id,
+ format_ip6_sv_reass_key, &reass->key, reass->trace_op_counter);
vlib_buffer_t *b;
u32 *bip;
u32 counter = 0;
vec_foreach (bip, reass->cached_buffers)
- {
- u32 bi = *bip;
- do
- {
- b = vlib_get_buffer (vm, bi);
- s = format (s, " #%03u: bi: %u\n", counter, bi);
- ++counter;
- bi = b->next_buffer;
- }
- while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
- }
+ {
+ u32 bi = *bip;
+ do
+ {
+ b = vlib_get_buffer (vm, bi);
+ s = format (s, " #%03u: bi: %u\n", counter, bi);
+ ++counter;
+ bi = b->next_buffer;
+ }
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ }
return s;
}
static clib_error_t *
-show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
- CLIB_UNUSED (vlib_cli_command_t * lmd))
+show_ip6_sv_reass (vlib_main_t *vm, unformat_input_t *input,
+ CLIB_UNUSED (vlib_cli_command_t *lmd))
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
@@ -1179,9 +1336,10 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- pool_foreach (reass, rt->pool) {
- vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
- }
+ pool_foreach (reass, rt->pool)
+ {
+ vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
+ }
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1190,90 +1348,93 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "Current IP6 reassemblies count: %lu\n",
(long unsigned) sum_reass_n);
vlib_cli_output (vm,
- "Maximum configured concurrent shallow virtual IP6 reassemblies per worker-thread: %lu\n",
+ "Maximum configured concurrent shallow virtual IP6 "
+ "reassemblies per worker-thread: %lu\n",
(long unsigned) rm->max_reass_n);
vlib_cli_output (vm,
"Maximum configured amount of fragments per shallow "
"virtual IP6 reassembly: %lu\n",
(long unsigned) rm->max_reass_len);
+ vlib_cli_output (
+ vm, "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n",
+ (long unsigned) rm->timeout_ms);
vlib_cli_output (vm,
- "Maximum configured shallow virtual IP6 reassembly timeout: %lums\n",
- (long unsigned) rm->timeout_ms);
- vlib_cli_output (vm,
- "Maximum configured shallow virtual IP6 reassembly expire walk interval: %lums\n",
+ "Maximum configured shallow virtual IP6 reassembly expire "
+ "walk interval: %lums\n",
(long unsigned) rm->expire_walk_interval_ms);
- vlib_cli_output (vm, "Buffers in use: %lu\n",
- (long unsigned) sum_buffers_n);
+ vlib_cli_output (vm, "Buffers in use: %lu\n", (long unsigned) sum_buffers_n);
return 0;
}
VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
- .path = "show ip6-sv-reassembly",
- .short_help = "show ip6-sv-reassembly [details]",
- .function = show_ip6_sv_reass,
+ .path = "show ip6-sv-reassembly",
+ .short_help = "show ip6-sv-reassembly [details]",
+ .function = show_ip6_sv_reass,
};
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
ip6_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
- return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index,
- enable_disable);
+ return ip6_sv_reass_enable_disable_with_refcnt (sw_if_index, enable_disable);
}
#endif /* CLIB_MARCH_VARIANT */
-#define foreach_ip6_sv_reassembly_handoff_error \
-_(CONGESTION_DROP, "congestion drop")
-
+#define foreach_ip6_sv_reassembly_handoff_error \
+ _ (CONGESTION_DROP, "congestion drop")
typedef enum
{
-#define _(sym,str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
+#define _(sym, str) IP6_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
foreach_ip6_sv_reassembly_handoff_error
#undef _
IP6_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip6_sv_reassembly_handoff_error_t;
static char *ip6_sv_reassembly_handoff_error_strings[] = {
-#define _(sym,string) string,
+#define _(sym, string) string,
foreach_ip6_sv_reassembly_handoff_error
#undef _
};
typedef struct
{
- u32 next_worker_index;
+ u32 thread_index;
} ip6_sv_reassembly_handoff_trace_t;
static u8 *
-format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
+format_ip6_sv_reassembly_handoff_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip6_sv_reassembly_handoff_trace_t *t =
va_arg (*args, ip6_sv_reassembly_handoff_trace_t *);
- s =
- format (s, "ip6-sv-reassembly-handoff: next-worker %d",
- t->next_worker_index);
+ s = format (s, "to thread-index: %u", t->thread_index);
return s;
}
+struct ip6_sv_reass_hoff_args
+{
+ bool is_feature;
+ bool is_output_feature;
+ bool custom_context;
+};
+
always_inline uword
ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame, bool is_feature,
- bool custom_context)
+ vlib_frame_t *frame,
+ struct ip6_sv_reass_hoff_args a)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
- u32 fq_index;
from = vlib_frame_vector_args (frame);
- if (custom_context)
+ if (a.custom_context)
context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
@@ -1281,28 +1442,28 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ?
- rm->fq_feature_index :
- (custom_context ? rm->fq_custom_context_index : rm->fq_index);
+ const u32 fq_index = a.is_output_feature ? rm->fq_output_feature_index :
+ a.is_feature ? rm->fq_feature_index :
+ a.custom_context ? rm->fq_custom_context_index :
+ rm->fq_index;
while (n_left_from > 0)
{
ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
- if (PREDICT_FALSE
- ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
{
ip6_sv_reassembly_handoff_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->next_worker_index = ti[0];
+ t->thread_index = ti[0];
}
n_left_from -= 1;
ti += 1;
b += 1;
}
- if (custom_context)
+ if (a.custom_context)
n_enq = vlib_buffer_enqueue_to_thread_with_aux (
vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
else
@@ -1310,18 +1471,20 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
- vlib_node_increment_counter (vm, node->node_index,
- IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
- frame->n_vectors - n_enq);
+ vlib_node_increment_counter (
+ vm, node->node_index, IP6_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
return frame->n_vectors;
}
-VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_sv_reassembly_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip6_sv_reassembly_handoff_inline (
- vm, node, frame, false /* is_feature */, false /* custom_context */);
+ vm, node, frame,
+ (struct ip6_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = false,
+ .custom_context = false });
}
VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
@@ -1338,15 +1501,16 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
},
};
-
-VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip6_sv_reassembly_handoff_inline (
- vm, node, frame, true /* is_feature */, false /* custom_context */);
+ vm, node, frame,
+ (struct ip6_sv_reass_hoff_args){ .is_feature = true,
+ .is_output_feature = false,
+ .custom_context = false });
}
-
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
.name = "ip6-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1361,11 +1525,38 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_sv_reassembly_output_feature_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame,
+ (struct ip6_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = true,
+ .custom_context = false });
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reassembly_output_feature_handoff_node) = {
+ .name = "ip6-sv-reass-output-feature-hoff",
+ .vector_size = sizeof (u32),
+  .n_errors = ARRAY_LEN (ip6_sv_reassembly_handoff_error_strings),
+ .error_strings = ip6_sv_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_sv_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return ip6_sv_reassembly_handoff_inline (
- vm, node, frame, false /* is_feature */, true /* custom_context */);
+ vm, node, frame,
+ (struct ip6_sv_reass_hoff_args){ .is_feature = false,
+ .is_output_feature = false,
+ .custom_context = true });
}
VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = {
@@ -1393,10 +1584,10 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
{
if (!rm->feature_use_refcount_per_intf[sw_if_index])
{
- ++rm->feature_use_refcount_per_intf[sw_if_index];
- return vnet_feature_enable_disable ("ip6-unicast",
- "ip6-sv-reassembly-feature",
- sw_if_index, 1, 0, 0);
+ int rv = vnet_feature_enable_disable (
+ "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 1, 0, 0);
+ if (0 != rv)
+ return rv;
}
++rm->feature_use_refcount_per_intf[sw_if_index];
}
@@ -1404,8 +1595,35 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
{
--rm->feature_use_refcount_per_intf[sw_if_index];
if (!rm->feature_use_refcount_per_intf[sw_if_index])
- return vnet_feature_enable_disable ("ip6-unicast",
- "ip6-sv-reassembly-feature",
+ return vnet_feature_enable_disable (
+ "ip6-unicast", "ip6-sv-reassembly-feature", sw_if_index, 0, 0, 0);
+ }
+ return 0;
+}
+
+vnet_api_error_t
+ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
+{
+ ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
+ vec_validate (rm->output_feature_use_refcount_per_intf, sw_if_index);
+ if (is_enable)
+ {
+ if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
+ {
+ int rv = vnet_feature_enable_disable (
+ "ip6-output", "ip6-sv-reassembly-output-feature", sw_if_index, 1,
+ 0, 0);
+ if (0 != rv)
+ return rv;
+ }
+ ++rm->output_feature_use_refcount_per_intf[sw_if_index];
+ }
+ else
+ {
+ --rm->output_feature_use_refcount_per_intf[sw_if_index];
+ if (!rm->output_feature_use_refcount_per_intf[sw_if_index])
+ return vnet_feature_enable_disable ("ip6-output",
+ "ip6-sv-reassembly-output-feature",
sw_if_index, 0, 0, 0);
}
return 0;
@@ -1418,6 +1636,57 @@ ip6_sv_reass_custom_context_register_next_node (uword node_index)
vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index,
node_index);
}
+
+void
+ip6_sv_reass_enable_disable_extended (bool is_enable)
+{
+ if (is_enable)
+ ++ip6_sv_reass_main.extended_refcount;
+ else
+ --ip6_sv_reass_main.extended_refcount;
+}
+
+int
+ip6_sv_reass_extended_lock (vlib_buffer_t *b,
+ struct ip6_sv_lock_unlock_args *a)
+{
+ ip6_sv_reass_per_thread_t *per_thread =
+ &ip6_sv_reass_main
+ .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index];
+
+ if (!vec_is_member (ip6_sv_reass_main.per_thread_data, per_thread))
+ return -1;
+
+ clib_spinlock_lock (&per_thread->lock);
+ if (pool_is_free_index (per_thread->pool,
+ vnet_buffer2 (b)->ip.reass.pool_index))
+ goto fail;
+
+ ip6_sv_reass_t *reass = pool_elt_at_index (
+ per_thread->pool, vnet_buffer2 (b)->ip.reass.pool_index);
+ if (vnet_buffer2 (b)->ip.reass.id == reass->id)
+ {
+ *a->total_ip_payload_length = reass->total_ip_payload_length;
+
+ *a->first_fragment_buffer_index = reass->first_fragment_clone_bi;
+ *a->first_fragment_total_ip_header_length =
+ reass->first_fragment_total_ip_header_length;
+ return 0;
+ }
+
+fail:
+ clib_spinlock_unlock (&per_thread->lock);
+ return -1;
+}
+
+void
+ip6_sv_reass_extended_unlock (vlib_buffer_t *b)
+{
+ ip6_sv_reass_per_thread_t *per_thread =
+ &ip6_sv_reass_main
+ .per_thread_data[vnet_buffer2 (b)->ip.reass.thread_index];
+ clib_spinlock_unlock (&per_thread->lock);
+}
#endif
/*
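
A hedged sketch of how a consumer might combine the two knobs introduced
above, enabling the per-interface output feature together with the global
extended refcount; my_enable is hypothetical and error handling is minimal:

#include <vnet/ip/reass/ip6_sv_reass.h>

/* Hypothetical consumer: enable extended SV reassembly on the ip6-output
 * arc for one interface. Only the two API calls come from this patch. */
static vnet_api_error_t
my_enable (u32 sw_if_index)
{
  vnet_api_error_t rv =
    ip6_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
  if (rv)
    return rv;
  /* Raise the global refcount so feature nodes take the extended path. */
  ip6_sv_reass_enable_disable_extended (true);
  return 0;
}
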
diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h
index 7dc9df132dd..9220581ffd3 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.h
+++ b/src/vnet/ip/reass/ip6_sv_reass.h
@@ -23,6 +23,7 @@
#ifndef __included_ip6_sv_reass_h__
#define __included_ip6_sv_reass_h__
+#include <stdbool.h>
#include <vnet/api_errno.h>
#include <vnet/vnet.h>
@@ -42,6 +43,36 @@ vnet_api_error_t ip6_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index,
u8 enable_disable);
+vnet_api_error_t
+ip6_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
+ int is_enable);
+
+/*
+ * Enable or disable extended reassembly.
+ *
+ * Extended reassembly means that fragments are cached until both the first
+ * and last fragments are seen. Furthermore, the first-fragment buffer is
+ * cloned and stored in the reassembly context for later retrieval.
+ */
+void ip6_sv_reass_enable_disable_extended (bool is_enable);
+
+struct ip6_sv_lock_unlock_args
+{
+ u32 *total_ip_payload_length;
+ u32 *first_fragment_buffer_index;
+ u32 *first_fragment_total_ip_header_length;
+};
+
+/*
+ * Acquire the per-thread lock and fetch information from the reassembly
+ * context. Uses the vnet_buffer2 data filled in by extended reassembly.
+ *
+ * Returns 0 on success, in which case the lock is held and must be released
+ * with ip6_sv_reass_extended_unlock(); returns -1 otherwise (lock not held).
+ */
+int ip6_sv_reass_extended_lock (vlib_buffer_t *b,
+ struct ip6_sv_lock_unlock_args *a);
+
+void ip6_sv_reass_extended_unlock (vlib_buffer_t *b);
int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
uword ip6_sv_reass_custom_context_register_next_node (uword node_index);
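
A minimal usage sketch for the lock API declared above, assuming the buffer
carries vnet_buffer2 reassembly data from the extended path; fetch_reass_info
is a hypothetical helper:

#include <vnet/ip/reass/ip6_sv_reass.h>

/* Hypothetical helper: copy extended-reassembly data out for buffer b.
 * Returns 0 on success, -1 if no matching reassembly context exists. */
static int
fetch_reass_info (vlib_buffer_t *b, u32 *payload_len, u32 *first_bi,
		  u32 *first_ip_hdr_len)
{
  struct ip6_sv_lock_unlock_args args = {
    .total_ip_payload_length = payload_len,
    .first_fragment_buffer_index = first_bi,
    .first_fragment_total_ip_header_length = first_ip_hdr_len,
  };
  if (ip6_sv_reass_extended_lock (b, &args))
    return -1; /* on failure the per-thread lock is not held */
  /* On success the lock is still held; release it when done. */
  ip6_sv_reass_extended_unlock (b);
  return 0;
}
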