/* * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file * @brief IPv6 Full Reassembly. * * This file contains the source code for IPv6 full reassembly. */ #include #include #include #include #include #define MSEC_PER_SEC 1000 #define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100 #define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default #define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024 #define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3 #define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75) typedef enum { IP6_FULL_REASS_RC_OK, IP6_FULL_REASS_RC_INTERNAL_ERROR, IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS, IP6_FULL_REASS_RC_NO_BUF, IP6_FULL_REASS_RC_HANDOFF, } ip6_full_reass_rc_t; typedef struct { union { struct { ip6_address_t src; ip6_address_t dst; u32 xx_id; u32 frag_id; u8 unused[7]; u8 proto; }; u64 as_u64[6]; }; } ip6_full_reass_key_t; typedef union { struct { u32 reass_index; u32 memory_owner_thread_index; }; u64 as_u64; } ip6_full_reass_val_t; typedef union { struct { ip6_full_reass_key_t k; ip6_full_reass_val_t v; }; clib_bihash_kv_48_8_t kv; } ip6_full_reass_kv_t; always_inline u32 ip6_full_reass_buffer_get_data_offset (vlib_buffer_t * b) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); return vnb->ip.reass.range_first - vnb->ip.reass.fragment_first; } always_inline u16 ip6_full_reass_buffer_get_data_len (vlib_buffer_t * b) { vnet_buffer_opaque_t *vnb = vnet_buffer (b); return clib_min 
(vnb->ip.reass.range_last, vnb->ip.reass.fragment_last) - (vnb->ip.reass.fragment_first + ip6_full_reass_buffer_get_data_offset (b)) + 1; } typedef struct { // hash table key ip6_full_reass_key_t key; // time when last packet was received f64 last_heard; // internal id of this reassembly u64 id; // buffer index of first buffer in this reassembly context u32 first_bi; // last octet of packet, ~0 until fragment without more_fragments arrives u32 last_packet_octet; // length of data collected so far u32 data_len; // trace operation counter u32 trace_op_counter; // next index - used by custom apps (~0 if not set) u32 next_index; // error next index - used by custom apps (~0 if not set) u32 error_next_index; // minimum fragment length for this reassembly - used to estimate MTU u16 min_fragment_length; // number of fragments for this reassembly u32 fragments_n; // thread owning memory for this context (whose pool contains this ctx) u32 memory_owner_thread_index; // thread which received fragment with offset 0 and which sends out the // completed reassembly u32 sendout_thread_index; } ip6_full_reass_t; typedef struct { ip6_full_reass_t *pool; u32 reass_n; u32 id_counter; clib_spinlock_t lock; } ip6_full_reass_per_thread_t; typedef struct { // IPv6 config u32 timeout_ms; f64 timeout; u32 expire_walk_interval_ms; // maximum number of fragments in one reassembly u32 max_reass_len; // maximum number of reassemblies u32 max_reass_n; // IPv6 runtime clib_bihash_48_8_t hash; // per-thread data ip6_full_reass_per_thread_t *per_thread_data; // convenience vlib_main_t *vlib_main; // node index of ip6-drop node u32 ip6_drop_idx; u32 ip6_icmp_error_idx; u32 ip6_full_reass_expire_node_idx; /** Worker handoff */ u32 fq_index; u32 fq_feature_index; // reference count for enabling/disabling feature - per interface u32 *feature_use_refcount_per_intf; } ip6_full_reass_main_t; extern ip6_full_reass_main_t ip6_full_reass_main; #ifndef CLIB_MARCH_VARIANT ip6_full_reass_main_t 
ip6_full_reass_main; #endif /* CLIB_MARCH_VARIANT */ typedef enum { IP6_FULL_REASSEMBLY_NEXT_INPUT, IP6_FULL_REASSEMBLY_NEXT_DROP, IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR, IP6_FULL_REASSEMBLY_NEXT_HANDOFF, IP6_FULL_REASSEMBLY_N_NEXT, } ip6_full_reass_next_t; typedef enum { RANGE_NEW, RANGE_OVERLAP, ICMP_ERROR_RT_EXCEEDED, ICMP_ERROR_FL_TOO_BIG, ICMP_ERROR_FL_NOT_MULT_8, FINALIZE, HANDOFF, } ip6_full_reass_trace_operation_e; typedef struct { u16 range_first; u16 range_last; u32 range_bi; i32 data_offset; u32 data_len; u32 first_bi; } ip6_full_reass_range_trace_t; typedef struct { ip6_full_reass_trace_operation_e action; u32 reass_id; ip6_full_reass_range_trace_t trace_range; u32 op_id; u32 fragment_first; u32 fragment_last; u32 total_data_len; u32 thread_id; u32 thread_id_to; bool is_after_handoff; ip6_header_t ip6_header; ip6_frag_hdr_t ip6_frag_header; } ip6_full_reass_trace_t; static void ip6_full_reass_trace_details (vlib_main_t * vm, u32 bi, ip6_full_reass_range_trace_t * trace) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); vnet_buffer_opaque_t *vnb = vnet_buffer (b); trace->range_first = vnb->ip.reass.range_first; trace->range_last = vnb->ip.reass.range_last; trace->data_offset = ip6_full_reass_buffer_get_data_offset (b); trace->data_len = ip6_full_reass_buffer_get_data_len (b); trace->range_bi = bi; } static u8 * format_ip6_full_reass_range_trace (u8 * s, va_list * args) { ip6_full_reass_range_trace_t *trace = va_arg (*args, ip6_full_reass_range_trace_t *); s = format (s, "range: [%u, %u], off %d, len %u, bi %u", trace->range_first, trace->range_last, trace->data_offset, trace->data_len, trace->range_bi); return s; } static u8 * format_ip6_full_reass_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_full_reass_trace_t *t = va_arg (*args, ip6_full_reass_trace_t *); u32 indent = 0; if (~0 != t->reass_id) { if (t->is_after_handoff) { s = format (s, 
"%U\n", format_ip6_header, &t->ip6_header, sizeof (t->ip6_header)); s = format (s, " %U\n", format_ip6_frag_hdr, &t->ip6_frag_header, sizeof (t->ip6_frag_header)); indent = 2; } s = format (s, "%Ureass id: %u, op id: %u, ", format_white_space, indent, t->reass_id, t->op_id); indent = format_get_indent (s); s = format (s, "first bi: %u, data len: %u, ip/fragment[%u, %u]", t->trace_range.first_bi, t->total_data_len, t->fragment_first, t->fragment_last); } switch (t->action) { case RANGE_NEW: s = format (s, "\n%Unew %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); break; case RANGE_OVERLAP: s = format (s, "\n%Uoverlap %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); break; case ICMP_ERROR_FL_TOO_BIG: s = format (s, "\n%Uicmp-error - frag_len > 65535 %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); break; case ICMP_ERROR_FL_NOT_MULT_8: s = format (s, "\n%Uicmp-error - frag_len mod 8 != 0 %U", format_white_space, indent, format_ip6_full_reass_range_trace, &t->trace_range); break; case ICMP_ERROR_RT_EXCEEDED: s = format (s, "\n%Uicmp-error - reassembly time exceeded", format_white_space, indent); break; case FINALIZE: s = format (s, "\n%Ufinalize reassembly", format_white_space, indent); break; case HANDOFF: s = format (s, "handoff from thread #%u to thread #%u", t->thread_id, t->thread_id_to); break; } return s; } static void ip6_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_main_t * rm, ip6_full_reass_t * reass, u32 bi, ip6_frag_hdr_t * ip6_frag_header, ip6_full_reass_trace_operation_e action, u32 thread_id_to) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); vnet_buffer_opaque_t *vnb = vnet_buffer (b); bool is_after_handoff = false; if (pool_is_free_index (vm->trace_main.trace_buffer_pool, vlib_buffer_get_trace_index (b))) { // this buffer's trace is gone b->flags &= ~VLIB_BUFFER_IS_TRACED; return; } if 
(vlib_buffer_get_trace_thread (b) != vm->thread_index) { is_after_handoff = true; } ip6_full_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0])); t->is_after_handoff = is_after_handoff; if (t->is_after_handoff) { clib_memcpy (&t->ip6_header, vlib_buffer_get_current (b), clib_min (sizeof (t->ip6_header), b->current_length)); if (ip6_frag_header) { clib_memcpy (&t->ip6_frag_header, ip6_frag_header, sizeof (t->ip6_frag_header)); } else { clib_memset (&t->ip6_frag_header, 0, sizeof (t->ip6_frag_header)); } } if (reass) { t->reass_id = reass->id; t->op_id = reass->trace_op_counter; t->trace_range.first_bi = reass->first_bi; t->total_data_len = reass->data_len; ++reass->trace_op_counter; } else { t->reass_id = ~0; } t->action = action; t->thread_id = vm->thread_index; t->thread_id_to = thread_id_to; ip6_full_reass_trace_details (vm, bi, &t->trace_range); t->fragment_first = vnb->ip.reass.fragment_first; t->fragment_last = vnb->ip.reass.fragment_last; #if 0 static u8 *s = NULL; s = format (s, "%U", format_ip6_full_reass_trace, NULL, NULL, t); printf ("%.*s\n", vec_len (s), s); fflush (stdout); vec_reset_length (s); #endif } always_inline void ip6_full_reass_free_ctx (ip6_full_reass_per_thread_t * rt, ip6_full_reass_t * reass) { pool_put (rt->pool, reass); --rt->reass_n; } always_inline void ip6_full_reass_free (ip6_full_reass_main_t * rm, ip6_full_reass_per_thread_t * rt, ip6_full_reass_t * reass) { clib_bihash_kv_48_8_t kv; kv.key[0] = reass->key.as_u64[0]; kv.key[1] = reass->key.as_u64[1]; kv.key[2] = reass->key.as_u64[2]; kv.key[3] = reass->key.as_u64[3]; kv.key[4] = reass->key.as_u64[4]; kv.key[5] = reass->key.as_u64[5]; clib_bihash_add_del_48_8 (&rm->hash, &kv, 0); ip6_full_reass_free_ctx (rt, reass); } always_inline void ip6_full_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_main_t * rm, ip6_full_reass_t * reass) { u32 range_bi = reass->first_bi; vlib_buffer_t *range_b; vnet_buffer_opaque_t *range_vnb; u32 *to_free = NULL; 
while (~0 != range_bi) { range_b = vlib_get_buffer (vm, range_bi); range_vnb = vnet_buffer (range_b); u32 bi = range_bi; while (~0 != bi) { vec_add1 (to_free, bi); vlib_buffer_t *b = vlib_get_buffer (vm, bi); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { bi = b->next_buffer; b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; } else { bi = ~0; } } range_bi = range_vnb->ip.reass.next_range_bi; } /* send to next_error_index */ if (~0 != reass->error_next_index) { u32 n_left_to_next, *to_next, next_index; next_index = reass->error_next_index; u32 bi = ~0; while (vec_len (to_free) > 0) { vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (vec_len (to_free) > 0 && n_left_to_next > 0) { bi = vec_pop (to_free); if (~0 != bi) { to_next[0] = bi; to_next += 1; n_left_to_next -= 1; } } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } } else { vlib_buffer_free (vm, to_free, vec_len (to_free)); } vec_free (to_free); } always_inline void ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_main_t * rm, ip6_full_reass_t * reass, u32 * icmp_bi) { if (~0 == reass->first_bi) { return; } if (~0 == reass->next_index) // custom apps don't want icmp { vlib_buffer_t *b = vlib_get_buffer (vm, reass->first_bi); if (0 == vnet_buffer (b)->ip.reass.fragment_first) { *icmp_bi = reass->first_bi; if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED)) { ip6_full_reass_add_trace (vm, node, rm, reass, reass->first_bi, NULL, ICMP_ERROR_RT_EXCEEDED, ~0); } // fragment with offset zero received - send icmp message back if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { // separate first buffer from chain and steer it towards icmp node b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; reass->first_bi = b->next_buffer; } else { reass->first_bi = vnet_buffer (b)->ip.reass.next_range_bi; } icmp6_error_set_vnet_buffer (b, ICMP6_time_exceeded, ICMP6_time_exceeded_fragment_reassembly_time_exceeded, 0); } } ip6_full_reass_drop_all (vm, node, rm, reass); } always_inline 
ip6_full_reass_t * ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_main_t * rm, ip6_full_reass_per_thread_t * rt, ip6_full_reass_kv_t * kv, u32 * icmp_bi, u8 * do_handoff) { ip6_full_reass_t *reass; f64 now; again: reass = NULL; now = vlib_time_now (vm); if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv)) { if (vm->thread_index != kv->v.memory_owner_thread_index) { *do_handoff = 1; return NULL; } reass = pool_elt_at_index (rm->per_thread_data [kv->v.memory_owner_thread_index].pool, kv->v.reass_index); if (now > reass->last_heard + rm->timeout) { ip6_full_reass_on_timeout (vm, node, rm, reass, icmp_bi); ip6_full_reass_free (rm, rt, reass); reass = NULL; } } if (reass) { reass->last_heard = now; return reass; } if (rt->reass_n >= rm->max_reass_n) { reass = NULL; return reass; } else { pool_get (rt->pool, reass); clib_memset (reass, 0, sizeof (*reass)); reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter; ++rt->id_counter; reass->first_bi = ~0; reass->last_packet_octet = ~0; reass->data_len = 0; reass->next_index = ~0; reass->error_next_index = ~0; ++rt->reass_n; } reass->key.as_u64[0] = kv->kv.key[0]; reass->key.as_u64[1] = kv->kv.key[1]; reass->key.as_u64[2] = kv->kv.key[2]; reass->key.as_u64[3] = kv->kv.key[3]; reass->key.as_u64[4] = kv->kv.key[4]; reass->key.as_u64[5] = kv->kv.key[5]; kv->v.reass_index = (reass - rt->pool); kv->v.memory_owner_thread_index = vm->thread_index; reass->last_heard = now; int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2); if (rv) { ip6_full_reass_free (rm, rt, reass); reass = NULL; // if other worker created a context already work with the other copy if (-2 == rv) goto again; } return reass; } always_inline ip6_full_reass_rc_t ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_full_reass_main_t * rm, ip6_full_reass_per_thread_t * rt, ip6_full_reass_t * reass, u32 * bi0, u32 * next0, u32 * error0, bool is_custom_app) { *bi0 = 
reass->first_bi; *error0 = IP6_ERROR_NONE; ip6_frag_hdr_t *frag_hdr; vlib_buffer_t *last_b = NULL; u32 sub_chain_bi = reass->first_bi; u32 total_length = 0; u32 buf_cnt = 0; u32 dropped_cnt = 0; u32 *vec_drop_compress = NULL; ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK; do { u32 tmp_bi = sub_chain_bi; vlib_buffer_t *tmp = vlib_get_buffer (vm, tmp_bi); vnet_buffer_opaque_t *vnb = vnet_buffer (tmp); if (!(vnb->ip.reass.range_first >= vnb->ip.reass.fragment_first) && !(vnb->ip.reass.range_last > vnb->ip.reass.fragment_first)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } u32 data_len = ip6_full_reass_buffer_get_data_len (tmp); u32 trim_front = vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr) + ip6_full_reass_buffer_get_data_offset (tmp); u32 trim_end = vlib_buffer_length_in_chain (vm, tmp) - trim_front - data_len; if (tmp_bi == reass->first_bi) { /* first buffer - keep ip6 header */ if (0 != ip6_full_reass_buffer_get_data_offset (tmp)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } trim_front = 0; trim_end = vlib_buffer_length_in_chain (vm, tmp) - data_len - (vnet_buffer (tmp)->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr)); if (!(vlib_buffer_length_in_chain (vm, tmp) - trim_end > 0)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } } u32 keep_data = vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end; while (1) { ++buf_cnt; if (trim_front) { if (trim_front > tmp->current_length) { /* drop whole buffer */ vec_add1 (vec_drop_compress, tmp_bi); trim_front -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT; tmp_bi = tmp->next_buffer; tmp = vlib_get_buffer (vm, tmp_bi); continue; } else { vlib_buffer_advance (tmp, trim_front); trim_front = 0; } } if (keep_data) { if (last_b) { last_b->flags |= VLIB_BUFFER_NEXT_PRESENT; 
last_b->next_buffer = tmp_bi; } last_b = tmp; if (keep_data <= tmp->current_length) { tmp->current_length = keep_data; keep_data = 0; } else { keep_data -= tmp->current_length; if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } } total_length += tmp->current_length; } else { vec_add1 (vec_drop_compress, tmp_bi); if (reass->first_bi == tmp_bi) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } ++dropped_cnt; } if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT) { tmp_bi = tmp->next_buffer; tmp = vlib_get_buffer (vm, tmp->next_buffer); } else { break; } } sub_chain_bi = vnet_buffer (vlib_get_buffer (vm, sub_chain_bi))->ip. reass.next_range_bi; } while (~0 != sub_chain_bi); if (!last_b) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } last_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT; vlib_buffer_t *first_b = vlib_get_buffer (vm, reass->first_bi); if (total_length < first_b->current_length) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } total_length -= first_b->current_length; first_b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; first_b->total_length_not_including_first_buffer = total_length; // drop fragment header vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b); ip6_header_t *ip = vlib_buffer_get_current (first_b); u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset; ip6_ext_header_t *prev_hdr; frag_hdr = ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION, &prev_hdr); if (prev_hdr) { prev_hdr->next_hdr = frag_hdr->next_hdr; } else { ip->protocol = frag_hdr->next_hdr; } if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset)) { rv = IP6_FULL_REASS_RC_INTERNAL_ERROR; goto free_buffers_and_return; } memmove (frag_hdr, (u8 *) frag_hdr + sizeof (*frag_hdr), first_b->current_length - ip6_frag_hdr_offset - sizeof (ip6_frag_hdr_t)); first_b->current_length -= sizeof (*frag_hdr); ip->payload_length = 
clib_host_to_net_u16 (total_length + first_b->current_length - sizeof (*ip)); if (!vlib_buffer_chain_linearize (vm, first_b)) { rv = IP6_FULL_REASS_RC_NO_BUF; goto free_buffers_and_return; } first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID; if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED)) { ip6_full_reass_add_trace (vm, node, rm, reass, reass->first_bi, NULL, FINALIZE, ~0); #if 0 // following code does a hexdump of packet fragments to stdout ... do { u32 bi = reass->first_bi; u8 *s = NULL; while (~0 != bi) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); s = format (s, "%u: %U\n", bi, format_hexdump, vlib_buffer_get_current (b), b->current_length); if (b->flags & VLIB_BUFFER_NEXT_PRESENT) { bi = b->next_buffer; } else { break; } } printf ("%.*s\n", vec_len (s), s); fflush (stdout); vec_free (s); } while (0); #endif } if (!is_custom_app) { *next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT; } else { *next0 = reass->next_index; } vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length; ip6_full_reass_free (rm, rt, reass); reass = NULL; free_buffers_and_return: vlib_buffer_free (vm, vec_drop_compress, vec_len (vec_drop_compress)); vec_free (vec_drop_compress); return rv; } always_inline void ip6_full_reass_insert_range_in_chain (vlib_main_t * vm, ip6_full_reass_main_t * rm, ip6_full_reass_per_thread_t * rt, ip6_full_reass_t * reass, u32 prev_range_bi, u32 new_next_bi) { vlib_buffer_t *new_next_b = vlib_get_buffer (vm, new_next_bi); vnet_buffer_opaque_t *new_next_vnb = vnet_buffer (new_next_b); if (~0 != prev_range_bi) { vlib_buffer_t *prev_b = vlib_get_buffer (v
# Copyright (c) 2017 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

*** Settings ***
| Resource | resources/libraries/robot/performance/performance_setup.robot
| Library | resources.libraries.python.Classify.Classify
| Library | resources.libraries.python.IPv4Setup.Dut | ${nodes['DUT1']}
| ... | WITH NAME | dut1_v4
| Library | resources.libraries.python.IPv4Setup.Dut | ${nodes['DUT2']}
| ... | WITH NAME | dut2_v4
| ...
| Force Tags | 3_NODE_SINGLE_LINK_TOPO | PERFTEST | HW_ENV | NDRCHK
| ... | NIC_Intel-X520-DA2 | ETH | IP4FWD | FEATURE | IACLDST
| ...
| Suite Setup | Set up 3-node performance topology with DUT's NIC model
| ... | L3 | Intel-X520-DA2
| Suite Teardown | Tear down 3-node performance topology
| ...
| Test Setup | Set up performance test
| Test Teardown | Tear down performance ndrchk test
| ...
| Documentation | *Reference NDR throughput IPv4 whitelist verify test cases*
| ...
| ... | *[Top] Network Topologies:* TG-DUT1-DUT2-TG 3-node circular topology
| ... | with single links between nodes.
| ... | *[Enc] Packet Encapsulations:* Eth-IPv4 for IPv4 routing.
| ... | *[Cfg] DUT configuration:* DUT1 and DUT2 are configured with IPv4
| ... | routing, two static IPv4 /24 routes and IPv4 iAcl security whitelist
| ... | ingress /24 filter entries applied on links TG - DUT1 and DUT2 - TG.
| ... | DUT1 and DUT2 tested with 2p10GE NIC X520 Niantic by Intel.
| ... | *[Ver] TG verification:* In short performance tests, TG verifies
| ... | DUTs' throughput at ref-NDR (reference Non Drop Rate) with zero packet
| ... | loss tolerance. Ref-NDR value is periodically updated according to
| ... | formula: ref-NDR = 0.9x NDR, where NDR is found in RFC2544 long
| ... | performance tests for the same DUT configuration. Test packets are
| ... | generated by TG on links to DUTs. TG traffic profile contains two L3
| ... | flow-groups (flow-group per direction, 253 flows per flow-group) with
| ... | all packets containing Ethernet header, IPv4 header with IP protocol=61
| ... | and static payload. MAC addresses are matching MAC addresses of the
| ... | TG node interfaces.
| ... | *[Ref] Applicable standard specifications:* RFC2544.

*** Variables ***
# Traffic profile:
| ${traffic_profile} | trex-sl-3n-ethip4-ip4src253

*** Test Cases ***
| tc01-64B-1t1c-ethip4-ip4base-iacldstbase-ndrchk
| | [Documentation]
| | ... | [Cfg] DUT runs IPv4 routing and whitelist filters config with \
| | ... | 1 thread, 1 phy core, 1 receive queue per NIC port. [Ver] Verify
| | ... | ref-NDR for 64 Byte frames using single trial throughput test
| | ... | at 2x 3.6mpps.
| | [Tags] | 64B | 1T1C | STHREAD
# Smallest frame size; ref-NDR rate is 3.6 Mpps per direction (2x total).
| | ${framesize}= | Set Variable | ${64}
| | ${rate}= | Set Variable | 3.6mpps
# Bring up VPP on both DUTs: 1 worker thread, 1 rxqueue, buffer chaining
# disabled (no multi seg), then apply the startup configuration.
| | Given Add '1' worker threads and '1' rxqueues in 3-node single-link circular topology
| | And Add PCI devices to DUTs in 3-node single link topology
| | And Add no multi seg to all DUTs
| | And Apply startup configuration on all VPP DUTs
| | When Initialize IPv4 forwarding in 3-node circular topology
# DUT1 ingress (TG - DUT1 link): L3 classify table matching on IPv4 dst,
# with a single permit session for dst 20.20.20.2.
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut1} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut1} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 20.20.20.2
| | And Vpp Enable Input Acl Interface
| | ... | ${dut1} | ${dut1_if1} | ip4 | ${table_idx}
# DUT2 ingress (DUT2 - TG link): mirror whitelist permitting dst 10.10.10.2.
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut2} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut2} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 10.10.10.2
| | And Vpp Enable Input Acl Interface
| | ... | ${dut2} | ${dut2_if2} | ip4 | ${table_idx}
# Single-trial verification: TG sends at the ref-NDR rate and expects
# zero packet loss for the configured trial duration.
| | Then Traffic should pass with no loss | ${perf_trial_duration} | ${rate}
| | ... | ${framesize} | ${traffic_profile}

| tc02-1518B-1t1c-ethip4-ip4base-iacldstbase-ndrchk
| | [Documentation]
| | ... | [Cfg] DUT runs IPv4 routing and whitelist filters config with \
| | ... | 1 thread, 1 phy core, 1 receive queue per NIC port. [Ver] Verify
| | ... | ref-NDR for 1518 Byte frames using single trial throughput test
| | ... | at 2x 812743pps.
| | [Tags] | 1518B | 1T1C | STHREAD
# Standard Ethernet MTU-sized frames; ref-NDR rate is 812743 pps per
# direction (2x total).
| | ${framesize}= | Set Variable | ${1518}
| | ${rate}= | Set Variable | 812743pps
# Bring up VPP on both DUTs: 1 worker thread, 1 rxqueue, buffer chaining
# disabled (no multi seg), then apply the startup configuration.
| | Given Add '1' worker threads and '1' rxqueues in 3-node single-link circular topology
| | And Add PCI devices to DUTs in 3-node single link topology
| | And Add no multi seg to all DUTs
| | And Apply startup configuration on all VPP DUTs
| | When Initialize IPv4 forwarding in 3-node circular topology
# DUT1 ingress (TG - DUT1 link): L3 classify table matching on IPv4 dst,
# with a single permit session for dst 20.20.20.2.
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut1} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut1} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 20.20.20.2
| | And Vpp Enable Input Acl Interface
| | ... | ${dut1} | ${dut1_if1} | ip4 | ${table_idx}
# DUT2 ingress (DUT2 - TG link): mirror whitelist permitting dst 10.10.10.2.
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut2} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut2} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 10.10.10.2
| | And Vpp Enable Input Acl Interface
| | ... | ${dut2} | ${dut2_if2} | ip4 | ${table_idx}
# Single-trial verification: TG sends at the ref-NDR rate and expects
# zero packet loss for the configured trial duration.
| | Then Traffic should pass with no loss | ${perf_trial_duration} | ${rate}
| | ... | ${framesize} | ${traffic_profile}

| tc03-9000B-1t1c-ethip4-ip4base-iacldstbase-ndrchk
| | [Documentation]
| | ... | [Cfg] DUT runs IPv4 routing and whitelist filters config with \
| | ... | 1 thread, 1 phy core, 1 receive queue per NIC port. [Ver] Verify
| | ... | ref-NDR for 9000 Byte frames using single trial throughput test
| | ... | at 2x 138580pps.
| | [Tags] | 9000B | 1T1C | STHREAD
| | ${framesize}= | Set Variable | ${9000}
| | ${rate}= | Set Variable | 138580pps
| | Given Add '1' worker threads and '1' rxqueues in 3-node single-link circular topology
| | And Add PCI devices to DUTs in 3-node single link topology
| | And Apply startup configuration on all VPP DUTs
| | When Initialize IPv4 forwarding in 3-node circular topology
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut1} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut1} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 20.20.20.2
| | And Vpp Enable Input Acl Interface
| | ... | ${dut1} | ${dut1_if1} | ip4 | ${table_idx}
| | ${table_idx} | ${skip_n} | ${match_n}= | And Vpp Creates Classify Table L3
| | ... | ${dut2} | ip4 | dst
| | And Vpp Configures Classify Session L3
| | ... | ${dut2} | permit | ${table_idx} | ${skip_n} | ${match_n}
| | ... | ip4 | dst | 10.10.10.2