diff options
-rw-r--r-- | MAINTAINERS | 5 | ||||
-rw-r--r-- | docs/spelling_wordlist.txt | 3 | ||||
-rw-r--r-- | src/plugins/pvti/CMakeLists.txt | 40 | ||||
-rw-r--r-- | src/plugins/pvti/FEATURE.yaml | 8 | ||||
-rw-r--r-- | src/plugins/pvti/api.c | 137 | ||||
-rw-r--r-- | src/plugins/pvti/bypass-main.c | 79 | ||||
-rw-r--r-- | src/plugins/pvti/bypass.c | 202 | ||||
-rw-r--r-- | src/plugins/pvti/bypass.h | 53 | ||||
-rw-r--r-- | src/plugins/pvti/input-main.c | 115 | ||||
-rw-r--r-- | src/plugins/pvti/input.c | 496 | ||||
-rw-r--r-- | src/plugins/pvti/input.h | 87 | ||||
-rw-r--r-- | src/plugins/pvti/output-main.c | 85 | ||||
-rw-r--r-- | src/plugins/pvti/output.c | 543 | ||||
-rw-r--r-- | src/plugins/pvti/output.h | 75 | ||||
-rw-r--r-- | src/plugins/pvti/pvti.api | 111 | ||||
-rw-r--r-- | src/plugins/pvti/pvti.c | 481 | ||||
-rw-r--r-- | src/plugins/pvti/pvti.h | 257 | ||||
-rw-r--r-- | src/plugins/pvti/pvti_if.c | 376 | ||||
-rw-r--r-- | src/plugins/pvti/pvti_if.h | 47 | ||||
-rw-r--r-- | test/test_pvti.py | 1153 |
20 files changed, 4353 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c6dbd8bc903..d6a9b1adf8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -847,6 +847,11 @@ I: tracenode M: Maxime Peim <mpeim@cisco.com> F: src/plugins/tracenode +Plugin - Packet Vector Tunnel Interface +I: pvti +M: Andrew Yourtchenko <ayourtch@gmail.com> +F: src/plugins/pvti + cJSON I: cjson M: Ole Troan <ot@cisco.com> diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 8de7bf4ee24..d6c5b97793e 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -903,6 +903,9 @@ pthreads pton pushingapatch putatively +pvti +PVTI +Pvti pwait py pypi diff --git a/src/plugins/pvti/CMakeLists.txt b/src/plugins/pvti/CMakeLists.txt new file mode 100644 index 00000000000..900b662d54a --- /dev/null +++ b/src/plugins/pvti/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(pvti + SOURCES + pvti_if.c + pvti.c + input.h + input.c + input-main.c + output.h + output.c + output-main.c + bypass.h + bypass.c + bypass-main.c + api.c + pvti.h + + MULTIARCH_SOURCES + input.c + output.c + bypass.c + + API_FILES + pvti.api + + # API_TEST_SOURCES + # pvti_test.c +) diff --git a/src/plugins/pvti/FEATURE.yaml b/src/plugins/pvti/FEATURE.yaml new file mode 100644 index 00000000000..52dbe5b7c1b --- /dev/null +++ b/src/plugins/pvti/FEATURE.yaml @@ -0,0 +1,8 @@ +--- +name: Packet Vector Tunnel +maintainer: Andrew Yourtchenko <ayourtch@gmail.com> +features: + - support inner MTU up to ~8K over standard 1280..1500 MTU substrate +description: "Large MTU Tunnels" +state: development +properties: [API, CLI] diff --git a/src/plugins/pvti/api.c b/src/plugins/pvti/api.c new file mode 100644 index 00000000000..cda39ad44e8 --- /dev/null +++ b/src/plugins/pvti/api.c @@ -0,0 +1,137 @@ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/format_fns.h> +#include <vnet/ip/ip_types_api.h> +#include <vlibapi/api.h> + +#include <pvti/pvti.api_enum.h> +#include <pvti/pvti.api_types.h> + +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +#define REPLY_MSG_ID_BASE pvm->msg_id_base +#include <vlibapi/api_helper_macros.h> + +typedef struct +{ + vl_api_registration_t *reg; + u32 context; +} pvti_if_details_ctx_t; + +typedef struct +{ + +} pvti_interface_dump_ctx_t; + +static walk_rc_t +pvti_if_send_details (index_t pvtii, void *data) +{ + vl_api_pvti_interface_details_t *rmp; + pvti_if_details_ctx_t *ctx = data; + const pvti_if_t *pvi; + + pvi = pvti_if_get (pvtii); + + rmp = vl_msg_api_alloc_zero (sizeof (*rmp)); + rmp->_vl_msg_id = + htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base); + + rmp->interface.sw_if_index = htonl (pvi->sw_if_index); + rmp->interface.local_port = htons (pvi->local_port); + rmp->interface.remote_port = htons (pvi->remote_port); + rmp->interface.underlay_mtu = htons (pvi->underlay_mtu); + + ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip); + ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip); + + rmp->context = ctx->context; + + vl_api_send_msg (ctx->reg, (u8 *) rmp); + + return (WALK_CONTINUE); +} + +static void +vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp) +{ + vl_api_registration_t *reg; + // pvti_main_t *pvm = &pvti_main; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (reg == 0) + return; + + pvti_if_details_ctx_t ctx = { + .reg = reg, + .context = mp->context, + }; + + u32 sw_if_index = ntohl (mp->sw_if_index); + if (sw_if_index == ~0) + pvti_if_walk (pvti_if_send_details, &ctx); + else + { + index_t pvtii = pvti_if_find_by_sw_if_index (sw_if_index); + if (pvtii != INDEX_INVALID) + pvti_if_send_details (pvtii, &ctx); + } +} + +static void +vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp) +{ + vl_api_pvti_interface_create_reply_t *rmp; + pvti_main_t *pvm = &pvti_main; + int rv = ~0; + u32 sw_if_index = ~0; + ip_address_t local_ip; + ip_address_t remote_ip; + + ip_address_decode2 (&mp->interface.local_ip, &local_ip); + ip_address_decode2 (&mp->interface.remote_ip, &remote_ip); + u16 lport = clib_host_to_net_u16 (mp->interface.local_port); + u16 rport = clib_host_to_net_u16 (mp->interface.remote_port); + u16 underlay_mtu = clib_host_to_net_u16 (mp->interface.underlay_mtu); + u32 underlay_fib_index = + clib_host_to_net_u32 (mp->interface.underlay_fib_index); + pvti_peer_address_method_t peer_address_method = + mp->interface.peer_address_from_payload ? PVTI_PEER_ADDRESS_FROM_PAYLOAD : + PVTI_PEER_ADDRESS_FIXED; + + if (underlay_mtu == 0) + { + underlay_mtu = 1500; + } + + rv = + pvti_if_create (&local_ip, lport, &remote_ip, rport, peer_address_method, + underlay_mtu, underlay_fib_index, &sw_if_index); + + REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY, + { rmp->sw_if_index = htonl (sw_if_index); }); +} + +static void +vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp) +{ + vl_api_pvti_interface_delete_reply_t *rmp; + pvti_main_t *pvm = &pvti_main; + int rv = 0; + + rv = pvti_if_delete (ntohl (mp->sw_if_index)); + REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY); +} + +/* API definitions */ +#include <pvti/pvti.api.c> + +void +pvti_api_init () +{ + pvti_main_t *pvm = &pvti_main; + /* Add our API messages to the global name_crc hash table */ + pvm->msg_id_base = setup_message_id_table (); +} diff --git a/src/plugins/pvti/bypass-main.c b/src/plugins/pvti/bypass-main.c new file mode 100644 index 00000000000..db79ccd2113 --- /dev/null +++ b/src/plugins/pvti/bypass-main.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pvti/bypass.h> + +/* packet trace format function */ +static u8 * +format_pvti_bypass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pvti_bypass_trace_t *t = va_arg (*args, pvti_bypass_trace_t *); + + s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n", + t->sw_if_index, t->next_index); + s = format (s, " src %U sport %d dport %d\n", format_ip_address, + &t->remote_ip, t->remote_port, t->local_port); + s = format (s, " seq: %d", t->seq); + return s; +} + +vlib_node_registration_t pvti4_bypass_node; +vlib_node_registration_t pvti6_bypass_node; + +static char *pvti_bypass_error_strings[] = { +#define _(sym, string) string, + foreach_pvti_bypass_error +#undef _ +}; + +VLIB_REGISTER_NODE (pvti4_bypass_node) = +{ + .name = "ip4-pvti-bypass", + .vector_size = sizeof (u32), + .format_trace = format_pvti_bypass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_bypass_error_strings), + .error_strings = pvti_bypass_error_strings, + + .n_next_nodes = PVTI_BYPASS_N_NEXT, + + .next_nodes = { + [PVTI_BYPASS_NEXT_DROP] = "error-drop", + [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti4-input", + }, + +}; + +VLIB_REGISTER_NODE (pvti6_bypass_node) = +{ + .name = "ip6-pvti-bypass", + .vector_size = sizeof (u32), + .format_trace = format_pvti_bypass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_bypass_error_strings), + .error_strings = pvti_bypass_error_strings, + + .n_next_nodes = PVTI_BYPASS_N_NEXT, + + .next_nodes = { + [PVTI_BYPASS_NEXT_DROP] = "error-drop", + [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti6-input", + }, + +}; diff --git a/src/plugins/pvti/bypass.c b/src/plugins/pvti/bypass.c new file mode 100644 index 00000000000..14c976439eb --- /dev/null +++ b/src/plugins/pvti/bypass.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> +#include <pvti/bypass.h> + +always_inline u16 +pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_ip6) +{ + u32 n_left_from, *from, *to_next; + pvti_bypass_next_t next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + + u32 pkts_processed = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + u32 sw_if_index0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + udp_header_t *udp0; + u32 bi0, ip_len0, udp_len0, flags0, next0; + u8 error0, good_udp0, proto0; + i32 len_diff0; + + bi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* setup the packet for the next feature */ + vnet_feature_next (&next0, b0); + + if (is_ip6) + { + ip60 = vlib_buffer_get_current (b0); + } + else + { + ip40 = vlib_buffer_get_current (b0); + } + + if (is_ip6) + { + proto0 = ip60->protocol; + } + else + { + /* Treat IP frag packets as "experimental" protocol for now */ + proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol; + } + + /* Process packet 0 */ + if (proto0 != IP_PROTOCOL_UDP) + goto exit; /* not UDP packet */ + + if (is_ip6) + udp0 = ip6_next_header (ip60); + else + udp0 = ip4_next_header (ip40); + + /* look up the destination ip and port */ + u32 pvti_index0 = INDEX_INVALID; + if (is_ip6) + { + pvti_index0 = pvti_if_find_by_remote_ip6_and_port ( + &ip60->src_address, clib_net_to_host_u16 (udp0->src_port)); + } + else + { + pvti_index0 = pvti_if_find_by_remote_ip4_and_port ( + &ip40->src_address, clib_net_to_host_u16 (udp0->src_port)); + } + if (pvti_index0 == INDEX_INVALID) + goto exit; + + flags0 = b0->flags; + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + /* Don't verify UDP checksum for packets with explicit zero checksum. + */ + good_udp0 |= udp0->checksum == 0; + + /* Verify UDP length */ + if (is_ip6) + ip_len0 = clib_net_to_host_u16 (ip60->payload_length); + else + ip_len0 = clib_net_to_host_u16 (ip40->length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + + /* Verify UDP checksum */ + if (PREDICT_FALSE (!good_udp0)) + { + if (is_ip6) + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0); + else + flags0 = ip4_tcp_udp_validate_checksum (vm, b0); + good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + + if (is_ip6) + { + error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH; + } + else + { + error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM; + error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH; + } + + next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT; + b0->error = error0 ? error_node->errors[error0] : 0; + + /* pvtiX-input node expect current at PVTI header */ + if (is_ip6) + vlib_buffer_advance (b0, sizeof (ip6_header_t) + + sizeof (udp_header_t)); + else + vlib_buffer_advance (b0, sizeof (ip4_header_t) + + sizeof (udp_header_t)); + exit: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_bypass_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->seq = 0; // clib_net_to_host_u32 (pvti0->seq); + if (is_ip6) + { + } + else + { + t->remote_ip.ip.ip4 = ip40->src_address; + t->remote_ip.version = AF_IP4; + } + // t->local_port = h0->udp.dst_port; + // t->remote_port = h0->udp.src_port; + } + + pkts_processed += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + PVTI_BYPASS_ERROR_PROCESSED, pkts_processed); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_bypass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_bypass_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_bypass_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_bypass_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/bypass.h b/src/plugins/pvti/bypass.h new file mode 100644 index 00000000000..611d5770ad3 --- /dev/null +++ b/src/plugins/pvti/bypass.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_pvti_bypass_h__ +#define __included_pvti_bypass_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u32 seq; +} pvti_bypass_trace_t; + +#define foreach_pvti_bypass_error \ + _ (PROCESSED, "PVTI bypass tunnel packets processed") + +typedef enum +{ +#define _(sym, str) PVTI_BYPASS_ERROR_##sym, + foreach_pvti_bypass_error +#undef _ + PVTI_BYPASS_N_ERROR, +} pvti_bypass_error_t; + +typedef enum +{ + PVTI_BYPASS_NEXT_DROP, + PVTI_BYPASS_NEXT_PVTI_INPUT, + PVTI_BYPASS_N_NEXT, +} pvti_bypass_next_t; + +#endif // pvti_bypass_h diff --git a/src/plugins/pvti/input-main.c b/src/plugins/pvti/input-main.c new file mode 100644 index 00000000000..8ab8b18dd7c --- /dev/null +++ b/src/plugins/pvti/input-main.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pvti/input.h> + +static char *pvti_input_error_strings[] = { +#define _(sym, string) string, + foreach_pvti_input_error +#undef _ +}; + +#define _(f, s) s, +static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type }; +#undef _ + +static char * +get_pvti_trace_type_name (u8 ptype) +{ + if (ptype < PVTI_INPUT_TRACE_N_TYPES) + { + return pvti_input_trace_type_names[ptype]; + } + else + { + return "unknown"; + } +} + +/* packet trace format function */ +static u8 * +format_pvti_input_trace (u8 *s, va_list *args) +{ + int i; + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pvti_input_trace_t *t = va_arg (*args, pvti_input_trace_t *); + + u32 indent = format_get_indent (s); + + s = format (s, + "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), " + "chunkcnt: %d\n", + t->sw_if_index, t->next_index, + get_pvti_trace_type_name (t->trace_type), t->trace_type, + t->chunk_count); + s = format (s, " src %U sport %d dport %d\n", format_ip_address, + &t->remote_ip, t->remote_port, t->local_port); + s = format (s, " seq: %d, chunk_count: %d\n", t->seq, t->chunk_count); + u16 max = t->chunk_count > MAX_CHUNKS ? MAX_CHUNKS : t->chunk_count; + for (i = 0; i < max; i++) + { + s = format (s, " %02d: sz %d\n", i, t->chunks[i].total_chunk_length); + } + s = format (s, "\n%U%U", format_white_space, indent, + format_ip_adjacency_packet_data, t->packet_data, + sizeof (t->packet_data)); + + return s; +} + +vlib_node_registration_t pvti4_input_node; +vlib_node_registration_t pvti6_input_node; + +VLIB_REGISTER_NODE (pvti4_input_node) = +{ + .name = "pvti4-input", + .vector_size = sizeof (u32), + .format_trace = format_pvti_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_input_error_strings), + .error_strings = pvti_input_error_strings, + + .n_next_nodes = PVTI_INPUT_N_NEXT, + + .next_nodes = { + [PVTI_INPUT_NEXT_DROP] = "error-drop", + [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [PVTI_INPUT_NEXT_PUNT] = "error-punt", + }, + +}; +VLIB_REGISTER_NODE (pvti6_input_node) = +{ + .name = "pvti6-input", + .vector_size = sizeof (u32), + .format_trace = format_pvti_input_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(pvti_input_error_strings), + .error_strings = pvti_input_error_strings, + + .n_next_nodes = PVTI_INPUT_N_NEXT, + + .next_nodes = { + [PVTI_INPUT_NEXT_DROP] = "error-drop", + [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input", + [PVTI_INPUT_NEXT_PUNT] = "error-punt", + }, + +}; diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c new file mode 100644 index 00000000000..6a8806e2795 --- /dev/null +++ b/src/plugins/pvti/input.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> +#include <pvti/input.h> + +always_inline void +pvti_enqueue_rx_bi_to_next_and_trace (vlib_main_t *vm, + vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, u32 bi0, + u16 next0) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + + if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0))) + { + pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->trace_type = PVTI_INPUT_TRACE_decap; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + vec_add1 (ptd->pending_rx_buffers, bi0); + vec_add1 (ptd->pending_rx_nexts, next0); +} + +always_inline pvti_rx_peer_t * +pvti_try_find_or_create_rx_peer (pvti_per_thread_data_t *ptd, + vlib_buffer_t *b0, bool is_ip6) +{ + pvti_rx_peer_t *peer; + + ip_address_t remote_ip = { 0 }; + u16 remote_port; + if (is_ip6) + { + pvti_ip6_encap_header_t *h0 = + ((pvti_ip6_encap_header_t *) vlib_buffer_get_current (b0)) - 1; + ip_address_set (&remote_ip, &h0->ip6.src_address, AF_IP6); + remote_port = clib_net_to_host_u16 (h0->udp.src_port); + } + else + { + pvti_ip4_encap_header_t *h0 = + ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1; + ip_address_set (&remote_ip, &h0->ip4.src_address, AF_IP4); + remote_port = clib_net_to_host_u16 (h0->udp.src_port); + } + + pool_foreach (peer, ptd->rx_peers) + { + if (peer->remote_port == remote_port && + 0 == ip_address_cmp (&remote_ip, &peer->remote_ip)) + { + if (peer->deleted) + { + // The peer has been marked as deleted - wipe it. + clib_memset (peer, 0xca, sizeof (*peer)); + pool_put (ptd->rx_peers, peer); + continue; + } + return peer; + } + } + + index_t pvti_if_index0 = + pvti_if_find_by_remote_ip_and_port (&remote_ip, remote_port); + if (INDEX_INVALID == pvti_if_index0) + { + // no suitable interface found, bail + return 0; + } + pvti_if_t *pvti_if0 = pvti_if_get (pvti_if_index0); + + pvti_rx_peer_t new_peer = { + .local_ip = pvti_if0->local_ip, + .local_port = pvti_if0->local_port, + .remote_ip = remote_ip, + .remote_port = remote_port, + .pvti_if_index = pvti_if_index0, + .rx_streams = { { 0 } }, + }; + pvti_rx_peer_t *rx_new_peer; + pool_get (ptd->rx_peers, rx_new_peer); + *rx_new_peer = new_peer; + + int i; + for (i = 0; i < MAX_RX_STREAMS; i++) + { + rx_new_peer->rx_streams[i].rx_bi0 = INDEX_INVALID; + rx_new_peer->rx_streams[i].rx_bi0_first = INDEX_INVALID; + rx_new_peer->rx_streams[i].rx_next0 = 0; + } + + return rx_new_peer; +} + +always_inline u16 +pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_ip6) +{ + u32 n_left_from, *from; + pvti_chunk_header_t *chunks[MAX_CHUNKS]; + u32 pkts_processed = 0; + u32 pkts_decapsulated = 0; + u32 decap_failed_no_buffers = 0; + + pvti_main_t *pvm = &pvti_main; + + u32 thread_index = vlib_get_thread_index (); + pvti_per_thread_data_t *ptd = + vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = PVTI_INPUT_NEXT_DROP; + u32 sw_if_index0; + u8 true_chunk_count = 0; + u8 max_chunk_count; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + pvti_ip4_encap_header_t *h0 = + ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1; + pvti_rx_peer_t *pvti_rx_peer0 = + pvti_try_find_or_create_rx_peer (ptd, b0, is_ip6); + if (!pvti_rx_peer0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_PEER]; + goto drop_and_maybe_trace; + } + + b0 = vlib_get_buffer (vm, bi0); + pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0); + u8 stream_index = pvti0->stream_index; + max_chunk_count = + pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS; + u16 pvti_packet_header_sz0 = + pvti0->pad_bytes + offsetof (pvti_packet_header_t, pad); + if (b0->current_length < pvti_packet_header_sz0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_PACKET_TOO_SHORT]; + goto drop_and_maybe_trace; + } + vlib_buffer_advance (b0, pvti_packet_header_sz0); + + if (max_chunk_count == 0) + { + b0->error = node->errors[PVTI_INPUT_ERROR_NOCHUNKS]; + goto drop_and_maybe_trace; + } + if (pvti0->reass_chunk_count > max_chunk_count) + { + b0->error = node->errors[PVTI_INPUT_ERROR_TOOMANYREASS]; + goto drop_and_maybe_trace; + } + pvti_per_rx_stream_data_t *rx_stream0 = + &pvti_rx_peer0->rx_streams[stream_index]; + + u32 new_seq0 = clib_net_to_host_u32 (pvti0->seq); + if (new_seq0 == rx_stream0->last_rx_seq + 1) + { + /* Sequence# matches, we can attempt adding the leading chunks to + * reassembly */ + rx_stream0->last_rx_seq = new_seq0; + + while ((b0->current_length > 0) && + true_chunk_count < pvti0->reass_chunk_count) + { + /* attempt to either incorporate the first chunk into + * reassembly or skip it. */ + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + const u16 chunk_payload_length = + clib_net_to_host_u16 (pvc0->total_chunk_length) - + sizeof (*pvc0); + vlib_buffer_advance (b0, sizeof (*pvc0)); + + if (rx_stream0->rx_bi0 == INDEX_INVALID) + { + clib_warning ( + "RX internal error: not-first chunk but no wip block"); + } + else + { + + vlib_buffer_t *rb0 = + vlib_get_buffer (vm, rx_stream0->rx_bi0); + u16 allowed_length = + PVTI_RX_MAX_LENGTH - rb0->current_length; + if (allowed_length > chunk_payload_length) + { + // simple case - there is space in the buffer to fit + // the whole chunk + void *tail = + vlib_buffer_put_uninit (rb0, chunk_payload_length); + clib_memcpy (tail, vlib_buffer_get_current (b0), + chunk_payload_length); + } + else + { + // The current chunk can not fit - need to make two + // copies, one into the current buffer, and one into + // a newly allocated chained buffer. + void *tail = + vlib_buffer_put_uninit (rb0, allowed_length); + clib_memcpy (tail, vlib_buffer_get_current (b0), + allowed_length); + u16 remaining_payload_length = + chunk_payload_length - allowed_length; + u32 nrbi0 = pvti_get_new_buffer (vm); + if (INDEX_INVALID == nrbi0) + { + ASSERT (0); // FIXME what the recovery is + // supposed to look like ? + } + else + { + // link up the new buffer and copy the remainder + // there + vlib_buffer_t *nrb0 = vlib_get_buffer (vm, nrbi0); + rb0->flags |= VLIB_BUFFER_NEXT_PRESENT; + rb0->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + rb0->next_buffer = nrbi0; + rx_stream0->rx_bi0 = nrbi0; + void *tail = vlib_buffer_put_uninit ( + nrb0, remaining_payload_length); + clib_memcpy (tail, + vlib_buffer_get_current (b0) + + allowed_length, + remaining_payload_length); + } + } + pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length += chunk_payload_length; + if (pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length == + pvti_rx_peer0->rx_streams[stream_index] + .rx_expected_inner_length) + { + next0 = rx_stream0->rx_next0; + pvti_enqueue_rx_bi_to_next_and_trace ( + vm, node, ptd, rx_stream0->rx_bi0_first, next0); + pkts_decapsulated += 1; + + // clean out the current reassemly state + rx_stream0->rx_bi0 = INDEX_INVALID; + rx_stream0->rx_bi0_first = INDEX_INVALID; + pvti_rx_peer0->rx_streams[stream_index] + .rx_received_inner_length = 0; + pvti_rx_peer0->rx_streams[stream_index] + .rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + } + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + vlib_buffer_advance (b0, chunk_payload_length); + } + } + else + { + /* Sequence does not match, skip the reassembly chunks and reset + * the reassembly state */ + + while ((b0->current_length > 0) && + true_chunk_count < pvti0->reass_chunk_count) + { + /* skip the reassembly chunks */ + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + vlib_buffer_advance ( + b0, clib_net_to_host_u16 (pvc0->total_chunk_length)); + } + // FIXME: discard the current reassembly state, reset the seq# + if (rx_stream0->rx_bi0_first != INDEX_INVALID) + { + clib_warning ("RX PVTI: discard chunk being reassembled"); + vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first); + rx_stream0->rx_bi0 = INDEX_INVALID; + rx_stream0->rx_bi0_first = INDEX_INVALID; + rx_stream0->rx_received_inner_length = 0; + rx_stream0->rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + } + + while ((b0->current_length > 0) && true_chunk_count < max_chunk_count) + { + if (b0->current_length < sizeof (pvti_chunk_header_t)) + { + clib_warning ("RX ERR: length too short for a chunk"); + break; + } + pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0); + chunks[true_chunk_count] = pvc0; + true_chunk_count += 1; + u16 total_chunk_length = + clib_net_to_host_u16 (pvc0->total_chunk_length); + if (b0->current_length < total_chunk_length) + { + clib_warning ("RX ERR: length 0x%x too big for a chunk", + true_chunk_count); + break; + } + u8 *pkt = (u8 *) (pvc0 + 1); + u16 inner_length; + if (rx_stream0->rx_bi0_first != INDEX_INVALID) + { + vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first); + rx_stream0->rx_bi0 = INDEX_INVALID; + rx_stream0->rx_bi0_first = INDEX_INVALID; + rx_stream0->rx_received_inner_length = 0; + rx_stream0->rx_expected_inner_length = 0; + rx_stream0->rx_next0 = 0; + } + + switch (*pkt & 0xf0) + { + case 0x40: + next0 = PVTI_INPUT_NEXT_IP4_INPUT; + inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 2))); + break; + case 0x60: + next0 = PVTI_INPUT_NEXT_IP6_INPUT; + inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 4))) + + sizeof (ip6_header_t); + break; + default: + next0 = PVTI_INPUT_NEXT_DROP; + vlib_buffer_advance (b0, total_chunk_length); + continue; + } + vlib_buffer_advance (b0, sizeof (pvti_chunk_header_t)); + + if (inner_length + sizeof (pvti_chunk_header_t) > total_chunk_length) + { + /* FIXME: the packet size is larger than the chunk -> it's a + * first fragment */ + // enqueue the chunk and finish packet processing. + // There must be no active reassembly. + ASSERT (rx_stream0->rx_bi0_first == INDEX_INVALID); + rx_stream0->rx_next0 = next0; + rx_stream0->rx_bi0 = bi0; + rx_stream0->rx_bi0_first = bi0; + rx_stream0->rx_expected_inner_length = inner_length; + rx_stream0->rx_received_inner_length = + total_chunk_length - sizeof (pvti_chunk_header_t); + rx_stream0->last_rx_seq = new_seq0; + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = ~0; + t->trace_type = PVTI_INPUT_TRACE_enqueue; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + goto continue_outer; + } + + u32 nbi0 = pvti_get_new_buffer (vm); + if (INDEX_INVALID == nbi0) + { + decap_failed_no_buffers += 1; + continue; + }; + vlib_buffer_t *nb0 = vlib_get_buffer (vm, nbi0); + pvti_if_t *pvti_if0 = pvti_if_get (pvti_rx_peer0->pvti_if_index); + vnet_buffer (nb0)->sw_if_index[VLIB_RX] = pvti_if0->sw_if_index; + void *new_packet = vlib_buffer_put_uninit (nb0, inner_length); + clib_memcpy (new_packet, pvc0 + 1, inner_length); + vlib_buffer_advance (b0, inner_length); + + pvti_enqueue_rx_bi_to_next_and_trace (vm, node, ptd, nbi0, next0); + pkts_decapsulated += 1; + } + /* we have processed all the chunks from the buffer, but the buffer + * remains. Free it. */ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = ~0; + t->trace_type = PVTI_INPUT_TRACE_free; + t->seq = clib_net_to_host_u32 (pvti0->seq); + t->chunk_count = pvti0->chunk_count; + u8 chunk_count = + pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS; + for (int i = 0; i < chunk_count; i++) + { + t->chunks[i].total_chunk_length = + clib_net_to_host_u16 (chunks[i]->total_chunk_length); + } + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + vlib_buffer_free_one (vm, bi0); + + continue_outer: + pkts_processed += 1; + continue; + + drop_and_maybe_trace: + next0 = PVTI_INPUT_NEXT_DROP; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + int i; + pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->trace_type = PVTI_INPUT_TRACE_drop; + t->next_index = next0; + t->remote_ip.ip.ip4 = h0->ip4.src_address; + t->remote_ip.version = AF_IP4; + t->local_port = h0->udp.dst_port; + t->remote_port = h0->udp.src_port; + if (!pvti_rx_peer0) + { + t->seq = 0xdeaddead; + } + else + { + t->seq = clib_net_to_host_u32 (pvti0->seq); + t->chunk_count = pvti0->chunk_count; + u8 chunk_count = pvti0->chunk_count < MAX_CHUNKS ? + pvti0->chunk_count : + MAX_CHUNKS; + for (i = 0; i < chunk_count; i++) + { + t->chunks[i].total_chunk_length = + clib_net_to_host_u16 (chunks[i]->total_chunk_length); + } + } + } + + pkts_processed += 1; + vec_add1 (ptd->pending_rx_buffers, bi0); + vec_add1 (ptd->pending_rx_nexts, next0); + } + + vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_rx_buffers, + &ptd->pending_rx_nexts, + vec_len (ptd->pending_rx_nexts)); + vec_reset_length (ptd->pending_rx_buffers); + vec_reset_length (ptd->pending_rx_nexts); + + vlib_node_increment_counter (vm, node->node_index, + PVTI_INPUT_ERROR_PROCESSED, pkts_processed); + vlib_node_increment_counter ( + vm, node->node_index, PVTI_INPUT_ERROR_DECAPSULATED, pkts_decapsulated); + vlib_node_increment_counter (vm, node->node_index, + PVTI_INPUT_ERROR_NO_BUFFERS, + decap_failed_no_buffers); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_input_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_input_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/input.h b/src/plugins/pvti/input.h new file mode 100644 index 00000000000..02a186cde05 --- /dev/null +++ b/src/plugins/pvti/input.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_pvti_input_h__ +#define __included_pvti_input_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u16 total_chunk_length; +} pvti_input_chunk_t; + +#define MAX_CHUNKS 32 +#define PVTI_RX_MAX_LENGTH 2048 + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u32 seq; + pvti_input_chunk_t chunks[MAX_CHUNKS]; + u8 chunk_count; + u8 trace_type; + u8 packet_data[64]; +} pvti_input_trace_t; + +#define foreach_pvti_input_trace_type \ + _ (drop, "drop") \ + _ (decap, "decapsulate") \ + _ (free, "free") \ + _ (enqueue, "enqueue") + +typedef enum +{ +#define _(f, s) PVTI_INPUT_TRACE_##f, + foreach_pvti_input_trace_type +#undef _ + PVTI_INPUT_TRACE_N_TYPES, +} pvti_input_trace_type_t; + +#define foreach_pvti_input_error \ + _ (PROCESSED, "PVTI tunneled packets processed") \ + _ (DECAPSULATED, "PVTI inner packets decapsulated") \ + _ (PEER, "Could not find a peer") \ + _ (NOCHUNKS, "Packet has no chunks") \ + _ (NO_BUFFERS, "No buffers available to decapsulate") \ + _ (TOOMANYREASS, "Packet has more reassembly chunks than total") \ + _ (PACKET_TOO_SHORT, "Packet too short") + +typedef enum +{ +#define _(sym, str) PVTI_INPUT_ERROR_##sym, + foreach_pvti_input_error +#undef _ + PVTI_INPUT_N_ERROR, +} pvti_input_error_t; + +typedef enum +{ + PVTI_INPUT_NEXT_DROP, + PVTI_INPUT_NEXT_IP4_INPUT, + PVTI_INPUT_NEXT_IP6_INPUT, + PVTI_INPUT_NEXT_PUNT, + PVTI_INPUT_N_NEXT, +} pvti_input_next_t; + +#endif // pvti_input_h diff --git a/src/plugins/pvti/output-main.c b/src/plugins/pvti/output-main.c new file mode 100644 index 00000000000..ae4ae5f8e98 --- /dev/null +++ b/src/plugins/pvti/output-main.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <pvti/output.h> + +/* packet trace format function */ +static u8 * +format_pvti_output_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + pvti_output_trace_t *t = va_arg (*args, pvti_output_trace_t *); + + u32 indent = format_get_indent (s); + s = + format (s, "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,", + t->trace_type, t->sw_if_index, t->next_index, t->underlay_mtu); + s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d", + format_white_space, indent, t->stream_index, + t->bi0_max_current_length, t->tx_seq); + s = format (s, "\n%U%U", format_white_space, indent, + format_ip_adjacency_packet_data, t->packet_data, + sizeof (t->packet_data)); + + return s; +} + +vlib_node_registration_t pvti_output_node; + +static char *pvti_output_error_strings[] = { +#define _(sym, string) string, + foreach_pvti_output_error +#undef _ +}; + +VLIB_REGISTER_NODE (pvti4_output_node) = +{ + .name = "pvti4-output", + .vector_size = sizeof (u32), + .format_trace = format_pvti_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(pvti_output_error_strings), + .error_strings = pvti_output_error_strings, + + .n_next_nodes = PVTI_OUTPUT_N_NEXT, + + .next_nodes = { + [PVTI_OUTPUT_NEXT_DROP] = "error-drop", + [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx", + [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, + +}; +VLIB_REGISTER_NODE (pvti6_output_node) = +{ + .name = "pvti6-output", + .vector_size = sizeof (u32), + .format_trace = format_pvti_output_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(pvti_output_error_strings), + .error_strings = pvti_output_error_strings, + + .n_next_nodes = PVTI_OUTPUT_N_NEXT, + + .next_nodes = { + [PVTI_OUTPUT_NEXT_DROP] = "error-drop", + [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx", + [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, + +}; diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c new file mode 100644 index 00000000000..1939c6f585a --- /dev/null +++ b/src/plugins/pvti/output.c @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> +#include <pvti/output.h> + +static_always_inline u32 +ip6_vtcfl (u8 stream_index) +{ + u32 vtcfl = 0x6 << 28; + vtcfl |= stream_index; + + return (clib_host_to_net_u32 (vtcfl)); +} + +always_inline vlib_buffer_t * +pvti_alloc_new_tx_buffer (vlib_main_t *vm) +{ + u32 bi0 = INDEX_INVALID; + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + { + return 0; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + b0->current_data = 0; + b0->current_length = 0; + return b0; +} + +always_inline bool +pvti_find_or_try_create_tx_peer (vlib_main_t *vm, pvti_per_thread_data_t *ptd, + pvti_if_t *pvti_if0, ip_address_t *remote_ip, + u16 remote_port, u32 *out_index) +{ + + pvti_tx_peer_t *peer; + pool_foreach (peer, ptd->tx_peers) + { + if (peer->remote_port == remote_port && + 0 == ip_address_cmp (remote_ip, &peer->remote_ip)) + { + if (peer->deleted) + { + // Bad luck, the peer has been deleted. + u32 boi0 = vlib_get_buffer_index (vm, peer->bo0); + if (peer->bo0) + { + vlib_buffer_free (vm, &boi0, 1); + } + clib_memset (peer, 0xca, sizeof (*peer)); + pool_put (ptd->tx_peers, peer); + continue; + } + *out_index = peer - ptd->tx_peers; + return 1; + } + } + + ip_address_family_t dst_ver = ip_addr_version (&pvti_if0->remote_ip); + + u16 pvti_encap_overhead = (dst_ver == AF_IP6) ? + sizeof (pvti_ip6_encap_header_t) : + sizeof (pvti_ip4_encap_header_t); + + u16 pvti_packet_overhead = + pvti_encap_overhead + sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES; + + ASSERT (pvti_if0->underlay_mtu > pvti_packet_overhead); + + u32 bo0_max_current_length = pvti_if0->underlay_mtu - pvti_packet_overhead; + + vlib_buffer_t *bo0 = pvti_alloc_new_tx_buffer (vm); + + if (!bo0) + { + return 0; + } + + pvti_tx_peer_t new_peer = { + .local_ip = pvti_if0->local_ip, + .remote_ip = *remote_ip, + .local_port = pvti_if0->local_port, + .remote_port = remote_port, + .underlay_mtu = pvti_if0->underlay_mtu, + .underlay_fib_index = pvti_if0->underlay_fib_index, + .bo0_max_current_length = bo0_max_current_length, + .pvti_if_index = pvti_if_get_index (pvti_if0), + .deleted = 0, + .bo0 = bo0, + .chunk_count = 0, + .reass_chunk_count = 0, + .current_tx_seq = 42, + }; + + pvti_tx_peer_t *tx_new_peer; + pool_get (ptd->tx_peers, tx_new_peer); + + *tx_new_peer = new_peer; + *out_index = tx_new_peer - ptd->tx_peers; + return 1; +} + +always_inline bool +pvti_try_get_tx_peer_index (vlib_main_t *vm, pvti_per_thread_data_t *ptd, + pvti_if_t *pvti_if0, vlib_buffer_t *b0, + bool is_ip6, u32 *out_index) +{ + if (pvti_if0->peer_address_from_payload) + { + ip_address_t remote_ip = { 0 }; + if (is_ip6) + { + ip6_header_t *ip6 = vlib_buffer_get_current (b0); + ip_address_set (&remote_ip, &ip6->dst_address, AF_IP6); + } + else + { + ip4_header_t *ip4 = vlib_buffer_get_current (b0); + ip_address_set (&remote_ip, &ip4->dst_address, AF_IP4); + } + return pvti_find_or_try_create_tx_peer ( + vm, ptd, pvti_if0, &remote_ip, pvti_if0->remote_port, out_index); + } + else + { + return pvti_find_or_try_create_tx_peer ( + vm, ptd, pvti_if0, &pvti_if0->remote_ip, pvti_if0->remote_port, + out_index); + } + /* not reached */ +} + +always_inline void +pvti_finalize_chunk (pvti_tx_peer_t *tx_peer, + pvti_chunk_header_t *chunk_header, u8 *tail, + bool is_reassembly_chunk) +{ + clib_memset (chunk_header, 0xab, sizeof (pvti_chunk_header_t)); + chunk_header->total_chunk_length = + clib_host_to_net_u16 (tail - (u8 *) chunk_header); + tx_peer->chunk_count++; + if (is_reassembly_chunk) + { + tx_peer->reass_chunk_count++; + } +} + +always_inline pvti_output_next_t +encap_pvti_buffer_ip46 (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_tx_peer_t *tx_peer, int is_ip6) +{ + ip_address_family_t src_ver = ip_addr_version (&tx_peer->local_ip); + ip_address_family_t dst_ver = ip_addr_version (&tx_peer->remote_ip); + u8 stream_index = 0; + + ASSERT (src_ver == dst_ver); + bool is_ip6_encap = (AF_IP6 == src_ver); + + vlib_buffer_t *b0 = tx_peer->bo0; + vlib_buffer_advance (b0, + -(sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES)); + + pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0); + clib_memset (pvti0, 0xca, sizeof (*pvti0) + PVTI_ALIGN_BYTES); + pvti0->pad_bytes = PVTI_ALIGN_BYTES; + + pvti0->seq = clib_host_to_net_u32 (tx_peer->current_tx_seq); + pvti0->stream_index = stream_index; + pvti0->reass_chunk_count = tx_peer->reass_chunk_count; + pvti0->chunk_count = tx_peer->chunk_count; + pvti0->mandatory_flags_mask = 0; + pvti0->flags_value = 0; + + if (is_ip6_encap) + { + vlib_buffer_advance (b0, -(sizeof (pvti_ip6_encap_header_t))); + if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE) + { + // undo the change + vlib_buffer_advance (b0, (sizeof (pvti_ip6_encap_header_t))); + b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE]; + return PVTI_OUTPUT_NEXT_DROP; + } + pvti_ip6_encap_header_t *ve = vlib_buffer_get_current (b0); + + ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port); + ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port); + ve->udp.length = clib_host_to_net_u16 ( + b0->current_length - offsetof (pvti_ip6_encap_header_t, udp)); + ve->udp.checksum = 0; + + ve->ip6.ip_version_traffic_class_and_flow_label = + ip6_vtcfl (stream_index); + ve->ip6.payload_length = ve->udp.length; + ve->ip6.protocol = 17; + ve->ip6.hop_limit = 128; + ip_address_copy_addr (&ve->ip6.src_address, &tx_peer->local_ip); + ip_address_copy_addr (&ve->ip6.dst_address, &tx_peer->remote_ip); + } + else + { + vlib_buffer_advance (b0, -(sizeof (pvti_ip4_encap_header_t))); + if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE) + { + // undo the change + vlib_buffer_advance (b0, (sizeof (pvti_ip4_encap_header_t))); + b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE]; + return PVTI_OUTPUT_NEXT_DROP; + } + pvti_ip4_encap_header_t *ve = vlib_buffer_get_current (b0); + + ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port); + ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port); + ve->udp.length = clib_host_to_net_u16 ( + b0->current_length - offsetof (pvti_ip4_encap_header_t, udp)); + ve->udp.checksum = 0; + + ve->ip4.ip_version_and_header_length = 0x45; + ve->ip4.tos = 0; + ve->ip4.length = clib_host_to_net_u16 (b0->current_length); + ve->ip4.fragment_id = + clib_host_to_net_u16 (tx_peer->current_tx_seq & 0xffff); + ve->ip4.flags_and_fragment_offset = 0; + ve->ip4.ttl = 128; + ve->ip4.protocol = 17; + + ve->ip4.dst_address.as_u32 = ip_addr_v4 (&tx_peer->remote_ip).data_u32; + ve->ip4.src_address.as_u32 = ip_addr_v4 (&tx_peer->local_ip).data_u32; + ve->ip4.checksum = ip4_header_checksum (&ve->ip4); + } + + // This is important, if not reset, causes a crash + vnet_buffer (b0)->sw_if_index[VLIB_TX] = tx_peer->underlay_fib_index; + + // vnet_buffer (b0)->oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; + return is_ip6_encap ? PVTI_OUTPUT_NEXT_IP6_LOOKUP : + PVTI_OUTPUT_NEXT_IP4_LOOKUP; +} + +always_inline void +pvti_enqueue_tx_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, vlib_buffer_t *b0, + u16 next0, u8 stream_index, pvti_tx_peer_t *tx_peer) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + tx_peer->is_bo0_traced)) + { + if (PREDICT_TRUE ( + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0))) + { + + pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->underlay_mtu = tx_peer->underlay_mtu; + t->stream_index = stream_index; + t->trace_type = 1; + t->bi0_max_current_length = tx_peer->bo0_max_current_length; + t->tx_seq = tx_peer->current_tx_seq; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + } + u32 bi0 = vlib_get_buffer_index (vm, b0); + vec_add1 (ptd->pending_tx_buffers, bi0); + vec_add1 (ptd->pending_tx_nexts, next0); +} + +always_inline void +pvti_enqueue_tx_drop_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, vlib_buffer_t *b0, + u8 stream_index) +{ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = PVTI_OUTPUT_NEXT_DROP; + t->stream_index = stream_index; + t->trace_type = 0; + clib_memcpy (t->packet_data, vlib_buffer_get_current (b0), + sizeof (t->packet_data)); + } + u32 bi0 = vlib_get_buffer_index (vm, b0); + vec_add1 (ptd->pending_tx_buffers, bi0); + vec_add1 (ptd->pending_tx_nexts, PVTI_OUTPUT_NEXT_DROP); +} + +always_inline bool +pvti_flush_peer_and_recharge (vlib_main_t *vm, vlib_node_runtime_t *node, + pvti_per_thread_data_t *ptd, u32 tx_peer_index, + u8 stream_index, const bool is_ip6) +{ + pvti_tx_peer_t *tx_peer = pool_elt_at_index (ptd->tx_peers, tx_peer_index); + u16 next0 = encap_pvti_buffer_ip46 (vm, node, tx_peer, is_ip6); + + pvti_enqueue_tx_and_trace (vm, node, ptd, tx_peer->bo0, next0, stream_index, + tx_peer); + + tx_peer->bo0 = pvti_alloc_new_tx_buffer (vm); + tx_peer->reass_chunk_count = 0; + tx_peer->chunk_count = 0; + tx_peer->current_tx_seq++; + + return 1; +} + +always_inline u16 +pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, const bool is_ip6) +{ + pvti_main_t *pvm = &pvti_main; + + u32 n_left_from, *from; + u32 pkts_encapsulated = 0; + u32 pkts_processed = 0; + u32 pkts_chopped = 0; + u32 pkts_overflow = 0; + u32 pkts_overflow_cantfit = 0; + + bool is_node_traced = (node->flags & VLIB_NODE_FLAG_TRACE) ? 1 : 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + u8 stream_index = pvti_get_stream_index (is_ip6); + + u32 thread_index = vlib_get_thread_index (); + pvti_per_thread_data_t *ptd = + vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index); + + vlib_buffer_t *ibufs[VLIB_FRAME_SIZE], **ib = ibufs; + + vlib_get_buffers (vm, from, ibufs, n_left_from); + + n_left_from = frame->n_vectors; + while (1 && n_left_from > 0) + { + n_left_from -= 1; + vlib_buffer_t *b0 = ib[0]; + ib++; + u32 bi0 = vlib_get_buffer_index (vm, b0); + bool is_b0_traced = + is_node_traced && ((b0->flags & VLIB_BUFFER_IS_TRACED) ? 1 : 0); + pkts_processed += 1; + + u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + u32 pvti_index0 = pvti_if_find_by_sw_if_index (sw_if_index0); + if (pvti_index0 == INDEX_INVALID) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_PEER]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index); + continue; + } + pvti_if_t *pvti_if0 = pvti_if_get (pvti_index0); + u32 tx_peer_index; + if (!pvti_try_get_tx_peer_index (vm, ptd, pvti_if0, b0, is_ip6, + &tx_peer_index)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_MAKE_PEER]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index); + continue; + } + pvti_tx_peer_t *tx_peer = &ptd->tx_peers[tx_peer_index]; + + u32 b0_len = vlib_buffer_length_in_chain (vm, b0); + u32 total_chunk_len = sizeof (pvti_chunk_header_t) + b0_len; + + if (tx_peer->bo0_max_current_length >= + tx_peer->bo0->current_length + total_chunk_len) + { + /* Happy case, we can fit the entire new chunk */ + pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + u8 *tail = vlib_buffer_put_uninit (tx_peer->bo0, b0_len); + vlib_buffer_t *b0_curr; + b0_curr = b0; + while (b0_len > 0) + { + clib_memcpy (tail, vlib_buffer_get_current (b0_curr), + b0_curr->current_length); + tail += b0_curr->current_length; + b0_len -= b0_curr->current_length; + ASSERT ((b0_len == 0) || + (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)); + if (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer); + } + } + tx_peer->is_bo0_traced |= is_b0_traced; + pvti_finalize_chunk (tx_peer, chunk_header, tail, false); + } + else + { + bool is_reassembly = false; + /* FIXME: here, flush a packet if we want to avoid fragmenting it */ +#define PVTI_TINY_PACKET_SZ 20 + int threshold_len = + sizeof (pvti_chunk_header_t) + PVTI_TINY_PACKET_SZ; + + /* Can we fit anything meaningful into bo0 ? if not - flush */ + if (tx_peer->bo0_max_current_length <= + tx_peer->bo0->current_length + threshold_len) + { + if (!pvti_flush_peer_and_recharge (vm, node, ptd, tx_peer_index, + stream_index, is_ip6)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE0]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, + stream_index); + continue; + } + pkts_encapsulated += 1; + } + + pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + + u8 *tail; + vlib_buffer_t *b0_curr; + /* append the chained buffers and flush as necessary */ + b0_curr = b0; + + int curr_b0_start_offset = 0; + + while (b0_len > 0) + { + ASSERT (tx_peer->bo0_max_current_length > + tx_peer->bo0->current_length); + int copy_len = + clib_min (b0_curr->current_length - curr_b0_start_offset, + tx_peer->bo0_max_current_length - + tx_peer->bo0->current_length); + tail = vlib_buffer_put_uninit (tx_peer->bo0, copy_len); + clib_memcpy (tail, + (u8 *) vlib_buffer_get_current (b0_curr) + + curr_b0_start_offset, + copy_len); + tail += copy_len; + b0_len -= copy_len; + // Advance the start offset or reset it if we copied the entire + // block + curr_b0_start_offset = + curr_b0_start_offset + copy_len == b0_curr->current_length ? + 0 : + curr_b0_start_offset + copy_len; + ASSERT ((b0_len == 0) || (curr_b0_start_offset > 0) || + (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)); + if (curr_b0_start_offset > 0) + { + pvti_finalize_chunk (tx_peer, chunk_header, tail, + is_reassembly); + tx_peer->is_bo0_traced |= is_b0_traced; + if (!pvti_flush_peer_and_recharge ( + vm, node, ptd, tx_peer_index, stream_index, is_ip6)) + { + b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE1]; + pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, + stream_index); + continue; + } + pkts_encapsulated += 1; + /* next chunk(s) will be reassembly until the next block */ + is_reassembly = true; + chunk_header = vlib_buffer_put_uninit ( + tx_peer->bo0, sizeof (pvti_chunk_header_t)); + } + else + { + if ((b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer); + } + else + { + pvti_finalize_chunk (tx_peer, chunk_header, tail, + is_reassembly); + tx_peer->is_bo0_traced |= is_b0_traced; + } + } + } + } + vlib_buffer_free_one (vm, bi0); + } + + int i; + for (i = 0; i < vec_len (ptd->tx_peers); i++) + { + if (ptd->tx_peers[i].chunk_count) + { + pvti_flush_peer_and_recharge (vm, node, ptd, i, stream_index, + is_ip6); + pkts_encapsulated += 1; + } + } + + vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_tx_buffers, + &ptd->pending_tx_nexts, + vec_len (ptd->pending_tx_nexts)); + vec_reset_length (ptd->pending_tx_buffers); + vec_reset_length (ptd->pending_tx_nexts); + + vlib_node_increment_counter ( + vm, node->node_index, PVTI_OUTPUT_ERROR_ENCAPSULATED, pkts_encapsulated); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_PROCESSED, pkts_processed); + vlib_node_increment_counter (vm, node->node_index, PVTI_OUTPUT_ERROR_CHOPPED, + pkts_chopped); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_OVERFLOW, pkts_overflow); + vlib_node_increment_counter (vm, node->node_index, + PVTI_OUTPUT_ERROR_OVERFLOW_CANTFIT, + pkts_overflow_cantfit); + return frame->n_vectors; +} + +VLIB_NODE_FN (pvti4_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_output_node_common (vm, node, frame, 0); +} + +VLIB_NODE_FN (pvti6_output_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return pvti_output_node_common (vm, node, frame, 1); +} diff --git a/src/plugins/pvti/output.h b/src/plugins/pvti/output.h new file mode 100644 index 00000000000..95e78ba9720 --- /dev/null +++ b/src/plugins/pvti/output.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_pvti_output_h__ +#define __included_pvti_output_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 tx_seq; + u16 underlay_mtu; + u16 bi0_max_current_length; + u8 stream_index; + u8 trace_type; + u8 packet_data[96]; +} pvti_output_trace_t; + +#define foreach_pvti_output_error \ + _ (NONE, "No error") \ + _ (PROCESSED, "Packets processed") \ + _ (ENCAPSULATED, "Packets encapsulated") \ + _ (PEER, "No peer found") \ + _ (MAKE_PEER, "Could not make peer") \ + _ (RECHARGE0, "Could not recharge 0") \ + _ (RECHARGE1, "Could not recharge 1") \ + _ (NO_PRE_SPACE, "Not enought pre-data space") \ + _ (CHOPPED, "Packets chopped") \ + _ (OVERFLOW, "Packets overflowed") \ + _ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess") + +typedef enum +{ +#define _(sym, str) PVTI_OUTPUT_ERROR_##sym, + foreach_pvti_output_error +#undef _ + PVTI_OUTPUT_N_ERROR, +} pvti_output_error_t; + +typedef enum +{ + PVTI_INDEPENDENT_CHUNK = 0, + PVTI_REASS_CHUNK, +} pvti_chunk_type_t; + +#define MAX_CURR_LEN_UNKNOWN 0xffff + +typedef enum +{ + PVTI_OUTPUT_NEXT_DROP, + PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT, + PVTI_OUTPUT_NEXT_IP4_LOOKUP, + PVTI_OUTPUT_NEXT_IP6_LOOKUP, + PVTI_OUTPUT_N_NEXT, +} pvti_output_next_t; + +#endif // pvti_output_h diff --git a/src/plugins/pvti/pvti.api b/src/plugins/pvti/pvti.api new file mode 100644 index 00000000000..859ed1ab6b0 --- /dev/null +++ b/src/plugins/pvti/pvti.api @@ -0,0 +1,111 @@ +/* + * pvti.api - binary API skeleton + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file pvti.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane binary API messages which are generally + * called through a shared memory interface. + */ + +/* Version and type recitations */ + +option version = "0.0.1"; +import "vnet/interface_types.api"; +import "vnet/ip/ip_types.api"; + +/** \brief A composite type uniquely defining a PVTI tunnel. + @param sw_if_index - ignored on create/delete, present in details. + @param src_ip - Source IP address + @param src_port - Source UDP port + @param dst_ip - Destination IP address + @param dst_port - Destination UDP port + @param underlay_mtu - Underlay MTU for packet splitting/coalescing + @param underlay_fib_index - Underlay FIB index to be used after encap +*/ +typedef pvti_tunnel +{ + vl_api_interface_index_t sw_if_index; + vl_api_address_t local_ip; + u16 local_port; + vl_api_address_t remote_ip; + bool peer_address_from_payload; + u16 remote_port; + u16 underlay_mtu; + u32 underlay_fib_index; +}; + + +/** @brief API to enable / disable pvti on an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 to enable, 0 to disable the feature + @param sw_if_index - interface handle +*/ + +define pvti_interface_create +{ + option status="in_progress"; + + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + vl_api_pvti_tunnel_t interface; +}; + +define pvti_interface_create_reply +{ + option status="in_progress"; + u32 context; + i32 retval; + + /* Index for the newly created interface */ + vl_api_interface_index_t sw_if_index; +}; + +autoreply define pvti_interface_delete { + option status="in_progress"; + + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + + vl_api_interface_index_t sw_if_index; +}; + + +define pvti_interface_dump +{ + option status="in_progress"; + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; +}; + +define pvti_interface_details +{ + option status="in_progress"; + u32 context; + vl_api_pvti_tunnel_t interface; +}; + + diff --git a/src/plugins/pvti/pvti.c b/src/plugins/pvti/pvti.c new file mode 100644 index 00000000000..524eabc6f3f --- /dev/null +++ b/src/plugins/pvti/pvti.c @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vnet/fib/fib_table.h> +#include <pvti/pvti.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> +#include <stdbool.h> + +#include <pvti/pvti.api_enum.h> +#include <pvti/pvti.api_types.h> + +#include <pvti/pvti_if.h> + +#define REPLY_MSG_ID_BASE pmp->msg_id_base +#include <vlibapi/api_helper_macros.h> +#include <vnet/ip/ip_format_fns.h> + +pvti_main_t pvti_main; + +u8 * +format_pvti_tx_peer_ptr (u8 *s, va_list *args) +{ + pvti_tx_peer_t *peer = va_arg (*args, pvti_tx_peer_t *); + + s = format ( + s, + "[%p]%s local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d " + "pvti_idx:%d b0_max_clen:%d cseq:%d chunk_count:%d reass_chunk_count:%d", + peer, peer->deleted ? " DELETED" : "", format_ip46_address, + &peer->local_ip, IP46_TYPE_ANY, peer->local_port, format_ip46_address, + &peer->remote_ip, IP46_TYPE_ANY, peer->remote_port, peer->underlay_mtu, + peer->underlay_fib_index, peer->pvti_if_index, + peer->bo0_max_current_length, peer->current_tx_seq, peer->chunk_count, + peer->reass_chunk_count); + + return (s); +} + +u8 * +format_pvti_rx_peer_ptr (u8 *s, va_list *args) +{ + pvti_rx_peer_t *peer = va_arg (*args, pvti_rx_peer_t *); + + s = format (s, "[%p]%s local:%U:%d remote:%U:%d pvti_idx:%d", peer, + peer->deleted ? " DELETED" : "", format_ip46_address, + &peer->local_ip, IP46_TYPE_ANY, peer->local_port, + format_ip46_address, &peer->remote_ip, IP46_TYPE_ANY, + peer->remote_port, peer->pvti_if_index); + + return (s); +} + +void +pvti_verify_initialized (pvti_main_t *pvm) +{ + if (!pvm->is_initialized) + { + const int n_threads = vlib_get_n_threads (); + vec_validate (pvm->per_thread_data[0], n_threads - 1); + vec_validate (pvm->per_thread_data[1], n_threads - 1); + pvm->is_initialized = 1; + } +} + +void +vnet_int_pvti_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable) +{ + pvti_main_t *pvm = &pvti_main; + + if (pool_is_free_index (pvm->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return; + + pvti_verify_initialized (pvm); + + is_enable = !!is_enable; + + if (is_ip6) + { + if (clib_bitmap_get (pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index) != + is_enable) + { + vnet_feature_enable_disable ("ip6-unicast", "ip6-pvti-bypass", + sw_if_index, is_enable, 0, 0); + pvm->bm_ip6_bypass_enabled_by_sw_if = clib_bitmap_set ( + pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index, is_enable); + } + } + else + { + if (clib_bitmap_get (pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index) != + is_enable) + { + vnet_feature_enable_disable ("ip4-unicast", "ip4-pvti-bypass", + sw_if_index, is_enable, 0, 0); + pvm->bm_ip4_bypass_enabled_by_sw_if = clib_bitmap_set ( + pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index, is_enable); + } + } +} + +static clib_error_t * +set_ip_pvti_bypass (u32 is_ip6, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, is_enable; + + sw_if_index = ~0; + is_enable = 1; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "del")) + is_enable = 0; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == sw_if_index) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, line_input); + goto done; + } + + vnet_int_pvti_bypass_mode (sw_if_index, is_ip6, is_enable); + +done: + unformat_free (line_input); + + return error; +} + +static clib_error_t * +set_ip4_pvti_bypass (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + return set_ip_pvti_bypass (0, input, cmd); +} + +VLIB_CLI_COMMAND (set_interface_ip_pvti_bypass_command, static) = { + .path = "set interface ip pvti-bypass", + .function = set_ip4_pvti_bypass, + .short_help = "set interface ip pvti-bypass <interface> [del]", +}; + +static clib_error_t * +set_ip6_pvti_bypass (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + return set_ip_pvti_bypass (1, input, cmd); +} + +VLIB_CLI_COMMAND (set_interface_ip6_pvti_bypass_command, static) = { + .path = "set interface ip6 pvti-bypass", + .function = set_ip6_pvti_bypass, + .short_help = "set interface ip6 pvti-bypass <interface> [del]", +}; + +static clib_error_t * +pvti_interface_create_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + // pvti_main_t * pmp = &pvti_main; + u32 sw_if_index = ~0; + int rv = 0; + ip_address_t peer_ip = { 0 }; + ip_address_t local_ip = { 0 }; + u32 peer_port = 0; + u32 local_port = 12345; + u32 underlay_mtu = 1500; + u32 underlay_fib_index = ~0; + u32 underlay_table_id = ~0; + pvti_peer_address_method_t peer_address_method = PVTI_PEER_ADDRESS_FIXED; + bool peer_set = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "peer %U %d %d", unformat_ip_address, &peer_ip, + &peer_port, &local_port)) + { + peer_set = 1; + } + else if (unformat (line_input, "underlay-mtu %d", &underlay_mtu)) + { + // MTU set + } + else if (unformat (line_input, "local-ip %U", unformat_ip_address, + &local_ip)) + { + // local IP set + } + else if (unformat (line_input, "underlay-fib %d", &underlay_fib_index)) + { + // underlay fib set + } + else if (unformat (line_input, "peer-address-from-payload")) + { + peer_address_method = PVTI_PEER_ADDRESS_FROM_PAYLOAD; + } + else if (unformat (line_input, "underlay-table %d", &underlay_table_id)) + { + fib_protocol_t fib_proto = FIB_PROTOCOL_IP4; + if (peer_ip.version == AF_IP6) + { + fib_proto = FIB_PROTOCOL_IP6; + } + u32 fib_index = fib_table_find (fib_proto, underlay_table_id); + + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", + underlay_table_id); + goto done; + } + underlay_fib_index = fib_index; + } + else + break; + } + if (!peer_set) + { + error = clib_error_return (0, "Please specify a peer..."); + goto done; + } + + rv = pvti_if_create (&local_ip, local_port, &peer_ip, peer_port, + peer_address_method, underlay_mtu, underlay_fib_index, + &sw_if_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + error = clib_error_return (0, "Invalid interface"); + break; + + default: + error = clib_error_return (0, "pvti_if_create returned %d", rv); + } +done: + unformat_free (line_input); + return error; +} + +static clib_error_t * +pvti_interface_delete_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + // pvti_main_t * pmp = &pvti_main; + u32 sw_if_index = ~0; + int rv = 0; + bool if_index_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "if-index %d", &sw_if_index)) + { + if_index_set = 1; + } + else + break; + } + if (!if_index_set) + return clib_error_return (0, "Please specify a sw_if_index..."); + + rv = pvti_if_delete (sw_if_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "Invalid interface"); + break; + + default: + return clib_error_return (0, "pvti_if_delete returned %d", rv); + } + return 0; +} + +VLIB_CLI_COMMAND (pvti_interface_create_command, static) = { + .path = "pvti interface create", + .short_help = + "pvti interface create peer <remote-ip> <remote-port> <local-port> [ " + "local-ip <ip-addr> ][ underlay-mtu <MTU>][underlay-table " + "<table-index>][inderlay-fib <fib-index>]", + .function = pvti_interface_create_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_interface_delete_command, static) = { + .path = "pvti interface delete", + .short_help = "pvti interface delete if-index <sw-ifindex>", + .function = pvti_interface_delete_command_fn, +}; + +static clib_error_t * +pvti_show_interface_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_if_t *pvti_if; + vec_foreach (pvti_if, pvti_main.if_pool) + { + int index = pvti_if - pvti_main.if_pool; + vlib_cli_output (vm, "%U", format_pvti_if, index); + }; + return 0; +} + +static clib_error_t * +pvti_show_tx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_per_thread_data_t *ptd; + int is_ip6; + for (is_ip6 = 0; is_ip6 <= 1; is_ip6++) + { + vec_foreach (ptd, pvti_main.per_thread_data[is_ip6]) + { + vlib_cli_output (vm, "thread %d (%s)", + ptd - pvti_main.per_thread_data[is_ip6], + is_ip6 ? "IPv6" : "IPv4"); + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + vlib_cli_output (vm, " %U", format_pvti_tx_peer_ptr, peer); + } + } + } + return 0; +} + +static clib_error_t * +pvti_show_rx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + pvti_per_thread_data_t *ptd; + int is_ip6; + for (is_ip6 = 0; is_ip6 <= 1; is_ip6++) + { + vec_foreach (ptd, pvti_main.per_thread_data[is_ip6]) + { + vlib_cli_output (vm, "thread %d (%s)", + ptd - pvti_main.per_thread_data[is_ip6], + is_ip6 ? "IPv6" : "IPv4"); + pvti_rx_peer_t *peer; + vec_foreach (peer, ptd->rx_peers) + { + vlib_cli_output (vm, " %U", format_pvti_rx_peer_ptr, peer); + } + } + } + return 0; +} + +VLIB_CLI_COMMAND (pvti_show_interface_command, static) = { + .path = "show pvti interface", + .short_help = "show pvti interface", + .function = pvti_show_interface_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_show_tx_peers_command, static) = { + .path = "show pvti tx peers", + .short_help = "show pvti tx peers", + .function = pvti_show_tx_peers_command_fn, +}; + +VLIB_CLI_COMMAND (pvti_show_rx_peers_command, static) = { + .path = "show pvti rx peers", + .short_help = "show pvti rx peers", + .function = pvti_show_rx_peers_command_fn, +}; + +void pvti_api_init (); + +VNET_FEATURE_INIT (pvti4_bypass, static) = { + .arc_name = "ip4-unicast", + .node_name = "ip4-pvti-bypass", + .runs_before = 0, +}; + +VNET_FEATURE_INIT (pvti6_bypass, static) = { + .arc_name = "ip6-unicast", + .node_name = "ip6-pvti-bypass", + .runs_before = 0, +}; + +static clib_error_t * +pvti_early_config (vlib_main_t *vm, unformat_input_t *input) +{ + clib_warning ("early config pvti"); + u8 *runs_before = 0; + int rbi = 0; + if (vec_len (vnet_feat_pvti4_bypass.runs_before) == 0) + { + rbi = 0; + } + else + { + rbi = vec_len (vnet_feat_pvti4_bypass.runs_before) - 1; + } + vec_validate (vnet_feat_pvti4_bypass.runs_before, rbi); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "runs-before %v", &runs_before)) + { + vec_add1 (runs_before, 0); + vnet_feat_pvti4_bypass.runs_before[rbi] = (char *) runs_before; + vec_add1 (vnet_feat_pvti4_bypass.runs_before, 0); + } + else + return clib_error_return (0, "unknown input"); + } + + return NULL; +} + +VLIB_EARLY_CONFIG_FUNCTION (pvti_early_config, "pvti"); + +static clib_error_t * +pvti_init (vlib_main_t *vm) +{ + pvti_main_t *pmp = &pvti_main; + clib_error_t *error = 0; + clib_warning ("pvti init"); + + pmp->vlib_main = vm; + pmp->vnet_main = vnet_get_main (); + pmp->is_initialized = 0; + + pvti_api_init (); + return error; +} + +VLIB_INIT_FUNCTION (pvti_init); + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Packet Vector Tunnel Interface plugin", +}; diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h new file mode 100644 index 00000000000..ac097c5ecca --- /dev/null +++ b/src/plugins/pvti/pvti.h @@ -0,0 +1,257 @@ +/* + * pvti.h - skeleton vpp engine plug-in header file + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_pvti_h__ +#define __included_pvti_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> + +#define VPP_MAX_THREADS (1 << 8) + +#define MAX_RX_STREAMS 256 + +#define PVTI_ALIGN_BYTES 9 + +typedef CLIB_PACKED (struct { + u32 seq; + u8 stream_index; // set to the cpu# on the sending side + u8 chunk_count; + u8 reass_chunk_count; // number of chunks in the front that are related to + // previously started buffer + // mandatory_flags_mask highlights which of the flags cause packet drop if + // not understood, and which of them can be just ignored. + u8 mandatory_flags_mask; + u8 flags_value; + u8 pad_bytes; + u8 pad[0]; +}) pvti_packet_header_t; + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + udp_header_t udp; + // not part of encap header pvti_packet_header_t pv; +}) pvti_ip4_encap_header_t; + +typedef CLIB_PACKED (struct { + ip6_header_t ip6; + udp_header_t udp; + // not part of encap header pvti_packet_header_t pv; +}) pvti_ip6_encap_header_t; + +typedef CLIB_PACKED (struct { + u16 total_chunk_length; + // More fragments: this chunk is not the last block fragment +#define CHUNK_FLAGS_MF (1 << 0) + // More blocks: this block has chained blocks that follow +#define CHUNK_FLAGS_MB (1 << 1) + u16 _pad0; + u32 _pad1; + u8 chunk_data[0]; +}) pvti_chunk_header_t; + +typedef struct +{ + // a buffer being built from the smaller packets + u32 bi0; + + // how big can this buffer grow + u32 bi0_max_current_length; + + // how many chunks are already in the buffer + u8 chunk_count; + // leading reassembly chunk count + u8 reass_chunk_count; + + u32 current_tx_seq; +} pvti_per_tx_stream_data_t; + +typedef struct +{ + /* The seq# that we last processed */ + u32 last_rx_seq; + + // a current buffer that is being reassembled + u32 rx_bi0; + // The root buffer, most of the times == rx_bi0 except in the case of chained + // buffers. + u32 rx_bi0_first; + + // Next index for dispatch when the reassembly is done + u16 rx_next0; + // expected totall inner length for the packet + u16 rx_expected_inner_length; + u16 rx_received_inner_length; + +} pvti_per_rx_stream_data_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u16 underlay_mtu; + u32 underlay_fib_index; + + u32 pvti_if_index; + bool deleted; + bool is_bo0_traced; + + u32 bo0_max_current_length; + + u8 chunk_count; + u8 reass_chunk_count; + u32 current_tx_seq; + vlib_buffer_t *bo0; + +} pvti_tx_peer_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + + pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS]; + + u32 pvti_if_index; + bool deleted; +} pvti_rx_peer_t; + +typedef struct +{ + /* pool of destination-based structures which are used to build the packets + */ + pvti_tx_peer_t *tx_peers; + + /* vector of buffers to send */ + u32 *pending_tx_buffers; + u16 *pending_tx_nexts; + /* pool of source-based structures for the remote peers' data tracking + */ + pvti_rx_peer_t *rx_peers; + + /* vector of buffers being decapsulated */ + u32 *pending_rx_buffers; + u16 *pending_rx_nexts; + +} pvti_per_thread_data_t; + +typedef struct +{ + ip_address_t local_ip; + ip_address_t remote_ip; + u16 remote_port; + u16 local_port; + u16 underlay_mtu; + u32 underlay_fib_index; + bool peer_address_from_payload; + u64 created_at; + + u32 sw_if_index; + u32 hw_if_index; + + // per-stream data for TX + pvti_per_tx_stream_data_t tx_streams[256]; + pvti_per_rx_stream_data_t rx_streams[256]; + +} pvti_if_t; + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* have we initialized the data structures ? */ + bool is_initialized; + + /* interface pool */ + pvti_if_t *if_pool; + + /* if_index in the pool above by sw_if_index */ + index_t *if_index_by_sw_if_index; + + /* indices by port */ + index_t **if_indices_by_port; + + /* per-thread data, ip4[0] and ip6[1] */ + pvti_per_thread_data_t *per_thread_data[2]; + + /* on/off switch for the periodic function */ + u8 periodic_timer_enabled; + /* Node index, non-zero if the periodic process has been created */ + u32 periodic_node_index; + + /* graph node state */ + uword *bm_ip4_bypass_enabled_by_sw_if; + uword *bm_ip6_bypass_enabled_by_sw_if; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; +} pvti_main_t; + +extern pvti_main_t pvti_main; + +extern vlib_node_registration_t pvti_node; +extern vlib_node_registration_t pvti4_input_node; +extern vlib_node_registration_t pvti4_output_node; +extern vlib_node_registration_t pvti6_input_node; +extern vlib_node_registration_t pvti6_output_node; +extern vlib_node_registration_t pvti_periodic_node; + +always_inline u8 +pvti_get_stream_index (int is_ip6) +{ + u32 thread_index = vlib_get_thread_index (); + + ASSERT ((thread_index & 0xffffff80) == 0); + + u8 stream_index = (thread_index & 0x7f) | (is_ip6 ? 0x80 : 0); + return stream_index; +} + +/* attempt to get a new buffer */ +always_inline u32 +pvti_get_new_buffer (vlib_main_t *vm) +{ + u32 bi0 = INDEX_INVALID; + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + { + return INDEX_INVALID; + } + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + b0->current_data = 0; + b0->current_length = 0; + return bi0; +} + +/* Periodic function events */ +#define PVTI_EVENT1 1 +#define PVTI_EVENT2 2 +#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3 + +void pvti_create_periodic_process (pvti_main_t *); +void pvti_verify_initialized (pvti_main_t *pvm); + +#endif /* __included_pvti_h__ */ diff --git a/src/plugins/pvti/pvti_if.c b/src/plugins/pvti/pvti_if.c new file mode 100644 index 00000000000..4f83994a1a4 --- /dev/null +++ b/src/plugins/pvti/pvti_if.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2020 Cisco and/or its affiliates. + * Copyright (c) 2020 Doc.ai and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj_midchain.h> +#include <vnet/udp/udp.h> + +#include <pvti/pvti.h> +#include <pvti/pvti_if.h> + +static u8 * +format_pvti_if_name (u8 *s, va_list *args) +{ + u32 dev_instance = va_arg (*args, u32); + // wg_if_t *wgi = wg_if_get (dev_instance); + return format (s, "pvti%d", dev_instance); +} + +u8 * +format_pvti_if (u8 *s, va_list *args) +{ + index_t pvtii = va_arg (*args, u32); + pvti_if_t *pvti_if = pvti_if_get (pvtii); + + s = format ( + s, "[%d] %U local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d", + pvtii, format_vnet_sw_if_index_name, vnet_get_main (), + pvti_if->sw_if_index, format_ip46_address, &pvti_if->local_ip, + IP46_TYPE_ANY, pvti_if->local_port, format_ip46_address, + &pvti_if->remote_ip, IP46_TYPE_ANY, pvti_if->remote_port, + pvti_if->underlay_mtu, pvti_if->underlay_fib_index); + + return (s); +} + +index_t +pvti_if_find_by_sw_if_index (u32 sw_if_index) +{ + if (vec_len (pvti_main.if_index_by_sw_if_index) <= sw_if_index) + return INDEX_INVALID; + u32 ti = pvti_main.if_index_by_sw_if_index[sw_if_index]; + if (ti == ~0) + return INDEX_INVALID; + + return (ti); +} + +index_t +pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4, + u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->remote_ip.version == AF_IP4) && + ((ifc->remote_ip.ip.ip4.as_u32 == remote_ip4->as_u32) || + ifc->peer_address_from_payload)) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +index_t +pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6, + u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->remote_ip.version == AF_IP6) && + ((0 == memcmp (&ifc->remote_ip.ip.ip6, remote_ip6, + sizeof (*remote_ip6))) || + ifc->peer_address_from_payload)) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +index_t +pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, u16 remote_port) +{ + pvti_if_t *ifc; + pool_foreach (ifc, pvti_main.if_pool) + { + if ((ifc->remote_port == remote_port) && + (ifc->peer_address_from_payload || + (0 == ip_address_cmp (remote_ip, &ifc->remote_ip)))) + { + return (ifc - pvti_main.if_pool); + } + } + return INDEX_INVALID; +} + +static void +pvti_add_tidx_by_port (index_t t_index, u16 port) +{ + pvti_main_t *pvm = &pvti_main; + vec_validate_init_empty (pvm->if_indices_by_port, port, NULL); + vec_add1 (pvm->if_indices_by_port[port], t_index); +} + +static void +pvti_del_tidx_by_port (index_t t_index, u16 port) +{ + pvti_main_t *pvm = &pvti_main; + index_t *ii; + if (!pvm->if_indices_by_port) + { + return; + } + if (port >= vec_len (pvm->if_indices_by_port)) + { + return; + } + if (vec_len (pvm->if_indices_by_port[port]) == 0) + { + ALWAYS_ASSERT (pvm->if_indices_by_port[port] > 0); + /* not reached */ + return; + } + + vec_foreach (ii, pvm->if_indices_by_port[port]) + { + if (*ii == t_index) + { + vec_del1 (pvm->if_indices_by_port[port], + pvm->if_indices_by_port[port] - ii); + break; + } + } +} + +static u32 +pvti_get_tunnel_count_by_port (u16 port) +{ + pvti_main_t *pvm = &pvti_main; + if (!pvm->if_indices_by_port) + { + return 0; + } + return vec_len (vec_elt (pvm->if_indices_by_port, port)); +} + +static clib_error_t * +pvti_if_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) +{ + // vnet_hw_interface_t *hi; + u32 hw_flags; + + // hi = vnet_get_hw_interface (vnm, hw_if_index); + hw_flags = + (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? VNET_HW_INTERFACE_FLAG_LINK_UP : + 0); + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return (NULL); +} + +void +pvti_if_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai) +{ + + /* Convert any neighbour adjacency that has a next-hop reachable through + * the wg interface into a midchain. This is to avoid sending ARP/ND to + * resolve the next-hop address via the wg interface. Then, if one of the + * peers has matching prefix among allowed prefixes, the midchain will be + * updated to the corresponding one. + */ + adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL); + + // wgii = wg_if_find_by_sw_if_index (sw_if_index); + // wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai); +} + +VNET_DEVICE_CLASS (pvti_if_device_class) = { + .name = "Packet Vectorizer Tunnel", + .format_device_name = format_pvti_if_name, + .admin_up_down_function = pvti_if_admin_up_down, +}; + +VNET_HW_INTERFACE_CLASS (pvti_hw_interface_class) = { + .name = "PVTunnel", + .update_adjacency = pvti_if_update_adj, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, + // .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA, +}; + +int +pvti_if_create (ip_address_t *local_ip, u16 local_port, + ip_address_t *remote_ip, u16 remote_port, + pvti_peer_address_method_t peer_address_method, + u16 underlay_mtu, u32 underlay_fib_index, u32 *sw_if_indexp) +{ + vnet_main_t *vnm = vnet_get_main (); + pvti_main_t *pvm = &pvti_main; + u32 hw_if_index; + vnet_hw_interface_t *hi; + pvti_verify_initialized (pvm); + + pvti_if_t *pvti_if; + + ASSERT (sw_if_indexp); + + *sw_if_indexp = (u32) ~0; + + pool_get_zero (pvti_main.if_pool, pvti_if); + pvti_if->local_ip = *local_ip; + pvti_if->local_port = local_port; + pvti_if->remote_ip = *remote_ip; + if (peer_address_method == PVTI_PEER_ADDRESS_FROM_PAYLOAD) + { + pvti_if->peer_address_from_payload = 1; + } + pvti_if->remote_port = remote_port; + pvti_if->underlay_mtu = underlay_mtu; + pvti_if->underlay_fib_index = underlay_fib_index; + pvti_if->created_at = clib_cpu_time_now (); + + /* tunnel index (or instance) */ + u32 t_idx = pvti_if - pvti_main.if_pool; + + hw_if_index = + vnet_register_interface (vnm, pvti_if_device_class.index, t_idx, + pvti_hw_interface_class.index, t_idx); + + pvti_if->hw_if_index = hw_if_index; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + pvti_if->sw_if_index = *sw_if_indexp = hi->sw_if_index; + + vec_validate_init_empty (pvm->if_index_by_sw_if_index, hi->sw_if_index, + INDEX_INVALID); + + vec_elt (pvm->if_index_by_sw_if_index, hi->sw_if_index) = t_idx; + pvti_if_t *pvti_if0 = pool_elt_at_index (pvti_main.if_pool, t_idx); + int i; + for (i = 0; i < 256; i++) + { + pvti_if0->tx_streams[i].bi0 = INDEX_INVALID; + pvti_if0->tx_streams[i].current_tx_seq = 42; + + pvti_if0->rx_streams[i].rx_bi0 = INDEX_INVALID; + pvti_if0->rx_streams[i].rx_bi0_first = INDEX_INVALID; + } + + /* + int is_ip6 = 0; + u32 encap_index = !is_ip6 ? + pvti4_output_node.index : pvti6_output_node.index; + vnet_set_interface_output_node (vnm, pvti_if->hw_if_index, encap_index); + */ + vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index, + (u8 *) "pvti4-output"); + + pvti_add_tidx_by_port (t_idx, local_port); + if (1 == pvti_get_tunnel_count_by_port (local_port)) + { + clib_warning ("Registering local port %d", local_port); + udp_register_dst_port (vlib_get_main (), local_port, + pvti4_input_node.index, UDP_IP4); + udp_register_dst_port (vlib_get_main (), local_port, + pvti6_input_node.index, UDP_IP6); + } + else + { + clib_warning ("Not registering the port"); + } + + vnet_hw_interface_set_flags (vnm, pvti_if->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + + return 0; +} + +void +pvti_if_walk (pvti_if_walk_cb_t fn, void *data) +{ + index_t pvtii; + + pool_foreach_index (pvtii, pvti_main.if_pool) + { + if (WALK_STOP == fn (pvtii, data)) + break; + } +} + +int +pvti_if_delete (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + pvti_main_t *pvm = &pvti_main; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == 0 || hw->dev_class_index != pvti_if_device_class.index) + return VNET_API_ERROR_INVALID_VALUE; + + pvti_if_t *ifc; + bool found = 0; + pool_foreach (ifc, pvm->if_pool) + { + if (ifc->sw_if_index == sw_if_index) + { + found = 1; + break; + } + } + if (!found) + { + return VNET_API_ERROR_INVALID_VALUE_2; + } + index_t tidx = ifc - pvm->if_pool; + + u16 local_port = ifc->local_port; + pvti_del_tidx_by_port (tidx, local_port); + pvm->if_index_by_sw_if_index[sw_if_index] = INDEX_INVALID; + + if (0 == pvti_get_tunnel_count_by_port (local_port)) + { + udp_unregister_dst_port (vlib_get_main (), local_port, 1); + udp_unregister_dst_port (vlib_get_main (), local_port, 0); + } + + vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index); + vnet_delete_hw_interface (vnm, hw->hw_if_index); + pool_put (pvti_main.if_pool, ifc); + + /* mark per-thread peers as deleted */ + pvti_per_thread_data_t *ptd; + + vec_foreach (ptd, pvm->per_thread_data[0]) + { + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + if (tidx == peer->pvti_if_index) + { + peer->deleted = 1; + } + } + } + vec_foreach (ptd, pvm->per_thread_data[1]) + { + pvti_tx_peer_t *peer; + vec_foreach (peer, ptd->tx_peers) + { + if (tidx == peer->pvti_if_index) + { + peer->deleted = 1; + } + } + } + + return 0; +} diff --git a/src/plugins/pvti/pvti_if.h b/src/plugins/pvti/pvti_if.h new file mode 100644 index 00000000000..44bf22ce825 --- /dev/null +++ b/src/plugins/pvti/pvti_if.h @@ -0,0 +1,47 @@ +#ifndef PVTI_IF_H +#define PVTI_IF_H + +#include <vnet/interface_funcs.h> + +typedef enum +{ + PVTI_PEER_ADDRESS_FIXED = 0, + PVTI_PEER_ADDRESS_FROM_PAYLOAD +} pvti_peer_address_method_t; + +typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t wgi, void *data); +void pvti_if_walk (pvti_if_walk_cb_t fn, void *data); + +int pvti_if_create (ip_address_t *local_ip, u16 local_port, + ip_address_t *remote_ip, u16 remote_port, + pvti_peer_address_method_t peer_address_method, + u16 underlay_mtu, u32 underlay_fib_index, + u32 *sw_if_indexp); +index_t pvti_if_find_by_sw_if_index (u32 sw_if_index); +index_t pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4, + u16 remote_port); +index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip4, + u16 remote_port); + +index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, + u16 remote_port); + +int pvti_if_delete (u32 sw_if_index); + +u8 *format_pvti_if (u8 *s, va_list *args); + +static_always_inline pvti_if_t * +pvti_if_get (index_t pvtii) +{ + if (INDEX_INVALID == pvtii) + return (NULL); + return (pool_elt_at_index (pvti_main.if_pool, pvtii)); +} + +static_always_inline index_t +pvti_if_get_index (pvti_if_t *pvti_if) +{ + return pvti_if - pvti_main.if_pool; +} + +#endif diff --git a/test/test_pvti.py b/test/test_pvti.py new file mode 100644 index 00000000000..94ae7790323 --- /dev/null +++ b/test/test_pvti.py @@ -0,0 +1,1153 @@ +#!/usr/bin/env python3 +""" PVTI tests """ + +import datetime +import base64 +import os +import copy +import struct + +from hashlib import blake2s +from config import config +from scapy.packet import Raw +from scapy.compat import raw +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP +from scapy.layers.inet6 import IPv6 +from scapy.layers.vxlan import VXLAN + +from vpp_interface import VppInterface +from vpp_pg_interface import is_ipv6_misc +from vpp_ip_route import VppIpRoute, VppRoutePath +from vpp_l2 import VppBridgeDomain, VppBridgeDomainPort +from vpp_vxlan_tunnel import VppVxlanTunnel +from vpp_object import VppObject +from vpp_papi import VppEnum +from asfframework import tag_run_solo, tag_fixme_vpp_debug +from framework import VppTestCase +from re import compile +import unittest + + +from scapy.packet import Packet, bind_layers +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP +from scapy.layers.inet6 import IPv6 +from scapy.fields import ( + FlagsField, + XByteField, + XShortField, + ThreeBytesField, + ConditionalField, + ShortField, + ByteEnumField, + X3BytesField, + LEIntField, + ByteField, + StrLenField, + PacketListField, + LEShortField, + IntField, + ShortField, + XIntField, +) + +import sys + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +# +# A custom decoder for Scapy for PVTI packet format +# + + +class PVTIChunk(Packet): + name = "PVTIChunk" + fields_desc = [ + ShortField("total_chunk_length", None), + XShortField("_pad0", 0), + XIntField("_pad1", 0), + StrLenField("data", "", length_from=lambda pkt: pkt.total_chunk_length - 8), + ] + + # This prevents the first chunk from consuming the entire remaining + # contents of the packet + def extract_padding(self, s): + return "", s + + def post_build(self, p, pay): + if self.total_chunk_length is None and self.data: + chunk_header_size = 8 + l = chunk_header_size + len(self.data) + p = struct.pack("!H", l) + p[2:] + return p + pay + + +class PVTI(Packet): + name = "PVTI" + PVTI_ALIGN_BYTES = 9 + fields_desc = [ + IntField("seq", 0x0), + ByteField("stream_index", 0), + ByteField("chunk_count", None), + ByteField("reass_chunk_count", 0), + ByteField("mandatory_flags_mask", 0), + ByteField("flags_value", 0), + ByteField("pad_bytes", PVTI_ALIGN_BYTES), + StrLenField( + "pad", b"\xca" * PVTI_ALIGN_BYTES, length_from=lambda pkt: pkt.pad_bytes + ), + PacketListField("chunks", [], PVTIChunk, count_from=lambda p: p.chunk_count), + ] + + def mysummary(self): + return self.sprintf("PVTI (len=%PVTI.total_len%)") + + def post_build(self, p, pay): + if self.chunk_count is None: + l = len(self.chunks) + # offset of the chunk count within the fields + offset_of_chunk_count = 5 + p = ( + p[:offset_of_chunk_count] + + struct.pack("b", l) + + p[offset_of_chunk_count + 1 :] + ) + return p + pay + + +bind_layers(UDP, PVTI, dport=12312) +# By default, set both ports to the test +# bind_layers(UDP, PVTI, sport=6192, dport=6192) + + +# PVTI ENcapsulator/DEcapsulator +class PvtiEnDe(object): + """ + PVTI encapsulator/decapsulator + """ + + def __init__( + self, + local_ip, + local_port, + remote_ip, + remote_port, + underlay_mtu=1500, + for_rx_test=False, + ): + self.for_rx_test = for_rx_test + self.local_ip = local_ip + self.local_port = local_port + self.remote_ip = remote_ip + self.remote_port = remote_port + self.underlay_mtu = underlay_mtu + self.stream_index = 0 + self.tx_chunks = [] + self.tx_n_reass_chunks = 0 + self.tx_seq = 42 + # payload = chunk headers + data + self.max_payload_len = underlay_mtu - len(raw(IP() / UDP() / PVTI())) + self.pvti_header_len = len(raw(PVTI())) + self.chunk_header_len = len(raw(PVTIChunk())) + + def get_curr_payload_len(self): + tx_len = 0 + for c in self.tx_chunks: + tx_len = tx_len + len(c.data) + self.chunk_header_len + return tx_len + + def get_payload_room(self): + return self.max_payload_len - self.get_curr_payload_len() + + def flush_tx_chunks(self, more_frags=False): + if self.for_rx_test: + ip_dst = self.local_ip + ip_src = self.remote_ip + else: + ip_src = self.local_ip + ip_dst = self.remote_ip + p = ( + IP( + src=ip_src, + dst=ip_dst, + ttl=127, + frag=0, + flags=0, + id=self.tx_seq, + ) + / UDP(sport=self.local_port, dport=self.remote_port, chksum=0) + / PVTI( + reass_chunk_count=self.tx_n_reass_chunks, + seq=self.tx_seq, + stream_index=self.stream_index, + chunks=self.tx_chunks, + ) + ) + + p = IP(raw(p)) + + self.tx_n_reass_chunks = 0 + self.tx_chunks = [] + self.tx_seq = self.tx_seq + 1 + return p + + def encap_pkt(self, p): + out = [] + if IP in p: + p[IP].ttl = p[IP].ttl - 1 + payload_wip = p[IP].build() + elif IPv6 in p: + p[IPv6].hlim = p[IPv6].hlim - 1 + payload_wip = p[IPv6].build() + + split_chunks = False + huge_solo_packet = ( + len(payload_wip) + self.chunk_header_len > self.get_payload_room() + ) and len(self.tx_chunks) == 0 + + while True: + available_room = self.get_payload_room() + chunk_wip_len = len(payload_wip) + self.chunk_header_len + xpad0 = 0xABAB + xpad1 = 0xABABABAB + + if chunk_wip_len <= available_room: + # happy case - there is enough space to fit the entire chunk + if split_chunks: + self.tx_n_reass_chunks = self.tx_n_reass_chunks + 1 + tx = PVTIChunk(data=payload_wip, _pad0=xpad0, _pad1=xpad1) + self.tx_chunks.append(tx) + if chunk_wip_len == available_room: + # an unlikely perfect fit - send this packet. + out.append(self.flush_tx_chunks()) + break + elif available_room < self.chunk_header_len + 1: + # Can not fit even a chunk header + 1 byte of data + # Flush and retry + out.append(self.flush_tx_chunks()) + continue + else: + # Chop as much as we can from the packet + chop_len = available_room - self.chunk_header_len + if split_chunks: + self.tx_n_reass_chunks = self.tx_n_reass_chunks + 1 + tx = PVTIChunk(data=payload_wip[:chop_len], _pad0=xpad0, _pad1=xpad1) + self.tx_chunks.append(tx) + out.append(self.flush_tx_chunks()) + split_chunks = True + payload_wip = payload_wip[chop_len:] + continue + return out + + def encap_packets(self, pkts): + out = [] + self.start_encap() + for p in pkts: + out.extend(self.encap_pkt(p)) + last_pkt = self.finish_encap() + if last_pkt != None: + out.append(last_pkt) + return out + + def start_encap(self): + return None + + def finish_encap(self): + out = None + if len(self.tx_chunks) > 0: + out = self.flush_tx_chunks() + return out + + +""" TestPvti is a subclass of VPPTestCase classes. + +PVTI test. + +""" + + +def get_field_bytes(pkt, name): + fld, val = pkt.getfield_and_val(name) + return fld.i2m(pkt, val) + + +class VppPvtiInterface(VppInterface): + """ + VPP PVTI interface + """ + + def __init__( + self, test, local_ip, local_port, remote_ip, remote_port, underlay_mtu=1500 + ): + super(VppPvtiInterface, self).__init__(test) + + self.local_ip = local_ip + self.local_port = local_port + self.remote_ip = remote_ip + self.remote_port = remote_port + self.underlay_mtu = underlay_mtu + + def get_ende(self, for_rx_test=False): + return PvtiEnDe( + self.local_ip, + self.local_port, + self.remote_ip, + self.remote_port, + self.underlay_mtu, + for_rx_test, + ) + + def verify_encap_packets(self, orig_pkts, recv_pkts): + ende = self.get_ende() + recv2_pkts = ende.encap_packets(orig_pkts) + out1 = [] + out2 = [] + for i, pkt in enumerate(recv_pkts): + if IP in pkt: + rx_pkt = pkt[IP] + elif IPv6 in pkt: + rx_pkt = pkt[IPv6] + else: + raise "Neither IPv4 nor IPv6" + py_pkt = recv2_pkts[i] + if rx_pkt != py_pkt: + eprint("received packet:") + rx_pkt.show() + eprint("python packet:") + py_pkt.show() + out1.append(rx_pkt) + out2.append(py_pkt) + return (out1, out2) + + def add_vpp_config(self): + r = self.test.vapi.pvti_interface_create( + interface={ + "local_ip": self.local_ip, + "local_port": self.local_port, + "remote_ip": self.remote_ip, + "remote_port": self.remote_port, + "underlay_mtu": self.underlay_mtu, + } + ) + self.set_sw_if_index(r.sw_if_index) + self.test.registry.register(self, self.test.logger) + return self + + def remove_vpp_config(self): + self.test.vapi.pvti_interface_delete(sw_if_index=self._sw_if_index) + + def query_vpp_config(self): + ts = self.test.vapi.pvti_interface_dump(sw_if_index=0xFFFFFFFF) + for t in ts: + if ( + t.interface.sw_if_index == self._sw_if_index + and str(t.interface.local_ip) == self.local_ip + and t.interface.local_port == self.local_port + and t.interface.remote_port == self.remote_port + and str(t.interface.remote_ip) == self.remote_ip + ): + self.test.logger.info("QUERY AYXX: true") + return True + return False + + def __str__(self): + return self.object_id() + + def object_id(self): + return "pvti-%d" % self._sw_if_index + + +@unittest.skipIf("pvti" in config.excluded_plugins, "Exclude PVTI plugin tests") +# @tag_run_solo +class TestPvti(VppTestCase): + """Packet Vector Tunnel Interface (PVTI) Test Case""" + + error_str = compile(r"Error") + + # maxDiff = None + + wg4_output_node_name = "/err/wg4-output-tun/" + wg4_input_node_name = "/err/wg4-input/" + wg6_output_node_name = "/err/wg6-output-tun/" + wg6_input_node_name = "/err/wg6-input/" + kp4_error = wg4_output_node_name + "Keypair error" + mac4_error = wg4_input_node_name + "Invalid MAC handshake" + peer4_in_err = wg4_input_node_name + "Peer error" + peer4_out_err = wg4_output_node_name + "Peer error" + kp6_error = wg6_output_node_name + "Keypair error" + mac6_error = wg6_input_node_name + "Invalid MAC handshake" + peer6_in_err = wg6_input_node_name + "Peer error" + peer6_out_err = wg6_output_node_name + "Peer error" + cookie_dec4_err = wg4_input_node_name + "Failed during Cookie decryption" + cookie_dec6_err = wg6_input_node_name + "Failed during Cookie decryption" + ratelimited4_err = wg4_input_node_name + "Handshake ratelimited" + ratelimited6_err = wg6_input_node_name + "Handshake ratelimited" + + @classmethod + def setUpClass(cls): + super(TestPvti, cls).setUpClass() + try: + cls.create_pg_interfaces(range(2)) + for i in cls.pg_interfaces: + i.admin_up() + i.config_ip4() + i.config_ip6() + i.resolve_arp() + i.resolve_ndp() + + except Exception: + super(TestPvti, cls).tearDownClass() + raise + + @classmethod + def tearDownClass(cls): + super(TestPvti, cls).tearDownClass() + + def setUp(self): + super(VppTestCase, self).setUp() + self.base_kp4_err = self.statistics.get_err_counter(self.kp4_error) + self.base_mac4_err = self.statistics.get_err_counter(self.mac4_error) + self.base_peer4_in_err = self.statistics.get_err_counter(self.peer4_in_err) + self.base_peer4_out_err = self.statistics.get_err_counter(self.peer4_out_err) + self.base_kp6_err = self.statistics.get_err_counter(self.kp6_error) + self.base_mac6_err = self.statistics.get_err_counter(self.mac6_error) + self.base_peer6_in_err = self.statistics.get_err_counter(self.peer6_in_err) + self.base_peer6_out_err = self.statistics.get_err_counter(self.peer6_out_err) + self.base_cookie_dec4_err = self.statistics.get_err_counter( + self.cookie_dec4_err + ) + self.base_cookie_dec6_err = self.statistics.get_err_counter( + self.cookie_dec6_err + ) + self.base_ratelimited4_err = self.statistics.get_err_counter( + self.ratelimited4_err + ) + self.base_ratelimited6_err = self.statistics.get_err_counter( + self.ratelimited6_err + ) + + def create_packets( + self, src_ip_if, count=1, size=150, for_rx=False, is_ip6=False, af_mix=False + ): + pkts = [] + total_packet_count = count + padstr0 = "" + padstr1 = "" + for i in range(0, 2000): + padstr0 = padstr0 + (".%03x" % i) + padstr1 = padstr1 + ("+%03x" % i) + + for i in range(0, total_packet_count): + if af_mix: + is_ip6 = i % 2 == 1 + + dst_mac = src_ip_if.local_mac + src_mac = src_ip_if.remote_mac + if for_rx: + dst_ip4 = src_ip_if.remote_ip4 + dst_ip6 = src_ip_if.remote_ip6 + src_ip4 = "10.0.%d.4" % i + src_ip6 = "2001:db8::%x" % i + else: + src_ip4 = src_ip_if.remote_ip4 + src_ip6 = src_ip_if.remote_ip6 + dst_ip4 = "10.0.%d.4" % i + dst_ip6 = "2001:db8::%x" % i + src_l4 = 1234 + i + dst_l4 = 4321 + i + + ulp = UDP(sport=src_l4, dport=dst_l4) + payload = "test pkt #%d" % i + if i % 2 == 1: + padstr = padstr1 + else: + padstr = padstr0 + + p = Ether(dst=dst_mac, src=src_mac) + if is_ip6: + p /= IPv6(src=src_ip6, dst=dst_ip6) + else: + p /= IP(src=src_ip4, dst=dst_ip4, frag=0, flags=0) + + p /= ulp / Raw(payload) + + if i % 2 == 1 or total_packet_count == 1: + self.extend_packet(p, size, padstr) + else: + self.extend_packet(p, 150, padstr) + pkts.append(p) + return pkts + + def add_rx_ether_header(self, in_pkts, rx_intf=None): + out = [] + if rx_intf is None: + rx_intf = self.pg0 + dst_mac = rx_intf.local_mac + src_mac = rx_intf.remote_mac + pkts = [] + for p in in_pkts: + p0 = Ether(dst=dst_mac, src=src_mac) / p[IP] + out.append(p0) + return out + + def encap_for_rx_test(self, pkts, rx_intf=None): + ende = self.pvti0.get_ende(for_rx_test=True) + encap_pkts = ende.encap_packets(pkts) + return self.add_rx_ether_header(encap_pkts, rx_intf) + + def decrement_ttl_and_build(self, send_pkts): + out = [] + pkts = copy.deepcopy(send_pkts) + for p in pkts: + p[IP].ttl = p[IP].ttl - 1 + out.append(Ether(p.build())) + return out + + def create_rx_packets(self, dst_ip_if, rx_intf=None, count=1, size=150): + pkts = [] + total_packet_count = count + padstr = "" + if rx_intf is None: + rx_intf = self.pg0 + for i in range(0, 2000): + padstr = padstr + (".%03x" % i) + + dst_mac = rx_intf.local_mac + src_mac = rx_intf.remote_mac + + for i in range(0, total_packet_count): + dst_ip4 = dst_ip_if.remote_ip4 + src_ip4 = "10.0.%d.4" % i + src_l4 = 1234 + i + dst_l4 = 4321 + i + + ulp = UDP(sport=src_l4, dport=dst_l4) + payload = "test" + + # if i % 2 == 1 or total_packet_count == 1: + # self.extend_packet(p, size, padstr) + # else: + # self.extend_packet(p, 150, padstr) + + pvti = PVTI(seq=42 + i, chunks=[]) + for j in range(0, 32): + p = ( + IP(src=src_ip4, dst=dst_ip4, frag=0, flags=0, id=j + 0x4000) + / ulp + / Raw(payload) + ) + chunk0 = PVTIChunk(data=raw(p)) + pvti.chunks.append(chunk0) + + p = ( + Ether(dst=dst_mac, src=src_mac) + / IP(src="192.0.2.1", dst=rx_intf.local_ip4, id=0x3000 + i) + / UDP(sport=12312, dport=12312) + / pvti + ) + # p.show() + # Ether(raw(p)).show() + + pkts.append(p) + return pkts + + def send_and_assert_no_replies_ignoring_init( + self, intf, pkts, remark="", timeout=None + ): + self.pg_send(intf, pkts) + + def _filter_out_fn(p): + return is_ipv6_misc(p) or is_handshake_init(p) + + try: + if not timeout: + timeout = 1 + for i in self.pg_interfaces: + i.assert_nothing_captured( + timeout=timeout, remark=remark, filter_out_fn=_filter_out_fn + ) + timeout = 0.1 + finally: + pass + + def test_0000_pvti_interface(self): + """Simple interface creation""" + local_port = 12312 + peer_addr = self.pg0.remote_ip4 # "192.0.2.1" + peer_port = 31234 + peer_port = 12312 + + # Create interface + pvti0 = VppPvtiInterface( + self, self.pg1.local_ip4, local_port, peer_addr, peer_port + ).add_vpp_config() + + self.logger.info(self.vapi.cli("sh int")) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show pvti tx peers")) + self.logger.info(self.vapi.cli("show pvti rx peers")) + + # delete interface + pvti0.remove_vpp_config() + # self.logger.info(self.vapi.cli("show pvti interface")) + # pvti0.add_vpp_config() + + def test_0001_pvti_send_simple_1pkt(self): + """v4o4 TX: Simple packet: 1 -> 1""" + + self.prepare_for_test("v4o4_1pkt_simple") + pkts = self.create_packets(self.pg1) + + recv_pkts = self.send_and_expect(self.pg1, pkts, self.pg0) + for p in recv_pkts: + self.logger.info(p) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(pkts, recv_pkts) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0101_pvti_send_simple_1pkt(self): + """v6o4 TX: Simple packet: 1 -> 1""" + + self.prepare_for_test("v6o4_1pkt_simple") + pkts = self.create_packets(self.pg1, is_ip6=True) + + recv_pkts = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=1) + for p in recv_pkts: + self.logger.info(p) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(pkts, recv_pkts) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0002_pvti_send_simple_2pkt(self): + """TX: Simple packet: 2 -> 1""" + self.prepare_for_test("2pkt_simple") + + send_pkts = self.create_packets(self.pg1, count=2) + pkts = copy.deepcopy(send_pkts) + rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=1) + for p in rx: + self.logger.info(p) + # p.show() + + payload0 = rx[0][PVTI].chunks[0].data + payload1 = rx[0][PVTI].chunks[1].data + + pktA0 = IP(payload0) + pktA1 = IP(payload1) + + p0 = pkts[0][IP] + p0.ttl = p0.ttl - 1 + pktB0 = IP(p0.build()) + + p1 = pkts[1][IP] + p1.ttl = p1.ttl - 1 + pktB1 = IP(p1.build()) + + self.assertEqual(pktA0, pktB0) + self.assertEqual(pktA1, pktB1) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def prepare_for_test(self, test_name, underlay_mtu=1500, is_ip6=False): + local_port = 12312 + peer_ip4_addr = "192.0.2.1" + peer_ip6_addr = "2001:db8:dead::1" + peer_port = 31234 + peer_port = 12312 + for i in self.pg_interfaces: + i.test_name = test_name + if is_ip6: + self.pvti0 = VppPvtiInterface( + self, + self.pg1.local_ip6, + local_port, + peer_ip6_addr, + peer_port, + underlay_mtu, + ).add_vpp_config() + else: + self.pvti0 = VppPvtiInterface( + self, + self.pg1.local_ip4, + local_port, + peer_ip4_addr, + peer_port, + underlay_mtu, + ).add_vpp_config() + self.pvti0.config_ip4() + self.pvti0.config_ip6() + self.pvti0.admin_up() + + self.logger.info(self.vapi.cli("ip route add 0.0.0.0/0 via 172.16.3.3")) + ## FIXME: using direct "interface" below results in blackouts. intermittently. + # self.logger.info(self.vapi.cli("ip route 0.0.0.0/0 via pvti0")) + self.logger.info(self.vapi.cli("ip route add ::/0 via pvti0")) + self.logger.info(self.vapi.cli("ip route add 192.0.2.1/32 via pg0")) + self.logger.info(self.vapi.cli("ip neighbor pg0 192.0.2.1 000c.0102.0304")) + self.logger.info(self.vapi.cli("ip route 2001:db8:dead::1/128 via pg0")) + self.logger.info( + self.vapi.cli("ip neighbor pg0 2001:db8:dead::1 000c.0102.0304") + ) + self.logger.info(self.vapi.cli("ip neighbor pg1 172.16.2.2 000c.0102.0304")) + self.logger.info(self.vapi.cli("sh int")) + self.logger.info(self.vapi.cli("sh ip fib")) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("set interface ip pvti-bypass pg0")) + + def cleanup_after_test(self): + self.logger.info(self.vapi.cli("ip neighbor del pg0 192.0.2.1 000c.0102.0304")) + self.logger.info(self.vapi.cli("ip neighbor del pg1 172.16.2.2 000c.0102.0304")) + self.logger.info(self.vapi.cli("ip route del 192.0.2.1/32 via pg0")) + # self.logger.info(self.vapi.cli("ip route del 0.0.0.0/0 via pvti0")) + self.logger.info(self.vapi.cli("ip route del ::/0 via pvti0")) + self.logger.info(self.vapi.cli("sh int")) + self.logger.info(self.vapi.cli("show pvti interface")) + self.pvti0.remove_vpp_config() + + def test_0003_pvti_send_simple_1pkt_big(self): + """TX: Simple big packet: 1 -> 2""" + self.prepare_for_test("1big_pkt") + + send_pkts = self.create_packets(self.pg1, count=1, size=1900) + pkts = copy.deepcopy(send_pkts) + self.logger.info("count: ") + self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=2) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + # p.show() + payload = rx[0][PVTI].chunks[0].data + rx[1][PVTI].chunks[0].data + + pkt1 = IP(payload) + p0 = pkts[0][IP] + p0.ttl = p0.ttl - 1 + + pkt0 = IP(p0.build()) + + self.assertEqual(pkt0, pkt1) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0004_pvti_send_simple_5pkt_big(self): + """v4o4 TX: Simple big packets: 5 -> 2""" + self.prepare_for_test("v4o4_5big_pkt") + + send_pkts = self.create_packets(self.pg1, count=5, size=1050) + self.logger.info("count: %d " % len(send_pkts)) + # self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + # p.show() + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0104_pvti_send_simple_5pkt_big(self): + """v6o4 TX: Simple big packets: 5 -> 2""" + self.prepare_for_test("v4o4_5big_pkt") + + send_pkts = self.create_packets(self.pg1, count=5, size=1050, is_ip6=True) + self.logger.info("count: %d " % len(send_pkts)) + # self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + # p.show() + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def Xtest_0204_pvti_send_simple_5pkt_mix(self): + """vXo4 TX: Simple packets mix: 5 -> 2""" + # FIXME: This test is disabled for now, but left here, to have this comment + # The mix of IPv4 and IPv6 packets in VPP will forward two + # different graphs, so after encap it will result in two + # PV packets: one with IPv4 chunks, and one with IPv6 chunks. + # The python test encapsulator does not do this, and it is probably + # a useless idea to introduce attempts to mimic this behavior, + # because in any case one can not expect the orderly scheduling + # of IPv4 vs IPv6 graph processing. + self.prepare_for_test("vXo4_5big_pkt") + + send_pkts = self.create_packets(self.pg1, count=5, size=1050, af_mix=True) + # self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0005_pvti_send_mix_3pkt_medium_mtu(self): + """TX: small+big+small packets over medium mtu: 3 -> 3""" + self.prepare_for_test("3pkt_small_mtu", underlay_mtu=400) + + send_pkts = self.create_packets(self.pg1, count=3, size=500) + pkts = copy.deepcopy(send_pkts) + self.logger.info("count: %d " % len(send_pkts)) + # self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + # p.show() + + # check the middle chunk which is spread across two packets + payload = rx[0][PVTI].chunks[1].data + rx[1][PVTI].chunks[0].data + + pkt1 = IP(payload) + + p0 = pkts[1][IP] + p0.ttl = p0.ttl - 1 + + pkt0 = IP(p0.build()) + self.assertEqual(pkt0, pkt1) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0006_pvti_send_mix_4pkt_medium_mtu(self): + """TX: small+big+small packets over 600 mtu: 4 -> 3""" + self.prepare_for_test("6pkt_small_mtu", underlay_mtu=600) + + send_pkts = self.create_packets(self.pg1, count=4, size=500) + pkts = copy.deepcopy(send_pkts) + # self.logger.info(len(pkts)) + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3) + for p in rx: + self.logger.info(p) + self.logger.info(len(p[PVTI].chunks[0].data)) + # p.show() + + # check the middle chunk which is spread across two packets + payload = rx[0][PVTI].chunks[1].data + rx[1][PVTI].chunks[0].data + + pkt1 = IP(payload) + + p0 = pkts[1][IP] + p0.ttl = p0.ttl - 1 + + pkt0 = IP(p0.build()) + self.assertEqual(pkt0, pkt1) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0007_pvti_send_simple_1_3_pkt(self): + """TX: Simple packet: 1 -> 3, small mtu""" + + self.prepare_for_test("1_3_pkt_simple", underlay_mtu=520) + send_pkts = self.create_packets(self.pg1, count=1, size=1400) + pkts = copy.deepcopy(send_pkts) + + rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=3) + for p in rx: + self.logger.info(p) + + c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_0008_pvti_chained_1_3_pkt(self): + """TX: Chained packet: 2700 byte 1 -> 3, mtu 1000""" + + self.prepare_for_test("1_3_pkt_simple", underlay_mtu=1000) + send_pkts = self.create_packets(self.pg1, count=1, size=2700) + pkts = copy.deepcopy(send_pkts) + + pkt0 = Ether(raw(pkts[0]))[IP] + + rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3) + for p in rx: + self.logger.info(p) + + p0 = pkts[0][IP] + p0.ttl = p0.ttl - 1 + pkt0 = IP(p0.build()) + + payload = ( + rx[0][PVTI].chunks[0].data + + rx[1][PVTI].chunks[0].data + + rx[2][PVTI].chunks[0].data + # + rx[2][PVTI].chunks[1].data + ) + pkt1 = IP(payload) + + self.assertEqual(pkt0, pkt1) + + # FIXME: this will fail because the send path + # does not combine the data from two chained blocks. + # when this succeeds, the above checks in this testcase will need to be redone + # c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx) + # self.assertEqual(c_pkts, py_pkts) + + self.cleanup_after_test() + + def test_1001_pvti_rx_simple_1pkt(self): + """RX: Simple packet: 1 -> 32""" + + self.prepare_for_test("1pkt_rx_simple") + pkts = self.create_rx_packets(self.pg1, rx_intf=self.pg0) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=32) + for p in recv_pkts: + self.logger.info(p) + + self.cleanup_after_test() + + def test_1002_pvti_rx_big_1buf(self): + """RX: Orig Big packet, single buf: 2 -> 1""" + + self.prepare_for_test("1buf_rx_big") + + pkts_orig = self.create_packets(self.pg1, count=1, size=1900, for_rx=True) + pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + known_good_pkts = self.decrement_ttl_and_build(pkts_orig) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=1) + for i, p in enumerate(recv_pkts): + self.logger.info(p) + self.assertEqual(p[IP], known_good_pkts[i][IP]) + + self.cleanup_after_test() + + def test_1003_pvti_rx_big_2buf(self): + """RX: Very Big packet, chained buf: 3 -> 1""" + + self.prepare_for_test("2buf_rx_big") + + pkts_orig = self.create_packets(self.pg1, count=1, size=3000, for_rx=True) + + pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + known_good_pkts = self.decrement_ttl_and_build(pkts_orig) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=1) + for i, p in enumerate(recv_pkts): + self.logger.info(p) + if p[IP] != known_good_pkts[i][IP]: + p[IP].show() + known_good_pkts[i][IP].show() + self.assertEqual(p[IP], known_good_pkts[i][IP]) + + self.cleanup_after_test() + + def test_1004_pvti_rx_big_2buf_and_small(self): + """RX: Very Big packet, chained buf: 3 -> 1 + small pkt""" + + self.prepare_for_test("2buf_rx_big_and_small") + + pkts_orig = self.create_packets(self.pg1, count=2, size=3000, for_rx=True) + + pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + known_good_pkts = self.decrement_ttl_and_build(pkts_orig) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2) + for i, p in enumerate(recv_pkts): + self.logger.info(p) + if p[IP] != known_good_pkts[i][IP]: + p[IP].show() + known_good_pkts[i][IP].show() + self.assertEqual(p[IP], known_good_pkts[i][IP]) + + self.cleanup_after_test() + + def test_1005_pvti_rx_big_2buf_and_small_drop(self): + """RX: Very Big packet, chained buf: 3 -> 1 + small pkt, encap pkt lost""" + + self.prepare_for_test("2buf_rx_big_and_small_drop") + + pkts_orig = self.create_packets(self.pg1, count=3, size=3000, for_rx=True) + + pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0) + # drop the second packet after encapsulation (the one with the second frag of the large packet) + pkts.pop(1) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + known_good_pkts = self.decrement_ttl_and_build(pkts_orig) + + # drop the large original packet, leaving just two small ones + known_good_pkts.pop(1) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2) + for i, p in enumerate(recv_pkts): + self.logger.info(p) + if p[IP] != known_good_pkts[i][IP]: + p[IP].show() + known_good_pkts[i][IP].show() + self.assertEqual(p[IP], known_good_pkts[i][IP]) + + self.cleanup_after_test() + + def test_1006_pvti_rx_big_2buf_and_small_drop2(self): + """RX: Very Big packet, chained buf: 3 -> 1 + small pkt, non-initial frag pkt lost""" + + self.prepare_for_test("2buf_rx_big_and_small_drop2") + + pkts_orig = self.create_packets(self.pg1, count=3, size=6000, for_rx=True) + + pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0) + # drop the second packet after encapsulation (the one with the second frag of the large packet) + pkts.pop(2) + self.logger.info(self.vapi.cli("show pvti interface")) + self.logger.info(self.vapi.cli("show udp ports")) + + known_good_pkts = self.decrement_ttl_and_build(pkts_orig) + # drop the large original packet, leaving just two small ones + known_good_pkts.pop(1) + + recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2) + for i, p in enumerate(recv_pkts): + self.logger.info(p) + if p[IP] != known_good_pkts[i][IP]: + p[IP].show() + known_good_pkts[i][IP].show() + self.assertEqual(p[IP], known_good_pkts[i][IP]) + + self.cleanup_after_test() + + +class PvtiHandoffTests(TestPvti): + """Pvti Tests in multi worker setup""" + + vpp_worker_count = 2 + + def xtest_wg_peer_init(self): + """Handoff""" + + port = 12383 + + # Create interfaces + wg0 = VppWgInterface(self, self.pg1.local_ip4, port).add_vpp_config() + wg0.admin_up() + wg0.config_ip4() + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + peer_1 = VppWgPeer( + self, wg0, self.pg1.remote_ip4, port + 1, ["10.11.2.0/24", "10.11.3.0/24"] + ).add_vpp_config() + self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1) + + r1 = VppIpRoute( + self, "10.11.3.0", 24, [VppRoutePath("10.11.3.1", wg0.sw_if_index)] + ).add_vpp_config() + + # skip the first automatic handshake + self.pg1.get_capture(1, timeout=HANDSHAKE_JITTER) + + # send a valid handsake init for which we expect a response + p = peer_1.mk_handshake(self.pg1) + + rx = self.send_and_expect(self.pg1, [p], self.pg1) + + peer_1.consume_response(rx[0]) + + # send a data packet from the peer through the tunnel + # this completes the handshake and pins the peer to worker 0 + p = ( + IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20) + / UDP(sport=222, dport=223) + / Raw() + ) + d = peer_1.encrypt_transport(p) + p = peer_1.mk_tunnel_header(self.pg1) / ( + Pvti(message_type=4, reserved_zero=0) + / PvtiTransport( + receiver_index=peer_1.sender, counter=0, encrypted_encapsulated_packet=d + ) + ) + rxs = self.send_and_expect(self.pg1, [p], self.pg0, worker=0) + + for rx in rxs: + self.assertEqual(rx[IP].dst, self.pg0.remote_ip4) + self.assertEqual(rx[IP].ttl, 19) + + # send a packets that are routed into the tunnel + # and pins the peer tp worker 1 + pe = ( + Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) + / IP(src=self.pg0.remote_ip4, dst="10.11.3.2") + / UDP(sport=555, dport=556) + / Raw(b"\x00" * 80) + ) + rxs = self.send_and_expect(self.pg0, pe * 255, self.pg1, worker=1) + peer_1.validate_encapped(rxs, pe) + + # send packets into the tunnel, from the other worker + p = [ + ( + peer_1.mk_tunnel_header(self.pg1) + / Pvti(message_type=4, reserved_zero=0) + / PvtiTransport( + receiver_index=peer_1.sender, + counter=ii + 1, + encrypted_encapsulated_packet=peer_1.encrypt_transport( + ( + IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20) + / UDP(sport=222, dport=223) + / Raw() + ) + ), + ) + ) + for ii in range(255) + ] + + rxs = self.send_and_expect(self.pg1, p, self.pg0, worker=1) + + for rx in rxs: + self.assertEqual(rx[IP].dst, self.pg0.remote_ip4) + self.assertEqual(rx[IP].ttl, 19) + + # send a packets that are routed into the tunnel + # from worker 0 + rxs = self.send_and_expect(self.pg0, pe * 255, self.pg1, worker=0) + + peer_1.validate_encapped(rxs, pe) + + r1.remove_vpp_config() + peer_1.remove_vpp_config() + wg0.remove_vpp_config() |