summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS5
-rw-r--r--docs/spelling_wordlist.txt3
-rw-r--r--src/plugins/pvti/CMakeLists.txt40
-rw-r--r--src/plugins/pvti/FEATURE.yaml8
-rw-r--r--src/plugins/pvti/api.c137
-rw-r--r--src/plugins/pvti/bypass-main.c79
-rw-r--r--src/plugins/pvti/bypass.c202
-rw-r--r--src/plugins/pvti/bypass.h53
-rw-r--r--src/plugins/pvti/input-main.c115
-rw-r--r--src/plugins/pvti/input.c496
-rw-r--r--src/plugins/pvti/input.h87
-rw-r--r--src/plugins/pvti/output-main.c85
-rw-r--r--src/plugins/pvti/output.c543
-rw-r--r--src/plugins/pvti/output.h75
-rw-r--r--src/plugins/pvti/pvti.api111
-rw-r--r--src/plugins/pvti/pvti.c481
-rw-r--r--src/plugins/pvti/pvti.h257
-rw-r--r--src/plugins/pvti/pvti_if.c376
-rw-r--r--src/plugins/pvti/pvti_if.h47
-rw-r--r--test/test_pvti.py1153
20 files changed, 4353 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c6dbd8bc903..d6a9b1adf8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -847,6 +847,11 @@ I: tracenode
M: Maxime Peim <mpeim@cisco.com>
F: src/plugins/tracenode
+Plugin - Packet Vector Tunnel Interface
+I: pvti
+M: Andrew Yourtchenko <ayourtch@gmail.com>
+F: src/plugins/pvti
+
cJSON
I: cjson
M: Ole Troan <ot@cisco.com>
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 8de7bf4ee24..d6c5b97793e 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -903,6 +903,9 @@ pthreads
pton
pushingapatch
putatively
+pvti
+PVTI
+Pvti
pwait
py
pypi
diff --git a/src/plugins/pvti/CMakeLists.txt b/src/plugins/pvti/CMakeLists.txt
new file mode 100644
index 00000000000..900b662d54a
--- /dev/null
+++ b/src/plugins/pvti/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright (c) 2024 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(pvti
+ SOURCES
+ pvti_if.c
+ pvti.c
+ input.h
+ input.c
+ input-main.c
+ output.h
+ output.c
+ output-main.c
+ bypass.h
+ bypass.c
+ bypass-main.c
+ api.c
+ pvti.h
+
+ MULTIARCH_SOURCES
+ input.c
+ output.c
+ bypass.c
+
+ API_FILES
+ pvti.api
+
+ # API_TEST_SOURCES
+ # pvti_test.c
+)
diff --git a/src/plugins/pvti/FEATURE.yaml b/src/plugins/pvti/FEATURE.yaml
new file mode 100644
index 00000000000..52dbe5b7c1b
--- /dev/null
+++ b/src/plugins/pvti/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Packet Vector Tunnel
+maintainer: Andrew Yourtchenko <ayourtch@gmail.com>
+features:
+ - support inner MTU up to ~8K over standard 1280..1500 MTU substrate
+description: "Large MTU Tunnels"
+state: development
+properties: [API, CLI]
diff --git a/src/plugins/pvti/api.c b/src/plugins/pvti/api.c
new file mode 100644
index 00000000000..cda39ad44e8
--- /dev/null
+++ b/src/plugins/pvti/api.c
@@ -0,0 +1,137 @@
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vlibapi/api.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pvm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+typedef struct
+{
+ vl_api_registration_t *reg;
+ u32 context;
+} pvti_if_details_ctx_t;
+
+typedef struct
+{
+
+} pvti_interface_dump_ctx_t;
+
+static walk_rc_t
+pvti_if_send_details (index_t pvtii, void *data)
+{
+ vl_api_pvti_interface_details_t *rmp;
+ pvti_if_details_ctx_t *ctx = data;
+ const pvti_if_t *pvi;
+
+ pvi = pvti_if_get (pvtii);
+
+ rmp = vl_msg_api_alloc_zero (sizeof (*rmp));
+ rmp->_vl_msg_id =
+ htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base);
+
+ rmp->interface.sw_if_index = htonl (pvi->sw_if_index);
+ rmp->interface.local_port = htons (pvi->local_port);
+ rmp->interface.remote_port = htons (pvi->remote_port);
+ rmp->interface.underlay_mtu = htons (pvi->underlay_mtu);
+
+ ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip);
+ ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip);
+
+ rmp->context = ctx->context;
+
+ vl_api_send_msg (ctx->reg, (u8 *) rmp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ // pvti_main_t *pvm = &pvti_main;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (reg == 0)
+ return;
+
+ pvti_if_details_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ if (sw_if_index == ~0)
+ pvti_if_walk (pvti_if_send_details, &ctx);
+ else
+ {
+ index_t pvtii = pvti_if_find_by_sw_if_index (sw_if_index);
+ if (pvtii != INDEX_INVALID)
+ pvti_if_send_details (pvtii, &ctx);
+ }
+}
+
+static void
+vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp)
+{
+ vl_api_pvti_interface_create_reply_t *rmp;
+ pvti_main_t *pvm = &pvti_main;
+ int rv = ~0;
+ u32 sw_if_index = ~0;
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+
+ ip_address_decode2 (&mp->interface.local_ip, &local_ip);
+ ip_address_decode2 (&mp->interface.remote_ip, &remote_ip);
+ u16 lport = clib_host_to_net_u16 (mp->interface.local_port);
+ u16 rport = clib_host_to_net_u16 (mp->interface.remote_port);
+ u16 underlay_mtu = clib_host_to_net_u16 (mp->interface.underlay_mtu);
+ u32 underlay_fib_index =
+ clib_host_to_net_u32 (mp->interface.underlay_fib_index);
+ pvti_peer_address_method_t peer_address_method =
+ mp->interface.peer_address_from_payload ? PVTI_PEER_ADDRESS_FROM_PAYLOAD :
+ PVTI_PEER_ADDRESS_FIXED;
+
+ if (underlay_mtu == 0)
+ {
+ underlay_mtu = 1500;
+ }
+
+ rv =
+ pvti_if_create (&local_ip, lport, &remote_ip, rport, peer_address_method,
+ underlay_mtu, underlay_fib_index, &sw_if_index);
+
+ REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY,
+ { rmp->sw_if_index = htonl (sw_if_index); });
+}
+
+static void
+vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp)
+{
+ vl_api_pvti_interface_delete_reply_t *rmp;
+ pvti_main_t *pvm = &pvti_main;
+ int rv = 0;
+
+ rv = pvti_if_delete (ntohl (mp->sw_if_index));
+ REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY);
+}
+
+/* API definitions */
+#include <pvti/pvti.api.c>
+
+void
+pvti_api_init ()
+{
+ pvti_main_t *pvm = &pvti_main;
+ /* Add our API messages to the global name_crc hash table */
+ pvm->msg_id_base = setup_message_id_table ();
+}
diff --git a/src/plugins/pvti/bypass-main.c b/src/plugins/pvti/bypass-main.c
new file mode 100644
index 00000000000..db79ccd2113
--- /dev/null
+++ b/src/plugins/pvti/bypass-main.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/bypass.h>
+
+/* packet trace format function */
+static u8 *
+format_pvti_bypass_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_bypass_trace_t *t = va_arg (*args, pvti_bypass_trace_t *);
+
+ s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n",
+ t->sw_if_index, t->next_index);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d", t->seq);
+ return s;
+}
+
+vlib_node_registration_t pvti4_bypass_node;
+vlib_node_registration_t pvti6_bypass_node;
+
+static char *pvti_bypass_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_bypass_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (pvti4_bypass_node) =
+{
+ .name = "ip4-pvti-bypass",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_bypass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+ .error_strings = pvti_bypass_error_strings,
+
+ .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+ [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti4-input",
+ },
+
+};
+
+VLIB_REGISTER_NODE (pvti6_bypass_node) =
+{
+ .name = "ip6-pvti-bypass",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_bypass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+ .error_strings = pvti_bypass_error_strings,
+
+ .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+ [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti6-input",
+ },
+
+};
diff --git a/src/plugins/pvti/bypass.c b/src/plugins/pvti/bypass.c
new file mode 100644
index 00000000000..14c976439eb
--- /dev/null
+++ b/src/plugins/pvti/bypass.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/bypass.h>
+
+always_inline u16
+pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_ip6)
+{
+ u32 n_left_from, *from, *to_next;
+ pvti_bypass_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ u32 pkts_processed = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 sw_if_index0 = 0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ udp_header_t *udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u8 error0, good_udp0, proto0;
+ i32 len_diff0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* setup the packet for the next feature */
+ vnet_feature_next (&next0, b0);
+
+ if (is_ip6)
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ }
+ else
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ }
+
+ if (is_ip6)
+ {
+ proto0 = ip60->protocol;
+ }
+ else
+ {
+ /* Treat IP frag packets as "experimental" protocol for now */
+ proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip6)
+ udp0 = ip6_next_header (ip60);
+ else
+ udp0 = ip4_next_header (ip40);
+
+ /* look up the destination ip and port */
+ u32 pvti_index0 = INDEX_INVALID;
+ if (is_ip6)
+ {
+ pvti_index0 = pvti_if_find_by_remote_ip6_and_port (
+ &ip60->src_address, clib_net_to_host_u16 (udp0->src_port));
+ }
+ else
+ {
+ pvti_index0 = pvti_if_find_by_remote_ip4_and_port (
+ &ip40->src_address, clib_net_to_host_u16 (udp0->src_port));
+ }
+ if (pvti_index0 == INDEX_INVALID)
+ goto exit;
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum.
+ */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip6)
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if (is_ip6)
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ else
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ if (is_ip6)
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+	  /* pvtiX-input node expects current at PVTI header */
+ if (is_ip6)
+ vlib_buffer_advance (b0, sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof (ip4_header_t) +
+ sizeof (udp_header_t));
+ exit:
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_bypass_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->seq = 0; // clib_net_to_host_u32 (pvti0->seq);
+ if (is_ip6)
+ {
+ }
+ else
+ {
+ t->remote_ip.ip.ip4 = ip40->src_address;
+ t->remote_ip.version = AF_IP4;
+ }
+ // t->local_port = h0->udp.dst_port;
+ // t->remote_port = h0->udp.src_port;
+ }
+
+ pkts_processed += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_BYPASS_ERROR_PROCESSED, pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_bypass_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_bypass_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/bypass.h b/src/plugins/pvti/bypass.h
new file mode 100644
index 00000000000..611d5770ad3
--- /dev/null
+++ b/src/plugins/pvti/bypass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_bypass_h__
+#define __included_pvti_bypass_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u32 seq;
+} pvti_bypass_trace_t;
+
+#define foreach_pvti_bypass_error \
+ _ (PROCESSED, "PVTI bypass tunnel packets processed")
+
+typedef enum
+{
+#define _(sym, str) PVTI_BYPASS_ERROR_##sym,
+ foreach_pvti_bypass_error
+#undef _
+ PVTI_BYPASS_N_ERROR,
+} pvti_bypass_error_t;
+
+typedef enum
+{
+ PVTI_BYPASS_NEXT_DROP,
+ PVTI_BYPASS_NEXT_PVTI_INPUT,
+ PVTI_BYPASS_N_NEXT,
+} pvti_bypass_next_t;
+
+#endif // __included_pvti_bypass_h__
diff --git a/src/plugins/pvti/input-main.c b/src/plugins/pvti/input-main.c
new file mode 100644
index 00000000000..8ab8b18dd7c
--- /dev/null
+++ b/src/plugins/pvti/input-main.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/input.h>
+
+static char *pvti_input_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_input_error
+#undef _
+};
+
+#define _(f, s) s,
+static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type };
+#undef _
+
+static char *
+get_pvti_trace_type_name (u8 ptype)
+{
+ if (ptype < PVTI_INPUT_TRACE_N_TYPES)
+ {
+ return pvti_input_trace_type_names[ptype];
+ }
+ else
+ {
+ return "unknown";
+ }
+}
+
+/* packet trace format function */
+static u8 *
+format_pvti_input_trace (u8 *s, va_list *args)
+{
+ int i;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_input_trace_t *t = va_arg (*args, pvti_input_trace_t *);
+
+ u32 indent = format_get_indent (s);
+
+ s = format (s,
+ "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), "
+ "chunkcnt: %d\n",
+ t->sw_if_index, t->next_index,
+ get_pvti_trace_type_name (t->trace_type), t->trace_type,
+ t->chunk_count);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d, chunk_count: %d\n", t->seq, t->chunk_count);
+ u16 max = t->chunk_count > MAX_CHUNKS ? MAX_CHUNKS : t->chunk_count;
+ for (i = 0; i < max; i++)
+ {
+ s = format (s, " %02d: sz %d\n", i, t->chunks[i].total_chunk_length);
+ }
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ip_adjacency_packet_data, t->packet_data,
+ sizeof (t->packet_data));
+
+ return s;
+}
+
+vlib_node_registration_t pvti4_input_node;
+vlib_node_registration_t pvti6_input_node;
+
+VLIB_REGISTER_NODE (pvti4_input_node) =
+{
+ .name = "pvti4-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
+VLIB_REGISTER_NODE (pvti6_input_node) =
+{
+ .name = "pvti6-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c
new file mode 100644
index 00000000000..6a8806e2795
--- /dev/null
+++ b/src/plugins/pvti/input.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/input.h>
+
+always_inline void
+pvti_enqueue_rx_bi_to_next_and_trace (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, u32 bi0,
+ u16 next0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next0, b0,
+ /* follow_chain */ 0)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->trace_type = PVTI_INPUT_TRACE_decap;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+}
+
+always_inline pvti_rx_peer_t *
+pvti_try_find_or_create_rx_peer (pvti_per_thread_data_t *ptd,
+ vlib_buffer_t *b0, bool is_ip6)
+{
+ pvti_rx_peer_t *peer;
+
+ ip_address_t remote_ip = { 0 };
+ u16 remote_port;
+ if (is_ip6)
+ {
+ pvti_ip6_encap_header_t *h0 =
+ ((pvti_ip6_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip6.src_address, AF_IP6);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+ else
+ {
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip4.src_address, AF_IP4);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+
+ pool_foreach (peer, ptd->rx_peers)
+ {
+ if (peer->remote_port == remote_port &&
+ 0 == ip_address_cmp (&remote_ip, &peer->remote_ip))
+ {
+ if (peer->deleted)
+ {
+ // The peer has been marked as deleted - wipe it.
+ clib_memset (peer, 0xca, sizeof (*peer));
+ pool_put (ptd->rx_peers, peer);
+ continue;
+ }
+ return peer;
+ }
+ }
+
+ index_t pvti_if_index0 =
+ pvti_if_find_by_remote_ip_and_port (&remote_ip, remote_port);
+ if (INDEX_INVALID == pvti_if_index0)
+ {
+ // no suitable interface found, bail
+ return 0;
+ }
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_if_index0);
+
+ pvti_rx_peer_t new_peer = {
+ .local_ip = pvti_if0->local_ip,
+ .local_port = pvti_if0->local_port,
+ .remote_ip = remote_ip,
+ .remote_port = remote_port,
+ .pvti_if_index = pvti_if_index0,
+ .rx_streams = { { 0 } },
+ };
+ pvti_rx_peer_t *rx_new_peer;
+ pool_get (ptd->rx_peers, rx_new_peer);
+ *rx_new_peer = new_peer;
+
+ int i;
+ for (i = 0; i < MAX_RX_STREAMS; i++)
+ {
+ rx_new_peer->rx_streams[i].rx_bi0 = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_next0 = 0;
+ }
+
+ return rx_new_peer;
+}
+
+always_inline u16
+pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_ip6)
+{
+ u32 n_left_from, *from;
+ pvti_chunk_header_t *chunks[MAX_CHUNKS];
+ u32 pkts_processed = 0;
+ u32 pkts_decapsulated = 0;
+ u32 decap_failed_no_buffers = 0;
+
+ pvti_main_t *pvm = &pvti_main;
+
+ u32 thread_index = vlib_get_thread_index ();
+ pvti_per_thread_data_t *ptd =
+ vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = PVTI_INPUT_NEXT_DROP;
+ u32 sw_if_index0;
+ u8 true_chunk_count = 0;
+ u8 max_chunk_count;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ pvti_rx_peer_t *pvti_rx_peer0 =
+ pvti_try_find_or_create_rx_peer (ptd, b0, is_ip6);
+ if (!pvti_rx_peer0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PEER];
+ goto drop_and_maybe_trace;
+ }
+
+ b0 = vlib_get_buffer (vm, bi0);
+ pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+ u8 stream_index = pvti0->stream_index;
+ max_chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ u16 pvti_packet_header_sz0 =
+ pvti0->pad_bytes + offsetof (pvti_packet_header_t, pad);
+ if (b0->current_length < pvti_packet_header_sz0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PACKET_TOO_SHORT];
+ goto drop_and_maybe_trace;
+ }
+ vlib_buffer_advance (b0, pvti_packet_header_sz0);
+
+ if (max_chunk_count == 0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_NOCHUNKS];
+ goto drop_and_maybe_trace;
+ }
+ if (pvti0->reass_chunk_count > max_chunk_count)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_TOOMANYREASS];
+ goto drop_and_maybe_trace;
+ }
+ pvti_per_rx_stream_data_t *rx_stream0 =
+ &pvti_rx_peer0->rx_streams[stream_index];
+
+ u32 new_seq0 = clib_net_to_host_u32 (pvti0->seq);
+ if (new_seq0 == rx_stream0->last_rx_seq + 1)
+ {
+ /* Sequence# matches, we can attempt adding the leading chunks to
+ * reassembly */
+ rx_stream0->last_rx_seq = new_seq0;
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* attempt to either incorporate the first chunk into
+ * reassembly or skip it. */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ const u16 chunk_payload_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length) -
+ sizeof (*pvc0);
+ vlib_buffer_advance (b0, sizeof (*pvc0));
+
+ if (rx_stream0->rx_bi0 == INDEX_INVALID)
+ {
+ clib_warning (
+ "RX internal error: not-first chunk but no wip block");
+ }
+ else
+ {
+
+ vlib_buffer_t *rb0 =
+ vlib_get_buffer (vm, rx_stream0->rx_bi0);
+ u16 allowed_length =
+ PVTI_RX_MAX_LENGTH - rb0->current_length;
+ if (allowed_length > chunk_payload_length)
+ {
+ // simple case - there is space in the buffer to fit
+ // the whole chunk
+ void *tail =
+ vlib_buffer_put_uninit (rb0, chunk_payload_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ chunk_payload_length);
+ }
+ else
+ {
+ // The current chunk can not fit - need to make two
+ // copies, one into the current buffer, and one into
+ // a newly allocated chained buffer.
+ void *tail =
+ vlib_buffer_put_uninit (rb0, allowed_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ allowed_length);
+ u16 remaining_payload_length =
+ chunk_payload_length - allowed_length;
+ u32 nrbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nrbi0)
+ {
+ ASSERT (0); // FIXME what the recovery is
+ // supposed to look like ?
+ }
+ else
+ {
+ // link up the new buffer and copy the remainder
+ // there
+ vlib_buffer_t *nrb0 = vlib_get_buffer (vm, nrbi0);
+ rb0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ rb0->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ rb0->next_buffer = nrbi0;
+ rx_stream0->rx_bi0 = nrbi0;
+ void *tail = vlib_buffer_put_uninit (
+ nrb0, remaining_payload_length);
+ clib_memcpy (tail,
+ vlib_buffer_get_current (b0) +
+ allowed_length,
+ remaining_payload_length);
+ }
+ }
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length += chunk_payload_length;
+ if (pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length ==
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length)
+ {
+ next0 = rx_stream0->rx_next0;
+ pvti_enqueue_rx_bi_to_next_and_trace (
+ vm, node, ptd, rx_stream0->rx_bi0_first, next0);
+ pkts_decapsulated += 1;
+
+		      // clean out the current reassembly state
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length = 0;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (b0, chunk_payload_length);
+ }
+ }
+ else
+ {
+ /* Sequence does not match, skip the reassembly chunks and reset
+ * the reassembly state */
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* skip the reassembly chunks */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (
+ b0, clib_net_to_host_u16 (pvc0->total_chunk_length));
+ }
+ // FIXME: discard the current reassembly state, reset the seq#
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ clib_warning ("RX PVTI: discard chunk being reassembled");
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+
+ while ((b0->current_length > 0) && true_chunk_count < max_chunk_count)
+ {
+ if (b0->current_length < sizeof (pvti_chunk_header_t))
+ {
+ clib_warning ("RX ERR: length too short for a chunk");
+ break;
+ }
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ u16 total_chunk_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length);
+ if (b0->current_length < total_chunk_length)
+ {
+ clib_warning ("RX ERR: length 0x%x too big for a chunk",
+ true_chunk_count);
+ break;
+ }
+ u8 *pkt = (u8 *) (pvc0 + 1);
+ u16 inner_length;
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+
+ switch (*pkt & 0xf0)
+ {
+ case 0x40:
+ next0 = PVTI_INPUT_NEXT_IP4_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 2)));
+ break;
+ case 0x60:
+ next0 = PVTI_INPUT_NEXT_IP6_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 4))) +
+ sizeof (ip6_header_t);
+ break;
+ default:
+ next0 = PVTI_INPUT_NEXT_DROP;
+ vlib_buffer_advance (b0, total_chunk_length);
+ continue;
+ }
+ vlib_buffer_advance (b0, sizeof (pvti_chunk_header_t));
+
+ if (inner_length + sizeof (pvti_chunk_header_t) > total_chunk_length)
+ {
+ /* FIXME: the packet size is larger than the chunk -> it's a
+ * first fragment */
+ // enqueue the chunk and finish packet processing.
+ // There must be no active reassembly.
+ ASSERT (rx_stream0->rx_bi0_first == INDEX_INVALID);
+ rx_stream0->rx_next0 = next0;
+ rx_stream0->rx_bi0 = bi0;
+ rx_stream0->rx_bi0_first = bi0;
+ rx_stream0->rx_expected_inner_length = inner_length;
+ rx_stream0->rx_received_inner_length =
+ total_chunk_length - sizeof (pvti_chunk_header_t);
+ rx_stream0->last_rx_seq = new_seq0;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_enqueue;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ goto continue_outer;
+ }
+
+ u32 nbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nbi0)
+ {
+ decap_failed_no_buffers += 1;
+ continue;
+ };
+ vlib_buffer_t *nb0 = vlib_get_buffer (vm, nbi0);
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_rx_peer0->pvti_if_index);
+ vnet_buffer (nb0)->sw_if_index[VLIB_RX] = pvti_if0->sw_if_index;
+ void *new_packet = vlib_buffer_put_uninit (nb0, inner_length);
+ clib_memcpy (new_packet, pvc0 + 1, inner_length);
+ vlib_buffer_advance (b0, inner_length);
+
+ pvti_enqueue_rx_bi_to_next_and_trace (vm, node, ptd, nbi0, next0);
+ pkts_decapsulated += 1;
+ }
+ /* we have processed all the chunks from the buffer, but the buffer
+ * remains. Free it. */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_free;
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ for (int i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vlib_buffer_free_one (vm, bi0);
+
+ continue_outer:
+ pkts_processed += 1;
+ continue;
+
+ drop_and_maybe_trace:
+ next0 = PVTI_INPUT_NEXT_DROP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ int i;
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->trace_type = PVTI_INPUT_TRACE_drop;
+ t->next_index = next0;
+ t->remote_ip.ip.ip4 = h0->ip4.src_address;
+ t->remote_ip.version = AF_IP4;
+ t->local_port = h0->udp.dst_port;
+ t->remote_port = h0->udp.src_port;
+ if (!pvti_rx_peer0)
+ {
+ t->seq = 0xdeaddead;
+ }
+ else
+ {
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count = pvti0->chunk_count < MAX_CHUNKS ?
+ pvti0->chunk_count :
+ MAX_CHUNKS;
+ for (i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ }
+ }
+
+ pkts_processed += 1;
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+ }
+
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_rx_buffers,
+ &ptd->pending_rx_nexts,
+ vec_len (ptd->pending_rx_nexts));
+ vec_reset_length (ptd->pending_rx_buffers);
+ vec_reset_length (ptd->pending_rx_nexts);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_PROCESSED, pkts_processed);
+ vlib_node_increment_counter (
+ vm, node->node_index, PVTI_INPUT_ERROR_DECAPSULATED, pkts_decapsulated);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_NO_BUFFERS,
+ decap_failed_no_buffers);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/input.h b/src/plugins/pvti/input.h
new file mode 100644
index 00000000000..02a186cde05
--- /dev/null
+++ b/src/plugins/pvti/input.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_input_h__
+#define __included_pvti_input_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Per-chunk info captured in the RX packet trace */
+typedef struct
+{
+  u16 total_chunk_length;
+} pvti_input_chunk_t;
+
+#define MAX_CHUNKS	   32 /* max chunks recorded per trace entry */
+#define PVTI_RX_MAX_LENGTH 2048
+
+/* Trace record for the pvti input nodes */
+typedef struct
+{
+  u32 next_index; /* next node chosen for the packet */
+  u32 sw_if_index;
+  ip_address_t remote_ip; /* outer source address of the tunneled packet */
+  u16 remote_port;
+  u16 local_port;
+  u32 seq; /* PVTI sequence number, host byte order */
+  pvti_input_chunk_t chunks[MAX_CHUNKS];
+  u8 chunk_count;
+  u8 trace_type; /* one of PVTI_INPUT_TRACE_* below */
+  u8 packet_data[64];
+} pvti_input_trace_t;
+
+/* Kinds of events the input trace distinguishes */
+#define foreach_pvti_input_trace_type                                         \
+  _ (drop, "drop")                                                            \
+  _ (decap, "decapsulate")                                                    \
+  _ (free, "free")                                                            \
+  _ (enqueue, "enqueue")
+
+typedef enum
+{
+#define _(f, s) PVTI_INPUT_TRACE_##f,
+  foreach_pvti_input_trace_type
+#undef _
+    PVTI_INPUT_TRACE_N_TYPES,
+} pvti_input_trace_type_t;
+
+/* Error counters for the input nodes (symbol, counter description) */
+#define foreach_pvti_input_error                                              \
+  _ (PROCESSED, "PVTI tunneled packets processed")                            \
+  _ (DECAPSULATED, "PVTI inner packets decapsulated")                         \
+  _ (PEER, "Could not find a peer")                                           \
+  _ (NOCHUNKS, "Packet has no chunks")                                        \
+  _ (NO_BUFFERS, "No buffers available to decapsulate")                       \
+  _ (TOOMANYREASS, "Packet has more reassembly chunks than total")            \
+  _ (PACKET_TOO_SHORT, "Packet too short")
+
+typedef enum
+{
+#define _(sym, str) PVTI_INPUT_ERROR_##sym,
+  foreach_pvti_input_error
+#undef _
+    PVTI_INPUT_N_ERROR,
+} pvti_input_error_t;
+
+/* Next-node indices for the input nodes */
+typedef enum
+{
+  PVTI_INPUT_NEXT_DROP,
+  PVTI_INPUT_NEXT_IP4_INPUT,
+  PVTI_INPUT_NEXT_IP6_INPUT,
+  PVTI_INPUT_NEXT_PUNT,
+  PVTI_INPUT_N_NEXT,
+} pvti_input_next_t;
+
+#endif // __included_pvti_input_h__
diff --git a/src/plugins/pvti/output-main.c b/src/plugins/pvti/output-main.c
new file mode 100644
index 00000000000..ae4ae5f8e98
--- /dev/null
+++ b/src/plugins/pvti/output-main.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/output.h>
+
+/* packet trace format function */
+/* packet trace format function */
+static u8 *
+format_pvti_output_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  pvti_output_trace_t *t = va_arg (*args, pvti_output_trace_t *);
+
+  /* first line: summary; continuation lines aligned to current indent */
+  u32 indent = format_get_indent (s);
+  s =
+    format (s, "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,",
+	    t->trace_type, t->sw_if_index, t->next_index, t->underlay_mtu);
+  s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d",
+	      format_white_space, indent, t->stream_index,
+	      t->bi0_max_current_length, t->tx_seq);
+  /* hex/decode dump of the first bytes of the (encapsulated) packet */
+  s = format (s, "\n%U%U", format_white_space, indent,
+	      format_ip_adjacency_packet_data, t->packet_data,
+	      sizeof (t->packet_data));
+
+  return s;
+}
+
+/* NOTE(review): apparently unused — the registered nodes below are
+ * pvti4_output_node / pvti6_output_node; confirm this forward declaration
+ * is still referenced anywhere before relying on it. */
+vlib_node_registration_t pvti_output_node;
+
+/* Counter descriptions generated from foreach_pvti_output_error */
+static char *pvti_output_error_strings[] = {
+#define _(sym, string) string,
+  foreach_pvti_output_error
+#undef _
+};
+
+/* IPv4 underlay output node registration */
+VLIB_REGISTER_NODE (pvti4_output_node) =
+{
+  .name = "pvti4-output",
+  .vector_size = sizeof (u32),
+  .format_trace = format_pvti_output_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(pvti_output_error_strings),
+  .error_strings = pvti_output_error_strings,
+
+  .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+  .next_nodes = {
+        [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+        [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+        [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+        [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+  },
+
+};
+/* IPv6 underlay output node registration */
+VLIB_REGISTER_NODE (pvti6_output_node) =
+{
+  .name = "pvti6-output",
+  .vector_size = sizeof (u32),
+  .format_trace = format_pvti_output_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(pvti_output_error_strings),
+  .error_strings = pvti_output_error_strings,
+
+  .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+  .next_nodes = {
+        [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+        [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+        [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+        [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+  },
+
+};
diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c
new file mode 100644
index 00000000000..1939c6f585a
--- /dev/null
+++ b/src/plugins/pvti/output.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/output.h>
+
+/* Build the IPv6 version / traffic-class / flow-label word, carrying the
+ * stream index in the flow label, returned in network byte order. */
+static_always_inline u32
+ip6_vtcfl (u8 stream_index)
+{
+  const u32 version_bits = 0x6 << 28;
+
+  return (clib_host_to_net_u32 (version_bits | stream_index));
+}
+
+/* Allocate one fresh, empty buffer to coalesce TX chunks into.
+ * Returns NULL when the buffer pool is exhausted. */
+always_inline vlib_buffer_t *
+pvti_alloc_new_tx_buffer (vlib_main_t *vm)
+{
+  u32 new_bi = INDEX_INVALID;
+
+  if (vlib_buffer_alloc (vm, &new_bi, 1) == 1)
+    {
+      vlib_buffer_t *nb = vlib_get_buffer (vm, new_bi);
+      nb->current_data = 0;
+      nb->current_length = 0;
+      return nb;
+    }
+  return 0;
+}
+
+/*
+ * Find the per-thread TX peer matching (remote_ip, remote_port), lazily
+ * creating one (with a freshly allocated coalescing buffer) if none exists.
+ * Entries marked "deleted" are garbage-collected during the scan.
+ * Returns 1 and sets *out_index on success; 0 if a buffer could not be
+ * allocated for a new peer.
+ */
+always_inline bool
+pvti_find_or_try_create_tx_peer (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+				 pvti_if_t *pvti_if0, ip_address_t *remote_ip,
+				 u16 remote_port, u32 *out_index)
+{
+
+  pvti_tx_peer_t *peer;
+  pool_foreach (peer, ptd->tx_peers)
+    {
+      if (peer->remote_port == remote_port &&
+	  0 == ip_address_cmp (remote_ip, &peer->remote_ip))
+	{
+	  if (peer->deleted)
+	    {
+	      // Bad luck, the peer has been deleted.
+	      // Check bo0 for NULL *before* deriving the buffer index:
+	      // vlib_get_buffer_index () on a NULL pointer is undefined
+	      // behavior (the old code computed the index unconditionally).
+	      if (peer->bo0)
+		{
+		  u32 boi0 = vlib_get_buffer_index (vm, peer->bo0);
+		  vlib_buffer_free (vm, &boi0, 1);
+		}
+	      clib_memset (peer, 0xca, sizeof (*peer));
+	      pool_put (ptd->tx_peers, peer);
+	      continue;
+	    }
+	  *out_index = peer - ptd->tx_peers;
+	  return 1;
+	}
+    }
+
+  /* No live peer found - create one. Encap overhead depends on the
+   * underlay address family. */
+  ip_address_family_t dst_ver = ip_addr_version (&pvti_if0->remote_ip);
+
+  u16 pvti_encap_overhead = (dst_ver == AF_IP6) ?
+			      sizeof (pvti_ip6_encap_header_t) :
+			      sizeof (pvti_ip4_encap_header_t);
+
+  u16 pvti_packet_overhead =
+    pvti_encap_overhead + sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES;
+
+  ASSERT (pvti_if0->underlay_mtu > pvti_packet_overhead);
+
+  u32 bo0_max_current_length = pvti_if0->underlay_mtu - pvti_packet_overhead;
+
+  vlib_buffer_t *bo0 = pvti_alloc_new_tx_buffer (vm);
+
+  if (!bo0)
+    {
+      return 0;
+    }
+
+  pvti_tx_peer_t new_peer = {
+    .local_ip = pvti_if0->local_ip,
+    .remote_ip = *remote_ip,
+    .local_port = pvti_if0->local_port,
+    .remote_port = remote_port,
+    .underlay_mtu = pvti_if0->underlay_mtu,
+    .underlay_fib_index = pvti_if0->underlay_fib_index,
+    .bo0_max_current_length = bo0_max_current_length,
+    .pvti_if_index = pvti_if_get_index (pvti_if0),
+    .deleted = 0,
+    .bo0 = bo0,
+    .chunk_count = 0,
+    .reass_chunk_count = 0,
+    .current_tx_seq = 42,
+  };
+
+  pvti_tx_peer_t *tx_new_peer;
+  pool_get (ptd->tx_peers, tx_new_peer);
+
+  *tx_new_peer = new_peer;
+  *out_index = tx_new_peer - ptd->tx_peers;
+  return 1;
+}
+
+/* Resolve (or lazily create) the TX peer for this inner packet.
+ * When the interface learns the peer address from the payload, the inner
+ * packet's destination address selects the peer; otherwise the interface's
+ * configured remote is used. Returns 1 and sets *out_index on success. */
+always_inline bool
+pvti_try_get_tx_peer_index (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+			    pvti_if_t *pvti_if0, vlib_buffer_t *b0,
+			    bool is_ip6, u32 *out_index)
+{
+  if (!pvti_if0->peer_address_from_payload)
+    {
+      /* static peer: use the configured remote address */
+      return pvti_find_or_try_create_tx_peer (
+	vm, ptd, pvti_if0, &pvti_if0->remote_ip, pvti_if0->remote_port,
+	out_index);
+    }
+
+  /* dynamic peer: take it from the inner packet's destination */
+  ip_address_t remote_ip = { 0 };
+  if (is_ip6)
+    {
+      ip6_header_t *inner_ip6 = vlib_buffer_get_current (b0);
+      ip_address_set (&remote_ip, &inner_ip6->dst_address, AF_IP6);
+    }
+  else
+    {
+      ip4_header_t *inner_ip4 = vlib_buffer_get_current (b0);
+      ip_address_set (&remote_ip, &inner_ip4->dst_address, AF_IP4);
+    }
+  return pvti_find_or_try_create_tx_peer (vm, ptd, pvti_if0, &remote_ip,
+					  pvti_if0->remote_port, out_index);
+}
+
+/*
+ * Close out a chunk written into the coalescing buffer: fill the header
+ * with a 0xab poison pattern, record the total chunk length (header
+ * included, up to 'tail') in network byte order, and bump the peer's
+ * chunk counters. is_reassembly_chunk marks continuation chunks of an
+ * inner packet split across PVTI packets.
+ */
+always_inline void
+pvti_finalize_chunk (pvti_tx_peer_t *tx_peer,
+		     pvti_chunk_header_t *chunk_header, u8 *tail,
+		     bool is_reassembly_chunk)
+{
+  /* poison first so any field not set below is conspicuous in dumps */
+  clib_memset (chunk_header, 0xab, sizeof (pvti_chunk_header_t));
+  chunk_header->total_chunk_length =
+    clib_host_to_net_u16 (tail - (u8 *) chunk_header);
+  tx_peer->chunk_count++;
+  if (is_reassembly_chunk)
+    {
+      tx_peer->reass_chunk_count++;
+    }
+}
+
+/*
+ * Prepend the PVTI packet header and the UDP + IPv4/IPv6 encapsulation
+ * onto the peer's coalescing buffer. The encap address family is taken
+ * from the peer's local/remote addresses (which must agree).
+ * Returns the next-node index: ip4/ip6-lookup on success, or DROP when
+ * there is not enough buffer pre-data headroom.
+ *
+ * NOTE(review): the is_ip6 parameter (inner payload AF) is not used here;
+ * confirm whether it is intentional or leftover.
+ */
+always_inline pvti_output_next_t
+encap_pvti_buffer_ip46 (vlib_main_t *vm, vlib_node_runtime_t *node,
+			pvti_tx_peer_t *tx_peer, int is_ip6)
+{
+  ip_address_family_t src_ver = ip_addr_version (&tx_peer->local_ip);
+  ip_address_family_t dst_ver = ip_addr_version (&tx_peer->remote_ip);
+  u8 stream_index = 0;
+
+  ASSERT (src_ver == dst_ver);
+  bool is_ip6_encap = (AF_IP6 == src_ver);
+
+  vlib_buffer_t *b0 = tx_peer->bo0;
+  /* make room for the PVTI header (plus alignment pad) before the chunks */
+  vlib_buffer_advance (b0,
+		       -(sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES));
+
+  pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+  /* poison-fill, then set each field explicitly */
+  clib_memset (pvti0, 0xca, sizeof (*pvti0) + PVTI_ALIGN_BYTES);
+  pvti0->pad_bytes = PVTI_ALIGN_BYTES;
+
+  pvti0->seq = clib_host_to_net_u32 (tx_peer->current_tx_seq);
+  pvti0->stream_index = stream_index;
+  pvti0->reass_chunk_count = tx_peer->reass_chunk_count;
+  pvti0->chunk_count = tx_peer->chunk_count;
+  pvti0->mandatory_flags_mask = 0;
+  pvti0->flags_value = 0;
+
+  if (is_ip6_encap)
+    {
+      vlib_buffer_advance (b0, -(sizeof (pvti_ip6_encap_header_t)));
+      if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+	{
+	  // undo the change
+	  vlib_buffer_advance (b0, (sizeof (pvti_ip6_encap_header_t)));
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+	  return PVTI_OUTPUT_NEXT_DROP;
+	}
+      pvti_ip6_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+      ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+      ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+      ve->udp.length = clib_host_to_net_u16 (
+	b0->current_length - offsetof (pvti_ip6_encap_header_t, udp));
+      /* NOTE(review): a zero UDP checksum is not permitted over IPv6
+       * (RFC 8200 sec. 8.1 / RFC 768) — confirm a checksum is computed
+       * or offloaded somewhere downstream. */
+      ve->udp.checksum = 0;
+
+      ve->ip6.ip_version_traffic_class_and_flow_label =
+	ip6_vtcfl (stream_index);
+      ve->ip6.payload_length = ve->udp.length;
+      ve->ip6.protocol = 17; /* UDP */
+      ve->ip6.hop_limit = 128;
+      ip_address_copy_addr (&ve->ip6.src_address, &tx_peer->local_ip);
+      ip_address_copy_addr (&ve->ip6.dst_address, &tx_peer->remote_ip);
+    }
+  else
+    {
+      vlib_buffer_advance (b0, -(sizeof (pvti_ip4_encap_header_t)));
+      if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+	{
+	  // undo the change
+	  vlib_buffer_advance (b0, (sizeof (pvti_ip4_encap_header_t)));
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+	  return PVTI_OUTPUT_NEXT_DROP;
+	}
+      pvti_ip4_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+      ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+      ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+      ve->udp.length = clib_host_to_net_u16 (
+	b0->current_length - offsetof (pvti_ip4_encap_header_t, udp));
+      ve->udp.checksum = 0;
+
+      ve->ip4.ip_version_and_header_length = 0x45;
+      ve->ip4.tos = 0;
+      ve->ip4.length = clib_host_to_net_u16 (b0->current_length);
+      /* reuse the low 16 bits of the TX sequence as the IP ID */
+      ve->ip4.fragment_id =
+	clib_host_to_net_u16 (tx_peer->current_tx_seq & 0xffff);
+      ve->ip4.flags_and_fragment_offset = 0;
+      ve->ip4.ttl = 128;
+      ve->ip4.protocol = 17; /* UDP */
+
+      ve->ip4.dst_address.as_u32 = ip_addr_v4 (&tx_peer->remote_ip).data_u32;
+      ve->ip4.src_address.as_u32 = ip_addr_v4 (&tx_peer->local_ip).data_u32;
+      ve->ip4.checksum = ip4_header_checksum (&ve->ip4);
+    }
+
+  // This is important, if not reset, causes a crash
+  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tx_peer->underlay_fib_index;
+
+  // vnet_buffer (b0)->oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+  return is_ip6_encap ? PVTI_OUTPUT_NEXT_IP6_LOOKUP :
+			PVTI_OUTPUT_NEXT_IP4_LOOKUP;
+}
+
+/*
+ * Queue an encapsulated buffer for transmission on the per-thread pending
+ * vectors, adding a trace record (trace_type 1 = transmit) when tracing is
+ * on and any of the coalesced inner packets was traced.
+ */
+always_inline void
+pvti_enqueue_tx_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+			   pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+			   u16 next0, u8 stream_index, pvti_tx_peer_t *tx_peer)
+{
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+		     tx_peer->is_bo0_traced))
+    {
+      if (PREDICT_TRUE (
+	    vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0)))
+	{
+
+	  pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+	  t->next_index = next0;
+	  t->underlay_mtu = tx_peer->underlay_mtu;
+	  t->stream_index = stream_index;
+	  t->trace_type = 1;
+	  t->bi0_max_current_length = tx_peer->bo0_max_current_length;
+	  t->tx_seq = tx_peer->current_tx_seq;
+	  clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+		       sizeof (t->packet_data));
+	}
+    }
+  u32 bi0 = vlib_get_buffer_index (vm, b0);
+  vec_add1 (ptd->pending_tx_buffers, bi0);
+  vec_add1 (ptd->pending_tx_nexts, next0);
+}
+
+/*
+ * Queue a buffer to the drop next-node, adding a trace record
+ * (trace_type 0 = drop) if the buffer itself was traced. The caller is
+ * expected to have set b0->error beforehand.
+ */
+always_inline void
+pvti_enqueue_tx_drop_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+				pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+				u8 stream_index)
+{
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+		     (b0->flags & VLIB_BUFFER_IS_TRACED)))
+    {
+      pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+      t->next_index = PVTI_OUTPUT_NEXT_DROP;
+      t->stream_index = stream_index;
+      t->trace_type = 0;
+      clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+		   sizeof (t->packet_data));
+    }
+  u32 bi0 = vlib_get_buffer_index (vm, b0);
+  vec_add1 (ptd->pending_tx_buffers, bi0);
+  vec_add1 (ptd->pending_tx_nexts, PVTI_OUTPUT_NEXT_DROP);
+}
+
+/*
+ * Encapsulate and enqueue the peer's current coalescing buffer, then start
+ * a fresh one and advance the TX sequence. Returns 1 on success, 0 if a
+ * replacement buffer could not be allocated — so the caller can drop and
+ * count an error instead of dereferencing a NULL bo0 on the next chunk.
+ */
+always_inline bool
+pvti_flush_peer_and_recharge (vlib_main_t *vm, vlib_node_runtime_t *node,
+			      pvti_per_thread_data_t *ptd, u32 tx_peer_index,
+			      u8 stream_index, const bool is_ip6)
+{
+  pvti_tx_peer_t *tx_peer = pool_elt_at_index (ptd->tx_peers, tx_peer_index);
+  u16 next0 = encap_pvti_buffer_ip46 (vm, node, tx_peer, is_ip6);
+
+  pvti_enqueue_tx_and_trace (vm, node, ptd, tx_peer->bo0, next0, stream_index,
+			     tx_peer);
+
+  tx_peer->bo0 = pvti_alloc_new_tx_buffer (vm);
+  tx_peer->reass_chunk_count = 0;
+  tx_peer->chunk_count = 0;
+  tx_peer->current_tx_seq++;
+
+  /* was: unconditional "return 1", which left bo0 NULL on allocation
+   * failure and made the RECHARGE0/RECHARGE1 error paths in the callers
+   * unreachable dead code */
+  return tx_peer->bo0 != NULL;
+}
+
+/*
+ * TX path common to IPv4/IPv6 payloads: copy each inner packet as a chunk
+ * into the per-peer coalescing buffer, flushing (encapsulate + enqueue)
+ * whenever the buffer reaches its underlay-MTU budget. Inner packets that
+ * do not fit in one PVTI packet are split across several as "reassembly"
+ * chunks. At the end of the frame, any peer with pending chunks is
+ * flushed.
+ */
+always_inline u16
+pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+			 vlib_frame_t *frame, const bool is_ip6)
+{
+  pvti_main_t *pvm = &pvti_main;
+
+  u32 n_left_from, *from;
+  u32 pkts_encapsulated = 0;
+  u32 pkts_processed = 0;
+  /* NOTE(review): the chopped/overflow counters below are never
+   * incremented in this function — confirm whether they are still
+   * intended to be used. */
+  u32 pkts_chopped = 0;
+  u32 pkts_overflow = 0;
+  u32 pkts_overflow_cantfit = 0;
+
+  bool is_node_traced = (node->flags & VLIB_NODE_FLAG_TRACE) ? 1 : 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  u8 stream_index = pvti_get_stream_index (is_ip6);
+
+  u32 thread_index = vlib_get_thread_index ();
+  pvti_per_thread_data_t *ptd =
+    vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+  vlib_buffer_t *ibufs[VLIB_FRAME_SIZE], **ib = ibufs;
+
+  vlib_get_buffers (vm, from, ibufs, n_left_from);
+
+  n_left_from = frame->n_vectors;
+  /* one inner packet per iteration */
+  while (1 && n_left_from > 0)
+    {
+      n_left_from -= 1;
+      vlib_buffer_t *b0 = ib[0];
+      ib++;
+      u32 bi0 = vlib_get_buffer_index (vm, b0);
+      bool is_b0_traced =
+	is_node_traced && ((b0->flags & VLIB_BUFFER_IS_TRACED) ? 1 : 0);
+      pkts_processed += 1;
+
+      /* map the TX interface to its PVTI instance */
+      u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+      u32 pvti_index0 = pvti_if_find_by_sw_if_index (sw_if_index0);
+      if (pvti_index0 == INDEX_INVALID)
+	{
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_PEER];
+	  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+	  continue;
+	}
+      pvti_if_t *pvti_if0 = pvti_if_get (pvti_index0);
+      u32 tx_peer_index;
+      if (!pvti_try_get_tx_peer_index (vm, ptd, pvti_if0, b0, is_ip6,
+				       &tx_peer_index))
+	{
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_MAKE_PEER];
+	  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+	  continue;
+	}
+      pvti_tx_peer_t *tx_peer = &ptd->tx_peers[tx_peer_index];
+
+      u32 b0_len = vlib_buffer_length_in_chain (vm, b0);
+      u32 total_chunk_len = sizeof (pvti_chunk_header_t) + b0_len;
+
+      if (tx_peer->bo0_max_current_length >=
+	  tx_peer->bo0->current_length + total_chunk_len)
+	{
+	  /* Happy case, we can fit the entire new chunk */
+	  pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+	    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+	  u8 *tail = vlib_buffer_put_uninit (tx_peer->bo0, b0_len);
+	  vlib_buffer_t *b0_curr;
+	  b0_curr = b0;
+	  /* flatten the (possibly chained) inner packet into the chunk */
+	  while (b0_len > 0)
+	    {
+	      clib_memcpy (tail, vlib_buffer_get_current (b0_curr),
+			   b0_curr->current_length);
+	      tail += b0_curr->current_length;
+	      b0_len -= b0_curr->current_length;
+	      ASSERT ((b0_len == 0) ||
+		      (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+	      if (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)
+		{
+		  b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+		}
+	    }
+	  tx_peer->is_bo0_traced |= is_b0_traced;
+	  pvti_finalize_chunk (tx_peer, chunk_header, tail, false);
+	}
+      else
+	{
+	  bool is_reassembly = false;
+	  /* FIXME: here, flush a packet if we want to avoid fragmenting it */
+#define PVTI_TINY_PACKET_SZ 20
+	  int threshold_len =
+	    sizeof (pvti_chunk_header_t) + PVTI_TINY_PACKET_SZ;
+
+	  /* Can we fit anything meaningful into bo0 ? if not - flush */
+	  if (tx_peer->bo0_max_current_length <=
+	      tx_peer->bo0->current_length + threshold_len)
+	    {
+	      if (!pvti_flush_peer_and_recharge (vm, node, ptd, tx_peer_index,
+						 stream_index, is_ip6))
+		{
+		  b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE0];
+		  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+						  stream_index);
+		  continue;
+		}
+	      pkts_encapsulated += 1;
+	    }
+
+	  pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+	    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+
+	  u8 *tail;
+	  vlib_buffer_t *b0_curr;
+	  /* append the chained buffers and flush as necessary */
+	  b0_curr = b0;
+
+	  int curr_b0_start_offset = 0;
+
+	  while (b0_len > 0)
+	    {
+	      ASSERT (tx_peer->bo0_max_current_length >
+		      tx_peer->bo0->current_length);
+	      /* copy as much of the current source buffer as the
+	       * coalescing buffer's budget allows */
+	      int copy_len =
+		clib_min (b0_curr->current_length - curr_b0_start_offset,
+			  tx_peer->bo0_max_current_length -
+			    tx_peer->bo0->current_length);
+	      tail = vlib_buffer_put_uninit (tx_peer->bo0, copy_len);
+	      clib_memcpy (tail,
+			   (u8 *) vlib_buffer_get_current (b0_curr) +
+			     curr_b0_start_offset,
+			   copy_len);
+	      tail += copy_len;
+	      b0_len -= copy_len;
+	      // Advance the start offset or reset it if we copied the entire
+	      // block
+	      curr_b0_start_offset =
+		curr_b0_start_offset + copy_len == b0_curr->current_length ?
+		  0 :
+		  curr_b0_start_offset + copy_len;
+	      ASSERT ((b0_len == 0) || (curr_b0_start_offset > 0) ||
+		      (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+	      if (curr_b0_start_offset > 0)
+		{
+		  /* budget exhausted mid-buffer: close the chunk, flush,
+		   * and continue in a reassembly chunk */
+		  pvti_finalize_chunk (tx_peer, chunk_header, tail,
+				       is_reassembly);
+		  tx_peer->is_bo0_traced |= is_b0_traced;
+		  if (!pvti_flush_peer_and_recharge (
+			vm, node, ptd, tx_peer_index, stream_index, is_ip6))
+		    {
+		      b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE1];
+		      pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+						      stream_index);
+		      /* NOTE(review): this 'continue' targets the inner
+		       * while (b0_len > 0) loop, not the per-packet loop —
+		       * with b0_len still > 0 and bo0 possibly NULL. Looks
+		       * like it should abandon this packet instead;
+		       * confirm and restructure. */
+		      continue;
+		    }
+		  pkts_encapsulated += 1;
+		  /* next chunk(s) will be reassembly until the next block */
+		  is_reassembly = true;
+		  chunk_header = vlib_buffer_put_uninit (
+		    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+		}
+	      else
+		{
+		  if ((b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT))
+		    {
+		      b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+		    }
+		  else
+		    {
+		      pvti_finalize_chunk (tx_peer, chunk_header, tail,
+					   is_reassembly);
+		      tx_peer->is_bo0_traced |= is_b0_traced;
+		    }
+		}
+	    }
+	}
+      /* inner packet has been copied out - release it */
+      vlib_buffer_free_one (vm, bi0);
+    }
+
+  /* flush all peers that still have chunks pending from this frame */
+  int i;
+  for (i = 0; i < vec_len (ptd->tx_peers); i++)
+    {
+      if (ptd->tx_peers[i].chunk_count)
+	{
+	  pvti_flush_peer_and_recharge (vm, node, ptd, i, stream_index,
+					is_ip6);
+	  pkts_encapsulated += 1;
+	}
+    }
+
+  vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_tx_buffers,
+				   &ptd->pending_tx_nexts,
+				   vec_len (ptd->pending_tx_nexts));
+  vec_reset_length (ptd->pending_tx_buffers);
+  vec_reset_length (ptd->pending_tx_nexts);
+
+  vlib_node_increment_counter (
+    vm, node->node_index, PVTI_OUTPUT_ERROR_ENCAPSULATED, pkts_encapsulated);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_PROCESSED, pkts_processed);
+  vlib_node_increment_counter (vm, node->node_index, PVTI_OUTPUT_ERROR_CHOPPED,
+			       pkts_chopped);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_OVERFLOW, pkts_overflow);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_OVERFLOW_CANTFIT,
+			       pkts_overflow_cantfit);
+  return frame->n_vectors;
+}
+
+/* IPv4 payload flavor of the PVTI output node */
+VLIB_NODE_FN (pvti4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  return pvti_output_node_common (vm, node, frame, 0);
+}
+
+/* IPv6 payload flavor of the PVTI output node */
+VLIB_NODE_FN (pvti6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  return pvti_output_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/output.h b/src/plugins/pvti/output.h
new file mode 100644
index 00000000000..95e78ba9720
--- /dev/null
+++ b/src/plugins/pvti/output.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_output_h__
+#define __included_pvti_output_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Trace record for the pvti output nodes */
+typedef struct
+{
+  u32 next_index;
+  u32 sw_if_index;
+  u32 tx_seq; /* peer TX sequence at encap time */
+  u16 underlay_mtu;
+  u16 bi0_max_current_length; /* payload budget of the coalescing buffer */
+  u8 stream_index;
+  u8 trace_type; /* 0 = drop, 1 = transmit */
+  u8 packet_data[96];
+} pvti_output_trace_t;
+
+#define foreach_pvti_output_error \
+ _ (NONE, "No error") \
+ _ (PROCESSED, "Packets processed") \
+ _ (ENCAPSULATED, "Packets encapsulated") \
+ _ (PEER, "No peer found") \
+ _ (MAKE_PEER, "Could not make peer") \
+ _ (RECHARGE0, "Could not recharge 0") \
+ _ (RECHARGE1, "Could not recharge 1") \
+ _ (NO_PRE_SPACE, "Not enought pre-data space") \
+ _ (CHOPPED, "Packets chopped") \
+ _ (OVERFLOW, "Packets overflowed") \
+ _ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess")
+
+typedef enum
+{
+#define _(sym, str) PVTI_OUTPUT_ERROR_##sym,
+ foreach_pvti_output_error
+#undef _
+ PVTI_OUTPUT_N_ERROR,
+} pvti_output_error_t;
+
+/* Chunk flavors carried in a PVTI packet */
+typedef enum
+{
+  PVTI_INDEPENDENT_CHUNK = 0,
+  PVTI_REASS_CHUNK,
+} pvti_chunk_type_t;
+
+#define MAX_CURR_LEN_UNKNOWN 0xffff
+
+/* Next-node indices for the output nodes */
+typedef enum
+{
+  PVTI_OUTPUT_NEXT_DROP,
+  PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT,
+  PVTI_OUTPUT_NEXT_IP4_LOOKUP,
+  PVTI_OUTPUT_NEXT_IP6_LOOKUP,
+  PVTI_OUTPUT_N_NEXT,
+} pvti_output_next_t;
+
+#endif // __included_pvti_output_h__
diff --git a/src/plugins/pvti/pvti.api b/src/plugins/pvti/pvti.api
new file mode 100644
index 00000000000..859ed1ab6b0
--- /dev/null
+++ b/src/plugins/pvti/pvti.api
@@ -0,0 +1,111 @@
+/*
+ * pvti.api - binary API skeleton
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file pvti.api
+ * @brief VPP control-plane API messages.
+ *
+ * This file defines VPP control-plane binary API messages which are generally
+ * called through a shared memory interface.
+ */
+
+/* Version and type recitations */
+
+option version = "0.0.1";
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+/** \brief A composite type uniquely defining a PVTI tunnel.
+    @param sw_if_index - ignored on create/delete, present in details.
+    @param local_ip - Local (source) IP address
+    @param local_port - Local (source) UDP port
+    @param remote_ip - Remote (destination) IP address
+    @param peer_address_from_payload - learn the peer address from payload
+    @param remote_port - Remote (destination) UDP port
+    @param underlay_mtu - Underlay MTU for packet splitting/coalescing
+    @param underlay_fib_index - Underlay FIB index to be used after encap
+typedef pvti_tunnel
+{
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t local_ip;
+ u16 local_port;
+ vl_api_address_t remote_ip;
+ bool peer_address_from_payload;
+ u16 remote_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+};
+
+
+/** @brief API to create a PVTI interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param interface - the PVTI tunnel definition
+*/
+
+
+define pvti_interface_create
+{
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+define pvti_interface_create_reply
+{
+ option status="in_progress";
+ u32 context;
+ i32 retval;
+
+ /* Index for the newly created interface */
+ vl_api_interface_index_t sw_if_index;
+};
+
+autoreply define pvti_interface_delete {
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+};
+
+
+define pvti_interface_dump
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+define pvti_interface_details
+{
+ option status="in_progress";
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+
diff --git a/src/plugins/pvti/pvti.c b/src/plugins/pvti/pvti.c
new file mode 100644
index 00000000000..524eabc6f3f
--- /dev/null
+++ b/src/plugins/pvti/pvti.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/fib/fib_table.h>
+#include <pvti/pvti.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pmp->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/ip/ip_format_fns.h>
+
+pvti_main_t pvti_main;
+
+/* Format a pvti_tx_peer_t (per-destination TX state) given by pointer;
+ * used by the "show pvti tx peers" CLI. */
+u8 *
+format_pvti_tx_peer_ptr (u8 *s, va_list *args)
+{
+  pvti_tx_peer_t *peer = va_arg (*args, pvti_tx_peer_t *);
+
+  s = format (
+    s,
+    "[%p]%s local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d "
+    "pvti_idx:%d b0_max_clen:%d cseq:%d chunk_count:%d reass_chunk_count:%d",
+    peer, peer->deleted ? " DELETED" : "", format_ip46_address,
+    &peer->local_ip, IP46_TYPE_ANY, peer->local_port, format_ip46_address,
+    &peer->remote_ip, IP46_TYPE_ANY, peer->remote_port, peer->underlay_mtu,
+    peer->underlay_fib_index, peer->pvti_if_index,
+    peer->bo0_max_current_length, peer->current_tx_seq, peer->chunk_count,
+    peer->reass_chunk_count);
+
+  return (s);
+}
+
+/* Format a pvti_rx_peer_t (per-source RX state) given by pointer;
+ * used by the "show pvti rx peers" CLI. */
+u8 *
+format_pvti_rx_peer_ptr (u8 *s, va_list *args)
+{
+  pvti_rx_peer_t *peer = va_arg (*args, pvti_rx_peer_t *);
+
+  s = format (s, "[%p]%s local:%U:%d remote:%U:%d pvti_idx:%d", peer,
+	      peer->deleted ? " DELETED" : "", format_ip46_address,
+	      &peer->local_ip, IP46_TYPE_ANY, peer->local_port,
+	      format_ip46_address, &peer->remote_ip, IP46_TYPE_ANY,
+	      peer->remote_port, peer->pvti_if_index);
+
+  return (s);
+}
+
+/* Lazily allocate the per-thread data vectors on first use; no-op after.
+ * Index [0] serves the IPv4 datapath, [1] IPv6; one element per thread.
+ * NOTE(review): no locking here - presumably only invoked from the main
+ * thread (CLI/API/init paths); confirm before calling from workers. */
+void
+pvti_verify_initialized (pvti_main_t *pvm)
+{
+  if (!pvm->is_initialized)
+    {
+      const int n_threads = vlib_get_n_threads ();
+      vec_validate (pvm->per_thread_data[0], n_threads - 1);
+      vec_validate (pvm->per_thread_data[1], n_threads - 1);
+      pvm->is_initialized = 1;
+    }
+}
+
+/* Enable/disable the pvti-bypass feature on an interface's ip4-unicast or
+ * ip6-unicast arc. Keeps a per-sw_if_index bitmap so repeated calls with
+ * the same state do not re-toggle the feature arc. Silently returns when
+ * sw_if_index does not reference a live interface. */
+void
+vnet_int_pvti_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
+{
+  pvti_main_t *pvm = &pvti_main;
+
+  if (pool_is_free_index (pvm->vnet_main->interface_main.sw_interfaces,
+			  sw_if_index))
+    return;
+
+  pvti_verify_initialized (pvm);
+
+  /* normalize to 0/1 so the bitmap comparison below is meaningful */
+  is_enable = !!is_enable;
+
+  if (is_ip6)
+    {
+      if (clib_bitmap_get (pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index) !=
+	  is_enable)
+	{
+	  vnet_feature_enable_disable ("ip6-unicast", "ip6-pvti-bypass",
+				       sw_if_index, is_enable, 0, 0);
+	  pvm->bm_ip6_bypass_enabled_by_sw_if = clib_bitmap_set (
+	    pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index, is_enable);
+	}
+    }
+  else
+    {
+      if (clib_bitmap_get (pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index) !=
+	  is_enable)
+	{
+	  vnet_feature_enable_disable ("ip4-unicast", "ip4-pvti-bypass",
+				       sw_if_index, is_enable, 0, 0);
+	  pvm->bm_ip4_bypass_enabled_by_sw_if = clib_bitmap_set (
+	    pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index, is_enable);
+	}
+    }
+}
+
+/* Shared CLI handler for "set interface ip[6] pvti-bypass <if> [del]".
+ * Parses the interface name and optional "del", then delegates to
+ * vnet_int_pvti_bypass_mode. */
+static clib_error_t *
+set_ip_pvti_bypass (u32 is_ip6, unformat_input_t *input,
+		    vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index, is_enable;
+
+  sw_if_index = ~0;
+  /* default action is enable; "del" switches to disable */
+  is_enable = 1;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+			 &sw_if_index))
+	;
+      else if (unformat (line_input, "del"))
+	is_enable = 0;
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == sw_if_index)
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, line_input);
+      goto done;
+    }
+
+  vnet_int_pvti_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/* IPv4 variant: thin wrapper passing is_ip6 = 0 to the shared handler. */
+static clib_error_t *
+set_ip4_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+		     vlib_cli_command_t *cmd)
+{
+  return set_ip_pvti_bypass (0, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip_pvti_bypass_command, static) = {
+  .path = "set interface ip pvti-bypass",
+  .function = set_ip4_pvti_bypass,
+  .short_help = "set interface ip pvti-bypass <interface> [del]",
+};
+
+/* IPv6 variant: thin wrapper passing is_ip6 = 1 to the shared handler. */
+static clib_error_t *
+set_ip6_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+		     vlib_cli_command_t *cmd)
+{
+  return set_ip_pvti_bypass (1, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip6_pvti_bypass_command, static) = {
+  .path = "set interface ip6 pvti-bypass",
+  .function = set_ip6_pvti_bypass,
+  .short_help = "set interface ip6 pvti-bypass <interface> [del]",
+};
+
+/*
+ * CLI handler for "pvti interface create ...".
+ * Parses the peer/local addressing and underlay parameters and calls
+ * pvti_if_create. A "peer <ip> <remote-port> <local-port>" clause is
+ * mandatory; everything else has defaults (local_port 12345, MTU 1500).
+ */
+static clib_error_t *
+pvti_interface_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				  vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+
+  // pvti_main_t * pmp = &pvti_main;
+  u32 sw_if_index = ~0;
+  int rv = 0;
+  ip_address_t peer_ip = { 0 };
+  ip_address_t local_ip = { 0 };
+  u32 peer_port = 0;
+  u32 local_port = 12345;
+  u32 underlay_mtu = 1500;
+  u32 underlay_fib_index = ~0;
+  u32 underlay_table_id = ~0;
+  pvti_peer_address_method_t peer_address_method = PVTI_PEER_ADDRESS_FIXED;
+  bool peer_set = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "peer %U %d %d", unformat_ip_address, &peer_ip,
+		    &peer_port, &local_port))
+	{
+	  peer_set = 1;
+	}
+      else if (unformat (line_input, "underlay-mtu %d", &underlay_mtu))
+	{
+	  // MTU set
+	}
+      else if (unformat (line_input, "local-ip %U", unformat_ip_address,
+			 &local_ip))
+	{
+	  // local IP set
+	}
+      else if (unformat (line_input, "underlay-fib %d", &underlay_fib_index))
+	{
+	  // underlay fib set
+	}
+      else if (unformat (line_input, "peer-address-from-payload"))
+	{
+	  peer_address_method = PVTI_PEER_ADDRESS_FROM_PAYLOAD;
+	}
+      else if (unformat (line_input, "underlay-table %d", &underlay_table_id))
+	{
+	  /* NOTE(review): the FIB protocol is derived from peer_ip as
+	   * parsed *so far*, so "underlay-table" only resolves an IPv6
+	   * table correctly when it appears after the "peer" clause -
+	   * confirm whether this ordering constraint is intended. */
+	  fib_protocol_t fib_proto = FIB_PROTOCOL_IP4;
+	  if (peer_ip.version == AF_IP6)
+	    {
+	      fib_proto = FIB_PROTOCOL_IP6;
+	    }
+	  u32 fib_index = fib_table_find (fib_proto, underlay_table_id);
+
+	  if (~0 == fib_index)
+	    {
+	      error = clib_error_return (0, "Nonexistent table id %d",
+					 underlay_table_id);
+	      goto done;
+	    }
+	  underlay_fib_index = fib_index;
+	}
+      else
+	/* NOTE(review): an unrecognized token silently stops parsing
+	 * instead of reporting an error - confirm this is intentional */
+	break;
+    }
+  if (!peer_set)
+    {
+      error = clib_error_return (0, "Please specify a peer...");
+      goto done;
+    }
+
+  rv = pvti_if_create (&local_ip, local_port, &peer_ip, peer_port,
+		       peer_address_method, underlay_mtu, underlay_fib_index,
+		       &sw_if_index);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+      error = clib_error_return (0, "Invalid interface");
+      break;
+
+    default:
+      error = clib_error_return (0, "pvti_if_create returned %d", rv);
+    }
+done:
+  unformat_free (line_input);
+  return error;
+}
+
+/*
+ * CLI handler for "pvti interface delete if-index <sw-ifindex>".
+ * Parses the sw_if_index and deletes the matching PVTI interface via
+ * pvti_if_delete; returns a CLI error if the index is missing or invalid.
+ */
+static clib_error_t *
+pvti_interface_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				  vlib_cli_command_t *cmd)
+{
+  u32 sw_if_index = ~0;
+  int rv = 0;
+  bool if_index_set = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "if-index %d", &sw_if_index))
+	{
+	  if_index_set = 1;
+	}
+      else
+	break;
+    }
+  if (!if_index_set)
+    return clib_error_return (0, "Please specify a sw_if_index...");
+
+  rv = pvti_if_delete (sw_if_index);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+      /* removed unreachable "break" that followed this return */
+      return clib_error_return (0, "Invalid interface");
+
+    default:
+      return clib_error_return (0, "pvti_if_delete returned %d", rv);
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (pvti_interface_create_command, static) = {
+  .path = "pvti interface create",
+  /* fixed typo in help text: "inderlay-fib" -> "underlay-fib" */
+  .short_help =
+    "pvti interface create peer <remote-ip> <remote-port> <local-port> [ "
+    "local-ip <ip-addr> ][ underlay-mtu <MTU>][underlay-table "
+    "<table-index>][underlay-fib <fib-index>]",
+  .function = pvti_interface_create_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_interface_delete_command, static) = {
+  .path = "pvti interface delete",
+  .short_help = "pvti interface delete if-index <sw-ifindex>",
+  .function = pvti_interface_delete_command_fn,
+};
+
+/*
+ * CLI handler for "show pvti interface": print one line per tunnel.
+ */
+static clib_error_t *
+pvti_show_interface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				vlib_cli_command_t *cmd)
+{
+  index_t pvtii;
+  /* if_pool is a pool, not a vector: vec_foreach would also visit
+   * deleted/free slots, so iterate only the occupied indices. */
+  pool_foreach_index (pvtii, pvti_main.if_pool)
+    {
+      vlib_cli_output (vm, "%U", format_pvti_if, pvtii);
+    }
+  return 0;
+}
+
+/* CLI handler for "show pvti tx peers": dump the per-thread TX peer
+ * state for both address families (index 0 = IPv4, 1 = IPv6). */
+static clib_error_t *
+pvti_show_tx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			       vlib_cli_command_t *cmd)
+{
+  pvti_per_thread_data_t *ptd;
+  int is_ip6;
+  for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+    {
+      vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+	{
+	  vlib_cli_output (vm, "thread %d (%s)",
+			   ptd - pvti_main.per_thread_data[is_ip6],
+			   is_ip6 ? "IPv6" : "IPv4");
+	  pvti_tx_peer_t *peer;
+	  vec_foreach (peer, ptd->tx_peers)
+	    {
+	      vlib_cli_output (vm, "  %U", format_pvti_tx_peer_ptr, peer);
+	    }
+	}
+    }
+  return 0;
+}
+
+/* CLI handler for "show pvti rx peers": same as above for RX peers. */
+static clib_error_t *
+pvti_show_rx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			       vlib_cli_command_t *cmd)
+{
+  pvti_per_thread_data_t *ptd;
+  int is_ip6;
+  for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+    {
+      vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+	{
+	  vlib_cli_output (vm, "thread %d (%s)",
+			   ptd - pvti_main.per_thread_data[is_ip6],
+			   is_ip6 ? "IPv6" : "IPv4");
+	  pvti_rx_peer_t *peer;
+	  vec_foreach (peer, ptd->rx_peers)
+	    {
+	      vlib_cli_output (vm, "  %U", format_pvti_rx_peer_ptr, peer);
+	    }
+	}
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (pvti_show_interface_command, static) = {
+  .path = "show pvti interface",
+  .short_help = "show pvti interface",
+  .function = pvti_show_interface_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_tx_peers_command, static) = {
+  .path = "show pvti tx peers",
+  .short_help = "show pvti tx peers",
+  .function = pvti_show_tx_peers_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_rx_peers_command, static) = {
+  .path = "show pvti rx peers",
+  .short_help = "show pvti rx peers",
+  .function = pvti_show_rx_peers_command_fn,
+};
+
+/* implemented in api.c; NOTE(review): prefer a (void) prototype */
+void pvti_api_init ();
+
+/* Register the bypass nodes on the ip4/ip6 unicast feature arcs.
+ * runs_before is empty by default; pvti_early_config below can extend
+ * the ip4 list from startup.conf. */
+VNET_FEATURE_INIT (pvti4_bypass, static) = {
+  .arc_name = "ip4-unicast",
+  .node_name = "ip4-pvti-bypass",
+  .runs_before = 0,
+};
+
+VNET_FEATURE_INIT (pvti6_bypass, static) = {
+  .arc_name = "ip6-unicast",
+  .node_name = "ip6-pvti-bypass",
+  .runs_before = 0,
+};
+
+/* Early config handler for the "pvti { ... }" startup.conf section.
+ * Supports "runs-before <node-name>" to splice an ordering constraint
+ * into the ip4 bypass feature registration before arcs are built.
+ * NOTE(review): the clib_warning below looks like leftover debug output;
+ * NOTE(review): only the ip4 registration is patched, not ip6 - confirm
+ * whether that asymmetry is intentional. */
+static clib_error_t *
+pvti_early_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  clib_warning ("early config pvti");
+  u8 *runs_before = 0;
+  int rbi = 0;
+  /* pick the slot of the trailing NULL terminator (or slot 0 if empty) */
+  if (vec_len (vnet_feat_pvti4_bypass.runs_before) == 0)
+    {
+      rbi = 0;
+    }
+  else
+    {
+      rbi = vec_len (vnet_feat_pvti4_bypass.runs_before) - 1;
+    }
+  vec_validate (vnet_feat_pvti4_bypass.runs_before, rbi);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "runs-before %v", &runs_before))
+	{
+	  /* NUL-terminate the parsed name, store it, and re-add the
+	   * NULL sentinel expected at the end of runs_before */
+	  vec_add1 (runs_before, 0);
+	  vnet_feat_pvti4_bypass.runs_before[rbi] = (char *) runs_before;
+	  vec_add1 (vnet_feat_pvti4_bypass.runs_before, 0);
+	}
+      else
+	return clib_error_return (0, "unknown input");
+    }
+
+  return NULL;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (pvti_early_config, "pvti");
+
+/* Plugin init: cache the vlib/vnet main pointers and register the
+ * binary API handlers. Per-thread data is allocated lazily later via
+ * pvti_verify_initialized.
+ * NOTE(review): the clib_warning looks like leftover debug output. */
+static clib_error_t *
+pvti_init (vlib_main_t *vm)
+{
+  pvti_main_t *pmp = &pvti_main;
+  clib_error_t *error = 0;
+  clib_warning ("pvti init");
+
+  pmp->vlib_main = vm;
+  pmp->vnet_main = vnet_get_main ();
+  pmp->is_initialized = 0;
+
+  pvti_api_init ();
+  return error;
+}
+
+VLIB_INIT_FUNCTION (pvti_init);
+
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "Packet Vector Tunnel Interface plugin",
+};
diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h
new file mode 100644
index 00000000000..ac097c5ecca
--- /dev/null
+++ b/src/plugins/pvti/pvti.h
@@ -0,0 +1,257 @@
+/*
+ * pvti.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_h__
+#define __included_pvti_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#define VPP_MAX_THREADS (1 << 8)
+
+/* upper bound on distinct stream indices tracked per RX peer */
+#define MAX_RX_STREAMS 256
+
+#define PVTI_ALIGN_BYTES 9
+
+/* On-the-wire PVTI header, carried after the IP/UDP encap. */
+typedef CLIB_PACKED (struct {
+  u32 seq;
+  u8 stream_index; // set to the cpu# on the sending side
+  u8 chunk_count;
+  u8 reass_chunk_count; // number of chunks in the front that are related to
+			// previously started buffer
+  // mandatory_flags_mask highlights which of the flags cause packet drop if
+  // not understood, and which of them can be just ignored.
+  u8 mandatory_flags_mask;
+  u8 flags_value;
+  u8 pad_bytes;
+  u8 pad[0];
+}) pvti_packet_header_t;
+
+/* IPv4 + UDP encapsulation header prepended on output. */
+typedef CLIB_PACKED (struct {
+  ip4_header_t ip4;
+  udp_header_t udp;
+  // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip4_encap_header_t;
+
+/* IPv6 + UDP encapsulation header prepended on output. */
+typedef CLIB_PACKED (struct {
+  ip6_header_t ip6;
+  udp_header_t udp;
+  // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip6_encap_header_t;
+
+/* Per-chunk header inside a PVTI packet; chunk_data follows inline. */
+typedef CLIB_PACKED (struct {
+  u16 total_chunk_length;
+  // More fragments: this chunk is not the last block fragment
+#define CHUNK_FLAGS_MF (1 << 0)
+  // More blocks: this block has chained blocks that follow
+#define CHUNK_FLAGS_MB (1 << 1)
+  u16 _pad0;
+  u32 _pad1;
+  u8 chunk_data[0];
+}) pvti_chunk_header_t;
+
+/* TX-side state for one outgoing stream (per-interface). */
+typedef struct
+{
+  // a buffer being built from the smaller packets
+  u32 bi0;
+
+  // how big can this buffer grow
+  u32 bi0_max_current_length;
+
+  // how many chunks are already in the buffer
+  u8 chunk_count;
+  // leading reassembly chunk count
+  u8 reass_chunk_count;
+
+  u32 current_tx_seq;
+} pvti_per_tx_stream_data_t;
+
+/* RX-side reassembly state for one incoming stream. */
+typedef struct
+{
+  /* The seq# that we last processed */
+  u32 last_rx_seq;
+
+  // a current buffer that is being reassembled
+  u32 rx_bi0;
+  // The root buffer, most of the times == rx_bi0 except in the case of chained
+  // buffers.
+  u32 rx_bi0_first;
+
+  // Next index for dispatch when the reassembly is done
+  u16 rx_next0;
+  // expected total inner length for the packet
+  u16 rx_expected_inner_length;
+  u16 rx_received_inner_length;
+
+} pvti_per_rx_stream_data_t;
+
+/* Per-destination TX state, kept per worker thread. */
+typedef struct
+{
+  ip_address_t local_ip;
+  ip_address_t remote_ip;
+  u16 remote_port;
+  u16 local_port;
+  u16 underlay_mtu;
+  u32 underlay_fib_index;
+
+  /* index of the owning interface in pvti_main.if_pool */
+  u32 pvti_if_index;
+  /* set when the owning interface is deleted; entry is then stale */
+  bool deleted;
+  bool is_bo0_traced;
+
+  u32 bo0_max_current_length;
+
+  u8 chunk_count;
+  u8 reass_chunk_count;
+  u32 current_tx_seq;
+  /* buffer currently being filled with coalesced chunks */
+  vlib_buffer_t *bo0;
+
+} pvti_tx_peer_t;
+
+/* Per-source RX state, kept per worker thread. */
+typedef struct
+{
+  ip_address_t local_ip;
+  ip_address_t remote_ip;
+  u16 remote_port;
+  u16 local_port;
+
+  /* per-stream reassembly state, indexed by sender's stream_index */
+  pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS];
+
+  u32 pvti_if_index;
+  bool deleted;
+} pvti_rx_peer_t;
+
+/* Everything one worker thread needs for one address family. */
+typedef struct
+{
+  /* pool of destination-based structures which are used to build the packets
+   */
+  pvti_tx_peer_t *tx_peers;
+
+  /* vector of buffers to send */
+  u32 *pending_tx_buffers;
+  u16 *pending_tx_nexts;
+  /* pool of source-based structures for the remote peers' data tracking
+   */
+  pvti_rx_peer_t *rx_peers;
+
+  /* vector of buffers being decapsulated */
+  u32 *pending_rx_buffers;
+  u16 *pending_rx_nexts;
+
+} pvti_per_thread_data_t;
+
+/* Control-plane representation of one PVTI tunnel interface. */
+typedef struct
+{
+  ip_address_t local_ip;
+  ip_address_t remote_ip;
+  u16 remote_port;
+  u16 local_port;
+  u16 underlay_mtu;
+  u32 underlay_fib_index;
+  /* true when the peer is learned from incoming payload, not fixed */
+  bool peer_address_from_payload;
+  /* cpu timestamp at creation time */
+  u64 created_at;
+
+  u32 sw_if_index;
+  u32 hw_if_index;
+
+  // per-stream data for TX
+  pvti_per_tx_stream_data_t tx_streams[256];
+  pvti_per_rx_stream_data_t rx_streams[256];
+
+} pvti_if_t;
+
+/* Plugin-global state singleton (pvti_main). */
+typedef struct
+{
+  /* API message ID base */
+  u16 msg_id_base;
+
+  /* have we initialized the data structures ? */
+  bool is_initialized;
+
+  /* interface pool */
+  pvti_if_t *if_pool;
+
+  /* if_index in the pool above by sw_if_index */
+  index_t *if_index_by_sw_if_index;
+
+  /* indices by port */
+  index_t **if_indices_by_port;
+
+  /* per-thread data, ip4[0] and ip6[1] */
+  pvti_per_thread_data_t *per_thread_data[2];
+
+  /* on/off switch for the periodic function */
+  u8 periodic_timer_enabled;
+  /* Node index, non-zero if the periodic process has been created */
+  u32 periodic_node_index;
+
+  /* bitmaps tracking which sw_if_index has the bypass feature enabled */
+  uword *bm_ip4_bypass_enabled_by_sw_if;
+  uword *bm_ip6_bypass_enabled_by_sw_if;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+  ethernet_main_t *ethernet_main;
+} pvti_main_t;
+
+extern pvti_main_t pvti_main;
+
+extern vlib_node_registration_t pvti_node;
+extern vlib_node_registration_t pvti4_input_node;
+extern vlib_node_registration_t pvti4_output_node;
+extern vlib_node_registration_t pvti6_input_node;
+extern vlib_node_registration_t pvti6_output_node;
+extern vlib_node_registration_t pvti_periodic_node;
+
+/* Build the 8-bit stream index carried on the wire: low 7 bits are the
+ * current thread index, top bit flags IPv6.
+ * NOTE(review): the ASSERT limits usable threads to 128, while
+ * VPP_MAX_THREADS above is 256 - confirm the intended limit. */
+always_inline u8
+pvti_get_stream_index (int is_ip6)
+{
+  u32 thread_index = vlib_get_thread_index ();
+
+  ASSERT ((thread_index & 0xffffff80) == 0);
+
+  u8 stream_index = (thread_index & 0x7f) | (is_ip6 ? 0x80 : 0);
+  return stream_index;
+}
+
+/* Allocate a single fresh vlib buffer with zeroed data offset/length.
+ * Returns the buffer index, or INDEX_INVALID if allocation fails. */
+always_inline u32
+pvti_get_new_buffer (vlib_main_t *vm)
+{
+  u32 bi = INDEX_INVALID;
+
+  if (1 == vlib_buffer_alloc (vm, &bi, 1))
+    {
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+      b->current_data = 0;
+      b->current_length = 0;
+      return bi;
+    }
+  return INDEX_INVALID;
+}
+
+/* Periodic function events (signalled to the periodic process node) */
+#define PVTI_EVENT1 1
+#define PVTI_EVENT2 2
+#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3
+
+void pvti_create_periodic_process (pvti_main_t *);
+/* lazily allocate per-thread data; see pvti.c for details */
+void pvti_verify_initialized (pvti_main_t *pvm);
+
+#endif /* __included_pvti_h__ */
diff --git a/src/plugins/pvti/pvti_if.c b/src/plugins/pvti/pvti_if.c
new file mode 100644
index 00000000000..4f83994a1a4
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Copyright (c) 2020 Doc.ai and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/udp/udp.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Device-class name formatter: renders "pvti<dev_instance>". */
+static u8 *
+format_pvti_if_name (u8 *s, va_list *args)
+{
+  u32 instance = va_arg (*args, u32);
+  s = format (s, "pvti%d", instance);
+  return s;
+}
+
+/* Format one PVTI interface by its pool index (single u32 argument);
+ * used by "show pvti interface". */
+u8 *
+format_pvti_if (u8 *s, va_list *args)
+{
+  index_t pvtii = va_arg (*args, u32);
+  pvti_if_t *pvti_if = pvti_if_get (pvtii);
+
+  s = format (
+    s, "[%d] %U local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d",
+    pvtii, format_vnet_sw_if_index_name, vnet_get_main (),
+    pvti_if->sw_if_index, format_ip46_address, &pvti_if->local_ip,
+    IP46_TYPE_ANY, pvti_if->local_port, format_ip46_address,
+    &pvti_if->remote_ip, IP46_TYPE_ANY, pvti_if->remote_port,
+    pvti_if->underlay_mtu, pvti_if->underlay_fib_index);
+
+  return (s);
+}
+
+/* Map a sw_if_index to a PVTI pool index; INDEX_INVALID if unknown. */
+index_t
+pvti_if_find_by_sw_if_index (u32 sw_if_index)
+{
+  if (sw_if_index >= vec_len (pvti_main.if_index_by_sw_if_index))
+    return INDEX_INVALID;
+
+  index_t ti = pvti_main.if_index_by_sw_if_index[sw_if_index];
+  return (ti == ~0) ? INDEX_INVALID : ti;
+}
+
+/* Find a tunnel matching remote IPv4 address + port. A tunnel with
+ * peer_address_from_payload set matches on port alone. Returns the
+ * pool index or INDEX_INVALID. */
+index_t
+pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+				     u16 remote_port)
+{
+  pvti_if_t *ifc;
+  pool_foreach (ifc, pvti_main.if_pool)
+    {
+      if ((ifc->remote_port == remote_port) &&
+	  (ifc->remote_ip.version == AF_IP4) &&
+	  ((ifc->remote_ip.ip.ip4.as_u32 == remote_ip4->as_u32) ||
+	   ifc->peer_address_from_payload))
+	{
+	  return (ifc - pvti_main.if_pool);
+	}
+    }
+  return INDEX_INVALID;
+}
+
+/* IPv6 variant of the lookup above. */
+index_t
+pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
+				     u16 remote_port)
+{
+  pvti_if_t *ifc;
+  pool_foreach (ifc, pvti_main.if_pool)
+    {
+      if ((ifc->remote_port == remote_port) &&
+	  (ifc->remote_ip.version == AF_IP6) &&
+	  ((0 == memcmp (&ifc->remote_ip.ip.ip6, remote_ip6,
+			 sizeof (*remote_ip6))) ||
+	   ifc->peer_address_from_payload))
+	{
+	  return (ifc - pvti_main.if_pool);
+	}
+    }
+  return INDEX_INVALID;
+}
+
+/* Address-family-agnostic variant, comparing full ip_address_t. */
+index_t
+pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, u16 remote_port)
+{
+  pvti_if_t *ifc;
+  pool_foreach (ifc, pvti_main.if_pool)
+    {
+      if ((ifc->remote_port == remote_port) &&
+	  (ifc->peer_address_from_payload ||
+	   (0 == ip_address_cmp (remote_ip, &ifc->remote_ip))))
+	{
+	  return (ifc - pvti_main.if_pool);
+	}
+    }
+  return INDEX_INVALID;
+}
+
+/* Append tunnel index t_index to the per-port list, growing the outer
+ * vector as needed so 'port' is a valid index. */
+static void
+pvti_add_tidx_by_port (index_t t_index, u16 port)
+{
+  pvti_main_t *pvm = &pvti_main;
+  vec_validate_init_empty (pvm->if_indices_by_port, port, NULL);
+  vec_add1 (pvm->if_indices_by_port[port], t_index);
+}
+
+/*
+ * Remove tunnel index t_index from the per-port list for 'port'.
+ * Quietly a no-op if no list exists for that port.
+ */
+static void
+pvti_del_tidx_by_port (index_t t_index, u16 port)
+{
+  pvti_main_t *pvm = &pvti_main;
+  index_t *ii;
+  if (!pvm->if_indices_by_port)
+    {
+      return;
+    }
+  if (port >= vec_len (pvm->if_indices_by_port))
+    {
+      return;
+    }
+  if (vec_len (pvm->if_indices_by_port[port]) == 0)
+    {
+      /* NOTE(review): this asserts the vector *pointer* is non-NULL,
+       * not the element count; likely intended ALWAYS_ASSERT (0) */
+      ALWAYS_ASSERT (pvm->if_indices_by_port[port] > 0);
+      /* not reached */
+      return;
+    }
+
+  vec_foreach (ii, pvm->if_indices_by_port[port])
+    {
+      if (*ii == t_index)
+	{
+	  /* vec_del1 takes an element index: ii - base (the original
+	   * code passed base - ii, a negative/garbage index) */
+	  vec_del1 (pvm->if_indices_by_port[port],
+		    ii - pvm->if_indices_by_port[port]);
+	  break;
+	}
+    }
+}
+
+/*
+ * Number of tunnels currently registered on a given local UDP port.
+ * Returns 0 when the per-port table is absent or the port was never
+ * registered (vec_elt would assert on an out-of-range index).
+ */
+static u32
+pvti_get_tunnel_count_by_port (u16 port)
+{
+  pvti_main_t *pvm = &pvti_main;
+  if (!pvm->if_indices_by_port)
+    {
+      return 0;
+    }
+  if (port >= vec_len (pvm->if_indices_by_port))
+    {
+      return 0;
+    }
+  return vec_len (vec_elt (pvm->if_indices_by_port, port));
+}
+
+/* Admin up/down callback: mirror admin state into the HW link state. */
+static clib_error_t *
+pvti_if_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  u32 hw_flags = 0;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+
+  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+  return NULL;
+}
+
+/* Adjacency update hook for the PVTI hw interface class. */
+void
+pvti_if_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
+{
+
+  /* Convert any neighbour adjacency that has a next-hop reachable through
+   * the pvti interface into a midchain. This is to avoid sending ARP/ND to
+   * resolve the next-hop address via the pvti interface. Then, if one of the
+   * peers has matching prefix among allowed prefixes, the midchain will be
+   * updated to the corresponding one.
+   */
+  adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL);
+
+  // wgii = wg_if_find_by_sw_if_index (sw_if_index);
+  // wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai);
+}
+
+VNET_DEVICE_CLASS (pvti_if_device_class) = {
+  .name = "Packet Vectorizer Tunnel",
+  .format_device_name = format_pvti_if_name,
+  .admin_up_down_function = pvti_if_admin_up_down,
+};
+
+VNET_HW_INTERFACE_CLASS (pvti_hw_interface_class) = {
+  .name = "PVTunnel",
+  .update_adjacency = pvti_if_update_adj,
+  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+  // .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
+};
+
+/*
+ * Create a PVTI tunnel interface: allocate pool state, register the
+ * vnet hw/sw interface pair, set the L3 output node, and register the
+ * local UDP port with the ip4/ip6 UDP dispatchers (first tunnel on a
+ * port only). On success writes the new sw_if_index to *sw_if_indexp
+ * and returns 0.
+ * NOTE(review): no duplicate/conflict check on (local_port) vs other
+ * UDP consumers - confirm the intended behavior on port clashes.
+ */
+int
+pvti_if_create (ip_address_t *local_ip, u16 local_port,
+		ip_address_t *remote_ip, u16 remote_port,
+		pvti_peer_address_method_t peer_address_method,
+		u16 underlay_mtu, u32 underlay_fib_index, u32 *sw_if_indexp)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  pvti_main_t *pvm = &pvti_main;
+  u32 hw_if_index;
+  vnet_hw_interface_t *hi;
+  pvti_verify_initialized (pvm);
+
+  pvti_if_t *pvti_if;
+
+  ASSERT (sw_if_indexp);
+
+  *sw_if_indexp = (u32) ~0;
+
+  pool_get_zero (pvti_main.if_pool, pvti_if);
+  pvti_if->local_ip = *local_ip;
+  pvti_if->local_port = local_port;
+  pvti_if->remote_ip = *remote_ip;
+  if (peer_address_method == PVTI_PEER_ADDRESS_FROM_PAYLOAD)
+    {
+      pvti_if->peer_address_from_payload = 1;
+    }
+  pvti_if->remote_port = remote_port;
+  pvti_if->underlay_mtu = underlay_mtu;
+  pvti_if->underlay_fib_index = underlay_fib_index;
+  pvti_if->created_at = clib_cpu_time_now ();
+
+  /* tunnel index (or instance) */
+  u32 t_idx = pvti_if - pvti_main.if_pool;
+
+  hw_if_index =
+    vnet_register_interface (vnm, pvti_if_device_class.index, t_idx,
+			     pvti_hw_interface_class.index, t_idx);
+
+  pvti_if->hw_if_index = hw_if_index;
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  pvti_if->sw_if_index = *sw_if_indexp = hi->sw_if_index;
+
+  vec_validate_init_empty (pvm->if_index_by_sw_if_index, hi->sw_if_index,
+			   INDEX_INVALID);
+
+  vec_elt (pvm->if_index_by_sw_if_index, hi->sw_if_index) = t_idx;
+  pvti_if_t *pvti_if0 = pool_elt_at_index (pvti_main.if_pool, t_idx);
+  int i;
+  /* mark all stream buffers as unallocated; TX seq starts at an
+   * arbitrary nonzero value (42) - presumably just a debugging aid */
+  for (i = 0; i < 256; i++)
+    {
+      pvti_if0->tx_streams[i].bi0 = INDEX_INVALID;
+      pvti_if0->tx_streams[i].current_tx_seq = 42;
+
+      pvti_if0->rx_streams[i].rx_bi0 = INDEX_INVALID;
+      pvti_if0->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+    }
+
+  /*
+  int is_ip6 = 0;
+  u32 encap_index = !is_ip6 ?
+  pvti4_output_node.index : pvti6_output_node.index;
+  vnet_set_interface_output_node (vnm, pvti_if->hw_if_index, encap_index);
+  */
+  /* NOTE(review): output node is hardcoded to the ip4 variant; the ip6
+   * selection above is commented out - confirm ip6 underlay output */
+  vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+				     (u8 *) "pvti4-output");
+
+  /* first tunnel on this local port registers it for both AFs */
+  pvti_add_tidx_by_port (t_idx, local_port);
+  if (1 == pvti_get_tunnel_count_by_port (local_port))
+    {
+      clib_warning ("Registering local port %d", local_port);
+      udp_register_dst_port (vlib_get_main (), local_port,
+			     pvti4_input_node.index, UDP_IP4);
+      udp_register_dst_port (vlib_get_main (), local_port,
+			     pvti6_input_node.index, UDP_IP6);
+    }
+  else
+    {
+      clib_warning ("Not registering the port");
+    }
+
+  vnet_hw_interface_set_flags (vnm, pvti_if->hw_if_index,
+			       VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+  return 0;
+}
+
+/* Invoke fn for every live tunnel index; stop early on WALK_STOP. */
+void
+pvti_if_walk (pvti_if_walk_cb_t fn, void *data)
+{
+  index_t pvtii;
+
+  pool_foreach_index (pvtii, pvti_main.if_pool)
+    {
+      if (WALK_STOP == fn (pvtii, data))
+	break;
+    }
+}
+
+/*
+ * Delete the PVTI tunnel interface identified by sw_if_index: undo the
+ * per-port registration (unregistering the UDP port when the last
+ * tunnel on it goes away), remove the vnet interface, free the pool
+ * entry, and mark any per-thread TX peer state as deleted so the
+ * datapath stops using it. Returns 0 or a VNET_API_ERROR_* code.
+ */
+int
+pvti_if_delete (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  pvti_main_t *pvm = &pvti_main;
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  /* make sure this sw_if_index actually belongs to a PVTI device */
+  if (hw == 0 || hw->dev_class_index != pvti_if_device_class.index)
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  pvti_if_t *ifc;
+  bool found = 0;
+  pool_foreach (ifc, pvm->if_pool)
+    {
+      if (ifc->sw_if_index == sw_if_index)
+	{
+	  found = 1;
+	  break;
+	}
+    }
+  if (!found)
+    {
+      return VNET_API_ERROR_INVALID_VALUE_2;
+    }
+  index_t tidx = ifc - pvm->if_pool;
+
+  u16 local_port = ifc->local_port;
+  pvti_del_tidx_by_port (tidx, local_port);
+  pvm->if_index_by_sw_if_index[sw_if_index] = INDEX_INVALID;
+
+  /* last tunnel on this port: drop both UDP registrations
+   * (third argument is is_ip4: 1 = ip4, 0 = ip6) */
+  if (0 == pvti_get_tunnel_count_by_port (local_port))
+    {
+      udp_unregister_dst_port (vlib_get_main (), local_port, 1);
+      udp_unregister_dst_port (vlib_get_main (), local_port, 0);
+    }
+
+  vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
+  vnet_delete_hw_interface (vnm, hw->hw_if_index);
+  pool_put (pvti_main.if_pool, ifc);
+
+  /* mark per-thread peers as deleted (vec_foreach on a NULL vector is
+   * a safe no-op if per-thread data was never allocated) */
+  pvti_per_thread_data_t *ptd;
+
+  vec_foreach (ptd, pvm->per_thread_data[0])
+    {
+      pvti_tx_peer_t *peer;
+      vec_foreach (peer, ptd->tx_peers)
+	{
+	  if (tidx == peer->pvti_if_index)
+	    {
+	      peer->deleted = 1;
+	    }
+	}
+    }
+  vec_foreach (ptd, pvm->per_thread_data[1])
+    {
+      pvti_tx_peer_t *peer;
+      vec_foreach (peer, ptd->tx_peers)
+	{
+	  if (tidx == peer->pvti_if_index)
+	    {
+	      peer->deleted = 1;
+	    }
+	}
+    }
+
+  return 0;
+}
diff --git a/src/plugins/pvti/pvti_if.h b/src/plugins/pvti/pvti_if.h
new file mode 100644
index 00000000000..44bf22ce825
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.h
@@ -0,0 +1,47 @@
+#ifndef PVTI_IF_H
+#define PVTI_IF_H
+
+#include <vnet/interface_funcs.h>
+
+/* How a tunnel determines its peer address */
+typedef enum
+{
+  PVTI_PEER_ADDRESS_FIXED = 0,
+  PVTI_PEER_ADDRESS_FROM_PAYLOAD
+} pvti_peer_address_method_t;
+
+/* Walk callback: return WALK_STOP to terminate the walk early. */
+typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t wgi, void *data);
+void pvti_if_walk (pvti_if_walk_cb_t fn, void *data);
+
+int pvti_if_create (ip_address_t *local_ip, u16 local_port,
+		    ip_address_t *remote_ip, u16 remote_port,
+		    pvti_peer_address_method_t peer_address_method,
+		    u16 underlay_mtu, u32 underlay_fib_index,
+		    u32 *sw_if_indexp);
+index_t pvti_if_find_by_sw_if_index (u32 sw_if_index);
+index_t pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+					     u16 remote_port);
+/* parameter renamed from "remote_ip4": this takes an IPv6 address */
+index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
+					     u16 remote_port);
+
+index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip,
+					    u16 remote_port);
+
+int pvti_if_delete (u32 sw_if_index);
+
+u8 *format_pvti_if (u8 *s, va_list *args);
+
+/* Resolve a pool index to its pvti_if_t; NULL for INDEX_INVALID. */
+static_always_inline pvti_if_t *
+pvti_if_get (index_t pvtii)
+{
+  return (pvtii == INDEX_INVALID) ?
+	   NULL :
+	   pool_elt_at_index (pvti_main.if_pool, pvtii);
+}
+
+/* Inverse of pvti_if_get: pool index of an interface element. */
+static_always_inline index_t
+pvti_if_get_index (pvti_if_t *pvti_if)
+{
+  return pvti_if - pvti_main.if_pool;
+}
+
+#endif
diff --git a/test/test_pvti.py b/test/test_pvti.py
new file mode 100644
index 00000000000..94ae7790323
--- /dev/null
+++ b/test/test_pvti.py
@@ -0,0 +1,1153 @@
+#!/usr/bin/env python3
+""" PVTI tests """
+
+import datetime
+import base64
+import os
+import copy
+import struct
+
+from hashlib import blake2s
+from config import config
+from scapy.packet import Raw
+from scapy.compat import raw
+from scapy.layers.l2 import Ether
+from scapy.layers.inet import IP, UDP
+from scapy.layers.inet6 import IPv6
+from scapy.layers.vxlan import VXLAN
+
+from vpp_interface import VppInterface
+from vpp_pg_interface import is_ipv6_misc
+from vpp_ip_route import VppIpRoute, VppRoutePath
+from vpp_l2 import VppBridgeDomain, VppBridgeDomainPort
+from vpp_vxlan_tunnel import VppVxlanTunnel
+from vpp_object import VppObject
+from vpp_papi import VppEnum
+from asfframework import tag_run_solo, tag_fixme_vpp_debug
+from framework import VppTestCase
+from re import compile
+import unittest
+
+
+from scapy.packet import Packet, bind_layers
+from scapy.layers.l2 import Ether
+from scapy.layers.inet import IP, UDP
+from scapy.layers.inet6 import IPv6
+from scapy.fields import (
+ FlagsField,
+ XByteField,
+ XShortField,
+ ThreeBytesField,
+ ConditionalField,
+ ShortField,
+ ByteEnumField,
+ X3BytesField,
+ LEIntField,
+ ByteField,
+ StrLenField,
+ PacketListField,
+ LEShortField,
+ IntField,
+ ShortField,
+ XIntField,
+)
+
+import sys
+
+
def eprint(*args, **kwargs):
    """Like print(), but writes to standard error (test diagnostics)."""
    print(*args, file=sys.stderr, **kwargs)
+
+
#
# A custom Scapy decoder for the PVTI packet format
#
+
+
class PVTIChunk(Packet):
    """One chunk within a PVTI packet: an 8-byte header (2-byte total
    length + 6 bytes of padding) followed by the chunk data."""

    name = "PVTIChunk"
    fields_desc = [
        ShortField("total_chunk_length", None),
        XShortField("_pad0", 0),
        XIntField("_pad1", 0),
        StrLenField("data", "", length_from=lambda pkt: pkt.total_chunk_length - 8),
    ]

    # This prevents the first chunk from consuming the entire remaining
    # contents of the packet
    def extract_padding(self, s):
        # BUGFIX: return bytes, not str - mixing str/bytes here is
        # incorrect under Python 3 scapy.
        return b"", s

    def post_build(self, p, pay):
        # Auto-fill total_chunk_length when the caller did not set it.
        # BUGFIX: the original also required non-empty data, which left an
        # empty chunk with a length field of 0 instead of the header size 8.
        if self.total_chunk_length is None:
            chunk_header_size = 8
            chunk_len = chunk_header_size + len(self.data or b"")
            p = struct.pack("!H", chunk_len) + p[2:]
        return p + pay
+
+
class PVTI(Packet):
    """PVTI packet header: fixed fields, alignment padding, then a list
    of PVTIChunk elements (count given by chunk_count)."""

    name = "PVTI"
    PVTI_ALIGN_BYTES = 9
    fields_desc = [
        IntField("seq", 0x0),
        ByteField("stream_index", 0),
        ByteField("chunk_count", None),
        ByteField("reass_chunk_count", 0),
        ByteField("mandatory_flags_mask", 0),
        ByteField("flags_value", 0),
        ByteField("pad_bytes", PVTI_ALIGN_BYTES),
        StrLenField(
            "pad", b"\xca" * PVTI_ALIGN_BYTES, length_from=lambda pkt: pkt.pad_bytes
        ),
        PacketListField("chunks", [], PVTIChunk, count_from=lambda p: p.chunk_count),
    ]

    def mysummary(self):
        # BUGFIX: the original formatted "%PVTI.total_len%", but no
        # "total_len" field exists; summarize using real fields instead.
        return self.sprintf("PVTI (seq=%PVTI.seq%, chunks=%PVTI.chunk_count%)")

    def post_build(self, p, pay):
        # Auto-fill chunk_count from the number of chunks when unset.
        if self.chunk_count is None:
            n_chunks = len(self.chunks)
            # offset of the chunk count within the fields
            offset_of_chunk_count = 5
            p = (
                p[:offset_of_chunk_count]
                # BUGFIX: "B" (unsigned) - chunk_count is a ByteField with
                # range 0..255; the signed "b" raises for counts > 127.
                + struct.pack("B", n_chunks)
                + p[offset_of_chunk_count + 1 :]
            )
        return p + pay
+
+
# Dissect UDP payloads on destination port 12312 (the port used by the
# tests below) as PVTI.
bind_layers(UDP, PVTI, dport=12312)
# By default, set both ports to the test
# bind_layers(UDP, PVTI, sport=6192, dport=6192)
+
+
# PVTI ENcapsulator/DEcapsulator
class PvtiEnDe(object):
    """
    PVTI encapsulator/decapsulator.

    Python reference implementation used to cross-check the encapsulation
    performed by the VPP PVTI plugin: it packs payload packets into PVTI
    chunks, splitting them across underlay packets when the MTU requires.
    """

    def __init__(
        self,
        local_ip,
        local_port,
        remote_ip,
        remote_port,
        underlay_mtu=1500,
        for_rx_test=False,
    ):
        # for_rx_test: build packets in the remote->local direction, as if
        # they had been received from the tunnel peer.
        self.for_rx_test = for_rx_test
        self.local_ip = local_ip
        self.local_port = local_port
        self.remote_ip = remote_ip
        self.remote_port = remote_port
        self.underlay_mtu = underlay_mtu
        self.stream_index = 0
        # chunks accumulated for the PVTI packet currently being built
        self.tx_chunks = []
        # how many of tx_chunks are non-initial reassembly fragments
        self.tx_n_reass_chunks = 0
        self.tx_seq = 42
        # payload = chunk headers + data
        self.max_payload_len = underlay_mtu - len(raw(IP() / UDP() / PVTI()))
        self.pvti_header_len = len(raw(PVTI()))
        self.chunk_header_len = len(raw(PVTIChunk()))

    def get_curr_payload_len(self):
        """Total size of the accumulated chunks, headers included."""
        tx_len = 0
        for c in self.tx_chunks:
            tx_len = tx_len + len(c.data) + self.chunk_header_len
        return tx_len

    def get_payload_room(self):
        """Bytes still available in the PVTI packet being built."""
        return self.max_payload_len - self.get_curr_payload_len()

    def flush_tx_chunks(self, more_frags=False):
        """Emit one encapsulated IP/UDP/PVTI packet from the accumulated
        chunks and reset the accumulation state."""
        if self.for_rx_test:
            ip_dst = self.local_ip
            ip_src = self.remote_ip
        else:
            ip_src = self.local_ip
            ip_dst = self.remote_ip
        p = (
            IP(
                src=ip_src,
                dst=ip_dst,
                ttl=127,
                frag=0,
                flags=0,
                id=self.tx_seq,
            )
            / UDP(sport=self.local_port, dport=self.remote_port, chksum=0)
            / PVTI(
                reass_chunk_count=self.tx_n_reass_chunks,
                seq=self.tx_seq,
                stream_index=self.stream_index,
                chunks=self.tx_chunks,
            )
        )

        # Rebuild from raw bytes so all auto-computed fields are finalized.
        p = IP(raw(p))

        self.tx_n_reass_chunks = 0
        self.tx_chunks = []
        self.tx_seq = self.tx_seq + 1
        return p

    def encap_pkt(self, p):
        """Encapsulate one IPv4/IPv6 packet. Returns the list of complete
        PVTI packets emitted while doing so (possibly empty if the chunk
        still fits into the packet under construction)."""
        out = []
        if IP in p:
            p[IP].ttl = p[IP].ttl - 1
            payload_wip = p[IP].build()
        elif IPv6 in p:
            p[IPv6].hlim = p[IPv6].hlim - 1
            payload_wip = p[IPv6].build()
        else:
            # BUGFIX: the original fell through with payload_wip unbound and
            # later crashed with an UnboundLocalError; fail explicitly.
            raise ValueError("PvtiEnDe can only encapsulate IPv4 or IPv6 packets")

        split_chunks = False

        while True:
            available_room = self.get_payload_room()
            chunk_wip_len = len(payload_wip) + self.chunk_header_len
            xpad0 = 0xABAB
            xpad1 = 0xABABABAB

            if chunk_wip_len <= available_room:
                # happy case - there is enough space to fit the entire chunk
                if split_chunks:
                    self.tx_n_reass_chunks = self.tx_n_reass_chunks + 1
                tx = PVTIChunk(data=payload_wip, _pad0=xpad0, _pad1=xpad1)
                self.tx_chunks.append(tx)
                if chunk_wip_len == available_room:
                    # an unlikely perfect fit - send this packet.
                    out.append(self.flush_tx_chunks())
                break
            elif available_room < self.chunk_header_len + 1:
                # Can not fit even a chunk header + 1 byte of data
                # Flush and retry
                out.append(self.flush_tx_chunks())
                continue
            else:
                # Chop as much as we can from the packet
                chop_len = available_room - self.chunk_header_len
                if split_chunks:
                    self.tx_n_reass_chunks = self.tx_n_reass_chunks + 1
                tx = PVTIChunk(data=payload_wip[:chop_len], _pad0=xpad0, _pad1=xpad1)
                self.tx_chunks.append(tx)
                out.append(self.flush_tx_chunks())
                split_chunks = True
                payload_wip = payload_wip[chop_len:]
                continue
        return out

    def encap_packets(self, pkts):
        """Encapsulate a list of packets, flushing any trailing partially
        filled PVTI packet at the end."""
        out = []
        self.start_encap()
        for p in pkts:
            out.extend(self.encap_pkt(p))
        last_pkt = self.finish_encap()
        if last_pkt is not None:
            out.append(last_pkt)
        return out

    def start_encap(self):
        # No state to prime currently; kept for API symmetry.
        return None

    def finish_encap(self):
        """Flush a pending partial packet; returns None if nothing pending."""
        out = None
        if len(self.tx_chunks) > 0:
            out = self.flush_tx_chunks()
        return out
+
+
+""" TestPvti is a subclass of VPPTestCase classes.
+
+PVTI test.
+
+"""
+
+
def get_field_bytes(pkt, name):
    """Return the machine (wire-format) representation of a named field
    of a scapy packet."""
    field, value = pkt.getfield_and_val(name)
    return field.i2m(pkt, value)
+
+
class VppPvtiInterface(VppInterface):
    """
    VPP PVTI interface: thin wrapper over the pvti_interface_* API calls,
    plus a helper to verify VPP's encapsulation against the Python
    reference implementation (PvtiEnDe).
    """

    def __init__(
        self, test, local_ip, local_port, remote_ip, remote_port, underlay_mtu=1500
    ):
        super(VppPvtiInterface, self).__init__(test)

        self.local_ip = local_ip
        self.local_port = local_port
        self.remote_ip = remote_ip
        self.remote_port = remote_port
        self.underlay_mtu = underlay_mtu

    def get_ende(self, for_rx_test=False):
        """Return a PvtiEnDe reference codec configured like this interface."""
        return PvtiEnDe(
            self.local_ip,
            self.local_port,
            self.remote_ip,
            self.remote_port,
            self.underlay_mtu,
            for_rx_test,
        )

    def verify_encap_packets(self, orig_pkts, recv_pkts):
        """Encapsulate orig_pkts with the Python reference codec and pair
        the results with the packets actually received from VPP. Returns
        (received, reference) lists for the caller to assertEqual on."""
        ende = self.get_ende()
        recv2_pkts = ende.encap_packets(orig_pkts)
        out1 = []
        out2 = []
        for i, pkt in enumerate(recv_pkts):
            if IP in pkt:
                rx_pkt = pkt[IP]
            elif IPv6 in pkt:
                rx_pkt = pkt[IPv6]
            else:
                # BUGFIX: 'raise "..."' (raising a plain string) is a
                # TypeError in Python 3; raise a real exception instead.
                raise ValueError("Neither IPv4 nor IPv6")
            py_pkt = recv2_pkts[i]
            if rx_pkt != py_pkt:
                # dump both sides to aid debugging before the caller asserts
                eprint("received packet:")
                rx_pkt.show()
                eprint("python packet:")
                py_pkt.show()
            out1.append(rx_pkt)
            out2.append(py_pkt)
        return (out1, out2)

    def add_vpp_config(self):
        r = self.test.vapi.pvti_interface_create(
            interface={
                "local_ip": self.local_ip,
                "local_port": self.local_port,
                "remote_ip": self.remote_ip,
                "remote_port": self.remote_port,
                "underlay_mtu": self.underlay_mtu,
            }
        )
        self.set_sw_if_index(r.sw_if_index)
        self.test.registry.register(self, self.test.logger)
        return self

    def remove_vpp_config(self):
        self.test.vapi.pvti_interface_delete(sw_if_index=self._sw_if_index)

    def query_vpp_config(self):
        """True iff an interface with our index and addresses exists in VPP."""
        ts = self.test.vapi.pvti_interface_dump(sw_if_index=0xFFFFFFFF)
        for t in ts:
            if (
                t.interface.sw_if_index == self._sw_if_index
                and str(t.interface.local_ip) == self.local_ip
                and t.interface.local_port == self.local_port
                and t.interface.remote_port == self.remote_port
                and str(t.interface.remote_ip) == self.remote_ip
            ):
                # NOTE(review): debug leftover log line - consider removing
                self.test.logger.info("QUERY AYXX: true")
                return True
        return False

    def __str__(self):
        return self.object_id()

    def object_id(self):
        return "pvti-%d" % self._sw_if_index
+
+
@unittest.skipIf("pvti" in config.excluded_plugins, "Exclude PVTI plugin tests")
# @tag_run_solo
class TestPvti(VppTestCase):
    """Packet Vector Tunnel Interface (PVTI) Test Case"""

    error_str = compile(r"Error")

    # maxDiff = None

    # NOTE(review): the error-counter names below were carried over from the
    # wireguard tests and refer to wg4-/wg6- graph nodes, not PVTI nodes.
    # Confirm whether they (and the baselines captured in setUp) are needed.
    wg4_output_node_name = "/err/wg4-output-tun/"
    wg4_input_node_name = "/err/wg4-input/"
    wg6_output_node_name = "/err/wg6-output-tun/"
    wg6_input_node_name = "/err/wg6-input/"
    kp4_error = wg4_output_node_name + "Keypair error"
    mac4_error = wg4_input_node_name + "Invalid MAC handshake"
    peer4_in_err = wg4_input_node_name + "Peer error"
    peer4_out_err = wg4_output_node_name + "Peer error"
    kp6_error = wg6_output_node_name + "Keypair error"
    mac6_error = wg6_input_node_name + "Invalid MAC handshake"
    peer6_in_err = wg6_input_node_name + "Peer error"
    peer6_out_err = wg6_output_node_name + "Peer error"
    cookie_dec4_err = wg4_input_node_name + "Failed during Cookie decryption"
    cookie_dec6_err = wg6_input_node_name + "Failed during Cookie decryption"
    ratelimited4_err = wg4_input_node_name + "Handshake ratelimited"
    ratelimited6_err = wg6_input_node_name + "Handshake ratelimited"

    @classmethod
    def setUpClass(cls):
        super(TestPvti, cls).setUpClass()
        try:
            cls.create_pg_interfaces(range(2))
            for i in cls.pg_interfaces:
                i.admin_up()
                i.config_ip4()
                i.config_ip6()
                i.resolve_arp()
                i.resolve_ndp()

        except Exception:
            super(TestPvti, cls).tearDownClass()
            raise

    @classmethod
    def tearDownClass(cls):
        super(TestPvti, cls).tearDownClass()

    def setUp(self):
        # BUGFIX: was "super(VppTestCase, self).setUp()", which skips
        # VppTestCase.setUp() in the MRO and runs its base class' setUp
        # instead; use the conventional form.
        super(TestPvti, self).setUp()
        # Capture error-counter baselines so tests can assert on deltas.
        self.base_kp4_err = self.statistics.get_err_counter(self.kp4_error)
        self.base_mac4_err = self.statistics.get_err_counter(self.mac4_error)
        self.base_peer4_in_err = self.statistics.get_err_counter(self.peer4_in_err)
        self.base_peer4_out_err = self.statistics.get_err_counter(self.peer4_out_err)
        self.base_kp6_err = self.statistics.get_err_counter(self.kp6_error)
        self.base_mac6_err = self.statistics.get_err_counter(self.mac6_error)
        self.base_peer6_in_err = self.statistics.get_err_counter(self.peer6_in_err)
        self.base_peer6_out_err = self.statistics.get_err_counter(self.peer6_out_err)
        self.base_cookie_dec4_err = self.statistics.get_err_counter(
            self.cookie_dec4_err
        )
        self.base_cookie_dec6_err = self.statistics.get_err_counter(
            self.cookie_dec6_err
        )
        self.base_ratelimited4_err = self.statistics.get_err_counter(
            self.ratelimited4_err
        )
        self.base_ratelimited6_err = self.statistics.get_err_counter(
            self.ratelimited6_err
        )

    def create_packets(
        self, src_ip_if, count=1, size=150, for_rx=False, is_ip6=False, af_mix=False
    ):
        """Create `count` UDP test packets on src_ip_if.

        for_rx: swap src/dst so the packets look like tunnel egress traffic.
        af_mix: alternate IPv4/IPv6 per packet (overrides is_ip6).
        Odd-indexed packets (or a single packet) are padded to `size`,
        even-indexed ones to 150 bytes.
        """
        pkts = []
        total_packet_count = count
        padstr0 = ""
        padstr1 = ""
        for i in range(0, 2000):
            padstr0 = padstr0 + (".%03x" % i)
            padstr1 = padstr1 + ("+%03x" % i)

        for i in range(0, total_packet_count):
            if af_mix:
                is_ip6 = i % 2 == 1

            dst_mac = src_ip_if.local_mac
            src_mac = src_ip_if.remote_mac
            if for_rx:
                dst_ip4 = src_ip_if.remote_ip4
                dst_ip6 = src_ip_if.remote_ip6
                src_ip4 = "10.0.%d.4" % i
                src_ip6 = "2001:db8::%x" % i
            else:
                src_ip4 = src_ip_if.remote_ip4
                src_ip6 = src_ip_if.remote_ip6
                dst_ip4 = "10.0.%d.4" % i
                dst_ip6 = "2001:db8::%x" % i
            src_l4 = 1234 + i
            dst_l4 = 4321 + i

            ulp = UDP(sport=src_l4, dport=dst_l4)
            payload = "test pkt #%d" % i
            if i % 2 == 1:
                padstr = padstr1
            else:
                padstr = padstr0

            p = Ether(dst=dst_mac, src=src_mac)
            if is_ip6:
                p /= IPv6(src=src_ip6, dst=dst_ip6)
            else:
                p /= IP(src=src_ip4, dst=dst_ip4, frag=0, flags=0)

            p /= ulp / Raw(payload)

            if i % 2 == 1 or total_packet_count == 1:
                self.extend_packet(p, size, padstr)
            else:
                self.extend_packet(p, 150, padstr)
            pkts.append(p)
        return pkts

    def add_rx_ether_header(self, in_pkts, rx_intf=None):
        """Prefix each IP packet with an Ethernet header for rx_intf
        (default pg0)."""
        out = []
        if rx_intf is None:
            rx_intf = self.pg0
        dst_mac = rx_intf.local_mac
        src_mac = rx_intf.remote_mac
        for p in in_pkts:
            p0 = Ether(dst=dst_mac, src=src_mac) / p[IP]
            out.append(p0)
        return out

    def encap_for_rx_test(self, pkts, rx_intf=None):
        """Encapsulate pkts with the reference codec in the RX direction
        and make them injectable on rx_intf."""
        ende = self.pvti0.get_ende(for_rx_test=True)
        encap_pkts = ende.encap_packets(pkts)
        return self.add_rx_ether_header(encap_pkts, rx_intf)

    def decrement_ttl_and_build(self, send_pkts):
        """Return deep copies of send_pkts with IPv4 TTL decremented, as
        they should look after being forwarded once."""
        out = []
        pkts = copy.deepcopy(send_pkts)
        for p in pkts:
            p[IP].ttl = p[IP].ttl - 1
            out.append(Ether(p.build()))
        return out

    def create_rx_packets(self, dst_ip_if, rx_intf=None, count=1, size=150):
        """Create `count` PVTI packets (32 small chunks each) as they would
        arrive from the tunnel peer on rx_intf (default pg0)."""
        pkts = []
        total_packet_count = count
        padstr = ""
        if rx_intf is None:
            rx_intf = self.pg0
        for i in range(0, 2000):
            padstr = padstr + (".%03x" % i)

        dst_mac = rx_intf.local_mac
        src_mac = rx_intf.remote_mac

        for i in range(0, total_packet_count):
            dst_ip4 = dst_ip_if.remote_ip4
            src_ip4 = "10.0.%d.4" % i
            src_l4 = 1234 + i
            dst_l4 = 4321 + i

            ulp = UDP(sport=src_l4, dport=dst_l4)
            payload = "test"

            # if i % 2 == 1 or total_packet_count == 1:
            #    self.extend_packet(p, size, padstr)
            # else:
            #    self.extend_packet(p, 150, padstr)

            pvti = PVTI(seq=42 + i, chunks=[])
            for j in range(0, 32):
                p = (
                    IP(src=src_ip4, dst=dst_ip4, frag=0, flags=0, id=j + 0x4000)
                    / ulp
                    / Raw(payload)
                )
                chunk0 = PVTIChunk(data=raw(p))
                pvti.chunks.append(chunk0)

            p = (
                Ether(dst=dst_mac, src=src_mac)
                / IP(src="192.0.2.1", dst=rx_intf.local_ip4, id=0x3000 + i)
                / UDP(sport=12312, dport=12312)
                / pvti
            )
            # p.show()
            # Ether(raw(p)).show()

            pkts.append(p)
        return pkts

    def send_and_assert_no_replies_ignoring_init(
        self, intf, pkts, remark="", timeout=None
    ):
        """Send pkts and assert nothing (except misc IPv6/handshake noise)
        is captured on any pg interface."""
        self.pg_send(intf, pkts)

        def _filter_out_fn(p):
            # NOTE(review): is_handshake_init is not defined anywhere in
            # this module (wireguard leftover) - this helper would raise a
            # NameError if ever called; confirm intent before use.
            return is_ipv6_misc(p) or is_handshake_init(p)

        try:
            if not timeout:
                timeout = 1
            for i in self.pg_interfaces:
                i.assert_nothing_captured(
                    timeout=timeout, remark=remark, filter_out_fn=_filter_out_fn
                )
                timeout = 0.1
        finally:
            pass

    def test_0000_pvti_interface(self):
        """Simple interface creation"""
        local_port = 12312
        peer_addr = self.pg0.remote_ip4  # "192.0.2.1"
        peer_port = 31234
        peer_port = 12312

        # Create interface
        pvti0 = VppPvtiInterface(
            self, self.pg1.local_ip4, local_port, peer_addr, peer_port
        ).add_vpp_config()

        self.logger.info(self.vapi.cli("sh int"))
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show pvti tx peers"))
        self.logger.info(self.vapi.cli("show pvti rx peers"))

        # delete interface
        pvti0.remove_vpp_config()
        # self.logger.info(self.vapi.cli("show pvti interface"))
        # pvti0.add_vpp_config()

    def test_0001_pvti_send_simple_1pkt(self):
        """v4o4 TX: Simple packet: 1 -> 1"""

        self.prepare_for_test("v4o4_1pkt_simple")
        pkts = self.create_packets(self.pg1)

        recv_pkts = self.send_and_expect(self.pg1, pkts, self.pg0)
        for p in recv_pkts:
            self.logger.info(p)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(pkts, recv_pkts)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0101_pvti_send_simple_1pkt(self):
        """v6o4 TX: Simple packet: 1 -> 1"""

        self.prepare_for_test("v6o4_1pkt_simple")
        pkts = self.create_packets(self.pg1, is_ip6=True)

        recv_pkts = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=1)
        for p in recv_pkts:
            self.logger.info(p)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(pkts, recv_pkts)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0002_pvti_send_simple_2pkt(self):
        """TX: Simple packet: 2 -> 1"""
        self.prepare_for_test("2pkt_simple")

        send_pkts = self.create_packets(self.pg1, count=2)
        pkts = copy.deepcopy(send_pkts)
        rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=1)
        for p in rx:
            self.logger.info(p)
            # p.show()

        # both small packets must land as chunks of one PVTI packet
        payload0 = rx[0][PVTI].chunks[0].data
        payload1 = rx[0][PVTI].chunks[1].data

        pktA0 = IP(payload0)
        pktA1 = IP(payload1)

        p0 = pkts[0][IP]
        p0.ttl = p0.ttl - 1
        pktB0 = IP(p0.build())

        p1 = pkts[1][IP]
        p1.ttl = p1.ttl - 1
        pktB1 = IP(p1.build())

        self.assertEqual(pktA0, pktB0)
        self.assertEqual(pktA1, pktB1)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def prepare_for_test(self, test_name, underlay_mtu=1500, is_ip6=False):
        """Create self.pvti0 plus the routes/neighbors the TX tests need."""
        local_port = 12312
        peer_ip4_addr = "192.0.2.1"
        peer_ip6_addr = "2001:db8:dead::1"
        peer_port = 31234
        peer_port = 12312
        for i in self.pg_interfaces:
            i.test_name = test_name
        if is_ip6:
            self.pvti0 = VppPvtiInterface(
                self,
                self.pg1.local_ip6,
                local_port,
                peer_ip6_addr,
                peer_port,
                underlay_mtu,
            ).add_vpp_config()
        else:
            self.pvti0 = VppPvtiInterface(
                self,
                self.pg1.local_ip4,
                local_port,
                peer_ip4_addr,
                peer_port,
                underlay_mtu,
            ).add_vpp_config()
        self.pvti0.config_ip4()
        self.pvti0.config_ip6()
        self.pvti0.admin_up()

        self.logger.info(self.vapi.cli("ip route add 0.0.0.0/0 via 172.16.3.3"))
        ## FIXME: using direct "interface" below results in blackouts. intermittently.
        # self.logger.info(self.vapi.cli("ip route 0.0.0.0/0 via pvti0"))
        self.logger.info(self.vapi.cli("ip route add ::/0 via pvti0"))
        self.logger.info(self.vapi.cli("ip route add 192.0.2.1/32 via pg0"))
        self.logger.info(self.vapi.cli("ip neighbor pg0 192.0.2.1 000c.0102.0304"))
        self.logger.info(self.vapi.cli("ip route 2001:db8:dead::1/128 via pg0"))
        self.logger.info(
            self.vapi.cli("ip neighbor pg0 2001:db8:dead::1 000c.0102.0304")
        )
        self.logger.info(self.vapi.cli("ip neighbor pg1 172.16.2.2 000c.0102.0304"))
        self.logger.info(self.vapi.cli("sh int"))
        self.logger.info(self.vapi.cli("sh ip fib"))
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("set interface ip pvti-bypass pg0"))

    def cleanup_after_test(self):
        """Undo prepare_for_test: remove routes/neighbors and self.pvti0."""
        self.logger.info(self.vapi.cli("ip neighbor del pg0 192.0.2.1 000c.0102.0304"))
        self.logger.info(self.vapi.cli("ip neighbor del pg1 172.16.2.2 000c.0102.0304"))
        self.logger.info(self.vapi.cli("ip route del 192.0.2.1/32 via pg0"))
        # self.logger.info(self.vapi.cli("ip route del 0.0.0.0/0 via pvti0"))
        self.logger.info(self.vapi.cli("ip route del ::/0 via pvti0"))
        self.logger.info(self.vapi.cli("sh int"))
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.pvti0.remove_vpp_config()

    def test_0003_pvti_send_simple_1pkt_big(self):
        """TX: Simple big packet: 1 -> 2"""
        self.prepare_for_test("1big_pkt")

        send_pkts = self.create_packets(self.pg1, count=1, size=1900)
        pkts = copy.deepcopy(send_pkts)
        self.logger.info("count: ")
        self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=2)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))
            # p.show()
        # the big packet is split across two PVTI packets
        payload = rx[0][PVTI].chunks[0].data + rx[1][PVTI].chunks[0].data

        pkt1 = IP(payload)
        p0 = pkts[0][IP]
        p0.ttl = p0.ttl - 1

        pkt0 = IP(p0.build())

        self.assertEqual(pkt0, pkt1)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0004_pvti_send_simple_5pkt_big(self):
        """v4o4 TX: Simple big packets: 5 -> 2"""
        self.prepare_for_test("v4o4_5big_pkt")

        send_pkts = self.create_packets(self.pg1, count=5, size=1050)
        self.logger.info("count: %d " % len(send_pkts))
        # self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))
            # p.show()

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0104_pvti_send_simple_5pkt_big(self):
        """v6o4 TX: Simple big packets: 5 -> 2"""
        self.prepare_for_test("v4o4_5big_pkt")

        send_pkts = self.create_packets(self.pg1, count=5, size=1050, is_ip6=True)
        self.logger.info("count: %d " % len(send_pkts))
        # self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))
            # p.show()

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def Xtest_0204_pvti_send_simple_5pkt_mix(self):
        """vXo4 TX: Simple packets mix: 5 -> 2"""
        # FIXME: This test is disabled for now, but left here, to have this comment
        # The mix of IPv4 and IPv6 packets in VPP will forward two
        # different graphs, so after encap it will result in two
        # PV packets: one with IPv4 chunks, and one with IPv6 chunks.
        # The python test encapsulator does not do this, and it is probably
        # a useless idea to introduce attempts to mimic this behavior,
        # because in any case one can not expect the orderly scheduling
        # of IPv4 vs IPv6 graph processing.
        self.prepare_for_test("vXo4_5big_pkt")

        send_pkts = self.create_packets(self.pg1, count=5, size=1050, af_mix=True)
        # self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=2)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0005_pvti_send_mix_3pkt_medium_mtu(self):
        """TX: small+big+small packets over medium mtu: 3 -> 3"""
        self.prepare_for_test("3pkt_small_mtu", underlay_mtu=400)

        send_pkts = self.create_packets(self.pg1, count=3, size=500)
        pkts = copy.deepcopy(send_pkts)
        self.logger.info("count: %d " % len(send_pkts))
        # self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))
            # p.show()

        # check the middle chunk which is spread across two packets
        payload = rx[0][PVTI].chunks[1].data + rx[1][PVTI].chunks[0].data

        pkt1 = IP(payload)

        p0 = pkts[1][IP]
        p0.ttl = p0.ttl - 1

        pkt0 = IP(p0.build())
        self.assertEqual(pkt0, pkt1)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0006_pvti_send_mix_4pkt_medium_mtu(self):
        """TX: small+big+small packets over 600 mtu: 4 -> 3"""
        self.prepare_for_test("6pkt_small_mtu", underlay_mtu=600)

        send_pkts = self.create_packets(self.pg1, count=4, size=500)
        pkts = copy.deepcopy(send_pkts)
        # self.logger.info(len(pkts))
        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3)
        for p in rx:
            self.logger.info(p)
            self.logger.info(len(p[PVTI].chunks[0].data))
            # p.show()

        # check the middle chunk which is spread across two packets
        payload = rx[0][PVTI].chunks[1].data + rx[1][PVTI].chunks[0].data

        pkt1 = IP(payload)

        p0 = pkts[1][IP]
        p0.ttl = p0.ttl - 1

        pkt0 = IP(p0.build())
        self.assertEqual(pkt0, pkt1)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0007_pvti_send_simple_1_3_pkt(self):
        """TX: Simple packet: 1 -> 3, small mtu"""

        self.prepare_for_test("1_3_pkt_simple", underlay_mtu=520)
        send_pkts = self.create_packets(self.pg1, count=1, size=1400)
        pkts = copy.deepcopy(send_pkts)

        rx = self.send_and_expect(self.pg1, pkts, self.pg0, n_rx=3)
        for p in rx:
            self.logger.info(p)

        c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_0008_pvti_chained_1_3_pkt(self):
        """TX: Chained packet: 2700 byte 1 -> 3, mtu 1000"""

        self.prepare_for_test("1_3_pkt_simple", underlay_mtu=1000)
        send_pkts = self.create_packets(self.pg1, count=1, size=2700)
        pkts = copy.deepcopy(send_pkts)

        pkt0 = Ether(raw(pkts[0]))[IP]

        rx = self.send_and_expect(self.pg1, send_pkts, self.pg0, n_rx=3)
        for p in rx:
            self.logger.info(p)

        p0 = pkts[0][IP]
        p0.ttl = p0.ttl - 1
        pkt0 = IP(p0.build())

        payload = (
            rx[0][PVTI].chunks[0].data
            + rx[1][PVTI].chunks[0].data
            + rx[2][PVTI].chunks[0].data
            # + rx[2][PVTI].chunks[1].data
        )
        pkt1 = IP(payload)

        self.assertEqual(pkt0, pkt1)

        # FIXME: this will fail because the send path
        # does not combine the data from two chained blocks.
        # when this succeeds, the above checks in this testcase will need to be redone
        # c_pkts, py_pkts = self.pvti0.verify_encap_packets(send_pkts, rx)
        # self.assertEqual(c_pkts, py_pkts)

        self.cleanup_after_test()

    def test_1001_pvti_rx_simple_1pkt(self):
        """RX: Simple packet: 1 -> 32"""

        self.prepare_for_test("1pkt_rx_simple")
        pkts = self.create_rx_packets(self.pg1, rx_intf=self.pg0)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=32)
        for p in recv_pkts:
            self.logger.info(p)

        self.cleanup_after_test()

    def test_1002_pvti_rx_big_1buf(self):
        """RX: Orig Big packet, single buf: 2 -> 1"""

        self.prepare_for_test("1buf_rx_big")

        pkts_orig = self.create_packets(self.pg1, count=1, size=1900, for_rx=True)
        pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        known_good_pkts = self.decrement_ttl_and_build(pkts_orig)

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=1)
        for i, p in enumerate(recv_pkts):
            self.logger.info(p)
            self.assertEqual(p[IP], known_good_pkts[i][IP])

        self.cleanup_after_test()

    def test_1003_pvti_rx_big_2buf(self):
        """RX: Very Big packet, chained buf: 3 -> 1"""

        self.prepare_for_test("2buf_rx_big")

        pkts_orig = self.create_packets(self.pg1, count=1, size=3000, for_rx=True)

        pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        known_good_pkts = self.decrement_ttl_and_build(pkts_orig)

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=1)
        for i, p in enumerate(recv_pkts):
            self.logger.info(p)
            if p[IP] != known_good_pkts[i][IP]:
                p[IP].show()
                known_good_pkts[i][IP].show()
            self.assertEqual(p[IP], known_good_pkts[i][IP])

        self.cleanup_after_test()

    def test_1004_pvti_rx_big_2buf_and_small(self):
        """RX: Very Big packet, chained buf: 3 -> 1 + small pkt"""

        self.prepare_for_test("2buf_rx_big_and_small")

        pkts_orig = self.create_packets(self.pg1, count=2, size=3000, for_rx=True)

        pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        known_good_pkts = self.decrement_ttl_and_build(pkts_orig)

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2)
        for i, p in enumerate(recv_pkts):
            self.logger.info(p)
            if p[IP] != known_good_pkts[i][IP]:
                p[IP].show()
                known_good_pkts[i][IP].show()
            self.assertEqual(p[IP], known_good_pkts[i][IP])

        self.cleanup_after_test()

    def test_1005_pvti_rx_big_2buf_and_small_drop(self):
        """RX: Very Big packet, chained buf: 3 -> 1 + small pkt, encap pkt lost"""

        self.prepare_for_test("2buf_rx_big_and_small_drop")

        pkts_orig = self.create_packets(self.pg1, count=3, size=3000, for_rx=True)

        pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0)
        # drop the second packet after encapsulation (the one with the second frag of the large packet)
        pkts.pop(1)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        known_good_pkts = self.decrement_ttl_and_build(pkts_orig)

        # drop the large original packet, leaving just two small ones
        known_good_pkts.pop(1)

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2)
        for i, p in enumerate(recv_pkts):
            self.logger.info(p)
            if p[IP] != known_good_pkts[i][IP]:
                p[IP].show()
                known_good_pkts[i][IP].show()
            self.assertEqual(p[IP], known_good_pkts[i][IP])

        self.cleanup_after_test()

    def test_1006_pvti_rx_big_2buf_and_small_drop2(self):
        """RX: Very Big packet, chained buf: 3 -> 1 + small pkt, non-initial frag pkt lost"""

        self.prepare_for_test("2buf_rx_big_and_small_drop2")

        pkts_orig = self.create_packets(self.pg1, count=3, size=6000, for_rx=True)

        pkts = self.encap_for_rx_test(pkts_orig, rx_intf=self.pg0)
        # drop the second packet after encapsulation (the one with the second frag of the large packet)
        pkts.pop(2)
        self.logger.info(self.vapi.cli("show pvti interface"))
        self.logger.info(self.vapi.cli("show udp ports"))

        known_good_pkts = self.decrement_ttl_and_build(pkts_orig)
        # drop the large original packet, leaving just two small ones
        known_good_pkts.pop(1)

        recv_pkts = self.send_and_expect(self.pg0, pkts, self.pg1, n_rx=2)
        for i, p in enumerate(recv_pkts):
            self.logger.info(p)
            if p[IP] != known_good_pkts[i][IP]:
                p[IP].show()
                known_good_pkts[i][IP].show()
            self.assertEqual(p[IP], known_good_pkts[i][IP])

        self.cleanup_after_test()
+ self.cleanup_after_test()
+
+
class PvtiHandoffTests(TestPvti):
    """Pvti Tests in multi worker setup"""

    # run with two worker threads to exercise thread handoff
    vpp_worker_count = 2

    def xtest_wg_peer_init(self):
        """Handoff"""
        # NOTE(review): disabled ("xtest" prefix) and carried over from the
        # wireguard tests: VppWgInterface, VppWgPeer, Pvti, PvtiTransport
        # and HANDSHAKE_JITTER are not defined in this module, so enabling
        # this as-is would fail with a NameError - confirm before reuse.

        port = 12383

        # Create interfaces
        wg0 = VppWgInterface(self, self.pg1.local_ip4, port).add_vpp_config()
        wg0.admin_up()
        wg0.config_ip4()

        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        peer_1 = VppWgPeer(
            self, wg0, self.pg1.remote_ip4, port + 1, ["10.11.2.0/24", "10.11.3.0/24"]
        ).add_vpp_config()
        self.assertEqual(len(self.vapi.wireguard_peers_dump()), 1)

        r1 = VppIpRoute(
            self, "10.11.3.0", 24, [VppRoutePath("10.11.3.1", wg0.sw_if_index)]
        ).add_vpp_config()

        # skip the first automatic handshake
        self.pg1.get_capture(1, timeout=HANDSHAKE_JITTER)

        # send a valid handsake init for which we expect a response
        p = peer_1.mk_handshake(self.pg1)

        rx = self.send_and_expect(self.pg1, [p], self.pg1)

        peer_1.consume_response(rx[0])

        # send a data packet from the peer through the tunnel
        # this completes the handshake and pins the peer to worker 0
        p = (
            IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20)
            / UDP(sport=222, dport=223)
            / Raw()
        )
        d = peer_1.encrypt_transport(p)
        p = peer_1.mk_tunnel_header(self.pg1) / (
            Pvti(message_type=4, reserved_zero=0)
            / PvtiTransport(
                receiver_index=peer_1.sender, counter=0, encrypted_encapsulated_packet=d
            )
        )
        rxs = self.send_and_expect(self.pg1, [p], self.pg0, worker=0)

        for rx in rxs:
            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
            self.assertEqual(rx[IP].ttl, 19)

        # send a packets that are routed into the tunnel
        # and pins the peer tp worker 1
        pe = (
            Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac)
            / IP(src=self.pg0.remote_ip4, dst="10.11.3.2")
            / UDP(sport=555, dport=556)
            / Raw(b"\x00" * 80)
        )
        rxs = self.send_and_expect(self.pg0, pe * 255, self.pg1, worker=1)
        peer_1.validate_encapped(rxs, pe)

        # send packets into the tunnel, from the other worker
        p = [
            (
                peer_1.mk_tunnel_header(self.pg1)
                / Pvti(message_type=4, reserved_zero=0)
                / PvtiTransport(
                    receiver_index=peer_1.sender,
                    counter=ii + 1,
                    encrypted_encapsulated_packet=peer_1.encrypt_transport(
                        (
                            IP(src="10.11.3.1", dst=self.pg0.remote_ip4, ttl=20)
                            / UDP(sport=222, dport=223)
                            / Raw()
                        )
                    ),
                )
            )
            for ii in range(255)
        ]

        rxs = self.send_and_expect(self.pg1, p, self.pg0, worker=1)

        for rx in rxs:
            self.assertEqual(rx[IP].dst, self.pg0.remote_ip4)
            self.assertEqual(rx[IP].ttl, 19)

        # send a packets that are routed into the tunnel
        # from worker 0
        rxs = self.send_and_expect(self.pg0, pe * 255, self.pg1, worker=0)

        peer_1.validate_encapped(rxs, pe)

        r1.remove_vpp_config()
        peer_1.remove_vpp_config()
        wg0.remove_vpp_config()