aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/pvti
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/pvti')
-rw-r--r--src/plugins/pvti/CMakeLists.txt40
-rw-r--r--src/plugins/pvti/FEATURE.yaml8
-rw-r--r--src/plugins/pvti/api.c137
-rw-r--r--src/plugins/pvti/bypass-main.c79
-rw-r--r--src/plugins/pvti/bypass.c202
-rw-r--r--src/plugins/pvti/bypass.h53
-rw-r--r--src/plugins/pvti/input-main.c115
-rw-r--r--src/plugins/pvti/input.c496
-rw-r--r--src/plugins/pvti/input.h87
-rw-r--r--src/plugins/pvti/output-main.c85
-rw-r--r--src/plugins/pvti/output.c543
-rw-r--r--src/plugins/pvti/output.h75
-rw-r--r--src/plugins/pvti/pvti.api111
-rw-r--r--src/plugins/pvti/pvti.c481
-rw-r--r--src/plugins/pvti/pvti.h257
-rw-r--r--src/plugins/pvti/pvti_if.c376
-rw-r--r--src/plugins/pvti/pvti_if.h47
17 files changed, 3192 insertions, 0 deletions
diff --git a/src/plugins/pvti/CMakeLists.txt b/src/plugins/pvti/CMakeLists.txt
new file mode 100644
index 00000000000..900b662d54a
--- /dev/null
+++ b/src/plugins/pvti/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright (c) 2024 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(pvti
+ SOURCES
+ pvti_if.c
+ pvti.c
+ input.h
+ input.c
+ input-main.c
+ output.h
+ output.c
+ output-main.c
+ bypass.h
+ bypass.c
+ bypass-main.c
+ api.c
+ pvti.h
+
+ MULTIARCH_SOURCES
+ input.c
+ output.c
+ bypass.c
+
+ API_FILES
+ pvti.api
+
+ # API_TEST_SOURCES
+ # pvti_test.c
+)
diff --git a/src/plugins/pvti/FEATURE.yaml b/src/plugins/pvti/FEATURE.yaml
new file mode 100644
index 00000000000..52dbe5b7c1b
--- /dev/null
+++ b/src/plugins/pvti/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Packet Vector Tunnel
+maintainer: Andrew Yourtchenko <ayourtch@gmail.com>
+features:
+ - support inner MTU up to ~8K over standard 1280..1500 MTU substrate
+description: "Large MTU Tunnels"
+state: development
+properties: [API, CLI]
diff --git a/src/plugins/pvti/api.c b/src/plugins/pvti/api.c
new file mode 100644
index 00000000000..cda39ad44e8
--- /dev/null
+++ b/src/plugins/pvti/api.c
@@ -0,0 +1,137 @@
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vlibapi/api.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pvm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+typedef struct
+{
+ vl_api_registration_t *reg;
+ u32 context;
+} pvti_if_details_ctx_t;
+
+typedef struct
+{
+
+} pvti_interface_dump_ctx_t;
+
+static walk_rc_t
+pvti_if_send_details (index_t pvtii, void *data)
+{
+ vl_api_pvti_interface_details_t *rmp;
+ pvti_if_details_ctx_t *ctx = data;
+ const pvti_if_t *pvi;
+
+ pvi = pvti_if_get (pvtii);
+
+ rmp = vl_msg_api_alloc_zero (sizeof (*rmp));
+ rmp->_vl_msg_id =
+ htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base);
+
+ rmp->interface.sw_if_index = htonl (pvi->sw_if_index);
+ rmp->interface.local_port = htons (pvi->local_port);
+ rmp->interface.remote_port = htons (pvi->remote_port);
+ rmp->interface.underlay_mtu = htons (pvi->underlay_mtu);
+
+ ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip);
+ ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip);
+
+ rmp->context = ctx->context;
+
+ vl_api_send_msg (ctx->reg, (u8 *) rmp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ // pvti_main_t *pvm = &pvti_main;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (reg == 0)
+ return;
+
+ pvti_if_details_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ if (sw_if_index == ~0)
+ pvti_if_walk (pvti_if_send_details, &ctx);
+ else
+ {
+ index_t pvtii = pvti_if_find_by_sw_if_index (sw_if_index);
+ if (pvtii != INDEX_INVALID)
+ pvti_if_send_details (pvtii, &ctx);
+ }
+}
+
+static void
+vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp)
+{
+ vl_api_pvti_interface_create_reply_t *rmp;
+ pvti_main_t *pvm = &pvti_main;
+ int rv = ~0;
+ u32 sw_if_index = ~0;
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+
+ ip_address_decode2 (&mp->interface.local_ip, &local_ip);
+ ip_address_decode2 (&mp->interface.remote_ip, &remote_ip);
+ u16 lport = clib_host_to_net_u16 (mp->interface.local_port);
+ u16 rport = clib_host_to_net_u16 (mp->interface.remote_port);
+ u16 underlay_mtu = clib_host_to_net_u16 (mp->interface.underlay_mtu);
+ u32 underlay_fib_index =
+ clib_host_to_net_u32 (mp->interface.underlay_fib_index);
+ pvti_peer_address_method_t peer_address_method =
+ mp->interface.peer_address_from_payload ? PVTI_PEER_ADDRESS_FROM_PAYLOAD :
+ PVTI_PEER_ADDRESS_FIXED;
+
+ if (underlay_mtu == 0)
+ {
+ underlay_mtu = 1500;
+ }
+
+ rv =
+ pvti_if_create (&local_ip, lport, &remote_ip, rport, peer_address_method,
+ underlay_mtu, underlay_fib_index, &sw_if_index);
+
+ REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY,
+ { rmp->sw_if_index = htonl (sw_if_index); });
+}
+
+static void
+vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp)
+{
+ vl_api_pvti_interface_delete_reply_t *rmp;
+ pvti_main_t *pvm = &pvti_main;
+ int rv = 0;
+
+ rv = pvti_if_delete (ntohl (mp->sw_if_index));
+ REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY);
+}
+
+/* API definitions */
+#include <pvti/pvti.api.c>
+
+void
+pvti_api_init ()
+{
+ pvti_main_t *pvm = &pvti_main;
+ /* Add our API messages to the global name_crc hash table */
+ pvm->msg_id_base = setup_message_id_table ();
+}
diff --git a/src/plugins/pvti/bypass-main.c b/src/plugins/pvti/bypass-main.c
new file mode 100644
index 00000000000..db79ccd2113
--- /dev/null
+++ b/src/plugins/pvti/bypass-main.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/bypass.h>
+
+/* packet trace format function */
+static u8 *
+format_pvti_bypass_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_bypass_trace_t *t = va_arg (*args, pvti_bypass_trace_t *);
+
+ s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n",
+ t->sw_if_index, t->next_index);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d", t->seq);
+ return s;
+}
+
+vlib_node_registration_t pvti4_bypass_node;
+vlib_node_registration_t pvti6_bypass_node;
+
+static char *pvti_bypass_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_bypass_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (pvti4_bypass_node) =
+{
+ .name = "ip4-pvti-bypass",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_bypass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+ .error_strings = pvti_bypass_error_strings,
+
+ .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+ [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti4-input",
+ },
+
+};
+
+VLIB_REGISTER_NODE (pvti6_bypass_node) =
+{
+ .name = "ip6-pvti-bypass",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_bypass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+ .error_strings = pvti_bypass_error_strings,
+
+ .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+ [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti6-input",
+ },
+
+};
diff --git a/src/plugins/pvti/bypass.c b/src/plugins/pvti/bypass.c
new file mode 100644
index 00000000000..14c976439eb
--- /dev/null
+++ b/src/plugins/pvti/bypass.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/bypass.h>
+
+always_inline u16
+pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_ip6)
+{
+ u32 n_left_from, *from, *to_next;
+ pvti_bypass_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ u32 pkts_processed = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 sw_if_index0 = 0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ udp_header_t *udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u8 error0, good_udp0, proto0;
+ i32 len_diff0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* setup the packet for the next feature */
+ vnet_feature_next (&next0, b0);
+
+ if (is_ip6)
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ }
+ else
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ }
+
+ if (is_ip6)
+ {
+ proto0 = ip60->protocol;
+ }
+ else
+ {
+ /* Treat IP frag packets as "experimental" protocol for now */
+ proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip6)
+ udp0 = ip6_next_header (ip60);
+ else
+ udp0 = ip4_next_header (ip40);
+
+ /* look up the destination ip and port */
+ u32 pvti_index0 = INDEX_INVALID;
+ if (is_ip6)
+ {
+ pvti_index0 = pvti_if_find_by_remote_ip6_and_port (
+ &ip60->src_address, clib_net_to_host_u16 (udp0->src_port));
+ }
+ else
+ {
+ pvti_index0 = pvti_if_find_by_remote_ip4_and_port (
+ &ip40->src_address, clib_net_to_host_u16 (udp0->src_port));
+ }
+ if (pvti_index0 == INDEX_INVALID)
+ goto exit;
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum.
+ */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip6)
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if (is_ip6)
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ else
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ if (is_ip6)
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* pvtiX-input node expect current at PVTI header */
+ if (is_ip6)
+ vlib_buffer_advance (b0, sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof (ip4_header_t) +
+ sizeof (udp_header_t));
+ exit:
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_bypass_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->seq = 0; // clib_net_to_host_u32 (pvti0->seq);
+ if (is_ip6)
+ {
+ }
+ else
+ {
+ t->remote_ip.ip.ip4 = ip40->src_address;
+ t->remote_ip.version = AF_IP4;
+ }
+ // t->local_port = h0->udp.dst_port;
+ // t->remote_port = h0->udp.src_port;
+ }
+
+ pkts_processed += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_BYPASS_ERROR_PROCESSED, pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_bypass_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_bypass_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/bypass.h b/src/plugins/pvti/bypass.h
new file mode 100644
index 00000000000..611d5770ad3
--- /dev/null
+++ b/src/plugins/pvti/bypass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_bypass_h__
+#define __included_pvti_bypass_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u32 seq;
+} pvti_bypass_trace_t;
+
+#define foreach_pvti_bypass_error \
+ _ (PROCESSED, "PVTI bypass tunnel packets processed")
+
+typedef enum
+{
+#define _(sym, str) PVTI_BYPASS_ERROR_##sym,
+ foreach_pvti_bypass_error
+#undef _
+ PVTI_BYPASS_N_ERROR,
+} pvti_bypass_error_t;
+
+typedef enum
+{
+ PVTI_BYPASS_NEXT_DROP,
+ PVTI_BYPASS_NEXT_PVTI_INPUT,
+ PVTI_BYPASS_N_NEXT,
+} pvti_bypass_next_t;
+
+#endif // pvti_bypass_h
diff --git a/src/plugins/pvti/input-main.c b/src/plugins/pvti/input-main.c
new file mode 100644
index 00000000000..8ab8b18dd7c
--- /dev/null
+++ b/src/plugins/pvti/input-main.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/input.h>
+
+static char *pvti_input_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_input_error
+#undef _
+};
+
+#define _(f, s) s,
+static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type };
+#undef _
+
+static char *
+get_pvti_trace_type_name (u8 ptype)
+{
+ if (ptype < PVTI_INPUT_TRACE_N_TYPES)
+ {
+ return pvti_input_trace_type_names[ptype];
+ }
+ else
+ {
+ return "unknown";
+ }
+}
+
+/* packet trace format function */
+static u8 *
+format_pvti_input_trace (u8 *s, va_list *args)
+{
+ int i;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_input_trace_t *t = va_arg (*args, pvti_input_trace_t *);
+
+ u32 indent = format_get_indent (s);
+
+ s = format (s,
+ "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), "
+ "chunkcnt: %d\n",
+ t->sw_if_index, t->next_index,
+ get_pvti_trace_type_name (t->trace_type), t->trace_type,
+ t->chunk_count);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d, chunk_count: %d\n", t->seq, t->chunk_count);
+ u16 max = t->chunk_count > MAX_CHUNKS ? MAX_CHUNKS : t->chunk_count;
+ for (i = 0; i < max; i++)
+ {
+ s = format (s, " %02d: sz %d\n", i, t->chunks[i].total_chunk_length);
+ }
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ip_adjacency_packet_data, t->packet_data,
+ sizeof (t->packet_data));
+
+ return s;
+}
+
+vlib_node_registration_t pvti4_input_node;
+vlib_node_registration_t pvti6_input_node;
+
+VLIB_REGISTER_NODE (pvti4_input_node) =
+{
+ .name = "pvti4-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
+VLIB_REGISTER_NODE (pvti6_input_node) =
+{
+ .name = "pvti6-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c
new file mode 100644
index 00000000000..6a8806e2795
--- /dev/null
+++ b/src/plugins/pvti/input.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/input.h>
+
+always_inline void
+pvti_enqueue_rx_bi_to_next_and_trace (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, u32 bi0,
+ u16 next0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next0, b0,
+ /* follow_chain */ 0)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->trace_type = PVTI_INPUT_TRACE_decap;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+}
+
+always_inline pvti_rx_peer_t *
+pvti_try_find_or_create_rx_peer (pvti_per_thread_data_t *ptd,
+ vlib_buffer_t *b0, bool is_ip6)
+{
+ pvti_rx_peer_t *peer;
+
+ ip_address_t remote_ip = { 0 };
+ u16 remote_port;
+ if (is_ip6)
+ {
+ pvti_ip6_encap_header_t *h0 =
+ ((pvti_ip6_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip6.src_address, AF_IP6);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+ else
+ {
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip4.src_address, AF_IP4);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+
+ pool_foreach (peer, ptd->rx_peers)
+ {
+ if (peer->remote_port == remote_port &&
+ 0 == ip_address_cmp (&remote_ip, &peer->remote_ip))
+ {
+ if (peer->deleted)
+ {
+ // The peer has been marked as deleted - wipe it.
+ clib_memset (peer, 0xca, sizeof (*peer));
+ pool_put (ptd->rx_peers, peer);
+ continue;
+ }
+ return peer;
+ }
+ }
+
+ index_t pvti_if_index0 =
+ pvti_if_find_by_remote_ip_and_port (&remote_ip, remote_port);
+ if (INDEX_INVALID == pvti_if_index0)
+ {
+ // no suitable interface found, bail
+ return 0;
+ }
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_if_index0);
+
+ pvti_rx_peer_t new_peer = {
+ .local_ip = pvti_if0->local_ip,
+ .local_port = pvti_if0->local_port,
+ .remote_ip = remote_ip,
+ .remote_port = remote_port,
+ .pvti_if_index = pvti_if_index0,
+ .rx_streams = { { 0 } },
+ };
+ pvti_rx_peer_t *rx_new_peer;
+ pool_get (ptd->rx_peers, rx_new_peer);
+ *rx_new_peer = new_peer;
+
+ int i;
+ for (i = 0; i < MAX_RX_STREAMS; i++)
+ {
+ rx_new_peer->rx_streams[i].rx_bi0 = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_next0 = 0;
+ }
+
+ return rx_new_peer;
+}
+
+always_inline u16
+pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_ip6)
+{
+ u32 n_left_from, *from;
+ pvti_chunk_header_t *chunks[MAX_CHUNKS];
+ u32 pkts_processed = 0;
+ u32 pkts_decapsulated = 0;
+ u32 decap_failed_no_buffers = 0;
+
+ pvti_main_t *pvm = &pvti_main;
+
+ u32 thread_index = vlib_get_thread_index ();
+ pvti_per_thread_data_t *ptd =
+ vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = PVTI_INPUT_NEXT_DROP;
+ u32 sw_if_index0;
+ u8 true_chunk_count = 0;
+ u8 max_chunk_count;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ pvti_rx_peer_t *pvti_rx_peer0 =
+ pvti_try_find_or_create_rx_peer (ptd, b0, is_ip6);
+ if (!pvti_rx_peer0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PEER];
+ goto drop_and_maybe_trace;
+ }
+
+ b0 = vlib_get_buffer (vm, bi0);
+ pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+ u8 stream_index = pvti0->stream_index;
+ max_chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ u16 pvti_packet_header_sz0 =
+ pvti0->pad_bytes + offsetof (pvti_packet_header_t, pad);
+ if (b0->current_length < pvti_packet_header_sz0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PACKET_TOO_SHORT];
+ goto drop_and_maybe_trace;
+ }
+ vlib_buffer_advance (b0, pvti_packet_header_sz0);
+
+ if (max_chunk_count == 0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_NOCHUNKS];
+ goto drop_and_maybe_trace;
+ }
+ if (pvti0->reass_chunk_count > max_chunk_count)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_TOOMANYREASS];
+ goto drop_and_maybe_trace;
+ }
+ pvti_per_rx_stream_data_t *rx_stream0 =
+ &pvti_rx_peer0->rx_streams[stream_index];
+
+ u32 new_seq0 = clib_net_to_host_u32 (pvti0->seq);
+ if (new_seq0 == rx_stream0->last_rx_seq + 1)
+ {
+ /* Sequence# matches, we can attempt adding the leading chunks to
+ * reassembly */
+ rx_stream0->last_rx_seq = new_seq0;
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* attempt to either incorporate the first chunk into
+ * reassembly or skip it. */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ const u16 chunk_payload_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length) -
+ sizeof (*pvc0);
+ vlib_buffer_advance (b0, sizeof (*pvc0));
+
+ if (rx_stream0->rx_bi0 == INDEX_INVALID)
+ {
+ clib_warning (
+ "RX internal error: not-first chunk but no wip block");
+ }
+ else
+ {
+
+ vlib_buffer_t *rb0 =
+ vlib_get_buffer (vm, rx_stream0->rx_bi0);
+ u16 allowed_length =
+ PVTI_RX_MAX_LENGTH - rb0->current_length;
+ if (allowed_length > chunk_payload_length)
+ {
+ // simple case - there is space in the buffer to fit
+ // the whole chunk
+ void *tail =
+ vlib_buffer_put_uninit (rb0, chunk_payload_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ chunk_payload_length);
+ }
+ else
+ {
+ // The current chunk can not fit - need to make two
+ // copies, one into the current buffer, and one into
+ // a newly allocated chained buffer.
+ void *tail =
+ vlib_buffer_put_uninit (rb0, allowed_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ allowed_length);
+ u16 remaining_payload_length =
+ chunk_payload_length - allowed_length;
+ u32 nrbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nrbi0)
+ {
+ ASSERT (0); // FIXME what the recovery is
+ // supposed to look like ?
+ }
+ else
+ {
+ // link up the new buffer and copy the remainder
+ // there
+ vlib_buffer_t *nrb0 = vlib_get_buffer (vm, nrbi0);
+ rb0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ rb0->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ rb0->next_buffer = nrbi0;
+ rx_stream0->rx_bi0 = nrbi0;
+ void *tail = vlib_buffer_put_uninit (
+ nrb0, remaining_payload_length);
+ clib_memcpy (tail,
+ vlib_buffer_get_current (b0) +
+ allowed_length,
+ remaining_payload_length);
+ }
+ }
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length += chunk_payload_length;
+ if (pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length ==
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length)
+ {
+ next0 = rx_stream0->rx_next0;
+ pvti_enqueue_rx_bi_to_next_and_trace (
+ vm, node, ptd, rx_stream0->rx_bi0_first, next0);
+ pkts_decapsulated += 1;
+
+ // clean out the current reassemly state
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length = 0;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (b0, chunk_payload_length);
+ }
+ }
+ else
+ {
+ /* Sequence does not match, skip the reassembly chunks and reset
+ * the reassembly state */
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* skip the reassembly chunks */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (
+ b0, clib_net_to_host_u16 (pvc0->total_chunk_length));
+ }
+ // FIXME: discard the current reassembly state, reset the seq#
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ clib_warning ("RX PVTI: discard chunk being reassembled");
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+
+ while ((b0->current_length > 0) && true_chunk_count < max_chunk_count)
+ {
+ if (b0->current_length < sizeof (pvti_chunk_header_t))
+ {
+ clib_warning ("RX ERR: length too short for a chunk");
+ break;
+ }
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ u16 total_chunk_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length);
+ if (b0->current_length < total_chunk_length)
+ {
+ clib_warning ("RX ERR: length 0x%x too big for a chunk",
+ true_chunk_count);
+ break;
+ }
+ u8 *pkt = (u8 *) (pvc0 + 1);
+ u16 inner_length;
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+
+ switch (*pkt & 0xf0)
+ {
+ case 0x40:
+ next0 = PVTI_INPUT_NEXT_IP4_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 2)));
+ break;
+ case 0x60:
+ next0 = PVTI_INPUT_NEXT_IP6_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 4))) +
+ sizeof (ip6_header_t);
+ break;
+ default:
+ next0 = PVTI_INPUT_NEXT_DROP;
+ vlib_buffer_advance (b0, total_chunk_length);
+ continue;
+ }
+ vlib_buffer_advance (b0, sizeof (pvti_chunk_header_t));
+
+ if (inner_length + sizeof (pvti_chunk_header_t) > total_chunk_length)
+ {
+ /* FIXME: the packet size is larger than the chunk -> it's a
+ * first fragment */
+ // enqueue the chunk and finish packet processing.
+ // There must be no active reassembly.
+ ASSERT (rx_stream0->rx_bi0_first == INDEX_INVALID);
+ rx_stream0->rx_next0 = next0;
+ rx_stream0->rx_bi0 = bi0;
+ rx_stream0->rx_bi0_first = bi0;
+ rx_stream0->rx_expected_inner_length = inner_length;
+ rx_stream0->rx_received_inner_length =
+ total_chunk_length - sizeof (pvti_chunk_header_t);
+ rx_stream0->last_rx_seq = new_seq0;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_enqueue;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ goto continue_outer;
+ }
+
+ u32 nbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nbi0)
+ {
+ decap_failed_no_buffers += 1;
+ continue;
+ };
+ vlib_buffer_t *nb0 = vlib_get_buffer (vm, nbi0);
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_rx_peer0->pvti_if_index);
+ vnet_buffer (nb0)->sw_if_index[VLIB_RX] = pvti_if0->sw_if_index;
+ void *new_packet = vlib_buffer_put_uninit (nb0, inner_length);
+ clib_memcpy (new_packet, pvc0 + 1, inner_length);
+ vlib_buffer_advance (b0, inner_length);
+
+ pvti_enqueue_rx_bi_to_next_and_trace (vm, node, ptd, nbi0, next0);
+ pkts_decapsulated += 1;
+ }
+ /* we have processed all the chunks from the buffer, but the buffer
+ * remains. Free it. */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_free;
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ for (int i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vlib_buffer_free_one (vm, bi0);
+
+ continue_outer:
+ pkts_processed += 1;
+ continue;
+
+ drop_and_maybe_trace:
+ next0 = PVTI_INPUT_NEXT_DROP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ int i;
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->trace_type = PVTI_INPUT_TRACE_drop;
+ t->next_index = next0;
+ t->remote_ip.ip.ip4 = h0->ip4.src_address;
+ t->remote_ip.version = AF_IP4;
+ t->local_port = h0->udp.dst_port;
+ t->remote_port = h0->udp.src_port;
+ if (!pvti_rx_peer0)
+ {
+ t->seq = 0xdeaddead;
+ }
+ else
+ {
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count = pvti0->chunk_count < MAX_CHUNKS ?
+ pvti0->chunk_count :
+ MAX_CHUNKS;
+ for (i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ }
+ }
+
+ pkts_processed += 1;
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+ }
+
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_rx_buffers,
+ &ptd->pending_rx_nexts,
+ vec_len (ptd->pending_rx_nexts));
+ vec_reset_length (ptd->pending_rx_buffers);
+ vec_reset_length (ptd->pending_rx_nexts);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_PROCESSED, pkts_processed);
+ vlib_node_increment_counter (
+ vm, node->node_index, PVTI_INPUT_ERROR_DECAPSULATED, pkts_decapsulated);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_NO_BUFFERS,
+ decap_failed_no_buffers);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/input.h b/src/plugins/pvti/input.h
new file mode 100644
index 00000000000..02a186cde05
--- /dev/null
+++ b/src/plugins/pvti/input.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_input_h__
+#define __included_pvti_input_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+typedef struct
+{
+ u16 total_chunk_length;
+} pvti_input_chunk_t;
+
+#define MAX_CHUNKS 32
+#define PVTI_RX_MAX_LENGTH 2048
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u32 seq;
+ pvti_input_chunk_t chunks[MAX_CHUNKS];
+ u8 chunk_count;
+ u8 trace_type;
+ u8 packet_data[64];
+} pvti_input_trace_t;
+
+#define foreach_pvti_input_trace_type \
+ _ (drop, "drop") \
+ _ (decap, "decapsulate") \
+ _ (free, "free") \
+ _ (enqueue, "enqueue")
+
+typedef enum
+{
+#define _(f, s) PVTI_INPUT_TRACE_##f,
+ foreach_pvti_input_trace_type
+#undef _
+ PVTI_INPUT_TRACE_N_TYPES,
+} pvti_input_trace_type_t;
+
+#define foreach_pvti_input_error \
+ _ (PROCESSED, "PVTI tunneled packets processed") \
+ _ (DECAPSULATED, "PVTI inner packets decapsulated") \
+ _ (PEER, "Could not find a peer") \
+ _ (NOCHUNKS, "Packet has no chunks") \
+ _ (NO_BUFFERS, "No buffers available to decapsulate") \
+ _ (TOOMANYREASS, "Packet has more reassembly chunks than total") \
+ _ (PACKET_TOO_SHORT, "Packet too short")
+
+typedef enum
+{
+#define _(sym, str) PVTI_INPUT_ERROR_##sym,
+ foreach_pvti_input_error
+#undef _
+ PVTI_INPUT_N_ERROR,
+} pvti_input_error_t;
+
+typedef enum
+{
+ PVTI_INPUT_NEXT_DROP,
+ PVTI_INPUT_NEXT_IP4_INPUT,
+ PVTI_INPUT_NEXT_IP6_INPUT,
+ PVTI_INPUT_NEXT_PUNT,
+ PVTI_INPUT_N_NEXT,
+} pvti_input_next_t;
+
+#endif // pvti_input_h
diff --git a/src/plugins/pvti/output-main.c b/src/plugins/pvti/output-main.c
new file mode 100644
index 00000000000..ae4ae5f8e98
--- /dev/null
+++ b/src/plugins/pvti/output-main.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/output.h>
+
+/* packet trace format function */
+static u8 *
+format_pvti_output_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_output_trace_t *t = va_arg (*args, pvti_output_trace_t *);
+
+ u32 indent = format_get_indent (s);
+ s =
+ format (s, "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,",
+ t->trace_type, t->sw_if_index, t->next_index, t->underlay_mtu);
+ s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d",
+ format_white_space, indent, t->stream_index,
+ t->bi0_max_current_length, t->tx_seq);
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ip_adjacency_packet_data, t->packet_data,
+ sizeof (t->packet_data));
+
+ return s;
+}
+
+vlib_node_registration_t pvti_output_node;
+
+static char *pvti_output_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_output_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (pvti4_output_node) =
+{
+ .name = "pvti4-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(pvti_output_error_strings),
+ .error_strings = pvti_output_error_strings,
+
+ .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+ [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+
+};
+VLIB_REGISTER_NODE (pvti6_output_node) =
+{
+ .name = "pvti6-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(pvti_output_error_strings),
+ .error_strings = pvti_output_error_strings,
+
+ .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+ [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+
+};
diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c
new file mode 100644
index 00000000000..1939c6f585a
--- /dev/null
+++ b/src/plugins/pvti/output.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/output.h>
+
+static_always_inline u32
+ip6_vtcfl (u8 stream_index)
+{
+ u32 vtcfl = 0x6 << 28;
+ vtcfl |= stream_index;
+
+ return (clib_host_to_net_u32 (vtcfl));
+}
+
+always_inline vlib_buffer_t *
+pvti_alloc_new_tx_buffer (vlib_main_t *vm)
+{
+ u32 bi0 = INDEX_INVALID;
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ return 0;
+ }
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ b0->current_data = 0;
+ b0->current_length = 0;
+ return b0;
+}
+
+always_inline bool
+pvti_find_or_try_create_tx_peer (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+ pvti_if_t *pvti_if0, ip_address_t *remote_ip,
+ u16 remote_port, u32 *out_index)
+{
+
+ pvti_tx_peer_t *peer;
+ pool_foreach (peer, ptd->tx_peers)
+ {
+ if (peer->remote_port == remote_port &&
+ 0 == ip_address_cmp (remote_ip, &peer->remote_ip))
+ {
+ if (peer->deleted)
+ {
+ // Bad luck, the peer has been deleted.
+ u32 boi0 = vlib_get_buffer_index (vm, peer->bo0);
+ if (peer->bo0)
+ {
+ vlib_buffer_free (vm, &boi0, 1);
+ }
+ clib_memset (peer, 0xca, sizeof (*peer));
+ pool_put (ptd->tx_peers, peer);
+ continue;
+ }
+ *out_index = peer - ptd->tx_peers;
+ return 1;
+ }
+ }
+
+ ip_address_family_t dst_ver = ip_addr_version (&pvti_if0->remote_ip);
+
+ u16 pvti_encap_overhead = (dst_ver == AF_IP6) ?
+ sizeof (pvti_ip6_encap_header_t) :
+ sizeof (pvti_ip4_encap_header_t);
+
+ u16 pvti_packet_overhead =
+ pvti_encap_overhead + sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES;
+
+ ASSERT (pvti_if0->underlay_mtu > pvti_packet_overhead);
+
+ u32 bo0_max_current_length = pvti_if0->underlay_mtu - pvti_packet_overhead;
+
+ vlib_buffer_t *bo0 = pvti_alloc_new_tx_buffer (vm);
+
+ if (!bo0)
+ {
+ return 0;
+ }
+
+ pvti_tx_peer_t new_peer = {
+ .local_ip = pvti_if0->local_ip,
+ .remote_ip = *remote_ip,
+ .local_port = pvti_if0->local_port,
+ .remote_port = remote_port,
+ .underlay_mtu = pvti_if0->underlay_mtu,
+ .underlay_fib_index = pvti_if0->underlay_fib_index,
+ .bo0_max_current_length = bo0_max_current_length,
+ .pvti_if_index = pvti_if_get_index (pvti_if0),
+ .deleted = 0,
+ .bo0 = bo0,
+ .chunk_count = 0,
+ .reass_chunk_count = 0,
+ .current_tx_seq = 42,
+ };
+
+ pvti_tx_peer_t *tx_new_peer;
+ pool_get (ptd->tx_peers, tx_new_peer);
+
+ *tx_new_peer = new_peer;
+ *out_index = tx_new_peer - ptd->tx_peers;
+ return 1;
+}
+
+always_inline bool
+pvti_try_get_tx_peer_index (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+ pvti_if_t *pvti_if0, vlib_buffer_t *b0,
+ bool is_ip6, u32 *out_index)
+{
+ if (pvti_if0->peer_address_from_payload)
+ {
+ ip_address_t remote_ip = { 0 };
+ if (is_ip6)
+ {
+ ip6_header_t *ip6 = vlib_buffer_get_current (b0);
+ ip_address_set (&remote_ip, &ip6->dst_address, AF_IP6);
+ }
+ else
+ {
+ ip4_header_t *ip4 = vlib_buffer_get_current (b0);
+ ip_address_set (&remote_ip, &ip4->dst_address, AF_IP4);
+ }
+ return pvti_find_or_try_create_tx_peer (
+ vm, ptd, pvti_if0, &remote_ip, pvti_if0->remote_port, out_index);
+ }
+ else
+ {
+ return pvti_find_or_try_create_tx_peer (
+ vm, ptd, pvti_if0, &pvti_if0->remote_ip, pvti_if0->remote_port,
+ out_index);
+ }
+ /* not reached */
+}
+
+always_inline void
+pvti_finalize_chunk (pvti_tx_peer_t *tx_peer,
+ pvti_chunk_header_t *chunk_header, u8 *tail,
+ bool is_reassembly_chunk)
+{
+ clib_memset (chunk_header, 0xab, sizeof (pvti_chunk_header_t));
+ chunk_header->total_chunk_length =
+ clib_host_to_net_u16 (tail - (u8 *) chunk_header);
+ tx_peer->chunk_count++;
+ if (is_reassembly_chunk)
+ {
+ tx_peer->reass_chunk_count++;
+ }
+}
+
+always_inline pvti_output_next_t
+encap_pvti_buffer_ip46 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_tx_peer_t *tx_peer, int is_ip6)
+{
+ ip_address_family_t src_ver = ip_addr_version (&tx_peer->local_ip);
+ ip_address_family_t dst_ver = ip_addr_version (&tx_peer->remote_ip);
+ u8 stream_index = 0;
+
+ ASSERT (src_ver == dst_ver);
+ bool is_ip6_encap = (AF_IP6 == src_ver);
+
+ vlib_buffer_t *b0 = tx_peer->bo0;
+ vlib_buffer_advance (b0,
+ -(sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES));
+
+ pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+ clib_memset (pvti0, 0xca, sizeof (*pvti0) + PVTI_ALIGN_BYTES);
+ pvti0->pad_bytes = PVTI_ALIGN_BYTES;
+
+ pvti0->seq = clib_host_to_net_u32 (tx_peer->current_tx_seq);
+ pvti0->stream_index = stream_index;
+ pvti0->reass_chunk_count = tx_peer->reass_chunk_count;
+ pvti0->chunk_count = tx_peer->chunk_count;
+ pvti0->mandatory_flags_mask = 0;
+ pvti0->flags_value = 0;
+
+ if (is_ip6_encap)
+ {
+ vlib_buffer_advance (b0, -(sizeof (pvti_ip6_encap_header_t)));
+ if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+ {
+ // undo the change
+ vlib_buffer_advance (b0, (sizeof (pvti_ip6_encap_header_t)));
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+ return PVTI_OUTPUT_NEXT_DROP;
+ }
+ pvti_ip6_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+ ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+ ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+ ve->udp.length = clib_host_to_net_u16 (
+ b0->current_length - offsetof (pvti_ip6_encap_header_t, udp));
+ ve->udp.checksum = 0;
+
+ ve->ip6.ip_version_traffic_class_and_flow_label =
+ ip6_vtcfl (stream_index);
+ ve->ip6.payload_length = ve->udp.length;
+ ve->ip6.protocol = 17;
+ ve->ip6.hop_limit = 128;
+ ip_address_copy_addr (&ve->ip6.src_address, &tx_peer->local_ip);
+ ip_address_copy_addr (&ve->ip6.dst_address, &tx_peer->remote_ip);
+ }
+ else
+ {
+ vlib_buffer_advance (b0, -(sizeof (pvti_ip4_encap_header_t)));
+ if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+ {
+ // undo the change
+ vlib_buffer_advance (b0, (sizeof (pvti_ip4_encap_header_t)));
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+ return PVTI_OUTPUT_NEXT_DROP;
+ }
+ pvti_ip4_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+ ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+ ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+ ve->udp.length = clib_host_to_net_u16 (
+ b0->current_length - offsetof (pvti_ip4_encap_header_t, udp));
+ ve->udp.checksum = 0;
+
+ ve->ip4.ip_version_and_header_length = 0x45;
+ ve->ip4.tos = 0;
+ ve->ip4.length = clib_host_to_net_u16 (b0->current_length);
+ ve->ip4.fragment_id =
+ clib_host_to_net_u16 (tx_peer->current_tx_seq & 0xffff);
+ ve->ip4.flags_and_fragment_offset = 0;
+ ve->ip4.ttl = 128;
+ ve->ip4.protocol = 17;
+
+ ve->ip4.dst_address.as_u32 = ip_addr_v4 (&tx_peer->remote_ip).data_u32;
+ ve->ip4.src_address.as_u32 = ip_addr_v4 (&tx_peer->local_ip).data_u32;
+ ve->ip4.checksum = ip4_header_checksum (&ve->ip4);
+ }
+
+ // This is important, if not reset, causes a crash
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = tx_peer->underlay_fib_index;
+
+ // vnet_buffer (b0)->oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ return is_ip6_encap ? PVTI_OUTPUT_NEXT_IP6_LOOKUP :
+ PVTI_OUTPUT_NEXT_IP4_LOOKUP;
+}
+
+always_inline void
+pvti_enqueue_tx_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+ u16 next0, u8 stream_index, pvti_tx_peer_t *tx_peer)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ tx_peer->is_bo0_traced))
+ {
+ if (PREDICT_TRUE (
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0)))
+ {
+
+ pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->underlay_mtu = tx_peer->underlay_mtu;
+ t->stream_index = stream_index;
+ t->trace_type = 1;
+ t->bi0_max_current_length = tx_peer->bo0_max_current_length;
+ t->tx_seq = tx_peer->current_tx_seq;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ }
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+ vec_add1 (ptd->pending_tx_buffers, bi0);
+ vec_add1 (ptd->pending_tx_nexts, next0);
+}
+
+always_inline void
+pvti_enqueue_tx_drop_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+ u8 stream_index)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = PVTI_OUTPUT_NEXT_DROP;
+ t->stream_index = stream_index;
+ t->trace_type = 0;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+ vec_add1 (ptd->pending_tx_buffers, bi0);
+ vec_add1 (ptd->pending_tx_nexts, PVTI_OUTPUT_NEXT_DROP);
+}
+
+always_inline bool
+pvti_flush_peer_and_recharge (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, u32 tx_peer_index,
+ u8 stream_index, const bool is_ip6)
+{
+ pvti_tx_peer_t *tx_peer = pool_elt_at_index (ptd->tx_peers, tx_peer_index);
+ u16 next0 = encap_pvti_buffer_ip46 (vm, node, tx_peer, is_ip6);
+
+ pvti_enqueue_tx_and_trace (vm, node, ptd, tx_peer->bo0, next0, stream_index,
+ tx_peer);
+
+ tx_peer->bo0 = pvti_alloc_new_tx_buffer (vm);
+ tx_peer->reass_chunk_count = 0;
+ tx_peer->chunk_count = 0;
+ tx_peer->current_tx_seq++;
+
+ return 1;
+}
+
+always_inline u16
+pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, const bool is_ip6)
+{
+ pvti_main_t *pvm = &pvti_main;
+
+ u32 n_left_from, *from;
+ u32 pkts_encapsulated = 0;
+ u32 pkts_processed = 0;
+ u32 pkts_chopped = 0;
+ u32 pkts_overflow = 0;
+ u32 pkts_overflow_cantfit = 0;
+
+ bool is_node_traced = (node->flags & VLIB_NODE_FLAG_TRACE) ? 1 : 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ u8 stream_index = pvti_get_stream_index (is_ip6);
+
+ u32 thread_index = vlib_get_thread_index ();
+ pvti_per_thread_data_t *ptd =
+ vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+ vlib_buffer_t *ibufs[VLIB_FRAME_SIZE], **ib = ibufs;
+
+ vlib_get_buffers (vm, from, ibufs, n_left_from);
+
+ n_left_from = frame->n_vectors;
+ while (1 && n_left_from > 0)
+ {
+ n_left_from -= 1;
+ vlib_buffer_t *b0 = ib[0];
+ ib++;
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+ bool is_b0_traced =
+ is_node_traced && ((b0->flags & VLIB_BUFFER_IS_TRACED) ? 1 : 0);
+ pkts_processed += 1;
+
+ u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ u32 pvti_index0 = pvti_if_find_by_sw_if_index (sw_if_index0);
+ if (pvti_index0 == INDEX_INVALID)
+ {
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_PEER];
+ pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+ continue;
+ }
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_index0);
+ u32 tx_peer_index;
+ if (!pvti_try_get_tx_peer_index (vm, ptd, pvti_if0, b0, is_ip6,
+ &tx_peer_index))
+ {
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_MAKE_PEER];
+ pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+ continue;
+ }
+ pvti_tx_peer_t *tx_peer = &ptd->tx_peers[tx_peer_index];
+
+ u32 b0_len = vlib_buffer_length_in_chain (vm, b0);
+ u32 total_chunk_len = sizeof (pvti_chunk_header_t) + b0_len;
+
+ if (tx_peer->bo0_max_current_length >=
+ tx_peer->bo0->current_length + total_chunk_len)
+ {
+ /* Happy case, we can fit the entire new chunk */
+ pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+ tx_peer->bo0, sizeof (pvti_chunk_header_t));
+ u8 *tail = vlib_buffer_put_uninit (tx_peer->bo0, b0_len);
+ vlib_buffer_t *b0_curr;
+ b0_curr = b0;
+ while (b0_len > 0)
+ {
+ clib_memcpy (tail, vlib_buffer_get_current (b0_curr),
+ b0_curr->current_length);
+ tail += b0_curr->current_length;
+ b0_len -= b0_curr->current_length;
+ ASSERT ((b0_len == 0) ||
+ (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+ if (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+ }
+ }
+ tx_peer->is_bo0_traced |= is_b0_traced;
+ pvti_finalize_chunk (tx_peer, chunk_header, tail, false);
+ }
+ else
+ {
+ bool is_reassembly = false;
+ /* FIXME: here, flush a packet if we want to avoid fragmenting it */
+#define PVTI_TINY_PACKET_SZ 20
+ int threshold_len =
+ sizeof (pvti_chunk_header_t) + PVTI_TINY_PACKET_SZ;
+
+ /* Can we fit anything meaningful into bo0 ? if not - flush */
+ if (tx_peer->bo0_max_current_length <=
+ tx_peer->bo0->current_length + threshold_len)
+ {
+ if (!pvti_flush_peer_and_recharge (vm, node, ptd, tx_peer_index,
+ stream_index, is_ip6))
+ {
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE0];
+ pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+ stream_index);
+ continue;
+ }
+ pkts_encapsulated += 1;
+ }
+
+ pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+ tx_peer->bo0, sizeof (pvti_chunk_header_t));
+
+ u8 *tail;
+ vlib_buffer_t *b0_curr;
+ /* append the chained buffers and flush as necessary */
+ b0_curr = b0;
+
+ int curr_b0_start_offset = 0;
+
+ while (b0_len > 0)
+ {
+ ASSERT (tx_peer->bo0_max_current_length >
+ tx_peer->bo0->current_length);
+ int copy_len =
+ clib_min (b0_curr->current_length - curr_b0_start_offset,
+ tx_peer->bo0_max_current_length -
+ tx_peer->bo0->current_length);
+ tail = vlib_buffer_put_uninit (tx_peer->bo0, copy_len);
+ clib_memcpy (tail,
+ (u8 *) vlib_buffer_get_current (b0_curr) +
+ curr_b0_start_offset,
+ copy_len);
+ tail += copy_len;
+ b0_len -= copy_len;
+ // Advance the start offset or reset it if we copied the entire
+ // block
+ curr_b0_start_offset =
+ curr_b0_start_offset + copy_len == b0_curr->current_length ?
+ 0 :
+ curr_b0_start_offset + copy_len;
+ ASSERT ((b0_len == 0) || (curr_b0_start_offset > 0) ||
+ (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+ if (curr_b0_start_offset > 0)
+ {
+ pvti_finalize_chunk (tx_peer, chunk_header, tail,
+ is_reassembly);
+ tx_peer->is_bo0_traced |= is_b0_traced;
+ if (!pvti_flush_peer_and_recharge (
+ vm, node, ptd, tx_peer_index, stream_index, is_ip6))
+ {
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE1];
+ pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+ stream_index);
+ continue;
+ }
+ pkts_encapsulated += 1;
+ /* next chunk(s) will be reassembly until the next block */
+ is_reassembly = true;
+ chunk_header = vlib_buffer_put_uninit (
+ tx_peer->bo0, sizeof (pvti_chunk_header_t));
+ }
+ else
+ {
+ if ((b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+ }
+ else
+ {
+ pvti_finalize_chunk (tx_peer, chunk_header, tail,
+ is_reassembly);
+ tx_peer->is_bo0_traced |= is_b0_traced;
+ }
+ }
+ }
+ }
+ vlib_buffer_free_one (vm, bi0);
+ }
+
+ int i;
+ for (i = 0; i < vec_len (ptd->tx_peers); i++)
+ {
+ if (ptd->tx_peers[i].chunk_count)
+ {
+ pvti_flush_peer_and_recharge (vm, node, ptd, i, stream_index,
+ is_ip6);
+ pkts_encapsulated += 1;
+ }
+ }
+
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_tx_buffers,
+ &ptd->pending_tx_nexts,
+ vec_len (ptd->pending_tx_nexts));
+ vec_reset_length (ptd->pending_tx_buffers);
+ vec_reset_length (ptd->pending_tx_nexts);
+
+ vlib_node_increment_counter (
+ vm, node->node_index, PVTI_OUTPUT_ERROR_ENCAPSULATED, pkts_encapsulated);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_OUTPUT_ERROR_PROCESSED, pkts_processed);
+ vlib_node_increment_counter (vm, node->node_index, PVTI_OUTPUT_ERROR_CHOPPED,
+ pkts_chopped);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_OUTPUT_ERROR_OVERFLOW, pkts_overflow);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_OUTPUT_ERROR_OVERFLOW_CANTFIT,
+ pkts_overflow_cantfit);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_output_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_output_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/output.h b/src/plugins/pvti/output.h
new file mode 100644
index 00000000000..95e78ba9720
--- /dev/null
+++ b/src/plugins/pvti/output.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_output_h__
+#define __included_pvti_output_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 tx_seq;
+ u16 underlay_mtu;
+ u16 bi0_max_current_length;
+ u8 stream_index;
+ u8 trace_type;
+ u8 packet_data[96];
+} pvti_output_trace_t;
+
+#define foreach_pvti_output_error \
+ _ (NONE, "No error") \
+ _ (PROCESSED, "Packets processed") \
+ _ (ENCAPSULATED, "Packets encapsulated") \
+ _ (PEER, "No peer found") \
+ _ (MAKE_PEER, "Could not make peer") \
+ _ (RECHARGE0, "Could not recharge 0") \
+ _ (RECHARGE1, "Could not recharge 1") \
+ _ (NO_PRE_SPACE, "Not enought pre-data space") \
+ _ (CHOPPED, "Packets chopped") \
+ _ (OVERFLOW, "Packets overflowed") \
+ _ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess")
+
+typedef enum
+{
+#define _(sym, str) PVTI_OUTPUT_ERROR_##sym,
+ foreach_pvti_output_error
+#undef _
+ PVTI_OUTPUT_N_ERROR,
+} pvti_output_error_t;
+
+typedef enum
+{
+ PVTI_INDEPENDENT_CHUNK = 0,
+ PVTI_REASS_CHUNK,
+} pvti_chunk_type_t;
+
+#define MAX_CURR_LEN_UNKNOWN 0xffff
+
+typedef enum
+{
+ PVTI_OUTPUT_NEXT_DROP,
+ PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT,
+ PVTI_OUTPUT_NEXT_IP4_LOOKUP,
+ PVTI_OUTPUT_NEXT_IP6_LOOKUP,
+ PVTI_OUTPUT_N_NEXT,
+} pvti_output_next_t;
+
+#endif // pvti_output_h
diff --git a/src/plugins/pvti/pvti.api b/src/plugins/pvti/pvti.api
new file mode 100644
index 00000000000..859ed1ab6b0
--- /dev/null
+++ b/src/plugins/pvti/pvti.api
@@ -0,0 +1,111 @@
+/*
+ * pvti.api - binary API skeleton
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file pvti.api
+ * @brief VPP control-plane API messages.
+ *
+ * This file defines VPP control-plane binary API messages which are generally
+ * called through a shared memory interface.
+ */
+
+/* Version and type recitations */
+
+option version = "0.0.1";
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+/** \brief A composite type uniquely defining a PVTI tunnel.
+ @param sw_if_index - ignored on create/delete, present in details.
+ @param src_ip - Source IP address
+ @param src_port - Source UDP port
+ @param dst_ip - Destination IP address
+ @param dst_port - Destination UDP port
+ @param underlay_mtu - Underlay MTU for packet splitting/coalescing
+ @param underlay_fib_index - Underlay FIB index to be used after encap
+*/
+typedef pvti_tunnel
+{
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t local_ip;
+ u16 local_port;
+ vl_api_address_t remote_ip;
+ bool peer_address_from_payload;
+ u16 remote_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+};
+
+
+/** @brief API to enable / disable pvti on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable_disable - 1 to enable, 0 to disable the feature
+ @param sw_if_index - interface handle
+*/
+
+define pvti_interface_create
+{
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+define pvti_interface_create_reply
+{
+ option status="in_progress";
+ u32 context;
+ i32 retval;
+
+ /* Index for the newly created interface */
+ vl_api_interface_index_t sw_if_index;
+};
+
+autoreply define pvti_interface_delete {
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+};
+
+
+define pvti_interface_dump
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+define pvti_interface_details
+{
+ option status="in_progress";
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+
diff --git a/src/plugins/pvti/pvti.c b/src/plugins/pvti/pvti.c
new file mode 100644
index 00000000000..524eabc6f3f
--- /dev/null
+++ b/src/plugins/pvti/pvti.c
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/fib/fib_table.h>
+#include <pvti/pvti.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pmp->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/ip/ip_format_fns.h>
+
+pvti_main_t pvti_main;
+
+u8 *
+format_pvti_tx_peer_ptr (u8 *s, va_list *args)
+{
+ pvti_tx_peer_t *peer = va_arg (*args, pvti_tx_peer_t *);
+
+ s = format (
+ s,
+ "[%p]%s local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d "
+ "pvti_idx:%d b0_max_clen:%d cseq:%d chunk_count:%d reass_chunk_count:%d",
+ peer, peer->deleted ? " DELETED" : "", format_ip46_address,
+ &peer->local_ip, IP46_TYPE_ANY, peer->local_port, format_ip46_address,
+ &peer->remote_ip, IP46_TYPE_ANY, peer->remote_port, peer->underlay_mtu,
+ peer->underlay_fib_index, peer->pvti_if_index,
+ peer->bo0_max_current_length, peer->current_tx_seq, peer->chunk_count,
+ peer->reass_chunk_count);
+
+ return (s);
+}
+
+u8 *
+format_pvti_rx_peer_ptr (u8 *s, va_list *args)
+{
+ pvti_rx_peer_t *peer = va_arg (*args, pvti_rx_peer_t *);
+
+ s = format (s, "[%p]%s local:%U:%d remote:%U:%d pvti_idx:%d", peer,
+ peer->deleted ? " DELETED" : "", format_ip46_address,
+ &peer->local_ip, IP46_TYPE_ANY, peer->local_port,
+ format_ip46_address, &peer->remote_ip, IP46_TYPE_ANY,
+ peer->remote_port, peer->pvti_if_index);
+
+ return (s);
+}
+
+void
+pvti_verify_initialized (pvti_main_t *pvm)
+{
+ if (!pvm->is_initialized)
+ {
+ const int n_threads = vlib_get_n_threads ();
+ vec_validate (pvm->per_thread_data[0], n_threads - 1);
+ vec_validate (pvm->per_thread_data[1], n_threads - 1);
+ pvm->is_initialized = 1;
+ }
+}
+
+void
+vnet_int_pvti_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
+{
+ pvti_main_t *pvm = &pvti_main;
+
+ if (pool_is_free_index (pvm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return;
+
+ pvti_verify_initialized (pvm);
+
+ is_enable = !!is_enable;
+
+ if (is_ip6)
+ {
+ if (clib_bitmap_get (pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index) !=
+ is_enable)
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-pvti-bypass",
+ sw_if_index, is_enable, 0, 0);
+ pvm->bm_ip6_bypass_enabled_by_sw_if = clib_bitmap_set (
+ pvm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index, is_enable);
+ }
+ }
+ else
+ {
+ if (clib_bitmap_get (pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index) !=
+ is_enable)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-pvti-bypass",
+ sw_if_index, is_enable, 0, 0);
+ pvm->bm_ip4_bypass_enabled_by_sw_if = clib_bitmap_set (
+ pvm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index, is_enable);
+ }
+ }
+}
+
+static clib_error_t *
+set_ip_pvti_bypass (u32 is_ip6, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_pvti_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ return set_ip_pvti_bypass (0, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip_pvti_bypass_command, static) = {
+ .path = "set interface ip pvti-bypass",
+ .function = set_ip4_pvti_bypass,
+ .short_help = "set interface ip pvti-bypass <interface> [del]",
+};
+
+static clib_error_t *
+set_ip6_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ return set_ip_pvti_bypass (1, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip6_pvti_bypass_command, static) = {
+ .path = "set interface ip6 pvti-bypass",
+ .function = set_ip6_pvti_bypass,
+ .short_help = "set interface ip6 pvti-bypass <interface> [del]",
+};
+
+static clib_error_t *
+pvti_interface_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+
+ // pvti_main_t * pmp = &pvti_main;
+ u32 sw_if_index = ~0;
+ int rv = 0;
+ ip_address_t peer_ip = { 0 };
+ ip_address_t local_ip = { 0 };
+ u32 peer_port = 0;
+ u32 local_port = 12345;
+ u32 underlay_mtu = 1500;
+ u32 underlay_fib_index = ~0;
+ u32 underlay_table_id = ~0;
+ pvti_peer_address_method_t peer_address_method = PVTI_PEER_ADDRESS_FIXED;
+ bool peer_set = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "peer %U %d %d", unformat_ip_address, &peer_ip,
+ &peer_port, &local_port))
+ {
+ peer_set = 1;
+ }
+ else if (unformat (line_input, "underlay-mtu %d", &underlay_mtu))
+ {
+ // MTU set
+ }
+ else if (unformat (line_input, "local-ip %U", unformat_ip_address,
+ &local_ip))
+ {
+ // local IP set
+ }
+ else if (unformat (line_input, "underlay-fib %d", &underlay_fib_index))
+ {
+ // underlay fib set
+ }
+ else if (unformat (line_input, "peer-address-from-payload"))
+ {
+ peer_address_method = PVTI_PEER_ADDRESS_FROM_PAYLOAD;
+ }
+ else if (unformat (line_input, "underlay-table %d", &underlay_table_id))
+ {
+ fib_protocol_t fib_proto = FIB_PROTOCOL_IP4;
+ if (peer_ip.version == AF_IP6)
+ {
+ fib_proto = FIB_PROTOCOL_IP6;
+ }
+ u32 fib_index = fib_table_find (fib_proto, underlay_table_id);
+
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d",
+ underlay_table_id);
+ goto done;
+ }
+ underlay_fib_index = fib_index;
+ }
+ else
+ break;
+ }
+ if (!peer_set)
+ {
+ error = clib_error_return (0, "Please specify a peer...");
+ goto done;
+ }
+
+ rv = pvti_if_create (&local_ip, local_port, &peer_ip, peer_port,
+ peer_address_method, underlay_mtu, underlay_fib_index,
+ &sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ error = clib_error_return (0, "Invalid interface");
+ break;
+
+ default:
+ error = clib_error_return (0, "pvti_if_create returned %d", rv);
+ }
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+static clib_error_t *
+pvti_interface_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ // pvti_main_t * pmp = &pvti_main;
+ u32 sw_if_index = ~0;
+ int rv = 0;
+ bool if_index_set = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "if-index %d", &sw_if_index))
+ {
+ if_index_set = 1;
+ }
+ else
+ break;
+ }
+ if (!if_index_set)
+ return clib_error_return (0, "Please specify a sw_if_index...");
+
+ rv = pvti_if_delete (sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "Invalid interface");
+ break;
+
+ default:
+ return clib_error_return (0, "pvti_if_delete returned %d", rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (pvti_interface_create_command, static) = {
+ .path = "pvti interface create",
+ .short_help =
+ "pvti interface create peer <remote-ip> <remote-port> <local-port> [ "
+ "local-ip <ip-addr> ][ underlay-mtu <MTU>][underlay-table "
+ "<table-index>][inderlay-fib <fib-index>]",
+ .function = pvti_interface_create_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_interface_delete_command, static) = {
+ .path = "pvti interface delete",
+ .short_help = "pvti interface delete if-index <sw-ifindex>",
+ .function = pvti_interface_delete_command_fn,
+};
+
+static clib_error_t *
+pvti_show_interface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ pvti_if_t *pvti_if;
+ vec_foreach (pvti_if, pvti_main.if_pool)
+ {
+ int index = pvti_if - pvti_main.if_pool;
+ vlib_cli_output (vm, "%U", format_pvti_if, index);
+ };
+ return 0;
+}
+
+static clib_error_t *
+pvti_show_tx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ pvti_per_thread_data_t *ptd;
+ int is_ip6;
+ for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+ {
+ vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+ {
+ vlib_cli_output (vm, "thread %d (%s)",
+ ptd - pvti_main.per_thread_data[is_ip6],
+ is_ip6 ? "IPv6" : "IPv4");
+ pvti_tx_peer_t *peer;
+ vec_foreach (peer, ptd->tx_peers)
+ {
+ vlib_cli_output (vm, " %U", format_pvti_tx_peer_ptr, peer);
+ }
+ }
+ }
+ return 0;
+}
+
+static clib_error_t *
+pvti_show_rx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ pvti_per_thread_data_t *ptd;
+ int is_ip6;
+ for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+ {
+ vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+ {
+ vlib_cli_output (vm, "thread %d (%s)",
+ ptd - pvti_main.per_thread_data[is_ip6],
+ is_ip6 ? "IPv6" : "IPv4");
+ pvti_rx_peer_t *peer;
+ vec_foreach (peer, ptd->rx_peers)
+ {
+ vlib_cli_output (vm, " %U", format_pvti_rx_peer_ptr, peer);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (pvti_show_interface_command, static) = {
+ .path = "show pvti interface",
+ .short_help = "show pvti interface",
+ .function = pvti_show_interface_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_tx_peers_command, static) = {
+ .path = "show pvti tx peers",
+ .short_help = "show pvti tx peers",
+ .function = pvti_show_tx_peers_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_rx_peers_command, static) = {
+ .path = "show pvti rx peers",
+ .short_help = "show pvti rx peers",
+ .function = pvti_show_rx_peers_command_fn,
+};
+
+void pvti_api_init ();
+
+VNET_FEATURE_INIT (pvti4_bypass, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-pvti-bypass",
+ .runs_before = 0,
+};
+
+VNET_FEATURE_INIT (pvti6_bypass, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-pvti-bypass",
+ .runs_before = 0,
+};
+
+static clib_error_t *
+pvti_early_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ clib_warning ("early config pvti");
+ u8 *runs_before = 0;
+ int rbi = 0;
+ if (vec_len (vnet_feat_pvti4_bypass.runs_before) == 0)
+ {
+ rbi = 0;
+ }
+ else
+ {
+ rbi = vec_len (vnet_feat_pvti4_bypass.runs_before) - 1;
+ }
+ vec_validate (vnet_feat_pvti4_bypass.runs_before, rbi);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "runs-before %v", &runs_before))
+ {
+ vec_add1 (runs_before, 0);
+ vnet_feat_pvti4_bypass.runs_before[rbi] = (char *) runs_before;
+ vec_add1 (vnet_feat_pvti4_bypass.runs_before, 0);
+ }
+ else
+ return clib_error_return (0, "unknown input");
+ }
+
+ return NULL;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (pvti_early_config, "pvti");
+
+static clib_error_t *
+pvti_init (vlib_main_t *vm)
+{
+ pvti_main_t *pmp = &pvti_main;
+ clib_error_t *error = 0;
+ clib_warning ("pvti init");
+
+ pmp->vlib_main = vm;
+ pmp->vnet_main = vnet_get_main ();
+ pmp->is_initialized = 0;
+
+ pvti_api_init ();
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pvti_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Packet Vector Tunnel Interface plugin",
+};
diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h
new file mode 100644
index 00000000000..ac097c5ecca
--- /dev/null
+++ b/src/plugins/pvti/pvti.h
@@ -0,0 +1,257 @@
+/*
+ * pvti.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_h__
+#define __included_pvti_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#define VPP_MAX_THREADS (1 << 8)
+
+#define MAX_RX_STREAMS 256
+
+#define PVTI_ALIGN_BYTES 9
+
+typedef CLIB_PACKED (struct {
+ u32 seq;
+ u8 stream_index; // set to the cpu# on the sending side
+ u8 chunk_count;
+ u8 reass_chunk_count; // number of chunks in the front that are related to
+ // previously started buffer
+ // mandatory_flags_mask highlights which of the flags cause packet drop if
+ // not understood, and which of them can be just ignored.
+ u8 mandatory_flags_mask;
+ u8 flags_value;
+ u8 pad_bytes;
+ u8 pad[0];
+}) pvti_packet_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ udp_header_t udp;
+ // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip4_encap_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ udp_header_t udp;
+ // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip6_encap_header_t;
+
+typedef CLIB_PACKED (struct {
+ u16 total_chunk_length;
+ // More fragments: this chunk is not the last block fragment
+#define CHUNK_FLAGS_MF (1 << 0)
+ // More blocks: this block has chained blocks that follow
+#define CHUNK_FLAGS_MB (1 << 1)
+ u16 _pad0;
+ u32 _pad1;
+ u8 chunk_data[0];
+}) pvti_chunk_header_t;
+
+typedef struct
+{
+ // a buffer being built from the smaller packets
+ u32 bi0;
+
+ // how big can this buffer grow
+ u32 bi0_max_current_length;
+
+ // how many chunks are already in the buffer
+ u8 chunk_count;
+ // leading reassembly chunk count
+ u8 reass_chunk_count;
+
+ u32 current_tx_seq;
+} pvti_per_tx_stream_data_t;
+
+typedef struct
+{
+ /* The seq# that we last processed */
+ u32 last_rx_seq;
+
+ // a current buffer that is being reassembled
+ u32 rx_bi0;
+ // The root buffer, most of the times == rx_bi0 except in the case of chained
+ // buffers.
+ u32 rx_bi0_first;
+
+ // Next index for dispatch when the reassembly is done
+ u16 rx_next0;
+ // expected totall inner length for the packet
+ u16 rx_expected_inner_length;
+ u16 rx_received_inner_length;
+
+} pvti_per_rx_stream_data_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+
+ u32 pvti_if_index;
+ bool deleted;
+ bool is_bo0_traced;
+
+ u32 bo0_max_current_length;
+
+ u8 chunk_count;
+ u8 reass_chunk_count;
+ u32 current_tx_seq;
+ vlib_buffer_t *bo0;
+
+} pvti_tx_peer_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+
+ pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS];
+
+ u32 pvti_if_index;
+ bool deleted;
+} pvti_rx_peer_t;
+
+typedef struct
+{
+ /* pool of destination-based structures which are used to build the packets
+ */
+ pvti_tx_peer_t *tx_peers;
+
+ /* vector of buffers to send */
+ u32 *pending_tx_buffers;
+ u16 *pending_tx_nexts;
+ /* pool of source-based structures for the remote peers' data tracking
+ */
+ pvti_rx_peer_t *rx_peers;
+
+ /* vector of buffers being decapsulated */
+ u32 *pending_rx_buffers;
+ u16 *pending_rx_nexts;
+
+} pvti_per_thread_data_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+ bool peer_address_from_payload;
+ u64 created_at;
+
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+ // per-stream data for TX
+ pvti_per_tx_stream_data_t tx_streams[256];
+ pvti_per_rx_stream_data_t rx_streams[256];
+
+} pvti_if_t;
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* have we initialized the data structures ? */
+ bool is_initialized;
+
+ /* interface pool */
+ pvti_if_t *if_pool;
+
+ /* if_index in the pool above by sw_if_index */
+ index_t *if_index_by_sw_if_index;
+
+ /* indices by port */
+ index_t **if_indices_by_port;
+
+ /* per-thread data, ip4[0] and ip6[1] */
+ pvti_per_thread_data_t *per_thread_data[2];
+
+ /* on/off switch for the periodic function */
+ u8 periodic_timer_enabled;
+ /* Node index, non-zero if the periodic process has been created */
+ u32 periodic_node_index;
+
+ /* graph node state */
+ uword *bm_ip4_bypass_enabled_by_sw_if;
+ uword *bm_ip6_bypass_enabled_by_sw_if;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+} pvti_main_t;
+
+extern pvti_main_t pvti_main;
+
+extern vlib_node_registration_t pvti_node;
+extern vlib_node_registration_t pvti4_input_node;
+extern vlib_node_registration_t pvti4_output_node;
+extern vlib_node_registration_t pvti6_input_node;
+extern vlib_node_registration_t pvti6_output_node;
+extern vlib_node_registration_t pvti_periodic_node;
+
+always_inline u8
+pvti_get_stream_index (int is_ip6)
+{
+ u32 thread_index = vlib_get_thread_index ();
+
+ ASSERT ((thread_index & 0xffffff80) == 0);
+
+ u8 stream_index = (thread_index & 0x7f) | (is_ip6 ? 0x80 : 0);
+ return stream_index;
+}
+
+/* attempt to get a new buffer */
+always_inline u32
+pvti_get_new_buffer (vlib_main_t *vm)
+{
+ u32 bi0 = INDEX_INVALID;
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ return INDEX_INVALID;
+ }
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ b0->current_data = 0;
+ b0->current_length = 0;
+ return bi0;
+}
+
+/* Periodic function events */
+#define PVTI_EVENT1 1
+#define PVTI_EVENT2 2
+#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3
+
+void pvti_create_periodic_process (pvti_main_t *);
+void pvti_verify_initialized (pvti_main_t *pvm);
+
+#endif /* __included_pvti_h__ */
diff --git a/src/plugins/pvti/pvti_if.c b/src/plugins/pvti/pvti_if.c
new file mode 100644
index 00000000000..4f83994a1a4
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Copyright (c) 2020 Doc.ai and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/udp/udp.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+static u8 *
+format_pvti_if_name (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ // wg_if_t *wgi = wg_if_get (dev_instance);
+ return format (s, "pvti%d", dev_instance);
+}
+
+u8 *
+format_pvti_if (u8 *s, va_list *args)
+{
+ index_t pvtii = va_arg (*args, u32);
+ pvti_if_t *pvti_if = pvti_if_get (pvtii);
+
+ s = format (
+ s, "[%d] %U local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d",
+ pvtii, format_vnet_sw_if_index_name, vnet_get_main (),
+ pvti_if->sw_if_index, format_ip46_address, &pvti_if->local_ip,
+ IP46_TYPE_ANY, pvti_if->local_port, format_ip46_address,
+ &pvti_if->remote_ip, IP46_TYPE_ANY, pvti_if->remote_port,
+ pvti_if->underlay_mtu, pvti_if->underlay_fib_index);
+
+ return (s);
+}
+
+index_t
+pvti_if_find_by_sw_if_index (u32 sw_if_index)
+{
+ if (vec_len (pvti_main.if_index_by_sw_if_index) <= sw_if_index)
+ return INDEX_INVALID;
+ u32 ti = pvti_main.if_index_by_sw_if_index[sw_if_index];
+ if (ti == ~0)
+ return INDEX_INVALID;
+
+ return (ti);
+}
+
+index_t
+pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+ u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->remote_ip.version == AF_IP4) &&
+ ((ifc->remote_ip.ip.ip4.as_u32 == remote_ip4->as_u32) ||
+ ifc->peer_address_from_payload))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+index_t
+pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
+ u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->remote_ip.version == AF_IP6) &&
+ ((0 == memcmp (&ifc->remote_ip.ip.ip6, remote_ip6,
+ sizeof (*remote_ip6))) ||
+ ifc->peer_address_from_payload))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+index_t
+pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->peer_address_from_payload ||
+ (0 == ip_address_cmp (remote_ip, &ifc->remote_ip))))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+static void
+pvti_add_tidx_by_port (index_t t_index, u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ vec_validate_init_empty (pvm->if_indices_by_port, port, NULL);
+ vec_add1 (pvm->if_indices_by_port[port], t_index);
+}
+
+static void
+pvti_del_tidx_by_port (index_t t_index, u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ index_t *ii;
+ if (!pvm->if_indices_by_port)
+ {
+ return;
+ }
+ if (port >= vec_len (pvm->if_indices_by_port))
+ {
+ return;
+ }
+ if (vec_len (pvm->if_indices_by_port[port]) == 0)
+ {
+ ALWAYS_ASSERT (pvm->if_indices_by_port[port] > 0);
+ /* not reached */
+ return;
+ }
+
+ vec_foreach (ii, pvm->if_indices_by_port[port])
+ {
+ if (*ii == t_index)
+ {
+ vec_del1 (pvm->if_indices_by_port[port],
+ pvm->if_indices_by_port[port] - ii);
+ break;
+ }
+ }
+}
+
+static u32
+pvti_get_tunnel_count_by_port (u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ if (!pvm->if_indices_by_port)
+ {
+ return 0;
+ }
+ return vec_len (vec_elt (pvm->if_indices_by_port, port));
+}
+
+static clib_error_t *
+pvti_if_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ // vnet_hw_interface_t *hi;
+ u32 hw_flags;
+
+ // hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hw_flags =
+ (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? VNET_HW_INTERFACE_FLAG_LINK_UP :
+ 0);
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return (NULL);
+}
+
+void
+pvti_if_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
+{
+
+ /* Convert any neighbour adjacency that has a next-hop reachable through
+ * the wg interface into a midchain. This is to avoid sending ARP/ND to
+ * resolve the next-hop address via the wg interface. Then, if one of the
+ * peers has matching prefix among allowed prefixes, the midchain will be
+ * updated to the corresponding one.
+ */
+ adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL);
+
+ // wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ // wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai);
+}
+
+VNET_DEVICE_CLASS (pvti_if_device_class) = {
+ .name = "Packet Vectorizer Tunnel",
+ .format_device_name = format_pvti_if_name,
+ .admin_up_down_function = pvti_if_admin_up_down,
+};
+
+VNET_HW_INTERFACE_CLASS (pvti_hw_interface_class) = {
+ .name = "PVTunnel",
+ .update_adjacency = pvti_if_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ // .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
+};
+
+int
+pvti_if_create (ip_address_t *local_ip, u16 local_port,
+ ip_address_t *remote_ip, u16 remote_port,
+ pvti_peer_address_method_t peer_address_method,
+ u16 underlay_mtu, u32 underlay_fib_index, u32 *sw_if_indexp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ pvti_main_t *pvm = &pvti_main;
+ u32 hw_if_index;
+ vnet_hw_interface_t *hi;
+ pvti_verify_initialized (pvm);
+
+ pvti_if_t *pvti_if;
+
+ ASSERT (sw_if_indexp);
+
+ *sw_if_indexp = (u32) ~0;
+
+ pool_get_zero (pvti_main.if_pool, pvti_if);
+ pvti_if->local_ip = *local_ip;
+ pvti_if->local_port = local_port;
+ pvti_if->remote_ip = *remote_ip;
+ if (peer_address_method == PVTI_PEER_ADDRESS_FROM_PAYLOAD)
+ {
+ pvti_if->peer_address_from_payload = 1;
+ }
+ pvti_if->remote_port = remote_port;
+ pvti_if->underlay_mtu = underlay_mtu;
+ pvti_if->underlay_fib_index = underlay_fib_index;
+ pvti_if->created_at = clib_cpu_time_now ();
+
+ /* tunnel index (or instance) */
+ u32 t_idx = pvti_if - pvti_main.if_pool;
+
+ hw_if_index =
+ vnet_register_interface (vnm, pvti_if_device_class.index, t_idx,
+ pvti_hw_interface_class.index, t_idx);
+
+ pvti_if->hw_if_index = hw_if_index;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ pvti_if->sw_if_index = *sw_if_indexp = hi->sw_if_index;
+
+ vec_validate_init_empty (pvm->if_index_by_sw_if_index, hi->sw_if_index,
+ INDEX_INVALID);
+
+ vec_elt (pvm->if_index_by_sw_if_index, hi->sw_if_index) = t_idx;
+ pvti_if_t *pvti_if0 = pool_elt_at_index (pvti_main.if_pool, t_idx);
+ int i;
+ for (i = 0; i < 256; i++)
+ {
+ pvti_if0->tx_streams[i].bi0 = INDEX_INVALID;
+ pvti_if0->tx_streams[i].current_tx_seq = 42;
+
+ pvti_if0->rx_streams[i].rx_bi0 = INDEX_INVALID;
+ pvti_if0->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+ }
+
+ /*
+ int is_ip6 = 0;
+ u32 encap_index = !is_ip6 ?
+ pvti4_output_node.index : pvti6_output_node.index;
+ vnet_set_interface_output_node (vnm, pvti_if->hw_if_index, encap_index);
+ */
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "pvti4-output");
+
+ pvti_add_tidx_by_port (t_idx, local_port);
+ if (1 == pvti_get_tunnel_count_by_port (local_port))
+ {
+ clib_warning ("Registering local port %d", local_port);
+ udp_register_dst_port (vlib_get_main (), local_port,
+ pvti4_input_node.index, UDP_IP4);
+ udp_register_dst_port (vlib_get_main (), local_port,
+ pvti6_input_node.index, UDP_IP6);
+ }
+ else
+ {
+ clib_warning ("Not registering the port");
+ }
+
+ vnet_hw_interface_set_flags (vnm, pvti_if->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ return 0;
+}
+
+void
+pvti_if_walk (pvti_if_walk_cb_t fn, void *data)
+{
+ index_t pvtii;
+
+ pool_foreach_index (pvtii, pvti_main.if_pool)
+ {
+ if (WALK_STOP == fn (pvtii, data))
+ break;
+ }
+}
+
+int
+pvti_if_delete (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ pvti_main_t *pvm = &pvti_main;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hw == 0 || hw->dev_class_index != pvti_if_device_class.index)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pvti_if_t *ifc;
+ bool found = 0;
+ pool_foreach (ifc, pvm->if_pool)
+ {
+ if (ifc->sw_if_index == sw_if_index)
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ index_t tidx = ifc - pvm->if_pool;
+
+ u16 local_port = ifc->local_port;
+ pvti_del_tidx_by_port (tidx, local_port);
+ pvm->if_index_by_sw_if_index[sw_if_index] = INDEX_INVALID;
+
+ if (0 == pvti_get_tunnel_count_by_port (local_port))
+ {
+ udp_unregister_dst_port (vlib_get_main (), local_port, 1);
+ udp_unregister_dst_port (vlib_get_main (), local_port, 0);
+ }
+
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
+ vnet_delete_hw_interface (vnm, hw->hw_if_index);
+ pool_put (pvti_main.if_pool, ifc);
+
+ /* mark per-thread peers as deleted */
+ pvti_per_thread_data_t *ptd;
+
+ vec_foreach (ptd, pvm->per_thread_data[0])
+ {
+ pvti_tx_peer_t *peer;
+ vec_foreach (peer, ptd->tx_peers)
+ {
+ if (tidx == peer->pvti_if_index)
+ {
+ peer->deleted = 1;
+ }
+ }
+ }
+ vec_foreach (ptd, pvm->per_thread_data[1])
+ {
+ pvti_tx_peer_t *peer;
+ vec_foreach (peer, ptd->tx_peers)
+ {
+ if (tidx == peer->pvti_if_index)
+ {
+ peer->deleted = 1;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/src/plugins/pvti/pvti_if.h b/src/plugins/pvti/pvti_if.h
new file mode 100644
index 00000000000..44bf22ce825
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.h
@@ -0,0 +1,47 @@
+#ifndef PVTI_IF_H
+#define PVTI_IF_H
+
+#include <vnet/interface_funcs.h>
+
+typedef enum
+{
+ PVTI_PEER_ADDRESS_FIXED = 0,
+ PVTI_PEER_ADDRESS_FROM_PAYLOAD
+} pvti_peer_address_method_t;
+
+typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t wgi, void *data);
+void pvti_if_walk (pvti_if_walk_cb_t fn, void *data);
+
+int pvti_if_create (ip_address_t *local_ip, u16 local_port,
+ ip_address_t *remote_ip, u16 remote_port,
+ pvti_peer_address_method_t peer_address_method,
+ u16 underlay_mtu, u32 underlay_fib_index,
+ u32 *sw_if_indexp);
+index_t pvti_if_find_by_sw_if_index (u32 sw_if_index);
+index_t pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+ u16 remote_port);
+index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip4,
+ u16 remote_port);
+
+index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip,
+ u16 remote_port);
+
+int pvti_if_delete (u32 sw_if_index);
+
+u8 *format_pvti_if (u8 *s, va_list *args);
+
+static_always_inline pvti_if_t *
+pvti_if_get (index_t pvtii)
+{
+ if (INDEX_INVALID == pvtii)
+ return (NULL);
+ return (pool_elt_at_index (pvti_main.if_pool, pvtii));
+}
+
+static_always_inline index_t
+pvti_if_get_index (pvti_if_t *pvti_if)
+{
+ return pvti_if - pvti_main.if_pool;
+}
+
+#endif