summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2023-10-17 16:06:26 +0000
committer Damjan Marion <damarion@cisco.com> 2023-11-02 13:41:32 +0000
commit 38c619115b0399bae8b0dcf66e57e623cc50809c (patch)
tree 2d40af83187c2dce0e971328ab7add4d9940ac57 /src
parent d3ef00098cd27e01bb24db15e3440fabbc025aa8 (diff)
dev: new device driver infra
Type: feature Change-Id: I20c56e0d3103624407f18365c2bc1273dea5c199 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src')
-rw-r--r--src/vlib/linux/pci.c48
-rw-r--r--src/vlib/pci/pci.h5
-rw-r--r--src/vnet/CMakeLists.txt14
-rw-r--r--src/vnet/dev/api.c241
-rw-r--r--src/vnet/dev/api.h62
-rw-r--r--src/vnet/dev/cli.c315
-rw-r--r--src/vnet/dev/config.c182
-rw-r--r--src/vnet/dev/counters.c132
-rw-r--r--src/vnet/dev/counters.h128
-rw-r--r--src/vnet/dev/dev.c456
-rw-r--r--src/vnet/dev/dev.h701
-rw-r--r--src/vnet/dev/dev_funcs.h251
-rw-r--r--src/vnet/dev/error.c29
-rw-r--r--src/vnet/dev/errors.h42
-rw-r--r--src/vnet/dev/format.c405
-rw-r--r--src/vnet/dev/handlers.c225
-rw-r--r--src/vnet/dev/log.h23
-rw-r--r--src/vnet/dev/mgmt.h10
-rw-r--r--src/vnet/dev/pci.c447
-rw-r--r--src/vnet/dev/pci.h78
-rw-r--r--src/vnet/dev/port.c678
-rw-r--r--src/vnet/dev/process.c474
-rw-r--r--src/vnet/dev/process.h10
-rw-r--r--src/vnet/dev/queue.c227
-rw-r--r--src/vnet/dev/runtime.c174
-rw-r--r--src/vnet/dev/types.h66
-rw-r--r--src/vnet/ethernet/p2p_ethernet.c5
-rw-r--r--src/vnet/handoff.c2
-rw-r--r--src/vnet/interface_funcs.h4
-rw-r--r--src/vnet/l2/l2_patch.c4
-rw-r--r--src/vnet/span/span.c3
-rw-r--r--src/vppinfra/types.h13
-rw-r--r--src/vppinfra/vec_bootstrap.h5
33 files changed, 5454 insertions, 5 deletions
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 1a70c568176..69d26fd7d64 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -886,6 +886,27 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
}
+/* Remove the INTx handler installed for PCI device handle h.
+ * Returns immediately when no handler is registered. Otherwise the clib
+ * file entry is deleted, the IRQ fd is closed for VFIO devices only and the
+ * handler pointer is cleared. Always returns 0. */
clib_error_t *
+vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ linux_pci_device_t *p = linux_pci_get_device (h);
+ linux_pci_irq_t *irq = &p->intx_irq;
+
+ /* nothing registered, nothing to undo */
+ if (irq->intx_handler == 0)
+ return 0;
+
+ clib_file_del_by_index (&file_main, irq->clib_file_index);
+ /* only the VFIO path closes irq->fd here.
+ * NOTE(review): presumably other device types do not own a dedicated
+ * IRQ fd - confirm against the register path. */
+ if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO)
+ {
+ close (irq->fd);
+ irq->fd = -1;
+ }
+
+ irq->intx_handler = 0;
+
+ return 0;
+}
+
+clib_error_t *
vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
u32 start, u32 count,
pci_msix_handler_function_t * msix_handler)
@@ -943,6 +964,33 @@ error:
}
clib_error_t *
+vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+                                  u32 start, u32 count)
+{
+  /* Releases MSI-X vectors [start, start + count) of device h: every vector
+   * that still has an fd gets its clib file entry deleted and its fd
+   * closed. Only VFIO devices are supported; returns 0 on success. */
+  linux_pci_device_t *p = linux_pci_get_device (h);
+  u32 end = start + count;
+  u32 idx;
+
+  if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO)
+    return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
+                                 "support");
+
+  for (idx = start; idx < end; idx++)
+    {
+      linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, idx);
+
+      /* fd of -1 marks a vector with nothing to tear down */
+      if (irq->fd == -1)
+        continue;
+
+      clib_file_del_by_index (&file_main, irq->clib_file_index);
+      close (irq->fd);
+      irq->fd = -1;
+    }
+
+  return 0;
+}
+
+clib_error_t *
vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
u16 start, u16 count)
{
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
index 627833d4dfc..06a2a176419 100644
--- a/src/vlib/pci/pci.h
+++ b/src/vlib/pci/pci.h
@@ -240,11 +240,16 @@ clib_error_t *vlib_pci_register_intx_handler (vlib_main_t * vm,
vlib_pci_dev_handle_t h,
pci_intx_handler_function_t *
intx_handler);
+clib_error_t *vlib_pci_unregister_intx_handler (vlib_main_t *vm,
+ vlib_pci_dev_handle_t h);
clib_error_t *vlib_pci_register_msix_handler (vlib_main_t * vm,
vlib_pci_dev_handle_t h,
u32 start, u32 count,
pci_msix_handler_function_t *
msix_handler);
+clib_error_t *vlib_pci_unregister_msix_handler (vlib_main_t *vm,
+ vlib_pci_dev_handle_t h,
+ u32 start, u32 count);
clib_error_t *vlib_pci_enable_msix_irq (vlib_main_t * vm,
vlib_pci_dev_handle_t h, u16 start,
u16 count);
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index 5e913dffdea..5236d7e67b5 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -26,6 +26,19 @@ list(APPEND VNET_SOURCES
config.c
devices/devices.c
devices/netlink.c
+ dev/api.c
+ dev/cli.c
+ dev/config.c
+ dev/counters.c
+ dev/dev.c
+ dev/error.c
+ dev/format.c
+ dev/handlers.c
+ dev/pci.c
+ dev/port.c
+ dev/process.c
+ dev/queue.c
+ dev/runtime.c
error.c
flow/flow.c
flow/flow_cli.c
@@ -59,6 +72,7 @@ list(APPEND VNET_HEADERS
config.h
devices/devices.h
devices/netlink.h
+ dev/dev.h
flow/flow.h
global_funcs.h
interface/rx_queue_funcs.h
diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c
new file mode 100644
index 00000000000..4d556c72367
--- /dev/null
+++ b/src/vnet/dev/api.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/api.h>
+
+/* File-local log class: class "dev", subclass "api". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "api",
+};
+
+/* Check a requested queue size against a queue config.
+ * Returns 1 when size lies within [min_size, max_size], is a power of two
+ * if the config requires it, and is a multiple of c.multiplier when that is
+ * non-zero; returns 0 otherwise. */
+static int
+_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c)
+{
+  if (size < c.min_size || size > c.max_size)
+    return 0;
+
+  /* exactly one bit set means power of two */
+  if (c.size_is_power_of_two && count_set_bits (size) != 1)
+    return 0;
+
+  /* a zero multiplier disables the check and avoids a modulo by zero */
+  return !(c.multiplier && size % c.multiplier);
+}
+
+/* Attach the device named by args->device_id.
+ *
+ * Steps: reject an id that is already attached, find the bus from the id,
+ * fetch the bus-specific device info, let each eligible driver probe the
+ * device (all drivers when args->driver_name is empty, otherwise only the
+ * named one), allocate the device and run vnet_dev_init through the device
+ * process.
+ *
+ * Returns VNET_DEV_OK on success or a VNET_DEV_ERR_* code. The bus device
+ * info is always released; on failure an allocated device is freed again. */
+vnet_dev_rv_t
+vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = 0;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  vnet_dev_bus_t *bus;
+  vnet_dev_driver_t *driver;
+  void *bus_dev_info = 0;
+  u8 *dev_desc = 0;
+
+  log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id,
+             args->driver_name, format_vnet_dev_flags, &args->flags,
+             args->args);
+
+  if (vnet_dev_by_id (args->device_id))
+    return VNET_DEV_ERR_ALREADY_IN_USE;
+
+  bus = vnet_dev_find_device_bus (vm, args->device_id);
+  if (!bus)
+    {
+      log_err (dev, "unknown bus");
+      rv = VNET_DEV_ERR_INVALID_BUS;
+      goto done;
+    }
+
+  bus_dev_info = vnet_dev_get_device_info (vm, args->device_id);
+  if (!bus_dev_info)
+    {
+      log_err (dev, "invalid or unsupported device id");
+      rv = VNET_DEV_ERR_INVALID_DEVICE_ID;
+      goto done;
+    }
+
+  /* first driver whose probe returns a description wins */
+  vec_foreach (driver, dm->drivers)
+    {
+      if (args->driver_name[0] &&
+          strcmp (args->driver_name, driver->registration->name))
+        continue;
+      if (driver->ops.probe &&
+          (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info)))
+        break;
+    }
+
+  if (!dev_desc)
+    {
+      log_err (dev, "driver not available for %s", args->device_id);
+      rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE;
+      goto done;
+    }
+
+  dev = vnet_dev_alloc (vm, args->device_id, driver);
+  if (!dev)
+    {
+      log_err (dev, "dev alloc failed for %s", args->device_id);
+      /* no device took ownership of the probe description, so free it
+       * here rather than leak it */
+      vec_free (dev_desc);
+      rv = VNET_DEV_ERR_BUG;
+      goto done;
+    }
+  dev->description = dev_desc;
+
+  if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0)
+    dev->poll_stats = 1;
+
+  log_debug (0, "found '%v'", dev->description);
+
+  rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init);
+
+done:
+  /* bus is non-null whenever bus_dev_info is */
+  if (bus_dev_info)
+    bus->ops.free_device_info (vm, bus_dev_info);
+
+  if (rv != VNET_DEV_OK && dev)
+    vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free);
+
+  return rv;
+}
+
+/* Detach the device identified by args->device_id.
+ * Returns VNET_DEV_ERR_NOT_FOUND when no such device is attached, otherwise
+ * the result of running vnet_dev_detach through the device process. */
+vnet_dev_rv_t
+vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args)
+{
+  vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+  log_debug (dev, "detach");
+
+  if (dev == 0)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach);
+}
+
+/* Reset the device identified by args->device_id.
+ * Returns VNET_DEV_ERR_NOT_FOUND when the device is not attached,
+ * VNET_DEV_ERR_NOT_SUPPORTED when its driver provides no reset op, and
+ * otherwise the result of running vnet_dev_reset through the device
+ * process. */
+vnet_dev_rv_t
+vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args)
+{
+  vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+  log_debug (dev, "reset");
+
+  if (!dev)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  /* reset is only possible when the driver implements it */
+  if (dev->ops.reset == 0)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  return vnet_dev_process_call_op (vm, dev, vnet_dev_reset);
+}
+
+/* Create the interface for one port of an attached device.
+ *
+ * Looks up the device and the port with id args->port_id, validates the
+ * requested queue counts and sizes against the port limits, stores the
+ * resulting interface parameters in port->intf and runs
+ * vnet_dev_port_if_create through the device process.
+ *
+ * A zero queue count or size selects the default: one RX queue (bounded by
+ * the port RX maximum), one TX queue per thread (bounded by the port TX
+ * maximum) and the default sizes from the port queue configs.
+ *
+ * Returns VNET_DEV_OK or a VNET_DEV_ERR_* code. */
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *vm,
+                             vnet_dev_api_create_port_if_args_t *args)
+{
+  vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+  vnet_dev_port_t *port = 0;
+  u16 n_threads = vlib_get_n_threads ();
+
+  log_debug (dev,
+             "create_port_if: device '%s' port %u intf_name '%s' num_rx_q %u "
+             "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'",
+             args->device_id, args->port_id, args->intf_name,
+             args->num_rx_queues, args->num_tx_queues, args->rx_queue_size,
+             args->tx_queue_size, format_vnet_dev_port_flags, &args->flags,
+             args->args);
+
+  if (dev == 0)
+    return VNET_DEV_ERR_NOT_FOUND;
+
+  foreach_vnet_dev_port (p, dev)
+    if (p->port_id == args->port_id)
+      {
+        port = p;
+        break;
+      }
+
+  if (!port)
+    return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+  if (port->interface_created)
+    return VNET_DEV_ERR_ALREADY_EXISTS;
+
+  if (args->num_rx_queues)
+    {
+      if (args->num_rx_queues > port->attr.max_rx_queues)
+        return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES;
+      port->intf.num_rx_queues = args->num_rx_queues;
+    }
+  else
+    /* the RX default must be bounded by the RX limit, not the TX one */
+    port->intf.num_rx_queues = clib_min (port->attr.max_rx_queues, 1);
+
+  if (args->num_tx_queues)
+    {
+      if (args->num_tx_queues > port->attr.max_tx_queues)
+        return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES;
+      port->intf.num_tx_queues = args->num_tx_queues;
+    }
+  else
+    port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads);
+
+  if (args->rx_queue_size)
+    {
+      if (!_vnet_dev_queue_size_validate (args->rx_queue_size,
+                                          port->rx_queue_config))
+        return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE;
+      port->intf.rxq_sz = args->rx_queue_size;
+    }
+  else
+    port->intf.rxq_sz = port->rx_queue_config.default_size;
+
+  if (args->tx_queue_size)
+    {
+      if (!_vnet_dev_queue_size_validate (args->tx_queue_size,
+                                          port->tx_queue_config))
+        return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE;
+      port->intf.txq_sz = args->tx_queue_size;
+    }
+  else
+    port->intf.txq_sz = port->tx_queue_config.default_size;
+
+  clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name));
+
+  return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create);
+}
+
+/* Remove the interface identified by args->sw_if_index.
+ * Resolves the sw interface to its hw interface and then to a port via the
+ * hw interface dev_instance. Any failed lookup, or a port whose recorded
+ * hw_if_index does not match, yields VNET_DEV_ERR_UNKNOWN_INTERFACE.
+ * Otherwise vnet_dev_port_if_remove is run through the device process. */
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *vm,
+                             vnet_dev_api_remove_port_if_args_t *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *si;
+  vnet_hw_interface_t *hi;
+  vnet_dev_port_t *port;
+
+  si = vnet_get_sw_interface_or_null (vnm, args->sw_if_index);
+  hi = si ? vnet_get_hw_interface_or_null (vnm, si->hw_if_index) : 0;
+
+  if (hi == 0 ||
+      pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance))
+    return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+  port = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+
+  /* make sure the port really backs this hw interface */
+  if (port->intf.hw_if_index != si->hw_if_index)
+    return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+  return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove);
+}
diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h
new file mode 100644
index 00000000000..69a846296ed
--- /dev/null
+++ b/src/vnet/dev/api.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_API_H_
+#define _VNET_DEV_API_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+/* Arguments for vnet_dev_api_attach (). */
+typedef struct
+{
+ /* id of the device to attach */
+ vnet_dev_device_id_t device_id;
+ /* driver to use; an empty name allows any driver whose probe succeeds */
+ vnet_dev_driver_name_t driver_name;
+ /* device flags */
+ vnet_dev_flags_t flags;
+ /* extra arguments, printed with %v by the attach code */
+ u8 *args;
+} vnet_dev_api_attach_args_t;
+
+vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *,
+ vnet_dev_api_attach_args_t *);
+
+/* Arguments for vnet_dev_api_detach (). */
+typedef struct
+{
+ /* id of the attached device to detach */
+ vnet_dev_device_id_t device_id;
+} vnet_dev_api_detach_args_t;
+vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *,
+ vnet_dev_api_detach_args_t *);
+
+/* Arguments for vnet_dev_api_reset (). */
+typedef struct
+{
+ /* id of the attached device to reset */
+ vnet_dev_device_id_t device_id;
+} vnet_dev_api_reset_args_t;
+vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *);
+
+/* Arguments for vnet_dev_api_create_port_if ().
+ * A zero queue count or queue size selects the port default. */
+typedef struct
+{
+ /* id of the attached device that owns the port */
+ vnet_dev_device_id_t device_id;
+ /* name copied into the port interface settings */
+ vnet_dev_if_name_t intf_name;
+ /* requested number of RX / TX queues, 0 for default */
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ /* requested RX / TX queue size, 0 for default */
+ u16 rx_queue_size;
+ u16 tx_queue_size;
+ /* port on the device to create the interface for */
+ vnet_dev_port_id_t port_id;
+ /* port flags */
+ vnet_dev_port_flags_t flags;
+ /* extra arguments, printed with %v by the create code */
+ u8 *args;
+} vnet_dev_api_create_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *,
+ vnet_dev_api_create_port_if_args_t *);
+
+/* Arguments for vnet_dev_api_remove_port_if (). */
+typedef struct
+{
+ /* sw interface index of the port interface to remove */
+ u32 sw_if_index;
+} vnet_dev_api_remove_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *,
+ vnet_dev_api_remove_port_if_args_t *);
+
+#endif /* _VNET_DEV_API_H_ */
diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c
new file mode 100644
index 00000000000..d478f1d233c
--- /dev/null
+++ b/src/vnet/dev/cli.c
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/api.h>
+
+/* CLI handler for "device attach".
+ * Parses the mandatory device id followed by optional "driver", "flags"
+ * and "args" clauses (each accepted once), then calls
+ * vnet_dev_api_attach (). The args vector is released on every exit path
+ * taken after parsing starts. */
+static clib_error_t *
+device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                      vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_attach_args_t a = {};
+  clib_error_t *err = 0;
+  vnet_dev_rv_t rv;
+
+  if (!unformat_user (input, unformat_c_string_array, a.device_id,
+                      sizeof (a.device_id)))
+    return clib_error_return (0, "please specify valid device id");
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!a.driver_name[0] &&
+          unformat (input, "driver %U", unformat_c_string_array, a.driver_name,
+                    sizeof (a.driver_name)))
+        ;
+      else if (!a.flags.n &&
+               unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags))
+        ;
+      else if (!a.args && unformat (input, "args %v", &a.args))
+        ;
+      else
+        {
+          /* a.args may already be allocated, so leave through the common
+           * cleanup instead of returning directly */
+          err = clib_error_return (0, "unknown input `%U'",
+                                   format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  rv = vnet_dev_api_attach (vm, &a);
+
+  if (rv != VNET_DEV_OK)
+    err = clib_error_return (0, "unable to attach '%s': %U", a.device_id,
+                             format_vnet_dev_rv, rv);
+
+done:
+  vec_free (a.args);
+  return err;
+}
+
+VLIB_CLI_COMMAND (device_attach_cmd, static) = {
+  .path = "device attach",
+  .short_help = "device attach <device-id> [driver <name>] "
+                "[flags <flags>] [args <dev-args>]",
+  .function = device_attach_cmd_fn,
+};
+
+/* CLI handler for "device detach": parses the device id and calls
+ * vnet_dev_api_detach (), turning a failure code into a CLI error. */
+static clib_error_t *
+device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                      vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_detach_args_t a = {};
+  vnet_dev_rv_t rv;
+
+  if (unformat_user (input, unformat_c_string_array, a.device_id,
+                     sizeof (a.device_id)) == 0)
+    return clib_error_return (0, "please specify valid device id");
+
+  rv = vnet_dev_api_detach (vm, &a);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to detach '%s': %U", a.device_id,
+                            format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_detach_cmd, static) = {
+  .path = "device detach",
+  .short_help = "device detach <device-id>",
+  .function = device_detach_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device reset": parses the device id and calls
+ * vnet_dev_api_reset (), turning a failure code into a CLI error. */
+static clib_error_t *
+device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                     vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_reset_args_t a = {};
+  vnet_dev_rv_t rv;
+
+  if (unformat_user (input, unformat_c_string_array, a.device_id,
+                     sizeof (a.device_id)) == 0)
+    return clib_error_return (0, "please specify valid device id");
+
+  rv = vnet_dev_api_reset (vm, &a);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to reset '%s': %U", a.device_id,
+                            format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_reset_cmd, static) = {
+  .path = "device reset",
+  .short_help = "device reset <device-id>",
+  .function = device_reset_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device create-interface".
+ * Parses the mandatory device id followed by optional clauses (each
+ * accepted once while its field is still zero/empty), then calls
+ * vnet_dev_api_create_port_if (). The args vector is released on every
+ * exit path taken after parsing starts. */
+static clib_error_t *
+device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_create_port_if_args_t a = {};
+  clib_error_t *err = 0;
+  vnet_dev_rv_t rv;
+  u32 n;
+
+  if (!unformat_user (input, unformat_c_string_array, a.device_id,
+                      sizeof (a.device_id)))
+    return clib_error_return (0, "please specify valid device id");
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!a.intf_name[0] &&
+          unformat (input, "if-name %U", unformat_c_string_array, a.intf_name,
+                    sizeof (a.intf_name)))
+        ;
+      else if (!a.port_id && unformat (input, "port %u", &n))
+        a.port_id = n;
+      else if (!a.flags.n && unformat (input, "flags %U",
+                                       unformat_vnet_dev_port_flags, &a.flags))
+        ;
+      else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n))
+        a.num_rx_queues = n;
+      else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n))
+        a.num_tx_queues = n;
+      else if (!a.rx_queue_size && unformat (input, "rx-queues-size %u", &n))
+        a.rx_queue_size = n;
+      else if (!a.tx_queue_size && unformat (input, "tx-queues-size %u", &n))
+        a.tx_queue_size = n;
+      /* "name" is an alias of "if-name"; pass the array itself, as in the
+       * if-name clause, not a pointer to the array */
+      else if (!a.intf_name[0] &&
+               unformat (input, "name %U", unformat_c_string_array,
+                         a.intf_name, sizeof (a.intf_name)))
+        ;
+      else if (!a.args && unformat (input, "args %v", &a.args))
+        ;
+      else
+        {
+          /* a.args may already be allocated, so leave through the common
+           * cleanup instead of returning directly */
+          err = clib_error_return (0, "unknown input `%U'",
+                                   format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  rv = vnet_dev_api_create_port_if (vm, &a);
+
+  if (rv != VNET_DEV_OK)
+    err = clib_error_return (0, "unable to create_if '%s': %U", a.device_id,
+                             format_vnet_dev_rv, rv);
+
+done:
+  vec_free (a.args);
+  return err;
+}
+
+VLIB_CLI_COMMAND (device_create_if_cmd, static) = {
+  .path = "device create-interface",
+  .short_help = "device create-interface <device-id> [port <port-id>] "
+                "[if-name <name>] [flags <flags>] [num-rx-queues <n>] "
+                "[num-tx-queues <n>] [rx-queues-size <n>] "
+                "[tx-queues-size <n>] [args <iface-args>]",
+  .function = device_create_if_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "device remove-interface".
+ * Accepts an interface name or "sw-if-index <n>", then calls
+ * vnet_dev_api_remove_port_if (). */
+static clib_error_t *
+device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  vnet_dev_api_remove_port_if_args_t a = { .sw_if_index = ~0 };
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_dev_rv_t rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+                    &a.sw_if_index) ||
+          unformat (input, "sw-if-index %u", &a.sw_if_index))
+        continue;
+
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  /* still at the initial value: nothing was given */
+  if (a.sw_if_index == ~0)
+    return clib_error_return (0, "please specify existing interface name");
+
+  rv = vnet_dev_api_remove_port_if (vm, &a);
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "unable to remove interface: %U",
+                            format_vnet_dev_rv, rv);
+}
+
+VLIB_CLI_COMMAND (device_remove_if_cmd, static) = {
+  .path = "device remove-interface",
+  .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]",
+  .function = device_remove_if_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show device".
+ * Optional keywords: "counters" also prints the port counters, "all" sets
+ * show_zero_counters, "debug" sets the debug flag handed to the format
+ * functions. For every device it prints the device info and, per port, the
+ * port info followed by each RX and TX queue. */
+static clib_error_t *
+show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+                     vlib_cli_command_t *cmd)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_format_args_t fa = {};
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "counters"))
+        fa.counters = 1;
+      else if (unformat (input, "all"))
+        fa.show_zero_counters = 1;
+      else if (unformat (input, "debug"))
+        fa.debug = 1;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  pool_foreach_pointer (dev, dm->devices)
+    {
+      vlib_cli_output (vm, "device '%s':", dev->device_id);
+      vlib_cli_output (vm, "  %U", format_vnet_dev_info, &fa, dev);
+      foreach_vnet_dev_port (p, dev)
+        {
+          vlib_cli_output (vm, "  Port %u:", p->port_id);
+          vlib_cli_output (vm, "    %U", format_vnet_dev_port_info, &fa, p);
+          if (fa.counters)
+            vlib_cli_output (vm, "    %U", format_vnet_dev_counters, &fa,
+                             p->counter_main);
+
+          foreach_vnet_dev_port_rx_queue (q, p)
+            {
+              vlib_cli_output (vm, "    RX queue %u:", q->queue_id);
+              vlib_cli_output (vm, "      %U", format_vnet_dev_rx_queue_info,
+                               &fa, q);
+            }
+
+          foreach_vnet_dev_port_tx_queue (q, p)
+            {
+              vlib_cli_output (vm, "    TX queue %u:", q->queue_id);
+              vlib_cli_output (vm, "      %U", format_vnet_dev_tx_queue_info,
+                               &fa, q);
+            }
+        }
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_devices_cmd, static) = {
+  .path = "show device",
+  .short_help = "show device [counters] [all] [debug]",
+  .function = show_devices_cmd_fn,
+  .is_mp_safe = 1,
+};
+
+/* CLI handler for "show device counters".
+ * The optional keyword "all" also prints counters whose value is zero.
+ * For every device it prints the counters of each port, then the counters
+ * of each RX and TX queue that has a counter_main. */
+static clib_error_t *
+show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ /* counters are always enabled for this command */
+ vnet_dev_format_args_t fa = { .counters = 1 };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "all"))
+ fa.show_zero_counters = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ pool_foreach_pointer (dev, dm->devices)
+ {
+ vlib_cli_output (vm, "device '%s':", dev->device_id);
+ foreach_vnet_dev_port (p, dev)
+ {
+ /* port-level counters; the port counter_main is passed unchecked */
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ p->counter_main);
+
+ /* queues without a counter_main are skipped */
+ foreach_vnet_dev_port_rx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " RX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " TX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_device_counters_cmd, static) = {
+ .path = "show device counters",
+ .short_help = "show device counters [all]",
+ .function = show_device_counters_cmd_fn,
+ .is_mp_safe = 1,
+};
diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c
new file mode 100644
index 00000000000..c98524c9b7e
--- /dev/null
+++ b/src/vnet/dev/config.c
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class: class "dev", subclass "config". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "config",
+};
+
+/* Parse the body of one "port <n> { ... }" startup-config section into
+ * args. Recognized keywords: name, num-rx-queues, num-tx-queues,
+ * rx-queue-size, tx-queue-size and flags. Returns 0 on success or an error
+ * describing the first unknown token. */
+static clib_error_t *
+vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input,
+                               vnet_dev_api_create_port_if_args_t *args)
+{
+  log_debug (0, "port %u %U", args->port_id, format_unformat_input, input);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      u32 v;
+
+      if (unformat (input, "name %U", unformat_c_string_array, args->intf_name,
+                    sizeof (args->intf_name)))
+        continue;
+
+      if (unformat (input, "num-rx-queues %u", &v))
+        {
+          args->num_rx_queues = v;
+          continue;
+        }
+
+      if (unformat (input, "num-tx-queues %u", &v))
+        {
+          args->num_tx_queues = v;
+          continue;
+        }
+
+      if (unformat (input, "rx-queue-size %u", &v))
+        {
+          args->rx_queue_size = v;
+          continue;
+        }
+
+      if (unformat (input, "tx-queue-size %u", &v))
+        {
+          args->tx_queue_size = v;
+          continue;
+        }
+
+      if (unformat (input, "flags %U", unformat_vnet_dev_port_flags,
+                    &args->flags))
+        continue;
+
+      return clib_error_return (0, "unknown input '%U'", format_unformat_error,
+                                input);
+    }
+
+  return 0;
+}
+
+/* Parse and apply one "dev <id> { ... }" startup-config section.
+ * Recognized keywords: "driver <name>", "flags <flags>" and any number of
+ * "port <n> { ... }" sub-sections. When parsing succeeds the device is
+ * attached and one interface is created per port section, stopping at the
+ * first failure. Returns 0 on success, otherwise an error describing the
+ * parse, attach or interface-create failure. */
+static clib_error_t *
+vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input,
+                            char *device_id)
+{
+  clib_error_t *err = 0;
+  vnet_dev_api_attach_args_t args = {};
+  vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args;
+
+  log_debug (0, "device %s %U", device_id, format_unformat_input, input);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat_input_t sub_input;
+      u32 n;
+
+      if (unformat (input, "driver %U", unformat_c_string_array,
+                    args.driver_name, sizeof (args.driver_name)))
+        ;
+      else if (unformat (input, "flags %U", unformat_vnet_dev_flags,
+                         &args.flags))
+        ;
+      else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input,
+                         &sub_input))
+        {
+          /* collect the port settings now, create interfaces after attach */
+          vnet_dev_api_create_port_if_args_t *port_args;
+          vec_add2 (if_args_vec, port_args, 1);
+          port_args->port_id = n;
+          err = vnet_dev_config_one_interface (vm, &sub_input, port_args);
+          unformat_free (&sub_input);
+          if (err)
+            break;
+        }
+      else
+        {
+          err = clib_error_return (0, "unknown input '%U'",
+                                   format_unformat_error, input);
+          break;
+        }
+    }
+
+  if (err == 0)
+    {
+      vnet_dev_rv_t rv;
+
+      clib_memcpy (args.device_id, device_id, sizeof (args.device_id));
+      rv = vnet_dev_api_attach (vm, &args);
+
+      if (rv == VNET_DEV_OK)
+        {
+          vec_foreach (if_args, if_args_vec)
+            {
+              clib_memcpy (if_args->device_id, device_id,
+                           sizeof (if_args->device_id));
+              rv = vnet_dev_api_create_port_if (vm, if_args);
+              if (rv != VNET_DEV_OK)
+                break;
+            }
+        }
+
+      /* report attach failures too, not only interface-create failures */
+      if (rv != VNET_DEV_OK)
+        err = clib_error_return (0, "error: %U for device '%s'",
+                                 format_vnet_dev_rv, rv, device_id);
+    }
+
+  vec_free (if_args_vec);
+  return err;
+}
+
+/* Process node that applies the "devices" startup configuration once.
+ * Does nothing when no startup config was stored. Otherwise it takes over
+ * the stored config vector, handles each "dev <id> { ... }" section via
+ * vnet_dev_config_one_device () and stops at the first error, which is
+ * logged and freed. Afterwards the node disables itself, renames itself to
+ * "deleted-<index>" and records its index in free_process_node_indices. */
+uword
+dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
+                            vlib_frame_t *f)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  unformat_input_t input;
+  clib_error_t *err = 0;
+
+  if (dm->startup_config == 0)
+    return 0;
+
+  /* the vector now belongs to input; unformat_free () below releases it */
+  unformat_init_vector (&input, dm->startup_config);
+  dm->startup_config = 0;
+
+  while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat_input_t sub_input;
+      vnet_dev_device_id_t device_id;
+      if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id,
+                    sizeof (device_id), unformat_vlib_cli_sub_input,
+                    &sub_input))
+        {
+          err = vnet_dev_config_one_device (vm, &sub_input, device_id);
+          unformat_free (&sub_input);
+          if (err)
+            break;
+        }
+      else
+        {
+          /* format_unformat_error takes a pointer to the input */
+          err = clib_error_return (0, "unknown input '%U'",
+                                   format_unformat_error, &input);
+          break;
+        }
+    }
+
+  /* this node has no caller to hand the error to, so log and free it */
+  if (err)
+    {
+      log_err (0, "startup config: %U", format_clib_error, err);
+      clib_error_free (err);
+    }
+
+  unformat_free (&input);
+
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+  vec_add1 (dm->free_process_node_indices, rt->node_index);
+  return 0;
+}
+
+VLIB_REGISTER_NODE (dev_config_process_node) = {
+  .function = dev_config_process_node_fn,
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .name = "dev-config",
+};
+
+/* Startup-config handler for the "devices" section: appends the section
+ * text character by character to dm->startup_config. Always returns 0. */
+static clib_error_t *
+devices_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  for (;;)
+    {
+      uword ch = unformat_get_input (input);
+
+      if (ch == UNFORMAT_END_OF_INPUT)
+        break;
+
+      vec_add1 (dm->startup_config, ch);
+    }
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (devices_config, "devices");
diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c
new file mode 100644
index 00000000000..0a1e0a7419d
--- /dev/null
+++ b/src/vnet/dev/counters.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+/* File-local log class: class "dev", subclass "counters". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "counters",
+};
+
+/* Allocate a counter set holding copies of the n_counters descriptors in
+ * counters. Each copy has its index field set to its position. The
+ * counter_data and counter_start vectors are sized to n_counters. When fmt
+ * is a non-empty format string, it and the variadic arguments produce the
+ * description stored in desc. Returns the new counter main. */
+vnet_dev_counter_main_t *
+vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters,
+                         u16 n_counters, char *fmt, ...)
+{
+  vnet_dev_counter_main_t *cm;
+  u32 alloc_sz = sizeof (*cm) + n_counters * sizeof (counters[0]);
+
+  /* header and trailing descriptor array live in one allocation; only the
+   * header is zeroed, the descriptors are filled in below */
+  cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);
+  clib_memset (cm, 0, sizeof (*cm));
+  cm->n_counters = n_counters;
+
+  if (fmt && fmt[0])
+    {
+      va_list va;
+      va_start (va, fmt);
+      cm->desc = va_format (0, fmt, &va);
+      va_end (va);
+    }
+
+  for (u32 pos = 0; pos < n_counters; pos++)
+    {
+      vnet_dev_counter_t *dst = cm->counters + pos;
+
+      *dst = counters[pos];
+      dst->index = pos;
+    }
+
+  vec_validate_aligned (cm->counter_data, n_counters - 1,
+                        CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (cm->counter_start, n_counters - 1,
+                        CLIB_CACHE_LINE_BYTES);
+
+  return cm;
+}
+
+/* Clear all counters in cm.
+ * counter_data holds the value relative to counter_start (see
+ * vnet_dev_counter_value_update ()). To restart from zero, the current
+ * relative value is folded into the baseline and the relative value is
+ * zeroed. The baseline must accumulate: overwriting it with the relative
+ * value would lose earlier clears and corrupt absolute-value counters
+ * after the second clear. */
+void
+vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  for (int i = 0; i < cm->n_counters; i++)
+    {
+      cm->counter_start[i] += cm->counter_data[i];
+      cm->counter_data[i] = 0;
+    }
+}
+
+/* Release a counter set: both value vectors, the description and finally
+ * the counter main allocation itself. */
+void
+vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+  vec_free (cm->counter_start);
+  vec_free (cm->counter_data);
+  vec_free (cm->desc);
+  clib_mem_free (cm);
+}
+
+/* Format function: appends the display name of a counter to s.
+ * Takes one vnet_dev_counter_t pointer. Standard counter types use a fixed
+ * description. Vendor counters print their own name followed by the unit
+ * and direction strings, when those are defined. */
+u8 *
+format_vnet_dev_counter_name (u8 *s, va_list *va)
+{
+  vnet_dev_counter_t *c = va_arg (*va, vnet_dev_counter_t *);
+
+  char *std_names[] = {
+    [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received",
+    [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted",
+    [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received",
+    [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted",
+    [VNET_DEV_CTR_TYPE_RX_DROPS] = "total drops received",
+    [VNET_DEV_CTR_TYPE_TX_DROPS] = "total drops transmitted",
+  };
+  char *dir_names[] = {
+    [VNET_DEV_CTR_DIR_RX] = "received",
+    [VNET_DEV_CTR_DIR_TX] = "sent",
+  };
+  char *unit_names[] = {
+    [VNET_DEV_CTR_UNIT_BYTES] = "bytes",
+    [VNET_DEV_CTR_UNIT_PACKETS] = "packets",
+  };
+
+  if (c->type != VNET_DEV_CTR_TYPE_VENDOR)
+    {
+      if (c->type < ARRAY_LEN (std_names) && std_names[c->type])
+        return format (s, "%s", std_names[c->type]);
+
+      /* unknown counter type */
+      ASSERT (0);
+      return s;
+    }
+
+  s = format (s, "%s", c->name);
+
+  /* the NA entries have no string and therefore print nothing */
+  if (c->unit < ARRAY_LEN (unit_names) && unit_names[c->unit])
+    s = format (s, " %s", unit_names[c->unit]);
+
+  if (c->dir < ARRAY_LEN (dir_names) && dir_names[c->dir])
+    s = format (s, " %s", dir_names[c->dir]);
+
+  return s;
+}
+
+/* Format function: appends one line per counter of a counter main to s.
+ * Takes a vnet_dev_format_args_t pointer and a vnet_dev_counter_main_t
+ * pointer. Counters with value zero are skipped unless show_zero_counters
+ * is set. Lines after the first are indented to the current indent. */
+u8 *
+format_vnet_dev_counters (u8 *s, va_list *va)
+{
+  vnet_dev_format_args_t *a = va_arg (*va, vnet_dev_format_args_t *);
+  vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *);
+  u32 indent = format_get_indent (s);
+  u32 n_printed = 0;
+
+  foreach_vnet_dev_counter (c, cm)
+    {
+      if (cm->counter_data[c->index] == 0 && a->show_zero_counters == 0)
+        continue;
+
+      if (n_printed != 0)
+        s = format (s, "\n%U", format_white_space, indent);
+      n_printed++;
+
+      s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c,
+                  cm->counter_data[c->index]);
+    }
+
+  return s;
+}
diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h
new file mode 100644
index 00000000000..33d08ffbecd
--- /dev/null
+++ b/src/vnet/dev/counters.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_COUNTERS_H_
+#define _VNET_DEV_COUNTERS_H_
+
+#include <vnet/dev/dev.h>
+
+/* Traffic direction a counter refers to; NA means no direction. */
+typedef enum
+{
+ VNET_DEV_CTR_DIR_NA,
+ VNET_DEV_CTR_DIR_RX,
+ VNET_DEV_CTR_DIR_TX,
+} __clib_packed vnet_dev_counter_direction_t;
+
+/* Counter type: one of the standard RX/TX byte, packet and drop counters,
+ * or VENDOR for a driver-defined counter that carries its own name. */
+typedef enum
+{
+ VNET_DEV_CTR_TYPE_RX_BYTES,
+ VNET_DEV_CTR_TYPE_RX_PACKETS,
+ VNET_DEV_CTR_TYPE_RX_DROPS,
+ VNET_DEV_CTR_TYPE_TX_BYTES,
+ VNET_DEV_CTR_TYPE_TX_PACKETS,
+ VNET_DEV_CTR_TYPE_TX_DROPS,
+ VNET_DEV_CTR_TYPE_VENDOR,
+} __clib_packed vnet_dev_counter_type_t;
+
+/* Unit a counter is expressed in; NA means no unit. */
+typedef enum
+{
+ VNET_DEV_CTR_UNIT_NA,
+ VNET_DEV_CTR_UNIT_BYTES,
+ VNET_DEV_CTR_UNIT_PACKETS,
+} __clib_packed vnet_dev_counter_unit_t;
+
+/* Descriptor of a single counter. */
+typedef struct vnet_dev_counter
+{
+ /* display name, printed for VENDOR counters */
+ char name[24];
+ /* value supplied through the VNET_DEV_CTR_* initializer macros */
+ uword user_data;
+ vnet_dev_counter_type_t type;
+ vnet_dev_counter_direction_t dir;
+ vnet_dev_counter_unit_t unit;
+ /* position inside the owning counter main, set by
+ * vnet_dev_counters_alloc () */
+ u16 index;
+} vnet_dev_counter_t;
+
+/* A set of counters: header followed by n_counters descriptors. */
+typedef struct vnet_dev_counter_main
+{
+ /* optional description vector built from the alloc format string */
+ u8 *desc;
+ /* current value of each counter, indexed by counter index */
+ u64 *counter_data;
+ /* baseline subtracted by vnet_dev_counter_value_update () */
+ u64 *counter_start;
+ u16 n_counters;
+ /* descriptors, allocated together with this header */
+ vnet_dev_counter_t counters[];
+} vnet_dev_counter_main_t;
+
+/* Initializer macros for vnet_dev_counter_t.
+ * p becomes user_data; any further arguments are appended as additional
+ * designated initializers. The standard variants fix type, direction and
+ * unit. VNET_DEV_CTR_VENDOR takes the direction suffix d (NA, RX, TX), the
+ * unit suffix u (NA, BYTES, PACKETS) and the name n. */
+#define VNET_DEV_CTR_RX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_BYTES, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_BYTES, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_PACKETS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_PACKETS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n, \
+ .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__ \
+ }
+
+vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *,
+ vnet_dev_counter_t *, u16,
+ char *, ...);
+void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *);
+void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *);
+
+format_function_t format_vnet_dev_counters;
+format_function_t format_vnet_dev_counters_all;
+
+/* Recover the counter main that contains the given counter descriptor.
+ * Steps back counter->index elements to the first descriptor, then back by
+ * the offset of the counters[] member to the start of the header. */
+static_always_inline vnet_dev_counter_main_t *
+vnet_dev_counter_get_main (vnet_dev_counter_t *counter)
+{
+  vnet_dev_counter_t *first = counter - counter->index;
+  u8 *base =
+    (u8 *) first - STRUCT_OFFSET_OF (vnet_dev_counter_main_t, counters);
+
+  return (vnet_dev_counter_main_t *) base;
+}
+
+/* Add val to the current value of the given counter. */
+static_always_inline void
+vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                            u64 val)
+{
+  u64 *data = vnet_dev_counter_get_main (counter)->counter_data;
+
+  data[counter->index] += val;
+}
+
+/* Set the counter from an absolute reading: the stored value becomes val
+ * minus the baseline kept in counter_start. */
+static_always_inline void
+vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter,
+                               u64 val)
+{
+  vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+  u16 idx = counter->index;
+
+  cm->counter_data[idx] = val - cm->counter_start[idx];
+}
+
+/* Iterate c over every counter descriptor of cm; expands to nothing
+ * executed when cm is null. The expansion starts with an unbraced "if", so
+ * put braces around the loop body when the call sits next to an "else". */
+#define foreach_vnet_dev_counter(c, cm) \
+ if (cm) \
+ for (typeof (*(cm)->counters) *(c) = (cm)->counters; \
+ (c) < (cm)->counters + (cm)->n_counters; (c)++)
+
+#endif /* _VNET_DEV_COUNTERS_H_ */
diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c
new file mode 100644
index 00000000000..538d1449998
--- /dev/null
+++ b/src/vnet/dev/dev.c
@@ -0,0 +1,456 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/counters.h>
+
+/* File-local log class named "dev"; presumably the one the log_* macros
+ * from vnet/dev/log.h pick up - confirm. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+};
+
+/* Global state of the device infrastructure. next_rx_queue_thread starts
+ * at 1. */
+vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 };
+
+/* Find the registered bus whose name, followed by the device-id delimiter,
+ * is a strict prefix of `id`. Returns 0 when no bus matches. */
+vnet_dev_bus_t *
+vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus;
+  const int id_len = strlen (id);
+  const int delim_len = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER);
+
+  pool_foreach (bus, dm->buses)
+    {
+      const char *bus_name = bus->registration->name;
+      int name_len = strlen (bus_name);
+
+      /* the id must carry something after "<bus><delimiter>" */
+      if (id_len > name_len + delim_len &&
+          strncmp (bus_name, id, name_len) == 0 &&
+          strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + name_len,
+                   delim_len) == 0)
+        return bus;
+    }
+
+  return 0;
+}
+
+/* Ask the bus that owns device id `id` for its device info. Returns 0 when
+ * no registered bus matches the id. */
+void *
+vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+  vnet_dev_bus_t *owner = vnet_dev_find_device_bus (vm, id);
+
+  return owner ? owner->ops.get_device_info (vm, id) : 0;
+}
+
+/* Allocate a device object for `driver`, register it in the global device
+ * pool and in the id -> index hash, and create its process node. Returns
+ * the new device, or 0 (after freeing it) when process creation fails. */
+vnet_dev_t *
+vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id,
+                vnet_dev_driver_t *driver)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t **slot = 0;
+  vnet_dev_t *dev = vnet_dev_alloc_with_data (
+    sizeof (vnet_dev_t), driver->registration->device_data_sz);
+
+  /* the pool stores pointers; the slot index becomes the device index */
+  pool_get (dm->devices, slot);
+  *slot = dev;
+  dev->index = slot - dm->devices;
+
+  dev->driver_index = driver->index;
+  dev->ops = driver->registration->ops;
+  dev->bus_index = driver->bus_index;
+
+  clib_memcpy (dev->device_id, id, sizeof (dev->device_id));
+  hash_set (dm->device_index_by_id, dev->device_id, dev->index);
+
+  if (vnet_dev_process_create (vm, dev) != VNET_DEV_OK)
+    {
+      vnet_dev_free (vm, dev);
+      return 0;
+    }
+
+  return dev;
+}
+
+/* Open the device on its bus, then run the driver's alloc and init ops.
+ *
+ * The alloc op is treated as optional, like deinit/free/reset elsewhere in
+ * this file. On alloc or init failure the driver's deinit and free ops (when
+ * provided) are invoked and the failing return value is passed back.
+ * On success the device is marked initialized.
+ *
+ * NOTE(review): the bus device_close op is not invoked on the failure
+ * paths - confirm callers take care of that. */
+vnet_dev_rv_t
+vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK)
+    return rv;
+
+  if (dev->ops.alloc && (rv = dev->ops.alloc (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device alloc failed [rv %d]", rv);
+      goto error;
+    }
+
+  if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK)
+    {
+      log_err (dev, "device init failed [rv %d]", rv);
+      goto error;
+    }
+
+  dev->initialized = 1;
+  dev->not_first_init = 1;
+  return VNET_DEV_OK;
+
+error:
+  /* give the driver a chance to undo whatever it managed to set up */
+  if (dev->ops.deinit)
+    dev->ops.deinit (vm, dev);
+  if (dev->ops.free)
+    dev->ops.free (vm, dev);
+  return rv;
+}
+
+/* Tear down an initialized device: run the driver deinit op, close the
+ * device on its bus, stop the device process and clear the initialized
+ * flag. All port interfaces must already be removed. */
+void
+vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  ASSERT (dev->initialized == 1);
+  vnet_dev_validate (vm, dev);
+
+  foreach_vnet_dev_port (port, dev)
+    {
+      ASSERT (port->interface_created == 0);
+    }
+
+  if (dev->ops.deinit)
+    dev->ops.deinit (vm, dev);
+
+  vnet_dev_bus_t *bus = vnet_dev_get_bus (dev);
+  if (bus->ops.device_close)
+    bus->ops.device_close (vm, dev);
+
+  vnet_dev_process_quit (vm, dev);
+
+  dev->initialized = 0;
+}
+
+/* Release a device that is no longer initialized: free all of its ports,
+ * its description and pools, and remove it from the id hash and from the
+ * global device pool. */
+void
+vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+
+ vnet_dev_validate (vm, dev);
+
+ ASSERT (dev->initialized == 0);
+
+ foreach_vnet_dev_port (p, dev)
+ vnet_dev_port_free (vm, p);
+
+ vec_free (dev->description);
+ pool_free (dev->ports);
+ pool_free (dev->periodic_ops);
+ /* drop the lookup entry before giving the pool slot back */
+ hash_unset (dm->device_index_by_id, dev->device_id);
+ pool_put_index (dm->devices, dev->index);
+ /* NOTE(review): the vnet_dev_t memory obtained in vnet_dev_alloc is not
+ * released here - confirm where it is freed. */
+}
+
+/* Invoke the driver's reset op on an initialized device. Returns
+ * VNET_DEV_ERR_NOT_SUPPORTED when the driver has no reset op, otherwise the
+ * op's return value (failures are logged). */
+vnet_dev_rv_t
+vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_rv_t rv;
+
+  ASSERT (dev->initialized == 1);
+  vnet_dev_validate (vm, dev);
+
+  if (!dev->ops.reset)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  rv = dev->ops.reset (vm, dev);
+  if (rv != VNET_DEV_OK)
+    log_err (dev, "device reset failed [rv %d]", rv);
+
+  return rv;
+}
+
+/* Fully detach a device: remove every created port interface, then
+ * deinitialize and free the device. */
+void
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  foreach_vnet_dev_port (port, dev)
+    {
+      if (port->interface_created)
+        vnet_dev_port_if_remove (vm, port);
+    }
+
+  vnet_dev_deinit (vm, dev);
+  vnet_dev_free (vm, dev);
+}
+
+/* Allocate `size` bytes of DMA memory with alignment `align` through the
+ * device's bus and store the address in *pp. Returns
+ * VNET_DEV_ERR_NOT_SUPPORTED when the bus has no allocator, otherwise the
+ * allocator's return value. */
+vnet_dev_rv_t
+vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align,
+                        void **pp)
+{
+  vnet_dev_bus_t *bus =
+    pool_elt_at_index (vnet_dev_main.buses, dev->bus_index);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_validate (vm, dev);
+
+  if (bus->ops.dma_mem_alloc_fn == 0)
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size,
+             *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node,
+             align);
+  return rv;
+}
+
+/* Release DMA memory `p` through the device's bus. Does nothing when `p` is
+ * NULL or the bus provides no free function. */
+void
+vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+  vnet_dev_validate (vm, dev);
+
+  if (p == 0 || !bus->ops.dma_mem_free_fn)
+    return;
+
+  /* plain call: a void function must not `return` an expression */
+  bus->ops.dma_mem_free_fn (vm, dev, p);
+}
+
+/* Admin up/down handler: start the port when the interface is brought up
+ * and the port is not started, stop it when brought down while started.
+ * Both operations go through the device process. Returns an error when the
+ * port operation fails, 0 otherwise. */
+clib_error_t *
+vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    {
+      if (p->started == 0)
+        rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start);
+    }
+  else if (p->started)
+    {
+      rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop);
+    }
+
+  if (rv == VNET_DEV_OK)
+    return 0;
+
+  return clib_error_return (0, "failed to change port admin state: %U",
+                            format_vnet_dev_rv, rv);
+}
+
+/* Feature-arc change callback. Reacts only to the ethernet port RX feature
+ * arc and only for interfaces backed by a port of this infrastructure.
+ * Recomputes the port's RX next index / feature-arc settings and, when they
+ * changed, requests a runtime update on every RX queue of the port.
+ * `is_enable` and `cb` are not used. */
+static void
+vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable,
+                            void *cb)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_feature_main_t *fm = &feature_main;
+  vnet_feature_config_main_t *cm;
+  vnet_dev_main_t *vdm = &vnet_dev_main;
+  vnet_dev_port_t *port;
+  vnet_hw_interface_t *hw;
+  u32 current_config_index = ~0;
+  u32 next_index = ~0;
+  int update_runtime = 0;
+
+  if (arc_index != vdm->eth_port_rx_feature_arc_index)
+    return;
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+  /* ignore interfaces that do not map back to one of our ports */
+  if (port == 0 || port->intf.sw_if_index != sw_if_index)
+    return;
+
+  if (vnet_have_features (arc_index, sw_if_index))
+    {
+      cm = &fm->feature_config_mains[arc_index];
+      current_config_index =
+        vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+      vnet_get_config_data (&cm->config_main, &current_config_index,
+                            &next_index, 0);
+      if (port->intf.feature_arc == 0 ||
+          port->intf.rx_next_index != next_index ||
+          port->intf.current_config_index != current_config_index)
+        {
+          port->intf.current_config_index = current_config_index;
+          port->intf.rx_next_index = next_index;
+          port->intf.feature_arc_index = arc_index;
+          port->intf.feature_arc = 1;
+          update_runtime = 1;
+        }
+    }
+  else
+    {
+      if (port->intf.feature_arc)
+        {
+          /* no features left: fall back to the redirect target when one is
+           * set, otherwise to the default next for this port type */
+          port->intf.current_config_index = 0;
+          port->intf.rx_next_index =
+            port->intf.redirect_to_node ?
+              port->intf.redirect_to_node_next_index :
+              vnet_dev_default_next_index_by_port_type[port->attr.type];
+          port->intf.feature_arc_index = 0;
+          port->intf.feature_arc = 0;
+          update_runtime = 1;
+        }
+    }
+
+  if (update_runtime)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+        vnet_dev_rx_queue_rt_request (
+          vm, rxq,
+          (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1,
+                                        .update_feature_arc = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+                            "feature arc configuration");
+    }
+}
+
+/* Comparison callback ordering driver registrations by descending
+ * priority. Both arguments point to registration pointers. */
+static int
+sort_driver_registrations (void *a0, void *a1)
+{
+  vnet_dev_driver_registration_t *lhs = *(vnet_dev_driver_registration_t **) a0;
+  vnet_dev_driver_registration_t *rhs = *(vnet_dev_driver_registration_t **) a1;
+
+  /* -1 when lhs has the higher priority, 1 when lower, 0 when equal */
+  return (lhs->priority < rhs->priority) - (lhs->priority > rhs->priority);
+}
+
+/* Init function of the device infrastructure.
+ *
+ * - creates the device-id hash,
+ * - instantiates every registered bus (rejecting buses whose per-device
+ *   data size is zero or does not fit vnet_dev_t.bus_data),
+ * - instantiates every registered driver in descending priority order,
+ *   binding it to its bus and registering a vnet device class for it,
+ * - allocates the per-thread runtime temp space sized for the largest
+ *   driver request,
+ * - registers the feature-arc update callback.
+ *
+ * Returns 0 on success or an error describing the bad registration. */
+static clib_error_t *
+vnet_dev_main_init (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_driver_registration_t **drv = 0;
+  u32 temp_space_sz = 0;
+
+  dm->device_index_by_id = hash_create_string (0, sizeof (uword));
+
+  for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r;
+       r = r->next_registration)
+    {
+      vnet_dev_bus_t *bus;
+      pool_get_zero (dm->buses, bus);
+      bus->registration = r;
+      bus->index = bus - dm->buses;
+      bus->ops = r->ops;
+      if (!r->device_data_size ||
+          r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data))
+        return clib_error_return (
+          0, "bus device data for bus '%s' is too big or not specified",
+          r->name);
+
+      log_debug (0, "bus '%s' registered", r->name);
+    }
+
+  for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r;
+       r = r->next_registration)
+    vec_add1 (drv, r);
+
+  vec_sort_with_function (drv, sort_driver_registrations);
+
+  vec_foreach_pointer (r, drv)
+    {
+      vnet_dev_driver_t *driver;
+      vnet_dev_bus_t *bus;
+      vnet_device_class_t *dev_class;
+      int bus_index = -1;
+
+      /* on a match `bus` is left pointing at the driver's bus */
+      pool_foreach (bus, dm->buses)
+        {
+          if (strcmp (bus->registration->name, r->bus) == 0)
+            {
+              bus_index = bus->index;
+              break;
+            }
+        }
+
+      if (bus_index < 0)
+        {
+          clib_error_t *err =
+            clib_error_return (0, "unknown bus '%s'", r->bus);
+          /* do not leak the temporary sort vector on the error path */
+          vec_free (drv);
+          return err;
+        }
+
+      pool_get_zero (dm->drivers, driver);
+      driver->registration = r;
+      driver->index = driver - dm->drivers;
+      driver->bus_index = bus_index;
+      driver->ops = r->ops;
+      dev_class = clib_mem_alloc (sizeof (vnet_device_class_t));
+      *dev_class = (vnet_device_class_t){
+        .name = r->name,
+        .format_device_name = format_vnet_dev_interface_name,
+        .format_device = format_vnet_dev_interface_info,
+        .admin_up_down_function = vnet_dev_admin_up_down_fn,
+        .rx_redirect_to_node = vnet_dev_set_interface_next_node,
+        .clear_counters = vnet_dev_clear_hw_interface_counters,
+        .rx_mode_change_function = vnet_dev_rx_mode_change_fn,
+        .mac_addr_change_function = vnet_dev_port_mac_change,
+        .mac_addr_add_del_function = vnet_dev_add_del_mac_address,
+        .flow_ops_function = vnet_dev_flow_ops_fn,
+        .set_rss_queues_function = vnet_dev_interface_set_rss_queues,
+      };
+      driver->dev_class_index = vnet_register_device_class (vm, dev_class);
+      log_debug (0, "driver '%s' registered on bus '%s'", r->name,
+                 bus->registration->name);
+
+      /* track the largest runtime temp space any driver asks for */
+      if (temp_space_sz < r->runtime_temp_space_sz)
+        temp_space_sz = r->runtime_temp_space_sz;
+    }
+
+  if (dm->startup_config)
+    log_debug (0, "startup config: %v", dm->startup_config);
+
+  vec_free (drv);
+
+  if (temp_space_sz > 0)
+    {
+      /* per-thread size is rounded up to a cache-line multiple and then to
+       * a power of two so only its log2 needs to be stored */
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      u32 sz = round_pow2 (temp_space_sz, align);
+      dm->log2_runtime_temp_space_sz =
+        get_lowest_set_bit_index (max_pow2 (sz));
+      sz = 1 << dm->log2_runtime_temp_space_sz;
+      sz *= vlib_get_n_threads ();
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      log_debug (0,
+                 "requested %u bytes for runtime temp storage, allocated %u "
+                 "per thread (total %u)",
+                 temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz);
+    }
+
+  vnet_feature_register (vnet_dev_feature_update_cb, 0);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_dev_main_init);
+
+/* Worker-count change handler: when runtime temp space is in use, drop the
+ * old area and allocate a zeroed one sized for the new number of threads.
+ * Always returns 0. */
+clib_error_t *
+vnet_dev_num_workers_change (vlib_main_t *vm)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  if (dm->log2_runtime_temp_space_sz > 0)
+    {
+      const u32 align = CLIB_CACHE_LINE_BYTES;
+      uword sz =
+        (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads ();
+      if (dm->runtime_temp_spaces)
+        clib_mem_free (dm->runtime_temp_spaces);
+      dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+      clib_memset (dm->runtime_temp_spaces, 0, sz);
+      /* sz is a uword, so it needs the long length modifier */
+      log_debug (0, "runtime temp storage resized to %lu", sz);
+    }
+
+  return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change);
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
new file mode 100644
index 00000000000..5c80b98f580
--- /dev/null
+++ b/src/vnet/dev/dev.h
@@ -0,0 +1,701 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_H_
+#define _VNET_DEV_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+/* Separator between the bus-name prefix and the rest of a device id. */
+#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/"
+
+/* X-macro list of port types as (bit position, name) pairs. */
+#define foreach_vnet_dev_port_type \
+ _ (0, UNKNOWN) \
+ _ (1, ETHERNET)
+
+/* Fixed-size character buffer holding a device id. */
+typedef char vnet_dev_device_id_t[32];
+
+/* Port type values; each enumerator is (1U << bit position) from the list
+ * above, so UNKNOWN is 1 and ETHERNET is 2. */
+typedef enum
+{
+#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)),
+ foreach_vnet_dev_port_type
+#undef _
+} vnet_dev_port_type_t;
+
+/* X-macro list of port capability flag names. */
+#define foreach_vnet_dev_port_caps \
+ _ (interrupt_mode) \
+ _ (rss)
+
+/* Port capabilities: one single-bit field per entry of the list above,
+ * also accessible as a whole through as_number. */
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_caps
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_caps_t;
+
+/* Hardware address storage: 8 raw bytes, the first 6 of which overlay an
+ * ethernet MAC address. */
+typedef union
+{
+ u8 eth_mac[6];
+ u8 raw[8];
+} vnet_dev_hw_addr_t;
+
+/* Forward declarations of the structure types defined later in this file
+ * (each typedef appears exactly once). */
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+typedef struct vnet_dev_counter vnet_dev_counter_t;
+typedef struct vnet_dev_counter_main vnet_dev_counter_main_t;
+typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t;
+
+/* Function types for device, port and queue operations. The plain variants
+ * return a vnet_dev_rv_t status; the *_no_rv_t variants return nothing. */
+typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) (
+ vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *);
+typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *);
+typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+
+/* 16-bit identifier / index types. */
+typedef u16 vnet_dev_queue_id_t;
+typedef u16 vnet_dev_bus_index_t;
+typedef u16 vnet_dev_driver_index_t;
+
+/* Operations on an RX queue; alloc and start report a status, stop and free
+ * do not. */
+typedef struct
+{
+ vnet_dev_rx_queue_op_t *alloc;
+ vnet_dev_rx_queue_op_t *start;
+ vnet_dev_rx_queue_op_no_rv_t *stop;
+ vnet_dev_rx_queue_op_no_rv_t *free;
+} vnet_dev_rx_queue_ops_t;
+
+/* Operations on a TX queue; same shape as the RX variant. */
+typedef struct
+{
+ vnet_dev_tx_queue_op_t *alloc;
+ vnet_dev_tx_queue_op_t *start;
+ vnet_dev_tx_queue_op_no_rv_t *stop;
+ vnet_dev_tx_queue_op_no_rv_t *free;
+} vnet_dev_tx_queue_ops_t;
+
+/* Queue sizing parameters.
+ * NOTE(review): data_size is presumably the size of the private data
+ * trailing each queue structure, and multiplier / size_is_power_of_two
+ * constraints on valid queue sizes - confirm against queue.c. */
+typedef struct
+{
+ u16 data_size;
+ u16 min_size;
+ u16 max_size;
+ u16 default_size;
+ u8 multiplier;
+ u8 size_is_power_of_two : 1;
+} vnet_dev_queue_config_t;
+
+/* X-macro list of port configuration change request kinds. */
+#define foreach_vnet_dev_port_cfg_type \
+ _ (PROMISC_MODE) \
+ _ (MAX_FRAME_SIZE) \
+ _ (CHANGE_PRIMARY_HW_ADDR) \
+ _ (ADD_SECONDARY_HW_ADDR) \
+ _ (REMOVE_SECONDARY_HW_ADDR)
+
+/* Packed enum of request kinds; value 0 is VNET_DEV_PORT_CFG_UNKNOWN and
+ * the listed kinds follow in order. */
+typedef enum
+{
+ VNET_DEV_PORT_CFG_UNKNOWN,
+#define _(n) VNET_DEV_PORT_CFG_##n,
+ foreach_vnet_dev_port_cfg_type
+#undef _
+} __clib_packed vnet_dev_port_cfg_type_t;
+
+/* A port configuration change request: the kind, a `validated` flag and a
+ * payload union.
+ * NOTE(review): which union member applies is presumably selected by
+ * `type` - confirm against port.c. */
+typedef struct vnet_dev_port_cfg_change_req
+{
+ vnet_dev_port_cfg_type_t type;
+ u8 validated : 1;
+
+ union
+ {
+ u8 promisc : 1;
+ vnet_dev_hw_addr_t addr;
+ u16 max_frame_size;
+ };
+
+} vnet_dev_port_cfg_change_req_t;
+
+/* Port attributes: hardware address, queue and frame-size limits, port type
+ * and capability flags. */
+typedef struct
+{
+ vnet_dev_hw_addr_t hw_addr;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ u16 max_supported_frame_size;
+ vnet_dev_port_type_t type;
+ vnet_dev_port_caps_t caps;
+} vnet_dev_port_attr_t;
+
+/* Target kind of a periodic operation: a device or a port. */
+typedef enum
+{
+ VNET_DEV_PERIODIC_OP_TYPE_DEV = 1,
+ VNET_DEV_PERIODIC_OP_TYPE_PORT = 2,
+} __clib_packed vnet_dev_periodic_op_type_t;
+
+/* A periodic operation: its interval, last run time, target kind, and the
+ * target object and callback.
+ * NOTE(review): `type` presumably selects between the dev and port union
+ * members - confirm against process.c. */
+typedef struct
+{
+ f64 interval;
+ f64 last_run;
+ vnet_dev_periodic_op_type_t type;
+ union
+ {
+ vnet_dev_t *dev;
+ vnet_dev_port_t *port;
+ void *arg;
+ };
+ union
+ {
+ vnet_dev_op_no_rv_t *dev_op;
+ vnet_dev_port_op_no_rv_t *port_op;
+ void *op;
+ };
+} vnet_dev_periodic_op_t;
+
+/* Description of a driver-provided graph node: the list of node function
+ * registrations (filled in by VNET_DEV_NODE_FN), a trace formatter and the
+ * node's error counters. */
+typedef struct
+{
+ struct _vlib_node_fn_registration *registrations;
+ format_function_t *format_trace;
+ vlib_error_desc_t *error_counters;
+ u16 n_error_counters;
+} vnet_dev_node_t;
+
+/* Device-level driver operations. dev.c checks deinit, reset and free for
+ * NULL before calling them. */
+typedef struct
+{
+ vnet_dev_op_t *alloc;
+ vnet_dev_op_t *init;
+ vnet_dev_op_no_rv_t *deinit;
+ vnet_dev_op_t *reset;
+ vnet_dev_op_no_rv_t *free;
+ u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *);
+ format_function_t *format_info;
+} vnet_dev_ops_t;
+
+/* Port-level driver operations. */
+typedef struct
+{
+ vnet_dev_port_op_t *alloc;
+ vnet_dev_port_op_t *init;
+ vnet_dev_port_cfg_change_op_t *config_change;
+ vnet_dev_port_cfg_change_op_t *config_change_validate;
+ vnet_dev_port_op_t *start;
+ vnet_dev_port_op_no_rv_t *stop;
+ vnet_dev_port_op_no_rv_t *deinit;
+ vnet_dev_port_op_no_rv_t *free;
+ format_function_t *format_status;
+} vnet_dev_port_ops_t;
+
+/* Set of runtime update requests for an RX queue, one bit per request,
+ * also accessible as a whole through as_number. */
+typedef union
+{
+ struct
+ {
+ u8 update_next_index : 1;
+ u8 update_feature_arc : 1;
+ u8 suspend_off : 1;
+ u8 suspend_on : 1;
+ };
+ u8 as_number;
+} vnet_dev_rx_queue_rt_req_t;
+
+/* RX queue state. The fixed part is laid out in three cache-line aligned
+ * sections (enforced by the size assertion below); 16-byte aligned private
+ * data follows in data[]. */
+typedef struct vnet_dev_rx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ u16 rx_thread_index;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ u8 enabled : 1;
+ u8 started : 1;
+ u8 suspended : 1;
+ vnet_dev_queue_id_t queue_id;
+ u16 size;
+ u16 next_index;
+ vnet_dev_rx_queue_rt_req_t runtime_request;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime1);
+ vlib_buffer_template_t buffer_template;
+ CLIB_ALIGN_MARK (private_data, 16);
+ u8 data[];
+} vnet_dev_rx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES);
+
+/* TX queue state. The fixed part occupies two cache lines (enforced by the
+ * size assertion below); 16-byte aligned private data follows in data[]. */
+typedef struct vnet_dev_tx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ clib_bitmap_t *assigned_threads;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_queue_id_t queue_id;
+ u8 started : 1;
+ u8 enabled : 1;
+ u8 lock_needed : 1;
+ u8 lock;
+ u16 size;
+ CLIB_ALIGN_MARK (private_data, 16);
+ u8 data[];
+} vnet_dev_tx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES);
+
+/* Port state: owning device, status flags, attributes, queue pools, the
+ * port / queue operation tables, RX and TX node descriptions and the state
+ * of the interface created for the port. Cache-line aligned private data
+ * follows in data[]. */
+typedef struct vnet_dev_port
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_t *dev;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_driver_index_t driver_index;
+ u8 initialized : 1;
+ u8 started : 1;
+ u8 link_up : 1;
+ u8 promisc : 1;
+ u8 interface_created : 1;
+ u8 rx_node_assigned : 1;
+ vnet_dev_counter_main_t *counter_main;
+ vnet_dev_queue_config_t rx_queue_config;
+ vnet_dev_queue_config_t tx_queue_config;
+ vnet_dev_port_attr_t attr;
+ u32 max_frame_size;
+ vnet_dev_hw_addr_t primary_hw_addr;
+ vnet_dev_hw_addr_t *secondary_hw_addr;
+ u32 index;
+ u32 speed;
+ /* pools of queue pointers, walked with foreach_vnet_dev_port_rx_queue /
+ * foreach_vnet_dev_port_tx_queue */
+ vnet_dev_rx_queue_t **rx_queues;
+ vnet_dev_tx_queue_t **tx_queues;
+ vnet_dev_port_ops_t port_ops;
+ vnet_dev_rx_queue_ops_t rx_queue_ops;
+ vnet_dev_tx_queue_ops_t tx_queue_ops;
+ vnet_dev_node_t rx_node;
+ vnet_dev_node_t tx_node;
+
+ /* interface state; the feature-arc and rx_next_index fields are
+ * maintained by vnet_dev_feature_update_cb in dev.c */
+ struct
+ {
+ vnet_dev_if_name_t name;
+ u32 dev_instance;
+ u32 rx_node_index;
+ u32 current_config_index;
+ u16 rx_next_index;
+ u16 redirect_to_node_next_index;
+ u8 feature_arc_index;
+ u8 feature_arc : 1;
+ u8 redirect_to_node : 1;
+ u32 tx_node_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 txq_sz;
+ u16 rxq_sz;
+ } intf;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ u8 data[];
+} vnet_dev_port_t;
+
+/* Device state: id, status flags, bus / driver binding, the device
+ * operation table, the pool of ports and the periodic operations.
+ * bus_data is a fixed 32-byte area for the bus (bus registrations are
+ * checked against its size at init); 16-byte aligned private data follows
+ * in data[]. */
+typedef struct vnet_dev
+{
+ vnet_dev_device_id_t device_id;
+ u16 initialized : 1;
+ u16 not_first_init : 1;
+ u16 va_dma : 1;
+ u16 process_node_quit : 1;
+ u16 process_node_periodic : 1;
+ u16 poll_stats : 1;
+ u16 bus_index;
+ u8 numa_node;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ vnet_dev_driver_index_t driver_index;
+ u32 index;
+ u32 process_node_index;
+ u8 bus_data[32] __clib_aligned (16);
+ vnet_dev_ops_t ops;
+ vnet_dev_port_t **ports;
+ vnet_dev_periodic_op_t *periodic_ops;
+ u8 *description;
+ u8 __clib_aligned (16)
+ data[];
+} vnet_dev_t;
+
+/* One entry of a driver's match table: vendor id, device id and a
+ * description string. */
+typedef struct
+{
+ u16 vendor_id, device_id;
+ char *description;
+} vnet_dev_match_t;
+
+/* Build a match table as a compound-literal array; an empty `{}` entry is
+ * appended after the supplied entries as terminator. */
+#define VNET_DEV_MATCH(...) \
+ (vnet_dev_match_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+/* Bus operations. dev.c calls device_open and get_device_info without a
+ * NULL check, and checks device_close, dma_mem_alloc_fn and dma_mem_free_fn
+ * for NULL before use. */
+typedef struct
+{
+ vnet_dev_op_t *device_open;
+ vnet_dev_op_no_rv_t *device_close;
+ vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+ void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *);
+ void *(*get_device_info) (vlib_main_t *, char *);
+ void (*free_device_info) (vlib_main_t *, void *);
+ format_function_t *format_device_info;
+ format_function_t *format_device_addr;
+} vnet_dev_bus_ops_t;
+
+/* Static registration record of a bus; records are chained through
+ * next_registration by VNET_DEV_REGISTER_BUS.
+ * NOTE(review): `name` is declared as vnet_dev_driver_name_t although the
+ * driver registration below refers to buses with vnet_dev_bus_name_t -
+ * confirm which type is intended. */
+struct vnet_dev_bus_registration
+{
+ vnet_dev_bus_registration_t *next_registration;
+ vnet_dev_driver_name_t name;
+ u16 device_data_size;
+ vnet_dev_bus_ops_t ops;
+};
+
+/* Static registration record of a driver; records are chained through
+ * next_registration by VNET_DEV_REGISTER_DRIVER. `bus` names the bus the
+ * driver attaches to and `priority` orders drivers at init (higher first). */
+struct vnet_dev_driver_registration
+{
+ vnet_dev_driver_registration_t *next_registration;
+ u8 bus_master_enable : 1;
+ vnet_dev_driver_name_t name;
+ vnet_dev_bus_name_t bus;
+ u16 device_data_sz;
+ u16 runtime_temp_space_sz;
+ vnet_dev_match_t *match;
+ int priority;
+ vnet_dev_ops_t ops;
+};
+
+/* Runtime instance of a registered bus: pool index, its registration and a
+ * copy of the registration's operation table. */
+typedef struct
+{
+ u32 index;
+ vnet_dev_bus_registration_t *registration;
+ vnet_dev_bus_ops_t ops;
+} vnet_dev_bus_t;
+
+/* Runtime instance of a registered driver: pool index, its registration,
+ * the vnet device class registered for it, the index of its bus and a copy
+ * of the registration's operation table. */
+typedef struct
+{
+ u32 index;
+ void *dev_data;
+ vnet_dev_driver_registration_t *registration;
+ u32 dev_class_index;
+ vnet_dev_bus_index_t bus_index;
+ vnet_dev_ops_t ops;
+} vnet_dev_driver_t;
+
+/* Global state of the device infrastructure: pools of buses, drivers,
+ * devices and ports, the registration lists filled by the REGISTER macros,
+ * the runtime temp space (per-thread size stored as log2) and the
+ * device-id -> device-index hash. */
+typedef struct
+{
+ vnet_dev_bus_t *buses;
+ vnet_dev_driver_t *drivers;
+ vnet_dev_t **devices;
+ vnet_dev_port_t **ports_by_dev_instance;
+ vnet_dev_bus_registration_t *bus_registrations;
+ vnet_dev_driver_registration_t *driver_registrations;
+ void *runtime_temp_spaces;
+ u32 log2_runtime_temp_space_sz;
+ u32 *free_process_node_indices;
+ u32 *free_rx_node_indices;
+ uword *device_index_by_id;
+
+ u8 *startup_config;
+ u16 next_rx_queue_thread;
+ u8 eth_port_rx_feature_arc_index;
+} vnet_dev_main_t;
+
+/* The single instance, defined in dev.c. */
+extern vnet_dev_main_t vnet_dev_main;
+
+/* Arguments of vnet_dev_port_add: port attributes, operations and private
+ * data description, the RX / TX node descriptions, and the configuration
+ * and operations of RX and TX queues. */
+typedef struct
+{
+ struct
+ {
+ vnet_dev_port_attr_t attr;
+ vnet_dev_port_ops_t ops;
+ u16 data_size;
+ void *initial_data;
+ } port;
+
+ vnet_dev_node_t *rx_node;
+ vnet_dev_node_t *tx_node;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_rx_queue_ops_t ops;
+ } rx_queue;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_tx_queue_ops_t ops;
+ } tx_queue;
+} vnet_dev_port_add_args_t;
+
+/* Argument of vnet_dev_port_state_change: the `change` bits (testable
+ * together through change.any) and the link state, duplex and speed
+ * values.
+ * NOTE(review): the change bits presumably flag which of the values are
+ * being updated - confirm against port.c. */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 link_speed : 1;
+ u8 link_state : 1;
+ u8 link_duplex : 1;
+ };
+ u8 any;
+ } change;
+ u8 link_state : 1;
+ u8 full_duplex : 1;
+ u32 link_speed;
+} vnet_dev_port_state_changes_t;
+
+/* dev.c */
+vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t,
+ vnet_dev_driver_t *);
+void vnet_dev_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_detach (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_port_id_t,
+ vnet_dev_port_add_args_t *);
+vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *);
+vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t);
+void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t);
+
+/* error.c */
+clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_rv_t, char *, ...);
+
+/* handlers.c */
+clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *,
+ vnet_hw_interface_t *, u32);
+u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32);
+clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *,
+ const u8 *);
+clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *,
+ u8);
+int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32,
+ uword *);
+clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *,
+ vnet_hw_interface_t *,
+ clib_bitmap_t *);
+void vnet_dev_clear_hw_interface_counters (u32);
+clib_error_t *vnet_dev_rx_mode_change_fn (vnet_main_t *, u32, u32,
+ vnet_hw_if_rx_mode);
+void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32);
+
+/* port.c */
+vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_state_changes_t);
+void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *);
+
+/* queue.c */
+vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* process.c */
+vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm,
+ vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+
+/* runtime.c */
+/* Kind of object a runtime operation targets. */
+typedef enum
+{
+ VNET_DEV_RT_OP_TYPE_UNKNOWN,
+ VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+} __clib_packed vnet_dev_rt_op_type_t;
+
+/* Action of a runtime operation. */
+typedef enum
+{
+ VNET_DEV_RT_OP_ACTION_UNKNOWN,
+ VNET_DEV_RT_OP_ACTION_START,
+ VNET_DEV_RT_OP_ACTION_STOP,
+} __clib_packed vnet_dev_rt_op_action_t;
+
+/* One runtime operation: thread index, type and action (4 bits each), a
+ * completion flag and the RX queue concerned. */
+typedef struct
+{
+ u16 thread_index;
+ u8 type : 4;
+ u8 action : 4;
+ u8 completed;
+ vnet_dev_rx_queue_t *rx_queue;
+} vnet_dev_rt_op_t;
+
+/* Execute an array of runtime operations; implemented in runtime.c. */
+vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_rt_op_t *, u32);
+
+/* format.c */
+/* Option flags for the format functions declared below. */
+typedef struct
+{
+ u8 counters : 1;
+ u8 show_zero_counters : 1;
+ u8 debug : 1;
+} vnet_dev_format_args_t;
+
+format_function_t format_vnet_dev_addr;
+format_function_t format_vnet_dev_hw_addr;
+format_function_t format_vnet_dev_info;
+format_function_t format_vnet_dev_interface_info;
+format_function_t format_vnet_dev_interface_name;
+format_function_t format_vnet_dev_port_info;
+format_function_t format_vnet_dev_rv;
+format_function_t format_vnet_dev_rx_queue_info;
+format_function_t format_vnet_dev_tx_queue_info;
+format_function_t format_vnet_dev_flags;
+format_function_t format_vnet_dev_port_flags;
+unformat_function_t unformat_vnet_dev_flags;
+unformat_function_t unformat_vnet_dev_port_flags;
+
+/* Runtime data of an RX node: a queue count and room for up to four RX
+ * queue pointers. Must fit into the node runtime data area (asserted
+ * below). */
+typedef struct
+{
+ u8 n_rx_queues;
+ vnet_dev_rx_queue_t *rx_queues[4];
+} vnet_dev_rx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+/* X-macro list of ethernet port RX next nodes as (name, node name) pairs. */
+#define foreach_vnet_dev_port_rx_next \
+ _ (ETH_INPUT, "ethernet-input") \
+ _ (DROP, "error-drop")
+
+/* Next indices of the ethernet port RX node, in list order, followed by the
+ * count. */
+typedef enum
+{
+#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ VNET_DEV_ETH_RX_PORT_N_NEXTS
+} vnet_dev_eth_port_rx_next_t;
+
+/* Default RX next index, indexed by the port type value. */
+extern u16 vnet_dev_default_next_index_by_port_type[];
+extern vlib_node_registration_t port_rx_eth_node;
+
+/* TX node runtime data is the generic interface output runtime. */
+typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+/* Declare bus registration `x` and emit a constructor that pushes it onto
+ * the head of vnet_dev_main.bus_registrations. The macro ends with the
+ * declarator so the caller supplies `= { ... };`. The variadic arguments
+ * are placed before the type (e.g. storage-class specifiers). */
+#define VNET_DEV_REGISTER_BUS(x, ...) \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x; \
+ static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_bus_registration_##x.next_registration = \
+ dm->bus_registrations; \
+ dm->bus_registrations = &__vnet_dev_bus_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x
+
+/* Same as VNET_DEV_REGISTER_BUS, for driver registrations and
+ * vnet_dev_main.driver_registrations. */
+#define VNET_DEV_REGISTER_DRIVER(x, ...) \
+ __VA_ARGS__ vnet_dev_driver_registration_t \
+ __vnet_dev_driver_registration_##x; \
+ static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_driver_registration_##x.next_registration = \
+ dm->driver_registrations; \
+ dm->driver_registrations = &__vnet_dev_driver_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x
+
+/* Define a march-variant node function for vnet_dev_node_t `node`.
+ * Expands to: a prototype of the CLIB_MARCH_SFX-suffixed function, a
+ * vlib_node_fn_registration_t pointing at it, a constructor that records
+ * the march variant and pushes the registration onto node.registrations,
+ * and finally the function header, so the caller continues with the
+ * parameter list and body. */
+#define VNET_DEV_NODE_FN(node) \
+ uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \
+ vlib_frame_t *); \
+ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
+ node##_fn_registration) = { \
+ .function = &CLIB_MARCH_SFX (node##_fn), \
+ }; \
+ \
+ static void __clib_constructor CLIB_MARCH_SFX ( \
+ node##_fn_multiarch_register) (void) \
+ { \
+ extern vnet_dev_node_t node; \
+ vlib_node_fn_registration_t *r; \
+ r = &CLIB_MARCH_SFX (node##_fn_registration); \
+ r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE); \
+ r->next_registration = (node).registrations; \
+ (node).registrations = r; \
+ } \
+ uword CLIB_MARCH_SFX (node##_fn)
+
+/* Iterate `p` over every port of device `d`, and `q` over every RX / TX
+ * queue of port `p`. The container argument is parenthesized so that any
+ * pointer expression can be passed. */
+#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, (d)->ports)
+#define foreach_vnet_dev_port_rx_queue(q, p) \
+  pool_foreach_pointer (q, (p)->rx_queues)
+#define foreach_vnet_dev_port_tx_queue(q, p) \
+  pool_foreach_pointer (q, (p)->tx_queues)
+
+#include <vnet/dev/dev_funcs.h>
+
+#endif /* _VNET_DEV_H_ */
diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h
new file mode 100644
index 00000000000..892cef4b3e8
--- /dev/null
+++ b/src/vnet/dev/dev_funcs.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_FUNCS_H_
+#define _VNET_DEV_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+
+/* Return a pointer to the `data` member of the device. */
+static_always_inline void *
+vnet_dev_get_data (vnet_dev_t *dev)
+{
+  void *data = dev->data;
+
+  return data;
+}
+
+/* Recover the enclosing vnet_dev_t from a pointer to its `data` member by
+ * stepping back STRUCT_OFFSET_OF (vnet_dev_t, data) bytes. */
+static_always_inline vnet_dev_t *
+vnet_dev_from_data (void *p)
+{
+  u8 *base = (u8 *) p - STRUCT_OFFSET_OF (vnet_dev_t, data);
+
+  return (void *) base;
+}
+
+/* Return a pointer to the `data` member of the port. */
+static_always_inline void *
+vnet_dev_get_port_data (vnet_dev_port_t *port)
+{
+  void *data = port->data;
+
+  return data;
+}
+
+/* Return a pointer to the `data` member of the rx queue. */
+static_always_inline void *
+vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq)
+{
+  void *data = rxq->data;
+
+  return data;
+}
+
+/* Return a pointer to the `data` member of the tx queue. */
+static_always_inline void *
+vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq)
+{
+  void *data = txq->data;
+
+  return data;
+}
+
+/* Look up a device by its index in the vnet_dev_main.devices pool. */
+static_always_inline vnet_dev_t *
+vnet_dev_get_by_index (u32 index)
+{
+  vnet_dev_t **slot = pool_elt_at_index (vnet_dev_main.devices, index);
+
+  return *slot;
+}
+
+/* Look up a port by its index in the device's `ports` pool. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index)
+{
+  vnet_dev_port_t **slot = pool_elt_at_index (dev->ports, index);
+
+  return *slot;
+}
+
+/* Map a dev_instance to its port via vnet_dev_main.ports_by_dev_instance.
+ * Returns 0 when that pool index is free. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_dev_instance (u32 dev_instance)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+
+  if (!pool_is_free_index (dm->ports_by_dev_instance, dev_instance))
+    return *pool_elt_at_index (dm->ports_by_dev_instance, dev_instance);
+
+  return 0;
+}
+
+/* Find a device by its id string using the device_index_by_id hash.
+ * Returns 0 when the id is not present. */
+static_always_inline vnet_dev_t *
+vnet_dev_by_id (char *id)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword *entry = hash_get (dm->device_index_by_id, id);
+
+  if (entry == 0)
+    return 0;
+
+  return pool_elt_at_index (dm->devices, entry[0])[0];
+}
+
+/* Return the address to hand to the device for buffer `p`: the pointer value
+ * itself when dev->va_dma is set, otherwise the result of
+ * vlib_physmem_get_pa (). */
+static_always_inline uword
+vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+  if (dev->va_dma)
+    return pointer_to_uword (p);
+
+  return vlib_physmem_get_pa (vm, p);
+}
+
+/* Return the device's `bus_data` member as an untyped pointer. */
+static_always_inline void *
+vnet_dev_get_bus_data (vnet_dev_t *dev)
+{
+  void *bus_data = (void *) dev->bus_data;
+
+  return bus_data;
+}
+
+/* Return the bus the device is attached to, looked up by dev->bus_index in
+ * the vnet_dev_main.buses pool. */
+static_always_inline vnet_dev_bus_t *
+vnet_dev_get_bus (vnet_dev_t *dev)
+{
+  return pool_elt_at_index (vnet_dev_main.buses, dev->bus_index);
+}
+
+/* Sanity check: assert that the caller is running inside the process node
+ * assigned to `dev` and on thread index 0. Consists of ASSERTs only. */
+static_always_inline void
+vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+/* Sanity check: assert that the caller is running inside the process node
+ * assigned to the port's device and on thread index 0. Consists of ASSERTs
+ * only. */
+static_always_inline void
+vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ ASSERT (port->dev->process_node_index ==
+ vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+/* Return the sw_if_index stored in the port's interface state. */
+static_always_inline u32
+vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port)
+{
+  u32 sw_if_index = port->intf.sw_if_index;
+
+  return sw_if_index;
+}
+
+/* Linear search of the device's ports for one whose port_id matches.
+ * Returns 0 when no port matches. */
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id)
+{
+  foreach_vnet_dev_port (port, dev)
+    {
+      if (port->port_id == port_id)
+        return port;
+    }
+
+  return 0;
+}
+
+/* Allocate one zeroed, cache-line aligned chunk of sz + data_sz bytes,
+ * rounded up to a multiple of CLIB_CACHE_LINE_BYTES. */
+static_always_inline void *
+vnet_dev_alloc_with_data (u32 sz, u32 data_sz)
+{
+  u32 total = round_pow2 (sz + data_sz, CLIB_CACHE_LINE_BYTES);
+  void *mem = clib_mem_alloc_aligned (total, CLIB_CACHE_LINE_BYTES);
+
+  clib_memset (mem, 0, total);
+  return mem;
+}
+
+/* Acquire the tx queue spinlock, but only when txq->lock_needed is set.
+ *
+ * The lock byte is taken with a compare-and-swap 0 -> 1 (acquire ordering on
+ * success). On failure the inner loop spins with relaxed loads until the
+ * lock reads as 0 before the compare-and-swap is retried. */
+static_always_inline void
+vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ u8 free = 0;
+
+ if (!txq->lock_needed)
+ return;
+
+ while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
+ while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ /* a failed compare-exchange stored the observed value into `free`;
+  * reset the expected value before retrying */
+ free = 0;
+ }
+}
+
+/* Release the tx queue spinlock (store 0 with release ordering) when
+ * txq->lock_needed is set; otherwise do nothing. */
+static_always_inline void
+vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+  if (txq->lock_needed)
+    __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE);
+}
+
+/* Return the buffer pool index recorded in the rx queue's buffer template. */
+static_always_inline u8
+vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq)
+{
+  u8 buffer_pool_index = rxq->buffer_template.buffer_pool_index;
+
+  return buffer_pool_index;
+}
+
+/* Post a runtime request to an rx queue by atomically OR-ing the bits of
+ * `req` into rxq->runtime_request (release ordering). Requests accumulate
+ * until vnet_dev_rx_queue_runtime_update () consumes them. `vm` is not used
+ * by this function. */
+static_always_inline void
+vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_rx_queue_rt_req_t req)
+{
+ __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number,
+ __ATOMIC_RELEASE);
+}
+
+/* View the node's runtime_data area as the rx node runtime structure. */
+static_always_inline vnet_dev_rx_node_runtime_t *
+vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node)
+{
+  void *rt = node->runtime_data;
+
+  return rt;
+}
+
+/* View the node's runtime_data area as the tx node runtime structure. */
+static_always_inline vnet_dev_tx_node_runtime_t *
+vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node)
+{
+  void *rt = node->runtime_data;
+
+  return rt;
+}
+
+/* Return the rx_queues array of the rx node runtime; used by
+ * foreach_vnet_dev_rx_queue_runtime to obtain the first element. */
+static_always_inline vnet_dev_rx_queue_t **
+foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node)
+{
+  return vnet_dev_get_rx_node_runtime (node)->rx_queues;
+}
+
+/* Consume and apply pending runtime requests for an rx queue.
+ *
+ * Returns 1 without touching anything when no request is pending. Otherwise
+ * the request word is atomically swapped with 0 (acquire ordering) and every
+ * requested action is applied from the port's interface state.
+ *
+ * Returns 0 only when the consumed batch contained suspend_on, 1 otherwise.
+ * If both suspend_on and suspend_off are set in the same batch, the queue
+ * ends up not suspended but 0 is still returned for this call. */
+static_always_inline int
+vnet_dev_rx_queue_runtime_update (vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port;
+ vnet_dev_rx_queue_rt_req_t req;
+ int rv = 1;
+
+ /* fast path: nothing requested */
+ if (PREDICT_TRUE (rxq->runtime_request.as_number == 0))
+ return 1;
+
+ req.as_number =
+ __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE);
+
+ port = rxq->port;
+ if (req.update_next_index)
+ rxq->next_index = port->intf.rx_next_index;
+
+ if (req.update_feature_arc)
+ {
+ vlib_buffer_template_t *bt = &rxq->buffer_template;
+ bt->current_config_index = port->intf.current_config_index;
+ vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index;
+ }
+
+ if (req.suspend_on)
+ {
+ rxq->suspended = 1;
+ rv = 0;
+ }
+
+ /* evaluated after suspend_on, so suspend_off wins for the stored flag */
+ if (req.suspend_off)
+ rxq->suspended = 0;
+
+ return rv;
+}
+
+/* Return the calling thread's slice of vnet_dev_main.runtime_temp_spaces.
+ * Slices are (1 << log2_runtime_temp_space_sz) apart and indexed by
+ * vm->thread_index. */
+static_always_inline void *
+vnet_dev_get_rt_temp_space (vlib_main_t *vm)
+{
+  uword offset = (uword) vm->thread_index
+                 << vnet_dev_main.log2_runtime_temp_space_sz;
+
+  return vnet_dev_main.runtime_temp_spaces + offset;
+}
+
+/* Store an ethernet MAC address into *addr. The address is first built in a
+ * zero-initialized temporary so that every byte of *addr outside eth_mac
+ * ends up zero. */
+static_always_inline void
+vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr)
+{
+  vnet_dev_hw_addr_t tmp = {};
+
+  clib_memcpy_fast (&tmp.eth_mac, eth_mac_addr, sizeof (tmp.eth_mac));
+  addr[0] = tmp;
+}
+
+/* Iterate over the rx queues attached to an rx node runtime, running the
+ * loop body only for queues for which vnet_dev_rx_queue_runtime_update ()
+ * returns non-zero.
+ *
+ * `q` names the loop variable (declared by the macro), `node` is the
+ * vlib_node_runtime_t pointer. The queue pointer is loaded inside the loop
+ * condition, after the bounds check, so the array is never read at or past
+ * index n_rx_queues (including when n_rx_queues is 0).
+ *
+ * The expansion ends in an `if`, so the usual dangling-else caveat applies
+ * to the statement that follows the macro. */
+#define foreach_vnet_dev_rx_queue_runtime(q, node) \
+  for (vnet_dev_rx_queue_t * \
+         *__qp = foreach_vnet_dev_rx_queue_runtime_helper (node), \
+        **__last = __qp + (vnet_dev_get_rx_node_runtime (node))->n_rx_queues, \
+        *(q) = 0; \
+       __qp < __last && (((q) = *__qp), 1); __qp++) \
+    if (vnet_dev_rx_queue_runtime_update (q))
+
+#endif /* _VNET_DEV_FUNCS_H_ */
diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c
new file mode 100644
index 00000000000..df9c6d364e7
--- /dev/null
+++ b/src/vnet/dev/error.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+
+/* Turn a port-level return value into a clib error.
+ *
+ * Returns 0 when rv is VNET_DEV_OK. Otherwise returns a new error whose text
+ * holds the device id, the port id, the formatted return value and the
+ * caller's printf-style message. `vm` is not used. */
+clib_error_t *
+vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv,
+                   char *fmt, ...)
+{
+  clib_error_t *error = 0;
+
+  if (rv != VNET_DEV_OK)
+    {
+      va_list ap;
+      u8 *msg;
+
+      va_start (ap, fmt);
+      msg = va_format (0, fmt, &ap);
+      va_end (ap);
+
+      error =
+        clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id,
+                           port->port_id, format_vnet_dev_rv, rv, msg);
+      /* the message vector is no longer needed once formatted via %v */
+      vec_free (msg);
+    }
+
+  return error;
+}
diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h
new file mode 100644
index 00000000000..2256e1e4203
--- /dev/null
+++ b/src/vnet/dev/errors.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ERRORS_H_
+#define _VNET_DEV_ERRORS_H_
+
+/* X-macro list of device return values: _ (NAME, "description").
+ * format_vnet_dev_rv () indexes its string table with -VNET_DEV_ERR_<NAME>.
+ * NOTE(review): the numeric values are presumably derived from the order of
+ * the entries where the enum is defined - confirm before reordering. */
+#define foreach_vnet_dev_rv_type \
+  _ (ALREADY_EXISTS, "already exists") \
+  _ (ALREADY_IN_USE, "already in use") \
+  _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure") \
+  _ (BUG, "bug") \
+  _ (BUS, "bus error") \
+  _ (DEVICE_NO_REPLY, "no reply from device") \
+  _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error") \
+  _ (DRIVER_NOT_AVAILABLE, "driver not available") \
+  _ (INVALID_BUS, "invalid bus") \
+  _ (INVALID_DATA, "invalid data") \
+  _ (INVALID_DEVICE_ID, "invalid device id") \
+  _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues") \
+  _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues") \
+  _ (INVALID_PORT_ID, "invalid port id") \
+  _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size") \
+  _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size") \
+  _ (INVALID_VALUE, "invalid value") \
+  _ (INTERNAL, "internal error") \
+  _ (NOT_FOUND, "not found") \
+  _ (NOT_READY, "not ready") \
+  _ (NOT_SUPPORTED, "not supported") \
+  _ (NO_CHANGE, "no change") \
+  _ (NO_AVAIL_QUEUES, "no queues available") \
+  _ (NO_SUCH_ENTRY, "no such entry") \
+  _ (PORT_STARTED, "port started") \
+  _ (PROCESS_REPLY, "dev process reply error") \
+  _ (RESOURCE_NOT_AVAILABLE, "resource not available") \
+  _ (TIMEOUT, "timeout") \
+  _ (UNKNOWN_INTERFACE, "unknown interface") \
+  _ (UNSUPPORTED_CONFIG, "unsupported config") \
+  _ (UNSUPPORTED_DEVICE, "unsupported device") \
+  _ (UNSUPPORTED_DEVICE_VER, "unsupported device version")
+
+#endif /* _VNET_DEV_ERRORS_H_ */
diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c
new file mode 100644
index 00000000000..4e1ece66d31
--- /dev/null
+++ b/src/vnet/dev/format.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/dev/counters.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+
+/* Format a vnet_dev_rv_t as its human readable description.
+ * The string table is indexed by the negated return value, with slot 0
+ * holding "OK"; values outside the table are printed numerically. */
+u8 *
+format_vnet_dev_rv (u8 *s, va_list *args)
+{
+  vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t);
+  u32 idx = -rv;
+
+  char *strings[] = { [0] = "OK",
+#define _(n, d) [-VNET_DEV_ERR_##n] = d,
+                      foreach_vnet_dev_rv_type
+#undef _
+  };
+
+  if (idx < ARRAY_LEN (strings))
+    return format (s, "%s", strings[idx]);
+
+  return format (s, "unknown return value (%d)", rv);
+}
+
+/* Format the bus address of a device by delegating to the
+ * format_device_addr operation of the device's bus.
+ *
+ * Takes one argument, a vnet_dev_t pointer. For a null device nothing is
+ * appended and `s` is returned unchanged; returning 0 here would make the
+ * caller lose everything formatted so far. */
+u8 *
+format_vnet_dev_addr (u8 *s, va_list *args)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_t *bus;
+
+  if (dev == 0)
+    return s;
+
+  bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  s = format (s, "%U", bus->ops.format_device_addr, dev);
+
+  return s;
+}
+
+/* Format the interface name of the port that owns the given dev_instance.
+ * NOTE(review): the port lookup can return 0 for a free index and that case
+ * is not handled here - callers presumably pass valid instances only. */
+u8 *
+format_vnet_dev_interface_name (u8 *s, va_list *args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance);
+
+  s = format (s, "%s", port->intf.name);
+  return s;
+}
+
+/* Format a multi-line description of a device: driver and bus names,
+ * optional description, optional bus-specific info, the assigned process
+ * node and optional driver-specific info.
+ *
+ * Arguments, in order: vnet_dev_format_args_t *, vnet_dev_t *. */
+u8 *
+format_vnet_dev_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *fa = va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_driver_t *drv = pool_elt_at_index (dm->drivers, dev->driver_index);
+  vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+  vlib_main_t *vm = vlib_get_main ();
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Driver is '%s', bus is '%s'", drv->registration->name,
+              bus->registration->name);
+
+  if (dev->description)
+    s = format (s, ", description is '%v'", dev->description);
+
+  if (bus->ops.format_device_info)
+    s = format (s, "\n%U%U", format_white_space, indent,
+                bus->ops.format_device_info, fa, dev);
+
+  s = format (s, "\n%UAssigned process node is '%U'", format_white_space,
+              indent, format_vlib_node_name, vm, dev->process_node_index);
+
+  if (dev->ops.format_info)
+    s = format (s, "\n%U%U", format_white_space, indent, dev->ops.format_info,
+                fa, dev);
+
+  return s;
+}
+
+/* Format a vnet_dev_hw_addr_t by printing its eth_mac member as an ethernet
+ * address. */
+u8 *
+format_vnet_dev_hw_addr (u8 *s, va_list *args)
+{
+  vnet_dev_hw_addr_t *hw_addr = va_arg (*args, vnet_dev_hw_addr_t *);
+
+  s = format (s, "%U", format_ethernet_address, hw_addr->eth_mac);
+  return s;
+}
+
+/* Format a multi-line description of a port: primary and secondary hardware
+ * addresses, queue counts, frame size limits, optional driver status and
+ * the interface assignment.
+ *
+ * Arguments, in order: vnet_dev_format_args_t *, vnet_dev_port_t *. */
+u8 *
+format_vnet_dev_port_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+  u32 n_secondary = pool_elts (port->secondary_hw_addr);
+
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr,
+              &port->attr.hw_addr);
+  s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)",
+              pool_elts (port->rx_queues), port->attr.max_rx_queues,
+              pool_elts (port->tx_queues), port->attr.max_tx_queues);
+  if (n_secondary)
+    {
+      u32 i = 0;
+      /* named `ha` so it does not shadow the format-args pointer `a` */
+      vnet_dev_hw_addr_t *ha;
+      s = format (s, "\n%USecondary Hardware Address%s:", format_white_space,
+                  indent, n_secondary > 1 ? "es are" : " is");
+      pool_foreach (ha, port->secondary_hw_addr)
+        {
+          /* start a new output line after every six addresses */
+          if (i++ % 6 == 0)
+            s = format (s, "\n%U", format_white_space, indent + 1);
+          s = format (s, " %U", format_vnet_dev_hw_addr, ha);
+        }
+    }
+  s = format (s, "\n%UMax frame size is %u (max supported %u)",
+              format_white_space, indent, port->max_frame_size,
+              port->attr.max_supported_frame_size);
+  if (port->port_ops.format_status)
+    s = format (s, "\n%U%U", format_white_space, indent,
+                port->port_ops.format_status, a, port);
+
+  s = format (s, "\n%UInterface ", format_white_space, indent);
+  if (port->interface_created)
+    {
+      s = format (s, "assigned, interface name is '%U', RX node is '%U'",
+                  format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index,
+                  format_vlib_node_name, vm, port->intf.rx_node_index);
+    }
+  else
+    s = format (s, "not assigned");
+  return s;
+}
+
+/* Format a two-line description of an rx queue: size, buffer pool index,
+ * polling thread and enabled / started state.
+ *
+ * Arguments, in order: vnet_dev_format_args_t * (consumed but unused),
+ * vnet_dev_rx_queue_t *. */
+u8 *
+format_vnet_dev_rx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *fa =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+  u32 indent = format_get_indent (s);
+  char *enabled_prefix = rxq->enabled ? "en" : "dis";
+  char *started_prefix = rxq->started ? "" : "not-";
+
+  s = format (s, "Size is %u, buffer pool index is %u", rxq->size,
+              vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+  s = format (s, "\n%UPolling thread is %u, %sabled, %sstarted",
+              format_white_space, indent, rxq->rx_thread_index,
+              enabled_prefix, started_prefix);
+
+  return s;
+}
+
+/* Format a two-line description of a tx queue: its size and the list of
+ * threads set in its assigned_threads bitmap.
+ *
+ * Arguments, in order: vnet_dev_format_args_t * (consumed but unused),
+ * vnet_dev_tx_queue_t *. */
+u8 *
+format_vnet_dev_tx_queue_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *fa =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+  u32 indent = format_get_indent (s);
+  u32 n_threads;
+
+  s = format (s, "Size is %u", txq->size);
+  s = format (s, "\n%U", format_white_space, indent);
+
+  n_threads = clib_bitmap_count_set_bits (txq->assigned_threads);
+  if (n_threads != 0)
+    s = format (s, "Used by thread%s %U", n_threads > 1 ? "s" : "",
+                format_bitmap_list, txq->assigned_threads);
+  else
+    s = format (s, "Not used by any thread");
+
+  return s;
+}
+
+/* Format the full description shown for an interface: the device, the port
+ * that owns the given dev_instance, and each of the port's rx and tx queues,
+ * every level indented two columns deeper than its parent.
+ *
+ * Takes one argument, the u32 dev_instance. Nested formatters receive a
+ * zero-initialized vnet_dev_format_args_t.
+ * NOTE(review): the port lookup can return 0 for a free index and that case
+ * is not handled here - callers presumably pass valid instances only. */
+u8 *
+format_vnet_dev_interface_info (u8 *s, va_list *args)
+{
+ u32 i = va_arg (*args, u32);
+ vnet_dev_format_args_t fa = {}, *a = &fa;
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+ vnet_dev_t *dev = port->dev;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Device:");
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_info, a, dev);
+
+ s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_port_info, a, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ s = format (s, "\n%URX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_rx_queue_info, a, q);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_tx_queue_info, a, q);
+ }
+ return s;
+}
+
+/* Parse a comma separated list of flag names into a bitmask.
+ *
+ * Input characters are normalized while they are collected: lower case
+ * letters are upper-cased and '-' becomes '_', so "no-stats" matches the
+ * name "NO_STATS". Collection stops at the first character that is not a
+ * letter, digit, '-' or ','.
+ *
+ * names / val are parallel arrays with n_flags entries each.
+ *
+ * Returns the OR of the values of all listed flags, or 0 when nothing was
+ * collected or when any token is not a known flag name. Callers treat 0 as
+ * a parse failure, so a list containing an unknown name is rejected as a
+ * whole instead of being silently accepted in part.
+ *
+ * NOTE(review): the character that terminates collection is consumed and
+ * not pushed back - confirm this is what callers expect. */
+static u64
+unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags)
+{
+  u64 rv = 0;
+  uword c = 0;
+  u8 *s = 0;
+
+  while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+    {
+      switch (c)
+        {
+        case 'a' ... 'z':
+          c -= 'a' - 'A';
+          /* fallthrough */
+        case '0' ... '9':
+        case 'A' ... 'Z':
+          vec_add1 (s, c);
+          break;
+        case '-':
+          vec_add1 (s, '_');
+          break;
+        case ',':
+          /* token separator: terminate the current name */
+          vec_add1 (s, 0);
+          break;
+        default:
+          goto end_of_string;
+        }
+    }
+end_of_string:
+
+  if (s == 0)
+    return 0;
+
+  vec_add1 (s, 0);
+
+  /* s now holds consecutive NUL terminated tokens */
+  for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1)
+    {
+      for (c = 0; c < n_flags; c++)
+        if (strcmp (names[c], (char *) p) == 0)
+          {
+            rv |= val[c];
+            break;
+          }
+      if (c == n_flags)
+        {
+          /* unknown flag name: fail instead of returning a partial mask */
+          rv = 0;
+          goto done;
+        }
+    }
+
+done:
+  vec_free (s);
+  return rv;
+}
+
+/* Unformat function for device flags.
+ *
+ * Takes one argument, a vnet_dev_flags_t pointer. Parses a comma separated
+ * flag list with unformat_flags () and, on success, stores the resulting
+ * mask in fp->n and returns 1. Returns 0 (leaving *fp untouched) when the
+ * parsed mask is 0. */
+uword
+unformat_vnet_dev_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  u64 val;
+
+  char *names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  /* the trailing comma keeps this table valid for more than one flag */
+  u64 vals[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  val = unformat_flags (input, names, vals, ARRAY_LEN (names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* Unformat function for port flags.
+ *
+ * Takes one argument, a vnet_dev_port_flags_t pointer. Parses a comma
+ * separated flag list with unformat_flags () and, on success, stores the
+ * resulting mask in fp->n and returns 1. Returns 0 (leaving *fp untouched)
+ * when the parsed mask is 0. */
+uword
+unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  u64 val;
+
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  /* the trailing comma keeps this table valid for more than one flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  val =
+    unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names));
+
+  if (!val)
+    return 0;
+
+  fp->n = val;
+  return 1;
+}
+
+/* Append the names of all flags set in `val`, separated by single spaces.
+ *
+ * flag_names / flag_values are parallel arrays with n_flags entries. Names
+ * are printed in the form accepted on input: upper case letters are
+ * lower-cased and '_' becomes '-'. Nothing is appended when no listed flag
+ * is set. */
+static u8 *
+format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[],
+              u32 n_flags)
+{
+  u32 n = 0;
+
+  /* index type matches n_flags to avoid a signed/unsigned comparison */
+  for (u32 i = 0; i < n_flags; i++)
+    {
+      if ((val & flag_values[i]) == 0)
+        continue;
+
+      /* separator before every name except the first one printed */
+      if (n++)
+        vec_add1 (s, ' ');
+
+      for (char *c = flag_names[i]; c[0] != 0; c++)
+        {
+          switch (c[0])
+            {
+            case 'A' ... 'Z':
+              vec_add1 (s, c[0] + 'a' - 'A');
+              break;
+            case '_':
+              vec_add1 (s, '-');
+              break;
+            default:
+              vec_add1 (s, c[0]);
+              break;
+            }
+        }
+    }
+
+  return s;
+}
+
+/* Format function for device flags.
+ * Takes one argument, a vnet_dev_flags_t pointer, and prints the names of
+ * the flags set in fp->n using format_flags (). */
+u8 *
+format_vnet_dev_flags (u8 *s, va_list *args)
+{
+  vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_flag
+#undef _
+  };
+  /* the trailing comma keeps this table valid for more than one flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+                       ARRAY_LEN (flag_names));
+}
+
+/* Format function for port flags.
+ * Takes one argument, a vnet_dev_port_flags_t pointer, and prints the names
+ * of the flags set in fp->n using format_flags (). */
+u8 *
+format_vnet_dev_port_flags (u8 *s, va_list *args)
+{
+  vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+  char *flag_names[] = {
+#define _(b, n, d) #n,
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+  /* the trailing comma keeps this table valid for more than one flag */
+  u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  };
+
+  return format_flags (s, fp->n, flag_names, flag_values,
+                       ARRAY_LEN (flag_names));
+}
+
+/* Format the prefix used by the dev log macros: optional device address,
+ * optional function name (with a leading "vnet_dev_" stripped), then ": ".
+ *
+ * Arguments, in order: vnet_dev_t * (may be 0), char * function name
+ * (may be 0). */
+u8 *
+format_vnet_dev_log (u8 *s, va_list *args)
+{
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  char *func = va_arg (*args, char *);
+
+  if (dev)
+    {
+      s = format (s, "%U", format_vnet_dev_addr, dev);
+      /* separator only when a function name follows the address */
+      if (func)
+        vec_add1 (s, ' ');
+    }
+
+  if (func)
+    {
+      char *name = func;
+
+      if (strncmp (name, "vnet_dev_", 9) == 0)
+        name += 9;
+      s = format (s, "%s", name);
+    }
+
+  vec_add1 (s, ':');
+  vec_add1 (s, ' ');
+  return s;
+}
diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c
new file mode 100644
index 00000000000..7e7347eb217
--- /dev/null
+++ b/src/vnet/dev/handlers.c
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/flow/flow.h>
+
+/* File-local log class "dev" / "handler"; referenced as dev_log.class by
+ * the log_* macros from vnet/dev/log.h. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "handler",
+};
+
+/* Handler for a max frame size change on a dev port interface.
+ *
+ * The request is validated first: "no change" is reported as success, any
+ * other validation failure as an error. A valid request is then passed to
+ * vnet_dev_process_port_cfg_change_req (). Returns 0 on success or a
+ * clib error describing the failure. */
+clib_error_t *
+vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+                                  u32 frame_size)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_MAX_FRAME_SIZE,
+    .max_frame_size = frame_size,
+  };
+  vnet_dev_rv_t rv;
+
+  log_debug (p->dev, "size %u", frame_size);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  switch (rv)
+    {
+    case VNET_DEV_OK:
+      break;
+    case VNET_DEV_ERR_NO_CHANGE:
+      return 0;
+    default:
+      return vnet_dev_port_err (vm, p, rv,
+                                "new max frame size is not valid for port");
+    }
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+                              "device failed to change max frame size");
+
+  return 0;
+}
+
+/* Handler for ethernet interface flag changes on a dev port.
+ *
+ * ETHERNET_INTERFACE_FLAG_ACCEPT_ALL requests promiscuous mode on,
+ * ETHERNET_INTERFACE_FLAG_DEFAULT_L3 requests it off; any other value is
+ * rejected. Returns 0 on success or when nothing needs to change, ~0 on
+ * failure. */
+u32
+vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+                               u32 flags)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+  };
+  vnet_dev_rv_t rv;
+
+  if (flags == ETHERNET_INTERFACE_FLAG_DEFAULT_L3)
+    {
+      log_debug (p->dev, "promisc off");
+    }
+  else if (flags == ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
+    {
+      log_debug (p->dev, "promisc on");
+      req.promisc = 1;
+    }
+  else
+    {
+      return ~0;
+    }
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv == VNET_DEV_ERR_NO_CHANGE)
+    return 0;
+  if (rv != VNET_DEV_OK)
+    return ~0;
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+  if (rv != VNET_DEV_OK && rv != VNET_DEV_ERR_NO_CHANGE)
+    return ~0;
+
+  return 0;
+}
+
+/* Handler for a primary hardware address change on a dev port interface.
+ *
+ * Builds a CHANGE_PRIMARY_HW_ADDR request from `new`, validates it ("no
+ * change" counts as success) and passes it to
+ * vnet_dev_process_port_cfg_change_req (). `old` is not used. Returns 0 on
+ * success or a clib error describing the failure. */
+clib_error_t *
+vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old,
+                          const u8 *new)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR,
+  };
+  vnet_dev_rv_t rv;
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, new);
+
+  log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  switch (rv)
+    {
+    case VNET_DEV_OK:
+      break;
+    case VNET_DEV_ERR_NO_CHANGE:
+      return 0;
+    default:
+      return vnet_dev_port_err (vm, p, rv, "hw address is not valid for port");
+    }
+
+  rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv, "device failed to change hw address");
+
+  return 0;
+}
+
+/* Handler for adding or removing a secondary hardware address on a dev
+ * port interface.
+ *
+ * Builds an ADD / REMOVE_SECONDARY_HW_ADDR request (selected by is_add) from
+ * `address`, validates it and passes it to
+ * vnet_dev_process_port_cfg_change_req (). Unlike the primary address
+ * handler, any non-OK validation result is reported as an error. Returns 0
+ * on success or a clib error describing the failure. */
+clib_error_t *
+vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address,
+                              u8 is_add)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  vnet_dev_rv_t rv;
+
+  vnet_dev_port_cfg_change_req_t req = {
+    .type = is_add ? VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR :
+                     VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR,
+  };
+
+  vnet_dev_set_hw_addr_eth_mac (&req.addr, address);
+
+  log_debug (p->dev, "received (addr %U is_add %u)", format_vnet_dev_hw_addr,
+             &req.addr, is_add);
+
+  rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+  if (rv != VNET_DEV_OK)
+    return vnet_dev_port_err (vm, p, rv,
+                              "provided secondary hw addresses cannot "
+                              "be added/removed");
+
+  if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+    return vnet_dev_port_err (
+      vm, p, rv, "device failed to add/remove secondary hw address");
+
+  return 0;
+}
+
+/* Flow operations handler for dev ports. Flow operations are not
+ * implemented: the request is logged as a warning against the port's device
+ * and VNET_FLOW_ERROR_NOT_SUPPORTED is returned. */
+int
+vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op,
+ u32 dev_instance, u32 flow_index, uword *private_data)
+{
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+ log_warn (p->dev, "unsupported request for flow_ops received");
+ return VNET_FLOW_ERROR_NOT_SUPPORTED;
+}
+
+/* RSS queue configuration handler for dev ports. Setting RSS queues is not
+ * implemented: the request is logged as a warning against the port's device
+ * and a VNET_ERR_UNSUPPORTED error is returned. */
+clib_error_t *
+vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+                                   clib_bitmap_t *bitmap)
+{
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+  log_warn (p->dev, "unsupported request for set_rss_queues received");
+  return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented");
+}
+
+/* Clear the counters of the port that owns dev instance `instance` by
+ * invoking vnet_dev_port_clear_counters through
+ * vnet_dev_process_call_port_op_no_rv (). */
+void
+vnet_dev_clear_hw_interface_counters (u32 instance)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (instance);
+
+  vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_clear_counters);
+}
+
+/* Rx mode change handler for dev ports. Not implemented: always returns a
+ * "not supported" error, whatever the arguments. */
+clib_error_t *
+vnet_dev_rx_mode_change_fn (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
+                            vnet_hw_if_rx_mode mode)
+{
+  clib_error_t *err = clib_error_return (0, "not supported");
+
+  return err;
+}
+
+/* Configure (or remove, when node_index is ~0) rx redirection of a dev port
+ * interface to a specific graph node.
+ *
+ * When setting a redirect, node_index is added as a next of
+ * port_rx_eth_node and the resulting next-slot index is remembered in
+ * intf.redirect_to_node_next_index. The active intf.rx_next_index is only
+ * changed while no feature arc is enabled on the port; in that case every
+ * rx queue is asked to pick up the new next index at runtime.
+ *
+ * rx_next_index always holds a next-slot index, never a raw node index, and
+ * is left untouched while a feature arc is enabled. */
+void
+vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
+                                  u32 node_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_dev_port_t *port =
+    vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+  int runtime_update = 0;
+
+  if (node_index == ~0)
+    {
+      /* remove the redirect and fall back to the port type default */
+      port->intf.redirect_to_node_next_index = 0;
+      if (port->intf.feature_arc == 0)
+        {
+          port->intf.rx_next_index =
+            vnet_dev_default_next_index_by_port_type[port->attr.type];
+          runtime_update = 1;
+        }
+      port->intf.redirect_to_node = 0;
+    }
+  else
+    {
+      u16 next_index =
+        vlib_node_add_next (vm, port_rx_eth_node.index, node_index);
+      port->intf.redirect_to_node_next_index = next_index;
+      if (port->intf.feature_arc == 0)
+        {
+          port->intf.rx_next_index = next_index;
+          runtime_update = 1;
+        }
+      port->intf.redirect_to_node = 1;
+    }
+
+  if (runtime_update)
+    {
+      foreach_vnet_dev_port_rx_queue (rxq, port)
+        vnet_dev_rx_queue_rt_request (
+          vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+      log_debug (port->dev, "runtime update requested due to change in "
+                            "redirect-to-next configuration");
+    }
+}
diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h
new file mode 100644
index 00000000000..432e7b88a21
--- /dev/null
+++ b/src/vnet/dev/log.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_LOG_H_
+#define _VNET_DEV_LOG_H_
+
+format_function_t format_vnet_dev_log;
+
+/* Logging helpers for the dev framework.
+ *
+ * Each macro logs through vlib_log () at a fixed level, using the log class
+ * object named `dev_log` that the including file must define. The message is
+ * prefixed with the output of format_vnet_dev_log for `dev`. Only log_debug
+ * passes the calling function's name (__func__) to that prefix; the other
+ * levels pass 0 instead. `f` must be a string literal because it is
+ * concatenated with "%U". */
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, __func__, ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, 0, ##__VA_ARGS__)
+
+#endif /* _VNET_DEV_LOG_H_ */
diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h
new file mode 100644
index 00000000000..f13f4075255
--- /dev/null
+++ b/src/vnet/dev/mgmt.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_MGMT_H_
+#define _VNET_DEV_MGMT_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_MGMT_H_ */
diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c
new file mode 100644
index 00000000000..3310841c6c7
--- /dev/null
+++ b/src/vnet/dev/pci.c
@@ -0,0 +1,447 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev" / "pci" referenced by the log_* () macros and
+ * the direct vlib_log_* () calls in this file. Its default syslog level is
+ * set to debug. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "pci",
+ .default_syslog_level = VLIB_LOG_LEVEL_DEBUG,
+};
+
+/* Parse a device id string made of "pci", VNET_DEV_DEVICE_ID_PREFIX_DELIMITER
+ * and a PCI address, storing the address in *addr.
+ * Returns the unformat () result: non-zero when the string matched,
+ * 0 otherwise. */
+static int
+vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str)
+{
+  unformat_input_t in;
+  uword matched;
+
+  unformat_init_string (&in, str, strlen (str));
+  matched = unformat (&in, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U",
+                      unformat_vlib_pci_addr, addr);
+  unformat_free (&in);
+
+  return matched;
+}
+
+/* Bus "get_device_info" operation.
+ *
+ * Turns device_id into a PCI address, queries vlib for the device and
+ * returns a newly allocated vnet_dev_bus_pci_device_info_t holding the
+ * address, vendor id, device id and revision. Returns 0 when the id does
+ * not parse or when vlib reports an error (which is logged and freed).
+ * The result is released by vnet_dev_bus_pci_free_device_info (). */
+static void *
+vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id)
+{
+  vnet_dev_bus_pci_device_info_t *result;
+  vlib_pci_device_info_t *pci_info = 0;
+  vlib_pci_addr_t pci_addr = {};
+  clib_error_t *error = 0;
+
+  vlib_log_debug (dev_log.class, "device %s", device_id);
+
+  if (!vnet_dev_bus_pci_device_id_to_pci_addr (&pci_addr, device_id))
+    return 0;
+
+  pci_info = vlib_pci_get_device_info (vm, &pci_addr, &error);
+  if (error)
+    {
+      vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error,
+                    error);
+      clib_error_free (error);
+      return 0;
+    }
+
+  /* copy out the few fields we keep, then drop the vlib copy */
+  result = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t));
+  result->addr = pci_addr;
+  result->vendor_id = pci_info->vendor_id;
+  result->device_id = pci_info->device_id;
+  result->revision = pci_info->revision;
+  vlib_pci_free_device_info (pci_info);
+
+  return result;
+}
+
+/* Bus "free_device_info" operation: releases dev_info with clib_mem_free (),
+ * matching the clib_mem_alloc () done in vnet_dev_bus_pci_get_device_info ().
+ * vm is unused. */
+static void
+vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info)
+{
+ clib_mem_free (dev_info);
+}
+
+/* Bus "device_open" operation for PCI devices.
+ *
+ * Parses dev->device_id into a PCI address, opens the device through vlib,
+ * records the NUMA node and VA-DMA capability on the device, stores dev as
+ * the vlib PCI private data (read back by the interrupt trampolines) and
+ * allocates a zeroed, cache-line aligned table with one handler slot per
+ * MSI-X interrupt.
+ *
+ * Returns VNET_DEV_OK, VNET_DEV_ERR_INVALID_DEVICE_ID when the id does not
+ * parse, or VNET_DEV_ERR_BUS when vlib_pci_device_open () fails. */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *err = 0;
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0)
+    return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+  if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle)))
+    {
+      log_err (dev, "device_open: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  /* vnet_dev_bus_pci_close () only closes the handle when this flag is
+   * set; without it the opened device would never be closed */
+  pdd->pci_handle_valid = 1;
+
+  dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle);
+
+  if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle))
+    {
+      dev->va_dma = 1;
+      log_debug (dev, "device supports VA DMA");
+    }
+
+  vlib_pci_set_private_data (vm, pdd->handle, (uword) dev);
+
+  pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle);
+  if (pdd->n_msix_int)
+    {
+      /* plain aligned allocation, NOT a vppinfra vector */
+      u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int;
+      sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+      pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+      clib_memset (pdd->msix_handlers, 0, sz);
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Bus "device_close" operation for PCI devices.
+ *
+ * Removes the INTx handler if one is recorded, removes every MSI-X handler
+ * still present in the table and frees the table, then closes the vlib PCI
+ * handle if it is marked valid. Return values of the handler removal calls
+ * are not checked. */
+static void
+vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  if (pdd->intx_handler)
+    vnet_dev_pci_intx_remove_handler (vm, dev);
+
+  if (pdd->msix_handlers)
+    {
+      for (u16 i = 0; i < pdd->n_msix_int; i++)
+        if (pdd->msix_handlers[i])
+          vnet_dev_pci_msix_remove_handler (vm, dev, i, 1);
+      clib_mem_free (pdd->msix_handlers);
+      pdd->msix_handlers = 0;
+    }
+
+  if (pdd->pci_handle_valid)
+    {
+      vlib_pci_device_close (vm, pdd->handle);
+      /* a repeated close must not touch the now stale handle */
+      pdd->pci_handle_valid = 0;
+    }
+}
+
+/* Bus "dma_mem_alloc_fn" operation.
+ *
+ * Allocates size bytes (rounded up to a multiple of align) of physmem on the
+ * device's NUMA node, maps it for DMA on the device's PCI handle, zeroes it
+ * and stores the pointer in pp[0]. An align of 0 selects
+ * CLIB_CACHE_LINE_BYTES.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL when either the
+ * allocation or the DMA mapping fails; pp is left untouched on failure. */
+static vnet_dev_rv_t
+vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size,
+                                u32 align, void **pp)
+{
+  clib_error_t *err;
+  void *p;
+
+  align = align ? align : CLIB_CACHE_LINE_BYTES;
+  size = round_pow2 (size, align);
+
+  p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node);
+
+  if (p == 0)
+    {
+      err = vlib_physmem_last_error (vm);
+      log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U",
+               format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p)))
+    {
+      log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error,
+               err);
+      clib_error_free (err);
+      /* the caller never sees p, so it has to be released here */
+      vlib_physmem_free (vm, p);
+      return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+    }
+
+  clib_memset (p, 0, size);
+  pp[0] = p;
+  return VNET_DEV_OK;
+}
+
+/* Bus "dma_mem_free_fn" operation: returns p to physmem; a null p is
+ * ignored. dev is unused. */
+static void
+vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ if (p)
+ vlib_physmem_free (vm, p);
+}
+
+/* Read sizeof (*hdr) bytes of PCI config space, starting at offset 0, into
+ * hdr. Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing
+ * the vlib error. */
+vnet_dev_rv_t
+vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev,
+                                 vlib_pci_config_hdr_t *hdr)
+{
+  clib_error_t *error = vlib_pci_read_write_config (
+    vm, vnet_dev_get_pci_handle (dev), VLIB_READ, 0, hdr, sizeof (*hdr));
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_read_config_header: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Map PCI region number `region` of the device via vlib_pci_map_region (),
+ * which receives pp as its output argument. Returns VNET_DEV_OK, or
+ * VNET_DEV_ERR_BUS after logging and freeing the vlib error. */
+vnet_dev_rv_t
+vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region,
+                         void **pp)
+{
+  clib_error_t *error =
+    vlib_pci_map_region (vm, vnet_dev_get_pci_handle (dev), region, pp);
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_map_region: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Call vlib_pci_function_level_reset () on the device's PCI handle. Returns
+ * VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the vlib
+ * error. */
+vnet_dev_rv_t
+vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *error =
+    vlib_pci_function_level_reset (vm, vnet_dev_get_pci_handle (dev));
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_function_level_reset: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Call vlib_pci_bus_master_enable () on the device's PCI handle. Returns
+ * VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the vlib
+ * error. */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *error =
+    vlib_pci_bus_master_enable (vm, vnet_dev_get_pci_handle (dev));
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_bus_master_enable: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* INTx trampoline registered with vlib: recovers the vnet_dev_t from the
+ * PCI handle's private data and forwards to the device's INTx handler when
+ * one is recorded. */
+static void
+vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_pci_intx_handler_fn_t *fn =
+    vnet_dev_get_bus_pci_device_data (dev)->intx_handler;
+
+  if (fn == 0)
+    return;
+
+  fn (vm, dev);
+}
+
+/* Install fn as the device's INTx interrupt handler.
+ *
+ * Registers the vnet_dev_pci_intx_handler () trampoline with vlib and, on
+ * success, records fn in the PCI bus data so that the trampoline can call
+ * it and vnet_dev_bus_pci_close () knows a handler has to be removed.
+ *
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the
+ * vlib error (fn is not recorded in that case). */
+vnet_dev_rv_t
+vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+                               vnet_dev_pci_intx_handler_fn_t *fn)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler);
+
+  if (err)
+    {
+      log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  /* without this the trampoline finds no handler and drops every interrupt */
+  pdd->intx_handler = fn;
+
+  return VNET_DEV_OK;
+}
+
+/* Unregister the INTx trampoline from vlib and forget the recorded device
+ * handler. Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and
+ * freeing the vlib error; the recorded handler is kept in that case. */
+vnet_dev_rv_t
+vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vlib_pci_dev_handle_t handle = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+  clib_error_t *error = vlib_pci_unregister_intx_handler (vm, handle);
+
+  if (error != 0)
+    {
+      log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error,
+               error);
+      clib_error_free (error);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  bus_data->intx_handler = 0;
+  return VNET_DEV_OK;
+}
+
+/* MSI-X trampoline registered with vlib: recovers the vnet_dev_t from the
+ * PCI handle's private data and forwards to the handler recorded for
+ * `line`, if any. Lines outside the handler table are ignored. */
+static void
+vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line)
+{
+  vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+  vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+  /* msix_handlers comes from clib_mem_alloc_aligned (), not from the vec
+   * API, so vec_len () would read a vector header that does not exist;
+   * the table holds n_msix_int entries */
+  if (pdd->msix_handlers && line < pdd->n_msix_int &&
+      pdd->msix_handlers[line])
+    pdd->msix_handlers[line](vm, dev, line);
+}
+
+/* Install fn as the handler for MSI-X lines [first, first + count).
+ *
+ * Registers the vnet_dev_pci_msix_handler () trampoline with vlib for that
+ * range and then records fn in each table slot; every slot is asserted to
+ * be empty beforehand. Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after
+ * logging and freeing the vlib error (no slot is modified in that case). */
+vnet_dev_rv_t
+vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+                               vnet_dev_pci_msix_handler_fn_t *fn, u16 first,
+                               u16 count)
+{
+  vlib_pci_dev_handle_t handle = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+  int end = first + count;
+  clib_error_t *error = vlib_pci_register_msix_handler (
+    vm, handle, first, count, vnet_dev_pci_msix_handler);
+
+  if (error != 0)
+    {
+      log_err (dev, "pci_register_msix_handler: %U", format_clib_error,
+               error);
+      clib_error_free (error);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 line = first; line < end; line++)
+    {
+      ASSERT (bus_data->msix_handlers[line] == 0);
+      bus_data->msix_handlers[line] = fn;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Remove the handlers for MSI-X lines [first, first + count).
+ *
+ * Unregisters the range from vlib and then clears each table slot; every
+ * slot is asserted to be occupied beforehand. Returns VNET_DEV_OK, or
+ * VNET_DEV_ERR_BUS after logging and freeing the vlib error (no slot is
+ * modified in that case). */
+vnet_dev_rv_t
+vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                                  u16 count)
+{
+  vlib_pci_dev_handle_t handle = vnet_dev_get_pci_handle (dev);
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+  int end = first + count;
+  clib_error_t *error =
+    vlib_pci_unregister_msix_handler (vm, handle, first, count);
+
+  if (error != 0)
+    {
+      log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error,
+               error);
+      clib_error_free (error);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  for (u16 line = first; line < end; line++)
+    {
+      ASSERT (bus_data->msix_handlers[line] != 0);
+      bus_data->msix_handlers[line] = 0;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Call vlib_pci_enable_msix_irq () for `count` lines starting at `first`.
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the
+ * vlib error. */
+vnet_dev_rv_t
+vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                          u16 count)
+{
+  clib_error_t *error = vlib_pci_enable_msix_irq (
+    vm, vnet_dev_get_pci_handle (dev), first, count);
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Call vlib_pci_disable_msix_irq () for `count` lines starting at `first`.
+ * Returns VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the
+ * vlib error. */
+vnet_dev_rv_t
+vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+                           u16 count)
+{
+  vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+  clib_error_t *err;
+
+  err = vlib_pci_disable_msix_irq (vm, h, first, count);
+
+  if (err)
+    {
+      /* message spelling fixed ("disble") so it can be found by grepping
+       * for the failing call */
+      log_err (dev, "pci_disable_msix_irq: %U", format_clib_error, err);
+      clib_error_free (err);
+      return VNET_DEV_ERR_BUS;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Call vlib_pci_bus_master_disable () on the device's PCI handle. Returns
+ * VNET_DEV_OK, or VNET_DEV_ERR_BUS after logging and freeing the vlib
+ * error. */
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  clib_error_t *error =
+    vlib_pci_bus_master_disable (vm, vnet_dev_get_pci_handle (dev));
+
+  if (error == 0)
+    return VNET_DEV_OK;
+
+  log_err (dev, "pci_bus_master_disable: %U", format_clib_error, error);
+  clib_error_free (error);
+  return VNET_DEV_ERR_BUS;
+}
+
+/* Bus "format_device_info" operation.
+ * va_args: vnet_dev_format_args_t * (unused), vnet_dev_t *.
+ * Appends the PCIe address and, when the config space can be read, the
+ * link port, current speed and maximum speed. A config read error is
+ * freed silently and only the address is printed. */
+static u8 *
+format_dev_pci_device_info (u8 *s, va_list *args)
+{
+  vnet_dev_format_args_t __clib_unused *a =
+    va_arg (*args, vnet_dev_format_args_t *);
+  vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+  vnet_dev_bus_pci_device_data_t *bus_data =
+    vnet_dev_get_bus_pci_device_data (dev);
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_pci_config_t config = {};
+  clib_error_t *error;
+
+  s = format (s, "PCIe address is %U", format_vlib_pci_addr, &bus_data->addr);
+
+  error = vlib_pci_read_write_config (vm, bus_data->handle, VLIB_READ, 0,
+                                      &config, sizeof (config));
+  if (error)
+    {
+      clib_error_free (error);
+      return s;
+    }
+
+  return format (s, ", port is %U, speed is %U (max %U)",
+                 format_vlib_pci_link_port, &config,
+                 format_vlib_pci_link_speed, &config,
+                 format_vlib_pci_link_speed_cap, &config);
+}
+
+/* Bus "format_device_addr" operation.
+ * va_args: vnet_dev_t *.
+ * Appends the PCI address stored in the device's PCI bus data.
+ * NOTE(review): unlike format_dev_pci_device_info () this does not consume a
+ * leading vnet_dev_format_args_t * argument - confirm this matches how the
+ * bus op is invoked. */
+static u8 *
+format_dev_pci_device_addr (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ return format (s, "%U", format_vlib_pci_addr, &pdd->addr);
+}
+
+/* Registration of the "pci" bus with the device infra: name, size of the
+ * per-device bus data and the bus operations implemented in this file. */
+VNET_DEV_REGISTER_BUS (pci) = {
+  .name = "pci",
+  /* dev->bus_data is accessed as vnet_dev_bus_pci_device_data_t (see
+   * vnet_dev_get_bus_pci_device_data ()), so that is the size to report.
+   * The smaller vnet_dev_bus_pci_device_info_t is the separately allocated
+   * object returned by .get_device_info.
+   * NOTE(review): confirm against the consumer of device_data_size. */
+  .device_data_size = sizeof (vnet_dev_bus_pci_device_data_t),
+  .ops = {
+    .device_open = vnet_dev_bus_pci_open,
+    .device_close = vnet_dev_bus_pci_close,
+    .get_device_info = vnet_dev_bus_pci_get_device_info,
+    .free_device_info = vnet_dev_bus_pci_free_device_info,
+    .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc,
+    .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free,
+    .format_device_info = format_dev_pci_device_info,
+    .format_device_addr = format_dev_pci_device_addr,
+  },
+};
diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h
new file mode 100644
index 00000000000..dd104ea8507
--- /dev/null
+++ b/src/vnet/dev/pci.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PCI_H_
+#define _VNET_DEV_PCI_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/dev/dev.h>
+
+/* Driver-supplied INTx interrupt callback; receives the device. */
+typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev);
+/* Driver-supplied MSI-X interrupt callback; receives the device and the
+ * MSI-X line the interrupt was delivered on. */
+typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev, u16 line);
+
+/* PCI identification data returned by the bus get_device_info operation. */
+typedef struct
+{
+ vlib_pci_addr_t addr;
+ u16 vendor_id;
+ u16 device_id;
+ u8 revision;
+} vnet_dev_bus_pci_device_info_t;
+
+/* Per-device PCI bus state, stored in vnet_dev_t.bus_data (see
+ * vnet_dev_get_bus_pci_device_data ()). */
+typedef struct
+{
+ /* checked by the bus close operation before closing `handle` */
+ u8 pci_handle_valid : 1;
+ /* number of MSI-X interrupts reported by vlib at open time */
+ u16 n_msix_int;
+ /* PCI address parsed from the device id */
+ vlib_pci_addr_t addr;
+ /* vlib PCI device handle obtained at open time */
+ vlib_pci_dev_handle_t handle;
+ /* device INTx callback invoked by the bus INTx trampoline, or 0 */
+ vnet_dev_pci_intx_handler_fn_t *intx_handler;
+ /* table of n_msix_int MSI-X callbacks indexed by line; allocated with
+ * clib_mem_alloc_aligned (), i.e. not a vppinfra vector */
+ vnet_dev_pci_msix_handler_fn_t **msix_handlers;
+} vnet_dev_bus_pci_device_data_t;
+
+/* Return the device's bus_data area viewed as PCI bus state. No check is
+ * made that the device actually sits on the PCI bus. */
+static_always_inline vnet_dev_bus_pci_device_data_t *
+vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+/* Return the vlib PCI handle stored in the device's PCI bus data. */
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_handle (vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *bus_data = (void *) dev->bus_data;
+
+  return bus_data->handle;
+}
+
+/* Return (by value) the PCI address stored in the device's PCI bus data. */
+static_always_inline vlib_pci_addr_t
+vnet_dev_get_pci_addr (vnet_dev_t *dev)
+{
+  vnet_dev_bus_pci_device_data_t *bus_data = (void *) dev->bus_data;
+
+  return bus_data->addr;
+}
+
+/* Return the number of MSI-X interrupts recorded for the device.
+ * NOTE(review): the declared return type is vlib_pci_dev_handle_t although
+ * the value is the u16 n_msix_int count - looks like a copy/paste of the
+ * handle accessor; confirm and consider changing it to u16. */
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev)
+{
+ return vnet_dev_get_bus_pci_device_data (dev)->n_msix_int;
+}
+
+vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *,
+ vlib_pci_config_hdr_t *);
+
+vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8,
+ void **);
+vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_intx_handler_fn_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_msix_handler_fn_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+
+#endif /* _VNET_DEV_PCI_H_ */
diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c
new file mode 100644
index 00000000000..f9d6c010b97
--- /dev/null
+++ b/src/vnet/dev/port.c
@@ -0,0 +1,678 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+/* File-local log class "dev" / "port" referenced by the log_* () macros used
+ * in this file. */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "port",
+};
+
+/* Placeholder node function for the "port-rx-eth" node registration; it
+ * asserts when called and returns 0. */
+static uword
+dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ ASSERT (0);
+ return 0;
+}
+
+/* "port-rx-eth" input node, registered in disabled state with the
+ * placeholder function above. Per-port rx nodes created in
+ * vnet_dev_port_if_create () are registered as siblings of this node. Its
+ * next nodes are generated from foreach_vnet_dev_port_rx_next. */
+VLIB_REGISTER_NODE (port_rx_eth_node) = {
+ .function = dummy_input_fn,
+ .name = "port-rx-eth",
+ .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
+ .next_nodes = {
+#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ },
+};
+
+/* Default rx next index for a port, indexed by port type; only the ethernet
+ * port type has an explicit entry. */
+u16 vnet_dev_default_next_index_by_port_type[] = {
+ [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
+};
+
+/* Feature arc "port-rx-eth": starts at the "port-rx-eth" node and ends in
+ * "ethernet-input". The arc index is written to
+ * vnet_dev_main.eth_port_rx_feature_arc_index. */
+VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
+ .arc_name = "port-rx-eth",
+ .start_nodes = VNET_FEATURES ("port-rx-eth"),
+ .last_in_arc = "ethernet-input",
+ .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
+};
+
+/* Features registered on the arc; each of the following four is ordered
+ * before "ethernet-input". */
+VNET_FEATURE_INIT (l2_patch, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "l2-patch",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (worker_handoff, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "worker-handoff",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (span_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "span-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "p2p-ethernet-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+/* Terminal feature of the arc. */
+VNET_FEATURE_INIT (ethernet_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "ethernet-input",
+ .runs_before = 0, /* not before any other features */
+};
+
+/* Destroy a port object.
+ *
+ * The port must already be stopped (asserted). Calls the driver's optional
+ * port free op, frees the secondary address and queue pools, returns the
+ * port's slot to dev->ports and releases the port memory itself, so `port`
+ * must not be used afterwards. */
+void
+vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ /* captured up front: still needed after the driver free op has run */
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ ASSERT (port->started == 0);
+
+ log_debug (dev, "port %u", port->port_id);
+
+ if (port->port_ops.free)
+ port->port_ops.free (vm, port);
+
+ /* frees the pools only; NOTE(review): presumably the queues themselves have
+ * been released earlier (e.g. by vnet_dev_port_if_remove ()) - confirm */
+ pool_free (port->secondary_hw_addr);
+ pool_free (port->rx_queues);
+ pool_free (port->tx_queues);
+ pool_put_index (dev->ports, port->index);
+ clib_mem_free (port);
+}
+
+/* For every tx queue of the port and every thread set in that queue's
+ * assigned_threads bitmap, write the port's hw_if_index and the queue
+ * pointer into that thread's runtime data of the port's tx node. */
+void
+vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      u32 thread_index;
+      clib_bitmap_foreach (thread_index, txq->assigned_threads)
+        {
+          vlib_main_t *thread_vm = vlib_get_main_by_index (thread_index);
+          vlib_node_runtime_t *node_rt =
+            vlib_node_get_runtime (thread_vm, port->intf.tx_node_index);
+          vnet_dev_tx_node_runtime_t *tx_rt =
+            vnet_dev_get_tx_node_runtime (node_rt);
+
+          tx_rt->hw_if_index = port->intf.hw_if_index;
+          tx_rt->tx_queue = txq;
+        }
+    }
+}
+
+/* Stop a port.
+ *
+ * Order matters: runtime STOP operations for all started rx queues are
+ * executed first, then the driver's stop op is called, and only afterwards
+ * are the rx/tx queues and the port itself marked as not started. */
+void
+vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+
+ log_debug (dev, "stopping port %u", port->port_id);
+
+ /* collect one STOP runtime op per started rx queue, targeted at the
+ * queue's rx thread */
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->started)
+ {
+ vnet_dev_rt_op_t op = {
+ .type = VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+ .action = VNET_DEV_RT_OP_ACTION_STOP,
+ .thread_index = q->rx_thread_index,
+ .rx_queue = q,
+ };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ /* NOTE(review): the stop op is called without a null check, unlike the
+ * optional free/alloc/init ops - presumably it is mandatory; confirm */
+ port->port_ops.stop (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u rx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u tx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ log_debug (dev, "port %u stopped", port->port_id);
+ port->started = 0;
+}
+
+/* Call vnet_dev_rx_queue_start () on every rx queue of the port. Stops at
+ * the first failure and returns its code; queues started before the failure
+ * are left as they are. Returns VNET_DEV_OK when all calls succeed or the
+ * port has no rx queues. */
+vnet_dev_rv_t
+vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      vnet_dev_rv_t start_rv = vnet_dev_rx_queue_start (vm, rxq);
+
+      if (start_rv != VNET_DEV_OK)
+        return start_rv;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Call vnet_dev_tx_queue_start () on every tx queue of the port. Stops at
+ * the first failure and returns its code; queues started before the failure
+ * are left as they are. Returns VNET_DEV_OK when all calls succeed or the
+ * port has no tx queues. */
+vnet_dev_rv_t
+vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      vnet_dev_rv_t start_rv = vnet_dev_tx_queue_start (vm, txq);
+
+      if (start_rv != VNET_DEV_OK)
+        return start_rv;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Start a port.
+ *
+ * Refreshes the tx node runtime data, calls the driver's start op, executes
+ * runtime START operations for all enabled rx queues and finally marks the
+ * enabled rx/tx queues and the port as started.
+ *
+ * Returns VNET_DEV_OK, or the driver's error code; in the error case
+ * vnet_dev_port_stop () has been called before returning. */
+vnet_dev_rv_t
+vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "starting port %u", port->port_id);
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK)
+ {
+ /* undo whatever the driver managed to start */
+ vnet_dev_port_stop (vm, port);
+ return rv;
+ }
+
+ /* collect one START runtime op per enabled rx queue, targeted at the
+ * queue's rx thread */
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ {
+ vnet_dev_rt_op_t op = {
+ .type = VNET_DEV_RT_OP_TYPE_RX_QUEUE,
+ .action = VNET_DEV_RT_OP_ACTION_START,
+ .thread_index = q->rx_thread_index,
+ .rx_queue = q,
+ };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u rx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u tx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ port->started = 1;
+ log_debug (dev, "port %u started", port->port_id);
+
+ return VNET_DEV_OK;
+}
+
+/* Create a port with the given id on a device.
+ *
+ * Allocates the port together with args->port.data_size bytes of driver
+ * data, adds it to dev->ports, copies attributes, queue configs, ops and
+ * rx/tx node descriptions from args, and calls the driver's optional port
+ * alloc op. The port is marked initialized only when that op succeeds (or
+ * is absent).
+ *
+ * Returns VNET_DEV_OK or the alloc op's error code. On error the port is
+ * not released here and stays in dev->ports.
+ * NOTE(review): presumably the caller cleans up on failure - confirm. */
+vnet_dev_rv_t
+vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
+ vnet_dev_port_add_args_t *args)
+{
+ vnet_dev_port_t **pp, *port;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
+ ASSERT (args->port.attr.max_supported_frame_size);
+
+ port =
+ vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);
+ pool_get (dev->ports, pp);
+ pp[0] = port;
+ /* NOTE(review): initial_data is copied unconditionally - confirm callers
+ * always provide it when data_size is non-zero */
+ clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
+ args->port.data_size);
+ port->port_id = id;
+ port->index = pp - dev->ports;
+ port->dev = dev;
+ port->attr = args->port.attr;
+ port->rx_queue_config = args->rx_queue.config;
+ port->tx_queue_config = args->tx_queue.config;
+ port->rx_queue_ops = args->rx_queue.ops;
+ port->tx_queue_ops = args->tx_queue.ops;
+ port->port_ops = args->port.ops;
+ /* NOTE(review): rx_node / tx_node are dereferenced without a null check */
+ port->rx_node = *args->rx_node;
+ port->tx_node = *args->tx_node;
+
+ /* defaults out of port attributes */
+ port->max_frame_size = args->port.attr.max_supported_frame_size;
+ port->primary_hw_addr = args->port.attr.hw_addr;
+
+ if (port->port_ops.alloc)
+ rv = port->port_ops.alloc (vm, port);
+
+ if (rv == VNET_DEV_OK)
+ port->initialized = 1;
+
+ return rv;
+}
+
+/* Validate a port configuration change request against the port state.
+ *
+ * A request already marked validated is accepted immediately. Otherwise the
+ * generic checks for the request type run first, followed by the driver's
+ * optional config_change_validate op; on success the request is marked
+ * validated.
+ *
+ * Returns VNET_DEV_OK, one of VNET_DEV_ERR_INVALID_VALUE,
+ * VNET_DEV_ERR_NO_CHANGE, VNET_DEV_ERR_ALREADY_EXISTS,
+ * VNET_DEV_ERR_NO_SUCH_ENTRY from the generic checks, or the driver op's
+ * error code. */
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+                                       vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_hw_addr_t *hw_addr;
+  int present = 0;
+
+  if (req->validated)
+    return VNET_DEV_OK;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      if (req->max_frame_size > port->attr.max_supported_frame_size)
+        return VNET_DEV_ERR_INVALID_VALUE;
+      if (req->max_frame_size == port->max_frame_size)
+        return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      if (req->promisc == port->promisc)
+        return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      if (clib_memcmp (&req->addr, &port->primary_hw_addr,
+                       sizeof (vnet_dev_hw_addr_t)) == 0)
+        return VNET_DEV_ERR_NO_CHANGE;
+      break;
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      /* refuse an address which is already in the secondary list */
+      pool_foreach (hw_addr, port->secondary_hw_addr)
+        if (clib_memcmp (hw_addr, &req->addr, sizeof (*hw_addr)) == 0)
+          return VNET_DEV_ERR_ALREADY_EXISTS;
+      break;
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      /* the address to remove has to be in the secondary list */
+      pool_foreach (hw_addr, port->secondary_hw_addr)
+        if (clib_memcmp (hw_addr, &req->addr, sizeof (*hw_addr)) == 0)
+          present = 1;
+      if (present == 0)
+        return VNET_DEV_ERR_NO_SUCH_ENTRY;
+      break;
+
+    default:
+      break;
+    }
+
+  if (port->port_ops.config_change_validate)
+    {
+      vnet_dev_rv_t driver_rv =
+        port->port_ops.config_change_validate (vm, port, req);
+
+      if (driver_rv != VNET_DEV_OK)
+        return driver_rv;
+    }
+
+  req->validated = 1;
+  return VNET_DEV_OK;
+}
+
+/* Apply a port configuration change.
+ *
+ * The request is validated first (a request already marked validated passes
+ * immediately); a request that fails validation is rejected with the
+ * validation error and nothing is changed. Then the driver's optional
+ * config_change op is called and, only if it succeeds, the generic port
+ * state (max frame size, promisc flag, primary / secondary hw addresses) is
+ * updated to match.
+ *
+ * Returns VNET_DEV_OK, the validation error, or the driver op's error. */
+vnet_dev_rv_t
+vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                          vnet_dev_port_cfg_change_req_t *req)
+{
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  vnet_dev_hw_addr_t *a;
+
+  vnet_dev_port_validate (vm, port);
+
+  /* do not hand an invalid request (oversized frame, duplicate or unknown
+   * address, ...) to the driver or record it in the port state */
+  rv = vnet_dev_port_cfg_change_req_validate (vm, port, req);
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  if (port->port_ops.config_change)
+    rv = port->port_ops.config_change (vm, port, req);
+
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  switch (req->type)
+    {
+    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
+      port->max_frame_size = req->max_frame_size;
+      break;
+
+    case VNET_DEV_PORT_CFG_PROMISC_MODE:
+      port->promisc = req->promisc;
+      break;
+
+    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+      clib_memcpy (&port->primary_hw_addr, &req->addr,
+                   sizeof (vnet_dev_hw_addr_t));
+      break;
+
+    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+      pool_get (port->secondary_hw_addr, a);
+      clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
+      break;
+
+    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+      pool_foreach (a, port->secondary_hw_addr)
+        if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
+          {
+            /* stop iterating right after modifying the pool */
+            pool_put (port->secondary_hw_addr, a);
+            break;
+          }
+      break;
+
+    default:
+      break;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Record link speed and / or link state changes reported for a port.
+ *
+ * For each field flagged in changes.change the new value is stored in the
+ * port, pushed to the vnet hardware interface when the interface has been
+ * created, and logged at debug level. */
+void
+vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
+                            vnet_dev_port_state_changes_t changes)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  vnet_dev_port_validate (vm, port);
+
+  if (changes.change.link_speed)
+    {
+      port->speed = changes.link_speed;
+
+      if (port->interface_created)
+        vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+                                          changes.link_speed);
+
+      log_debug (port->dev, "port speed changed to %u", changes.link_speed);
+    }
+
+  if (changes.change.link_state)
+    {
+      port->link_up = changes.link_state;
+
+      if (port->interface_created)
+        vnet_hw_interface_set_flags (
+          vnm, port->intf.hw_if_index,
+          changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+
+      log_debug (port->dev, "port link state changed to %s",
+                 changes.link_state ? "up" : "down");
+    }
+}
+
+/* Allocate the port's counter set from the given counter descriptions and
+ * store it in port->counter_main. The set is named
+ * "<device id> port <port id> counters".
+ * NOTE(review): an existing port->counter_main is overwritten without being
+ * freed - presumably this is called once per port; confirm. */
+void
+vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ vnet_dev_port_validate (vm, port);
+
+ port->counter_main =
+ vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
+ port->dev->device_id, port->port_id);
+}
+
+/* Free the port's counter set, if any, and clear port->counter_main so that
+ * later users (e.g. vnet_dev_port_clear_counters () or a second call to this
+ * function) do not touch freed memory. */
+void
+vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_dev_port_validate (vm, port);
+
+  if (port->counter_main)
+    {
+      vnet_dev_counters_free (vm, port->counter_main);
+      port->counter_main = 0;
+    }
+}
+
+/* Create the vnet interface for a port.
+ *
+ * Steps, in order:
+ *  - derive a default interface name "<driver><dev index>/<port index>"
+ *    when none is configured;
+ *  - allocate the configured number of rx and tx queues;
+ *  - assign tx queues to threads (one thread per queue, starting at thread
+ *    0, until either queues or threads run out);
+ *  - reserve a dev_instance slot in dm->ports_by_dev_instance;
+ *  - for ethernet ports: register the ethernet interface, propagate link
+ *    state / speed and create (or recycle) the per-port rx node;
+ *  - request a next-index runtime update for every rx queue, refresh the tx
+ *    node runtime and call the driver's optional port init op.
+ *
+ * Returns VNET_DEV_OK, VNET_DEV_ERR_BUG when the generated name does not
+ * fit, or the error of the failing queue allocation / init op. On any error
+ * after the name step vnet_dev_port_if_remove () is called to undo the
+ * partial work. */
+vnet_dev_rv_t
+vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u16 n_threads = vlib_get_n_threads ();
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_port_t **pp;
+  /* has to start as OK: with no queues to allocate and no init op nothing
+   * below assigns rv before it is tested at the error label */
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u16 ti = 0;
+
+  if (port->intf.name[0] == 0)
+    {
+      u8 *s;
+      s = format (0, "%s%u/%u",
+                  dm->drivers[port->dev->driver_index].registration->name,
+                  port->dev->index, port->index);
+      u32 n = vec_len (s);
+
+      /* need room for the terminating 0 as well */
+      if (n >= sizeof (port->intf.name))
+        {
+          vec_free (s);
+          return VNET_DEV_ERR_BUG;
+        }
+      clib_memcpy (port->intf.name, s, n);
+      port->intf.name[n] = 0;
+      vec_free (s);
+    }
+
+  log_debug (
+    dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
+    port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
+    port->intf.txq_sz);
+
+  for (int i = 0; i < port->intf.num_rx_queues; i++)
+    if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
+        VNET_DEV_OK)
+      goto error;
+
+  for (u32 i = 0; i < port->intf.num_tx_queues; i++)
+    if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
+        VNET_DEV_OK)
+      goto error;
+
+  foreach_vnet_dev_port_tx_queue (q, port)
+    {
+      q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
+      log_debug (dev, "port %u tx queue %u assigned to thread %u",
+                 port->port_id, q->queue_id, ti);
+      if (++ti >= n_threads)
+        break;
+    }
+
+  /* pool of port pointers helps us to assign unique dev_instance */
+  pool_get (dm->ports_by_dev_instance, pp);
+  port->intf.dev_instance = pp - dm->ports_by_dev_instance;
+  pp[0] = port;
+
+  if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+    {
+      vnet_device_class_t *dev_class;
+      vnet_dev_driver_t *driver;
+      vnet_sw_interface_t *sw;
+      vnet_hw_interface_t *hw;
+      u32 rx_node_index;
+
+      driver = pool_elt_at_index (dm->drivers, dev->driver_index);
+
+      /* hack to provide per-port tx node function */
+      dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
+      dev_class->tx_fn_registrations = port->tx_node.registrations;
+      dev_class->format_tx_trace = port->tx_node.format_trace;
+      dev_class->tx_function_error_counters = port->tx_node.error_counters;
+      dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+
+      /* create new interface including tx and output nodes */
+      port->intf.hw_if_index = vnet_eth_register_interface (
+        vnm, &(vnet_eth_interface_registration_t){
+               .address = port->primary_hw_addr.eth_mac,
+               .max_frame_size = port->max_frame_size,
+               .dev_class_index = driver->dev_class_index,
+               .dev_instance = port->intf.dev_instance,
+               .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
+               .cb.flag_change = vnet_dev_port_eth_flag_change,
+             });
+
+      sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
+      hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
+      port->intf.sw_if_index = sw->sw_if_index;
+      vnet_hw_interface_set_flags (
+        vnm, port->intf.hw_if_index,
+        port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+      if (port->speed)
+        vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+                                          port->speed);
+
+      port->intf.tx_node_index = hw->tx_node_index;
+
+      /* create / reuse rx node */
+      if (vec_len (dm->free_rx_node_indices))
+        {
+          /* recycle a node left behind by a removed port: rename it and
+           * point it at this port's rx function, trace formatter and
+           * error counters */
+          vlib_node_t *n;
+          rx_node_index = vec_pop (dm->free_rx_node_indices);
+          vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
+          n = vlib_get_node (vm, rx_node_index);
+          n->function = vlib_node_get_preferred_node_fn_variant (
+            vm, port->rx_node.registrations);
+          n->format_trace = port->rx_node.format_trace;
+          vlib_register_errors (vm, rx_node_index,
+                                port->rx_node.n_error_counters, 0,
+                                port->rx_node.error_counters);
+        }
+      else
+        {
+          /* same values as already stored in dev_class before the
+           * interface registration above */
+          dev_class->format_tx_trace = port->tx_node.format_trace;
+          dev_class->tx_function_error_counters = port->tx_node.error_counters;
+          dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+          vlib_node_registration_t rx_node_reg = {
+            .sibling_of = "port-rx-eth",
+            .type = VLIB_NODE_TYPE_INPUT,
+            .state = VLIB_NODE_STATE_DISABLED,
+            .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+            .node_fn_registrations = port->rx_node.registrations,
+            .format_trace = port->rx_node.format_trace,
+            .error_counters = port->rx_node.error_counters,
+            .n_errors = port->rx_node.n_error_counters,
+          };
+          rx_node_index =
+            vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
+        }
+      port->rx_node_assigned = 1;
+      port->intf.rx_node_index = rx_node_index;
+      port->intf.rx_next_index =
+        vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+      vlib_worker_thread_node_runtime_update ();
+      log_debug (dev,
+                 "ethernet interface created, hw_if_index %u sw_if_index %u "
+                 "rx_node_index %u tx_node_index %u",
+                 port->intf.hw_if_index, port->intf.sw_if_index,
+                 port->intf.rx_node_index, port->intf.tx_node_index);
+    }
+
+  port->interface_created = 1;
+  foreach_vnet_dev_port_rx_queue (q, port)
+    {
+      vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
+        port->intf.sw_if_index;
+      /* poison to catch node not calling runtime update function */
+      q->next_index = ~0;
+      vnet_dev_rx_queue_rt_request (
+        vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+    }
+
+  vnet_dev_port_update_tx_node_runtime (vm, port);
+
+  if (port->port_ops.init)
+    rv = port->port_ops.init (vm, port);
+
+error:
+  if (rv != VNET_DEV_OK)
+    vnet_dev_port_if_remove (vm, port);
+  return rv;
+}
+
+/* Remove the vnet interface of a port and release what
+ * vnet_dev_port_if_create () set up.
+ *
+ * Each step is guarded by the matching state flag, so this also works on a
+ * partially created interface (it is used as the error path of
+ * vnet_dev_port_if_create ()). Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnet_dev_port_validate (vm, port);
+
+ if (port->started)
+ vnet_dev_port_stop (vm, port);
+
+ if (port->rx_node_assigned)
+ {
+ /* the rx node is not deleted; it is renamed and kept on a free list from
+ * which vnet_dev_port_if_create () recycles it */
+ vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
+ port->intf.rx_node_index);
+ vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
+ port->rx_node_assigned = 0;
+ }
+
+ if (port->interface_created)
+ {
+ /* the hw interface is deleted with the worker barrier held */
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
+ vlib_worker_thread_barrier_release (vm);
+ pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
+ port->interface_created = 0;
+ }
+
+ /* reset all interface settings, including the interface name */
+ port->intf = (typeof (port->intf)){};
+
+ if (port->port_ops.deinit)
+ port->port_ops.deinit (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ vnet_dev_tx_queue_free (vm, q);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ vnet_dev_rx_queue_free (vm, q);
+
+ vnet_dev_port_free_counters (vm, port);
+
+ return VNET_DEV_OK;
+}
+/* Clear the port's own counters and the counters of each of its rx and tx
+ * queues (wherever a counter set exists), then log a notice. */
+void
+vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+  if (port->counter_main)
+    vnet_dev_counters_clear (vm, port->counter_main);
+
+  foreach_vnet_dev_port_rx_queue (rxq, port)
+    {
+      if (rxq->counter_main)
+        vnet_dev_counters_clear (vm, rxq->counter_main);
+    }
+
+  foreach_vnet_dev_port_tx_queue (txq, port)
+    {
+      if (txq->counter_main)
+        vnet_dev_counters_clear (vm, txq->counter_main);
+    }
+
+  log_notice (port->dev, "counters cleared on port %u", port->port_id);
+}
diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c
new file mode 100644
index 00000000000..3c1f0b8d2d8
--- /dev/null
+++ b/src/vnet/dev/process.c
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+/* Log class registered for this file: class "dev", subclass "process". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "process",
+};
+
+/* Events understood by the per-device process node, dispatched in
+ * vnet_dev_process_one_event (). */
+typedef enum
+{
+  VNET_DEV_EVENT_PERIODIC_STOP,          /* clear dev->process_node_periodic */
+  VNET_DEV_EVENT_PERIODIC_START,         /* set dev->process_node_periodic */
+  VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ, /* call vnet_dev_port_cfg_change () */
+  VNET_DEV_EVENT_PROCESS_QUIT,           /* set dev->process_node_quit */
+  VNET_DEV_EVENT_CALL_OP,                /* call dev op, keep its rv */
+  VNET_DEV_EVENT_CALL_OP_NO_RV,          /* call dev op which returns void */
+  VNET_DEV_EVENT_CALL_OP_NO_WAIT,        /* call dev op, sender does not wait */
+  VNET_DEV_EVENT_CALL_PORT_OP,           /* call port op, keep its rv */
+  VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,     /* call port op which returns void */
+  VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,   /* call port op, sender does not wait */
+  /* no-op when dispatched; vnet_dev_process_event_send_and_wait () reports a
+   * timeout when the event it gets back equals this value */
+  VNET_DEV_EVENT_CLOCK = ~0
+} __clib_packed vnet_dev_event_t;
+
+/* Payload of one event sent to the device process node. Exactly one member
+ * of the anonymous union is meaningful, selected by 'event'. */
+typedef struct
+{
+  vnet_dev_event_t event;
+  /* when set, the process node signals the result back to
+   * calling_process_index after handling the event */
+  u8 reply_needed : 1;
+  /* node index of the process which sent the event */
+  u32 calling_process_index;
+  union
+  {
+    /* VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ */
+    struct
+    {
+      vnet_dev_port_t *port;
+      vnet_dev_port_cfg_change_req_t *change_req;
+    } port_cfg_change;
+    /* VNET_DEV_EVENT_CALL_OP */
+    struct
+    {
+      vnet_dev_op_t *op;
+    } call_op;
+    /* VNET_DEV_EVENT_CALL_OP_NO_RV */
+    struct
+    {
+      vnet_dev_op_no_rv_t *op;
+    } call_op_no_rv;
+    /* VNET_DEV_EVENT_CALL_OP_NO_WAIT */
+    struct
+    {
+      vnet_dev_op_no_rv_t *op;
+    } call_op_no_wait;
+    /* VNET_DEV_EVENT_CALL_PORT_OP */
+    struct
+    {
+      vnet_dev_port_op_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op;
+    /* VNET_DEV_EVENT_CALL_PORT_OP_NO_RV */
+    struct
+    {
+      vnet_dev_port_op_no_rv_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op_no_rv;
+    /* VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT */
+    struct
+    {
+      vnet_dev_port_op_no_rv_t *op;
+      vnet_dev_port_t *port;
+    } call_port_op_no_wait;
+  };
+} vnet_dev_event_data_t;
+
+/* Handle a single event in the context of the device process node.
+ * Returns the result of the invoked op for events which carry one and
+ * VNET_DEV_OK for all other events. */
+static vnet_dev_rv_t
+vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev,
+                            vnet_dev_event_data_t *ed)
+{
+  switch (ed->event)
+    {
+      /* nothing to do */
+    case VNET_DEV_EVENT_CLOCK:
+      return VNET_DEV_OK;
+
+      /* process node state changes */
+    case VNET_DEV_EVENT_PROCESS_QUIT:
+      log_debug (dev, "quit requested");
+      dev->process_node_quit = 1;
+      return VNET_DEV_OK;
+
+    case VNET_DEV_EVENT_PERIODIC_START:
+      log_debug (dev, "periodic start");
+      dev->process_node_periodic = 1;
+      return VNET_DEV_OK;
+
+    case VNET_DEV_EVENT_PERIODIC_STOP:
+      log_debug (dev, "periodic stop");
+      dev->process_node_periodic = 0;
+      return VNET_DEV_OK;
+
+      /* requests which produce a return value */
+    case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ:
+      log_debug (dev, "port config change");
+      return vnet_dev_port_cfg_change (vm, ed->port_cfg_change.port,
+                                       ed->port_cfg_change.change_req);
+
+    case VNET_DEV_EVENT_CALL_OP:
+      log_debug (dev, "call op");
+      return ed->call_op.op (vm, dev);
+
+    case VNET_DEV_EVENT_CALL_PORT_OP:
+      log_debug (dev, "call port op");
+      return ed->call_port_op.op (vm, ed->call_port_op.port);
+
+      /* requests without a return value */
+    case VNET_DEV_EVENT_CALL_OP_NO_RV:
+      log_debug (dev, "call op no rv");
+      ed->call_op_no_rv.op (vm, dev);
+      return VNET_DEV_OK;
+
+    case VNET_DEV_EVENT_CALL_OP_NO_WAIT:
+      log_debug (dev, "call op no wait");
+      ed->call_op_no_wait.op (vm, dev);
+      return VNET_DEV_OK;
+
+    case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV:
+      log_debug (dev, "call port op no rv");
+      ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port);
+      return VNET_DEV_OK;
+
+    case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT:
+      log_debug (dev, "call port op no wait");
+      ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port);
+      return VNET_DEV_OK;
+
+    default:
+      ASSERT (0);
+      break;
+    }
+
+  return VNET_DEV_OK;
+}
+
+/* Process node function, one instance per device. The device pointer is
+ * read from the node runtime data. Loops until dev->process_node_quit is
+ * set: waits for events, dispatches them, sends replies where requested and
+ * runs the periodic ops which are due. */
+static uword
+vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_periodic_op_t *pop, *pops = 0;
+  f64 next = CLIB_F64_MAX;
+  vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed;
+
+  vnet_dev_t *dev =
+    *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index));
+
+  log_debug (dev, "process '%U' started", format_vlib_node_name, vm,
+             rt->node_index);
+
+  while (dev->process_node_quit == 0)
+    {
+      uword event_type;
+      /* NOTE(review): 'now' is sampled before the wait below and is also
+       * used for the periodic op checks after the wait - confirm that using
+       * the pre-wait timestamp there is intended */
+      f64 now = vlib_time_now (vm);
+
+      /* only wait with timeout while periodic ops are enabled */
+      if (dev->process_node_periodic)
+        vlib_process_wait_for_event_or_clock (vm, next > now ? next - now : 0);
+      else
+        vlib_process_wait_for_event (vm);
+
+      new_event_data = vlib_process_get_event_data (vm, &event_type);
+
+      if (new_event_data)
+        {
+          /* copy events into a local vector and hand the original back
+           * before any of them is processed */
+          vec_append (event_data, new_event_data);
+          vlib_process_put_event_data (vm, new_event_data);
+
+          ASSERT (event_type == 0);
+
+          vec_foreach (ed, event_data)
+            {
+              vnet_dev_rv_t rv;
+              rv = vnet_dev_process_one_event (vm, dev, ed);
+              /* reply carries the event as type and rv as data */
+              if (ed->reply_needed)
+                vlib_process_signal_event (vm, ed->calling_process_index,
+                                           ed->event, rv);
+            }
+          vec_reset_length (event_data);
+        }
+
+      /* collect ops which are due and compute the next wakeup time */
+      next = CLIB_F64_MAX;
+      pool_foreach (pop, dev->periodic_ops)
+        {
+          if (pop->last_run + pop->interval < now)
+            {
+              /* copied by value, invoked after the pool walk is done */
+              vec_add1 (pops, *pop);
+              pop->last_run = now;
+            }
+          if (pop->last_run + pop->interval < next)
+            next = pop->last_run + pop->interval;
+        }
+
+      vec_foreach (pop, pops)
+        {
+          switch (pop->type)
+            {
+            case VNET_DEV_PERIODIC_OP_TYPE_DEV:
+              pop->dev_op (vm, pop->dev);
+              break;
+            case VNET_DEV_PERIODIC_OP_TYPE_PORT:
+              pop->port_op (vm, pop->port);
+              break;
+            default:
+              ASSERT (0);
+            }
+        }
+      vec_reset_length (pops);
+    }
+
+  log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
+             rt->node_index);
+  /* node is disabled and renamed, it is reused by
+   * vnet_dev_process_create () */
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+  vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+
+  /* add node index to the freelist */
+  vec_add1 (dm->free_process_node_indices, rt->node_index);
+  vec_free (pops);
+  vec_free (event_data);
+  return 0;
+}
+
+/* Create and start the process node of a device. A node index parked on
+ * the freelist by a process which quit earlier is reused when available,
+ * otherwise a new process node is registered. The device pointer is stored
+ * in the node runtime data. Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  uword n_free = vec_len (dm->free_process_node_indices);
+  vlib_node_t *node;
+
+  if (n_free == 0)
+    {
+      /* nothing to reuse, register a new process node */
+      vlib_node_registration_t reg = {
+        .function = vnet_dev_process,
+        .type = VLIB_NODE_TYPE_PROCESS,
+        .process_log2_n_stack_bytes = 16,
+        .runtime_data_bytes = sizeof (void *),
+      };
+
+      vlib_register_node (vm, &reg, "%s-process", dev->device_id);
+
+      node = vlib_get_node (vm, reg.index);
+      log_debug (dev, "process node '%U' (%u) created", format_vlib_node_name,
+                 vm, reg.index, reg.index);
+    }
+  else
+    {
+      /* take the last entry of the freelist */
+      node = vlib_get_node (vm, dm->free_process_node_indices[n_free - 1]);
+
+      if (node->function != vnet_dev_process)
+        {
+          vlib_node_runtime_t *nrt = vlib_node_get_runtime (vm, node->index);
+          node->function = vnet_dev_process;
+          nrt->function = vnet_dev_process;
+        }
+
+      vlib_node_rename (vm, node->index, "%s-process", dev->device_id);
+      vlib_node_set_state (vm, node->index, VLIB_NODE_STATE_POLLING);
+      vec_set_len (dm->free_process_node_indices, n_free - 1);
+      log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
+                 vm, node->index, node->index);
+    }
+
+  dev->process_node_index = node->index;
+  *(vnet_dev_t **) vlib_node_get_runtime_data (vm, node->index) = dev;
+  vlib_start_process (vm, node->runtime_index);
+
+  return VNET_DEV_OK;
+}
+
+/* Queue one event for the process node of the device, without waiting.
+ * Events are sent with event type 0, which is what vnet_dev_process ()
+ * asserts on the receiving side. */
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+                             vnet_dev_event_data_t ed)
+{
+  vnet_dev_event_data_t *slot;
+
+  slot = vlib_process_signal_event_data (vm, dev->process_node_index, 0, 1,
+                                         sizeof (ed));
+  slot[0] = ed;
+}
+
+/* Send an event to the process node of the device and wait up to 5 seconds
+ * for the reply. When called from the device process node itself the event
+ * is handled inline. Returns the value sent back by the process node, or
+ * VNET_DEV_ERR_PROCESS_REPLY on timeout or when an unexpected event is
+ * received. */
+static vnet_dev_rv_t
+vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                      vnet_dev_event_data_t ed)
+{
+  uword *reply = 0;
+  uword reply_event;
+  vnet_dev_rv_t rv;
+
+  ed.calling_process_index = vlib_get_current_process_node_index (vm);
+
+  /* already running in the device process, no need to go through events */
+  if (ed.calling_process_index == dev->process_node_index)
+    return vnet_dev_process_one_event (vm, dev, &ed);
+
+  ed.reply_needed = 1;
+  vnet_dev_process_event_send (vm, dev, ed);
+  vlib_process_wait_for_event_or_clock (vm, 5.0);
+  reply_event = vlib_process_get_events (vm, &reply);
+
+  if (reply_event == ed.event)
+    {
+      /* reply is sent with the request event as type and rv as data */
+      rv = reply[0];
+    }
+  else
+    {
+      char *reason = reply_event == VNET_DEV_EVENT_CLOCK ?
+                       "timeout waiting for process node to respond" :
+                       "unexpected event received";
+      log_err (dev, "%s", reason);
+      rv = VNET_DEV_ERR_PROCESS_REPLY;
+    }
+
+  vec_free (reply);
+  return rv;
+}
+
+/* Ask the process node of the device to quit and wait for it to
+ * acknowledge; the result of the request is ignored. */
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  vnet_dev_process_event_send_and_wait (
+    vm, dev, (vnet_dev_event_data_t){ .event = VNET_DEV_EVENT_PROCESS_QUIT });
+}
+
+/* Add a periodic op to the device unless one with the same op and arg is
+ * already present. When the first op is added the process node is told to
+ * start periodic operation. Returns 1 if added, 0 if it already existed. */
+static int
+_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
+                    vnet_dev_periodic_op_t pop)
+{
+  vnet_dev_periodic_op_t *slot;
+
+  pool_foreach (slot, dev->periodic_ops)
+    {
+      if (slot->op == pop.op && slot->arg == pop.arg)
+        return 0;
+    }
+
+  pool_get_zero (dev->periodic_ops, slot);
+  slot[0] = pop;
+
+  if (pool_elts (dev->periodic_ops) == 1)
+    {
+      const vnet_dev_event_data_t start = {
+        .event = VNET_DEV_EVENT_PERIODIC_START,
+      };
+      vnet_dev_process_event_send (vm, dev, start);
+    }
+
+  return 1;
+}
+
+/* Remove the periodic op matching op and arg. When the last op is removed
+ * the process node is told to stop periodic operation. Returns 1 if an op
+ * was removed, 0 if no matching op was found. */
+static int
+_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
+{
+  vnet_dev_periodic_op_t *cur;
+
+  pool_foreach (cur, dev->periodic_ops)
+    {
+      if (cur->op != op || cur->arg != arg)
+        continue;
+
+      pool_put (dev->periodic_ops, cur);
+
+      if (pool_elts (dev->periodic_ops) == 0)
+        {
+          const vnet_dev_event_data_t stop = {
+            .event = VNET_DEV_EVENT_PERIODIC_STOP,
+          };
+          vnet_dev_process_event_send (vm, dev, stop);
+        }
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Register dev_op to be called periodically, every 'interval' seconds, with
+ * the device as argument. Logs a warning if it is already registered. */
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+                       vnet_dev_op_no_rv_t *dev_op)
+{
+  vnet_dev_periodic_op_t new_op = {
+    .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+    .interval = interval,
+    .dev = dev,
+    .dev_op = dev_op,
+  };
+  int added = _vnet_dev_poll_add (vm, dev, new_op);
+
+  if (!added)
+    log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+/* Unregister a periodic device op. Logs a warning if it is not found. */
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+                          vnet_dev_op_no_rv_t *dev_op)
+{
+  int removed = _vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev);
+
+  if (!removed)
+    log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+/* Register port_op to be called periodically, every 'interval' seconds,
+ * with the port as argument. The op is kept on the device owning the port.
+ * Logs a warning if it is already registered. */
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+                        vnet_dev_port_op_no_rv_t *port_op)
+{
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_periodic_op_t new_op = {
+    .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+    .interval = interval,
+    .port = port,
+    .port_op = port_op,
+  };
+  int added = _vnet_dev_poll_add (vm, dev, new_op);
+
+  if (!added)
+    log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+/* Unregister a periodic port op. Logs a warning if it is not found. */
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+                           vnet_dev_port_op_no_rv_t *port_op)
+{
+  vnet_dev_t *dev = port->dev;
+  int removed =
+    _vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port);
+
+  if (!removed)
+    log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+/* Have the process node of the port's device apply a port config change
+ * request and return its result. */
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+                                      vnet_dev_port_cfg_change_req_t *pccr)
+{
+  return vnet_dev_process_event_send_and_wait (
+    vm, port->dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+      .port_cfg_change = { .port = port, .change_req = pccr },
+    });
+}
+
+/* Run a device op in the device process node, wait for it and return its
+ * result. */
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+  return vnet_dev_process_event_send_and_wait (
+    vm, dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_CALL_OP,
+      .call_op.op = op,
+    });
+}
+
+/* Run a device op which returns no value in the device process node and
+ * wait for it. The returned value only reflects the event delivery. */
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+                                vnet_dev_op_no_rv_t *op)
+{
+  return vnet_dev_process_event_send_and_wait (
+    vm, dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_CALL_OP_NO_RV,
+      .call_op_no_rv.op = op,
+    });
+}
+
+/* Queue a device op for execution in the device process node and return
+ * immediately, without waiting for it to run. */
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+                                  vnet_dev_op_no_rv_t *op)
+{
+  /* fill the union member which the VNET_DEV_EVENT_CALL_OP_NO_WAIT handler
+   * in vnet_dev_process_one_event () reads */
+  const vnet_dev_event_data_t ed = {
+    .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+    .call_op_no_wait.op = op,
+  };
+
+  vnet_dev_process_event_send (vm, dev, ed);
+}
+
+/* Run a port op in the process node of the port's device, wait for it and
+ * return its result. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+                               vnet_dev_port_op_t *op)
+{
+  return vnet_dev_process_event_send_and_wait (
+    vm, port->dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_CALL_PORT_OP,
+      .call_port_op = { .op = op, .port = port },
+    });
+}
+
+/* Run a port op which returns no value in the process node of the port's
+ * device and wait for it. The returned value only reflects the event
+ * delivery. */
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+                                     vnet_dev_port_op_no_rv_t *op)
+{
+  return vnet_dev_process_event_send_and_wait (
+    vm, port->dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+      .call_port_op_no_rv = { .op = op, .port = port },
+    });
+}
+
+/* Queue a port op for execution in the process node of the port's device
+ * and return immediately, without waiting for it to run. */
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+                                       vnet_dev_port_op_no_rv_t *op)
+{
+  vnet_dev_process_event_send (
+    vm, port->dev,
+    (vnet_dev_event_data_t){
+      .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+      .call_port_op_no_wait = { .op = op, .port = port },
+    });
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644
index 00000000000..9223973dffc
--- /dev/null
+++ b/src/vnet/dev/process.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644
index 00000000000..9a016a626fb
--- /dev/null
+++ b/src/vnet/dev/queue.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+/* Log class registered for this file: class "dev", subclass "queue". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "queue",
+};
+
+/* Free an rx queue: run the driver free op if present, free the queue
+ * counters, return the queue slot to the port pool and release the queue
+ * memory. */
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_port_t *port = rxq->port;
+
+  log_debug (port->dev, "queue %u", rxq->queue_id);
+
+  if (port->rx_queue_ops.free != 0)
+    {
+      port->rx_queue_ops.free (vm, rxq);
+    }
+
+  vnet_dev_rx_queue_free_counters (vm, rxq);
+  pool_put_index (port->rx_queues, rxq->index);
+  clib_mem_free (rxq);
+}
+
+/* Allocate a new rx queue of the given size on the port.
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already has
+ * attr.max_rx_queues queues, the value returned by the driver alloc op when
+ * it fails (the queue is freed in that case), VNET_DEV_OK otherwise. */
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_main_t *dm = &vnet_dev_main;
+  vnet_dev_rx_queue_t *rxq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+  u16 n_threads = vlib_get_n_threads ();
+  u8 buffer_pool_index;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* queue structure followed by driver private data; size has to be taken
+   * from the queue type, not from the port type */
+  rxq = vnet_dev_alloc_with_data (sizeof (*rxq),
+                                  port->rx_queue_config.data_size);
+  pool_get (port->rx_queues, qp);
+  qp[0] = rxq;
+  rxq->enabled = 1;
+  rxq->port = port;
+  rxq->size = queue_size;
+  rxq->index = qp - port->rx_queues;
+
+  /* default queue id - can be changed by driver */
+  rxq->queue_id = qp - port->rx_queues;
+  ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
+  /* with more than one thread, queues are handed out round-robin, wrapping
+   * back to thread 1 */
+  if (n_threads > 1)
+    {
+      rxq->rx_thread_index = dm->next_rx_queue_thread++;
+      if (dm->next_rx_queue_thread >= n_threads)
+        dm->next_rx_queue_thread = 1;
+    }
+
+  /* buffer template is taken from the default pool of the device numa node */
+  buffer_pool_index =
+    vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+  vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+  rxq->buffer_template = bp->buffer_template;
+  vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+  rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+  if (port->rx_queue_ops.alloc)
+    rv = port->rx_queue_ops.alloc (vm, rxq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected rx queue add with rv %d", rv);
+      vnet_dev_rx_queue_free (vm, rxq);
+    }
+  else
+    log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+               rxq->rx_thread_index);
+
+  return rv;
+}
+
+/* Start an rx queue through the driver start op, if there is one. The queue
+ * is marked as started only when the result is VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  vnet_dev_rv_t rv = rxq->port->rx_queue_ops.start ?
+                       rxq->port->rx_queue_ops.start (vm, rxq) :
+                       VNET_DEV_OK;
+
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  rxq->started = 1;
+  return VNET_DEV_OK;
+}
+
+/* Stop an rx queue: run the driver stop op if present, disable the rx node
+ * of the port and clear the started flag. */
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  if (rxq->port->rx_queue_ops.stop != 0)
+    {
+      rxq->port->rx_queue_ops.stop (vm, rxq);
+    }
+
+  vlib_node_set_state (vm, rxq->port->intf.rx_node_index,
+                       VLIB_NODE_STATE_DISABLED);
+  rxq->started = 0;
+}
+
+/* Free a tx queue: run the driver free op if present, free the thread
+ * assignment bitmap and the queue counters, return the queue slot to the
+ * port pool and release the queue memory. */
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_port_t *port = txq->port;
+
+  vnet_dev_port_validate (vm, port);
+
+  log_debug (port->dev, "queue %u", txq->queue_id);
+
+  if (port->tx_queue_ops.free != 0)
+    {
+      port->tx_queue_ops.free (vm, txq);
+    }
+
+  clib_bitmap_free (txq->assigned_threads);
+  vnet_dev_tx_queue_free_counters (vm, txq);
+  pool_put_index (port->tx_queues, txq->index);
+  clib_mem_free (txq);
+}
+
+/* Allocate a new tx queue of the given size on the port.
+ * Returns VNET_DEV_ERR_NO_AVAIL_QUEUES when the port already has
+ * attr.max_tx_queues queues, the value returned by the driver alloc op when
+ * it fails (the queue is freed in that case), VNET_DEV_OK otherwise. */
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+                         u16 queue_size)
+{
+  vnet_dev_tx_queue_t *txq, **qp;
+  vnet_dev_t *dev = port->dev;
+  vnet_dev_rv_t rv = VNET_DEV_OK;
+
+  log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+  if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+    return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+  /* queue structure followed by driver private data; size has to be taken
+   * from the queue type, not from the port type */
+  txq = vnet_dev_alloc_with_data (sizeof (*txq),
+                                  port->tx_queue_config.data_size);
+  pool_get (port->tx_queues, qp);
+  qp[0] = txq;
+  txq->enabled = 1;
+  txq->port = port;
+  txq->size = queue_size;
+  txq->index = qp - port->tx_queues;
+
+  /* default queue id - can be changed by driver */
+  txq->queue_id = qp - port->tx_queues;
+  ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+  if (port->tx_queue_ops.alloc)
+    rv = port->tx_queue_ops.alloc (vm, txq);
+
+  if (rv != VNET_DEV_OK)
+    {
+      log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+      vnet_dev_tx_queue_free (vm, txq);
+    }
+  else
+    log_debug (dev, "queue %u added", txq->queue_id);
+
+  return rv;
+}
+
+/* Start a tx queue through the driver start op, if there is one. The queue
+ * is marked as started only when the result is VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  vnet_dev_rv_t rv = txq->port->tx_queue_ops.start ?
+                       txq->port->tx_queue_ops.start (vm, txq) :
+                       VNET_DEV_OK;
+
+  if (rv != VNET_DEV_OK)
+    return rv;
+
+  txq->started = 1;
+  return VNET_DEV_OK;
+}
+
+/* Stop a tx queue: run the driver stop op if present and clear the started
+ * flag. */
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (txq->port->tx_queue_ops.stop != 0)
+    {
+      txq->port->tx_queue_ops.stop (vm, txq);
+    }
+
+  txq->started = 0;
+}
+
+/* Allocate counters for an rx queue and attach them to it. The counter set
+ * is named after the device id, port id and queue id. */
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  vnet_dev_port_t *port = rxq->port;
+
+  rxq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u rx-queue %u counters",
+    port->dev->device_id, port->port_id, rxq->queue_id);
+}
+
+/* Free the counters of an rx queue, if it has any. */
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+  if (rxq->counter_main == 0)
+    return;
+
+  vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+/* Allocate counters for a tx queue and attach them to it. The counter set
+ * is named after the device id, port id and queue id. */
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+                                vnet_dev_counter_t *counters, u16 n_counters)
+{
+  vnet_dev_port_t *port = txq->port;
+
+  txq->counter_main = vnet_dev_counters_alloc (
+    vm, counters, n_counters, "%s port %u tx-queue %u counters",
+    port->dev->device_id, port->port_id, txq->queue_id);
+}
+
+/* Free the counters of a tx queue, if it has any. */
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+  if (txq->counter_main)
+    {
+      log_debug (txq->port->dev, "free");
+      vnet_dev_counters_free (vm, txq->counter_main);
+    }
+}
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
new file mode 100644
index 00000000000..e8f96c41a08
--- /dev/null
+++ b/src/vnet/dev/runtime.c
@@ -0,0 +1,174 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+/* Log class registered for this file: class "dev", subclass "runtime". */
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+  .class_name = "dev",
+  .subclass_name = "runtime",
+};
+
+/* Vector of ops published by vnet_dev_rt_exec_ops () for execution by the
+ * dev-rt-mgmt node on other threads. Stored with release and loaded with
+ * acquire semantics; set back to 0 once all published ops are completed. */
+static vnet_dev_rt_op_t *rt_ops;
+
+/* Execute one runtime op. Only rx queue ops are handled: the queue is added
+ * to (start) or removed from (stop) the rx_queues array kept in the runtime
+ * data of the port rx node, the node state is updated when the number of
+ * queues becomes 1 or 0, and the op is marked completed with release
+ * semantics. Ops of any other type are ignored. */
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+  vnet_dev_rx_node_runtime_t *rtd;
+  vnet_dev_rx_queue_t *rxq;
+  u32 node_index, pos;
+
+  if (op->type != VNET_DEV_RT_OP_TYPE_RX_QUEUE)
+    return;
+
+  rxq = op->rx_queue;
+  node_index = rxq->port->intf.rx_node_index;
+  rtd = vlib_node_get_runtime_data (vm, node_index);
+
+  switch (op->action)
+    {
+    case VNET_DEV_RT_OP_ACTION_START:
+      /* queue must not be present yet, append it */
+      for (pos = 0; pos < rtd->n_rx_queues; pos++)
+        ASSERT (rtd->rx_queues[pos] != rxq);
+      rtd->rx_queues[rtd->n_rx_queues++] = rxq;
+      break;
+
+    case VNET_DEV_RT_OP_ACTION_STOP:
+      /* find the queue and close the gap it leaves */
+      for (pos = 0; pos < rtd->n_rx_queues; pos++)
+        if (rtd->rx_queues[pos] == rxq)
+          break;
+      ASSERT (pos < rtd->n_rx_queues);
+      rtd->n_rx_queues--;
+      for (; pos < rtd->n_rx_queues; pos++)
+        rtd->rx_queues[pos] = rtd->rx_queues[pos + 1];
+      break;
+
+    default:
+      break;
+    }
+
+  if (rtd->n_rx_queues == 1)
+    vlib_node_set_state (vm, node_index, VLIB_NODE_STATE_POLLING);
+  else if (rtd->n_rx_queues == 0)
+    vlib_node_set_state (vm, node_index, VLIB_NODE_STATE_DISABLED);
+
+  __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+/* Check if 'current' is free to run: returns 0 when an op placed earlier in
+ * the array (starting at 'first') targets the same rx queue and is not yet
+ * completed, 1 otherwise. */
+static int
+_vnet_dev_rt_op_not_occured_before (vnet_dev_rt_op_t *first,
+                                    vnet_dev_rt_op_t *current)
+{
+  for (vnet_dev_rt_op_t *op = first; op < current; op++)
+    {
+      if (op->rx_queue != current->rx_queue)
+        continue;
+
+      /* 'completed' is stored with release semantics by the thread which
+       * executes the op, possibly a different one, so pair it with an
+       * acquire load instead of a plain read */
+      if (__atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+        return 0;
+    }
+  return 1;
+}
+
+/* Node function of dev-rt-mgmt. Executes the published ops which belong to
+ * the calling thread. An op is postponed while an earlier op on the same rx
+ * queue is still not completed; in that case the node re-arms its own
+ * interrupt to try again. Returns the number of ops executed. */
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame)
+{
+  vnet_dev_rt_op_t *pending = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+  vnet_dev_rt_op_t *cur;
+  u16 my_thread = vm->thread_index;
+  uword n_done = 0;
+  int retry = 0;
+
+  vec_foreach (cur, pending)
+    {
+      if (cur->thread_index != my_thread)
+        continue;
+
+      if (!_vnet_dev_rt_op_not_occured_before (pending, cur))
+        {
+          retry = 1;
+          continue;
+        }
+
+      _vnet_dev_rt_exec_op (vm, cur);
+      n_done++;
+    }
+
+  if (retry)
+    vlib_node_set_interrupt_pending (vm, node->node_index);
+
+  return n_done;
+}
+
+/* Pre-input node registered in interrupt state. In this file its interrupt
+ * is raised by vnet_dev_rt_exec_ops () on threads which have ops to execute
+ * and by the node function itself when it has to retry. */
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+  .function = vnet_dev_rt_mgmt_node_fn,
+  .name = "dev-rt-mgmt",
+  .type = VLIB_NODE_TYPE_PRE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+/* Format function for a runtime op. Takes a vnet_dev_rt_op_t pointer and
+ * prints port id, op type, queue id, action and thread index. */
+u8 *
+format_vnet_dev_mgmt_op (u8 *s, va_list *args)
+{
+  vnet_dev_rt_op_t *op = va_arg (*args, vnet_dev_rt_op_t *);
+  vnet_dev_rx_queue_t *rxq = op->rx_queue;
+
+  char *action_names[] = {
+    [VNET_DEV_RT_OP_ACTION_START] = "start",
+    [VNET_DEV_RT_OP_ACTION_STOP] = "stop",
+  };
+  char *type_names[] = {
+    [VNET_DEV_RT_OP_TYPE_RX_QUEUE] = "rx queue",
+  };
+
+  s = format (s, "port %u %s %u %s on thread %u", rxq->port->port_id,
+              type_names[op->type], rxq->queue_id, action_names[op->action],
+              op->thread_index);
+  return s;
+}
+
+/* Execute n_ops runtime ops. An op is executed directly when the worker
+ * barrier is held, or when it targets the calling thread and no earlier op
+ * on the same rx queue is still pending. All remaining ops are copied into
+ * a vector which is published through rt_ops; the dev-rt-mgmt node is
+ * interrupted on every thread involved and the function busy-waits until
+ * all published ops are completed. Always returns VNET_DEV_OK. */
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+                      u32 n_ops)
+{
+  vnet_dev_rt_op_t *op;
+  vnet_dev_rt_op_t *remote_ops = 0;
+  clib_bitmap_t *remote_bmp = 0;
+  u32 i;
+
+  ASSERT (rt_ops == 0);
+
+  for (op = ops; op < (ops + n_ops); op++)
+    {
+      vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+
+      if ((vlib_worker_thread_barrier_held ()) ||
+          (op->thread_index == vm->thread_index &&
+           _vnet_dev_rt_op_not_occured_before (ops, op)))
+        {
+          _vnet_dev_rt_exec_op (tvm, op);
+          log_debug (dev, "%U executed locally", format_vnet_dev_mgmt_op, op);
+          continue;
+        }
+
+      /* ops are copied, completion is tracked on the copy */
+      vec_add1 (remote_ops, *op);
+      log_debug (dev, "%U enqueued for remote execution",
+                 format_vnet_dev_mgmt_op, op);
+      remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+    }
+
+  if (remote_ops == 0)
+    return VNET_DEV_OK;
+
+  /* publish ops before waking up the threads which execute them */
+  __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+  clib_bitmap_foreach (i, remote_bmp)
+    {
+      vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+                                       vnet_dev_rt_mgmt_node.index);
+      log_debug (dev, "interrupt sent to %s node on thread %u",
+                 vnet_dev_rt_mgmt_node.name, i);
+    }
+
+  vec_foreach (op, remote_ops)
+    {
+      /* 'completed' is written by another thread with a release store; an
+       * atomic acquire load forces a fresh read on every iteration and
+       * orders it against the work done by that thread */
+      while (__atomic_load_n (&op->completed, __ATOMIC_ACQUIRE) == 0)
+        CLIB_PAUSE ();
+    }
+
+  __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+  vec_free (remote_ops);
+  clib_bitmap_free (remote_bmp);
+  return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644
index 00000000000..1a82c9746ef
--- /dev/null
+++ b/src/vnet/dev/types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+/* fixed size character arrays used for names and identifiers */
+typedef char vnet_dev_device_id_t[32];
+typedef char vnet_dev_if_name_t[32];
+typedef char vnet_dev_driver_name_t[16];
+typedef char vnet_dev_bus_name_t[8];
+typedef u16 vnet_dev_port_id_t;
+/* forward declarations, the structures are defined elsewhere */
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+
+/* Helper enum which gives every entry of foreach_vnet_dev_rv_type a
+ * consecutive positive value starting at 1; vnet_dev_rv_t negates these
+ * values to build the error codes. */
+typedef enum
+{
+  VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+/* Return value type: VNET_DEV_OK is 0 and every VNET_DEV_ERR_* value is the
+ * negated value of the matching VNET_DEV_ERR_MINUS_* enumerator, so all
+ * error codes are negative. */
+typedef enum
+{
+  VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+  foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+/* each entry is (bit position, name, description) */
+#define foreach_vnet_dev_flag _ (3, NO_STATS, "don't poll device stats")
+
+/* Device flags: 'e' gives access to the named VNET_DEV_F_* bits, 'n' to the
+ * same storage as a plain 64-bit number. */
+typedef union
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+    foreach_vnet_dev_flag
+#undef _
+  } e;
+  u64 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+/* each entry is (bit position, name, description) */
+#define foreach_vnet_dev_port_flag \
+  _ (3, INTERRUPT_MODE, "enable interrupt mode")
+
+/* Port flags: 'e' gives access to the named VNET_DEV_PORT_F_* bits, 'n' to
+ * the same storage as a plain 64-bit number. */
+typedef union
+{
+  enum
+  {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+    foreach_vnet_dev_port_flag
+#undef _
+  } e;
+  u64 n;
+} vnet_dev_port_flags_t;
+
+#endif /* _VNET_DEV_TYPES_H_ */
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
index 51aba137bf2..0ece84fd9cc 100644
--- a/src/vnet/ethernet/p2p_ethernet.c
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input",
+ parent_if_index, 1, 0, 0);
/* Set promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index,
ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
@@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth",
+ "p2p-ethernet-input",
+ parent_if_index, 0, 0, 0);
/* Disable promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index, 0);
}
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
index 5d4ef6f5c1b..61ca30f2962 100644
--- a/src/vnet/handoff.c
+++ b/src/vnet/handoff.c
@@ -244,6 +244,8 @@ interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index,
vnet_feature_enable_disable ("device-input", "worker-handoff",
sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index,
+ enable_disable, 0, 0);
return rv;
}
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 02d80996a15..511df4920e4 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -483,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags;
format_function_t format_vtr;
/* Node runtime for interface output function. */
+struct vnet_dev_tx_queue;
typedef struct
{
u32 hw_if_index;
u32 sw_if_index;
u32 dev_instance;
- u32 is_deleted;
+ u8 is_deleted;
+ struct vnet_dev_tx_queue *tx_queue;
} vnet_interface_output_runtime_t;
/* Interface output function. */
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
index 6de4e50a298..5697fb6a8ae 100644
--- a/src/vnet/l2/l2_patch.c
+++ b/src/vnet/l2/l2_patch.c
@@ -270,6 +270,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 1, 0, 0);
}
else
{
@@ -278,6 +280,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 0, 0, 0);
if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
{
l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c
index ec47920504a..85ee0c2e566 100644
--- a/src/vnet/span/span.c
+++ b/src/vnet/span/span.c
@@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm,
if (enable_rx || disable_rx)
vnet_feature_enable_disable ("device-input", "span-input",
src_sw_if_index, rx, 0, 0);
+ if (enable_rx || disable_rx)
+ vnet_feature_enable_disable ("port-rx-eth", "span-input",
+ src_sw_if_index, rx, 0, 0);
if (enable_tx || disable_tx)
vnet_feature_enable_disable ("interface-output", "span-output",
src_sw_if_index, tx, 0, 0);
diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h
index 42217968dcc..ad85af35ac9 100644
--- a/src/vppinfra/types.h
+++ b/src/vppinfra/types.h
@@ -131,6 +131,9 @@ typedef u32 clib_address_t;
#define CLIB_U32_MAX __UINT32_MAX__
#define CLIB_U64_MAX __UINT64_MAX__
+#define CLIB_F64_MAX __DBL_MAX__
+#define CLIB_F32_MAX __FLT_MAX__
+
#if clib_address_bits == 64
#define CLIB_WORD_MAX CLIB_I64_MAX
#define CLIB_UWORD_MAX CLIB_U64_MAX
@@ -197,11 +200,17 @@ typedef word wordu __attribute__ ((aligned (1), __may_alias__));
typedef uword uwordu __attribute__ ((aligned (1), __may_alias__));
#define foreach_int(__var, ...) \
- for (int __int_array[] = { __VA_ARGS__ }, *__int_ptr = __int_array, \
+ for (int __int_array[] = { __VA_ARGS__, 0 }, *__int_ptr = __int_array, \
__var = *__int_ptr; \
- __int_ptr - ARRAY_LEN (__int_array) < __int_array; \
+ __int_ptr - (ARRAY_LEN (__int_array) - 1) < __int_array; \
__var = *++__int_ptr)
+#define foreach_pointer(__var, ...) \
+ for (void *__ptr_array[] = { __VA_ARGS__, 0 }, **__ptr_ptr = __ptr_array, \
+ *__var = *__ptr_ptr; \
+ __ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array; \
+ __var = *++__ptr_ptr)
+
#endif /* included_clib_types_h */
/*
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
index 5c827d4aa5a..5ea7a8003f5 100644
--- a/src/vppinfra/vec_bootstrap.h
+++ b/src/vppinfra/vec_bootstrap.h
@@ -239,8 +239,9 @@ _vec_set_len (void *v, uword len, uword elt_sz)
for ((var) = vec_len ((v)) - 1; (var) >= 0; (var)--)
#define vec_foreach_pointer(e, v) \
- for (typeof (**v) **__ep = (v), *(e) = *__ep; __ep - (v) < vec_len (v); \
- __ep++, (e) = *__ep)
+ if (v) \
+ for (typeof (**v) **__ep = (v), *(e) = *__ep; __ep - (v) < vec_len (v); \
+ __ep++, (e) = *__ep)
#endif /* included_clib_vec_bootstrap_h */