From 16cc51b88a16c14002aebcff9943167b75d79a92 Mon Sep 17 00:00:00 2001 From: Tom Jones Date: Wed, 7 Feb 2024 14:55:20 +0000 Subject: netmap: Reinstate and update netmap plugin Thet netmap plugin was moved to depreciated in commit 998b8fe. On FreeBSD netmap offers a natively supported kernel interface for userspace networking and enables VPP without the use of DPDK. Reinstate the netmap plugin and adapt it to the newer plugin interface. Type: improvement Change-Id: I113daa33a490f04cbb29909f9789fa66284ac80e Signed-off-by: Tom Jones --- MAINTAINERS | 5 + extras/deprecated/netmap/FEATURE.yaml | 12 - extras/deprecated/netmap/cli.c | 236 ------------ extras/deprecated/netmap/device.c | 252 ------------- extras/deprecated/netmap/dir.dox | 27 -- extras/deprecated/netmap/net_netmap.h | 650 ---------------------------------- extras/deprecated/netmap/netmap.api | 56 --- extras/deprecated/netmap/netmap.c | 315 ---------------- extras/deprecated/netmap/netmap.h | 166 --------- extras/deprecated/netmap/netmap_api.c | 137 ------- extras/deprecated/netmap/node.c | 297 ---------------- src/plugins/netmap/CMakeLists.txt | 32 ++ src/plugins/netmap/FEATURE.yaml | 12 + src/plugins/netmap/cli.c | 236 ++++++++++++ src/plugins/netmap/device.c | 252 +++++++++++++ src/plugins/netmap/net_netmap.h | 650 ++++++++++++++++++++++++++++++++++ src/plugins/netmap/netmap.api | 56 +++ src/plugins/netmap/netmap.c | 334 +++++++++++++++++ src/plugins/netmap/netmap.h | 166 +++++++++ src/plugins/netmap/netmap_api.c | 95 +++++ src/plugins/netmap/node.c | 295 +++++++++++++++ src/plugins/netmap/plugin.c | 16 + 22 files changed, 2149 insertions(+), 2148 deletions(-) delete mode 100644 extras/deprecated/netmap/FEATURE.yaml delete mode 100644 extras/deprecated/netmap/cli.c delete mode 100644 extras/deprecated/netmap/device.c delete mode 100644 extras/deprecated/netmap/dir.dox delete mode 100644 extras/deprecated/netmap/net_netmap.h delete mode 100644 extras/deprecated/netmap/netmap.api delete mode 100644 extras/deprecated/netmap/netmap.c delete mode 100644 extras/deprecated/netmap/netmap.h delete mode 100644 extras/deprecated/netmap/netmap_api.c delete mode 100644 extras/deprecated/netmap/node.c create mode 100644 src/plugins/netmap/CMakeLists.txt create mode 100644 src/plugins/netmap/FEATURE.yaml create mode 100644 src/plugins/netmap/cli.c create mode 100644 src/plugins/netmap/device.c create mode 100644 src/plugins/netmap/net_netmap.h create mode 100644 src/plugins/netmap/netmap.api create mode 100644 src/plugins/netmap/netmap.c create mode 100644 src/plugins/netmap/netmap.h create mode 100644 src/plugins/netmap/netmap_api.c create mode 100644 src/plugins/netmap/node.c create mode 100644 src/plugins/netmap/plugin.c diff --git a/MAINTAINERS b/MAINTAINERS index c310e728227..abc7faf7be2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -892,3 +892,8 @@ M: vpp-dev Mailing List C: Missing Maintainer F: * F: */ + +Netmap +I: netmap +M: Tom Jones +F: src/plugins/netmap/ diff --git a/extras/deprecated/netmap/FEATURE.yaml b/extras/deprecated/netmap/FEATURE.yaml deleted file mode 100644 index e23e5c243e7..00000000000 --- a/extras/deprecated/netmap/FEATURE.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: Netmap Device -maintainer: Damjan Marion -features: - - L4 checksum offload -description: "Create a netmap interface, which is a high speed user-space - interface that allows VPP to patch into a linux namespace, - a linux container, or a physical NIC without the use of DPDK." -missing: - - API dump -state: production -properties: [API, CLI, STATS, MULTITHREAD] diff --git a/extras/deprecated/netmap/cli.c b/extras/deprecated/netmap/cli.c deleted file mode 100644 index 713632947a1..00000000000 --- a/extras/deprecated/netmap/cli.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ -#include -#include -#include - -#include -#include -#include - -#include -#include - -static clib_error_t * -netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 *host_if_name = NULL; - u8 hwaddr[6]; - u8 *hw_addr_ptr = 0; - int r; - u8 is_pipe = 0; - u8 is_master = 0; - u32 sw_if_index = ~0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &host_if_name)) - ; - else - if (unformat - (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) - hw_addr_ptr = hwaddr; - else if (unformat (line_input, "pipe")) - is_pipe = 1; - else if (unformat (line_input, "master")) - is_master = 1; - else if (unformat (line_input, "slave")) - is_master = 0; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - r = - netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, - &sw_if_index); - - if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - { - error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); - goto done; - } - - if (r == VNET_API_ERROR_INVALID_INTERFACE) - { - error = clib_error_return (0, "Invalid interface name"); - goto done; - } - - if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - { - error = clib_error_return (0, "Interface already exists"); - goto done; - } - - vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), - sw_if_index); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * 'netmap' is a framework for very fast packet I/O from userspace. - * 'VALE' is an equally fast in-kernel software switch using the - * netmap API. 'netmap' includes 'netmap pipes', a shared - * memory packet transport channel. Together, they provide a high speed - * user-space interface that allows VPP to patch into a linux namespace, a - * linux container, or a physical NIC without the use of DPDK. Netmap/VALE - * generates the 'netmap.ko' kernel module that needs to be loaded - * before netmap interfaces can be created. - * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. - * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, - * which is a snapshot of the Netmap/VALE repo with minor changes to work - * with containers and modified kernel drivers to work with NICs. - * - * Create a netmap interface that will attach to a linux interface. - * The interface must already exist. Once created, a new netmap interface - * will exist in VPP with the name 'netmap-', where - * '' takes one of two forms: - * - ifname - Linux interface to bind too. - * - valeXXX:YYY - - * - Where 'valeXXX' is an arbitrary name for a VALE - * interface that must start with 'vale' and is less - * than 16 characters. - * - Where 'YYY' is an existing linux namespace. - * - * This command has the following optional parameters: - * - * - hw-addr - Optional ethernet address, can be in either - * X:X:X:X:X:X unix or X.X.X cisco format. - * - * - pipe - Optional flag to indicate that a 'netmap pipe' - * instance should be created. - * - * - master | slave - Optional flag to indicate whether VPP should - * be the master or slave of the 'netmap pipe'. Only considered - * if 'pipe' is entered. Defaults to 'slave' if not entered. - * - * @cliexpar - * Example of how to create a netmap interface tied to the linux - * namespace 'vpp1': - * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} - * netmap-vale00:vpp1 - * @cliexend - * Once the netmap interface is created, enable the interface using: - * @cliexcmd{set interface state netmap-vale00:vpp1 up} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (netmap_create_command, static) = { - .path = "create netmap", - .short_help = "create netmap name |valeXXX:YYY " - "[hw-addr ] [pipe] [master|slave]", - .function = netmap_create_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 *host_if_name = NULL; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &host_if_name)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - netmap_delete_if (vm, host_if_name); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * Delete a netmap interface. Use the '' to identify - * the netmap interface to be deleted. In VPP, netmap interfaces are - * named as 'netmap-', where '' - * takes one of two forms: - * - ifname - Linux interface to bind too. - * - valeXXX:YYY - - * - Where 'valeXXX' is an arbitrary name for a VALE - * interface that must start with 'vale' and is less - * than 16 characters. - * - Where 'YYY' is an existing linux namespace. - * - * @cliexpar - * Example of how to delete a netmap interface named 'netmap-vale00:vpp1': - * @cliexcmd{delete netmap name vale00:vpp1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (netmap_delete_command, static) = { - .path = "delete netmap", - .short_help = "delete netmap name |valeXXX:YYY", - .function = netmap_delete_command_fn, -}; -/* *INDENT-ON* */ - -clib_error_t * -netmap_cli_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (netmap_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/device.c b/extras/deprecated/netmap/device.c deleted file mode 100644 index 47407aaaa26..00000000000 --- a/extras/deprecated/netmap/device.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#define foreach_netmap_tx_func_error \ -_(NO_FREE_SLOTS, "no free tx slots") \ -_(PENDING_MSGS, "pending msgs in tx ring") - -typedef enum -{ -#define _(f,s) NETMAP_TX_ERROR_##f, - foreach_netmap_tx_func_error -#undef _ - NETMAP_TX_N_ERROR, -} netmap_tx_func_error_t; - -static char *netmap_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_netmap_tx_func_error -#undef _ -}; - - -static u8 * -format_netmap_device_name (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - netmap_main_t *apm = &netmap_main; - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); - - s = format (s, "netmap-%s", nif->host_if_name); - return s; -} - -static u8 * -format_netmap_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - int verbose = va_arg (*args, int); - netmap_main_t *nm = &netmap_main; - netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); - u32 indent = format_get_indent (s); - - s = format (s, "NETMAP interface"); - if (verbose) - { - s = format (s, "\n%U version %d flags 0x%x" - "\n%U region %u memsize 0x%x offset 0x%x" - "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", - format_white_space, indent + 2, - nif->req->nr_version, - nif->req->nr_flags, - format_white_space, indent + 2, - nif->mem_region, - nif->req->nr_memsize, - nif->req->nr_offset, - format_white_space, indent + 2, - nif->req->nr_tx_slots, - nif->req->nr_rx_slots, - nif->req->nr_tx_rings, nif->req->nr_rx_rings); - } - return s; -} - -static u8 * -format_netmap_tx_trace (u8 * s, va_list * args) -{ - s = format (s, "Unimplemented..."); - return s; -} - -VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - netmap_main_t *nm = &netmap_main; - u32 *buffers = vlib_frame_vector_args (frame); - u32 n_left = frame->n_vectors; - f64 const time_constant = 1e3; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); - int cur_ring; - - clib_spinlock_lock_if_init (&nif->lockp); - - cur_ring = nif->first_tx_ring; - - while (n_left && cur_ring <= nif->last_tx_ring) - { - struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); - int n_free_slots = nm_ring_space (ring); - uint cur = ring->cur; - - if (nm_tx_pending (ring)) - { - if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) - clib_unix_warning ("NIOCTXSYNC"); - clib_cpu_time_wait (time_constant); - - if (nm_tx_pending (ring) && !n_free_slots) - { - cur_ring++; - continue; - } - } - - while (n_left && n_free_slots) - { - vlib_buffer_t *b0 = 0; - u32 bi = buffers[0]; - u32 len; - u32 offset = 0; - buffers++; - - struct netmap_slot *slot = &ring->slot[cur]; - - do - { - b0 = vlib_get_buffer (vm, bi); - len = b0->current_length; - /* memcpy */ - clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + - offset, vlib_buffer_get_current (b0), len); - offset += len; - } - while ((bi = b0->next_buffer)); - - slot->len = offset; - cur = (cur + 1) % ring->num_slots; - n_free_slots--; - n_left--; - } - CLIB_MEMORY_BARRIER (); - ring->head = ring->cur = cur; - } - - if (n_left < frame->n_vectors) - ioctl (nif->fd, NIOCTXSYNC, NULL); - - clib_spinlock_unlock_if_init (&nif->lockp); - - if (n_left) - vlib_error_count (vm, node->node_index, - (n_left == - frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS : - NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); - - vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); - return frame->n_vectors; -} - -static void -netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - netmap_main_t *apm = &netmap_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - nif->per_interface_next_index = node_index; - return; - } - - nif->per_interface_next_index = - vlib_node_add_next (vlib_get_main (), netmap_input_node.index, - node_index); -} - -static void -netmap_clear_hw_interface_counters (u32 instance) -{ - /* Nothing for now */ -} - -static clib_error_t * -netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - netmap_main_t *apm = &netmap_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); - u32 hw_flags; - - nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - if (nif->is_admin_up) - hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; - else - hw_flags = 0; - - vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - - return 0; -} - -static clib_error_t * -netmap_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (netmap_device_class) = { - .name = "netmap", - .format_device_name = format_netmap_device_name, - .format_device = format_netmap_device, - .format_tx_trace = format_netmap_tx_trace, - .tx_function_n_errors = NETMAP_TX_N_ERROR, - .tx_function_error_strings = netmap_tx_func_error_strings, - .rx_redirect_to_node = netmap_set_interface_next_node, - .clear_counters = netmap_clear_hw_interface_counters, - .admin_up_down_function = netmap_interface_admin_up_down, - .subif_add_del_function = netmap_subif_add_del_function, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/dir.dox b/extras/deprecated/netmap/dir.dox deleted file mode 100644 index 7ddbf947c29..00000000000 --- a/extras/deprecated/netmap/dir.dox +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief netmap Interface Implementation. - -This directory contains the source code for the netmap driver. - -*/ -/*? %%clicmd:group_label netmap %% ?*/ -/*? %%syscfg:group_label netmap %% ?*/ diff --git a/extras/deprecated/netmap/net_netmap.h b/extras/deprecated/netmap/net_netmap.h deleted file mode 100644 index fd4253b7c0c..00000000000 --- a/extras/deprecated/netmap/net_netmap.h +++ /dev/null @@ -1,650 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ - * - * Definitions of constants and the structures used by the netmap - * framework, for the part visible to both kernel and userspace. - * Detailed info on netmap is available with "man netmap" or at - * - * http://info.iet.unipi.it/~luigi/netmap/ - * - * This API is also used to communicate with the VALE software switch - */ - -#ifndef _NET_NETMAP_H_ -#define _NET_NETMAP_H_ - -#define NETMAP_API 11 /* current API version */ - -#define NETMAP_MIN_API 11 /* min and max versions accepted */ -#define NETMAP_MAX_API 15 -/* - * Some fields should be cache-aligned to reduce contention. - * The alignment is architecture and OS dependent, but rather than - * digging into OS headers to find the exact value we use an estimate - * that should cover most architectures. - */ -#define NM_CACHE_ALIGN 128 - -/* - * --- Netmap data structures --- - * - * The userspace data structures used by netmap are shown below. - * They are allocated by the kernel and mmap()ed by userspace threads. - * Pointers are implemented as memory offsets or indexes, - * so that they can be easily dereferenced in kernel and userspace. - - KERNEL (opaque, obviously) - - ==================================================================== - | - USERSPACE | struct netmap_ring - +---->+---------------+ - / | head,cur,tail | - struct netmap_if (nifp, 1 per fd) / | buf_ofs | - +---------------+ / | other fields | - | ni_tx_rings | / +===============+ - | ni_rx_rings | / | buf_idx, len | slot[0] - | | / | flags, ptr | - | | / +---------------+ - +===============+ / | buf_idx, len | slot[1] - | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | - | txring_ofs[1] | +---------------+ - (tx+1 entries) (num_slots entries) - | txring_ofs[t] | | buf_idx, len | slot[n-1] - +---------------+ | flags, ptr | - | rxring_ofs[0] | +---------------+ - | rxring_ofs[1] | - (rx+1 entries) - | rxring_ofs[r] | - +---------------+ - - * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to - * a file descriptor, the mmap()ed region contains a (logically readonly) - * struct netmap_if pointing to struct netmap_ring's. - * - * There is one netmap_ring per physical NIC ring, plus one tx/rx ring - * pair attached to the host stack (this pair is unused for non-NIC ports). - * - * All physical/host stack ports share the same memory region, - * so that zero-copy can be implemented between them. - * VALE switch ports instead have separate memory regions. - * - * The netmap_ring is the userspace-visible replica of the NIC ring. - * Each slot has the index of a buffer (MTU-sized and residing in the - * mmapped region), its length and some flags. An extra 64-bit pointer - * is provided for user-supplied buffers in the tx path. - * - * In user space, the buffer address is computed as - * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE - * - * Added in NETMAP_API 11: - * - * + NIOCREGIF can request the allocation of extra spare buffers from - * the same memory pool. The desired number of buffers must be in - * nr_arg3. The ioctl may return fewer buffers, depending on memory - * availability. nr_arg3 will return the actual value, and, once - * mapped, nifp->ni_bufs_head will be the index of the first buffer. - * - * The buffers are linked to each other using the first uint32_t - * as the index. On close, ni_bufs_head must point to the list of - * buffers to be released. - * - * + NIOCREGIF can request space for extra rings (and buffers) - * allocated in the same memory space. The number of extra rings - * is in nr_arg1, and is advisory. This is a no-op on NICs where - * the size of the memory space is fixed. - * - * + NIOCREGIF can attach to PIPE rings sharing the same memory - * space with a parent device. The ifname indicates the parent device, - * which must already exist. Flags in nr_flags indicate if we want to - * bind the master or slave side, the index (from nr_ringid) - * is just a cookie and does not need to be sequential. - * - * + NIOCREGIF can also attach to 'monitor' rings that replicate - * the content of specific rings, also from the same memory space. - * - * Extra flags in nr_flags support the above functions. - * Application libraries may use the following naming scheme: - * netmap:foo all NIC ring pairs - * netmap:foo^ only host ring pair - * netmap:foo+ all NIC ring + host ring pairs - * netmap:foo-k the k-th NIC ring pair - * netmap:foo{k PIPE ring pair k, master side - * netmap:foo}k PIPE ring pair k, slave side - */ - -/* - * struct netmap_slot is a buffer descriptor - */ -struct netmap_slot { - uint32_t buf_idx; /* buffer index */ - uint16_t len; /* length for this slot */ - uint16_t flags; /* buf changed, etc. */ - uint64_t ptr; /* pointer for indirect buffers */ -}; - -/* - * The following flags control how the slot is used - */ - -#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ - /* - * must be set whenever buf_idx is changed (as it might be - * necessary to recompute the physical address and mapping) - * - * It is also set by the kernel whenever the buf_idx is - * changed internally (e.g., by pipes). Applications may - * use this information to know when they can reuse the - * contents of previously prepared buffers. - */ - -#define NS_REPORT 0x0002 /* ask the hardware to report results */ - /* - * Request notification when slot is used by the hardware. - * Normally transmit completions are handled lazily and - * may be unreported. This flag lets us know when a slot - * has been sent (e.g. to terminate the sender). - */ - -#define NS_FORWARD 0x0004 /* pass packet 'forward' */ - /* - * (Only for physical ports, rx rings with NR_FORWARD set). - * Slot released to the kernel (i.e. before ring->head) with - * this flag set are passed to the peer ring (host/NIC), - * thus restoring the host-NIC connection for these slots. - * This supports efficient traffic monitoring or firewalling. - */ - -#define NS_NO_LEARN 0x0008 /* disable bridge learning */ - /* - * On a VALE switch, do not 'learn' the source port for - * this buffer. - */ - -#define NS_INDIRECT 0x0010 /* userspace buffer */ - /* - * (VALE tx rings only) data is in a userspace buffer, - * whose address is in the 'ptr' field in the slot. - */ - -#define NS_MOREFRAG 0x0020 /* packet has more fragments */ - /* - * (VALE ports only) - * Set on all but the last slot of a multi-segment packet. - * The 'len' field refers to the individual fragment. - */ - -#define NS_PORT_SHIFT 8 -#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) - /* - * The high 8 bits of the flag, if not zero, indicate the - * destination port for the VALE switch, overriding - * the lookup table. - */ - -#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) - /* - * (VALE rx rings only) the high 8 bits - * are the number of fragments. - */ - - -/* - * struct netmap_ring - * - * Netmap representation of a TX or RX ring (also known as "queue"). - * This is a queue implemented as a fixed-size circular array. - * At the software level the important fields are: head, cur, tail. - * - * In TX rings: - * - * head first slot available for transmission. - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] can be used for new packets to send; - * 'head' and 'cur' must be incremented as slots are filled - * with new packets to be sent; - * 'cur' can be moved further ahead if we need more space - * for new transmissions. XXX todo (2014-03-12) - * - * In RX rings: - * - * head first valid received packet - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] contain received packets; - * 'head' and 'cur' must be incremented as slots are consumed - * and can be returned to the kernel; - * 'cur' can be moved further ahead if we want to wait for - * new packets without returning the previous ones. - * - * DATA OWNERSHIP/LOCKING: - * The netmap_ring, and all slots and buffers in the range - * [head .. tail-1] are owned by the user program; - * the kernel only accesses them during a netmap system call - * and in the user thread context. - * - * Other slots and buffers are reserved for use by the kernel - */ -struct netmap_ring { - /* - * buf_ofs is meant to be used through macros. - * It contains the offset of the buffer region from this - * descriptor. - */ - const int64_t buf_ofs; - const uint32_t num_slots; /* number of slots in the ring. */ - const uint32_t nr_buf_size; - const uint16_t ringid; - const uint16_t dir; /* 0: tx, 1: rx */ - - uint32_t head; /* (u) first user slot */ - uint32_t cur; /* (u) wakeup point */ - uint32_t tail; /* (k) first kernel slot */ - - uint32_t flags; - - struct timeval ts; /* (k) time of last *sync() */ - - /* opaque room for a mutex or similar object */ -#if !defined(_WIN32) || defined(__CYGWIN__) - uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; -#else - uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; -#endif - - /* the slots follow. This struct has variable size */ - struct netmap_slot slot[0]; /* array of slots. */ -}; - - -/* - * RING FLAGS - */ -#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ - /* - * updates the 'ts' field on each netmap syscall. This saves - * saves a separate gettimeofday(), and is not much worse than - * software timestamps generated in the interrupt handler. - */ - -#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ - /* - * Enables the NS_FORWARD slot flag for the ring. - */ - - -/* - * Netmap representation of an interface and its queue(s). - * This is initialized by the kernel when binding a file - * descriptor to a port, and should be considered as readonly - * by user programs. The kernel never uses it. - * - * There is one netmap_if for each file descriptor on which we want - * to select/poll. - * select/poll operates on one or all pairs depending on the value of - * nmr_queueid passed on the ioctl. - */ -struct netmap_if { - char ni_name[IFNAMSIZ]; /* name of the interface. */ - const uint32_t ni_version; /* API version, currently unused */ - const uint32_t ni_flags; /* properties */ -#define NI_PRIV_MEM 0x1 /* private memory region */ - - /* - * The number of packet rings available in netmap mode. - * Physical NICs can have different numbers of tx and rx rings. - * Physical NICs also have a 'host' ring pair. - * Additionally, clients can request additional ring pairs to - * be used for internal communication. - */ - const uint32_t ni_tx_rings; /* number of HW tx rings */ - const uint32_t ni_rx_rings; /* number of HW rx rings */ - - uint32_t ni_bufs_head; /* head index for extra bufs */ - uint32_t ni_spare1[5]; - /* - * The following array contains the offset of each netmap ring - * from this structure, in the following order: - * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; - * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. - * - * The area is filled up by the kernel on NIOCREGIF, - * and then only read by userspace code. - */ - const ssize_t ring_ofs[0]; -}; - - -#ifndef NIOCREGIF -/* - * ioctl names and related fields - * - * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, - * whose identity is set in NIOCREGIF through nr_ringid. - * These are non blocking and take no argument. - * - * NIOCGINFO takes a struct ifreq, the interface name is the input, - * the outputs are number of queues and number of descriptor - * for each queue (useful to set number of threads etc.). - * The info returned is only advisory and may change before - * the interface is bound to a file descriptor. - * - * NIOCREGIF takes an interface name within a struct nmre, - * and activates netmap mode on the interface (if possible). - * - * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we - * can pass it down to other NIC-related ioctls. - * - * The actual argument (struct nmreq) has a number of options to request - * different functions. - * The following are used in NIOCREGIF when nr_cmd == 0: - * - * nr_name (in) - * The name of the port (em0, valeXXX:YYY, etc.) - * limited to IFNAMSIZ for backward compatibility. - * - * nr_version (in/out) - * Must match NETMAP_API as used in the kernel, error otherwise. - * Always returns the desired value on output. - * - * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) - * On input, non-zero values may be used to reconfigure the port - * according to the requested values, but this is not guaranteed. - * On output the actual values in use are reported. - * - * nr_ringid (in) - * Indicates how rings should be bound to the file descriptors. - * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) - * are used to indicate the ring number, and nr_flags specifies - * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. - * - * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: - * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control - * the binding as follows: - * 0 (default) binds all physical rings - * NETMAP_HW_RING | ring number binds a single ring pair - * NETMAP_SW_RING binds only the host tx/rx rings - * - * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push - * packets on tx rings only if POLLOUT is set. - * The default is to push any pending packet. - * - * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release - * packets on rx rings also when POLLIN is NOT set. - * The default is to touch the rx ring only with POLLIN. - * Note that this is the opposite of TX because it - * reflects the common usage. - * - * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. - * NETMAP_PRIV_MEM is set on return for ports that do not use - * the global memory allocator. - * This information is not significant and applications - * should look at the region id in nr_arg2 - * - * nr_flags is the recommended mode to indicate which rings should - * be bound to a file descriptor. Values are NR_REG_* - * - * nr_arg1 (in) The number of extra rings to be reserved. - * Especially when allocating a VALE port the system only - * allocates the amount of memory needed for the port. - * If more shared memory rings are desired (e.g. for pipes), - * the first invocation for the same basename/allocator - * should specify a suitable number. Memory cannot be - * extended after the first allocation without closing - * all ports on the same region. - * - * nr_arg2 (in/out) The identity of the memory region used. - * On input, 0 means the system decides autonomously, - * other values may try to select a specific region. - * On return the actual value is reported. - * Region '1' is the global allocator, normally shared - * by all interfaces. Other values are private regions. - * If two ports the same region zero-copy is possible. - * - * nr_arg3 (in/out) number of extra buffers to be allocated. - * - * - * - * nr_cmd (in) if non-zero indicates a special command: - * NETMAP_BDG_ATTACH and nr_name = vale*:ifname - * attaches the NIC to the switch; nr_ringid specifies - * which rings to use. Used by vale-ctl -a ... - * nr_arg1 = NETMAP_BDG_HOST also attaches the host port - * as in vale-ctl -h ... - * - * NETMAP_BDG_DETACH and nr_name = vale*:ifname - * disconnects a previously attached NIC. - * Used by vale-ctl -d ... - * - * NETMAP_BDG_LIST - * list the configuration of VALE switches. - * - * NETMAP_BDG_VNET_HDR - * Set the virtio-net header length used by the client - * of a VALE switch port. - * - * NETMAP_BDG_NEWIF - * create a persistent VALE port with name nr_name. - * Used by vale-ctl -n ... - * - * NETMAP_BDG_DELIF - * delete a persistent VALE port. Used by vale-ctl -d ... - * - * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific - * - * - * - */ - - -/* - * struct nmreq overlays a struct ifreq (just the name) - */ -struct nmreq { - char nr_name[IFNAMSIZ]; - uint32_t nr_version; /* API version */ - uint32_t nr_offset; /* nifp offset in the shared region */ - uint32_t nr_memsize; /* size of the shared region */ - uint32_t nr_tx_slots; /* slots in tx rings */ - uint32_t nr_rx_slots; /* slots in rx rings */ - uint16_t nr_tx_rings; /* number of tx rings */ - uint16_t nr_rx_rings; /* number of rx rings */ - - uint16_t nr_ringid; /* ring(s) we care about */ -#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ -#define NETMAP_SW_RING 0x2000 /* only host ring pair */ - -#define NETMAP_RING_MASK 0x0fff /* the ring number */ - -#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ - -#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ - - uint16_t nr_cmd; -#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ -#define NETMAP_BDG_DETACH 2 /* detach the NIC */ -#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ -#define NETMAP_BDG_LIST 4 /* get bridge's info */ -#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ -#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ -#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ -#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ -#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ -#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ -#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ -#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ -#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ - uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ -#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ - - uint16_t nr_arg2; - uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ - uint32_t nr_flags; - /* various modes, extends nr_ringid */ - uint32_t spare2[1]; -}; - -#define NR_REG_MASK 0xf /* values for nr_flags */ -enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ - NR_REG_ALL_NIC = 1, - NR_REG_SW = 2, - NR_REG_NIC_SW = 3, - NR_REG_ONE_NIC = 4, - NR_REG_PIPE_MASTER = 5, - NR_REG_PIPE_SLAVE = 6, -}; -/* monitor uses the NR_REG to select the rings to monitor */ -#define NR_MONITOR_TX 0x100 -#define NR_MONITOR_RX 0x200 -#define NR_ZCOPY_MON 0x400 -/* request exclusive access to the selected rings */ -#define NR_EXCLUSIVE 0x800 -/* request ptnetmap host support */ -#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ -#define NR_PTNETMAP_HOST 0x1000 -#define NR_RX_RINGS_ONLY 0x2000 -#define NR_TX_RINGS_ONLY 0x4000 -/* Applications set this flag if they are able to deal with virtio-net headers, - * that is send/receive frames that start with a virtio-net header. - * If not set, NIOCREGIF will fail with netmap ports that require applications - * to use those headers. If the flag is set, the application can use the - * NETMAP_VNET_HDR_GET command to figure out the header length. */ -#define NR_ACCEPT_VNET_HDR 0x8000 - - -/* - * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined - * in ws2def.h but not sure if they are in the form we need. - * XXX so we redefine them - * in a convenient way to use for DeviceIoControl signatures - */ -#ifdef _WIN32 -#undef _IO // ws2def.h -#define _WIN_NM_IOCTL_TYPE 40000 -#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ - METHOD_BUFFERED, FILE_ANY_ACCESS ) -#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ - METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) - -#define _IOWR(_c, _n, _s) _IO(_c, _n) - -/* We havesome internal sysctl in addition to the externally visible ones */ -#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT -#define NETMAP_POLL _IO('i', 162) - -/* and also two setsockopt for sysctl emulation */ -#define NETMAP_SETSOCKOPT _IO('i', 140) -#define NETMAP_GETSOCKOPT _IO('i', 141) - - -//These linknames are for the Netmap Core Driver -#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" -#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" - -//Definition of a structure used to pass a virtual address within an IOCTL -typedef struct _MEMORY_ENTRY { - PVOID pUsermodeVirtualAddress; -} MEMORY_ENTRY, *PMEMORY_ENTRY; - -typedef struct _POLL_REQUEST_DATA { - int events; - int timeout; - int revents; -} POLL_REQUEST_DATA; - -#endif /* _WIN32 */ - -/* - * FreeBSD uses the size value embedded in the _IOWR to determine - * how much to copy in/out. So we need it to match the actual - * data structure we pass. We put some spares in the structure - * to ease compatibility with other versions - */ -#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ -#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ -#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ -#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ -#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */ -#endif /* !NIOCREGIF */ - - -/* - * Helper functions for kernel and userspace - */ - -/* - * check if space is available in the ring. - */ -static inline int -nm_ring_empty(struct netmap_ring *ring) -{ - return (ring->cur == ring->tail); -} - -/* - * Opaque structure that is passed to an external kernel - * module via ioctl(fd, NIOCCONFIG, req) for a user-owned - * bridge port (at this point ephemeral VALE interface). - */ -#define NM_IFRDATA_LEN 256 -struct nm_ifreq { - char nifr_name[IFNAMSIZ]; - char data[NM_IFRDATA_LEN]; -}; - -/* - * netmap kernel thread configuration - */ -/* bhyve/vmm.ko MSIX parameters for IOCTL */ -struct ptn_vmm_ioctl_msix { - uint64_t msg; - uint64_t addr; -}; - -/* IOCTL parameters */ -struct nm_kth_ioctl { - u_long com; - /* TODO: use union */ - union { - struct ptn_vmm_ioctl_msix msix; - } data; -}; - -/* Configuration of a ptnetmap ring */ -struct ptnet_ring_cfg { - uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ - uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ - struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ -}; -#endif /* _NET_NETMAP_H_ */ diff --git a/extras/deprecated/netmap/netmap.api b/extras/deprecated/netmap/netmap.api deleted file mode 100644 index a14753cad9c..00000000000 --- a/extras/deprecated/netmap/netmap.api +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -option version = "1.0.0"; - -/** \brief Create netmap - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param netmap_if_name - interface name - @param hw_addr - interface MAC - @param use_random_hw_addr - use random generated MAC - @param is_pipe - is pipe - @param is_master - 0=slave, 1=master -*/ -autoreply define netmap_create -{ - u32 client_index; - u32 context; - - u8 netmap_if_name[64]; - u8 hw_addr[6]; - u8 use_random_hw_addr; - u8 is_pipe; - u8 is_master; -}; - -/** \brief Delete netmap - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param netmap_if_name - interface name -*/ -autoreply define netmap_delete -{ - u32 client_index; - u32 context; - - u8 netmap_if_name[64]; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/netmap.c b/extras/deprecated/netmap/netmap.c deleted file mode 100644 index 7cab6bb0289..00000000000 --- a/extras/deprecated/netmap/netmap.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -netmap_main_t netmap_main; - -static u32 -netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, - u32 flags) -{ - /* nothing for now */ - return 0; -} - -static clib_error_t * -netmap_fd_read_ready (clib_file_t * uf) -{ - vlib_main_t *vm = vlib_get_main (); - netmap_main_t *nm = &netmap_main; - u32 idx = uf->private_data; - - nm->pending_input_bitmap = - clib_bitmap_set (nm->pending_input_bitmap, idx, 1); - - /* Schedule the rx node */ - vlib_node_set_interrupt_pending (vm, netmap_input_node.index); - - return 0; -} - -static void -close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) -{ - if (nif->clib_file_index != ~0) - { - clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); - nif->clib_file_index = ~0; - } - else if (nif->fd > -1) - close (nif->fd); - - if (nif->mem_region) - { - netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; - if (--reg->refcnt == 0) - { - munmap (reg->mem, reg->region_size); - reg->region_size = 0; - } - } - - - mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, - &nif->if_index); - vec_free (nif->host_if_name); - vec_free (nif->req); - - clib_memset (nif, 0, sizeof (*nif)); - pool_put (nm->interfaces, nif); -} - -int -netmap_worker_thread_enable () -{ - /* if worker threads are enabled, switch to polling mode */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - netmap_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - - return 0; -} - -int -netmap_worker_thread_disable () -{ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - netmap_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - - return 0; -} - -int -netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, - u8 is_pipe, u8 is_master, u32 * sw_if_index) -{ - netmap_main_t *nm = &netmap_main; - int ret = 0; - netmap_if_t *nif = 0; - u8 hw_addr[6]; - clib_error_t *error = 0; - vnet_sw_interface_t *sw; - vnet_main_t *vnm = vnet_get_main (); - uword *p; - struct nmreq *req = 0; - netmap_mem_region_t *reg; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int fd; - - p = mhash_get (&nm->if_index_by_host_if_name, if_name); - if (p) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; - - fd = open ("/dev/netmap", O_RDWR); - if (fd < 0) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; - - pool_get (nm->interfaces, nif); - nif->if_index = nif - nm->interfaces; - nif->fd = fd; - nif->clib_file_index = ~0; - - vec_validate (req, 0); - nif->req = req; - req->nr_version = NETMAP_API; - req->nr_flags = NR_REG_ALL_NIC; - - if (is_pipe) - req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; - else - req->nr_flags = NR_REG_ALL_NIC; - - req->nr_flags |= NR_ACCEPT_VNET_HDR; - snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); - req->nr_name[IFNAMSIZ - 1] = 0; - - if (ioctl (nif->fd, NIOCREGIF, req)) - { - ret = VNET_API_ERROR_NOT_CONNECTED; - goto error; - } - - nif->mem_region = req->nr_arg2; - vec_validate (nm->mem_regions, nif->mem_region); - reg = &nm->mem_regions[nif->mem_region]; - if (reg->region_size == 0) - { - reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - clib_warning ("mem %p", reg->mem); - if (reg->mem == MAP_FAILED) - { - ret = VNET_API_ERROR_NOT_CONNECTED; - goto error; - } - reg->region_size = req->nr_memsize; - } - reg->refcnt++; - - nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); - nif->first_rx_ring = 0; - nif->last_rx_ring = 0; - nif->first_tx_ring = 0; - nif->last_tx_ring = 0; - nif->host_if_name = if_name; - nif->per_interface_next_index = ~0; - - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&nif->lockp); - - { - clib_file_t template = { 0 }; - template.read_function = netmap_fd_read_ready; - template.file_descriptor = nif->fd; - template.private_data = nif->if_index; - template.description = format (0, "netmap socket"); - nif->clib_file_index = clib_file_add (&file_main, &template); - } - - /*use configured or generate random MAC address */ - if (hw_addr_set) - memcpy (hw_addr, hw_addr_set, 6); - else - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - memcpy (hw_addr + 2, &rnd, sizeof (rnd)); - hw_addr[0] = 2; - hw_addr[1] = 0xfe; - } - - error = ethernet_register_interface (vnm, netmap_device_class.index, - nif->if_index, hw_addr, - &nif->hw_if_index, - netmap_eth_flag_change); - - if (error) - { - clib_error_report (error); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); - nif->sw_if_index = sw->sw_if_index; - - mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); - - if (sw_if_index) - *sw_if_index = nif->sw_if_index; - - if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) - netmap_worker_thread_enable (); - - return 0; - -error: - close_netmap_if (nm, nif); - return ret; -} - -int -netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) -{ - vnet_main_t *vnm = vnet_get_main (); - netmap_main_t *nm = &netmap_main; - netmap_if_t *nif; - uword *p; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); - if (p == NULL) - { - clib_warning ("Host interface %s does not exist", host_if_name); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - nif = pool_elt_at_index (nm->interfaces, p[0]); - - /* bring down the interface */ - vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); - - ethernet_delete_interface (vnm, nif->hw_if_index); - - close_netmap_if (nm, nif); - - if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) - netmap_worker_thread_disable (); - - return 0; -} - -static clib_error_t * -netmap_init (vlib_main_t * vm) -{ - netmap_main_t *nm = &netmap_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; - - clib_memset (nm, 0, sizeof (netmap_main_t)); - - nm->input_cpu_first_index = 0; - nm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - nm->input_cpu_first_index = tr->first_index; - nm->input_cpu_count = tr->count; - } - - mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); - - vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - return 0; -} - -VLIB_INIT_FUNCTION (netmap_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/netmap.h b/extras/deprecated/netmap/netmap.h deleted file mode 100644 index 29f855fda8e..00000000000 --- a/extras/deprecated/netmap/netmap.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ -/* - * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; - u8 *host_if_name; - uword if_index; - u32 hw_if_index; - u32 sw_if_index; - u32 clib_file_index; - - u32 per_interface_next_index; - u8 is_admin_up; - - /* netmap */ - struct nmreq *req; - u16 mem_region; - int fd; - struct netmap_if *nifp; - u16 first_tx_ring; - u16 last_tx_ring; - u16 first_rx_ring; - u16 last_rx_ring; - -} netmap_if_t; - -typedef struct -{ - char *mem; - u32 region_size; - int refcnt; -} netmap_mem_region_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - netmap_if_t *interfaces; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* rx buffer cache */ - u32 **rx_buffers; - - /* hash of host interface names */ - mhash_t if_index_by_host_if_name; - - /* vector of memory regions */ - netmap_mem_region_t *mem_regions; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; -} netmap_main_t; - -extern netmap_main_t netmap_main; -extern vnet_device_class_t netmap_device_class; -extern vlib_node_registration_t netmap_input_node; - -int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, - u8 is_pipe, u8 is_master, u32 * sw_if_index); -int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); - - -/* Macros and helper functions from sys/net/netmap_user.h */ - -#ifdef _NET_NETMAP_H_ - -#define _NETMAP_OFFSET(type, ptr, offset) \ - ((type)(void *)((char *)(ptr) + (offset))) - -#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) - -#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index] ) - -#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) - -#define NETMAP_BUF(ring, index) \ - ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) - -#define NETMAP_BUF_IDX(ring, buf) \ - ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ - (ring)->nr_buf_size ) - -static inline uint32_t -nm_ring_next (struct netmap_ring *ring, uint32_t i) -{ - return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); -} - - -/* - * Return 1 if we have pending transmissions in the tx ring. - * When everything is complete ring->head = ring->tail + 1 (modulo ring size) - */ -static inline int -nm_tx_pending (struct netmap_ring *ring) -{ - return nm_ring_next (ring, ring->tail) != ring->head; -} - -static inline uint32_t -nm_ring_space (struct netmap_ring *ring) -{ - int ret = ring->tail - ring->cur; - if (ret < 0) - ret += ring->num_slots; - return ret; -} -#endif - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/netmap_api.c b/extras/deprecated/netmap/netmap_api.c deleted file mode 100644 index ee05ec22d25..00000000000 --- a/extras/deprecated/netmap/netmap_api.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - *------------------------------------------------------------------ - * netmap_api.c - netmap api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include - -#include -#include -#include - -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - -#include - -#define foreach_vpe_api_msg \ -_(NETMAP_CREATE, netmap_create) \ -_(NETMAP_DELETE, netmap_delete) \ - -static void -vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_netmap_create_reply_t *rmp; - int rv = 0; - u8 *if_name = NULL; - - if_name = format (0, "%s", mp->netmap_if_name); - vec_add1 (if_name, 0); - - rv = - netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, - mp->is_pipe, mp->is_master, 0); - - vec_free (if_name); - - REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); -} - -static void -vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_netmap_delete_reply_t *rmp; - int rv = 0; - u8 *if_name = NULL; - - if_name = format (0, "%s", mp->netmap_if_name); - vec_add1 (if_name, 0); - - rv = netmap_delete_if (vm, if_name); - - vec_free (if_name); - - REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); -} - -/* - * netmap_api_hookup - * Add vpe's API message handlers to the table. - * vlib has already mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_netmap; -#undef _ -} - -static clib_error_t * -netmap_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = vlibapi_get_main (); - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (netmap_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/extras/deprecated/netmap/node.c b/extras/deprecated/netmap/node.c deleted file mode 100644 index b0a68824b9c..00000000000 --- a/extras/deprecated/netmap/node.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#define foreach_netmap_input_error - -typedef enum -{ -#define _(f,s) NETMAP_INPUT_ERROR_##f, - foreach_netmap_input_error -#undef _ - NETMAP_INPUT_N_ERROR, -} netmap_input_error_t; - -static char *netmap_input_error_strings[] = { -#define _(n,s) s, - foreach_netmap_input_error -#undef _ -}; - -typedef struct -{ - u32 next_index; - u32 hw_if_index; - struct netmap_slot slot; -} netmap_input_trace_t; - -static u8 * -format_netmap_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); - u32 indent = format_get_indent (s); - - s = format (s, "netmap: hw_if_index %d next-index %d", - t->hw_if_index, t->next_index); - s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", - format_white_space, indent + 2, - t->slot.flags, t->slot.len, t->slot.buf_idx); - return s; -} - -always_inline void -buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); - vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); - - /* update first buffer */ - first_b->total_length_not_including_first_buffer += b->current_length; - - /* update previous buffer */ - prev_b->next_buffer = bi; - prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; - - /* update current buffer */ - b->next_buffer = 0; -} - -always_inline uword -netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, netmap_if_t * nif) -{ - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - uword n_trace = vlib_get_trace_count (vm, node); - netmap_main_t *nm = &netmap_main; - u32 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u32 *to_next = 0; - u32 n_free_bufs; - struct netmap_ring *ring; - int cur_ring; - u32 thread_index = vm->thread_index; - u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - - if (nif->per_interface_next_index != ~0) - next_index = nif->per_interface_next_index; - - n_free_bufs = vec_len (nm->rx_buffers[thread_index]); - if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) - { - vec_validate (nm->rx_buffers[thread_index], - VLIB_FRAME_SIZE + n_free_bufs - 1); - n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], - VLIB_FRAME_SIZE); - _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; - } - - cur_ring = nif->first_rx_ring; - while (cur_ring <= nif->last_rx_ring && n_free_bufs) - { - int r = 0; - u32 cur_slot_index; - ring = NETMAP_RXRING (nif->nifp, cur_ring); - r = nm_ring_space (ring); - - if (!r) - { - cur_ring++; - continue; - } - - if (r > n_free_bufs) - r = n_free_bufs; - - cur_slot_index = ring->cur; - while (r) - { - u32 n_left_to_next; - u32 next0 = next_index; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (r && n_left_to_next) - { - vlib_buffer_t *first_b0 = 0; - u32 offset = 0; - u32 bi0 = 0, first_bi0 = 0, prev_bi0; - u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots; - u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; - struct netmap_slot *slot = &ring->slot[cur_slot_index]; - u32 data_len = slot->len; - - /* prefetch 2 slots in advance */ - CLIB_PREFETCH (&ring->slot[next2_slot_index], - CLIB_CACHE_LINE_BYTES, LOAD); - /* prefetch start of next packet */ - CLIB_PREFETCH (NETMAP_BUF - (ring, ring->slot[next_slot_index].buf_idx), - CLIB_CACHE_LINE_BYTES, LOAD); - - while (data_len && n_free_bufs) - { - vlib_buffer_t *b0; - /* grab free buffer */ - u32 last_empty_buffer = - vec_len (nm->rx_buffers[thread_index]) - 1; - prev_bi0 = bi0; - bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; - b0 = vlib_get_buffer (vm, bi0); - _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer; - n_free_bufs--; - - /* copy data */ - u32 bytes_to_copy = - data_len > n_buffer_bytes ? n_buffer_bytes : data_len; - b0->current_data = 0; - clib_memcpy_fast (vlib_buffer_get_current (b0), - (u8 *) NETMAP_BUF (ring, slot->buf_idx) + - offset, bytes_to_copy); - - /* fill buffer header */ - b0->current_length = bytes_to_copy; - - if (offset == 0) - { - b0->total_length_not_including_first_buffer = 0; - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - nif->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - first_bi0 = bi0; - first_b0 = vlib_get_buffer (vm, first_bi0); - } - else - buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); - - offset += bytes_to_copy; - data_len -= bytes_to_copy; - } - - /* trace */ - if (PREDICT_FALSE (n_trace > 0)) - { - if (PREDICT_TRUE (first_b0 != 0)) - { - netmap_input_trace_t *tr; - vlib_trace_buffer (vm, node, next0, first_b0, - /* follow_chain */ 0); - vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = nif->hw_if_index; - memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); - } - } - - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (nif->sw_if_index, &next0, - first_b0); - - /* enque and take next packet */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, first_bi0, - next0); - - /* next packet */ - n_rx_packets++; - n_rx_bytes += slot->len; - to_next[0] = first_bi0; - to_next += 1; - n_left_to_next--; - cur_slot_index = next_slot_index; - - r--; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - ring->head = ring->cur = cur_slot_index; - cur_ring++; - } - - if (n_rx_packets) - ioctl (nif->fd, NIOCRXSYNC, NULL); - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); - - vnet_device_increment_rx_packets (thread_index, n_rx_packets); - - return n_rx_packets; -} - -VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - int i; - u32 n_rx_packets = 0; - u32 thread_index = vm->thread_index; - netmap_main_t *nm = &netmap_main; - netmap_if_t *nmi; - - for (i = 0; i < vec_len (nm->interfaces); i++) - { - nmi = vec_elt_at_index (nm->interfaces, i); - if (nmi->is_admin_up && - (i % nm->input_cpu_count) == - (thread_index - nm->input_cpu_first_index)) - n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); - } - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (netmap_input_node) = { - .name = "netmap-input", - .sibling_of = "device-input", - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, - .format_trace = format_netmap_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ - .state = VLIB_NODE_STATE_INTERRUPT, - .n_errors = NETMAP_INPUT_N_ERROR, - .error_strings = netmap_input_error_strings, -}; -/* *INDENT-ON* */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt new file mode 100644 index 00000000000..d53a9e0911a --- /dev/null +++ b/src/plugins/netmap/CMakeLists.txt @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2024 Tom Jones +# +# This software was developed by Tom Jones under sponsorship +# from the FreeBSD Foundation. +# + +if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled") + return() +endif() + +add_vpp_plugin(netmap + SOURCES + plugin.c + netmap.c + node.c + device.c + cli.c + netmap_api.c + + MULTIARCH_SOURCES + node.c + device.c + + INSTALL_HEADERS + netmap.h + net_netmap.h + + API_FILES + netmap.api +) diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml new file mode 100644 index 00000000000..a9dfb2163e4 --- /dev/null +++ b/src/plugins/netmap/FEATURE.yaml @@ -0,0 +1,12 @@ +--- +name: Netmap Device +maintainer: Tom Jones +features: + - L4 checksum offload +description: "Create a netmap interface, which is a high speed user-space + interface that allows VPP to patch to a physical or virtual NIC + without the use of DPDK" +missing: + - API dump +state: production +properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c new file mode 100644 index 00000000000..b54d397ecbe --- /dev/null +++ b/src/plugins/netmap/cli.c @@ -0,0 +1,236 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include +#include +#include + +#include +#include +#include + +#include +#include + +static clib_error_t * +netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + u8 hwaddr[6]; + u8 *hw_addr_ptr = 0; + int r; + u8 is_pipe = 0; + u8 is_master = 0; + u32 sw_if_index = ~0; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + if (unformat + (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) + hw_addr_ptr = hwaddr; + else if (unformat (line_input, "pipe")) + is_pipe = 1; + else if (unformat (line_input, "master")) + is_master = 1; + else if (unformat (line_input, "slave")) + is_master = 0; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + r = + netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, + &sw_if_index); + + if (r == VNET_API_ERROR_SYSCALL_ERROR_1) + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * 'netmap' is a framework for very fast packet I/O from userspace. + * 'VALE' is an equally fast in-kernel software switch using the + * netmap API. 'netmap' includes 'netmap pipes', a shared + * memory packet transport channel. Together, they provide a high speed + * user-space interface that allows VPP to patch into a linux namespace, a + * linux container, or a physical NIC without the use of DPDK. Netmap/VALE + * generates the 'netmap.ko' kernel module that needs to be loaded + * before netmap interfaces can be created. + * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. + * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, + * which is a snapshot of the Netmap/VALE repo with minor changes to work + * with containers and modified kernel drivers to work with NICs. + * + * Create a netmap interface that will attach to a linux interface. + * The interface must already exist. Once created, a new netmap interface + * will exist in VPP with the name 'netmap-', where + * '' takes one of two forms: + * - ifname - Linux interface to bind too. + * - valeXXX:YYY - + * - Where 'valeXXX' is an arbitrary name for a VALE + * interface that must start with 'vale' and is less + * than 16 characters. + * - Where 'YYY' is an existing linux namespace. + * + * This command has the following optional parameters: + * + * - hw-addr - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - pipe - Optional flag to indicate that a 'netmap pipe' + * instance should be created. + * + * - master | slave - Optional flag to indicate whether VPP should + * be the master or slave of the 'netmap pipe'. Only considered + * if 'pipe' is entered. Defaults to 'slave' if not entered. + * + * @cliexpar + * Example of how to create a netmap interface tied to the linux + * namespace 'vpp1': + * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} + * netmap-vale00:vpp1 + * @cliexend + * Once the netmap interface is created, enable the interface using: + * @cliexcmd{set interface state netmap-vale00:vpp1 up} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_create_command, static) = { + .path = "create netmap", + .short_help = "create netmap name |valeXXX:YYY " + "[hw-addr ] [pipe] [master|slave]", + .function = netmap_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + netmap_delete_if (vm, host_if_name); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Delete a netmap interface. Use the '' to identify + * the netmap interface to be deleted. In VPP, netmap interfaces are + * named as 'netmap-', where '' + * takes one of two forms: + * - ifname - Linux interface to bind too. + * - valeXXX:YYY - + * - Where 'valeXXX' is an arbitrary name for a VALE + * interface that must start with 'vale' and is less + * than 16 characters. + * - Where 'YYY' is an existing linux namespace. + * + * @cliexpar + * Example of how to delete a netmap interface named 'netmap-vale00:vpp1': + * @cliexcmd{delete netmap name vale00:vpp1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_delete_command, static) = { + .path = "delete netmap", + .short_help = "delete netmap name |valeXXX:YYY", + .function = netmap_delete_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +netmap_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (netmap_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c new file mode 100644 index 00000000000..505deb988c4 --- /dev/null +++ b/src/plugins/netmap/device.c @@ -0,0 +1,252 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#define foreach_netmap_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) NETMAP_TX_ERROR_##f, + foreach_netmap_tx_func_error +#undef _ + NETMAP_TX_N_ERROR, +} netmap_tx_func_error_t; + +static char *netmap_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_netmap_tx_func_error +#undef _ +}; + + +static u8 * +format_netmap_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + netmap_main_t *apm = &netmap_main; + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "netmap-%s", nif->host_if_name); + return s; +} + +static u8 * +format_netmap_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); + u32 indent = format_get_indent (s); + + s = format (s, "NETMAP interface"); + if (verbose) + { + s = format (s, "\n%U version %d flags 0x%x" + "\n%U region %u memsize 0x%x offset 0x%x" + "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", + format_white_space, indent + 2, + nif->req->nr_version, + nif->req->nr_flags, + format_white_space, indent + 2, + nif->mem_region, + nif->req->nr_memsize, + nif->req->nr_offset, + format_white_space, indent + 2, + nif->req->nr_tx_slots, + nif->req->nr_rx_slots, + nif->req->nr_tx_rings, nif->req->nr_rx_rings); + } + return s; +} + +static u8 * +format_netmap_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + netmap_main_t *nm = &netmap_main; + u32 *buffers = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + f64 const time_constant = 1e3; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); + int cur_ring; + + clib_spinlock_lock_if_init (&nif->lockp); + + cur_ring = nif->first_tx_ring; + + while (n_left && cur_ring <= nif->last_tx_ring) + { + struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); + int n_free_slots = nm_ring_space (ring); + uint cur = ring->cur; + + if (nm_tx_pending (ring)) + { + if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) + clib_unix_warning ("NIOCTXSYNC"); + clib_cpu_time_wait (time_constant); + + if (nm_tx_pending (ring) && !n_free_slots) + { + cur_ring++; + continue; + } + } + + while (n_left && n_free_slots) + { + vlib_buffer_t *b0 = 0; + u32 bi = buffers[0]; + u32 len; + u32 offset = 0; + buffers++; + + struct netmap_slot *slot = &ring->slot[cur]; + + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + /* memcpy */ + clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + slot->len = offset; + cur = (cur + 1) % ring->num_slots; + n_free_slots--; + n_left--; + } + CLIB_MEMORY_BARRIER (); + ring->head = ring->cur = cur; + } + + if (n_left < frame->n_vectors) + ioctl (nif->fd, NIOCTXSYNC, NULL); + + clib_spinlock_unlock_if_init (&nif->lockp); + + if (n_left) + vlib_error_count (vm, node->node_index, + (n_left == + frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS : + NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); + + vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + nif->per_interface_next_index = node_index; + return; + } + + nif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), netmap_input_node.index, + node_index); +} + +static void +netmap_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (nif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +netmap_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (netmap_device_class) = { + .name = "netmap", + .format_device_name = format_netmap_device_name, + .format_device = format_netmap_device, + .format_tx_trace = format_netmap_tx_trace, + .tx_function_n_errors = NETMAP_TX_N_ERROR, + .tx_function_error_strings = netmap_tx_func_error_strings, + .rx_redirect_to_node = netmap_set_interface_next_node, + .clear_counters = netmap_clear_hw_interface_counters, + .admin_up_down_function = netmap_interface_admin_up_down, + .subif_add_del_function = netmap_subif_add_del_function, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h new file mode 100644 index 00000000000..ecccedd4484 --- /dev/null +++ b/src/plugins/netmap/net_netmap.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + * + * This API is also used to communicate with the VALE software switch + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +#define NETMAP_API 14 /* current API version */ + +#define NETMAP_MIN_API 14 /* min and max versions accepted */ +#define NETMAP_MAX_API 15 +/* + * Some fields should be cache-aligned to reduce contention. + * The alignment is architecture and OS dependent, but rather than + * digging into OS headers to find the exact value we use an estimate + * that should cover most architectures. + */ +#define NM_CACHE_ALIGN 128 + +/* + * --- Netmap data structures --- + * + * The userspace data structures used by netmap are shown below. + * They are allocated by the kernel and mmap()ed by userspace threads. + * Pointers are implemented as memory offsets or indexes, + * so that they can be easily dereferenced in kernel and userspace. + + KERNEL (opaque, obviously) + + ==================================================================== + | + USERSPACE | struct netmap_ring + +---->+---------------+ + / | head,cur,tail | + struct netmap_if (nifp, 1 per fd) / | buf_ofs | + +---------------+ / | other fields | + | ni_tx_rings | / +===============+ + | ni_rx_rings | / | buf_idx, len | slot[0] + | | / | flags, ptr | + | | / +---------------+ + +===============+ / | buf_idx, len | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | + | txring_ofs[1] | +---------------+ + (tx+1 entries) (num_slots entries) + | txring_ofs[t] | | buf_idx, len | slot[n-1] + +---------------+ | flags, ptr | + | rxring_ofs[0] | +---------------+ + | rxring_ofs[1] | + (rx+1 entries) + | rxring_ofs[r] | + +---------------+ + + * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to + * a file descriptor, the mmap()ed region contains a (logically readonly) + * struct netmap_if pointing to struct netmap_ring's. + * + * There is one netmap_ring per physical NIC ring, plus one tx/rx ring + * pair attached to the host stack (this pair is unused for non-NIC ports). + * + * All physical/host stack ports share the same memory region, + * so that zero-copy can be implemented between them. + * VALE switch ports instead have separate memory regions. + * + * The netmap_ring is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer (MTU-sized and residing in the + * mmapped region), its length and some flags. An extra 64-bit pointer + * is provided for user-supplied buffers in the tx path. + * + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE + * + * Added in NETMAP_API 11: + * + * + NIOCREGIF can request the allocation of extra spare buffers from + * the same memory pool. The desired number of buffers must be in + * nr_arg3. The ioctl may return fewer buffers, depending on memory + * availability. nr_arg3 will return the actual value, and, once + * mapped, nifp->ni_bufs_head will be the index of the first buffer. + * + * The buffers are linked to each other using the first uint32_t + * as the index. On close, ni_bufs_head must point to the list of + * buffers to be released. + * + * + NIOCREGIF can request space for extra rings (and buffers) + * allocated in the same memory space. The number of extra rings + * is in nr_arg1, and is advisory. This is a no-op on NICs where + * the size of the memory space is fixed. + * + * + NIOCREGIF can attach to PIPE rings sharing the same memory + * space with a parent device. The ifname indicates the parent device, + * which must already exist. Flags in nr_flags indicate if we want to + * bind the master or slave side, the index (from nr_ringid) + * is just a cookie and does not need to be sequential. + * + * + NIOCREGIF can also attach to 'monitor' rings that replicate + * the content of specific rings, also from the same memory space. + * + * Extra flags in nr_flags support the above functions. + * Application libraries may use the following naming scheme: + * netmap:foo all NIC ring pairs + * netmap:foo^ only host ring pair + * netmap:foo+ all NIC ring + host ring pairs + * netmap:foo-k the k-th NIC ring pair + * netmap:foo{k PIPE ring pair k, master side + * netmap:foo}k PIPE ring pair k, slave side + */ + +/* + * struct netmap_slot is a buffer descriptor + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* length for this slot */ + uint16_t flags; /* buf changed, etc. */ + uint64_t ptr; /* pointer for indirect buffers */ +}; + +/* + * The following flags control how the slot is used + */ + +#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ + /* + * must be set whenever buf_idx is changed (as it might be + * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. + */ + +#define NS_REPORT 0x0002 /* ask the hardware to report results */ + /* + * Request notification when slot is used by the hardware. + * Normally transmit completions are handled lazily and + * may be unreported. This flag lets us know when a slot + * has been sent (e.g. to terminate the sender). + */ + +#define NS_FORWARD 0x0004 /* pass packet 'forward' */ + /* + * (Only for physical ports, rx rings with NR_FORWARD set). + * Slot released to the kernel (i.e. before ring->head) with + * this flag set are passed to the peer ring (host/NIC), + * thus restoring the host-NIC connection for these slots. + * This supports efficient traffic monitoring or firewalling. + */ + +#define NS_NO_LEARN 0x0008 /* disable bridge learning */ + /* + * On a VALE switch, do not 'learn' the source port for + * this buffer. + */ + +#define NS_INDIRECT 0x0010 /* userspace buffer */ + /* + * (VALE tx rings only) data is in a userspace buffer, + * whose address is in the 'ptr' field in the slot. + */ + +#define NS_MOREFRAG 0x0020 /* packet has more fragments */ + /* + * (VALE ports only) + * Set on all but the last slot of a multi-segment packet. + * The 'len' field refers to the individual fragment. + */ + +#define NS_PORT_SHIFT 8 +#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) + /* + * The high 8 bits of the flag, if not zero, indicate the + * destination port for the VALE switch, overriding + * the lookup table. + */ + +#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) + /* + * (VALE rx rings only) the high 8 bits + * are the number of fragments. + */ + + +/* + * struct netmap_ring + * + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level the important fields are: head, cur, tail. + * + * In TX rings: + * + * head first slot available for transmission. + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] can be used for new packets to send; + * 'head' and 'cur' must be incremented as slots are filled + * with new packets to be sent; + * 'cur' can be moved further ahead if we need more space + * for new transmissions. XXX todo (2014-03-12) + * + * In RX rings: + * + * head first valid received packet + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] contain received packets; + * 'head' and 'cur' must be incremented as slots are consumed + * and can be returned to the kernel; + * 'cur' can be moved further ahead if we want to wait for + * new packets without returning the previous ones. + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring, and all slots and buffers in the range + * [head .. tail-1] are owned by the user program; + * the kernel only accesses them during a netmap system call + * and in the user thread context. + * + * Other slots and buffers are reserved for use by the kernel + */ +struct netmap_ring { + /* + * buf_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + const int64_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + const uint32_t nr_buf_size; + const uint16_t ringid; + const uint16_t dir; /* 0: tx, 1: rx */ + + uint32_t head; /* (u) first user slot */ + uint32_t cur; /* (u) wakeup point */ + uint32_t tail; /* (k) first kernel slot */ + + uint32_t flags; + + struct timeval ts; /* (k) time of last *sync() */ + + /* opaque room for a mutex or similar object */ +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif + + /* the slots follow. This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * RING FLAGS + */ +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + /* + * updates the 'ts' field on each netmap syscall. This saves + * saves a separate gettimeofday(), and is not much worse than + * software timestamps generated in the interrupt handler. + */ + +#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ + /* + * Enables the NS_FORWARD slot flag for the ring. + */ + + +/* + * Netmap representation of an interface and its queue(s). + * This is initialized by the kernel when binding a file + * descriptor to a port, and should be considered as readonly + * by user programs. The kernel never uses it. + * + * There is one netmap_if for each file descriptor on which we want + * to select/poll. + * select/poll operates on one or all pairs depending on the value of + * nmr_queueid passed on the ioctl. + */ +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + const uint32_t ni_version; /* API version, currently unused */ + const uint32_t ni_flags; /* properties */ +#define NI_PRIV_MEM 0x1 /* private memory region */ + + /* + * The number of packet rings available in netmap mode. + * Physical NICs can have different numbers of tx and rx rings. + * Physical NICs also have a 'host' ring pair. + * Additionally, clients can request additional ring pairs to + * be used for internal communication. + */ + const uint32_t ni_tx_rings; /* number of HW tx rings */ + const uint32_t ni_rx_rings; /* number of HW rx rings */ + + uint32_t ni_bufs_head; /* head index for extra bufs */ + uint32_t ni_spare1[5]; + /* + * The following array contains the offset of each netmap ring + * from this structure, in the following order: + * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; + * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. + * + * The area is filled up by the kernel on NIOCREGIF, + * and then only read by userspace code. + */ + const ssize_t ring_ofs[0]; +}; + + +#ifndef NIOCREGIF +/* + * ioctl names and related fields + * + * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, + * whose identity is set in NIOCREGIF through nr_ringid. + * These are non blocking and take no argument. + * + * NIOCGINFO takes a struct ifreq, the interface name is the input, + * the outputs are number of queues and number of descriptor + * for each queue (useful to set number of threads etc.). + * The info returned is only advisory and may change before + * the interface is bound to a file descriptor. + * + * NIOCREGIF takes an interface name within a struct nmre, + * and activates netmap mode on the interface (if possible). + * + * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we + * can pass it down to other NIC-related ioctls. + * + * The actual argument (struct nmreq) has a number of options to request + * different functions. + * The following are used in NIOCREGIF when nr_cmd == 0: + * + * nr_name (in) + * The name of the port (em0, valeXXX:YYY, etc.) + * limited to IFNAMSIZ for backward compatibility. + * + * nr_version (in/out) + * Must match NETMAP_API as used in the kernel, error otherwise. + * Always returns the desired value on output. + * + * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) + * On input, non-zero values may be used to reconfigure the port + * according to the requested values, but this is not guaranteed. + * On output the actual values in use are reported. + * + * nr_ringid (in) + * Indicates how rings should be bound to the file descriptors. + * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) + * are used to indicate the ring number, and nr_flags specifies + * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. + * + * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: + * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control + * the binding as follows: + * 0 (default) binds all physical rings + * NETMAP_HW_RING | ring number binds a single ring pair + * NETMAP_SW_RING binds only the host tx/rx rings + * + * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push + * packets on tx rings only if POLLOUT is set. + * The default is to push any pending packet. + * + * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release + * packets on rx rings also when POLLIN is NOT set. + * The default is to touch the rx ring only with POLLIN. + * Note that this is the opposite of TX because it + * reflects the common usage. + * + * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. + * NETMAP_PRIV_MEM is set on return for ports that do not use + * the global memory allocator. + * This information is not significant and applications + * should look at the region id in nr_arg2 + * + * nr_flags is the recommended mode to indicate which rings should + * be bound to a file descriptor. Values are NR_REG_* + * + * nr_arg1 (in) The number of extra rings to be reserved. + * Especially when allocating a VALE port the system only + * allocates the amount of memory needed for the port. + * If more shared memory rings are desired (e.g. for pipes), + * the first invocation for the same basename/allocator + * should specify a suitable number. Memory cannot be + * extended after the first allocation without closing + * all ports on the same region. + * + * nr_arg2 (in/out) The identity of the memory region used. + * On input, 0 means the system decides autonomously, + * other values may try to select a specific region. + * On return the actual value is reported. + * Region '1' is the global allocator, normally shared + * by all interfaces. Other values are private regions. + * If two ports the same region zero-copy is possible. + * + * nr_arg3 (in/out) number of extra buffers to be allocated. + * + * + * + * nr_cmd (in) if non-zero indicates a special command: + * NETMAP_BDG_ATTACH and nr_name = vale*:ifname + * attaches the NIC to the switch; nr_ringid specifies + * which rings to use. Used by vale-ctl -a ... + * nr_arg1 = NETMAP_BDG_HOST also attaches the host port + * as in vale-ctl -h ... + * + * NETMAP_BDG_DETACH and nr_name = vale*:ifname + * disconnects a previously attached NIC. + * Used by vale-ctl -d ... + * + * NETMAP_BDG_LIST + * list the configuration of VALE switches. + * + * NETMAP_BDG_VNET_HDR + * Set the virtio-net header length used by the client + * of a VALE switch port. + * + * NETMAP_BDG_NEWIF + * create a persistent VALE port with name nr_name. + * Used by vale-ctl -n ... + * + * NETMAP_BDG_DELIF + * delete a persistent VALE port. Used by vale-ctl -d ... + * + * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific + * + * + * + */ + + +/* + * struct nmreq overlays a struct ifreq (just the name) + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ +#define NETMAP_SW_RING 0x2000 /* only host ring pair */ + +#define NETMAP_RING_MASK 0x0fff /* the ring number */ + +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ + +#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ + + uint16_t nr_cmd; +#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ +#define NETMAP_BDG_DETACH 2 /* detach the NIC */ +#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ +#define NETMAP_BDG_LIST 4 /* get bridge's info */ +#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ +#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ +#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ +#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ + uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ +#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ + + uint16_t nr_arg2; + uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ + uint32_t nr_flags; + /* various modes, extends nr_ringid */ + uint32_t spare2[1]; +}; + +#define NR_REG_MASK 0xf /* values for nr_flags */ +enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ + NR_REG_ALL_NIC = 1, + NR_REG_SW = 2, + NR_REG_NIC_SW = 3, + NR_REG_ONE_NIC = 4, + NR_REG_PIPE_MASTER = 5, + NR_REG_PIPE_SLAVE = 6, +}; +/* monitor uses the NR_REG to select the rings to monitor */ +#define NR_MONITOR_TX 0x100 +#define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. */ +#define NR_ACCEPT_VNET_HDR 0x8000 + + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. + * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We havesome internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" + +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */ +#endif /* !NIOCREGIF */ + + +/* + * Helper functions for kernel and userspace + */ + +/* + * check if space is available in the ring. + */ +static inline int +nm_ring_empty(struct netmap_ring *ring) +{ + return (ring->cur == ring->tail); +} + +/* + * Opaque structure that is passed to an external kernel + * module via ioctl(fd, NIOCCONFIG, req) for a user-owned + * bridge port (at this point ephemeral VALE interface). + */ +#define NM_IFRDATA_LEN 256 +struct nm_ifreq { + char nifr_name[IFNAMSIZ]; + char data[NM_IFRDATA_LEN]; +}; + +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; +#endif /* _NET_NETMAP_H_ */ diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api new file mode 100644 index 00000000000..a14753cad9c --- /dev/null +++ b/src/plugins/netmap/netmap.api @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Create netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC + @param is_pipe - is pipe + @param is_master - 0=slave, 1=master +*/ +autoreply define netmap_create +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; + u8 is_pipe; + u8 is_master; +}; + +/** \brief Delete netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name +*/ +autoreply define netmap_delete +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c new file mode 100644 index 00000000000..ebef215eb3b --- /dev/null +++ b/src/plugins/netmap/netmap.c @@ -0,0 +1,334 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +netmap_main_t netmap_main; + +static clib_error_t * +netmap_fd_read_ready (clib_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + netmap_main_t *nm = &netmap_main; + u32 idx = uf->private_data; + + nm->pending_input_bitmap = + clib_bitmap_set (nm->pending_input_bitmap, idx, 1); + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, netmap_input_node.index); + + return 0; +} + +static void +close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) +{ + if (nif->clib_file_index != ~0) + { + clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + nif->clib_file_index = ~0; + } + else if (nif->fd > -1) + close (nif->fd); + + if (nif->mem_region) + { + netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; + if (--reg->refcnt == 0) + { + munmap (reg->mem, reg->region_size); + reg->region_size = 0; + } + } + + + mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, + &nif->if_index); + vec_free (nif->host_if_name); + vec_free (nif->req); + + clib_memset (nif, 0, sizeof (*nif)); + pool_put (nm->interfaces, nif); +} + +int +netmap_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + return 0; +} + +int +netmap_worker_thread_disable () +{ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + } + + return 0; +} + +int +netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index) +{ + netmap_main_t *nm = &netmap_main; + int ret = 0; + uint32_t nr_reg; + netmap_if_t *nif = 0; + u8 hw_addr[6]; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + uword *p; + struct nmreq *req = 0; + netmap_mem_region_t *reg; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int fd; + + p = mhash_get (&nm->if_index_by_host_if_name, if_name); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + fd = open ("/dev/netmap", O_RDWR); + if (fd < 0) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (nm->interfaces, nif); + nif->if_index = nif - nm->interfaces; + nif->fd = fd; + nif->clib_file_index = ~0; + + vec_validate (req, 0); + nif->req = req; + req->nr_version = NETMAP_API; + req->nr_flags = NR_REG_ALL_NIC; + + if (is_pipe) + req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; + else + req->nr_flags = NR_REG_ALL_NIC; + + req->nr_flags |= NR_ACCEPT_VNET_HDR; + snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); + req->nr_name[IFNAMSIZ - 1] = 0; + + if (ioctl (nif->fd, NIOCREGIF, req)) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + + nif->mem_region = req->nr_arg2; + vec_validate (nm->mem_regions, nif->mem_region); + reg = &nm->mem_regions[nif->mem_region]; + if (reg->region_size == 0) + { + reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + clib_warning ("mem %p", reg->mem); + if (reg->mem == MAP_FAILED) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + reg->region_size = req->nr_memsize; + } + reg->refcnt++; + + nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); + nr_reg = nif->req->nr_flags & NR_REG_MASK; + + if (nr_reg == NR_REG_SW) + { /* host stack */ + nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings; + nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ALL_NIC) + { /* only nic */ + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; + nif->last_tx_ring = nif->req->nr_tx_rings - 1; + nif->last_rx_ring = nif->req->nr_rx_rings - 1; + } + else if (nr_reg == NR_REG_NIC_SW) + { + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; + nif->last_tx_ring = nif->req->nr_tx_rings; + nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ONE_NIC) + { + /* XXX check validity */ + nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring = + nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK; + } + else + { /* pipes */ + nif->first_tx_ring = nif->last_tx_ring = 0; + nif->first_rx_ring = nif->last_rx_ring = 0; + } + + nif->host_if_name = if_name; + nif->per_interface_next_index = ~0; + + if (tm->n_vlib_mains > 1) + clib_spinlock_init (&nif->lockp); + + { + clib_file_t template = { 0 }; + template.read_function = netmap_fd_read_ready; + template.file_descriptor = nif->fd; + template.private_data = nif->if_index; + template.description = format (0, "netmap socket"); + nif->clib_file_index = clib_file_add (&file_main, &template); + } + + /*use configured or generate random MAC address */ + if (hw_addr_set) + memcpy (hw_addr, hw_addr_set, 6); + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hw_addr + 2, &rnd, sizeof (rnd)); + hw_addr[0] = 2; + hw_addr[1] = 0xfe; + } + + vnet_eth_interface_registration_t eir = {}; + + eir.dev_class_index = netmap_device_class.index; + eir.dev_instance = nif->if_index; + eir.address = hw_addr; + eir.cb.set_max_frame_size = NULL; + + nif->hw_if_index = vnet_eth_register_interface (vnm, &eir); + + sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); + nif->sw_if_index = sw->sw_if_index; + + mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); + + if (sw_if_index) + *sw_if_index = nif->sw_if_index; + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) + netmap_worker_thread_enable (); + + return 0; + +error: + close_netmap_if (nm, nif); + return ret; +} + +int +netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif; + uword *p; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + nif = pool_elt_at_index (nm->interfaces, p[0]); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); + + ethernet_delete_interface (vnm, nif->hw_if_index); + + close_netmap_if (nm, nif); + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) + netmap_worker_thread_disable (); + + return 0; +} + +static clib_error_t * +netmap_init (vlib_main_t * vm) +{ + netmap_main_t *nm = &netmap_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + clib_memset (nm, 0, sizeof (netmap_main_t)); + + nm->input_cpu_first_index = 0; + nm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + nm->input_cpu_first_index = tr->first_index; + nm->input_cpu_count = tr->count; + } + + mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (netmap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h new file mode 100644 index 00000000000..29f855fda8e --- /dev/null +++ b/src/plugins/netmap/netmap.h @@ -0,0 +1,166 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/* + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u8 *host_if_name; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + u32 clib_file_index; + + u32 per_interface_next_index; + u8 is_admin_up; + + /* netmap */ + struct nmreq *req; + u16 mem_region; + int fd; + struct netmap_if *nifp; + u16 first_tx_ring; + u16 last_tx_ring; + u16 first_rx_ring; + u16 last_rx_ring; + +} netmap_if_t; + +typedef struct +{ + char *mem; + u32 region_size; + int refcnt; +} netmap_mem_region_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + netmap_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; + + /* vector of memory regions */ + netmap_mem_region_t *mem_regions; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 input_cpu_count; +} netmap_main_t; + +extern netmap_main_t netmap_main; +extern vnet_device_class_t netmap_device_class; +extern vlib_node_registration_t netmap_input_node; + +int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index); +int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); + + +/* Macros and helper functions from sys/net/netmap_user.h */ + +#ifdef _NET_NETMAP_H_ + +#define _NETMAP_OFFSET(type, ptr, offset) \ + ((type)(void *)((char *)(ptr) + (offset))) + +#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) + +#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index] ) + +#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +static inline uint32_t +nm_ring_next (struct netmap_ring *ring, uint32_t i) +{ + return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); +} + + +/* + * Return 1 if we have pending transmissions in the tx ring. + * When everything is complete ring->head = ring->tail + 1 (modulo ring size) + */ +static inline int +nm_tx_pending (struct netmap_ring *ring) +{ + return nm_ring_next (ring, ring->tail) != ring->head; +} + +static inline uint32_t +nm_ring_space (struct netmap_ring *ring) +{ + int ret = ring->tail - ring->cur; + if (ret < 0) + ret += ring->num_slots; + return ret; +} +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c new file mode 100644 index 00000000000..51f572a23e6 --- /dev/null +++ b/src/plugins/netmap/netmap_api.c @@ -0,0 +1,95 @@ +/* + *------------------------------------------------------------------ + * netmap_api.c - netmap api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#define foreach_vpe_api_msg \ +_(NETMAP_CREATE, netmap_create) \ +_(NETMAP_DELETE, netmap_delete) \ + +static void +vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_create_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = + netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, + mp->is_pipe, mp->is_master, 0); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); +} + +static void +vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_delete_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = netmap_delete_if (vm, if_name); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); +} + +#include +static clib_error_t * +netmap_api_hookup (vlib_main_t * vm) +{ + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (netmap_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c new file mode 100644 index 00000000000..6169847fa79 --- /dev/null +++ b/src/plugins/netmap/node.c @@ -0,0 +1,295 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define foreach_netmap_input_error + +typedef enum +{ +#define _(f,s) NETMAP_INPUT_ERROR_##f, + foreach_netmap_input_error +#undef _ + NETMAP_INPUT_N_ERROR, +} netmap_input_error_t; + +static char *netmap_input_error_strings[] = { +#define _(n,s) s, + foreach_netmap_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + struct netmap_slot slot; +} netmap_input_trace_t; + +static u8 * +format_netmap_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "netmap: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", + format_white_space, indent + 2, + t->slot.flags, t->slot.len, t->slot.buf_idx); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, netmap_if_t * nif) +{ + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + netmap_main_t *nm = &netmap_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + struct netmap_ring *ring; + int cur_ring; + u32 thread_index = vm->thread_index; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); + + if (nif->per_interface_next_index != ~0) + next_index = nif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (nm->rx_buffers[thread_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], + VLIB_FRAME_SIZE); + vec_set_len (nm->rx_buffers[thread_index], n_free_bufs); + } + + cur_ring = nif->first_rx_ring; + while (cur_ring <= nif->last_rx_ring && n_free_bufs) + { + int r = 0; + u32 cur_slot_index; + ring = NETMAP_RXRING (nif->nifp, cur_ring); + r = nm_ring_space (ring); + + if (!r) + { + cur_ring++; + continue; + } + + if (r > n_free_bufs) + r = n_free_bufs; + + cur_slot_index = ring->cur; + while (r) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (r && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots; + u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; + struct netmap_slot *slot = &ring->slot[cur_slot_index]; + u32 data_len = slot->len; + + /* prefetch 2 slots in advance */ + CLIB_PREFETCH (&ring->slot[next2_slot_index], + CLIB_CACHE_LINE_BYTES, LOAD); + /* prefetch start of next packet */ + CLIB_PREFETCH (NETMAP_BUF + (ring, ring->slot[next_slot_index].buf_idx), + CLIB_CACHE_LINE_BYTES, LOAD); + + while (data_len && n_free_bufs) + { + vlib_buffer_t *b0; + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (nm->rx_buffers[thread_index]) - 1; + prev_bi0 = bi0; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + vec_set_len (nm->rx_buffers[thread_index], + last_empty_buffer); + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy_fast (vlib_buffer_get_current (b0), + (u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + nif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + + /* trace */ + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0) && + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0)) + { + netmap_input_trace_t *tr; + + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = nif->hw_if_index; + memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); + } + } + + /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += slot->len; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + cur_slot_index = next_slot_index; + + r--; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + ring->head = ring->cur = cur_slot_index; + cur_ring++; + } + + if (n_rx_packets) + ioctl (nif->fd, NIOCRXSYNC, NULL); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + + vnet_device_increment_rx_packets (thread_index, n_rx_packets); + + return n_rx_packets; +} + +VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + u32 thread_index = vm->thread_index; + netmap_main_t *nm = &netmap_main; + netmap_if_t *nmi; + + for (i = 0; i < vec_len (nm->interfaces); i++) + { + nmi = vec_elt_at_index (nm->interfaces, i); + if (nmi->is_admin_up && + (i % nm->input_cpu_count) == + (thread_index - nm->input_cpu_first_index)) + n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (netmap_input_node) = { + .name = "netmap-input", + .sibling_of = "device-input", + .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, + .format_trace = format_netmap_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = NETMAP_INPUT_N_ERROR, + .error_strings = netmap_input_error_strings, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/plugin.c b/src/plugins/netmap/plugin.c new file mode 100644 index 00000000000..1673225b683 --- /dev/null +++ b/src/plugins/netmap/plugin.c @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones + * + * This software was developed by Tom Jones under sponsorship + * from the FreeBSD Foundation. + * + */ + +#include +#include +#include + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "netmap", +}; -- cgit 1.2.3-korg