diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/ixge/CMakeLists.txt | 20 | ||||
-rw-r--r-- | src/plugins/ixge/ixge.c | 2938 | ||||
-rw-r--r-- | src/plugins/ixge/ixge.h | 1292 | ||||
-rw-r--r-- | src/vat/api_format.c | 101 | ||||
-rw-r--r-- | src/vnet/CMakeLists.txt | 20 | ||||
-rw-r--r-- | src/vnet/devices/netmap/FEATURE.yaml | 12 | ||||
-rw-r--r-- | src/vnet/devices/netmap/cli.c | 236 | ||||
-rw-r--r-- | src/vnet/devices/netmap/device.c | 252 | ||||
-rw-r--r-- | src/vnet/devices/netmap/dir.dox | 27 | ||||
-rw-r--r-- | src/vnet/devices/netmap/net_netmap.h | 650 | ||||
-rw-r--r-- | src/vnet/devices/netmap/netmap.api | 56 | ||||
-rw-r--r-- | src/vnet/devices/netmap/netmap.c | 314 | ||||
-rw-r--r-- | src/vnet/devices/netmap/netmap.h | 166 | ||||
-rw-r--r-- | src/vnet/devices/netmap/netmap_api.c | 137 | ||||
-rw-r--r-- | src/vnet/devices/netmap/node.c | 298 | ||||
-rw-r--r-- | src/vnet/vnet_all_api_h.h | 1 | ||||
-rw-r--r-- | src/vpp/api/vpe.api | 1 |
17 files changed, 0 insertions, 6521 deletions
diff --git a/src/plugins/ixge/CMakeLists.txt b/src/plugins/ixge/CMakeLists.txt deleted file mode 100644 index 226652c72e2..00000000000 --- a/src/plugins/ixge/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2018 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -add_vpp_plugin(ixge - SOURCES - ixge.c - - INSTALL_HEADERS - ixge.h -) diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c deleted file mode 100644 index 6ab79c9872c..00000000000 --- a/src/plugins/ixge/ixge.c +++ /dev/null @@ -1,2938 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * WARNING! - * This driver is not intended for production use and it is unsupported. - * It is provided for educational use only. - * Please use supported DPDK driver instead. - */ - -#if __x86_64__ || __i386__ || __aarch64__ -#include <vppinfra/vector.h> - -#ifndef CLIB_HAVE_VEC128 -#warning HACK: ixge driver wont really work, missing u32x4 -typedef unsigned long long u32x4; -#endif - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vlib/pci/pci.h> -#include <vnet/vnet.h> -#include <ixge/ixge.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/plugin/plugin.h> -#include <vpp/app/version.h> - -#define IXGE_ALWAYS_POLL 0 - -#define EVENT_SET_FLAGS 0 -#define IXGE_HWBP_RACE_ELOG 0 - -#define PCI_VENDOR_ID_INTEL 0x8086 - -/* 10 GIG E (XGE) PHY IEEE 802.3 clause 45 definitions. */ -#define XGE_PHY_DEV_TYPE_PMA_PMD 1 -#define XGE_PHY_DEV_TYPE_PHY_XS 4 -#define XGE_PHY_ID1 0x2 -#define XGE_PHY_ID2 0x3 -#define XGE_PHY_CONTROL 0x0 -#define XGE_PHY_CONTROL_RESET (1 << 15) - -ixge_main_t ixge_main; -static vlib_node_registration_t ixge_input_node; -static vlib_node_registration_t ixge_process_node; - -static void -ixge_semaphore_get (ixge_device_t * xd) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - u32 i; - - i = 0; - while (!(r->software_semaphore & (1 << 0))) - { - if (i > 0) - vlib_process_suspend (vm, 100e-6); - i++; - } - do - { - r->software_semaphore |= 1 << 1; - } - while (!(r->software_semaphore & (1 << 1))); -} - -static void -ixge_semaphore_release (ixge_device_t * xd) -{ - ixge_regs_t *r = xd->regs; - r->software_semaphore &= ~3; -} - -static void -ixge_software_firmware_sync (ixge_device_t * xd, u32 sw_mask) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - u32 fw_mask = sw_mask << 5; - u32 m, done = 0; - - while (!done) - { - ixge_semaphore_get (xd); - m = r->software_firmware_sync; - done = (m & fw_mask) == 0; - if (done) - r->software_firmware_sync = m | sw_mask; - ixge_semaphore_release (xd); - if (!done) - vlib_process_suspend (vm, 10e-3); - } -} - -static void -ixge_software_firmware_sync_release (ixge_device_t * xd, u32 sw_mask) -{ - ixge_regs_t *r = xd->regs; - ixge_semaphore_get (xd); - r->software_firmware_sync &= ~sw_mask; - ixge_semaphore_release (xd); -} - -u32 -ixge_read_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, - u32 v, u32 is_read) -{ - ixge_regs_t *r = xd->regs; - const u32 busy_bit = 1 << 30; - u32 x; - - ASSERT (xd->phy_index < 2); - ixge_software_firmware_sync (xd, 1 << (1 + xd->phy_index)); - - ASSERT (reg_index < (1 << 16)); - ASSERT (dev_type < (1 << 5)); - if (!is_read) - r->xge_mac.phy_data = v; - - /* Address cycle. */ - x = - reg_index | (dev_type << 16) | (xd-> - phys[xd->phy_index].mdio_address << 21); - r->xge_mac.phy_command = x | busy_bit; - /* Busy wait timed to take 28e-6 secs. No suspend. */ - while (r->xge_mac.phy_command & busy_bit) - ; - - r->xge_mac.phy_command = x | ((is_read ? 2 : 1) << 26) | busy_bit; - while (r->xge_mac.phy_command & busy_bit) - ; - - if (is_read) - v = r->xge_mac.phy_data >> 16; - - ixge_software_firmware_sync_release (xd, 1 << (1 + xd->phy_index)); - - return v; -} - -static u32 -ixge_read_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index) -{ - return ixge_read_write_phy_reg (xd, dev_type, reg_index, 0, /* is_read */ - 1); -} - -static void -ixge_write_phy_reg (ixge_device_t * xd, u32 dev_type, u32 reg_index, u32 v) -{ - (void) ixge_read_write_phy_reg (xd, dev_type, reg_index, v, /* is_read */ - 0); -} - -static void -ixge_i2c_put_bits (i2c_bus_t * b, int scl, int sda) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); - u32 v; - - v = 0; - v |= (sda != 0) << 3; - v |= (scl != 0) << 1; - xd->regs->i2c_control = v; -} - -static void -ixge_i2c_get_bits (i2c_bus_t * b, int *scl, int *sda) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, b->private_data); - u32 v; - - v = xd->regs->i2c_control; - *sda = (v & (1 << 2)) != 0; - *scl = (v & (1 << 0)) != 0; -} - -static u16 -ixge_read_eeprom (ixge_device_t * xd, u32 address) -{ - ixge_regs_t *r = xd->regs; - u32 v; - r->eeprom_read = (( /* start bit */ (1 << 0)) | (address << 2)); - /* Wait for done bit. */ - while (!((v = r->eeprom_read) & (1 << 1))) - ; - return v >> 16; -} - -static void -ixge_sfp_enable_disable_laser (ixge_device_t * xd, uword enable) -{ - u32 tx_disable_bit = 1 << 3; - if (enable) - xd->regs->sdp_control &= ~tx_disable_bit; - else - xd->regs->sdp_control |= tx_disable_bit; -} - -static void -ixge_sfp_enable_disable_10g (ixge_device_t * xd, uword enable) -{ - u32 is_10g_bit = 1 << 5; - if (enable) - xd->regs->sdp_control |= is_10g_bit; - else - xd->regs->sdp_control &= ~is_10g_bit; -} - -static clib_error_t * -ixge_sfp_phy_init_from_eeprom (ixge_device_t * xd, u16 sfp_type) -{ - u16 a, id, reg_values_addr = 0; - - a = ixge_read_eeprom (xd, 0x2b); - if (a == 0 || a == 0xffff) - return clib_error_create ("no init sequence in eeprom"); - - while (1) - { - id = ixge_read_eeprom (xd, ++a); - if (id == 0xffff) - break; - reg_values_addr = ixge_read_eeprom (xd, ++a); - if (id == sfp_type) - break; - } - if (id != sfp_type) - return clib_error_create ("failed to find id 0x%x", sfp_type); - - ixge_software_firmware_sync (xd, 1 << 3); - while (1) - { - u16 v = ixge_read_eeprom (xd, ++reg_values_addr); - if (v == 0xffff) - break; - xd->regs->core_analog_config = v; - } - ixge_software_firmware_sync_release (xd, 1 << 3); - - /* Make sure laser is off. We'll turn on the laser when - the interface is brought up. */ - ixge_sfp_enable_disable_laser (xd, /* enable */ 0); - ixge_sfp_enable_disable_10g (xd, /* is_10g */ 1); - - return 0; -} - -static void -ixge_sfp_device_up_down (ixge_device_t * xd, uword is_up) -{ - u32 v; - - if (is_up) - { - /* pma/pmd 10g serial SFI. */ - xd->regs->xge_mac.auto_negotiation_control2 &= ~(3 << 16); - xd->regs->xge_mac.auto_negotiation_control2 |= 2 << 16; - - v = xd->regs->xge_mac.auto_negotiation_control; - v &= ~(7 << 13); - v |= (0 << 13); - /* Restart autoneg. */ - v |= (1 << 12); - xd->regs->xge_mac.auto_negotiation_control = v; - - while (!(xd->regs->xge_mac.link_partner_ability[0] & 0xf0000)) - ; - - v = xd->regs->xge_mac.auto_negotiation_control; - - /* link mode 10g sfi serdes */ - v &= ~(7 << 13); - v |= (3 << 13); - - /* Restart autoneg. */ - v |= (1 << 12); - xd->regs->xge_mac.auto_negotiation_control = v; - - xd->regs->xge_mac.link_status; - } - - ixge_sfp_enable_disable_laser (xd, /* enable */ is_up); - - /* Give time for link partner to notice that we're up. */ - if (is_up && vlib_in_process_context (vlib_get_main ())) - { - vlib_process_suspend (vlib_get_main (), 300e-3); - } -} - -always_inline ixge_dma_regs_t * -get_dma_regs (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 qi) -{ - ixge_regs_t *r = xd->regs; - ASSERT (qi < 128); - if (rt == VLIB_RX) - return qi < 64 ? &r->rx_dma0[qi] : &r->rx_dma1[qi - 64]; - else - return &r->tx_dma[qi]; -} - -static clib_error_t * -ixge_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); - uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, hif->dev_instance); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); - - if (is_up) - { - xd->regs->rx_enable |= 1; - xd->regs->tx_dma_control |= 1; - dr->control |= 1 << 25; - while (!(dr->control & (1 << 25))) - ; - } - else - { - xd->regs->rx_enable &= ~1; - xd->regs->tx_dma_control &= ~1; - } - - ixge_sfp_device_up_down (xd, is_up); - - return /* no error */ 0; -} - -static void -ixge_sfp_phy_init (ixge_device_t * xd) -{ - ixge_phy_t *phy = xd->phys + xd->phy_index; - i2c_bus_t *ib = &xd->i2c_bus; - - ib->private_data = xd->device_index; - ib->put_bits = ixge_i2c_put_bits; - ib->get_bits = ixge_i2c_get_bits; - vlib_i2c_init (ib); - - vlib_i2c_read_eeprom (ib, 0x50, 0, 128, (u8 *) & xd->sfp_eeprom); - - if (vlib_i2c_bus_timed_out (ib) || !sfp_eeprom_is_valid (&xd->sfp_eeprom)) - xd->sfp_eeprom.id = SFP_ID_UNKNOWN; - else - { - /* FIXME 5 => SR/LR eeprom ID. */ - clib_error_t *e = - ixge_sfp_phy_init_from_eeprom (xd, 5 + xd->pci_function); - if (e) - clib_error_report (e); - } - - phy->mdio_address = ~0; -} - -static void -ixge_phy_init (ixge_device_t * xd) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_phy_t *phy = xd->phys + xd->phy_index; - - switch (xd->device_id) - { - case IXGE_82599_sfp: - case IXGE_82599_sfp_em: - case IXGE_82599_sfp_fcoe: - /* others? */ - return ixge_sfp_phy_init (xd); - - default: - break; - } - - /* Probe address of phy. */ - { - u32 i, v; - - phy->mdio_address = ~0; - for (i = 0; i < 32; i++) - { - phy->mdio_address = i; - v = ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1); - if (v != 0xffff && v != 0) - break; - } - - /* No PHY found? */ - if (i >= 32) - return; - } - - phy->id = - ((ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID1) << 16) | - ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PMA_PMD, XGE_PHY_ID2)); - - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, phy id 0x%d mdio address %d",.format_args = "i4i4i4",}; - struct - { - u32 instance, id, address; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->id = phy->id; - ed->address = phy->mdio_address; - } - - /* Reset phy. */ - ixge_write_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL, - XGE_PHY_CONTROL_RESET); - - /* Wait for self-clearning reset bit to clear. */ - do - { - vlib_process_suspend (vm, 1e-3); - } - while (ixge_read_phy_reg (xd, XGE_PHY_DEV_TYPE_PHY_XS, XGE_PHY_CONTROL) & - XGE_PHY_CONTROL_RESET); -} - -static u8 * -format_ixge_rx_from_hw_descriptor (u8 * s, va_list * va) -{ - ixge_rx_from_hw_descriptor_t *d = - va_arg (*va, ixge_rx_from_hw_descriptor_t *); - u32 s0 = d->status[0], s2 = d->status[2]; - u32 is_ip4, is_ip6, is_ip, is_tcp, is_udp; - u32 indent = format_get_indent (s); - - s = format (s, "%s-owned", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE) ? "sw" : - "hw"); - s = - format (s, ", length this descriptor %d, l3 offset %d", - d->n_packet_bytes_this_descriptor, - IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s0)); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) - s = format (s, ", end-of-packet"); - - s = format (s, "\n%U", format_white_space, indent); - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR) - s = format (s, "layer2 error"); - - if (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2) - { - s = format (s, "layer 2 type %d", (s0 & 0x1f)); - return s; - } - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN) - s = format (s, "vlan header 0x%x\n%U", d->vlan_tag, - format_white_space, indent); - - if ((is_ip4 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4))) - { - s = format (s, "ip4%s", - (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT) ? " options" : - ""); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED) - s = format (s, " checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) ? - "bad" : "ok"); - } - if ((is_ip6 = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6))) - s = format (s, "ip6%s", - (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT) ? " extended" : - ""); - is_tcp = is_udp = 0; - if ((is_ip = (is_ip4 | is_ip6))) - { - is_tcp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP) != 0; - is_udp = (s0 & IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP) != 0; - if (is_tcp) - s = format (s, ", tcp"); - if (is_udp) - s = format (s, ", udp"); - } - - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED) - s = format (s, ", tcp checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR) ? "bad" : - "ok"); - if (s2 & IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED) - s = format (s, ", udp checksum %s", - (s2 & IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR) ? "bad" : - "ok"); - - return s; -} - -static u8 * -format_ixge_tx_descriptor (u8 * s, va_list * va) -{ - ixge_tx_descriptor_t *d = va_arg (*va, ixge_tx_descriptor_t *); - u32 s0 = d->status0, s1 = d->status1; - u32 indent = format_get_indent (s); - u32 v; - - s = format (s, "buffer 0x%Lx, %d packet bytes, %d bytes this buffer", - d->buffer_address, s1 >> 14, d->n_bytes_this_buffer); - - s = format (s, "\n%U", format_white_space, indent); - - if ((v = (s0 >> 0) & 3)) - s = format (s, "reserved 0x%x, ", v); - - if ((v = (s0 >> 2) & 3)) - s = format (s, "mac 0x%x, ", v); - - if ((v = (s0 >> 4) & 0xf) != 3) - s = format (s, "type 0x%x, ", v); - - s = format (s, "%s%s%s%s%s%s%s%s", - (s0 & (1 << 8)) ? "eop, " : "", - (s0 & (1 << 9)) ? "insert-fcs, " : "", - (s0 & (1 << 10)) ? "reserved26, " : "", - (s0 & (1 << 11)) ? "report-status, " : "", - (s0 & (1 << 12)) ? "reserved28, " : "", - (s0 & (1 << 13)) ? "is-advanced, " : "", - (s0 & (1 << 14)) ? "vlan-enable, " : "", - (s0 & (1 << 15)) ? "tx-segmentation, " : ""); - - if ((v = s1 & 0xf) != 0) - s = format (s, "status 0x%x, ", v); - - if ((v = (s1 >> 4) & 0xf)) - s = format (s, "context 0x%x, ", v); - - if ((v = (s1 >> 8) & 0x3f)) - s = format (s, "options 0x%x, ", v); - - return s; -} - -typedef struct -{ - ixge_descriptor_t before, after; - - u32 buffer_index; - - u16 device_index; - - u8 queue_index; - - u8 is_start_of_packet; - - /* Copy of VLIB buffer; packet data stored in pre_data. */ - vlib_buffer_t buffer; -} ixge_rx_dma_trace_t; - -static u8 * -format_ixge_rx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - vlib_node_t *node = va_arg (*va, vlib_node_t *); - vnet_main_t *vnm = vnet_get_main (); - ixge_rx_dma_trace_t *t = va_arg (*va, ixge_rx_dma_trace_t *); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); - format_function_t *f; - u32 indent = format_get_indent (s); - - { - vnet_sw_interface_t *sw = - vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - s = - format (s, "%U rx queue %d", format_vnet_sw_interface_name, vnm, sw, - t->queue_index); - } - - s = format (s, "\n%Ubefore: %U", - format_white_space, indent, - format_ixge_rx_from_hw_descriptor, &t->before); - s = format (s, "\n%Uafter : head/tail address 0x%Lx/0x%Lx", - format_white_space, indent, - t->after.rx_to_hw.head_address, t->after.rx_to_hw.tail_address); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vnet_buffer, &t->buffer); - - s = format (s, "\n%U", format_white_space, indent); - - f = node->format_buffer; - if (!f || !t->is_start_of_packet) - f = format_hex_bytes; - s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - -#define foreach_ixge_error \ - _ (none, "no error") \ - _ (tx_full_drops, "tx ring full drops") \ - _ (ip4_checksum_error, "ip4 checksum errors") \ - _ (rx_alloc_fail, "rx buf alloc from free list failed") \ - _ (rx_alloc_no_physmem, "rx buf alloc failed no physmem") - -typedef enum -{ -#define _(f,s) IXGE_ERROR_##f, - foreach_ixge_error -#undef _ - IXGE_N_ERROR, -} ixge_error_t; - -always_inline void -ixge_rx_next_and_error_from_status_x1 (ixge_device_t * xd, - u32 s00, u32 s02, - u8 * next0, u8 * error0, u32 * flags0) -{ - u8 is0_ip4, is0_ip6, n0, e0; - u32 f0; - - e0 = IXGE_ERROR_none; - n0 = IXGE_RX_NEXT_ETHERNET_INPUT; - - is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; - - e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? IXGE_ERROR_ip4_checksum_error : e0); - - is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; - - n0 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n0; - - /* Check for error. */ - n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; - - f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); - - f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); - - *error0 = e0; - *next0 = n0; - *flags0 = f0; -} - -always_inline void -ixge_rx_next_and_error_from_status_x2 (ixge_device_t * xd, - u32 s00, u32 s02, - u32 s10, u32 s12, - u8 * next0, u8 * error0, u32 * flags0, - u8 * next1, u8 * error1, u32 * flags1) -{ - u8 is0_ip4, is0_ip6, n0, e0; - u8 is1_ip4, is1_ip6, n1, e1; - u32 f0, f1; - - e0 = e1 = IXGE_ERROR_none; - n0 = n1 = IXGE_RX_NEXT_IP4_INPUT; - - is0_ip4 = s02 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - is1_ip4 = s12 & IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED; - - n0 = is0_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n0; - n1 = is1_ip4 ? IXGE_RX_NEXT_IP4_INPUT : n1; - - e0 = (is0_ip4 && (s02 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? IXGE_ERROR_ip4_checksum_error : e0); - e1 = (is1_ip4 && (s12 & IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR) - ? IXGE_ERROR_ip4_checksum_error : e1); - - is0_ip6 = s00 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - is1_ip6 = s10 & IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6; - - n0 = is0_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n0; - n1 = is1_ip6 ? IXGE_RX_NEXT_IP6_INPUT : n1; - - n0 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n0; - n1 = (xd->per_interface_next_index != ~0) ? - xd->per_interface_next_index : n1; - - /* Check for error. */ - n0 = e0 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n0; - n1 = e1 != IXGE_ERROR_none ? IXGE_RX_NEXT_DROP : n1; - - *error0 = e0; - *error1 = e1; - - *next0 = n0; - *next1 = n1; - - f0 = ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); - f1 = ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED - | IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED)) - ? VNET_BUFFER_F_L4_CHECKSUM_COMPUTED : 0); - - f0 |= ((s02 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); - f1 |= ((s12 & (IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR - | IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR)) - ? 0 : VNET_BUFFER_F_L4_CHECKSUM_CORRECT); - - *flags0 = f0; - *flags1 = f1; -} - -static void -ixge_rx_trace (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - ixge_descriptor_t * before_descriptors, - u32 * before_buffers, - ixge_descriptor_t * after_descriptors, uword n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = dq->rx.node; - ixge_rx_from_hw_descriptor_t *bd; - ixge_rx_to_hw_descriptor_t *ad; - u32 *b, n_left, is_sop, next_index_sop; - - n_left = n_descriptors; - b = before_buffers; - bd = &before_descriptors->rx_from_hw; - ad = &after_descriptors->rx_to_hw; - is_sop = dq->rx.is_start_of_packet; - next_index_sop = dq->rx.saved_start_of_packet_next_index; - - while (n_left >= 2) - { - u32 bi0, bi1, flags0, flags1; - vlib_buffer_t *b0, *b1; - ixge_rx_dma_trace_t *t0, *t1; - u8 next0, error0, next1, error1; - - bi0 = b[0]; - bi1 = b[1]; - n_left -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - ixge_rx_next_and_error_from_status_x2 (xd, - bd[0].status[0], bd[0].status[2], - bd[1].status[0], bd[1].status[2], - &next0, &error0, &flags0, - &next1, &error1, &flags1); - - next_index_sop = is_sop ? next0 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - next_index_sop = is_sop ? next1 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b1, /* follow_chain */ 0); - t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); - t1->is_start_of_packet = is_sop; - is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t1->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t1->device_index = xd->device_index; - t0->before.rx_from_hw = bd[0]; - t1->before.rx_from_hw = bd[1]; - t0->after.rx_to_hw = ad[0]; - t1->after.rx_to_hw = ad[1]; - t0->buffer_index = bi0; - t1->buffer_index = bi1; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - memcpy (t1->buffer.pre_data, b1->data + b1->current_data, - sizeof (t1->buffer.pre_data)); - - b += 2; - bd += 2; - ad += 2; - } - - while (n_left >= 1) - { - u32 bi0, flags0; - vlib_buffer_t *b0; - ixge_rx_dma_trace_t *t0; - u8 next0, error0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - ixge_rx_next_and_error_from_status_x1 (xd, - bd[0].status[0], bd[0].status[2], - &next0, &error0, &flags0); - - next_index_sop = is_sop ? next0 : next_index_sop; - vlib_trace_buffer (vm, node, next_index_sop, b0, /* follow_chain */ 0); - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t0->before.rx_from_hw = bd[0]; - t0->after.rx_to_hw = ad[0]; - t0->buffer_index = bi0; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - - b += 1; - bd += 1; - ad += 1; - } -} - -typedef struct -{ - ixge_tx_descriptor_t descriptor; - - u32 buffer_index; - - u16 device_index; - - u8 queue_index; - - u8 is_start_of_packet; - - /* Copy of VLIB buffer; packet data stored in pre_data. */ - vlib_buffer_t buffer; -} ixge_tx_dma_trace_t; - -static u8 * -format_ixge_tx_dma_trace (u8 * s, va_list * va) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); - ixge_tx_dma_trace_t *t = va_arg (*va, ixge_tx_dma_trace_t *); - vnet_main_t *vnm = vnet_get_main (); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, t->device_index); - format_function_t *f; - u32 indent = format_get_indent (s); - - { - vnet_sw_interface_t *sw = - vnet_get_sw_interface (vnm, xd->vlib_sw_if_index); - s = - format (s, "%U tx queue %d", format_vnet_sw_interface_name, vnm, sw, - t->queue_index); - } - - s = format (s, "\n%Udescriptor: %U", - format_white_space, indent, - format_ixge_tx_descriptor, &t->descriptor); - - s = format (s, "\n%Ubuffer 0x%x: %U", - format_white_space, indent, - t->buffer_index, format_vnet_buffer, &t->buffer); - - s = format (s, "\n%U", format_white_space, indent); - - f = format_ethernet_header_with_length; - if (!f || !t->is_start_of_packet) - f = format_hex_bytes; - s = format (s, "%U", f, t->buffer.pre_data, sizeof (t->buffer.pre_data)); - - return s; -} - -typedef struct -{ - vlib_node_runtime_t *node; - - u32 is_start_of_packet; - - u32 n_bytes_in_packet; - - ixge_tx_descriptor_t *start_of_packet_descriptor; -} ixge_tx_state_t; - -static void -ixge_tx_trace (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - ixge_tx_state_t * tx_state, - ixge_tx_descriptor_t * descriptors, - u32 * buffers, uword n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = tx_state->node; - ixge_tx_descriptor_t *d; - u32 *b, n_left, is_sop; - - n_left = n_descriptors; - b = buffers; - d = descriptors; - is_sop = tx_state->is_start_of_packet; - - while (n_left >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ixge_tx_dma_trace_t *t0, *t1; - - bi0 = b[0]; - bi1 = b[1]; - n_left -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); - t1->is_start_of_packet = is_sop; - is_sop = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t1->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t1->device_index = xd->device_index; - t0->descriptor = d[0]; - t1->descriptor = d[1]; - t0->buffer_index = bi0; - t1->buffer_index = bi1; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (&t1->buffer, b1, sizeof (b1[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - memcpy (t1->buffer.pre_data, b1->data + b1->current_data, - sizeof (t1->buffer.pre_data)); - - b += 2; - d += 2; - } - - while (n_left >= 1) - { - u32 bi0; - vlib_buffer_t *b0; - ixge_tx_dma_trace_t *t0; - - bi0 = b[0]; - n_left -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); - t0->is_start_of_packet = is_sop; - is_sop = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - t0->queue_index = dq->queue_index; - t0->device_index = xd->device_index; - t0->descriptor = d[0]; - t0->buffer_index = bi0; - memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data)); - memcpy (t0->buffer.pre_data, b0->data + b0->current_data, - sizeof (t0->buffer.pre_data)); - - b += 1; - d += 1; - } -} - -always_inline uword -ixge_ring_sub (ixge_dma_queue_t * q, u32 i0, u32 i1) -{ - i32 d = i1 - i0; - ASSERT (i0 < q->n_descriptors); - ASSERT (i1 < q->n_descriptors); - return d < 0 ? q->n_descriptors + d : d; -} - -always_inline uword -ixge_ring_add (ixge_dma_queue_t * q, u32 i0, u32 i1) -{ - u32 d = i0 + i1; - ASSERT (i0 < q->n_descriptors); - ASSERT (i1 < q->n_descriptors); - d -= d >= q->n_descriptors ? q->n_descriptors : 0; - return d; -} - -always_inline uword -ixge_tx_descriptor_matches_template (ixge_main_t * xm, - ixge_tx_descriptor_t * d) -{ - u32 cmp; - - cmp = ((d->status0 & xm->tx_descriptor_template_mask.status0) - ^ xm->tx_descriptor_template.status0); - if (cmp) - return 0; - cmp = ((d->status1 & xm->tx_descriptor_template_mask.status1) - ^ xm->tx_descriptor_template.status1); - if (cmp) - return 0; - - return 1; -} - -static uword -ixge_tx_no_wrap (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - u32 * buffers, - u32 start_descriptor_index, - u32 n_descriptors, ixge_tx_state_t * tx_state) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_tx_descriptor_t *d, *d_sop; - u32 n_left = n_descriptors; - u32 *to_free = vec_end (xm->tx_buffers_pending_free); - u32 *to_tx = - vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); - u32 is_sop = tx_state->is_start_of_packet; - u32 len_sop = tx_state->n_bytes_in_packet; - u16 template_status = xm->tx_descriptor_template.status0; - u32 descriptor_prefetch_rotor = 0; - - ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); - d = &dq->descriptors[start_descriptor_index].tx; - d_sop = is_sop ? d : tx_state->start_of_packet_descriptor; - - while (n_left >= 4) - { - vlib_buffer_t *b0, *b1; - u32 bi0, fi0, len0; - u32 bi1, fi1, len1; - u8 is_eop0, is_eop1; - - /* Prefetch next iteration. */ - vlib_prefetch_buffer_with_index (vm, buffers[2], LOAD); - vlib_prefetch_buffer_with_index (vm, buffers[3], LOAD); - - if ((descriptor_prefetch_rotor & 0x3) == 0) - CLIB_PREFETCH (d + 4, CLIB_CACHE_LINE_BYTES, STORE); - - descriptor_prefetch_rotor += 2; - - bi0 = buffers[0]; - bi1 = buffers[1]; - - to_free[0] = fi0 = to_tx[0]; - to_tx[0] = bi0; - to_free += fi0 != 0; - - to_free[0] = fi1 = to_tx[1]; - to_tx[1] = bi1; - to_free += fi1 != 0; - - buffers += 2; - n_left -= 2; - to_tx += 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - is_eop1 = (b1->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - len0 = b0->current_length; - len1 = b1->current_length; - - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 1)); - - d[0].buffer_address = vlib_buffer_get_pa (vm, b0); - d[1].buffer_address = vlib_buffer_get_pa (vm, b1); - - d[0].n_bytes_this_buffer = len0; - d[1].n_bytes_this_buffer = len1; - - d[0].status0 = - template_status | (is_eop0 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - d[1].status0 = - template_status | (is_eop1 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - - len_sop = (is_sop ? 0 : len_sop) + len0; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop0 ? d : d_sop; - - is_sop = is_eop0; - - len_sop = (is_sop ? 0 : len_sop) + len1; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop1 ? d : d_sop; - - is_sop = is_eop1; - } - - while (n_left > 0) - { - vlib_buffer_t *b0; - u32 bi0, fi0, len0; - u8 is_eop0; - - bi0 = buffers[0]; - - to_free[0] = fi0 = to_tx[0]; - to_tx[0] = bi0; - to_free += fi0 != 0; - - buffers += 1; - n_left -= 1; - to_tx += 1; - - b0 = vlib_get_buffer (vm, bi0); - - is_eop0 = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0; - - len0 = b0->current_length; - - ASSERT (ixge_tx_descriptor_matches_template (xm, d + 0)); - - d[0].buffer_address = vlib_buffer_get_pa (vm, b0); - d[0].n_bytes_this_buffer = len0; - - d[0].status0 = - template_status | (is_eop0 << - IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET); - - len_sop = (is_sop ? 0 : len_sop) + len0; - d_sop[0].status1 = - IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET (len_sop); - d += 1; - d_sop = is_eop0 ? d : d_sop; - - is_sop = is_eop0; - } - - if (tx_state->node->flags & VLIB_NODE_FLAG_TRACE) - { - to_tx = - vec_elt_at_index (dq->descriptor_buffer_indices, - start_descriptor_index); - ixge_tx_trace (xm, xd, dq, tx_state, - &dq->descriptors[start_descriptor_index].tx, to_tx, - n_descriptors); - } - - _vec_len (xm->tx_buffers_pending_free) = - to_free - xm->tx_buffers_pending_free; - - /* When we are done d_sop can point to end of ring. Wrap it if so. */ - { - ixge_tx_descriptor_t *d_start = &dq->descriptors[0].tx; - - ASSERT (d_sop - d_start <= dq->n_descriptors); - d_sop = d_sop - d_start == dq->n_descriptors ? d_start : d_sop; - } - - tx_state->is_start_of_packet = is_sop; - tx_state->start_of_packet_descriptor = d_sop; - tx_state->n_bytes_in_packet = len_sop; - - return n_descriptors; -} - -static uword -ixge_interface_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * f) -{ - ixge_main_t *xm = &ixge_main; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - ixge_device_t *xd = vec_elt_at_index (xm->devices, rd->dev_instance); - ixge_dma_queue_t *dq; - u32 *from, n_left_tx, n_descriptors_to_tx, n_tail_drop; - u32 queue_index = 0; /* fixme parameter */ - ixge_tx_state_t tx_state; - - tx_state.node = node; - tx_state.is_start_of_packet = 1; - tx_state.start_of_packet_descriptor = 0; - tx_state.n_bytes_in_packet = 0; - - from = vlib_frame_vector_args (f); - - dq = vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); - - dq->head_index = dq->tx.head_index_write_back[0]; - - /* Since head == tail means ring is empty we can send up to dq->n_descriptors - 1. */ - n_left_tx = dq->n_descriptors - 1; - n_left_tx -= ixge_ring_sub (dq, dq->head_index, dq->tail_index); - - _vec_len (xm->tx_buffers_pending_free) = 0; - - n_descriptors_to_tx = f->n_vectors; - n_tail_drop = 0; - if (PREDICT_FALSE (n_descriptors_to_tx > n_left_tx)) - { - i32 i, n_ok, i_eop, i_sop; - - i_sop = i_eop = ~0; - for (i = n_left_tx - 1; i >= 0; i--) - { - vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); - if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - if (i_sop != ~0 && i_eop != ~0) - break; - i_eop = i; - i_sop = i + 1; - } - } - if (i == 0) - n_ok = 0; - else - n_ok = i_eop + 1; - - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, ring full to tx %d head %d tail %d",.format_args = - "i2i2i2i2",}; - struct - { - u16 instance, to_tx, head, tail; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->to_tx = n_descriptors_to_tx; - ed->head = dq->head_index; - ed->tail = dq->tail_index; - } - - if (n_ok < n_descriptors_to_tx) - { - n_tail_drop = n_descriptors_to_tx - n_ok; - vec_add (xm->tx_buffers_pending_free, from + n_ok, n_tail_drop); - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_tx_full_drops, n_tail_drop); - } - - n_descriptors_to_tx = n_ok; - } - - dq->tx.n_buffers_on_ring += n_descriptors_to_tx; - - /* Process from tail to end of descriptor ring. */ - if (n_descriptors_to_tx > 0 && dq->tail_index < dq->n_descriptors) - { - u32 n = - clib_min (dq->n_descriptors - dq->tail_index, n_descriptors_to_tx); - n = ixge_tx_no_wrap (xm, xd, dq, from, dq->tail_index, n, &tx_state); - from += n; - n_descriptors_to_tx -= n; - dq->tail_index += n; - ASSERT (dq->tail_index <= dq->n_descriptors); - if (dq->tail_index == dq->n_descriptors) - dq->tail_index = 0; - } - - if (n_descriptors_to_tx > 0) - { - u32 n = - ixge_tx_no_wrap (xm, xd, dq, from, 0, n_descriptors_to_tx, &tx_state); - from += n; - ASSERT (n == n_descriptors_to_tx); - dq->tail_index += n; - ASSERT (dq->tail_index <= dq->n_descriptors); - if (dq->tail_index == dq->n_descriptors) - dq->tail_index = 0; - } - - /* We should only get full packets. */ - ASSERT (tx_state.is_start_of_packet); - - /* Report status when last descriptor is done. */ - { - u32 i = dq->tail_index == 0 ? dq->n_descriptors - 1 : dq->tail_index - 1; - ixge_tx_descriptor_t *d = &dq->descriptors[i].tx; - d->status0 |= IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS; - } - - /* Give new descriptors to hardware. */ - { - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_TX, queue_index); - - CLIB_MEMORY_BARRIER (); - - dr->tail_index = dq->tail_index; - } - - /* Free any buffers that are done. */ - { - u32 n = _vec_len (xm->tx_buffers_pending_free); - if (n > 0) - { - vlib_buffer_free_no_next (vm, xm->tx_buffers_pending_free, n); - _vec_len (xm->tx_buffers_pending_free) = 0; - ASSERT (dq->tx.n_buffers_on_ring >= n); - dq->tx.n_buffers_on_ring -= (n - n_tail_drop); - } - } - - return f->n_vectors; -} - -static uword -ixge_rx_queue_no_wrap (ixge_main_t * xm, - ixge_device_t * xd, - ixge_dma_queue_t * dq, - u32 start_descriptor_index, u32 n_descriptors) -{ - vlib_main_t *vm = xm->vlib_main; - vlib_node_runtime_t *node = dq->rx.node; - ixge_descriptor_t *d; - static ixge_descriptor_t *d_trace_save; - static u32 *d_trace_buffers; - u32 n_descriptors_left = n_descriptors; - u32 *to_rx = - vec_elt_at_index (dq->descriptor_buffer_indices, start_descriptor_index); - u32 *to_add; - u32 bi_sop = dq->rx.saved_start_of_packet_buffer_index; - u32 bi_last = dq->rx.saved_last_buffer_index; - u32 next_index_sop = dq->rx.saved_start_of_packet_next_index; - u32 is_sop = dq->rx.is_start_of_packet; - u32 next_index, n_left_to_next, *to_next; - u32 n_packets = 0; - u32 n_bytes = 0; - u32 n_trace = vlib_get_trace_count (vm, node); - vlib_buffer_t *b_last, b_dummy; - - ASSERT (start_descriptor_index + n_descriptors <= dq->n_descriptors); - d = &dq->descriptors[start_descriptor_index]; - - b_last = bi_last != ~0 ? vlib_get_buffer (vm, bi_last) : &b_dummy; - next_index = dq->rx.next_index; - - if (n_trace > 0) - { - u32 n = clib_min (n_trace, n_descriptors); - if (d_trace_save) - { - _vec_len (d_trace_save) = 0; - _vec_len (d_trace_buffers) = 0; - } - vec_add (d_trace_save, (ixge_descriptor_t *) d, n); - vec_add (d_trace_buffers, to_rx, n); - } - - { - uword l = vec_len (xm->rx_buffers_to_add); - - if (l < n_descriptors_left) - { - u32 n_to_alloc = 2 * dq->n_descriptors - l; - u32 n_allocated; - - vec_resize (xm->rx_buffers_to_add, n_to_alloc); - - _vec_len (xm->rx_buffers_to_add) = l; - n_allocated = - vlib_buffer_alloc (vm, xm->rx_buffers_to_add + l, n_to_alloc); - _vec_len (xm->rx_buffers_to_add) += n_allocated; - - /* Handle transient allocation failure */ - if (PREDICT_FALSE (l + n_allocated <= n_descriptors_left)) - { - if (n_allocated == 0) - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_rx_alloc_no_physmem, 1); - else - vlib_error_count (vm, ixge_input_node.index, - IXGE_ERROR_rx_alloc_fail, 1); - - n_descriptors_left = l + n_allocated; - } - n_descriptors = n_descriptors_left; - } - - /* Add buffers from end of vector going backwards. */ - to_add = vec_end (xm->rx_buffers_to_add) - 1; - } - - while (n_descriptors_left > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_descriptors_left >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *b0, *b1; - vlib_buffer_t *f0, *f1; - u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; - u32 bi1, fi1, len1, l3_offset1, s21, s01, flags1; - u8 is_eop0, error0, next0; - u8 is_eop1, error1, next1; - ixge_descriptor_t d0, d1; - - vlib_prefetch_buffer_with_index (vm, to_rx[2], STORE); - vlib_prefetch_buffer_with_index (vm, to_rx[3], STORE); - - CLIB_PREFETCH (d + 2, 32, STORE); - - d0.as_u32x4 = d[0].as_u32x4; - d1.as_u32x4 = d[1].as_u32x4; - - s20 = d0.rx_from_hw.status[2]; - s21 = d1.rx_from_hw.status[2]; - - s00 = d0.rx_from_hw.status[0]; - s01 = d1.rx_from_hw.status[0]; - - if (! - ((s20 & s21) & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) - goto found_hw_owned_descriptor_x2; - - bi0 = to_rx[0]; - bi1 = to_rx[1]; - - ASSERT (to_add - 1 >= xm->rx_buffers_to_add); - fi0 = to_add[0]; - fi1 = to_add[-1]; - - to_rx[0] = fi0; - to_rx[1] = fi1; - to_rx += 2; - to_add -= 2; - -#if 0 - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (bi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (bi1)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (fi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (fi1)); -#endif - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... See main.c... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); - - CLIB_PREFETCH (b0->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (b1->data, CLIB_CACHE_LINE_BYTES, LOAD); - - is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - is_eop1 = (s21 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - - ixge_rx_next_and_error_from_status_x2 (xd, s00, s20, s01, s21, - &next0, &error0, &flags0, - &next1, &error1, &flags1); - - next0 = is_sop ? next0 : next_index_sop; - next1 = is_eop0 ? next1 : next0; - next_index_sop = next1; - - b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - b1->flags |= flags1 | (!is_eop1 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - b0->error = node->errors[error0]; - b1->error = node->errors[error1]; - - len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; - len1 = d1.rx_from_hw.n_packet_bytes_this_descriptor; - n_bytes += len0 + len1; - n_packets += is_eop0 + is_eop1; - - /* Give new buffers to hardware. */ - f0 = vlib_get_buffer (vm, fi0); - f1 = vlib_get_buffer (vm, fi1); - d0.rx_to_hw.tail_address = vlib_buffer_get_pa (vm, f0); - d1.rx_to_hw.tail_address = vlib_buffer_get_pa (vm, f1); - d0.rx_to_hw.head_address = d[0].rx_to_hw.tail_address; - d1.rx_to_hw.head_address = d[1].rx_to_hw.tail_address; - d[0].as_u32x4 = d0.as_u32x4; - d[1].as_u32x4 = d1.as_u32x4; - - d += 2; - n_descriptors_left -= 2; - - /* Point to either l2 or l3 header depending on next. */ - l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; - l3_offset1 = (is_eop0 && (next1 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s01) : 0; - - b0->current_length = len0 - l3_offset0; - b1->current_length = len1 - l3_offset1; - b0->current_data = l3_offset0; - b1->current_data = l3_offset1; - - b_last->next_buffer = is_sop ? ~0 : bi0; - b0->next_buffer = is_eop0 ? ~0 : bi1; - bi_last = bi1; - b_last = b1; - - if (CLIB_DEBUG > 0) - { - u32 bi_sop0 = is_sop ? bi0 : bi_sop; - u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; - - if (is_eop0) - { - u8 *msg = vlib_validate_buffer (vm, bi_sop0, - /* follow_buffer_next */ 1); - ASSERT (!msg); - } - if (is_eop1) - { - u8 *msg = vlib_validate_buffer (vm, bi_sop1, - /* follow_buffer_next */ 1); - ASSERT (!msg); - } - } - if (0) /* "Dave" version */ - { - u32 bi_sop0 = is_sop ? bi0 : bi_sop; - u32 bi_sop1 = is_eop0 ? bi1 : bi_sop0; - - if (is_eop0) - { - to_next[0] = bi_sop0; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop0, next0); - } - if (is_eop1) - { - to_next[0] = bi_sop1; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop1, next1); - } - is_sop = is_eop1; - bi_sop = bi_sop1; - } - if (1) /* "Eliot" version */ - { - /* Speculatively enqueue to cached next. */ - u8 saved_is_sop = is_sop; - u32 bi_sop_save = bi_sop; - - bi_sop = saved_is_sop ? bi0 : bi_sop; - to_next[0] = bi_sop; - to_next += is_eop0; - n_left_to_next -= is_eop0; - - bi_sop = is_eop0 ? bi1 : bi_sop; - to_next[0] = bi_sop; - to_next += is_eop1; - n_left_to_next -= is_eop1; - - is_sop = is_eop1; - - if (PREDICT_FALSE - (!(next0 == next_index && next1 == next_index))) - { - /* Undo speculation. */ - to_next -= is_eop0 + is_eop1; - n_left_to_next += is_eop0 + is_eop1; - - /* Re-do both descriptors being careful about where we enqueue. */ - bi_sop = saved_is_sop ? bi0 : bi_sop_save; - if (is_eop0) - { - if (next0 != next_index) - vlib_set_next_frame_buffer (vm, node, next0, bi_sop); - else - { - to_next[0] = bi_sop; - to_next += 1; - n_left_to_next -= 1; - } - } - - bi_sop = is_eop0 ? bi1 : bi_sop; - if (is_eop1) - { - if (next1 != next_index) - vlib_set_next_frame_buffer (vm, node, next1, bi_sop); - else - { - to_next[0] = bi_sop; - to_next += 1; - n_left_to_next -= 1; - } - } - - /* Switch cached next index when next for both packets is the same. */ - if (is_eop0 && is_eop1 && next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next0; - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - } - } - } - } - - /* Bail out of dual loop and proceed with single loop. */ - found_hw_owned_descriptor_x2: - - while (n_descriptors_left > 0 && n_left_to_next > 0) - { - vlib_buffer_t *b0; - vlib_buffer_t *f0; - u32 bi0, fi0, len0, l3_offset0, s20, s00, flags0; - u8 is_eop0, error0, next0; - ixge_descriptor_t d0; - - d0.as_u32x4 = d[0].as_u32x4; - - s20 = d0.rx_from_hw.status[2]; - s00 = d0.rx_from_hw.status[0]; - - if (!(s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE)) - goto found_hw_owned_descriptor_x1; - - bi0 = to_rx[0]; - ASSERT (to_add >= xm->rx_buffers_to_add); - fi0 = to_add[0]; - - to_rx[0] = fi0; - to_rx += 1; - to_add -= 1; - -#if 0 - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (bi0)); - ASSERT (VLIB_BUFFER_KNOWN_ALLOCATED == vlib_buffer_is_known (fi0)); -#endif - - b0 = vlib_get_buffer (vm, bi0); - - /* - * Turn this on if you run into - * "bad monkey" contexts, and you want to know exactly - * which nodes they've visited... - */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - - is_eop0 = (s20 & IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET) != 0; - ixge_rx_next_and_error_from_status_x1 - (xd, s00, s20, &next0, &error0, &flags0); - - next0 = is_sop ? next0 : next_index_sop; - next_index_sop = next0; - - b0->flags |= flags0 | (!is_eop0 << VLIB_BUFFER_LOG2_NEXT_PRESENT); - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - - b0->error = node->errors[error0]; - - len0 = d0.rx_from_hw.n_packet_bytes_this_descriptor; - n_bytes += len0; - n_packets += is_eop0; - - /* Give new buffer to hardware. */ - f0 = vlib_get_buffer (vm, fi0); - d0.rx_to_hw.tail_address = vlib_buffer_get_pa (vm, f0); - d0.rx_to_hw.head_address = d0.rx_to_hw.tail_address; - d[0].as_u32x4 = d0.as_u32x4; - - d += 1; - n_descriptors_left -= 1; - - /* Point to either l2 or l3 header depending on next. */ - l3_offset0 = (is_sop && (next0 != IXGE_RX_NEXT_ETHERNET_INPUT)) - ? IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET (s00) : 0; - b0->current_length = len0 - l3_offset0; - b0->current_data = l3_offset0; - - b_last->next_buffer = is_sop ? ~0 : bi0; - bi_last = bi0; - b_last = b0; - - bi_sop = is_sop ? bi0 : bi_sop; - - if (CLIB_DEBUG > 0 && is_eop0) - { - u8 *msg = - vlib_validate_buffer (vm, bi_sop, /* follow_buffer_next */ 1); - ASSERT (!msg); - } - - if (0) /* "Dave" version */ - { - if (is_eop0) - { - to_next[0] = bi_sop; - to_next++; - n_left_to_next--; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi_sop, next0); - } - } - if (1) /* "Eliot" version */ - { - if (PREDICT_TRUE (next0 == next_index)) - { - to_next[0] = bi_sop; - to_next += is_eop0; - n_left_to_next -= is_eop0; - } - else - { - if (next0 != next_index && is_eop0) - vlib_set_next_frame_buffer (vm, node, next0, bi_sop); - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - next_index = next0; - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - } - } - is_sop = is_eop0; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - -found_hw_owned_descriptor_x1: - if (n_descriptors_left > 0) - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - _vec_len (xm->rx_buffers_to_add) = (to_add + 1) - xm->rx_buffers_to_add; - - { - u32 n_done = n_descriptors - n_descriptors_left; - - if (n_trace > 0 && n_done > 0) - { - u32 n = clib_min (n_trace, n_done); - ixge_rx_trace (xm, xd, dq, - d_trace_save, - d_trace_buffers, - &dq->descriptors[start_descriptor_index], n); - vlib_set_trace_count (vm, node, n_trace - n); - } - if (d_trace_save) - { - _vec_len (d_trace_save) = 0; - _vec_len (d_trace_buffers) = 0; - } - - /* Don't keep a reference to b_last if we don't have to. - Otherwise we can over-write a next_buffer pointer after already haven - enqueued a packet. */ - if (is_sop) - { - b_last->next_buffer = ~0; - bi_last = ~0; - } - - dq->rx.n_descriptors_done_this_call = n_done; - dq->rx.n_descriptors_done_total += n_done; - dq->rx.is_start_of_packet = is_sop; - dq->rx.saved_start_of_packet_buffer_index = bi_sop; - dq->rx.saved_last_buffer_index = bi_last; - dq->rx.saved_start_of_packet_next_index = next_index_sop; - dq->rx.next_index = next_index; - dq->rx.n_bytes += n_bytes; - - return n_packets; - } -} - -static uword -ixge_rx_queue (ixge_main_t * xm, - ixge_device_t * xd, - vlib_node_runtime_t * node, u32 queue_index) -{ - ixge_dma_queue_t *dq = - vec_elt_at_index (xd->dma_queues[VLIB_RX], queue_index); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, dq->queue_index); - uword n_packets = 0; - u32 hw_head_index, sw_head_index; - - /* One time initialization. */ - if (!dq->rx.node) - { - dq->rx.node = node; - dq->rx.is_start_of_packet = 1; - dq->rx.saved_start_of_packet_buffer_index = ~0; - dq->rx.saved_last_buffer_index = ~0; - } - - dq->rx.next_index = node->cached_next_index; - - dq->rx.n_descriptors_done_total = 0; - dq->rx.n_descriptors_done_this_call = 0; - dq->rx.n_bytes = 0; - - /* Fetch head from hardware and compare to where we think we are. */ - hw_head_index = dr->head_index; - sw_head_index = dq->head_index; - - if (hw_head_index == sw_head_index) - goto done; - - if (hw_head_index < sw_head_index) - { - u32 n_tried = dq->n_descriptors - sw_head_index; - n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); - sw_head_index = - ixge_ring_add (dq, sw_head_index, - dq->rx.n_descriptors_done_this_call); - - if (dq->rx.n_descriptors_done_this_call != n_tried) - goto done; - } - if (hw_head_index >= sw_head_index) - { - u32 n_tried = hw_head_index - sw_head_index; - n_packets += ixge_rx_queue_no_wrap (xm, xd, dq, sw_head_index, n_tried); - sw_head_index = - ixge_ring_add (dq, sw_head_index, - dq->rx.n_descriptors_done_this_call); - } - -done: - dq->head_index = sw_head_index; - dq->tail_index = - ixge_ring_add (dq, dq->tail_index, dq->rx.n_descriptors_done_total); - - /* Give tail back to hardware. */ - CLIB_MEMORY_BARRIER (); - - dr->tail_index = dq->tail_index; - - vlib_increment_combined_counter (vnet_main. - interface_main.combined_sw_if_counters + - VNET_INTERFACE_COUNTER_RX, - 0 /* thread_index */ , - xd->vlib_sw_if_index, n_packets, - dq->rx.n_bytes); - - return n_packets; -} - -static void -ixge_interrupt (ixge_main_t * xm, ixge_device_t * xd, u32 i) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_regs_t *r = xd->regs; - - if (i != 20) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, %s",.format_args = "i1t1",.n_enum_strings = - 16,.enum_strings = - { - "flow director", - "rx miss", - "pci exception", - "mailbox", - "link status change", - "linksec key exchange", - "manageability event", - "reserved23", - "sdp0", - "sdp1", - "sdp2", - "sdp3", - "ecc", "descriptor handler error", "tcp timer", "other",},}; - struct - { - u8 instance; - u8 index; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->index = i - 16; - } - else - { - u32 v = r->xge_mac.link_status; - uword is_up = (v & (1 << 30)) != 0; - - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d, link status change 0x%x",.format_args = "i4i4",}; - struct - { - u32 instance, link_status; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->link_status = v; - xd->link_status_at_last_link_change = v; - - vlib_process_signal_event (vm, ixge_process_node.index, - EVENT_SET_FLAGS, - ((is_up << 31) | xd->vlib_hw_if_index)); - } -} - -always_inline u32 -clean_block (u32 * b, u32 * t, u32 n_left) -{ - u32 *t0 = t; - - while (n_left >= 4) - { - u32 bi0, bi1, bi2, bi3; - - t[0] = bi0 = b[0]; - b[0] = 0; - t += bi0 != 0; - - t[0] = bi1 = b[1]; - b[1] = 0; - t += bi1 != 0; - - t[0] = bi2 = b[2]; - b[2] = 0; - t += bi2 != 0; - - t[0] = bi3 = b[3]; - b[3] = 0; - t += bi3 != 0; - - b += 4; - n_left -= 4; - } - - while (n_left > 0) - { - u32 bi0; - - t[0] = bi0 = b[0]; - b[0] = 0; - t += bi0 != 0; - b += 1; - n_left -= 1; - } - - return t - t0; -} - -static void -ixge_tx_queue (ixge_main_t * xm, ixge_device_t * xd, u32 queue_index) -{ - vlib_main_t *vm = xm->vlib_main; - ixge_dma_queue_t *dq = - vec_elt_at_index (xd->dma_queues[VLIB_TX], queue_index); - u32 n_clean, *b, *t, *t0; - i32 n_hw_owned_descriptors; - i32 first_to_clean, last_to_clean; - u64 hwbp_race = 0; - - /* Handle case where head write back pointer update - * arrives after the interrupt during high PCI bus loads. - */ - while ((dq->head_index == dq->tx.head_index_write_back[0]) && - dq->tx.n_buffers_on_ring && (dq->head_index != dq->tail_index)) - { - hwbp_race++; - if (IXGE_HWBP_RACE_ELOG && (hwbp_race == 1)) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d tx head index race: head %4d, tail %4d, buffs %4d",.format_args - = "i4i4i4i4",}; - struct - { - u32 instance, head_index, tail_index, n_buffers_on_ring; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->head_index = dq->head_index; - ed->tail_index = dq->tail_index; - ed->n_buffers_on_ring = dq->tx.n_buffers_on_ring; - } - } - - dq->head_index = dq->tx.head_index_write_back[0]; - n_hw_owned_descriptors = ixge_ring_sub (dq, dq->head_index, dq->tail_index); - ASSERT (dq->tx.n_buffers_on_ring >= n_hw_owned_descriptors); - n_clean = dq->tx.n_buffers_on_ring - n_hw_owned_descriptors; - - if (IXGE_HWBP_RACE_ELOG && hwbp_race) - { - ELOG_TYPE_DECLARE (e) = - { - .function = (char *) __FUNCTION__,.format = - "ixge %d tx head index race: head %4d, hw_owned %4d, n_clean %4d, retries %d",.format_args - = "i4i4i4i4i4",}; - struct - { - u32 instance, head_index, n_hw_owned_descriptors, n_clean, retries; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->instance = xd->device_index; - ed->head_index = dq->head_index; - ed->n_hw_owned_descriptors = n_hw_owned_descriptors; - ed->n_clean = n_clean; - ed->retries = hwbp_race; - } - - /* - * This function used to wait until hardware owned zero descriptors. - * At high PPS rates, that doesn't happen until the TX ring is - * completely full of descriptors which need to be cleaned up. - * That, in turn, causes TX ring-full drops and/or long RX service - * interruptions. - */ - if (n_clean == 0) - return; - - /* Clean the n_clean descriptors prior to the reported hardware head */ - last_to_clean = dq->head_index - 1; - last_to_clean = (last_to_clean < 0) ? last_to_clean + dq->n_descriptors : - last_to_clean; - - first_to_clean = (last_to_clean) - (n_clean - 1); - first_to_clean = (first_to_clean < 0) ? first_to_clean + dq->n_descriptors : - first_to_clean; - - vec_resize (xm->tx_buffers_pending_free, dq->n_descriptors - 1); - t0 = t = xm->tx_buffers_pending_free; - b = dq->descriptor_buffer_indices + first_to_clean; - - /* Wrap case: clean from first to end, then start to last */ - if (first_to_clean > last_to_clean) - { - t += clean_block (b, t, (dq->n_descriptors - 1) - first_to_clean); - first_to_clean = 0; - b = dq->descriptor_buffer_indices; - } - - /* Typical case: clean from first to last */ - if (first_to_clean <= last_to_clean) - t += clean_block (b, t, (last_to_clean - first_to_clean) + 1); - - if (t > t0) - { - u32 n = t - t0; - vlib_buffer_free_no_next (vm, t0, n); - ASSERT (dq->tx.n_buffers_on_ring >= n); - dq->tx.n_buffers_on_ring -= n; - _vec_len (xm->tx_buffers_pending_free) = 0; - } -} - -/* RX queue interrupts 0 thru 7; TX 8 thru 15. */ -always_inline uword -ixge_interrupt_is_rx_queue (uword i) -{ - return i < 8; -} - -always_inline uword -ixge_interrupt_is_tx_queue (uword i) -{ - return i >= 8 && i < 16; -} - -always_inline uword -ixge_tx_queue_to_interrupt (uword i) -{ - return 8 + i; -} - -always_inline uword -ixge_rx_queue_to_interrupt (uword i) -{ - return 0 + i; -} - -always_inline uword -ixge_interrupt_rx_queue (uword i) -{ - ASSERT (ixge_interrupt_is_rx_queue (i)); - return i - 0; -} - -always_inline uword -ixge_interrupt_tx_queue (uword i) -{ - ASSERT (ixge_interrupt_is_tx_queue (i)); - return i - 8; -} - -static uword -ixge_device_input (ixge_main_t * xm, - ixge_device_t * xd, vlib_node_runtime_t * node) -{ - ixge_regs_t *r = xd->regs; - u32 i, s; - uword n_rx_packets = 0; - - s = r->interrupt.status_write_1_to_set; - if (s) - r->interrupt.status_write_1_to_clear = s; - - /* *INDENT-OFF* */ - foreach_set_bit (i, s, ({ - if (ixge_interrupt_is_rx_queue (i)) - n_rx_packets += ixge_rx_queue (xm, xd, node, ixge_interrupt_rx_queue (i)); - - else if (ixge_interrupt_is_tx_queue (i)) - ixge_tx_queue (xm, xd, ixge_interrupt_tx_queue (i)); - - else - ixge_interrupt (xm, xd, i); - })); - /* *INDENT-ON* */ - - return n_rx_packets; -} - -static uword -ixge_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd; - uword n_rx_packets = 0; - - if (node->state == VLIB_NODE_STATE_INTERRUPT) - { - uword i; - - /* Loop over devices with interrupts. */ - /* *INDENT-OFF* */ - foreach_set_bit (i, node->runtime_data[0], ({ - xd = vec_elt_at_index (xm->devices, i); - n_rx_packets += ixge_device_input (xm, xd, node); - - /* Re-enable interrupts since we're going to stay in interrupt mode. */ - if (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) - xd->regs->interrupt.enable_write_1_to_set = ~0; - })); - /* *INDENT-ON* */ - - /* Clear mask of devices with pending interrupts. */ - node->runtime_data[0] = 0; - } - else - { - /* Poll all devices for input/interrupts. */ - vec_foreach (xd, xm->devices) - { - n_rx_packets += ixge_device_input (xm, xd, node); - - /* Re-enable interrupts when switching out of polling mode. */ - if (node->flags & - VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) - xd->regs->interrupt.enable_write_1_to_set = ~0; - } - } - - return n_rx_packets; -} - -static char *ixge_error_strings[] = { -#define _(n,s) s, - foreach_ixge_error -#undef _ -}; - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ixge_input_node, static) = { - .function = ixge_input, - .type = VLIB_NODE_TYPE_INPUT, - .name = "ixge-input", - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, - - /* Will be enabled if/when hardware is detected. */ - .state = VLIB_NODE_STATE_DISABLED, - - .format_buffer = format_ethernet_header_with_length, - .format_trace = format_ixge_rx_dma_trace, - - .n_errors = IXGE_N_ERROR, - .error_strings = ixge_error_strings, - - .n_next_nodes = IXGE_RX_N_NEXT, - .next_nodes = { - [IXGE_RX_NEXT_DROP] = "error-drop", - [IXGE_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", - [IXGE_RX_NEXT_IP4_INPUT] = "ip4-input", - [IXGE_RX_NEXT_IP6_INPUT] = "ip6-input", - }, -}; - -/* *INDENT-ON* */ - -static u8 * -format_ixge_device_name (u8 * s, va_list * args) -{ - vlib_main_t *vm = vlib_get_main (); - u32 i = va_arg (*args, u32); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, i); - vlib_pci_addr_t *addr = vlib_pci_get_addr (vm, xd->pci_dev_handle); - return format (s, "TenGigabitEthernet%x/%x/%x/%x", - addr->domain, addr->bus, addr->slot, addr->function); -} - -#define IXGE_COUNTER_IS_64_BIT (1 << 0) -#define IXGE_COUNTER_NOT_CLEAR_ON_READ (1 << 1) - -static u8 ixge_counter_flags[] = { -#define _(a,f) 0, -#define _64(a,f) IXGE_COUNTER_IS_64_BIT, - foreach_ixge_counter -#undef _ -#undef _64 -}; - -static void -ixge_update_counters (ixge_device_t * xd) -{ - /* Byte offset for counter registers. */ - static u32 reg_offsets[] = { -#define _(a,f) (a) / sizeof (u32), -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - }; - volatile u32 *r = (volatile u32 *) xd->regs; - int i; - - for (i = 0; i < ARRAY_LEN (xd->counters); i++) - { - u32 o = reg_offsets[i]; - xd->counters[i] += r[o]; - if (ixge_counter_flags[i] & IXGE_COUNTER_NOT_CLEAR_ON_READ) - r[o] = 0; - if (ixge_counter_flags[i] & IXGE_COUNTER_IS_64_BIT) - xd->counters[i] += (u64) r[o + 1] << (u64) 32; - } -} - -static u8 * -format_ixge_device_id (u8 * s, va_list * args) -{ - u32 device_id = va_arg (*args, u32); - char *t = 0; - switch (device_id) - { -#define _(f,n) case n: t = #f; break; - foreach_ixge_pci_device_id; -#undef _ - default: - t = 0; - break; - } - if (t == 0) - s = format (s, "unknown 0x%x", device_id); - else - s = format (s, "%s", t); - return s; -} - -static u8 * -format_ixge_link_status (u8 * s, va_list * args) -{ - ixge_device_t *xd = va_arg (*args, ixge_device_t *); - u32 v = xd->link_status_at_last_link_change; - - s = format (s, "%s", (v & (1 << 30)) ? "up" : "down"); - - { - char *modes[] = { - "1g", "10g parallel", "10g serial", "autoneg", - }; - char *speeds[] = { - "unknown", "100m", "1g", "10g", - }; - s = format (s, ", mode %s, speed %s", - modes[(v >> 26) & 3], speeds[(v >> 28) & 3]); - } - - return s; -} - -static u8 * -format_ixge_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - CLIB_UNUSED (int verbose) = va_arg (*args, int); - vlib_main_t *vm = vlib_get_main (); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, dev_instance); - ixge_phy_t *phy = xd->phys + xd->phy_index; - u32 indent = format_get_indent (s); - - ixge_update_counters (xd); - xd->link_status_at_last_link_change = xd->regs->xge_mac.link_status; - - s = format (s, "Intel 8259X: id %U\n%Ulink %U", - format_ixge_device_id, xd->device_id, - format_white_space, indent + 2, format_ixge_link_status, xd); - - { - - vlib_pci_addr_t *addr = vlib_pci_get_addr (vm, xd->pci_dev_handle); - vlib_pci_device_info_t *d = vlib_pci_get_device_info (vm, addr, 0); - - if (d) - s = format (s, "\n%UPCIe %U", format_white_space, indent + 2, - format_vlib_pci_link_speed, d); - } - - s = format (s, "\n%U", format_white_space, indent + 2); - if (phy->mdio_address != ~0) - s = format (s, "PHY address %d, id 0x%x", phy->mdio_address, phy->id); - else if (xd->sfp_eeprom.id == SFP_ID_SFP) - s = format (s, "SFP %U", format_sfp_eeprom, &xd->sfp_eeprom); - else - s = format (s, "PHY not found"); - - /* FIXME */ - { - ixge_dma_queue_t *dq = vec_elt_at_index (xd->dma_queues[VLIB_RX], 0); - ixge_dma_regs_t *dr = get_dma_regs (xd, VLIB_RX, 0); - u32 hw_head_index = dr->head_index; - u32 sw_head_index = dq->head_index; - u32 nitems; - - nitems = ixge_ring_sub (dq, hw_head_index, sw_head_index); - s = format (s, "\n%U%d unprocessed, %d total buffers on rx queue 0 ring", - format_white_space, indent + 2, nitems, dq->n_descriptors); - - s = format (s, "\n%U%d buffers in driver rx cache", - format_white_space, indent + 2, - vec_len (xm->rx_buffers_to_add)); - - s = format (s, "\n%U%d buffers on tx queue 0 ring", - format_white_space, indent + 2, - xd->dma_queues[VLIB_TX][0].tx.n_buffers_on_ring); - } - { - u32 i; - u64 v; - static char *names[] = { -#define _(a,f) #f, -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - }; - - for (i = 0; i < ARRAY_LEN (names); i++) - { - v = xd->counters[i] - xd->counters_last_clear[i]; - if (v != 0) - s = format (s, "\n%U%-40U%16Ld", - format_white_space, indent + 2, - format_c_identifier, names[i], v); - } - } - - return s; -} - -static void -ixge_clear_hw_interface_counters (u32 instance) -{ - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd = vec_elt_at_index (xm->devices, instance); - ixge_update_counters (xd); - memcpy (xd->counters_last_clear, xd->counters, sizeof (xd->counters)); -} - -/* - * Dynamically redirect all pkts from a specific interface - * to the specified node - */ -static void -ixge_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - ixge_main_t *xm = &ixge_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - ixge_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - xd->per_interface_next_index = node_index; - return; - } - - xd->per_interface_next_index = - vlib_node_add_next (xm->vlib_main, ixge_input_node.index, node_index); -} - - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (ixge_device_class) = { - .name = "ixge", - .tx_function = ixge_interface_tx, - .format_device_name = format_ixge_device_name, - .format_device = format_ixge_device, - .format_tx_trace = format_ixge_tx_dma_trace, - .clear_counters = ixge_clear_hw_interface_counters, - .admin_up_down_function = ixge_interface_admin_up_down, - .rx_redirect_to_node = ixge_set_interface_next_node, -}; -/* *INDENT-ON* */ - -#define IXGE_N_BYTES_IN_RX_BUFFER (2048) // DAW-HACK: Set Rx buffer size so all packets < ETH_MTU_SIZE fit in the buffer (i.e. sop & eop for all descriptors). - -static clib_error_t * -ixge_dma_init (ixge_device_t * xd, vlib_rx_or_tx_t rt, u32 queue_index) -{ - ixge_main_t *xm = &ixge_main; - vlib_main_t *vm = xm->vlib_main; - ixge_dma_queue_t *dq; - clib_error_t *error = 0; - - vec_validate (xd->dma_queues[rt], queue_index); - dq = vec_elt_at_index (xd->dma_queues[rt], queue_index); - - if (!xm->n_descriptors_per_cache_line) - xm->n_descriptors_per_cache_line = - CLIB_CACHE_LINE_BYTES / sizeof (dq->descriptors[0]); - - if (!xm->n_bytes_in_rx_buffer) - xm->n_bytes_in_rx_buffer = IXGE_N_BYTES_IN_RX_BUFFER; - xm->n_bytes_in_rx_buffer = round_pow2 (xm->n_bytes_in_rx_buffer, 1024); - - if (!xm->n_descriptors[rt]) - xm->n_descriptors[rt] = 4 * VLIB_FRAME_SIZE; - - dq->queue_index = queue_index; - dq->n_descriptors = - round_pow2 (xm->n_descriptors[rt], xm->n_descriptors_per_cache_line); - dq->head_index = dq->tail_index = 0; - - dq->descriptors = vlib_physmem_alloc_aligned (vm, dq->n_descriptors * - sizeof (dq->descriptors[0]), - 128 /* per chip spec */ ); - if (!dq->descriptors) - return vlib_physmem_last_error (vm); - - clib_memset (dq->descriptors, 0, - dq->n_descriptors * sizeof (dq->descriptors[0])); - vec_resize (dq->descriptor_buffer_indices, dq->n_descriptors); - - if (rt == VLIB_RX) - { - u32 n_alloc, i; - - n_alloc = vlib_buffer_alloc (vm, dq->descriptor_buffer_indices, - vec_len (dq->descriptor_buffer_indices)); - ASSERT (n_alloc == vec_len (dq->descriptor_buffer_indices)); - for (i = 0; i < n_alloc; i++) - { - dq->descriptors[i].rx_to_hw.tail_address = - vlib_buffer_get_pa - (vm, vlib_get_buffer (vm, dq->descriptor_buffer_indices[i])); - } - } - else - { - u32 i; - - dq->tx.head_index_write_back = - vlib_physmem_alloc (vm, CLIB_CACHE_LINE_BYTES); - if (!dq->tx.head_index_write_back) - return vlib_physmem_last_error (vm); - - for (i = 0; i < dq->n_descriptors; i++) - dq->descriptors[i].tx = xm->tx_descriptor_template; - - vec_validate (xm->tx_buffers_pending_free, dq->n_descriptors - 1); - } - - { - ixge_dma_regs_t *dr = get_dma_regs (xd, rt, queue_index); - u64 a; - - a = vlib_physmem_get_pa (vm, dq->descriptors); - dr->descriptor_address[0] = a & 0xFFFFFFFF; - dr->descriptor_address[1] = a >> (u64) 32; - dr->n_descriptor_bytes = dq->n_descriptors * sizeof (dq->descriptors[0]); - dq->head_index = dq->tail_index = 0; - - if (rt == VLIB_RX) - { - ASSERT ((xm->n_bytes_in_rx_buffer / 1024) < 32); - dr->rx_split_control = - ( /* buffer size */ ((xm->n_bytes_in_rx_buffer / 1024) << 0) - | ( /* lo free descriptor threshold (units of 64 descriptors) */ - (1 << 22)) | ( /* descriptor type: advanced one buffer */ - (1 << 25)) | ( /* drop if no descriptors available */ - (1 << 28))); - - /* Give hardware all but last 16 cache lines' worth of descriptors. */ - dq->tail_index = dq->n_descriptors - - 16 * xm->n_descriptors_per_cache_line; - } - else - { - /* Make sure its initialized before hardware can get to it. */ - dq->tx.head_index_write_back[0] = dq->head_index; - - a = vlib_physmem_get_pa (vm, dq->tx.head_index_write_back); - dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a; - dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32; - } - - /* DMA on 82599 does not work with [13] rx data write relaxed ordering - and [12] undocumented set. */ - if (rt == VLIB_RX) - dr->dca_control &= ~((1 << 13) | (1 << 12)); - - CLIB_MEMORY_BARRIER (); - - if (rt == VLIB_TX) - { - xd->regs->tx_dma_control |= (1 << 0); - dr->control |= ((32 << 0) /* prefetch threshold */ - | (64 << 8) /* host threshold */ - | (0 << 16) /* writeback threshold */ ); - } - - /* Enable this queue and wait for hardware to initialize - before adding to tail. */ - if (rt == VLIB_TX) - { - dr->control |= 1 << 25; - while (!(dr->control & (1 << 25))) - ; - } - - /* Set head/tail indices and enable DMA. */ - dr->head_index = dq->head_index; - dr->tail_index = dq->tail_index; - } - - return error; -} - -static u32 -ixge_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) -{ - ixge_device_t *xd; - ixge_regs_t *r; - u32 old; - ixge_main_t *xm = &ixge_main; - - xd = vec_elt_at_index (xm->devices, hw->dev_instance); - r = xd->regs; - - old = r->filter_control; - - if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - r->filter_control = old | (1 << 9) /* unicast promiscuous */ ; - else - r->filter_control = old & ~(1 << 9); - - return old; -} - -static void -ixge_device_init (ixge_main_t * xm) -{ - vnet_main_t *vnm = vnet_get_main (); - ixge_device_t *xd; - - /* Reset chip(s). */ - vec_foreach (xd, xm->devices) - { - ixge_regs_t *r = xd->regs; - const u32 reset_bit = (1 << 26) | (1 << 3); - - r->control |= reset_bit; - - /* No need to suspend. Timed to take ~1e-6 secs */ - while (r->control & reset_bit) - ; - - /* Software loaded. */ - r->extended_control |= (1 << 28); - - ixge_phy_init (xd); - - /* Register ethernet interface. */ - { - u8 addr8[6]; - u32 i, addr32[2]; - clib_error_t *error; - - addr32[0] = r->rx_ethernet_address0[0][0]; - addr32[1] = r->rx_ethernet_address0[0][1]; - for (i = 0; i < 6; i++) - addr8[i] = addr32[i / 4] >> ((i % 4) * 8); - - error = ethernet_register_interface - (vnm, ixge_device_class.index, xd->device_index, - /* ethernet address */ addr8, - &xd->vlib_hw_if_index, ixge_flag_change); - if (error) - clib_error_report (error); - } - - { - vnet_sw_interface_t *sw = - vnet_get_hw_sw_interface (vnm, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - } - - ixge_dma_init (xd, VLIB_RX, /* queue_index */ 0); - - xm->n_descriptors[VLIB_TX] = 20 * VLIB_FRAME_SIZE; - - ixge_dma_init (xd, VLIB_TX, /* queue_index */ 0); - - /* RX/TX queue 0 gets mapped to interrupt bits 0 & 8. */ - r->interrupt.queue_mapping[0] = (( /* valid bit */ (1 << 7) | - ixge_rx_queue_to_interrupt (0)) << 0); - - r->interrupt.queue_mapping[0] |= (( /* valid bit */ (1 << 7) | - ixge_tx_queue_to_interrupt (0)) << 8); - - /* No use in getting too many interrupts. - Limit them to one every 3/4 ring size at line rate - min sized packets. - No need for this since kernel/vlib main loop provides adequate interrupt - limiting scheme. */ - if (0) - { - f64 line_rate_max_pps = - 10e9 / (8 * (64 + /* interframe padding */ 20)); - ixge_throttle_queue_interrupt (r, 0, - .75 * xm->n_descriptors[VLIB_RX] / - line_rate_max_pps); - } - - /* Accept all multicast and broadcast packets. Should really add them - to the dst_ethernet_address register array. */ - r->filter_control |= (1 << 10) | (1 << 8); - - /* Enable frames up to size in mac frame size register. */ - r->xge_mac.control |= 1 << 2; - r->xge_mac.rx_max_frame_size = (9216 + 14) << 16; - - /* Enable all interrupts. */ - if (!IXGE_ALWAYS_POLL) - r->interrupt.enable_write_1_to_set = ~0; - } -} - -static uword -ixge_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vnet_main_t *vnm = vnet_get_main (); - ixge_main_t *xm = &ixge_main; - ixge_device_t *xd; - uword event_type, *event_data = 0; - f64 timeout, link_debounce_deadline; - - ixge_device_init (xm); - - /* Clear all counters. */ - vec_foreach (xd, xm->devices) - { - ixge_update_counters (xd); - clib_memset (xd->counters, 0, sizeof (xd->counters)); - } - - timeout = 30.0; - link_debounce_deadline = 1e70; - - while (1) - { - /* 36 bit stat counters could overflow in ~50 secs. - We poll every 30 secs to be conservative. */ - vlib_process_wait_for_event_or_clock (vm, timeout); - - event_type = vlib_process_get_events (vm, &event_data); - - switch (event_type) - { - case EVENT_SET_FLAGS: - /* 1 ms */ - link_debounce_deadline = vlib_time_now (vm) + 1e-3; - timeout = 1e-3; - break; - - case ~0: - /* No events found: timer expired. */ - if (vlib_time_now (vm) > link_debounce_deadline) - { - vec_foreach (xd, xm->devices) - { - ixge_regs_t *r = xd->regs; - u32 v = r->xge_mac.link_status; - uword is_up = (v & (1 << 30)) != 0; - - vnet_hw_interface_set_flags - (vnm, xd->vlib_hw_if_index, - is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - link_debounce_deadline = 1e70; - timeout = 30.0; - } - break; - - default: - ASSERT (0); - } - - if (event_data) - _vec_len (event_data) = 0; - - /* Query stats every 30 secs. */ - { - f64 now = vlib_time_now (vm); - if (now - xm->time_last_stats_update > 30) - { - xm->time_last_stats_update = now; - vec_foreach (xd, xm->devices) ixge_update_counters (xd); - } - } - } - - return 0; -} - -static vlib_node_registration_t ixge_process_node = { - .function = ixge_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "ixge-process", -}; - -clib_error_t * -ixge_init (vlib_main_t * vm) -{ - ixge_main_t *xm = &ixge_main; - - xm->vlib_main = vm; - clib_memset (&xm->tx_descriptor_template, 0, - sizeof (xm->tx_descriptor_template)); - clib_memset (&xm->tx_descriptor_template_mask, 0, - sizeof (xm->tx_descriptor_template_mask)); - xm->tx_descriptor_template.status0 = - (IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED | - IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED | - IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS); - xm->tx_descriptor_template_mask.status0 = 0xffff; - xm->tx_descriptor_template_mask.status1 = 0x00003fff; - - xm->tx_descriptor_template_mask.status0 &= - ~(IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET - | IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS); - xm->tx_descriptor_template_mask.status1 &= - ~(IXGE_TX_DESCRIPTOR_STATUS1_DONE); - return 0; -} - -/* *INDENT-OFF* */ -VLIB_INIT_FUNCTION (ixge_init) = -{ - .runs_before = VLIB_INITS("pci_bus_init"), -}; -/* *INDENT-ON* */ - - -static void -ixge_pci_intr_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h) -{ - uword private_data = vlib_pci_get_private_data (vm, h); - - vlib_node_set_interrupt_pending (vm, ixge_input_node.index); - - /* Let node know which device is interrupting. */ - { - vlib_node_runtime_t *rt = - vlib_node_get_runtime (vm, ixge_input_node.index); - rt->runtime_data[0] |= 1 << private_data; - } -} - -static clib_error_t * -ixge_pci_init (vlib_main_t * vm, vlib_pci_dev_handle_t h) -{ - ixge_main_t *xm = &ixge_main; - clib_error_t *error = 0; - void *r; - ixge_device_t *xd; - vlib_pci_addr_t *addr = vlib_pci_get_addr (vm, h); - vlib_pci_device_info_t *d = vlib_pci_get_device_info (vm, addr, 0); - - error = vlib_pci_map_region (vm, h, 0, &r); - if (error) - return error; - - vec_add2 (xm->devices, xd, 1); - - if (vec_len (xm->devices) == 1) - { - ixge_input_node.function = ixge_input; - } - - xd->pci_dev_handle = h; - xd->device_id = d->device_id; - xd->regs = r; - xd->device_index = xd - xm->devices; - xd->pci_function = addr->function; - xd->per_interface_next_index = ~0; - - vlib_pci_set_private_data (vm, h, xd->device_index); - - /* Chip found so enable node. */ - { - vlib_node_set_state (vm, ixge_input_node.index, - (IXGE_ALWAYS_POLL - ? VLIB_NODE_STATE_POLLING - : VLIB_NODE_STATE_INTERRUPT)); - - //dev->private_data = xd->device_index; - } - - if (vec_len (xm->devices) == 1) - { - vlib_register_node (vm, &ixge_process_node); - xm->process_node_index = ixge_process_node.index; - } - - error = vlib_pci_bus_master_enable (vm, h); - - if (error) - return error; - - return vlib_pci_intr_enable (vm, h); -} - -/* *INDENT-OFF* */ -PCI_REGISTER_DEVICE (ixge_pci_device_registration,static) = { - .init_function = ixge_pci_init, - .interrupt_handler = ixge_pci_intr_handler, - .supported_devices = { -#define _(t,i) { .vendor_id = PCI_VENDOR_ID_INTEL, .device_id = i, }, - foreach_ixge_pci_device_id -#undef _ - { 0 }, - }, -}; -/* *INDENT-ON* */ - -void -ixge_set_next_node (ixge_rx_next_t next, char *name) -{ - vlib_node_registration_t *r = &ixge_input_node; - - switch (next) - { - case IXGE_RX_NEXT_IP4_INPUT: - case IXGE_RX_NEXT_IP6_INPUT: - case IXGE_RX_NEXT_ETHERNET_INPUT: - r->next_nodes[next] = name; - break; - - default: - clib_warning ("%s: illegal next %d\n", __FUNCTION__, next); - break; - } -} - -/* *INDENT-OFF* */ -VLIB_PLUGIN_REGISTER () = { - .version = VPP_BUILD_VER, - .default_disabled = 1, - .description = "Intel 82599 Family Native Driver (experimental)", -}; -#endif - -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/ixge/ixge.h b/src/plugins/ixge/ixge.h deleted file mode 100644 index f80d9c0e7cf..00000000000 --- a/src/plugins/ixge/ixge.h +++ /dev/null @@ -1,1292 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_ixge_h -#define included_ixge_h - -#include <vnet/vnet.h> -#include <vlib/pci/pci.h> -#include <vlib/i2c.h> -#include <vnet/ethernet/sfp.h> -#include <vnet/ip/ip4_packet.h> -#include <vnet/ip/ip6_packet.h> - -typedef volatile struct -{ - /* [31:7] 128 byte aligned. */ - u32 descriptor_address[2]; - u32 n_descriptor_bytes; - - /* [5] rx/tx descriptor dca enable - [6] rx packet head dca enable - [7] rx packet tail dca enable - [9] rx/tx descriptor relaxed order - [11] rx/tx descriptor write back relaxed order - [13] rx/tx data write/read relaxed order - [15] rx head data write relaxed order - [31:24] apic id for cpu's cache. */ - u32 dca_control; - - u32 head_index; - - /* [4:0] tail buffer size (in 1k byte units) - [13:8] head buffer size (in 64 byte units) - [24:22] lo free descriptors threshold (units of 64 descriptors) - [27:25] descriptor type 0 = legacy, 1 = advanced one buffer (e.g. tail), - 2 = advanced header splitting (head + tail), 5 = advanced header - splitting (head only). - [28] drop if no descriptors available. */ - u32 rx_split_control; - - u32 tail_index; - CLIB_PAD_FROM_TO (0x1c, 0x28); - - /* [7:0] rx/tx prefetch threshold - [15:8] rx/tx host threshold - [24:16] rx/tx write back threshold - [25] rx/tx enable - [26] tx descriptor writeback flush - [30] rx strip vlan enable */ - u32 control; - - u32 rx_coallesce_control; - - union - { - struct - { - /* packets bytes lo hi */ - u32 stats[3]; - - u32 unused; - } rx; - - struct - { - u32 unused[2]; - - /* [0] enables head write back. */ - u32 head_index_write_back_address[2]; - } tx; - }; -} ixge_dma_regs_t; - -/* Only advanced descriptors are supported. */ -typedef struct -{ - u64 tail_address; - u64 head_address; -} ixge_rx_to_hw_descriptor_t; - -typedef struct -{ - u32 status[3]; - u16 n_packet_bytes_this_descriptor; - u16 vlan_tag; -} ixge_rx_from_hw_descriptor_t; - -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_LAYER2 (1 << (4 + 11)) -/* Valid if not layer2. */ -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4 (1 << (4 + 0)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP4_EXT (1 << (4 + 1)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6 (1 << (4 + 2)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_IP6_EXT (1 << (4 + 3)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_TCP (1 << (4 + 4)) -#define IXGE_RX_DESCRIPTOR_STATUS0_IS_UDP (1 << (4 + 5)) -#define IXGE_RX_DESCRIPTOR_STATUS0_L3_OFFSET(s) (((s) >> 21) & 0x3ff) - -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_OWNED_BY_SOFTWARE (1 << (0 + 0)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_END_OF_PACKET (1 << (0 + 1)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_VLAN (1 << (0 + 3)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_UDP_CHECKSUMMED (1 << (0 + 4)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_TCP_CHECKSUMMED (1 << (0 + 5)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_IP4_CHECKSUMMED (1 << (0 + 6)) -#define IXGE_RX_DESCRIPTOR_STATUS2_NOT_UNICAST (1 << (0 + 7)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IS_DOUBLE_VLAN (1 << (0 + 9)) -#define IXGE_RX_DESCRIPTOR_STATUS2_UDP_CHECKSUM_ERROR (1 << (0 + 10)) -#define IXGE_RX_DESCRIPTOR_STATUS2_ETHERNET_ERROR (1 << (20 + 9)) -#define IXGE_RX_DESCRIPTOR_STATUS2_TCP_CHECKSUM_ERROR (1 << (20 + 10)) -#define IXGE_RX_DESCRIPTOR_STATUS2_IP4_CHECKSUM_ERROR (1 << (20 + 11)) - -/* For layer2 packets stats0 bottom 3 bits give ether type index from filter. */ -#define IXGE_RX_DESCRIPTOR_STATUS0_LAYER2_ETHERNET_TYPE(s) ((s) & 7) - -typedef struct -{ - u64 buffer_address; - u16 n_bytes_this_buffer; - u16 status0; - u32 status1; -#define IXGE_TX_DESCRIPTOR_STATUS0_ADVANCED (3 << 4) -#define IXGE_TX_DESCRIPTOR_STATUS0_IS_ADVANCED (1 << (8 + 5)) -#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS (8 + 3) -#define IXGE_TX_DESCRIPTOR_STATUS0_REPORT_STATUS (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_REPORT_STATUS) -#define IXGE_TX_DESCRIPTOR_STATUS0_INSERT_FCS (1 << (8 + 1)) -#define IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET (8 + 0) -#define IXGE_TX_DESCRIPTOR_STATUS0_IS_END_OF_PACKET (1 << IXGE_TX_DESCRIPTOR_STATUS0_LOG2_IS_END_OF_PACKET) -#define IXGE_TX_DESCRIPTOR_STATUS1_DONE (1 << 0) -#define IXGE_TX_DESCRIPTOR_STATUS1_CONTEXT(i) (/* valid */ (1 << 7) | ((i) << 4)) -#define IXGE_TX_DESCRIPTOR_STATUS1_IPSEC_OFFLOAD (1 << (8 + 2)) -#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_TCP_UDP_CHECKSUM (1 << (8 + 1)) -#define IXGE_TX_DESCRIPTOR_STATUS1_INSERT_IP4_CHECKSUM (1 << (8 + 0)) -#define IXGE_TX_DESCRIPTOR_STATUS0_N_BYTES_THIS_BUFFER(l) ((l) << 0) -#define IXGE_TX_DESCRIPTOR_STATUS1_N_BYTES_IN_PACKET(l) ((l) << 14) -} ixge_tx_descriptor_t; - -typedef struct -{ - struct - { - u8 checksum_start_offset; - u8 checksum_insert_offset; - u16 checksum_end_offset; - } ip, tcp; - u32 status0; - - u8 status1; - - /* Byte offset after UDP/TCP header. */ - u8 payload_offset; - - u16 max_tcp_segment_size; -} __attribute__ ((packed)) ixge_tx_context_descriptor_t; - -typedef union -{ - ixge_rx_to_hw_descriptor_t rx_to_hw; - ixge_rx_from_hw_descriptor_t rx_from_hw; - ixge_tx_descriptor_t tx; - u32x4 as_u32x4; -} ixge_descriptor_t; - -typedef volatile struct -{ - /* [2] pcie master disable - [3] mac reset - [26] global device reset */ - u32 control; - u32 control_alias; - /* [3:2] device id (0 or 1 for dual port chips) - [7] link is up - [17:10] num vfs - [18] io active - [19] pcie master enable status */ - u32 status_read_only; - CLIB_PAD_FROM_TO (0xc, 0x18); - /* [14] pf reset done - [17] relaxed ordering disable - [26] extended vlan enable - [28] driver loaded */ - u32 extended_control; - CLIB_PAD_FROM_TO (0x1c, 0x20); - - /* software definable pins. - sdp_data [7:0] - sdp_is_output [15:8] - sdp_is_native [23:16] - sdp_function [31:24]. - */ - u32 sdp_control; - CLIB_PAD_FROM_TO (0x24, 0x28); - - /* [0] i2c clock in - [1] i2c clock out - [2] i2c data in - [3] i2c data out */ - u32 i2c_control; - CLIB_PAD_FROM_TO (0x2c, 0x4c); - u32 tcp_timer; - - CLIB_PAD_FROM_TO (0x50, 0x200); - - u32 led_control; - - CLIB_PAD_FROM_TO (0x204, 0x600); - u32 core_spare; - CLIB_PAD_FROM_TO (0x604, 0x700); - - struct - { - u32 vflr_events_clear[4]; - u32 mailbox_interrupt_status[4]; - u32 mailbox_interrupt_enable[4]; - CLIB_PAD_FROM_TO (0x730, 0x800); - } pf_foo; - - struct - { - u32 status_write_1_to_clear; - CLIB_PAD_FROM_TO (0x804, 0x808); - u32 status_write_1_to_set; - CLIB_PAD_FROM_TO (0x80c, 0x810); - u32 status_auto_clear_enable; - CLIB_PAD_FROM_TO (0x814, 0x820); - - /* [11:3] minimum inter-interrupt interval - (2e-6 units; 20e-6 units for fast ethernet). - [15] low-latency interrupt moderation enable - [20:16] low-latency interrupt credit - [27:21] interval counter - [31] write disable for credit and counter (write only). */ - u32 throttle0[24]; - - u32 enable_write_1_to_set; - CLIB_PAD_FROM_TO (0x884, 0x888); - u32 enable_write_1_to_clear; - CLIB_PAD_FROM_TO (0x88c, 0x890); - u32 enable_auto_clear; - u32 msi_to_eitr_select; - /* [3:0] spd 0-3 interrupt detection enable - [4] msi-x enable - [5] other clear disable (makes other bits in status not clear on read) - etc. */ - u32 control; - CLIB_PAD_FROM_TO (0x89c, 0x900); - - /* Defines interrupt mapping for 128 rx + 128 tx queues. - 64 x 4 8 bit entries. - For register [i]: - [5:0] bit in interrupt status for rx queue 2*i + 0 - [7] valid bit - [13:8] bit for tx queue 2*i + 0 - [15] valid bit - similar for rx 2*i + 1 and tx 2*i + 1. */ - u32 queue_mapping[64]; - - /* tcp timer [7:0] and other interrupts [15:8] */ - u32 misc_mapping; - CLIB_PAD_FROM_TO (0xa04, 0xa90); - - /* 64 interrupts determined by mappings. */ - u32 status1_write_1_to_clear[4]; - u32 enable1_write_1_to_set[4]; - u32 enable1_write_1_to_clear[4]; - CLIB_PAD_FROM_TO (0xac0, 0xad0); - u32 status1_enable_auto_clear[4]; - CLIB_PAD_FROM_TO (0xae0, 0x1000); - } interrupt; - - ixge_dma_regs_t rx_dma0[64]; - - CLIB_PAD_FROM_TO (0x2000, 0x2140); - u32 dcb_rx_packet_plane_t4_config[8]; - u32 dcb_rx_packet_plane_t4_status[8]; - CLIB_PAD_FROM_TO (0x2180, 0x2300); - - /* reg i defines mapping for 4 rx queues starting at 4*i + 0. */ - u32 rx_queue_stats_mapping[32]; - u32 rx_queue_stats_control; - - CLIB_PAD_FROM_TO (0x2384, 0x2410); - u32 fc_user_descriptor_ptr[2]; - u32 fc_buffer_control; - CLIB_PAD_FROM_TO (0x241c, 0x2420); - u32 fc_rx_dma; - CLIB_PAD_FROM_TO (0x2424, 0x2430); - u32 dcb_packet_plane_control; - CLIB_PAD_FROM_TO (0x2434, 0x2f00); - - u32 rx_dma_control; - u32 pf_queue_drop_enable; - CLIB_PAD_FROM_TO (0x2f08, 0x2f20); - u32 rx_dma_descriptor_cache_config; - CLIB_PAD_FROM_TO (0x2f24, 0x3000); - - /* 1 bit. */ - u32 rx_enable; - CLIB_PAD_FROM_TO (0x3004, 0x3008); - /* [15:0] ether type (little endian) - [31:16] opcode (big endian) */ - u32 flow_control_control; - CLIB_PAD_FROM_TO (0x300c, 0x3020); - /* 3 bit traffic class for each of 8 priorities. */ - u32 rx_priority_to_traffic_class; - CLIB_PAD_FROM_TO (0x3024, 0x3028); - u32 rx_coallesce_data_buffer_control; - CLIB_PAD_FROM_TO (0x302c, 0x3190); - u32 rx_packet_buffer_flush_detect; - CLIB_PAD_FROM_TO (0x3194, 0x3200); - u32 flow_control_tx_timers[4]; /* 2 timer values */ - CLIB_PAD_FROM_TO (0x3210, 0x3220); - u32 flow_control_rx_threshold_lo[8]; - CLIB_PAD_FROM_TO (0x3240, 0x3260); - u32 flow_control_rx_threshold_hi[8]; - CLIB_PAD_FROM_TO (0x3280, 0x32a0); - u32 flow_control_refresh_threshold; - CLIB_PAD_FROM_TO (0x32a4, 0x3c00); - /* For each of 8 traffic classes (units of bytes). */ - u32 rx_packet_buffer_size[8]; - CLIB_PAD_FROM_TO (0x3c20, 0x3d00); - u32 flow_control_config; - CLIB_PAD_FROM_TO (0x3d04, 0x4200); - - struct - { - u32 pcs_config; - CLIB_PAD_FROM_TO (0x4204, 0x4208); - u32 link_control; - u32 link_status; - u32 pcs_debug[2]; - u32 auto_negotiation; - u32 link_partner_ability; - u32 auto_negotiation_tx_next_page; - u32 auto_negotiation_link_partner_next_page; - CLIB_PAD_FROM_TO (0x4228, 0x4240); - } gige_mac; - - struct - { - /* [0] tx crc enable - [2] enable frames up to max frame size register [31:16] - [10] pad frames < 64 bytes if specified by user - [15] loopback enable - [16] mdc hi speed - [17] turn off mdc between mdio packets */ - u32 control; - - /* [5] rx symbol error (all bits clear on read) - [6] rx illegal symbol - [7] rx idle error - [8] rx local fault - [9] rx remote fault */ - u32 status; - - u32 pause_and_pace_control; - CLIB_PAD_FROM_TO (0x424c, 0x425c); - u32 phy_command; - u32 phy_data; - CLIB_PAD_FROM_TO (0x4264, 0x4268); - - /* [31:16] max frame size in bytes. */ - u32 rx_max_frame_size; - CLIB_PAD_FROM_TO (0x426c, 0x4288); - - /* [0] - [2] pcs receive link up? (latch lo) - [7] local fault - [1] - [0] pcs 10g base r capable - [1] pcs 10g base x capable - [2] pcs 10g base w capable - [10] rx local fault - [11] tx local fault - [15:14] 2 => device present at this address (else not present) */ - u32 xgxs_status[2]; - - u32 base_x_pcs_status; - - /* [0] pass unrecognized flow control frames - [1] discard pause frames - [2] rx priority flow control enable (only in dcb mode) - [3] rx flow control enable. */ - u32 flow_control; - - /* [3:0] tx lanes change polarity - [7:4] rx lanes change polarity - [11:8] swizzle tx lanes - [15:12] swizzle rx lanes - 4 x 2 bit tx lane swap - 4 x 2 bit rx lane swap. */ - u32 serdes_control; - - u32 fifo_control; - - /* [0] force link up - [1] autoneg ack2 bit to transmit - [6:2] autoneg selector field to transmit - [8:7] 10g pma/pmd type 0 => xaui, 1 kx4, 2 cx4 - [9] 1g pma/pmd type 0 => sfi, 1 => kx/bx - [10] disable 10g on without main power - [11] restart autoneg on transition to dx power state - [12] restart autoneg - [15:13] link mode: - 0 => 1g no autoneg - 1 => 10g kx4 parallel link no autoneg - 2 => 1g bx autoneg - 3 => 10g sfi serdes - 4 => kx4/kx/kr - 5 => xgmii 1g/100m - 6 => kx4/kx/kr 1g an - 7 kx4/kx/kr sgmii. - [16] kr support - [17] fec requested - [18] fec ability - etc. */ - u32 auto_negotiation_control; - - /* [0] signal detect 1g/100m - [1] fec signal detect - [2] 10g serial pcs fec block lock - [3] 10g serial high error rate - [4] 10g serial pcs block lock - [5] kx/kx4/kr autoneg next page received - [6] kx/kx4/kr backplane autoneg next page received - [7] link status clear to read - [11:8] 10g signal detect (4 lanes) (for serial just lane 0) - [12] 10g serial signal detect - [16:13] 10g parallel lane sync status - [17] 10g parallel align status - [18] 1g sync status - [19] kx/kx4/kr backplane autoneg is idle - [20] 1g autoneg enabled - [21] 1g pcs enabled for sgmii - [22] 10g xgxs enabled - [23] 10g serial fec enabled (forward error detection) - [24] 10g kr pcs enabled - [25] sgmii enabled - [27:26] mac link mode - 0 => 1g - 1 => 10g parallel - 2 => 10g serial - 3 => autoneg - [29:28] link speed - 1 => 100m - 2 => 1g - 3 => 10g - [30] link is up - [31] kx/kx4/kr backplane autoneg completed successfully. */ - u32 link_status; - - /* [17:16] pma/pmd for 10g serial - 0 => kr, 2 => sfi - [18] disable dme pages */ - u32 auto_negotiation_control2; - - CLIB_PAD_FROM_TO (0x42ac, 0x42b0); - u32 link_partner_ability[2]; - CLIB_PAD_FROM_TO (0x42b8, 0x42d0); - u32 manageability_control; - u32 link_partner_next_page[2]; - CLIB_PAD_FROM_TO (0x42dc, 0x42e0); - u32 kr_pcs_control; - u32 kr_pcs_status; - u32 fec_status[2]; - CLIB_PAD_FROM_TO (0x42f0, 0x4314); - u32 sgmii_control; - CLIB_PAD_FROM_TO (0x4318, 0x4324); - u32 link_status2; - CLIB_PAD_FROM_TO (0x4328, 0x4900); - } xge_mac; - - u32 tx_dcb_control; - u32 tx_dcb_descriptor_plane_queue_select; - u32 tx_dcb_descriptor_plane_t1_config; - u32 tx_dcb_descriptor_plane_t1_status; - CLIB_PAD_FROM_TO (0x4910, 0x4950); - - /* For each TC in units of 1k bytes. */ - u32 tx_packet_buffer_thresholds[8]; - CLIB_PAD_FROM_TO (0x4970, 0x4980); - struct - { - u32 mmw; - u32 config; - u32 status; - u32 rate_drift; - } dcb_tx_rate_scheduler; - CLIB_PAD_FROM_TO (0x4990, 0x4a80); - u32 tx_dma_control; - CLIB_PAD_FROM_TO (0x4a84, 0x4a88); - u32 tx_dma_tcp_flags_control[2]; - CLIB_PAD_FROM_TO (0x4a90, 0x4b00); - u32 pf_mailbox[64]; - CLIB_PAD_FROM_TO (0x4c00, 0x5000); - - /* RX */ - u32 checksum_control; - CLIB_PAD_FROM_TO (0x5004, 0x5008); - u32 rx_filter_control; - CLIB_PAD_FROM_TO (0x500c, 0x5010); - u32 management_vlan_tag[8]; - u32 management_udp_tcp_ports[8]; - CLIB_PAD_FROM_TO (0x5050, 0x5078); - /* little endian. */ - u32 extended_vlan_ether_type; - CLIB_PAD_FROM_TO (0x507c, 0x5080); - /* [1] store/dma bad packets - [8] accept all multicast - [9] accept all unicast - [10] accept all broadcast. */ - u32 filter_control; - CLIB_PAD_FROM_TO (0x5084, 0x5088); - /* [15:0] vlan ethernet type (0x8100) little endian - [28] cfi bit expected - [29] drop packets with unexpected cfi bit - [30] vlan filter enable. */ - u32 vlan_control; - CLIB_PAD_FROM_TO (0x508c, 0x5090); - /* [1:0] hi bit of ethernet address for 12 bit index into multicast table - 0 => 47, 1 => 46, 2 => 45, 3 => 43. - [2] enable multicast filter - */ - u32 multicast_control; - CLIB_PAD_FROM_TO (0x5094, 0x5100); - u32 fcoe_rx_control; - CLIB_PAD_FROM_TO (0x5104, 0x5108); - u32 fc_flt_context; - CLIB_PAD_FROM_TO (0x510c, 0x5110); - u32 fc_filter_control; - CLIB_PAD_FROM_TO (0x5114, 0x5120); - u32 rx_message_type_lo; - CLIB_PAD_FROM_TO (0x5124, 0x5128); - /* [15:0] ethernet type (little endian) - [18:16] matche pri in vlan tag - [19] priority match enable - [25:20] virtualization pool - [26] pool enable - [27] is fcoe - [30] ieee 1588 timestamp enable - [31] filter enable. - (See ethernet_type_queue_select.) */ - u32 ethernet_type_queue_filter[8]; - CLIB_PAD_FROM_TO (0x5148, 0x5160); - /* [7:0] l2 ethernet type and - [15:8] l2 ethernet type or */ - u32 management_decision_filters1[8]; - u32 vf_vm_tx_switch_loopback_enable[2]; - u32 rx_time_sync_control; - CLIB_PAD_FROM_TO (0x518c, 0x5190); - u32 management_ethernet_type_filters[4]; - u32 rx_timestamp_attributes_lo; - u32 rx_timestamp_hi; - u32 rx_timestamp_attributes_hi; - CLIB_PAD_FROM_TO (0x51ac, 0x51b0); - u32 pf_virtual_control; - CLIB_PAD_FROM_TO (0x51b4, 0x51d8); - u32 fc_offset_parameter; - CLIB_PAD_FROM_TO (0x51dc, 0x51e0); - u32 vf_rx_enable[2]; - u32 rx_timestamp_lo; - CLIB_PAD_FROM_TO (0x51ec, 0x5200); - /* 12 bits determined by multicast_control - lookup bits in this vector. */ - u32 multicast_enable[128]; - - /* [0] ethernet address [31:0] - [1] [15:0] ethernet address [47:32] - [31] valid bit. - Index 0 is read from eeprom after reset. */ - u32 rx_ethernet_address0[16][2]; - - CLIB_PAD_FROM_TO (0x5480, 0x5800); - u32 wake_up_control; - CLIB_PAD_FROM_TO (0x5804, 0x5808); - u32 wake_up_filter_control; - CLIB_PAD_FROM_TO (0x580c, 0x5818); - u32 multiple_rx_queue_command_82598; - CLIB_PAD_FROM_TO (0x581c, 0x5820); - u32 management_control; - u32 management_filter_control; - CLIB_PAD_FROM_TO (0x5828, 0x5838); - u32 wake_up_ip4_address_valid; - CLIB_PAD_FROM_TO (0x583c, 0x5840); - u32 wake_up_ip4_address_table[4]; - u32 management_control_to_host; - CLIB_PAD_FROM_TO (0x5854, 0x5880); - u32 wake_up_ip6_address_table[4]; - - /* unicast_and broadcast_and vlan_and ip_address_and - etc. */ - u32 management_decision_filters[8]; - - u32 management_ip4_or_ip6_address_filters[4][4]; - CLIB_PAD_FROM_TO (0x58f0, 0x5900); - u32 wake_up_packet_length; - CLIB_PAD_FROM_TO (0x5904, 0x5910); - u32 management_ethernet_address_filters[4][2]; - CLIB_PAD_FROM_TO (0x5930, 0x5a00); - u32 wake_up_packet_memory[32]; - CLIB_PAD_FROM_TO (0x5a80, 0x5c00); - u32 redirection_table_82598[32]; - u32 rss_random_keys_82598[10]; - CLIB_PAD_FROM_TO (0x5ca8, 0x6000); - - ixge_dma_regs_t tx_dma[128]; - - u32 pf_vm_vlan_insert[64]; - u32 tx_dma_tcp_max_alloc_size_requests; - CLIB_PAD_FROM_TO (0x8104, 0x8110); - u32 vf_tx_enable[2]; - CLIB_PAD_FROM_TO (0x8118, 0x8120); - /* [0] dcb mode enable - [1] virtualization mode enable - [3:2] number of tcs/qs per pool. */ - u32 multiple_tx_queues_command; - CLIB_PAD_FROM_TO (0x8124, 0x8200); - u32 pf_vf_anti_spoof[8]; - u32 pf_dma_tx_switch_control; - CLIB_PAD_FROM_TO (0x8224, 0x82e0); - u32 tx_strict_low_latency_queues[4]; - CLIB_PAD_FROM_TO (0x82f0, 0x8600); - u32 tx_queue_stats_mapping_82599[32]; - u32 tx_queue_packet_counts[32]; - u32 tx_queue_byte_counts[32][2]; - - struct - { - u32 control; - u32 status; - u32 buffer_almost_full; - CLIB_PAD_FROM_TO (0x880c, 0x8810); - u32 buffer_min_ifg; - CLIB_PAD_FROM_TO (0x8814, 0x8900); - } tx_security; - - struct - { - u32 index; - u32 salt; - u32 key[4]; - CLIB_PAD_FROM_TO (0x8918, 0x8a00); - } tx_ipsec; - - struct - { - u32 capabilities; - u32 control; - u32 tx_sci[2]; - u32 sa; - u32 sa_pn[2]; - u32 key[2][4]; - /* untagged packets, encrypted packets, protected packets, - encrypted bytes, protected bytes */ - u32 stats[5]; - CLIB_PAD_FROM_TO (0x8a50, 0x8c00); - } tx_link_security; - - struct - { - u32 control; - u32 timestamp_value[2]; - u32 system_time[2]; - u32 increment_attributes; - u32 time_adjustment_offset[2]; - u32 aux_control; - u32 target_time[2][2]; - CLIB_PAD_FROM_TO (0x8c34, 0x8c3c); - u32 aux_time_stamp[2][2]; - CLIB_PAD_FROM_TO (0x8c4c, 0x8d00); - } tx_timesync; - - struct - { - u32 control; - u32 status; - CLIB_PAD_FROM_TO (0x8d08, 0x8e00); - } rx_security; - - struct - { - u32 index; - u32 ip_address[4]; - u32 spi; - u32 ip_index; - u32 key[4]; - u32 salt; - u32 mode; - CLIB_PAD_FROM_TO (0x8e34, 0x8f00); - } rx_ipsec; - - struct - { - u32 capabilities; - u32 control; - u32 sci[2]; - u32 sa[2]; - u32 sa_pn[2]; - u32 key[2][4]; - /* see datasheet */ - u32 stats[17]; - CLIB_PAD_FROM_TO (0x8f84, 0x9000); - } rx_link_security; - - /* 4 wake up, 2 management, 2 wake up. */ - u32 flexible_filters[8][16][4]; - CLIB_PAD_FROM_TO (0x9800, 0xa000); - - /* 4096 bits. */ - u32 vlan_filter[128]; - - /* [0] ethernet address [31:0] - [1] [15:0] ethernet address [47:32] - [31] valid bit. - Index 0 is read from eeprom after reset. */ - u32 rx_ethernet_address1[128][2]; - - /* select one of 64 pools for each rx address. */ - u32 rx_ethernet_address_pool_select[128][2]; - CLIB_PAD_FROM_TO (0xaa00, 0xc800); - u32 tx_priority_to_traffic_class; - CLIB_PAD_FROM_TO (0xc804, 0xcc00); - - /* In bytes units of 1k. Total packet buffer is 160k. */ - u32 tx_packet_buffer_size[8]; - - CLIB_PAD_FROM_TO (0xcc20, 0xcd10); - u32 tx_manageability_tc_mapping; - CLIB_PAD_FROM_TO (0xcd14, 0xcd20); - u32 dcb_tx_packet_plane_t2_config[8]; - u32 dcb_tx_packet_plane_t2_status[8]; - CLIB_PAD_FROM_TO (0xcd60, 0xce00); - - u32 tx_flow_control_status; - CLIB_PAD_FROM_TO (0xce04, 0xd000); - - ixge_dma_regs_t rx_dma1[64]; - - struct - { - /* Bigendian ip4 src/dst address. */ - u32 src_address[128]; - u32 dst_address[128]; - - /* TCP/UDP ports [15:0] src [31:16] dst; bigendian. */ - u32 tcp_udp_port[128]; - - /* [1:0] protocol tcp, udp, sctp, other - [4:2] match priority (highest wins) - [13:8] pool - [25] src address match disable - [26] dst address match disable - [27] src port match disable - [28] dst port match disable - [29] protocol match disable - [30] pool match disable - [31] enable. */ - u32 control[128]; - - /* [12] size bypass - [19:13] must be 0x80 - [20] low-latency interrupt - [27:21] rx queue. */ - u32 interrupt[128]; - } ip4_filters; - - CLIB_PAD_FROM_TO (0xea00, 0xeb00); - /* 4 bit rss output index indexed by 7 bit hash. - 128 8 bit fields = 32 registers. */ - u32 redirection_table_82599[32]; - - u32 rss_random_key_82599[10]; - CLIB_PAD_FROM_TO (0xeba8, 0xec00); - /* [15:0] reserved - [22:16] rx queue index - [29] low-latency interrupt on match - [31] enable */ - u32 ethernet_type_queue_select[8]; - CLIB_PAD_FROM_TO (0xec20, 0xec30); - u32 syn_packet_queue_filter; - CLIB_PAD_FROM_TO (0xec34, 0xec60); - u32 immediate_interrupt_rx_vlan_priority; - CLIB_PAD_FROM_TO (0xec64, 0xec70); - u32 rss_queues_per_traffic_class; - CLIB_PAD_FROM_TO (0xec74, 0xec90); - u32 lli_size_threshold; - CLIB_PAD_FROM_TO (0xec94, 0xed00); - - struct - { - u32 control; - CLIB_PAD_FROM_TO (0xed04, 0xed10); - u32 table[8]; - CLIB_PAD_FROM_TO (0xed30, 0xee00); - } fcoe_redirection; - - struct - { - /* [1:0] packet buffer allocation 0 => disabled, else 64k*2^(f-1) - [3] packet buffer initialization done - [4] perfetch match mode - [5] report status in rss field of rx descriptors - [7] report status always - [14:8] drop queue - [20:16] flex 2 byte packet offset (units of 2 bytes) - [27:24] max linked list length - [31:28] full threshold. */ - u32 control; - CLIB_PAD_FROM_TO (0xee04, 0xee0c); - - u32 data[8]; - - /* [1:0] 0 => no action, 1 => add, 2 => remove, 3 => query. - [2] valid filter found by query command - [3] filter update override - [4] ip6 adress table - [6:5] l4 protocol reserved, udp, tcp, sctp - [7] is ip6 - [8] clear head/tail - [9] packet drop action - [10] matched packet generates low-latency interrupt - [11] last in linked list - [12] collision - [15] rx queue enable - [22:16] rx queue - [29:24] pool. */ - u32 command; - - CLIB_PAD_FROM_TO (0xee30, 0xee3c); - /* ip4 dst/src address, tcp ports, udp ports. - set bits mean bit is ignored. */ - u32 ip4_masks[4]; - u32 filter_length; - u32 usage_stats; - u32 failed_usage_stats; - u32 filters_match_stats; - u32 filters_miss_stats; - CLIB_PAD_FROM_TO (0xee60, 0xee68); - /* Lookup, signature. */ - u32 hash_keys[2]; - /* [15:0] ip6 src address 1 bit per byte - [31:16] ip6 dst address. */ - u32 ip6_mask; - /* [0] vlan id - [1] vlan priority - [2] pool - [3] ip protocol - [4] flex - [5] dst ip6. */ - u32 other_mask; - CLIB_PAD_FROM_TO (0xee78, 0xf000); - } flow_director; - - struct - { - u32 l2_control[64]; - u32 vlan_pool_filter[64]; - u32 vlan_pool_filter_bitmap[128]; - u32 dst_ethernet_address[128]; - u32 mirror_rule[4]; - u32 mirror_rule_vlan[8]; - u32 mirror_rule_pool[8]; - CLIB_PAD_FROM_TO (0xf650, 0x10010); - } pf_bar; - - u32 eeprom_flash_control; - /* [0] start - [1] done - [15:2] address - [31:16] read data. */ - u32 eeprom_read; - CLIB_PAD_FROM_TO (0x10018, 0x1001c); - u32 flash_access; - CLIB_PAD_FROM_TO (0x10020, 0x10114); - u32 flash_data; - u32 flash_control; - u32 flash_read_data; - CLIB_PAD_FROM_TO (0x10120, 0x1013c); - u32 flash_opcode; - u32 software_semaphore; - CLIB_PAD_FROM_TO (0x10144, 0x10148); - u32 firmware_semaphore; - CLIB_PAD_FROM_TO (0x1014c, 0x10160); - u32 software_firmware_sync; - CLIB_PAD_FROM_TO (0x10164, 0x10200); - u32 general_rx_control; - CLIB_PAD_FROM_TO (0x10204, 0x11000); - - struct - { - u32 control; - CLIB_PAD_FROM_TO (0x11004, 0x11010); - /* [3:0] enable counters - [7:4] leaky bucket counter mode - [29] reset - [30] stop - [31] start. */ - u32 counter_control; - /* [7:0],[15:8],[23:16],[31:24] event for counters 0-3. - event codes: - 0x0 bad tlp - 0x10 reqs that reached timeout - etc. */ - u32 counter_event; - CLIB_PAD_FROM_TO (0x11018, 0x11020); - u32 counters_clear_on_read[4]; - u32 counter_config[4]; - struct - { - u32 address; - u32 data; - } indirect_access; - CLIB_PAD_FROM_TO (0x11048, 0x11050); - u32 extended_control; - CLIB_PAD_FROM_TO (0x11054, 0x11064); - u32 mirrored_revision_id; - CLIB_PAD_FROM_TO (0x11068, 0x11070); - u32 dca_requester_id_information; - - /* [0] global disable - [4:1] mode: 0 => legacy, 1 => dca 1.0. */ - u32 dca_control; - CLIB_PAD_FROM_TO (0x11078, 0x110b0); - /* [0] pci completion abort - [1] unsupported i/o address - [2] wrong byte enable - [3] pci timeout */ - u32 pcie_interrupt_status; - CLIB_PAD_FROM_TO (0x110b4, 0x110b8); - u32 pcie_interrupt_enable; - CLIB_PAD_FROM_TO (0x110bc, 0x110c0); - u32 msi_x_pba_clear[8]; - CLIB_PAD_FROM_TO (0x110e0, 0x12300); - } pcie; - - u32 interrupt_throttle1[128 - 24]; - CLIB_PAD_FROM_TO (0x124a0, 0x14f00); - - u32 core_analog_config; - CLIB_PAD_FROM_TO (0x14f04, 0x14f10); - u32 core_common_config; - CLIB_PAD_FROM_TO (0x14f14, 0x15f14); - - u32 link_sec_software_firmware_interface; -} ixge_regs_t; - -typedef union -{ - struct - { - /* Addresses bigendian. */ - union - { - struct - { - ip6_address_t src_address; - u32 unused[1]; - } ip6; - struct - { - u32 unused[3]; - ip4_address_t src_address, dst_address; - } ip4; - }; - - /* [15:0] src port (little endian). - [31:16] dst port. */ - u32 tcp_udp_ports; - - /* [15:0] vlan (cfi bit set to 0). - [31:16] flex bytes. bigendian. */ - u32 vlan_and_flex_word; - - /* [14:0] hash - [15] bucket valid - [31:16] signature (signature filers)/sw-index (perfect match). */ - u32 hash; - }; - - u32 as_u32[8]; -} ixge_flow_director_key_t; - -always_inline void -ixge_throttle_queue_interrupt (ixge_regs_t * r, - u32 queue_interrupt_index, - f64 inter_interrupt_interval_in_secs) -{ - volatile u32 *tr = - (queue_interrupt_index < ARRAY_LEN (r->interrupt.throttle0) - ? &r->interrupt.throttle0[queue_interrupt_index] - : &r->interrupt_throttle1[queue_interrupt_index]); - ASSERT (queue_interrupt_index < 128); - u32 v; - i32 i, mask = (1 << 9) - 1; - - i = flt_round_nearest (inter_interrupt_interval_in_secs / 2e-6); - i = i < 1 ? 1 : i; - i = i >= mask ? mask : i; - - v = tr[0]; - v &= ~(mask << 3); - v |= i << 3; - tr[0] = v; -} - -#define foreach_ixge_counter \ - _ (0x40d0, rx_total_packets) \ - _64 (0x40c0, rx_total_bytes) \ - _ (0x41b0, rx_good_packets_before_filtering) \ - _64 (0x41b4, rx_good_bytes_before_filtering) \ - _ (0x2f50, rx_dma_good_packets) \ - _64 (0x2f54, rx_dma_good_bytes) \ - _ (0x2f5c, rx_dma_duplicated_good_packets) \ - _64 (0x2f60, rx_dma_duplicated_good_bytes) \ - _ (0x2f68, rx_dma_good_loopback_packets) \ - _64 (0x2f6c, rx_dma_good_loopback_bytes) \ - _ (0x2f74, rx_dma_good_duplicated_loopback_packets) \ - _64 (0x2f78, rx_dma_good_duplicated_loopback_bytes) \ - _ (0x4074, rx_good_packets) \ - _64 (0x4088, rx_good_bytes) \ - _ (0x407c, rx_multicast_packets) \ - _ (0x4078, rx_broadcast_packets) \ - _ (0x405c, rx_64_byte_packets) \ - _ (0x4060, rx_65_127_byte_packets) \ - _ (0x4064, rx_128_255_byte_packets) \ - _ (0x4068, rx_256_511_byte_packets) \ - _ (0x406c, rx_512_1023_byte_packets) \ - _ (0x4070, rx_gt_1023_byte_packets) \ - _ (0x4000, rx_crc_errors) \ - _ (0x4120, rx_ip_checksum_errors) \ - _ (0x4004, rx_illegal_symbol_errors) \ - _ (0x4008, rx_error_symbol_errors) \ - _ (0x4034, rx_mac_local_faults) \ - _ (0x4038, rx_mac_remote_faults) \ - _ (0x4040, rx_length_errors) \ - _ (0x41a4, rx_xons) \ - _ (0x41a8, rx_xoffs) \ - _ (0x40a4, rx_undersize_packets) \ - _ (0x40a8, rx_fragments) \ - _ (0x40ac, rx_oversize_packets) \ - _ (0x40b0, rx_jabbers) \ - _ (0x40b4, rx_management_packets) \ - _ (0x40b8, rx_management_drops) \ - _ (0x3fa0, rx_missed_packets_pool_0) \ - _ (0x40d4, tx_total_packets) \ - _ (0x4080, tx_good_packets) \ - _64 (0x4090, tx_good_bytes) \ - _ (0x40f0, tx_multicast_packets) \ - _ (0x40f4, tx_broadcast_packets) \ - _ (0x87a0, tx_dma_good_packets) \ - _64 (0x87a4, tx_dma_good_bytes) \ - _ (0x40d8, tx_64_byte_packets) \ - _ (0x40dc, tx_65_127_byte_packets) \ - _ (0x40e0, tx_128_255_byte_packets) \ - _ (0x40e4, tx_256_511_byte_packets) \ - _ (0x40e8, tx_512_1023_byte_packets) \ - _ (0x40ec, tx_gt_1023_byte_packets) \ - _ (0x4010, tx_undersize_drops) \ - _ (0x8780, switch_security_violation_packets) \ - _ (0x5118, fc_crc_errors) \ - _ (0x241c, fc_rx_drops) \ - _ (0x2424, fc_last_error_count) \ - _ (0x2428, fcoe_rx_packets) \ - _ (0x242c, fcoe_rx_dwords) \ - _ (0x8784, fcoe_tx_packets) \ - _ (0x8788, fcoe_tx_dwords) \ - _ (0x1030, queue_0_rx_count) \ - _ (0x1430, queue_0_drop_count) \ - _ (0x1070, queue_1_rx_count) \ - _ (0x1470, queue_1_drop_count) \ - _ (0x10b0, queue_2_rx_count) \ - _ (0x14b0, queue_2_drop_count) \ - _ (0x10f0, queue_3_rx_count) \ - _ (0x14f0, queue_3_drop_count) \ - _ (0x1130, queue_4_rx_count) \ - _ (0x1530, queue_4_drop_count) \ - _ (0x1170, queue_5_rx_count) \ - _ (0x1570, queue_5_drop_count) \ - _ (0x11b0, queue_6_rx_count) \ - _ (0x15b0, queue_6_drop_count) \ - _ (0x11f0, queue_7_rx_count) \ - _ (0x15f0, queue_7_drop_count) \ - _ (0x1230, queue_8_rx_count) \ - _ (0x1630, queue_8_drop_count) \ - _ (0x1270, queue_9_rx_count) \ - _ (0x1270, queue_9_drop_count) - - - - -typedef enum -{ -#define _(a,f) IXGE_COUNTER_##f, -#define _64(a,f) _(a,f) - foreach_ixge_counter -#undef _ -#undef _64 - IXGE_N_COUNTER, -} ixge_counter_type_t; - -typedef struct -{ - u32 mdio_address; - - /* 32 bit ID read from ID registers. */ - u32 id; -} ixge_phy_t; - -typedef struct -{ - /* Cache aligned descriptors. */ - ixge_descriptor_t *descriptors; - - /* Number of descriptors in table. */ - u32 n_descriptors; - - /* Software head and tail pointers into descriptor ring. */ - u32 head_index, tail_index; - - /* Index into dma_queues vector. */ - u32 queue_index; - - /* Buffer indices corresponding to each active descriptor. */ - u32 *descriptor_buffer_indices; - - union - { - struct - { - u32 *volatile head_index_write_back; - - u32 n_buffers_on_ring; - } tx; - - struct - { - /* Buffer indices to use to replenish each descriptor. */ - u32 *replenish_buffer_indices; - - vlib_node_runtime_t *node; - u32 next_index; - - u32 saved_start_of_packet_buffer_index; - - u32 saved_start_of_packet_next_index; - u32 saved_last_buffer_index; - - u32 is_start_of_packet; - - u32 n_descriptors_done_total; - - u32 n_descriptors_done_this_call; - - u32 n_bytes; - } rx; - }; -} ixge_dma_queue_t; - -#define foreach_ixge_pci_device_id \ - _ (82598, 0x10b6) \ - _ (82598_bx, 0x1508) \ - _ (82598af_dual_port, 0x10c6) \ - _ (82598af_single_port, 0x10c7) \ - _ (82598at, 0x10c8) \ - _ (82598at2, 0x150b) \ - _ (82598eb_sfp_lom, 0x10db) \ - _ (82598eb_cx4, 0x10dd) \ - _ (82598_cx4_dual_port, 0x10ec) \ - _ (82598_da_dual_port, 0x10f1) \ - _ (82598_sr_dual_port_em, 0x10e1) \ - _ (82598eb_xf_lr, 0x10f4) \ - _ (82599_kx4, 0x10f7) \ - _ (82599_kx4_mezz, 0x1514) \ - _ (82599_kr, 0x1517) \ - _ (82599_combo_backplane, 0x10f8) \ - _ (82599_cx4, 0x10f9) \ - _ (82599_sfp, 0x10fb) \ - _ (82599_backplane_fcoe, 0x152a) \ - _ (82599_sfp_fcoe, 0x1529) \ - _ (82599_sfp_em, 0x1507) \ - _ (82599_xaui_lom, 0x10fc) \ - _ (82599_t3_lom, 0x151c) \ - _ (x540t, 0x1528) - -typedef enum -{ -#define _(f,n) IXGE_##f = n, - foreach_ixge_pci_device_id -#undef _ -} ixge_pci_device_id_t; - -typedef struct -{ - /* registers */ - ixge_regs_t *regs; - - /* Specific next index when using dynamic redirection */ - u32 per_interface_next_index; - - /* PCI bus info. */ - vlib_pci_dev_handle_t pci_dev_handle; - - /* From PCI config space header. */ - ixge_pci_device_id_t device_id; - - u16 device_index; - - /* 0 or 1. */ - u16 pci_function; - - /* VLIB interface for this instance. */ - u32 vlib_hw_if_index, vlib_sw_if_index; - - ixge_dma_queue_t *dma_queues[VLIB_N_RX_TX]; - - /* Phy index (0 or 1) and address on MDI bus. */ - u32 phy_index; - ixge_phy_t phys[2]; - - /* Value of link_status register at last link change. */ - u32 link_status_at_last_link_change; - - i2c_bus_t i2c_bus; - sfp_eeprom_t sfp_eeprom; - - /* Counters. */ - u64 counters[IXGE_N_COUNTER], counters_last_clear[IXGE_N_COUNTER]; -} ixge_device_t; - -typedef struct -{ - vlib_main_t *vlib_main; - - /* Vector of devices. */ - ixge_device_t *devices; - - /* Descriptor ring sizes. */ - u32 n_descriptors[VLIB_N_RX_TX]; - - /* RX buffer size. Must be at least 1k; will be rounded to - next largest 1k size. */ - u32 n_bytes_in_rx_buffer; - - u32 n_descriptors_per_cache_line; - - u32 process_node_index; - - /* Template and mask for initializing/validating TX descriptors. */ - ixge_tx_descriptor_t tx_descriptor_template, tx_descriptor_template_mask; - - /* Vector of buffers for which TX is done and can be freed. */ - u32 *tx_buffers_pending_free; - - u32 *rx_buffers_to_add; - - f64 time_last_stats_update; - -} ixge_main_t; - -extern ixge_main_t ixge_main; -extern vnet_device_class_t ixge_device_class; - -typedef enum -{ - IXGE_RX_NEXT_IP4_INPUT, - IXGE_RX_NEXT_IP6_INPUT, - IXGE_RX_NEXT_ETHERNET_INPUT, - IXGE_RX_NEXT_DROP, - IXGE_RX_N_NEXT, -} ixge_rx_next_t; - -void ixge_set_next_node (ixge_rx_next_t, char *); - -#endif /* included_ixge_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 02e1c8d1517..8e4abc3ae98 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -5109,8 +5109,6 @@ _(gpe_add_del_iface_reply) \ _(gpe_add_del_native_fwd_rpath_reply) \ _(af_packet_delete_reply) \ _(policer_classify_set_interface_reply) \ -_(netmap_create_reply) \ -_(netmap_delete_reply) \ _(set_ipfix_exporter_reply) \ _(set_ipfix_classify_stream_reply) \ _(ipfix_classify_table_add_del_reply) \ @@ -5386,8 +5384,6 @@ _(POLICER_ADD_DEL_REPLY, policer_add_del_reply) \ _(POLICER_DETAILS, policer_details) \ _(POLICER_CLASSIFY_SET_INTERFACE_REPLY, policer_classify_set_interface_reply) \ _(POLICER_CLASSIFY_DETAILS, policer_classify_details) \ -_(NETMAP_CREATE_REPLY, netmap_create_reply) \ -_(NETMAP_DELETE_REPLY, netmap_delete_reply) \ _(MPLS_TUNNEL_DETAILS, mpls_tunnel_details) \ _(MPLS_TABLE_DETAILS, mpls_table_details) \ _(MPLS_ROUTE_DETAILS, mpls_route_details) \ @@ -17516,100 +17512,6 @@ api_policer_classify_dump (vat_main_t * vam) return ret; } -static int -api_netmap_create (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_netmap_create_t *mp; - u8 *if_name = 0; - u8 hw_addr[6]; - u8 random_hw_addr = 1; - u8 is_pipe = 0; - u8 is_master = 0; - int ret; - - clib_memset (hw_addr, 0, sizeof (hw_addr)); - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "name %s", &if_name)) - vec_add1 (if_name, 0); - else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr)) - random_hw_addr = 0; - else if (unformat (i, "pipe")) - is_pipe = 1; - else if (unformat (i, "master")) - is_master = 1; - else if (unformat (i, "slave")) - is_master = 0; - else - break; - } - - if (!vec_len (if_name)) - { - errmsg ("interface name must be specified"); - return -99; - } - - if (vec_len (if_name) > 64) - { - errmsg ("interface name too long"); - return -99; - } - - M (NETMAP_CREATE, mp); - - clib_memcpy (mp->netmap_if_name, if_name, vec_len (if_name)); - clib_memcpy (mp->hw_addr, hw_addr, 6); - mp->use_random_hw_addr = random_hw_addr; - mp->is_pipe = is_pipe; - mp->is_master = is_master; - vec_free (if_name); - - S (mp); - W (ret); - return ret; -} - -static int -api_netmap_delete (vat_main_t * vam) -{ - unformat_input_t *i = vam->input; - vl_api_netmap_delete_t *mp; - u8 *if_name = 0; - int ret; - - while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) - { - if (unformat (i, "name %s", &if_name)) - vec_add1 (if_name, 0); - else - break; - } - - if (!vec_len (if_name)) - { - errmsg ("interface name must be specified"); - return -99; - } - - if (vec_len (if_name) > 64) - { - errmsg ("interface name too long"); - return -99; - } - - M (NETMAP_DELETE, mp); - - clib_memcpy (mp->netmap_if_name, if_name, vec_len (if_name)); - vec_free (if_name); - - S (mp); - W (ret); - return ret; -} - static u8 * format_fib_api_path_nh_proto (u8 * s, va_list * args) { @@ -21040,9 +20942,6 @@ _(policer_classify_set_interface, \ "<intfc> | sw_if_index <nn> [ip4-table <nn>] [ip6-table <nn>]\n" \ " [l2-table <nn>] [del]") \ _(policer_classify_dump, "type [ip4|ip6|l2]") \ -_(netmap_create, "name <interface name> [hw-addr <mac>] [pipe] " \ - "[master|slave]") \ -_(netmap_delete, "name <interface name>") \ _(mpls_tunnel_dump, "tunnel_index <tunnel-id>") \ _(mpls_table_dump, "") \ _(mpls_route_dump, "table-id <ID>") \ diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index eb96699193c..6f9f6bcc974 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -1090,7 +1090,6 @@ list(APPEND VNET_HEADERS list(APPEND VNET_MULTIARCH_SOURCES devices/virtio/vhost_user_input.c devices/virtio/vhost_user_output.c - devices/netmap/node.c devices/virtio/node.c devices/af_packet/node.c devices/virtio/device.c @@ -1202,7 +1201,6 @@ list(APPEND VNET_SOURCES ) list(APPEND VNET_MULTIARCH_SOURCES - devices/netmap/device.c devices/af_packet/device.c ) @@ -1213,24 +1211,6 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES devices/af_packet/af_packet.api) ############################################################################## -# NETMAP interface -############################################################################## - -list(APPEND VNET_SOURCES - devices/netmap/netmap.c - devices/netmap/device.c - devices/netmap/node.c - devices/netmap/cli.c - devices/netmap/netmap_api.c -) - -list(APPEND VNET_HEADERS - devices/netmap/netmap.h -) - -list(APPEND VNET_API_FILES devices/netmap/netmap.api) - -############################################################################## # Driver feature graph arc support ############################################################################## diff --git a/src/vnet/devices/netmap/FEATURE.yaml b/src/vnet/devices/netmap/FEATURE.yaml deleted file mode 100644 index e23e5c243e7..00000000000 --- a/src/vnet/devices/netmap/FEATURE.yaml +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: Netmap Device -maintainer: Damjan Marion <damarion@cisco.com> -features: - - L4 checksum offload -description: "Create a netmap interface, which is a high speed user-space - interface that allows VPP to patch into a linux namespace, - a linux container, or a physical NIC without the use of DPDK." -missing: - - API dump -state: production -properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/vnet/devices/netmap/cli.c b/src/vnet/devices/netmap/cli.c deleted file mode 100644 index 713632947a1..00000000000 --- a/src/vnet/devices/netmap/cli.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> - -#include <vnet/devices/netmap/net_netmap.h> -#include <vnet/devices/netmap/netmap.h> - -static clib_error_t * -netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 *host_if_name = NULL; - u8 hwaddr[6]; - u8 *hw_addr_ptr = 0; - int r; - u8 is_pipe = 0; - u8 is_master = 0; - u32 sw_if_index = ~0; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &host_if_name)) - ; - else - if (unformat - (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) - hw_addr_ptr = hwaddr; - else if (unformat (line_input, "pipe")) - is_pipe = 1; - else if (unformat (line_input, "master")) - is_master = 1; - else if (unformat (line_input, "slave")) - is_master = 0; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - r = - netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, - &sw_if_index); - - if (r == VNET_API_ERROR_SYSCALL_ERROR_1) - { - error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); - goto done; - } - - if (r == VNET_API_ERROR_INVALID_INTERFACE) - { - error = clib_error_return (0, "Invalid interface name"); - goto done; - } - - if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - { - error = clib_error_return (0, "Interface already exists"); - goto done; - } - - vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), - sw_if_index); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * '<em>netmap</em>' is a framework for very fast packet I/O from userspace. - * '<em>VALE</em>' is an equally fast in-kernel software switch using the - * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared - * memory packet transport channel. Together, they provide a high speed - * user-space interface that allows VPP to patch into a linux namespace, a - * linux container, or a physical NIC without the use of DPDK. Netmap/VALE - * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded - * before netmap interfaces can be created. - * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. - * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, - * which is a snapshot of the Netmap/VALE repo with minor changes to work - * with containers and modified kernel drivers to work with NICs. - * - * Create a netmap interface that will attach to a linux interface. - * The interface must already exist. Once created, a new netmap interface - * will exist in VPP with the name '<em>netmap-<ifname></em>', where - * '<em><ifname></em>' takes one of two forms: - * - <b>ifname</b> - Linux interface to bind too. - * - <b>valeXXX:YYY</b> - - * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE - * interface that must start with '<em>vale</em>' and is less - * than 16 characters. - * - Where '<em>YYY</em>' is an existing linux namespace. - * - * This command has the following optional parameters: - * - * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either - * X:X:X:X:X:X unix or X.X.X cisco format. - * - * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>' - * instance should be created. - * - * - <b>master | slave</b> - Optional flag to indicate whether VPP should - * be the master or slave of the '<em>netmap pipe</em>'. Only considered - * if '<em>pipe</em>' is entered. Defaults to '<em>slave</em>' if not entered. - * - * @cliexpar - * Example of how to create a netmap interface tied to the linux - * namespace '<em>vpp1</em>': - * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} - * netmap-vale00:vpp1 - * @cliexend - * Once the netmap interface is created, enable the interface using: - * @cliexcmd{set interface state netmap-vale00:vpp1 up} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (netmap_create_command, static) = { - .path = "create netmap", - .short_help = "create netmap name <ifname>|valeXXX:YYY " - "[hw-addr <mac-addr>] [pipe] [master|slave]", - .function = netmap_create_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u8 *host_if_name = NULL; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "name %s", &host_if_name)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - } - - if (host_if_name == NULL) - { - error = clib_error_return (0, "missing host interface name"); - goto done; - } - - netmap_delete_if (vm, host_if_name); - -done: - unformat_free (line_input); - - return error; -} - -/*? - * Delete a netmap interface. Use the '<em><ifname></em>' to identify - * the netmap interface to be deleted. In VPP, netmap interfaces are - * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>' - * takes one of two forms: - * - <b>ifname</b> - Linux interface to bind too. - * - <b>valeXXX:YYY</b> - - * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE - * interface that must start with '<em>vale</em>' and is less - * than 16 characters. - * - Where '<em>YYY</em>' is an existing linux namespace. - * - * @cliexpar - * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>': - * @cliexcmd{delete netmap name vale00:vpp1} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (netmap_delete_command, static) = { - .path = "delete netmap", - .short_help = "delete netmap name <ifname>|valeXXX:YYY", - .function = netmap_delete_command_fn, -}; -/* *INDENT-ON* */ - -clib_error_t * -netmap_cli_init (vlib_main_t * vm) -{ - return 0; -} - -VLIB_INIT_FUNCTION (netmap_cli_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/device.c b/src/vnet/devices/netmap/device.c deleted file mode 100644 index 47407aaaa26..00000000000 --- a/src/vnet/devices/netmap/device.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> - -#include <vnet/devices/netmap/net_netmap.h> -#include <vnet/devices/netmap/netmap.h> - -#define foreach_netmap_tx_func_error \ -_(NO_FREE_SLOTS, "no free tx slots") \ -_(PENDING_MSGS, "pending msgs in tx ring") - -typedef enum -{ -#define _(f,s) NETMAP_TX_ERROR_##f, - foreach_netmap_tx_func_error -#undef _ - NETMAP_TX_N_ERROR, -} netmap_tx_func_error_t; - -static char *netmap_tx_func_error_strings[] = { -#define _(n,s) s, - foreach_netmap_tx_func_error -#undef _ -}; - - -static u8 * -format_netmap_device_name (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - netmap_main_t *apm = &netmap_main; - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); - - s = format (s, "netmap-%s", nif->host_if_name); - return s; -} - -static u8 * -format_netmap_device (u8 * s, va_list * args) -{ - u32 dev_instance = va_arg (*args, u32); - int verbose = va_arg (*args, int); - netmap_main_t *nm = &netmap_main; - netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); - u32 indent = format_get_indent (s); - - s = format (s, "NETMAP interface"); - if (verbose) - { - s = format (s, "\n%U version %d flags 0x%x" - "\n%U region %u memsize 0x%x offset 0x%x" - "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", - format_white_space, indent + 2, - nif->req->nr_version, - nif->req->nr_flags, - format_white_space, indent + 2, - nif->mem_region, - nif->req->nr_memsize, - nif->req->nr_offset, - format_white_space, indent + 2, - nif->req->nr_tx_slots, - nif->req->nr_rx_slots, - nif->req->nr_tx_rings, nif->req->nr_rx_rings); - } - return s; -} - -static u8 * -format_netmap_tx_trace (u8 * s, va_list * args) -{ - s = format (s, "Unimplemented..."); - return s; -} - -VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - netmap_main_t *nm = &netmap_main; - u32 *buffers = vlib_frame_vector_args (frame); - u32 n_left = frame->n_vectors; - f64 const time_constant = 1e3; - vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; - netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); - int cur_ring; - - clib_spinlock_lock_if_init (&nif->lockp); - - cur_ring = nif->first_tx_ring; - - while (n_left && cur_ring <= nif->last_tx_ring) - { - struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); - int n_free_slots = nm_ring_space (ring); - uint cur = ring->cur; - - if (nm_tx_pending (ring)) - { - if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) - clib_unix_warning ("NIOCTXSYNC"); - clib_cpu_time_wait (time_constant); - - if (nm_tx_pending (ring) && !n_free_slots) - { - cur_ring++; - continue; - } - } - - while (n_left && n_free_slots) - { - vlib_buffer_t *b0 = 0; - u32 bi = buffers[0]; - u32 len; - u32 offset = 0; - buffers++; - - struct netmap_slot *slot = &ring->slot[cur]; - - do - { - b0 = vlib_get_buffer (vm, bi); - len = b0->current_length; - /* memcpy */ - clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + - offset, vlib_buffer_get_current (b0), len); - offset += len; - } - while ((bi = b0->next_buffer)); - - slot->len = offset; - cur = (cur + 1) % ring->num_slots; - n_free_slots--; - n_left--; - } - CLIB_MEMORY_BARRIER (); - ring->head = ring->cur = cur; - } - - if (n_left < frame->n_vectors) - ioctl (nif->fd, NIOCTXSYNC, NULL); - - clib_spinlock_unlock_if_init (&nif->lockp); - - if (n_left) - vlib_error_count (vm, node->node_index, - (n_left == - frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS : - NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); - - vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); - return frame->n_vectors; -} - -static void -netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, - u32 node_index) -{ - netmap_main_t *apm = &netmap_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); - - /* Shut off redirection */ - if (node_index == ~0) - { - nif->per_interface_next_index = node_index; - return; - } - - nif->per_interface_next_index = - vlib_node_add_next (vlib_get_main (), netmap_input_node.index, - node_index); -} - -static void -netmap_clear_hw_interface_counters (u32 instance) -{ - /* Nothing for now */ -} - -static clib_error_t * -netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) -{ - netmap_main_t *apm = &netmap_main; - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); - u32 hw_flags; - - nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - if (nif->is_admin_up) - hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; - else - hw_flags = 0; - - vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); - - return 0; -} - -static clib_error_t * -netmap_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - -/* *INDENT-OFF* */ -VNET_DEVICE_CLASS (netmap_device_class) = { - .name = "netmap", - .format_device_name = format_netmap_device_name, - .format_device = format_netmap_device, - .format_tx_trace = format_netmap_tx_trace, - .tx_function_n_errors = NETMAP_TX_N_ERROR, - .tx_function_error_strings = netmap_tx_func_error_strings, - .rx_redirect_to_node = netmap_set_interface_next_node, - .clear_counters = netmap_clear_hw_interface_counters, - .admin_up_down_function = netmap_interface_admin_up_down, - .subif_add_del_function = netmap_subif_add_del_function, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/dir.dox b/src/vnet/devices/netmap/dir.dox deleted file mode 100644 index 7ddbf947c29..00000000000 --- a/src/vnet/devices/netmap/dir.dox +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Doxygen directory documentation */ - -/** -@dir -@brief netmap Interface Implementation. - -This directory contains the source code for the netmap driver. - -*/ -/*? %%clicmd:group_label netmap %% ?*/ -/*? %%syscfg:group_label netmap %% ?*/ diff --git a/src/vnet/devices/netmap/net_netmap.h b/src/vnet/devices/netmap/net_netmap.h deleted file mode 100644 index fd4253b7c0c..00000000000 --- a/src/vnet/devices/netmap/net_netmap.h +++ /dev/null @@ -1,650 +0,0 @@ -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ - * - * Definitions of constants and the structures used by the netmap - * framework, for the part visible to both kernel and userspace. - * Detailed info on netmap is available with "man netmap" or at - * - * http://info.iet.unipi.it/~luigi/netmap/ - * - * This API is also used to communicate with the VALE software switch - */ - -#ifndef _NET_NETMAP_H_ -#define _NET_NETMAP_H_ - -#define NETMAP_API 11 /* current API version */ - -#define NETMAP_MIN_API 11 /* min and max versions accepted */ -#define NETMAP_MAX_API 15 -/* - * Some fields should be cache-aligned to reduce contention. - * The alignment is architecture and OS dependent, but rather than - * digging into OS headers to find the exact value we use an estimate - * that should cover most architectures. - */ -#define NM_CACHE_ALIGN 128 - -/* - * --- Netmap data structures --- - * - * The userspace data structures used by netmap are shown below. - * They are allocated by the kernel and mmap()ed by userspace threads. - * Pointers are implemented as memory offsets or indexes, - * so that they can be easily dereferenced in kernel and userspace. - - KERNEL (opaque, obviously) - - ==================================================================== - | - USERSPACE | struct netmap_ring - +---->+---------------+ - / | head,cur,tail | - struct netmap_if (nifp, 1 per fd) / | buf_ofs | - +---------------+ / | other fields | - | ni_tx_rings | / +===============+ - | ni_rx_rings | / | buf_idx, len | slot[0] - | | / | flags, ptr | - | | / +---------------+ - +===============+ / | buf_idx, len | slot[1] - | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | - | txring_ofs[1] | +---------------+ - (tx+1 entries) (num_slots entries) - | txring_ofs[t] | | buf_idx, len | slot[n-1] - +---------------+ | flags, ptr | - | rxring_ofs[0] | +---------------+ - | rxring_ofs[1] | - (rx+1 entries) - | rxring_ofs[r] | - +---------------+ - - * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to - * a file descriptor, the mmap()ed region contains a (logically readonly) - * struct netmap_if pointing to struct netmap_ring's. - * - * There is one netmap_ring per physical NIC ring, plus one tx/rx ring - * pair attached to the host stack (this pair is unused for non-NIC ports). - * - * All physical/host stack ports share the same memory region, - * so that zero-copy can be implemented between them. - * VALE switch ports instead have separate memory regions. - * - * The netmap_ring is the userspace-visible replica of the NIC ring. - * Each slot has the index of a buffer (MTU-sized and residing in the - * mmapped region), its length and some flags. An extra 64-bit pointer - * is provided for user-supplied buffers in the tx path. - * - * In user space, the buffer address is computed as - * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE - * - * Added in NETMAP_API 11: - * - * + NIOCREGIF can request the allocation of extra spare buffers from - * the same memory pool. The desired number of buffers must be in - * nr_arg3. The ioctl may return fewer buffers, depending on memory - * availability. nr_arg3 will return the actual value, and, once - * mapped, nifp->ni_bufs_head will be the index of the first buffer. - * - * The buffers are linked to each other using the first uint32_t - * as the index. On close, ni_bufs_head must point to the list of - * buffers to be released. - * - * + NIOCREGIF can request space for extra rings (and buffers) - * allocated in the same memory space. The number of extra rings - * is in nr_arg1, and is advisory. This is a no-op on NICs where - * the size of the memory space is fixed. - * - * + NIOCREGIF can attach to PIPE rings sharing the same memory - * space with a parent device. The ifname indicates the parent device, - * which must already exist. Flags in nr_flags indicate if we want to - * bind the master or slave side, the index (from nr_ringid) - * is just a cookie and does not need to be sequential. - * - * + NIOCREGIF can also attach to 'monitor' rings that replicate - * the content of specific rings, also from the same memory space. - * - * Extra flags in nr_flags support the above functions. - * Application libraries may use the following naming scheme: - * netmap:foo all NIC ring pairs - * netmap:foo^ only host ring pair - * netmap:foo+ all NIC ring + host ring pairs - * netmap:foo-k the k-th NIC ring pair - * netmap:foo{k PIPE ring pair k, master side - * netmap:foo}k PIPE ring pair k, slave side - */ - -/* - * struct netmap_slot is a buffer descriptor - */ -struct netmap_slot { - uint32_t buf_idx; /* buffer index */ - uint16_t len; /* length for this slot */ - uint16_t flags; /* buf changed, etc. */ - uint64_t ptr; /* pointer for indirect buffers */ -}; - -/* - * The following flags control how the slot is used - */ - -#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ - /* - * must be set whenever buf_idx is changed (as it might be - * necessary to recompute the physical address and mapping) - * - * It is also set by the kernel whenever the buf_idx is - * changed internally (e.g., by pipes). Applications may - * use this information to know when they can reuse the - * contents of previously prepared buffers. - */ - -#define NS_REPORT 0x0002 /* ask the hardware to report results */ - /* - * Request notification when slot is used by the hardware. - * Normally transmit completions are handled lazily and - * may be unreported. This flag lets us know when a slot - * has been sent (e.g. to terminate the sender). - */ - -#define NS_FORWARD 0x0004 /* pass packet 'forward' */ - /* - * (Only for physical ports, rx rings with NR_FORWARD set). - * Slot released to the kernel (i.e. before ring->head) with - * this flag set are passed to the peer ring (host/NIC), - * thus restoring the host-NIC connection for these slots. - * This supports efficient traffic monitoring or firewalling. - */ - -#define NS_NO_LEARN 0x0008 /* disable bridge learning */ - /* - * On a VALE switch, do not 'learn' the source port for - * this buffer. - */ - -#define NS_INDIRECT 0x0010 /* userspace buffer */ - /* - * (VALE tx rings only) data is in a userspace buffer, - * whose address is in the 'ptr' field in the slot. - */ - -#define NS_MOREFRAG 0x0020 /* packet has more fragments */ - /* - * (VALE ports only) - * Set on all but the last slot of a multi-segment packet. - * The 'len' field refers to the individual fragment. - */ - -#define NS_PORT_SHIFT 8 -#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) - /* - * The high 8 bits of the flag, if not zero, indicate the - * destination port for the VALE switch, overriding - * the lookup table. - */ - -#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) - /* - * (VALE rx rings only) the high 8 bits - * are the number of fragments. - */ - - -/* - * struct netmap_ring - * - * Netmap representation of a TX or RX ring (also known as "queue"). - * This is a queue implemented as a fixed-size circular array. - * At the software level the important fields are: head, cur, tail. - * - * In TX rings: - * - * head first slot available for transmission. - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] can be used for new packets to send; - * 'head' and 'cur' must be incremented as slots are filled - * with new packets to be sent; - * 'cur' can be moved further ahead if we need more space - * for new transmissions. XXX todo (2014-03-12) - * - * In RX rings: - * - * head first valid received packet - * cur wakeup point. select() and poll() will unblock - * when 'tail' moves past 'cur' - * tail (readonly) first slot reserved to the kernel - * - * [head .. tail-1] contain received packets; - * 'head' and 'cur' must be incremented as slots are consumed - * and can be returned to the kernel; - * 'cur' can be moved further ahead if we want to wait for - * new packets without returning the previous ones. - * - * DATA OWNERSHIP/LOCKING: - * The netmap_ring, and all slots and buffers in the range - * [head .. tail-1] are owned by the user program; - * the kernel only accesses them during a netmap system call - * and in the user thread context. - * - * Other slots and buffers are reserved for use by the kernel - */ -struct netmap_ring { - /* - * buf_ofs is meant to be used through macros. - * It contains the offset of the buffer region from this - * descriptor. - */ - const int64_t buf_ofs; - const uint32_t num_slots; /* number of slots in the ring. */ - const uint32_t nr_buf_size; - const uint16_t ringid; - const uint16_t dir; /* 0: tx, 1: rx */ - - uint32_t head; /* (u) first user slot */ - uint32_t cur; /* (u) wakeup point */ - uint32_t tail; /* (k) first kernel slot */ - - uint32_t flags; - - struct timeval ts; /* (k) time of last *sync() */ - - /* opaque room for a mutex or similar object */ -#if !defined(_WIN32) || defined(__CYGWIN__) - uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; -#else - uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; -#endif - - /* the slots follow. This struct has variable size */ - struct netmap_slot slot[0]; /* array of slots. */ -}; - - -/* - * RING FLAGS - */ -#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ - /* - * updates the 'ts' field on each netmap syscall. This saves - * saves a separate gettimeofday(), and is not much worse than - * software timestamps generated in the interrupt handler. - */ - -#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ - /* - * Enables the NS_FORWARD slot flag for the ring. - */ - - -/* - * Netmap representation of an interface and its queue(s). - * This is initialized by the kernel when binding a file - * descriptor to a port, and should be considered as readonly - * by user programs. The kernel never uses it. - * - * There is one netmap_if for each file descriptor on which we want - * to select/poll. - * select/poll operates on one or all pairs depending on the value of - * nmr_queueid passed on the ioctl. - */ -struct netmap_if { - char ni_name[IFNAMSIZ]; /* name of the interface. */ - const uint32_t ni_version; /* API version, currently unused */ - const uint32_t ni_flags; /* properties */ -#define NI_PRIV_MEM 0x1 /* private memory region */ - - /* - * The number of packet rings available in netmap mode. - * Physical NICs can have different numbers of tx and rx rings. - * Physical NICs also have a 'host' ring pair. - * Additionally, clients can request additional ring pairs to - * be used for internal communication. - */ - const uint32_t ni_tx_rings; /* number of HW tx rings */ - const uint32_t ni_rx_rings; /* number of HW rx rings */ - - uint32_t ni_bufs_head; /* head index for extra bufs */ - uint32_t ni_spare1[5]; - /* - * The following array contains the offset of each netmap ring - * from this structure, in the following order: - * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; - * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. - * - * The area is filled up by the kernel on NIOCREGIF, - * and then only read by userspace code. - */ - const ssize_t ring_ofs[0]; -}; - - -#ifndef NIOCREGIF -/* - * ioctl names and related fields - * - * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, - * whose identity is set in NIOCREGIF through nr_ringid. - * These are non blocking and take no argument. - * - * NIOCGINFO takes a struct ifreq, the interface name is the input, - * the outputs are number of queues and number of descriptor - * for each queue (useful to set number of threads etc.). - * The info returned is only advisory and may change before - * the interface is bound to a file descriptor. - * - * NIOCREGIF takes an interface name within a struct nmre, - * and activates netmap mode on the interface (if possible). - * - * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we - * can pass it down to other NIC-related ioctls. - * - * The actual argument (struct nmreq) has a number of options to request - * different functions. - * The following are used in NIOCREGIF when nr_cmd == 0: - * - * nr_name (in) - * The name of the port (em0, valeXXX:YYY, etc.) - * limited to IFNAMSIZ for backward compatibility. - * - * nr_version (in/out) - * Must match NETMAP_API as used in the kernel, error otherwise. - * Always returns the desired value on output. - * - * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) - * On input, non-zero values may be used to reconfigure the port - * according to the requested values, but this is not guaranteed. - * On output the actual values in use are reported. - * - * nr_ringid (in) - * Indicates how rings should be bound to the file descriptors. - * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) - * are used to indicate the ring number, and nr_flags specifies - * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. - * - * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: - * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control - * the binding as follows: - * 0 (default) binds all physical rings - * NETMAP_HW_RING | ring number binds a single ring pair - * NETMAP_SW_RING binds only the host tx/rx rings - * - * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push - * packets on tx rings only if POLLOUT is set. - * The default is to push any pending packet. - * - * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release - * packets on rx rings also when POLLIN is NOT set. - * The default is to touch the rx ring only with POLLIN. - * Note that this is the opposite of TX because it - * reflects the common usage. - * - * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. - * NETMAP_PRIV_MEM is set on return for ports that do not use - * the global memory allocator. - * This information is not significant and applications - * should look at the region id in nr_arg2 - * - * nr_flags is the recommended mode to indicate which rings should - * be bound to a file descriptor. Values are NR_REG_* - * - * nr_arg1 (in) The number of extra rings to be reserved. - * Especially when allocating a VALE port the system only - * allocates the amount of memory needed for the port. - * If more shared memory rings are desired (e.g. for pipes), - * the first invocation for the same basename/allocator - * should specify a suitable number. Memory cannot be - * extended after the first allocation without closing - * all ports on the same region. - * - * nr_arg2 (in/out) The identity of the memory region used. - * On input, 0 means the system decides autonomously, - * other values may try to select a specific region. - * On return the actual value is reported. - * Region '1' is the global allocator, normally shared - * by all interfaces. Other values are private regions. - * If two ports the same region zero-copy is possible. - * - * nr_arg3 (in/out) number of extra buffers to be allocated. - * - * - * - * nr_cmd (in) if non-zero indicates a special command: - * NETMAP_BDG_ATTACH and nr_name = vale*:ifname - * attaches the NIC to the switch; nr_ringid specifies - * which rings to use. Used by vale-ctl -a ... - * nr_arg1 = NETMAP_BDG_HOST also attaches the host port - * as in vale-ctl -h ... - * - * NETMAP_BDG_DETACH and nr_name = vale*:ifname - * disconnects a previously attached NIC. - * Used by vale-ctl -d ... - * - * NETMAP_BDG_LIST - * list the configuration of VALE switches. - * - * NETMAP_BDG_VNET_HDR - * Set the virtio-net header length used by the client - * of a VALE switch port. - * - * NETMAP_BDG_NEWIF - * create a persistent VALE port with name nr_name. - * Used by vale-ctl -n ... - * - * NETMAP_BDG_DELIF - * delete a persistent VALE port. Used by vale-ctl -d ... - * - * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific - * - * - * - */ - - -/* - * struct nmreq overlays a struct ifreq (just the name) - */ -struct nmreq { - char nr_name[IFNAMSIZ]; - uint32_t nr_version; /* API version */ - uint32_t nr_offset; /* nifp offset in the shared region */ - uint32_t nr_memsize; /* size of the shared region */ - uint32_t nr_tx_slots; /* slots in tx rings */ - uint32_t nr_rx_slots; /* slots in rx rings */ - uint16_t nr_tx_rings; /* number of tx rings */ - uint16_t nr_rx_rings; /* number of rx rings */ - - uint16_t nr_ringid; /* ring(s) we care about */ -#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ -#define NETMAP_SW_RING 0x2000 /* only host ring pair */ - -#define NETMAP_RING_MASK 0x0fff /* the ring number */ - -#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ - -#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ - - uint16_t nr_cmd; -#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ -#define NETMAP_BDG_DETACH 2 /* detach the NIC */ -#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ -#define NETMAP_BDG_LIST 4 /* get bridge's info */ -#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ -#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ -#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ -#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ -#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ -#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ -#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ -#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ -#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ - uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ -#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ - - uint16_t nr_arg2; - uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ - uint32_t nr_flags; - /* various modes, extends nr_ringid */ - uint32_t spare2[1]; -}; - -#define NR_REG_MASK 0xf /* values for nr_flags */ -enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ - NR_REG_ALL_NIC = 1, - NR_REG_SW = 2, - NR_REG_NIC_SW = 3, - NR_REG_ONE_NIC = 4, - NR_REG_PIPE_MASTER = 5, - NR_REG_PIPE_SLAVE = 6, -}; -/* monitor uses the NR_REG to select the rings to monitor */ -#define NR_MONITOR_TX 0x100 -#define NR_MONITOR_RX 0x200 -#define NR_ZCOPY_MON 0x400 -/* request exclusive access to the selected rings */ -#define NR_EXCLUSIVE 0x800 -/* request ptnetmap host support */ -#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ -#define NR_PTNETMAP_HOST 0x1000 -#define NR_RX_RINGS_ONLY 0x2000 -#define NR_TX_RINGS_ONLY 0x4000 -/* Applications set this flag if they are able to deal with virtio-net headers, - * that is send/receive frames that start with a virtio-net header. - * If not set, NIOCREGIF will fail with netmap ports that require applications - * to use those headers. If the flag is set, the application can use the - * NETMAP_VNET_HDR_GET command to figure out the header length. */ -#define NR_ACCEPT_VNET_HDR 0x8000 - - -/* - * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined - * in ws2def.h but not sure if they are in the form we need. - * XXX so we redefine them - * in a convenient way to use for DeviceIoControl signatures - */ -#ifdef _WIN32 -#undef _IO // ws2def.h -#define _WIN_NM_IOCTL_TYPE 40000 -#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ - METHOD_BUFFERED, FILE_ANY_ACCESS ) -#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ - METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) - -#define _IOWR(_c, _n, _s) _IO(_c, _n) - -/* We havesome internal sysctl in addition to the externally visible ones */ -#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT -#define NETMAP_POLL _IO('i', 162) - -/* and also two setsockopt for sysctl emulation */ -#define NETMAP_SETSOCKOPT _IO('i', 140) -#define NETMAP_GETSOCKOPT _IO('i', 141) - - -//These linknames are for the Netmap Core Driver -#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" -#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" - -//Definition of a structure used to pass a virtual address within an IOCTL -typedef struct _MEMORY_ENTRY { - PVOID pUsermodeVirtualAddress; -} MEMORY_ENTRY, *PMEMORY_ENTRY; - -typedef struct _POLL_REQUEST_DATA { - int events; - int timeout; - int revents; -} POLL_REQUEST_DATA; - -#endif /* _WIN32 */ - -/* - * FreeBSD uses the size value embedded in the _IOWR to determine - * how much to copy in/out. So we need it to match the actual - * data structure we pass. We put some spares in the structure - * to ease compatibility with other versions - */ -#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ -#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ -#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ -#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ -#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */ -#endif /* !NIOCREGIF */ - - -/* - * Helper functions for kernel and userspace - */ - -/* - * check if space is available in the ring. - */ -static inline int -nm_ring_empty(struct netmap_ring *ring) -{ - return (ring->cur == ring->tail); -} - -/* - * Opaque structure that is passed to an external kernel - * module via ioctl(fd, NIOCCONFIG, req) for a user-owned - * bridge port (at this point ephemeral VALE interface). - */ -#define NM_IFRDATA_LEN 256 -struct nm_ifreq { - char nifr_name[IFNAMSIZ]; - char data[NM_IFRDATA_LEN]; -}; - -/* - * netmap kernel thread configuration - */ -/* bhyve/vmm.ko MSIX parameters for IOCTL */ -struct ptn_vmm_ioctl_msix { - uint64_t msg; - uint64_t addr; -}; - -/* IOCTL parameters */ -struct nm_kth_ioctl { - u_long com; - /* TODO: use union */ - union { - struct ptn_vmm_ioctl_msix msix; - } data; -}; - -/* Configuration of a ptnetmap ring */ -struct ptnet_ring_cfg { - uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ - uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ - struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ -}; -#endif /* _NET_NETMAP_H_ */ diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api deleted file mode 100644 index a14753cad9c..00000000000 --- a/src/vnet/devices/netmap/netmap.api +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -option version = "1.0.0"; - -/** \brief Create netmap - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param netmap_if_name - interface name - @param hw_addr - interface MAC - @param use_random_hw_addr - use random generated MAC - @param is_pipe - is pipe - @param is_master - 0=slave, 1=master -*/ -autoreply define netmap_create -{ - u32 client_index; - u32 context; - - u8 netmap_if_name[64]; - u8 hw_addr[6]; - u8 use_random_hw_addr; - u8 is_pipe; - u8 is_master; -}; - -/** \brief Delete netmap - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param netmap_if_name - interface name -*/ -autoreply define netmap_delete -{ - u32 client_index; - u32 context; - - u8 netmap_if_name[64]; -}; - -/* - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c deleted file mode 100644 index 03d96216bb0..00000000000 --- a/src/vnet/devices/netmap/netmap.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <fcntl.h> -#include <vnet/devices/netmap/net_netmap.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/netmap/netmap.h> - -netmap_main_t netmap_main; - -static u32 -netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, - u32 flags) -{ - /* nothing for now */ - return 0; -} - -static clib_error_t * -netmap_fd_read_ready (clib_file_t * uf) -{ - vlib_main_t *vm = vlib_get_main (); - netmap_main_t *nm = &netmap_main; - u32 idx = uf->private_data; - - nm->pending_input_bitmap = - clib_bitmap_set (nm->pending_input_bitmap, idx, 1); - - /* Schedule the rx node */ - vlib_node_set_interrupt_pending (vm, netmap_input_node.index); - - return 0; -} - -static void -close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) -{ - if (nif->clib_file_index != ~0) - { - clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); - nif->clib_file_index = ~0; - } - else if (nif->fd > -1) - close (nif->fd); - - if (nif->mem_region) - { - netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; - if (--reg->refcnt == 0) - { - munmap (reg->mem, reg->region_size); - reg->region_size = 0; - } - } - - - mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, - &nif->if_index); - vec_free (nif->host_if_name); - vec_free (nif->req); - - clib_memset (nif, 0, sizeof (*nif)); - pool_put (nm->interfaces, nif); -} - -int -netmap_worker_thread_enable () -{ - /* if worker threads are enabled, switch to polling mode */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - netmap_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - - return 0; -} - -int -netmap_worker_thread_disable () -{ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - netmap_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - - return 0; -} - -int -netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, - u8 is_pipe, u8 is_master, u32 * sw_if_index) -{ - netmap_main_t *nm = &netmap_main; - int ret = 0; - netmap_if_t *nif = 0; - u8 hw_addr[6]; - clib_error_t *error = 0; - vnet_sw_interface_t *sw; - vnet_main_t *vnm = vnet_get_main (); - uword *p; - struct nmreq *req = 0; - netmap_mem_region_t *reg; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int fd; - - p = mhash_get (&nm->if_index_by_host_if_name, if_name); - if (p) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; - - fd = open ("/dev/netmap", O_RDWR); - if (fd < 0) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; - - pool_get (nm->interfaces, nif); - nif->if_index = nif - nm->interfaces; - nif->fd = fd; - nif->clib_file_index = ~0; - - vec_validate (req, 0); - nif->req = req; - req->nr_version = NETMAP_API; - req->nr_flags = NR_REG_ALL_NIC; - - if (is_pipe) - req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; - else - req->nr_flags = NR_REG_ALL_NIC; - - req->nr_flags |= NR_ACCEPT_VNET_HDR; - snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); - req->nr_name[IFNAMSIZ - 1] = 0; - - if (ioctl (nif->fd, NIOCREGIF, req)) - { - ret = VNET_API_ERROR_NOT_CONNECTED; - goto error; - } - - nif->mem_region = req->nr_arg2; - vec_validate (nm->mem_regions, nif->mem_region); - reg = &nm->mem_regions[nif->mem_region]; - if (reg->region_size == 0) - { - reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); - clib_warning ("mem %p", reg->mem); - if (reg->mem == MAP_FAILED) - { - ret = VNET_API_ERROR_NOT_CONNECTED; - goto error; - } - reg->region_size = req->nr_memsize; - } - reg->refcnt++; - - nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); - nif->first_rx_ring = 0; - nif->last_rx_ring = 0; - nif->first_tx_ring = 0; - nif->last_tx_ring = 0; - nif->host_if_name = if_name; - nif->per_interface_next_index = ~0; - - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&nif->lockp); - - { - clib_file_t template = { 0 }; - template.read_function = netmap_fd_read_ready; - template.file_descriptor = nif->fd; - template.private_data = nif->if_index; - nif->clib_file_index = clib_file_add (&file_main, &template); - } - - /*use configured or generate random MAC address */ - if (hw_addr_set) - memcpy (hw_addr, hw_addr_set, 6); - else - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - memcpy (hw_addr + 2, &rnd, sizeof (rnd)); - hw_addr[0] = 2; - hw_addr[1] = 0xfe; - } - - error = ethernet_register_interface (vnm, netmap_device_class.index, - nif->if_index, hw_addr, - &nif->hw_if_index, - netmap_eth_flag_change); - - if (error) - { - clib_error_report (error); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - - sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); - nif->sw_if_index = sw->sw_if_index; - - mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); - - if (sw_if_index) - *sw_if_index = nif->sw_if_index; - - if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) - netmap_worker_thread_enable (); - - return 0; - -error: - close_netmap_if (nm, nif); - return ret; -} - -int -netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) -{ - vnet_main_t *vnm = vnet_get_main (); - netmap_main_t *nm = &netmap_main; - netmap_if_t *nif; - uword *p; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); - if (p == NULL) - { - clib_warning ("Host interface %s does not exist", host_if_name); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - nif = pool_elt_at_index (nm->interfaces, p[0]); - - /* bring down the interface */ - vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); - - ethernet_delete_interface (vnm, nif->hw_if_index); - - close_netmap_if (nm, nif); - - if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) - netmap_worker_thread_disable (); - - return 0; -} - -static clib_error_t * -netmap_init (vlib_main_t * vm) -{ - netmap_main_t *nm = &netmap_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; - - clib_memset (nm, 0, sizeof (netmap_main_t)); - - nm->input_cpu_first_index = 0; - nm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - nm->input_cpu_first_index = tr->first_index; - nm->input_cpu_count = tr->count; - } - - mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); - - vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - return 0; -} - -VLIB_INIT_FUNCTION (netmap_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h deleted file mode 100644 index 29f855fda8e..00000000000 --- a/src/vnet/devices/netmap/netmap.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ -/* - * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <vppinfra/lock.h> - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; - u8 *host_if_name; - uword if_index; - u32 hw_if_index; - u32 sw_if_index; - u32 clib_file_index; - - u32 per_interface_next_index; - u8 is_admin_up; - - /* netmap */ - struct nmreq *req; - u16 mem_region; - int fd; - struct netmap_if *nifp; - u16 first_tx_ring; - u16 last_tx_ring; - u16 first_rx_ring; - u16 last_rx_ring; - -} netmap_if_t; - -typedef struct -{ - char *mem; - u32 region_size; - int refcnt; -} netmap_mem_region_t; - -typedef struct -{ - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - netmap_if_t *interfaces; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* rx buffer cache */ - u32 **rx_buffers; - - /* hash of host interface names */ - mhash_t if_index_by_host_if_name; - - /* vector of memory regions */ - netmap_mem_region_t *mem_regions; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; -} netmap_main_t; - -extern netmap_main_t netmap_main; -extern vnet_device_class_t netmap_device_class; -extern vlib_node_registration_t netmap_input_node; - -int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, - u8 is_pipe, u8 is_master, u32 * sw_if_index); -int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); - - -/* Macros and helper functions from sys/net/netmap_user.h */ - -#ifdef _NET_NETMAP_H_ - -#define _NETMAP_OFFSET(type, ptr, offset) \ - ((type)(void *)((char *)(ptr) + (offset))) - -#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) - -#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index] ) - -#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ - nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) - -#define NETMAP_BUF(ring, index) \ - ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) - -#define NETMAP_BUF_IDX(ring, buf) \ - ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ - (ring)->nr_buf_size ) - -static inline uint32_t -nm_ring_next (struct netmap_ring *ring, uint32_t i) -{ - return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); -} - - -/* - * Return 1 if we have pending transmissions in the tx ring. - * When everything is complete ring->head = ring->tail + 1 (modulo ring size) - */ -static inline int -nm_tx_pending (struct netmap_ring *ring) -{ - return nm_ring_next (ring, ring->tail) != ring->head; -} - -static inline uint32_t -nm_ring_space (struct netmap_ring *ring) -{ - int ret = ring->tail - ring->cur; - if (ret < 0) - ret += ring->num_slots; - return ret; -} -#endif - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/netmap_api.c b/src/vnet/devices/netmap/netmap_api.c deleted file mode 100644 index ee05ec22d25..00000000000 --- a/src/vnet/devices/netmap/netmap_api.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - *------------------------------------------------------------------ - * netmap_api.c - netmap api - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <vnet/vnet.h> -#include <vlibmemory/api.h> - -#include <vnet/interface.h> -#include <vnet/api_errno.h> -#include <vnet/devices/netmap/netmap.h> - -#include <vnet/vnet_msg_enum.h> - -#define vl_typedefs /* define message structures */ -#include <vnet/vnet_all_api_h.h> -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include <vnet/vnet_all_api_h.h> -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include <vnet/vnet_all_api_h.h> -#undef vl_printfun - -#include <vlibapi/api_helper_macros.h> - -#define foreach_vpe_api_msg \ -_(NETMAP_CREATE, netmap_create) \ -_(NETMAP_DELETE, netmap_delete) \ - -static void -vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_netmap_create_reply_t *rmp; - int rv = 0; - u8 *if_name = NULL; - - if_name = format (0, "%s", mp->netmap_if_name); - vec_add1 (if_name, 0); - - rv = - netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, - mp->is_pipe, mp->is_master, 0); - - vec_free (if_name); - - REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); -} - -static void -vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - vl_api_netmap_delete_reply_t *rmp; - int rv = 0; - u8 *if_name = NULL; - - if_name = format (0, "%s", mp->netmap_if_name); - vec_add1 (if_name, 0); - - rv = netmap_delete_if (vm, if_name); - - vec_free (if_name); - - REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); -} - -/* - * netmap_api_hookup - * Add vpe's API message handlers to the table. - * vlib has already mapped shared memory and - * added the client registration handlers. - * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() - */ -#define vl_msg_name_crc_list -#include <vnet/vnet_all_api_h.h> -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (api_main_t * am) -{ -#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); - foreach_vl_msg_name_crc_netmap; -#undef _ -} - -static clib_error_t * -netmap_api_hookup (vlib_main_t * vm) -{ - api_main_t *am = vlibapi_get_main (); - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vpe_api_msg; -#undef _ - - /* - * Set up the (msg_name, crc, message-id) table - */ - setup_message_id_table (am); - - return 0; -} - -VLIB_API_INIT_FUNCTION (netmap_api_hookup); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c deleted file mode 100644 index bc55ecc8eb1..00000000000 --- a/src/vnet/devices/netmap/node.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------ - */ - -#include <stdint.h> -#include <net/if.h> -#include <sys/ioctl.h> - -#include <vlib/vlib.h> -#include <vlib/unix/unix.h> -#include <vnet/ethernet/ethernet.h> -#include <vnet/devices/devices.h> -#include <vnet/feature/feature.h> - -#include <vnet/devices/netmap/net_netmap.h> -#include <vnet/devices/netmap/netmap.h> - -#define foreach_netmap_input_error - -typedef enum -{ -#define _(f,s) NETMAP_INPUT_ERROR_##f, - foreach_netmap_input_error -#undef _ - NETMAP_INPUT_N_ERROR, -} netmap_input_error_t; - -static char *netmap_input_error_strings[] = { -#define _(n,s) s, - foreach_netmap_input_error -#undef _ -}; - -typedef struct -{ - u32 next_index; - u32 hw_if_index; - struct netmap_slot slot; -} netmap_input_trace_t; - -static u8 * -format_netmap_input_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); - u32 indent = format_get_indent (s); - - s = format (s, "netmap: hw_if_index %d next-index %d", - t->hw_if_index, t->next_index); - s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", - format_white_space, indent + 2, - t->slot.flags, t->slot.len, t->slot.buf_idx); - return s; -} - -always_inline void -buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) -{ - vlib_buffer_t *b = vlib_get_buffer (vm, bi); - vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); - vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); - - /* update first buffer */ - first_b->total_length_not_including_first_buffer += b->current_length; - - /* update previous buffer */ - prev_b->next_buffer = bi; - prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; - - /* update current buffer */ - b->next_buffer = 0; -} - -always_inline uword -netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, netmap_if_t * nif) -{ - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - uword n_trace = vlib_get_trace_count (vm, node); - netmap_main_t *nm = &netmap_main; - u32 n_rx_packets = 0; - u32 n_rx_bytes = 0; - u32 *to_next = 0; - u32 n_free_bufs; - struct netmap_ring *ring; - int cur_ring; - u32 thread_index = vm->thread_index; - u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - - if (nif->per_interface_next_index != ~0) - next_index = nif->per_interface_next_index; - - n_free_bufs = vec_len (nm->rx_buffers[thread_index]); - if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) - { - vec_validate (nm->rx_buffers[thread_index], - VLIB_FRAME_SIZE + n_free_bufs - 1); - n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], - VLIB_FRAME_SIZE); - _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; - } - - cur_ring = nif->first_rx_ring; - while (cur_ring <= nif->last_rx_ring && n_free_bufs) - { - int r = 0; - u32 cur_slot_index; - ring = NETMAP_RXRING (nif->nifp, cur_ring); - r = nm_ring_space (ring); - - if (!r) - { - cur_ring++; - continue; - } - - if (r > n_free_bufs) - r = n_free_bufs; - - cur_slot_index = ring->cur; - while (r) - { - u32 n_left_to_next; - u32 next0 = next_index; - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (r && n_left_to_next) - { - vlib_buffer_t *first_b0 = 0; - u32 offset = 0; - u32 bi0 = 0, first_bi0 = 0, prev_bi0; - u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots; - u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; - struct netmap_slot *slot = &ring->slot[cur_slot_index]; - u32 data_len = slot->len; - - /* prefetch 2 slots in advance */ - CLIB_PREFETCH (&ring->slot[next2_slot_index], - CLIB_CACHE_LINE_BYTES, LOAD); - /* prefetch start of next packet */ - CLIB_PREFETCH (NETMAP_BUF - (ring, ring->slot[next_slot_index].buf_idx), - CLIB_CACHE_LINE_BYTES, LOAD); - - while (data_len && n_free_bufs) - { - vlib_buffer_t *b0; - /* grab free buffer */ - u32 last_empty_buffer = - vec_len (nm->rx_buffers[thread_index]) - 1; - prev_bi0 = bi0; - bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; - b0 = vlib_get_buffer (vm, bi0); - _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer; - n_free_bufs--; - - /* copy data */ - u32 bytes_to_copy = - data_len > n_buffer_bytes ? n_buffer_bytes : data_len; - b0->current_data = 0; - clib_memcpy_fast (vlib_buffer_get_current (b0), - (u8 *) NETMAP_BUF (ring, slot->buf_idx) + - offset, bytes_to_copy); - - /* fill buffer header */ - b0->current_length = bytes_to_copy; - - if (offset == 0) - { - b0->total_length_not_including_first_buffer = 0; - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = - nif->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - first_bi0 = bi0; - first_b0 = vlib_get_buffer (vm, first_bi0); - } - else - buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); - - offset += bytes_to_copy; - data_len -= bytes_to_copy; - } - - /* trace */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); - if (PREDICT_FALSE (n_trace > 0)) - { - if (PREDICT_TRUE (first_b0 != 0)) - { - netmap_input_trace_t *tr; - vlib_trace_buffer (vm, node, next0, first_b0, - /* follow_chain */ 0); - vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = nif->hw_if_index; - memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); - } - } - - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (nif->sw_if_index, &next0, - first_b0); - - /* enque and take next packet */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, first_bi0, - next0); - - /* next packet */ - n_rx_packets++; - n_rx_bytes += slot->len; - to_next[0] = first_bi0; - to_next += 1; - n_left_to_next--; - cur_slot_index = next_slot_index; - - r--; - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - ring->head = ring->cur = cur_slot_index; - cur_ring++; - } - - if (n_rx_packets) - ioctl (nif->fd, NIOCRXSYNC, NULL); - - vlib_increment_combined_counter - (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); - - vnet_device_increment_rx_packets (thread_index, n_rx_packets); - - return n_rx_packets; -} - -VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - int i; - u32 n_rx_packets = 0; - u32 thread_index = vm->thread_index; - netmap_main_t *nm = &netmap_main; - netmap_if_t *nmi; - - for (i = 0; i < vec_len (nm->interfaces); i++) - { - nmi = vec_elt_at_index (nm->interfaces, i); - if (nmi->is_admin_up && - (i % nm->input_cpu_count) == - (thread_index - nm->input_cpu_first_index)) - n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); - } - - return n_rx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (netmap_input_node) = { - .name = "netmap-input", - .sibling_of = "device-input", - .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, - .format_trace = format_netmap_input_trace, - .type = VLIB_NODE_TYPE_INPUT, - /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ - .state = VLIB_NODE_STATE_INTERRUPT, - .n_errors = NETMAP_INPUT_N_ERROR, - .error_strings = netmap_input_error_strings, -}; -/* *INDENT-ON* */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index a0bf8f72d14..400caf5b513 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -35,7 +35,6 @@ #include <vnet/bonding/bond.api.h> #include <vnet/devices/af_packet/af_packet.api.h> -#include <vnet/devices/netmap/netmap.api.h> #include <vnet/devices/virtio/vhost_user.api.h> #include <vnet/devices/tap/tapv2.api.h> #include <vnet/gre/gre.api.h> diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index deab6a22593..7846b8f34d7 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -32,7 +32,6 @@ import "vpp/api/vpe_types.api"; * GENEVE APIs: see .../src/vnet/geneve/{geneve.api, geneve_api.c} * LLDP APIs: see .../src/vnet/lldp/{lldp.api, lldp_api.c} * AF-PACKET APIs: see ... /vnet/devices/af_packet/{af_packet.api, af_packet_api.c} - * NETMAP APIs: see ... /src/vnet/devices/netmap/{netmap.api, netmap_api.c} * VHOST-USER APIs: see .../vnet/devices/virtio/{vhost_user.api, vhost_user_api.c} * VXLAN GPE APIs: see .../src/vnet/vxlan-gpe/{vxlan_gpe.api, vxlan_gpe_api.c} * GRE APIs: see .../src/vnet/gre/{gre.api, gre_api.c} |