/* * Copyright (c) 2016 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * pci.c: Linux user space PCI bus management. * * Copyright (c) 2008 Eliot Dresselhaus * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
/* NOTE(review): the header names of the #include directives below are
   missing in this copy of the file - restore them from the upstream source. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* sysfs locations used by this file. */
static const char *sysfs_pci_dev_path = "/sys/bus/pci/devices";
static const char *sysfs_pci_drv_path = "/sys/bus/pci/drivers";
static char *sysfs_mod_vfio_noiommu =
  "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";

/* Log at DEBUG level through pci_main.log_default; the message is prefixed
   with the PCI address looked up from dev->handle, so `dev' must be a pointer
   to an object with a `handle' member. */
#define pci_log_debug(vm, dev, f, ...) \
  vlib_log(VLIB_LOG_LEVEL_DEBUG, pci_main.log_default, "%U: " f, \
           format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)

/* Same as pci_log_debug, but at ERR level. */
#define pci_log_err(vm, dev, f, ...) \
  vlib_log(VLIB_LOG_LEVEL_ERR, pci_main.log_default, "%U: " f, \
           format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)

/* One resource region of a device: a file descriptor, an address and a size.
   Element type of linux_pci_device_t.regions. */
typedef struct
{
  int fd;
  void *addr;
  size_t size;
} linux_pci_region_t;

/* One interrupt source: a file descriptor, a clib file index and the handler
   to call. The anonymous union holds either an INTx or an MSI-X handler. */
typedef struct
{
  int fd;
  u32 clib_file_index;
  union
  {
    pci_intx_handler_function_t *intx_handler;
    pci_msix_handler_function_t *msix_handler;
  };
} linux_pci_irq_t;

/* Kind of device; UNKNOWN is the zero value. */
typedef enum
{
  LINUX_PCI_DEVICE_TYPE_UNKNOWN,
  LINUX_PCI_DEVICE_TYPE_UIO,
  LINUX_PCI_DEVICE_TYPE_VFIO,
} linux_pci_device_type_t;

/* Per-device state; instances are stored in linux_pci_main.linux_pci_devices
   and looked up by handle. */
typedef struct
{
  linux_pci_device_type_t type;
  vlib_pci_dev_handle_t handle;
  vlib_pci_addr_t addr;
  u32 numa_node;

  /* Resource file descriptors. */
  linux_pci_region_t *regions;

  /* File descriptor for config space read/write. */
  int config_fd;
  u64 config_offset;

  /* Device File descriptor */
  int fd;

  /* read/write file descriptor for io bar */
  int io_fd;
  u64 io_offset;

  /* Minor device for uio device. */
  u32 uio_minor;

  /* Interrupt handlers */
  linux_pci_irq_t intx_irq;
  linux_pci_irq_t *msix_irqs;

  /* private data */
  uword private_data;

  /* NOTE(review): presumably set when the device can DMA using virtual
     addresses - confirm where this flag is written. */
  u8 supports_va_dma;

} linux_pci_device_t;

/* Pool of PCI devices.
*/ typedef struct { vlib_main_t *vlib_main; linux_pci_device_t *linux_pci_devices; } linux_pci_main_t; extern linux_pci_main_t linux_pci_main; static linux_pci_device_t * linux_pci_get_device (vlib_pci_dev_handle_t h) { linux_pci_main_t *lpm = &linux_pci_main; return pool_elt_at_index (lpm->linux_pci_devices, h); } uword vlib_pci_get_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h) { linux_pci_device_t *d = linux_pci_get_device (h); return d->private_data; } void vlib_pci_set_private_data (vlib_main_t * vm, vlib_pci_dev_handle_t h, uword private_data) { linux_pci_device_t *d = linux_pci_get_device (h); d->private_data = private_data; } vlib_pci_addr_t * vlib_pci_get_addr (vlib_main_t * vm, vlib_pci_dev_handle_t h) { linux_pci_device_t *d = linux_pci_get_device (h); return &d->addr; } u32 vlib_pci_get_numa_node (vlib_main_t * vm, vlib_pci_dev_handle_t h) { linux_pci_device_t *d = linux_pci_get_device (h); return d->numa_node; } u32 vlib_pci_get_num_msix_interrupts (vlib_main_t * vm, vlib_pci_dev_handle_t h) { linux_pci_device_t *d = linux_pci_get_device (h); if (d->type == LINUX_PCI_DEVICE_TYPE_VFIO) { struct vfio_irq_info ii = { 0 }; ii.argsz = sizeof (struct vfio_irq_info); ii.index = VFIO_PCI_MSIX_IRQ_INDEX; if (ioctl (d->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0) return 0; return ii.count; } return 0; } /* Call to allocate/initialize the pci subsystem. This is not an init function so that users can explicitly enable pci only when it's needed. 
*/ clib_error_t *pci_bus_init (vlib_main_t * vm); linux_pci_main_t linux_pci_main; vlib_pci_device_info_t * vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, clib_error_t ** error) { clib_error_t *err; vlib_pci_device_info_t *di; u8 *f = 0; u32 tmp; int fd; u8 *tmpstr; clib_bitmap_t *bmp = 0; di = clib_mem_alloc (sizeof (vlib_pci_device_info_t)); clib_memset (di, 0, sizeof (vlib_pci_device_info_t)); di->addr.as_u32 = addr->as_u32; u8 *dev_dir_name = format (0, "%s/%U", sysfs_pci_dev_path, format_vlib_pci_addr, addr); f = format (0, "%v/config%c", dev_dir_name, 0); fd = open ((char *) f, O_RDWR); /* Try read-only access if write fails. */ if (fd < 0) fd = open ((char *) f, O_RDONLY); if (fd < 0) { err = clib_error_return_unix (0, "open `%s'", f); goto error; } /* You can only read more that 64 bytes of config space as root; so we try to read the full space but fall back to just the first 64 bytes. */ if (read (fd, &di->config_data, sizeof (di->config_data)) < sizeof (di->config0)) { err = clib_error_return_unix (0, "read `%s'", f); close (fd); goto error; } { static pci_config_header_t all_ones; if (all_ones.vendor_id == 0) clib_memset (&all_ones, ~0, sizeof (all_ones)); if (!memcmp (&di->config0.header, &all_ones, sizeof (all_ones))) { err = clib_error_return (0, "invalid PCI config for `%s'", f); close (fd); goto error; } } if (di->config0.header.header_type == 0) pci_config_type0_little_to_host (&di->config0); else pci_config_type1_little_to_host (&di->config1); di->numa_node = -1; vec_reset_length (f); f = format (f, "%v/numa_node%c", dev_dir_name, 0); err = clib_sysfs_read ((char *) f, "%d", &di->numa_node); if (err) { di->numa_node = -1; clib_error_free (err); } if (di->numa_node == -1) { /* if '/sys/bus/pci/devices//numa_node' returns -1 and it is a SMP system, set numa_node to 0. 
*/ if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U", unformat_bitmap_list, &bmp))) clib_error_free (err); if (clib_bitmap_count_set_bits (bmp) == 1) di->numa_node = 0; } vec_reset_length (f); f = format (f, "%v/class%c", dev_dir_name, 0); err = clib_sysfs_read ((char *) f, "0x%x", &tmp); if (err) goto error; di->device_class = tmp >> 8; vec_reset_length (f); f = format (f, "%v/vendor%c", dev_dir_name, 0); err = clib_sysfs_read ((char *) f, "0x%x", &tmp); if (err) goto error; di->vendor_id = tmp; vec_reset_length (f); f = format (f, "%v/device%c", dev_dir_name, 0); err = clib_sysfs_read ((char *) f, "0x%x", &tmp); if (err) goto error; di->device_id = tmp; vec_reset_length (f); f = format (f, "%v/driver%c", dev_dir_name, 0); di->driver_name = clib_sysfs_link_to_name ((char *) f); if (!di->driver_name) di->driver_name = format (0, "%c", 0); di->iommu_group = -1; vec_reset_length (f); f = format (f, "%v/iommu_group%c", dev_dir_name, 0); tmpstr = clib_sysfs_link_to_name ((char *) f); if (tmpstr) { di->iommu_group = atoi ((char *) tmpstr); vec_free (tmpstr); } vec_reset_length (f); f = format (f, "%v/iommu_group/name%c", dev_dir_name, 0); err = clib_sysfs_read ((char *) f, "%s", &tmpstr); if (err == 0) { if (strncmp ((char *) tmpstr, "vfio-noiommu", 12) == 0) di->flags |= VLIB_PCI_DEVICE_INFO_F_NOIOMMU; vec_free (tmpstr); } else clib_error_free (err); close (fd); vec_reset_length (f); f = format (f, "%v/vpd%c", dev_dir_name, 0); fd = open ((char *) f, O_RDONLY); if (fd >= 0) { while (1) { u8 tag[3]; u8 *data = 0; uword len; if (read (fd, &tag, 3) != 3) break; if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91) break; len = (tag[2] << 8) | tag[1]; vec_validate (data, len); if (read (fd, data, len) != len) { vec_free (data); break; } if (tag[0] == 0x82) di->product_name = data; else if (tag[0] == 0x90) di->vpd_r = data; else if (tag[0] == 0x91) di->vpd_w = data; data = 0; } close (fd); } goto done; error: vlib_pci_free_device_info (di); di = 0; done: 
vec_free (bmp); vec_free (f); vec_free (dev_dir_name); if (error) *error = err; else clib_error_free (err); return di; } static int directory_exists (char *path) { struct stat s = { 0 }; if (stat (path, &s) == -1) return 0; return S_ISDIR (s.st_mode); } clib_error_t * vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, char *uio_drv_name) { clib_error_t *error = 0; u8 *s = 0, *driver_name = 0; DIR *dir = 0; struct dirent *e; vlib_pci_device_info_t *di; int fd, clear_driver_override = 0; u8 *dev_dir_name = format (0, "%s/%U", sysfs_pci_dev_path, format_vlib_pci_addr, addr); di = vlib_pci_get_device_info (vm, addr, &error); if (error) return error; if (strncmp ("auto", uio_drv_name, 5) == 0) { int vfio_pci_loaded = 0; if (directory_exists ("/sys/module/vfio_pci")) vfio_pci_loaded = 1; if (di->iommu_group != -1) { /* device is bound to IOMMU group */ if
#!/usr/bin/env python3

import unittest
import os
import socket
from socket import AF_INET6, inet_pton, inet_ntop

from framework import tag_fixme_vpp_workers
from framework import VppTestCase, VppTestRunner
from vpp_neighbor import VppNeighbor, find_nbr
from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \
    VppIpTable, DpoProto, FibPathType, VppIpInterfaceAddress
from vpp_papi import VppEnum
from vpp_ip import VppIpPuntRedirect

import scapy.compat
from scapy.packet import Raw
from scapy.layers.l2 import Ether, ARP, Dot1Q
from scapy.layers.inet import IP, UDP, TCP
from scapy.layers.inet6 import IPv6, ipv6nh, ICMPv6ND_NS, ICMPv6ND_NA, \
    ICMPv6NDOptSrcLLAddr, ICMPv6NDOptDstLLAddr, ICMPv6EchoRequest, \
    ICMPv6EchoReply
from scapy.utils6 import in6_ptop, in6_getnsma, in6_getnsmac, in6_ismaddr


class TestNDPROXY(VppTestCase):
    """ IP6 ND (mirror) Proxy Test Case """

    @classmethod
    def setUpClass(cls):
        """Run the parent class set-up, then create pg interfaces 0 and 1.

        unittest does not call tearDownClass() when setUpClass() raises, so
        the parent tear-down is invoked explicitly if interface creation
        fails, before the exception is re-raised.
        """
        super(TestNDPROXY, cls).setUpClass()
        try:
            cls.create_pg_interfaces(range(2))
        except Exception:
            super(TestNDPROXY, cls).tearDownClass()
            raise

    @classmethod
    def tearDownClass(cls):
        """Delegate class-level tear-down to the parent class."""
        super(TestNDPROXY, cls).tearDownClass()

    def setUp(self):
        """Per-test set-up: bring up every pg interface with IPv6 and no RAs."""
        super(TestNDPROXY, self).setUp()
        for pg_if in self.pg_interfaces:
            # Order matters: admin-up, then address, then router-advert off.
            pg_if.admin_up()
            pg_if.config_ip6()
            pg_if.disable_ipv6_ra()

    def tearDown(self):
        """Per-test tear-down: parent tear-down first, then undo setUp().

        The interface clean-up is skipped when self.vpp_dead is set.
        """
        super(TestNDPROXY, self).tearDown()
        if self.vpp_dead:
            return
        for pg_if in self.pg_interfaces:
            pg_if.unconfig_ip6()
            pg_if.admin_down()

    def test_nd_mirror_proxy(self):
        """ Interface (Mirror) Proxy ND """

        #
        # When VPP has an interface whose address is also applied to a TAP
        # interface on the host, then VPP's TAP interface will be unnumbered
        # to the 'real' interface and do proxy ND from the host.
        # the curious aspect of this setup is that ND requests from the host
        # will come from the VPP's own address.
        #
        addr = self.pg0.remote_ip6
        nsma = in6_getnsma(inet_pton(socket.AF_INET6, addr))
        d = inet_ntop(socket.AF_INET6, nsma)

        # Make pg1 un-numbered to pg0
        #
        self.pg1.unconfig_ip6()
        self.pg1.set_unnumbered(self.pg0.sw_if_index)

        #
        # Enable ND proxy on pg1
        #
        self.vapi.ip6nd_proxy_enable_disable(sw_if_index=self.pg1.sw_if_index,
                                             is_enable=1)
        #
        # Send the ND request with an originating address that
        # is VPP's own address
        #
        nd_req_from_host = (Ether(src=self.pg1.remote_mac,
                                  dst=in6_getnsmac(nsma)) /
                            IPv6(dst=d, src=self.pg0.local_ip6) /
                            ICMPv6ND_NS(tgt=addr) /
                            ICMPv6NDOptSrcLLAddr(lladdr=self.pg1.remote_mac))

        rx = self.send_and_expect(self.pg1, [nd_req_from_host], self.pg1)
        self.assertEqual(rx[0][Ether].src, self.pg1.local_mac)
        self.assertEqual(rx[0][Ether].dst, self.pg1.remote_mac)
        self.assertEqual(rx[0][IPv6].src, self.pg0.remote_ip6)
        self.assertEqual(rx[0][IPv6].dst, self.pg0.local_ip6)
        self.assertEqual(ipv6nh[rx[0][IPv6].nh], "ICMPv6")
        self.assertEqual(rx[0][ICMPv6ND_NA].tgt, self.pg0.remote_ip6)
        self.assertTrue(rx[0].haslayer(ICMPv6NDOptDstLLAddr))
        self.assertEqual(rx[0][ICMPv6NDOptDstLLAddr].lladdr,
                         self.pg1.local_mac)

        #
        # Send the unicast ND request
        #
        unicast_nd_req_from_host = (Ether(src=self.pg1.remote_mac,
                                          dst=self.pg1.local_mac) /
                                    IPv6(dst=self.pg0.remote_ip6,
                                         src=self.pg1.remote_ip6_ll) /
                                    ICMPv6ND_NS(tgt=self.pg0.remote_ip6) /
                                    ICMPv6NDOptSrcLLAddr(
                                         lladdr=self.pg1.remote_mac))

        rx = self.send_and_expect(self.pg1, [unicast_nd_req_from_host],
                                  self.pg0)
        self.assertEqual(rx[0][Ether].src, self.pg0.local_mac)
        self.assertEqual(rx[0][Ether].dst, in6_getnsmac(nsma))
        self.assertEqual(rx[0][IPv6].src, self.pg0.local_ip6)
        self.assertEqual(rx[0][