aboutsummaryrefslogtreecommitdiffstats
path: root/vlib/vlib/unix/pci.c
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-04-13 18:03:20 +0200
committerGerrit Code Review <gerrit@fd.io>2016-04-14 23:29:13 +0000
commita42cd34f106869d5afc26f5b5db7e0cb2f73ae97 (patch)
tree0c2b31263d7d77a57db3b56dc0736a1d8ef96e07 /vlib/vlib/unix/pci.c
parent550b5f62528c435e4b9d41729f1d92e8ed9e161a (diff)
Rework of DPDK PCI device uio driver binding process
This is a complete rework of DPDK PCI initialization. It drops the previous scheme where lspci/route/awk/sed were used; instead, sysfs alone is used for discovering Ethernet PCI devices. The criterion for blacklisting a device is changed from an existing routing table entry to the simple interface state obtained by the SIOCGIFFLAGS ioctl(). It checks for the IFF_UP flag, so as long as the interface is declared up, even when the carrier is down, the interface will be blacklisted. Change-Id: I59961ddcf1c19c728934e7fe746f343983741bf1 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vlib/vlib/unix/pci.c')
-rw-r--r--vlib/vlib/unix/pci.c235
1 files changed, 131 insertions, 104 deletions
diff --git a/vlib/vlib/unix/pci.c b/vlib/vlib/unix/pci.c
index 02c37f72707..75241f3f1c6 100644
--- a/vlib/vlib/unix/pci.c
+++ b/vlib/vlib/unix/pci.c
@@ -46,87 +46,105 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
linux_pci_main_t linux_pci_main;
-static clib_error_t *
-foreach_directory_file (char * dir_name,
- clib_error_t * (* f) (void * arg, u8 * path_name, u8 * file_name),
- void * arg,
- int scan_dirs)
+clib_error_t *
+vlib_pci_bind_to_uio (vlib_pci_device_t * d, char * uio_driver_name)
{
- DIR * d;
- struct dirent * e;
clib_error_t * error = 0;
- u8 * s, * t;
+ u8 *s = 0;
+ DIR *dir = 0;
+ struct dirent *e;
+ int fd;
+ pci_config_header_t * c;
+ u8 * dev_dir_name = format(0, "/sys/bus/pci/devices/%U",
+ format_vlib_pci_addr, &d->bus_address);
+
+ c = &d->config0.header;
+
+ /* if uio sub-directory exists, we are fine, device is
+ already bound to UIO driver */
+ s = format (s, "%v/uio%c", dev_dir_name, 0);
+ if (access ( (char *) s, F_OK) == 0)
+ goto done;
+ vec_reset_length (s);
+
+ /* walk trough all linux interfaces and if interface belonging to
+ this device is founf check if interface is admin up */
+ dir = opendir ("/sys/class/net");
+ s = format(s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
- d = opendir (dir_name);
- if (! d)
+ if (!dir)
{
- /* System has no PCI bus. */
- if (errno == ENOENT)
- return 0;
- return clib_error_return_unix (0, "open `%s'", dir_name);
+ error = clib_error_return (0, "Skipping PCI device %U: failed to "
+ "read /sys/class/net",
+ format_vlib_pci_addr, &d->bus_address);
+ goto done;
}
- s = t = 0;
- while (1)
+ fd = socket(PF_INET, SOCK_DGRAM, 0);
+
+ while((e = readdir (dir)))
{
- e = readdir (d);
- if (! e)
- break;
- if (scan_dirs)
- {
- if (e->d_type == DT_DIR
- && (! strcmp (e->d_name, ".")
- || ! strcmp (e->d_name, "..")))
- continue;
- }
- else
- {
- if (e->d_type == DT_DIR)
- continue;
- }
+ struct ifreq ifr;
+ struct ethtool_drvinfo drvinfo;
- s = format (s, "%s/%s", dir_name, e->d_name);
- t = format (t, "%s", e->d_name);
- error = f (arg, s, t);
- _vec_len (s) = 0;
- _vec_len (t) = 0;
+ if (e->d_name[0] == '.') /* skip . and .. */
+ continue;
- if (error)
- break;
- }
+ memset(&ifr, 0, sizeof ifr);
+ memset(&drvinfo, 0, sizeof drvinfo);
+ ifr.ifr_data = (char *) &drvinfo;
+ strncpy(ifr.ifr_name, e->d_name, IFNAMSIZ);
+ drvinfo.cmd = ETHTOOL_GDRVINFO;
+ ioctl (fd, SIOCETHTOOL, &ifr);
- vec_free (s);
- closedir (d);
+ if (strcmp ((char *) s, drvinfo.bus_info))
+ continue;
- return error;
-}
+ memset (&ifr, 0, sizeof(ifr));
+ strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ);
+ ioctl (fd, SIOCGIFFLAGS, &ifr);
+ close (fd);
-static clib_error_t *
-write_sys_fs (char * file_name, char * fmt, ...)
-{
- u8 * s;
- int fd;
+ if (ifr.ifr_flags & IFF_UP)
+ {
+ error = clib_error_return (0, "Skipping PCI device %U as host "
+ "interface %s is up",
+ format_vlib_pci_addr, &d->bus_address,
+ e->d_name);
+ goto done;
+ }
+ }
- fd = open (file_name, O_WRONLY);
- if (fd < 0)
- return clib_error_return_unix (0, "open `%s'", file_name);
+ close (fd);
+ vec_reset_length (s);
+
+ s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
+ write_sys_fs ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+ vec_reset_length (s);
- va_list va;
- va_start (va, fmt);
- s = va_format (0, fmt, &va);
- va_end (va);
+ s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0);
+ write_sys_fs ((char *) s, "0x%04x 0x%04x", c->vendor_id, c->device_id);
+ vec_reset_length (s);
- if (write (fd, s, vec_len (s)) < 0)
- return clib_error_return_unix (0, "write `%s'", file_name);
+ s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0);
+ write_sys_fs ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+done:
+ closedir (dir);
vec_free (s);
- close (fd);
- return 0;
+ vec_free (dev_dir_name);
+ return error;
}
+
static clib_error_t *
scan_uio_dir (void * arg, u8 * path_name, u8 * file_name)
{
@@ -149,7 +167,7 @@ static clib_error_t * linux_pci_uio_read_ready (unix_file_t * uf)
linux_pci_device_t * l;
u32 li = uf->private_data;
- l = pool_elt_at_index (pm->pci_devices, li);
+ l = pool_elt_at_index (pm->linux_pci_devices, li);
vlib_node_set_interrupt_pending (vm, l->device_input_node_index);
/* Let node know which device is interrupting. */
@@ -176,7 +194,7 @@ static uword pci_resource_size (uword os_handle, uword resource)
struct stat b;
uword result = 0;
- p = pool_elt_at_index (pm->pci_devices, os_handle);
+ p = pool_elt_at_index (pm->linux_pci_devices, os_handle);
file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
if (stat ((char *) file_name, &b) >= 0)
@@ -193,7 +211,7 @@ void os_add_pci_disable_interrupts_reg (uword os_handle, u32 resource,
char * file_name;
clib_error_t * error;
- l = pool_elt_at_index (pm->pci_devices, os_handle);
+ l = pool_elt_at_index (pm->linux_pci_devices, os_handle);
ASSERT (resource == 0);
ASSERT (reg_offset < pci_resource_size (os_handle, resource));
file_name = (char *) format (0, "%s/disable_interrupt_regs%c", l->dev_dir_name, 0);
@@ -203,7 +221,7 @@ void os_add_pci_disable_interrupts_reg (uword os_handle, u32 resource,
vec_free (file_name);
}
-static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
+static void add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev)
{
linux_pci_main_t * pm = &linux_pci_main;
linux_pci_device_t * l;
@@ -213,30 +231,12 @@ static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
c = &dev->config0.header;
- pool_get (pm->pci_devices, l);
+ pool_get (pm->linux_pci_devices, l);
l[0] = pdev[0];
l->dev_dir_name = vec_dup (l->dev_dir_name);
- /* Parse bus, dev, function from directory name. */
- {
- unformat_input_t input;
-
- unformat_init_string (&input, (char *) l->dev_dir_name,
- vec_len (l->dev_dir_name));
-
- if (! unformat (&input, "/sys/bus/pci/devices/%x:%x:%x.%x",
- &x[0], &x[1], &x[2], &x[3]))
- abort ();
-
- unformat_free (&input);
-
- l->bus_address.bus = x[1];
- l->bus_address.slot_function = (x[2] << 3) | x[3];
- dev->bus_address = l->bus_address;
- }
-
- dev->os_handle = l - pm->pci_devices;
+ dev->os_handle = l - pm->linux_pci_devices;
error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/new_id",
"%x %x", c->vendor_id, c->device_id);
@@ -269,7 +269,7 @@ static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
template.read_function = linux_pci_uio_read_ready;
template.file_descriptor = l->uio_fd;
template.error_function = linux_pci_uio_error_ready;
- template.private_data = l - pm->pci_devices;
+ template.private_data = l - pm->linux_pci_devices;
/* To be filled in by driver. */
l->device_input_node_index = ~0;
@@ -305,7 +305,7 @@ os_read_write_pci_config (uword os_handle,
linux_pci_device_t * p;
int n;
- p = pool_elt_at_index (pm->pci_devices, os_handle);
+ p = pool_elt_at_index (pm->linux_pci_devices, os_handle);
if (address != lseek (p->config_fd, address, SEEK_SET))
return clib_error_return_unix (0, "seek offset %d", address);
@@ -338,7 +338,7 @@ os_map_pci_resource_internal (uword os_handle,
int flags = MAP_SHARED;
error = 0;
- p = pool_elt_at_index (pm->pci_devices, os_handle);
+ p = pool_elt_at_index (pm->linux_pci_devices, os_handle);
file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
fd = open ((char *) file_name, O_RDWR);
@@ -404,9 +404,9 @@ void os_free_pci_device (uword os_handle)
linux_pci_main_t * pm = &linux_pci_main;
linux_pci_device_t * l;
- l = pool_elt_at_index (pm->pci_devices, os_handle);
+ l = pool_elt_at_index (pm->linux_pci_devices, os_handle);
linux_pci_device_free (l);
- pool_put (pm->pci_devices, l);
+ pool_put (pm->linux_pci_devices, l);
}
u8 * format_os_pci_handle (u8 * s, va_list * va)
@@ -415,10 +415,9 @@ u8 * format_os_pci_handle (u8 * s, va_list * va)
uword os_pci_handle = va_arg (*va, uword);
linux_pci_device_t * l;
- l = pool_elt_at_index (pm->pci_devices, os_pci_handle);
+ l = pool_elt_at_index (pm->linux_pci_devices, os_pci_handle);
return format (s, "%x/%x/%x", l->bus_address.bus,
- (l->bus_address.slot_function >> 3),
- (l->bus_address.slot_function & 0x7));
+ l->bus_address.slot, l->bus_address.function);
}
static inline pci_device_registration_t *
@@ -450,17 +449,17 @@ static inline u8 kernel_driver_installed (pci_device_registration_t *r)
static clib_error_t *
init_device_from_registered (vlib_main_t * vm,
- pci_device_t * dev,
+ vlib_pci_device_t * dev,
linux_pci_device_t * pdev)
{
- unix_main_t * um = vlib_unix_get_main();
+ linux_pci_main_t * lpm = &linux_pci_main;
pci_device_registration_t * r;
pci_device_id_t * i;
pci_config_header_t * c;
c = &dev->config0.header;
- r = um->pci_device_registrations;
+ r = lpm->pci_device_registrations;
while (r)
{
@@ -490,7 +489,7 @@ init_device_from_registered (vlib_main_t * vm,
static clib_error_t *
init_device (vlib_main_t * vm,
- pci_device_t * dev,
+ vlib_pci_device_t * dev,
linux_pci_device_t * pdev)
{
return init_device_from_registered (vm, dev, pdev);
@@ -500,10 +499,11 @@ static clib_error_t *
scan_device (void * arg, u8 * dev_dir_name, u8 * ignored)
{
vlib_main_t * vm = arg;
+ linux_pci_main_t * pm = &linux_pci_main;
int fd;
u8 * f;
clib_error_t * error = 0;
- pci_device_t dev = {0};
+ vlib_pci_device_t * dev;
linux_pci_device_t pdev = {0};
f = format (0, "%v/config%c", dev_dir_name, 0);
@@ -519,11 +519,14 @@ scan_device (void * arg, u8 * dev_dir_name, u8 * ignored)
goto done;
}
+ pool_get (pm->pci_devs, dev);
+
/* You can only read more that 64 bytes of config space as root; so we try to
read the full space but fall back to just the first 64 bytes. */
- if (read (fd, &dev.config_data, sizeof (dev.config_data)) != sizeof (dev.config_data)
- && read (fd, &dev.config0, sizeof (dev.config0)) != sizeof (dev.config0))
+ if (read (fd, &dev->config_data, sizeof (dev->config_data)) != sizeof (dev->config_data)
+ && read (fd, &dev->config0, sizeof (dev->config0)) != sizeof (dev->config0))
{
+ pool_put (pm->pci_devs, dev);
error = clib_error_return_unix (0, "read `%s'", f);
goto done;
}
@@ -532,23 +535,44 @@ scan_device (void * arg, u8 * dev_dir_name, u8 * ignored)
static pci_config_header_t all_ones;
if (all_ones.vendor_id == 0)
memset (&all_ones, ~0, sizeof (all_ones));
-
- if (! memcmp (&dev.config0.header, &all_ones, sizeof (all_ones)))
+
+ if (! memcmp (&dev->config0.header, &all_ones, sizeof (all_ones)))
{
+ pool_put (pm->pci_devs, dev);
error = clib_error_return (0, "invalid PCI config for `%s'", f);
goto done;
}
}
- if (dev.config0.header.header_type == 0)
- pci_config_type0_little_to_host (&dev.config0);
+ if (dev->config0.header.header_type == 0)
+ pci_config_type0_little_to_host (&dev->config0);
else
- pci_config_type1_little_to_host (&dev.config1);
+ pci_config_type1_little_to_host (&dev->config1);
+
+ /* Parse bus, dev, function from directory name. */
+ {
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) dev_dir_name,
+ vec_len (dev_dir_name));
+
+ if (! unformat (&input, "/sys/bus/pci/devices/%U",
+ unformat_vlib_pci_addr, &dev->bus_address))
+ abort ();
+
+ unformat_free (&input);
+
+ pdev.bus_address = dev->bus_address;
+ }
+
pdev.config_fd = fd;
pdev.dev_dir_name = dev_dir_name;
- error = init_device (vm, &dev, &pdev);
+ hash_set(pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32,
+ dev - pm->pci_devs);
+
+ error = init_device (vm, dev, &pdev);
done:
vec_free (f);
@@ -565,6 +589,9 @@ clib_error_t * pci_bus_init (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, unix_input_init)))
return error;
+ ASSERT(sizeof(vlib_pci_addr_t) == sizeof(u32));
+ pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword));
+
error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, /* scan_dirs */ 0);
/* Complain and continue. might not be root, etc. */