aboutsummaryrefslogtreecommitdiffstats
path: root/src/vlib/linux
diff options
context:
space:
mode:
author: Stephen Hemminger <stephen@networkplumber.org> 2018-10-15 12:52:30 -0700
committer: Damjan Marion <damarion@cisco.com> 2018-12-19 08:34:39 +0100
commit: 6fbef2322870bbe1768537caecc3ca06bfd70dd7 (patch)
tree: 9c588f17b05d5bedf57b7207694d3e92136a4cbd /src/vlib/linux
parent: b0b9dadc5c57b96ed43427ca78430e52fed9196e (diff)
vlib: support Hyper-v/Azure VMBus
This patch adds support for VMBus to the VPP infrastructure. Since the only device that matters is the netvsc Poll Mode Driver in DPDK, the infrastructure is much simpler than PCI. Change-Id: Ie96c897ad9c426716c2398e4528688ce2217419b Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Diffstat (limited to 'src/vlib/linux')
-rw-r--r-- src/vlib/linux/vmbus.c 405
1 file changed, 405 insertions, 0 deletions
diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c
new file mode 100644
index 00000000000..2af62241d4d
--- /dev/null
+++ b/src/vlib/linux/vmbus.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2018, Microsoft Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vmbus.c: Linux user space VMBus bus management.
+ */
+
+#include <vppinfra/linux/sysfs.h>
+
+#include <vlib/vlib.h>
+#include <vlib/vmbus/vmbus.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+#include <uuid/uuid.h>
+
+/* sysfs directory that is scanned for VMBus devices and under which the
+ * per-device directory "<path>/<guid>" is built. */
+static const char sysfs_vmbus_dev_path[] = "/sys/bus/vmbus/devices";
+/* Base sysfs path under which "<driver>/new_id" and "<driver>/bind" are
+ * written. */
+static const char sysfs_vmbus_drv_path[] = "/sys/bus/vmbus/drivers";
+/* Base sysfs path of network interfaces; "<path>/<ifname>" is searched for
+ * "lower_*" entries. */
+static const char sysfs_class_net_path[] = "/sys/class/net";
+/* Name of the driver that devices are rebound to. */
+static const char uio_drv_name[] = "uio_hv_generic";
+/* GUID string written to the uio driver's new_id file; the comment at the
+ * use site describes it as the netvsc device type. */
+static const char netvsc_uuid[] = "f8615163-df3e-46c5-913f-f2d2f965ed0e";
+
+/*
+ * Descriptor bundling a file descriptor, an address and a size.
+ * NOTE(review): not referenced anywhere else in this file; presumably
+ * meant to describe a mapped device region -- confirm before relying on it.
+ */
+typedef struct
+{
+ int fd;
+ void *addr;
+ size_t size;
+} linux_vmbus_region_t;
+
+/*
+ * Descriptor pairing a file descriptor with a clib file index.
+ * NOTE(review): not referenced anywhere else in this file; presumably
+ * intended for interrupt delivery -- confirm.
+ */
+typedef struct
+{
+ int fd;
+ u32 clib_file_index;
+} linux_vmbus_irq_t;
+
+/* Per-device state; entries are looked up by handle through
+ * linux_vmbus_get_device(). */
+typedef struct
+{
+ /* NOTE(review): presumably this entry's own handle; it is never
+ assigned in this file -- confirm. */
+ vlib_vmbus_dev_handle_t handle;
+ /* Bus address; vlib_vmbus_get_addr() hands out a pointer to it. */
+ vlib_vmbus_addr_t addr;
+
+ /* Device File descriptor */
+ int fd;
+
+ /* Minor device for uio device. */
+ u32 uio_minor;
+
+ /* private data: opaque word stored and returned unchanged by
+ vlib_vmbus_set_private_data() / vlib_vmbus_get_private_data(). */
+ uword private_data;
+
+} linux_vmbus_device_t;
+
+/* Global VMBus state: owning vlib main plus the pool of VMBUS devices. */
+typedef struct
+{
+ /* Set by linux_vmbus_init(). */
+ vlib_main_t *vlib_main;
+ /* Pool indexed by vlib_vmbus_dev_handle_t (see linux_vmbus_get_device). */
+ linux_vmbus_device_t *linux_vmbus_devices;
+
+} linux_vmbus_main_t;
+
+/* Single instance of the VMBus state used by every function in this file. */
+linux_vmbus_main_t linux_vmbus_main;
+
+/* Resolve a device handle to its entry in the global device pool. */
+static linux_vmbus_device_t *
+linux_vmbus_get_device (vlib_vmbus_dev_handle_t h)
+{
+  return pool_elt_at_index (linux_vmbus_main.linux_vmbus_devices, h);
+}
+
+/* Return the opaque private word stored for device handle h. */
+uword
+vlib_vmbus_get_private_data (vlib_vmbus_dev_handle_t h)
+{
+  return linux_vmbus_get_device (h)->private_data;
+}
+
+/* Store an opaque private word on the device identified by handle h. */
+void
+vlib_vmbus_set_private_data (vlib_vmbus_dev_handle_t h, uword private_data)
+{
+  linux_vmbus_get_device (h)->private_data = private_data;
+}
+
+/* Return a pointer to the bus address kept inside the pool entry for h. */
+vlib_vmbus_addr_t *
+vlib_vmbus_get_addr (vlib_vmbus_dev_handle_t h)
+{
+  linux_vmbus_device_t *dev = linux_vmbus_get_device (h);
+
+  return &dev->addr;
+}
+
+/* Call to allocate/initialize the vmbus subsystem.
+   This is not an init function so that users can explicitly enable
+   vmbus only when it's needed.
+   Only the prototype appears here; no definition is visible in this
+   file.  The redundant second definition of linux_vmbus_main that used
+   to follow has been dropped, the object is already defined above. */
+clib_error_t *vmbus_bus_init (vlib_main_t * vm);
+
+/*
+ * unformat helper for VMBus addresses.
+ *
+ * Reads one string from the input with "%s" and hands it to uuid_parse,
+ * so text such as "f2c086b2-ff2e-11e8-88de-7bad0a57de05" ends up in the
+ * guid field of the vlib_vmbus_addr_t supplied through the va_list.
+ *
+ * Returns 1 when the string parsed as a UUID, 0 when no string could be
+ * read or uuid_parse rejected it.
+ */
+static uword
+unformat_vlib_vmbus_addr (unformat_input_t * input, va_list * args)
+{
+  vlib_vmbus_addr_t *out = va_arg (*args, vlib_vmbus_addr_t *);
+  u8 *token;
+  int rv;
+
+  if (!unformat (input, "%s", &token))
+    return 0;
+
+  rv = uuid_parse ((char *) token, out->guid);
+
+  /* the string returned by unformat is a vector owned by us */
+  vec_free (token);
+
+  return rv == 0 ? 1 : 0;
+}
+
+/*
+ * format helper: append the textual UUID form of a VMBus address to s.
+ * The single va_list argument is a vlib_vmbus_addr_t pointer.
+ */
+static u8 *
+format_vlib_vmbus_addr (u8 * s, va_list * va)
+{
+  char text[40];
+  vlib_vmbus_addr_t *a = va_arg (*va, vlib_vmbus_addr_t *);
+
+  uuid_unparse (a->guid, text);
+  s = format (s, "%s", text);
+
+  return s;
+}
+
+/*
+ * Workaround for mlx bug: bring the lower device up before unbind.
+ *
+ * Scans /sys/class/net/<upper_name> for the first entry whose name starts
+ * with "lower_", treats the remainder of that name as an interface name
+ * and, if that interface is not IFF_UP, sets IFF_UP on it.
+ *
+ * fd          socket used for the SIOCGIFFLAGS / SIOCSIFFLAGS ioctls;
+ *             it is not closed here.
+ * upper_name  name of the upper network interface.
+ *
+ * Returns 0 on success or when no "lower_" entry exists, otherwise an
+ * error describing the failed opendir or ioctl.
+ */
+static clib_error_t *
+vlib_vmbus_raise_lower (int fd, const char *upper_name)
+{
+  clib_error_t *error = 0;
+  struct dirent *e;
+  struct ifreq ifr;
+  u8 *dev_net_dir;
+  int found = 0;
+  DIR *dir;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  dev_net_dir = format (0, "%s/%s%c", sysfs_class_net_path, upper_name, 0);
+
+  dir = opendir ((char *) dev_net_dir);
+
+  if (!dir)
+    {
+      error = clib_error_return (0, "VMBUS failed to open %s", dev_net_dir);
+      goto done;
+    }
+
+  while ((e = readdir (dir)))
+    {
+      /* look for lower_enXXXX */
+      if (strncmp (e->d_name, "lower_", 6))
+        continue;
+
+      /* ifr was zero-filled above, so copying at most IFNAMSIZ - 1
+         bytes always leaves the name NUL terminated */
+      strncpy (ifr.ifr_name, e->d_name + 6, IFNAMSIZ - 1);
+      found = 1;
+      break;
+    }
+  closedir (dir);
+
+  /* use a flag rather than the dirent pointer, which must not be
+     relied upon once the directory stream is closed */
+  if (!found)
+    goto done;                  /* no lower device */
+
+  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+    error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+                                    ifr.ifr_name);
+  else if (!(ifr.ifr_flags & IFF_UP))
+    {
+      ifr.ifr_flags |= IFF_UP;
+
+      if (ioctl (fd, SIOCSIFFLAGS, &ifr) < 0)
+        error = clib_error_return_unix (0, "ioctl set intf %s flags",
+                                        ifr.ifr_name);
+    }
+done:
+  vec_free (dev_net_dir);
+  return error;
+}
+
+/*
+ * Move the VMBus device at addr from the kernel hv_netvsc driver to
+ * uio_hv_generic.
+ *
+ * Steps:
+ *  - read the device's current driver; anything other than "hv_netvsc"
+ *    (or no driver at all) is left alone and 0 is returned;
+ *  - find the network interface listed under <device>/net;
+ *  - refuse to touch the device when that interface is IFF_UP;
+ *  - raise the lower interface (see vlib_vmbus_raise_lower);
+ *  - once per process, write the netvsc type GUID to the uio driver's
+ *    new_id file;
+ *  - if the device exposes driver_override, write the uio driver name
+ *    to it;
+ *  - unbind the device from its current driver and bind it to
+ *    uio_hv_generic.
+ *
+ * Returns 0 on success or when the device is skipped because it is not
+ * driven by hv_netvsc, otherwise a clib error owned by the caller.
+ */
+clib_error_t *
+vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
+{
+  clib_error_t *error = 0;
+  u8 *dev_dir_name;
+  char *ifname = 0;
+  static int uio_new_id_needed = 1;
+  struct dirent *e;
+  struct ifreq ifr;
+  u8 *s, *driver_name;
+  DIR *dir;
+  int fd;
+
+  dev_dir_name = format (0, "%s/%U", sysfs_vmbus_dev_path,
+                         format_vlib_vmbus_addr, addr);
+  s = format (0, "%v/driver%c", dev_dir_name, 0);
+
+  driver_name = clib_sysfs_link_to_name ((char *) s);
+  vec_reset_length (s);
+
+  /* skip if not using the Linux kernel netvsc driver */
+  if (!driver_name || strcmp ("hv_netvsc", (char *) driver_name) != 0)
+    goto done;
+
+  s = format (s, "%v/net%c", dev_dir_name, 0);
+  dir = opendir ((char *) s);
+
+  if (!dir)
+    {
+      /* build the message while s still holds the path, and leave
+         through the common exit so the vectors are released */
+      error = clib_error_return (0, "VMBUS failed to open %s", s);
+      goto done;
+    }
+  vec_reset_length (s);
+
+  while ((e = readdir (dir)))
+    {
+      if (e->d_name[0] == '.')  /* skip . and .. */
+        continue;
+
+      ifname = strdup (e->d_name);
+      break;
+    }
+  closedir (dir);
+
+  /* from here on only ifname is used; e points into the closed stream */
+  if (!ifname)
+    {
+      error = clib_error_return (0,
+                                 "VMBUS device %U eth not found",
+                                 format_vlib_vmbus_addr, addr);
+      goto done;
+    }
+
+  /* zero-filled ifr plus a copy of at most IFNAMSIZ - 1 bytes keeps the
+     interface name NUL terminated */
+  memset (&ifr, 0, sizeof (ifr));
+  strncpy (ifr.ifr_name, ifname, IFNAMSIZ - 1);
+
+  /* read up/down flags */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "socket");
+      goto done;
+    }
+
+  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+    {
+      error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+                                      ifr.ifr_name);
+      close (fd);
+      goto done;
+    }
+
+  if (ifr.ifr_flags & IFF_UP)
+    {
+      error = clib_error_return (0,
+                                 "Skipping VMBUS device %U as host interface %s is up",
+                                 format_vlib_vmbus_addr, addr, ifname);
+      close (fd);
+      goto done;
+    }
+
+  error = vlib_vmbus_raise_lower (fd, ifname);
+  close (fd);
+
+  if (error)
+    goto done;
+
+  /* tell uio_hv_generic about netvsc device type */
+  if (uio_new_id_needed)
+    {
+      uio_new_id_needed = 0;
+
+      vec_reset_length (s);
+      s = format (s, "%s/%s/new_id%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
+      error = clib_sysfs_write ((char *) s, "%s", netvsc_uuid);
+
+      if (error)
+        goto done;
+    }
+
+  /* prefer the simpler driver_override model when the device offers it */
+  vec_reset_length (s);
+  s = format (s, "%v/driver_override%c", dev_dir_name, 0);
+  if (access ((char *) s, F_OK) == 0)
+    {
+      error = clib_sysfs_write ((char *) s, "%s", uio_drv_name);
+
+      if (error)
+        goto done;
+    }
+
+  /* Setting an override only changes driver matching; the device stays
+     with its current driver until it is unbound and bound again, so the
+     unbind/bind sequence runs in both cases. */
+  vec_reset_length (s);
+  s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
+  error = clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);
+
+  if (error)
+    goto done;
+
+  vec_reset_length (s);
+  s = format (s, "%s/%s/bind%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
+  error = clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);
+
+done:
+  free (ifname);
+  vec_free (s);
+  vec_free (dev_dir_name);
+  vec_free (driver_name);
+  return error;
+}
+
+/*
+ * Directory-scan callback used by vlib_vmbus_get_all_dev_addrs().
+ *
+ * arg           pointer to the vector of vlib_vmbus_addr_t being built.
+ * dev_dir_name  full path of the entry; it must read
+ *               "/sys/bus/vmbus/devices/<uuid>".
+ * ignored       unused.
+ *
+ * Appends the parsed address to the vector and returns 0, or returns an
+ * error when the path does not match the expected form.
+ */
+static clib_error_t *
+scan_vmbus_addr (void *arg, u8 * dev_dir_name, u8 * ignored)
+{
+  vlib_vmbus_addr_t **list = arg;
+  vlib_vmbus_addr_t parsed;
+  unformat_input_t in;
+  uword matched;
+
+  unformat_init_string (&in, (char *) dev_dir_name, vec_len (dev_dir_name));
+  matched = unformat (&in, "/sys/bus/vmbus/devices/%U",
+                      unformat_vlib_vmbus_addr, &parsed);
+  unformat_free (&in);
+
+  if (!matched)
+    return clib_error_return (0, "unformat error `%v`", dev_dir_name);
+
+  vec_add1 (*list, parsed);
+  return 0;
+}
+
+/* Sort comparator: order two vlib_vmbus_addr_t by uuid_compare of their
+ * guid fields. */
+static int
+vmbus_addr_cmp (void *v1, void *v2)
+{
+  return uuid_compare (((vlib_vmbus_addr_t *) v1)->guid,
+                       ((vlib_vmbus_addr_t *) v2)->guid);
+}
+
+/*
+ * Collect the addresses of all entries under /sys/bus/vmbus/devices.
+ *
+ * Returns a vector of addresses sorted with vmbus_addr_cmp, which the
+ * caller owns and must vec_free.  Returns 0 when the directory scan
+ * reports an error; that error is released here and not passed on.
+ */
+vlib_vmbus_addr_t *
+vlib_vmbus_get_all_dev_addrs ()
+{
+  vlib_vmbus_addr_t *addrs = 0;
+  clib_error_t *err;
+
+  err =
+    foreach_directory_file ((char *) sysfs_vmbus_dev_path, scan_vmbus_addr,
+                            &addrs, /* scan_dirs */ 0);
+  if (err)
+    {
+      /* the error object is ours; release it instead of leaking it */
+      clib_error_free (err);
+      vec_free (addrs);
+      return 0;
+    }
+
+  vec_sort_with_function (addrs, vmbus_addr_cmp);
+
+  return addrs;
+}
+
+/*
+ * vlib init hook: remember the owning vlib main, then run
+ * unix_input_init through vlib_call_init_function and return its result.
+ */
+clib_error_t *
+linux_vmbus_init (vlib_main_t * vm)
+{
+  linux_vmbus_main.vlib_main = vm;
+
+  return vlib_call_init_function (vm, unix_input_init);
+}
+
+VLIB_INIT_FUNCTION (linux_vmbus_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */