/*
 * Copyright (c) 2018, Microsoft Corporation.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * vmbus.c: Linux user space VMBus bus management.
 */

#include <vppinfra/linux/sysfs.h>

#include <vlib/vlib.h>
#include <vlib/vmbus/vmbus.h>
#include <vlib/unix/unix.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

#include <uuid/uuid.h>

static const char sysfs_vmbus_dev_path[] = "/sys/bus/vmbus/devices";
static const char sysfs_vmbus_drv_path[] = "/sys/bus/vmbus/drivers";
static const char sysfs_class_net_path[] = "/sys/class/net";
static const char uio_drv_name[] = "uio_hv_generic";
static const char netvsc_uuid[] = "f8615163-df3e-46c5-913f-f2d2f965ed0e";

typedef struct
{
  int fd;
  void *addr;
  size_t size;
} linux_vmbus_region_t;

typedef struct
{
  int fd;
  u32 clib_file_index;
} linux_vmbus_irq_t;

typedef struct
{
  vlib_vmbus_dev_handle_t handle;
  vlib_vmbus_addr_t addr;

  /* Device File descriptor */
  int fd;

  /* Minor device for uio device. */
  u32 uio_minor;

  /* private data */
  uword private_data;

} linux_vmbus_device_t;

/* Pool of VMBUS devices. */
typedef struct
{
  vlib_main_t *vlib_main;
  linux_vmbus_device_t *linux_vmbus_devices;

} linux_vmbus_main_t;

linux_vmbus_main_t linux_vmbus_main;

static linux_vmbus_device_t *
linux_vmbus_get_device (vlib_vmbus_dev_handle_t h)
{
  linux_vmbus_main_t *lpm = &linux_vmbus_main;
  return pool_elt_at_index (lpm->linux_vmbus_devices, h);
}

uword
vlib_vmbus_get_private_data (vlib_vmbus_dev_handle_t h)
{
  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
  return d->private_data;
}

void
vlib_vmbus_set_private_data (vlib_vmbus_dev_handle_t h, uword private_data)
{
  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
  d->private_data = private_data;
}

vlib_vmbus_addr_t *
vlib_vmbus_get_addr (vlib_vmbus_dev_handle_t h)
{
  linux_vmbus_device_t *d = linux_vmbus_get_device (h);
  return &d->addr;
}

/* Call to allocate/initialize the vmbus subsystem.
   This is not an init function so that users can explicitly enable
   vmbus only when it's needed. */
clib_error_t *vmbus_bus_init (vlib_main_t * vm);

linux_vmbus_main_t linux_vmbus_main;

/*
 * Take VMBus address represented in standard form like:
 * "f2c086b2-ff2e-11e8-88de-7bad0a57de05" and convert
 * it to u8[16]
 */
uword
unformat_vlib_vmbus_addr (unformat_input_t *input, va_list *args)
{
  vlib_vmbus_addr_t *addr = va_arg (*args, vlib_vmbus_addr_t *);
  uword ret = 0;
  u8 *s;

  if (!unformat (input, "%s", &s))
    return 0;

  if (uuid_parse ((char *) s, addr->guid) == 0)
    ret = 1;

  vec_free (s);

  return ret;
}

/* Convert bus address to standard UUID string */
u8 *
format_vlib_vmbus_addr (u8 *s, va_list *va)
{
  vlib_vmbus_addr_t *addr = va_arg (*va, vlib_vmbus_addr_t *);
  char tmp[40];

  uuid_unparse (addr->guid, tmp);
  return format (s, "%s", tmp);
}

/* workaround for mlx bug, bring lower device up before unbind */
static clib_error_t *
vlib_vmbus_raise_lower (int fd, const char *upper_name)
{
  clib_error_t *error = 0;
  struct dirent *e;
  struct ifreq ifr;
  u8 *dev_net_dir;
  DIR *dir;

  clib_memset (&ifr, 0, sizeof (ifr));

  dev_net_dir = format (0, "%s/%s%c", sysfs_class_net_path, upper_name, 0);

  dir = opendir ((char *) dev_net_dir);

  if (!dir)
    {
      error = clib_error_return (0, "VMBUS failed to open %s", dev_net_dir);
      goto done;
    }

  while ((e = readdir (dir)))
    {
      /* look for lower_enXXXX */
      if (strncmp (e->d_name, "lower_", 6))
	continue;

      strncpy (ifr.ifr_name, e->d_name + 6, IFNAMSIZ - 1);
      break;
    }
  closedir (dir);

  if (!e)
    goto done;			/* no lower device */

  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
    error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
				    ifr.ifr_name);
  else if (!(ifr.ifr_flags & IFF_UP))
    {
      ifr.ifr_flags |= IFF_UP;

      if (ioctl (fd, SIOCSIFFLAGS, &ifr) < 0)
	error = clib_error_return_unix (0, "ioctl set intf %s flags",
					ifr.ifr_name);
    }
done:
  vec_free (dev_net_dir);
  return error;
}

static int
directory_exists (char *path)
{
  struct stat s = { 0 };
  if (stat (path, &s) == -1)
    return 0;

  return S_ISDIR (s.st_mode);
}

clib_error_t *
vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
{
  clib_error_t *error = 0;
  u8 *dev_dir_name;
  char *ifname = 0;
  static int uio_new_id_needed = 1;
  struct dirent *e;
  struct ifreq ifr;
  u8 *s, *driver_name;
  DIR *dir;
  int fd;

  dev_dir_name = format (0, "%s/%U", sysfs_vmbus_dev_path,
			 format_vlib_vmbus_addr, addr);
  s = format (0, "%v/driver%c", dev_dir_name, 0);

  driver_name = clib_sysfs_link_to_name ((char *) s);
  vec_reset_length (s);

  /* skip if not using the Linux kernel netvsc driver */
  if (!driver_name || strcmp ("hv_netvsc", (char *) driver_name) != 0)
    goto done;

  /* if uio_hv_generic is not loaded, then can't use native DPDK driver. */
  if (!directory_exists ("/sys/module/uio_hv_generic"))
    goto done;

  s = format (s, "%v/net%c", dev_dir_name, 0);
  dir = opendir ((char *) s);
  vec_reset_length (s);

  if (!dir)
    return clib_error_return (0, "VMBUS failed to open %s", s);

  while ((e = readdir (dir)))
    {
      if (e->d_name[0] == '.')	/* skip . and .. */
	continue;

      ifname = strdup (e->d_name);
      break;
    }
  closedir (dir);

  if (!ifname)
    {
      error = clib_error_return (0,
				 "VMBUS device %U eth not found",
				 format_vlib_vmbus_addr, addr);
      goto done;
    }


  clib_memset (&ifr, 0, sizeof (ifr));
  strncpy (ifr.ifr_name, ifname, IFNAMSIZ - 1);

  /* read up/down flags */
  fd = socket (PF_INET, SOCK_DGRAM, 0);
  if (fd < 0)
    {
      error = clib_error_return_unix (0, "socket");
      goto done;
    }

  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
    {
      error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
				      ifr.ifr_name);
      close (fd);
      goto done;
    }

  if (ifr.ifr_flags & IFF_UP)
    {
      error = clib_error_return (0,
				 "Skipping VMBUS device %U as host interface %s is up",
				 format_vlib_vmbus_addr, addr, e->d_name);
      close (fd);
      goto done;
    }

  /* tell uio_hv_generic about netvsc device type */
  if (uio_new_id_needed)
    {
      vec_reset_length (s);
      s = format (s, "%s/%s/new_id%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
      error = clib_sysfs_write ((char *) s, "%s", netvsc_uuid);
      /* If device already exists, we can bind/unbind/override driver */
      if (error)
	{
	  if (error->code == EEXIST)
	    {
	      clib_error_free (error);
	    }
	  else
	    {
	      close (fd);
	      goto done;
	    }
	}

      uio_new_id_needed = 0;
    }

  error = vlib_vmbus_raise_lower (fd, ifname);
  close (fd);

  if (error)
    goto done;

  /* prefer the simplier driver_override model */
  vec_reset_length (s);
  s = format (s, "%/driver_override%c", dev_dir_name, 0);
  if (access ((char *) s, F_OK) == 0)
    {
      clib_sysfs_write ((char *) s, "%s", uio_drv_name);
    }
  else
    {
      vec_reset_length (s);

      s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
      error =
	clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);

      if (error)
	goto done;

      vec_reset_length (s);

      s = format (s, "%s/%s/bind%c", sysfs_vmbus_drv_path, uio_drv_name, 0);
      error =
	clib_sysfs_write ((char *) s, "%U", format_vlib_vmbus_addr, addr);
    }
  vec_reset_length (s);

done:
  free (ifname);
  vec_free (s);
  vec_free (dev_dir_name);
  vec_free (driver_name);
  return error;
}

static clib_error_t *
scan_vmbus_addr (void *arg, u8 * dev_dir_name, u8 * ignored)
{
  vlib_vmbus_addr_t addr, **addrv = arg;
  unformat_input_t input;
  clib_error_t *err = 0;

  unformat_init_string (&input, (char *) dev_dir_name,
			vec_len (dev_dir_name));

  if (!unformat (&input, "/sys/bus/vmbus/devices/%U",
		 unformat_vlib_vmbus_addr, &addr))
    err = clib_error_return (0, "unformat error `%v`", dev_dir_name);

  unformat_free (&input);

  if (err)
    return err;

  vec_add1 (*addrv, addr);
  return 0;
}

static int
vmbus_addr_cmp (void *v1, void *v2)
{
  vlib_vmbus_addr_t *a1 = v1;
  vlib_vmbus_addr_t *a2 = v2;

  return uuid_compare (a1->guid, a2->guid);
}

vlib_vmbus_addr_t *
vlib_vmbus_get_all_dev_addrs ()
{
  vlib_vmbus_addr_t *addrs = 0;
  clib_error_t *err;

  err =
    foreach_directory_file ((char *) sysfs_vmbus_dev_path, scan_vmbus_addr,
			    &addrs, /* scan_dirs */ 0);
  if (err)
    {
      vec_free (addrs);
      return 0;
    }

  vec_sort_with_function (addrs, vmbus_addr_cmp);

  return addrs;
}

clib_error_t *
linux_vmbus_init (vlib_main_t * vm)
{
  linux_vmbus_main_t *pm = &linux_vmbus_main;

  pm->vlib_main = vm;

  return 0;
}

/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (linux_vmbus_init) =
{
  .runs_before = VLIB_INITS("unix_input_init"),
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */