aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/bus
diff options
context:
space:
mode:
authorLuca Boccassi <luca.boccassi@gmail.com>2018-08-14 18:55:37 +0100
committerLuca Boccassi <luca.boccassi@gmail.com>2018-08-14 18:57:39 +0100
commitae1479de3027a4a0f68d90bf65ac6ff2b862b837 (patch)
tree5dbca675b6717b6b61abbe1a9583d5166fed942b /drivers/bus
parent8cee230dd1f0f9f31f4b0339c671d0b1ee14e111 (diff)
parentb63264c8342e6a1b6971c79550d2af2024b6a4de (diff)
Merge branch 'upstream' into 18.08.x
Change-Id: Ifbda2d554199dd4d11e01f0090881b5f0103ae12 Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'drivers/bus')
-rw-r--r--drivers/bus/Makefile1
-rw-r--r--drivers/bus/dpaa/base/fman/fman_hw.c62
-rw-r--r--drivers/bus/dpaa/base/fman/netcfg_layer.c5
-rw-r--r--drivers/bus/dpaa/base/fman/of.c44
-rw-r--r--drivers/bus/dpaa/base/qbman/bman_driver.c4
-rw-r--r--drivers/bus/dpaa/base/qbman/qman.c21
-rw-r--r--drivers/bus/dpaa/base/qbman/qman_driver.c4
-rw-r--r--drivers/bus/dpaa/base/qbman/qman_priv.h1
-rw-r--r--drivers/bus/dpaa/dpaa_bus.c20
-rw-r--r--drivers/bus/dpaa/include/compat.h6
-rw-r--r--drivers/bus/dpaa/include/fsl_fman.h6
-rw-r--r--drivers/bus/dpaa/include/fsl_qman.h3
-rw-r--r--drivers/bus/dpaa/include/of.h2
-rw-r--r--drivers/bus/dpaa/rte_bus_dpaa_version.map10
-rw-r--r--drivers/bus/dpaa/rte_dpaa_bus.h3
-rw-r--r--drivers/bus/fslmc/fslmc_bus.c4
-rw-r--r--drivers/bus/fslmc/fslmc_logs.h2
-rw-r--r--drivers/bus/fslmc/qbman/qbman_portal.c3
-rw-r--r--drivers/bus/fslmc/qbman/qbman_portal.h1
-rw-r--r--drivers/bus/fslmc/rte_fslmc.h6
-rw-r--r--drivers/bus/ifpga/rte_bus_ifpga.h3
-rw-r--r--drivers/bus/meson.build2
-rw-r--r--drivers/bus/pci/linux/pci.c1
-rw-r--r--drivers/bus/pci/linux/pci_uio.c47
-rw-r--r--drivers/bus/pci/linux/pci_vfio.c118
-rw-r--r--drivers/bus/pci/pci_common.c97
-rw-r--r--drivers/bus/pci/private.h50
-rw-r--r--drivers/bus/pci/rte_bus_pci.h5
-rw-r--r--drivers/bus/vdev/rte_bus_vdev.h3
-rw-r--r--drivers/bus/vmbus/Makefile36
-rw-r--r--drivers/bus/vmbus/linux/Makefile3
-rw-r--r--drivers/bus/vmbus/linux/vmbus_bus.c355
-rw-r--r--drivers/bus/vmbus/linux/vmbus_uio.c398
-rw-r--r--drivers/bus/vmbus/meson.build18
-rw-r--r--drivers/bus/vmbus/private.h132
-rw-r--r--drivers/bus/vmbus/rte_bus_vmbus.h407
-rw-r--r--drivers/bus/vmbus/rte_bus_vmbus_version.map29
-rw-r--r--drivers/bus/vmbus/rte_vmbus_reg.h344
-rw-r--r--drivers/bus/vmbus/vmbus_bufring.c244
-rw-r--r--drivers/bus/vmbus/vmbus_channel.c405
-rw-r--r--drivers/bus/vmbus/vmbus_common.c286
-rw-r--r--drivers/bus/vmbus/vmbus_common_uio.c232
42 files changed, 3188 insertions, 235 deletions
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index ef7f2475..cea3b55e 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -10,5 +10,6 @@ endif
DIRS-$(CONFIG_RTE_LIBRTE_IFPGA_BUS) += ifpga
DIRS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci
DIRS-$(CONFIG_RTE_LIBRTE_VDEV_BUS) += vdev
+DIRS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/bus/dpaa/base/fman/fman_hw.c b/drivers/bus/dpaa/base/fman/fman_hw.c
index 0148b98e..4ebbc3d3 100644
--- a/drivers/bus/dpaa/base/fman/fman_hw.c
+++ b/drivers/bus/dpaa/base/fman/fman_hw.c
@@ -16,6 +16,9 @@
#include <fsl_fman_crc64.h>
#include <fsl_bman.h>
+#define FMAN_SP_SG_DISABLE 0x80000000
+#define FMAN_SP_EXT_BUF_MARG_START_SHIFT 16
+
/* Instantiate the global variable that the inline CRC64 implementation (in
* <fsl_fman.h>) depends on.
*/
@@ -422,20 +425,16 @@ fman_if_set_fc_quanta(struct fman_if *fm_if, u16 pause_quanta)
int
fman_if_get_fdoff(struct fman_if *fm_if)
{
- u32 fmbm_ricp;
+ u32 fmbm_rebm;
int fdoff;
- int iceof_mask = 0x001f0000;
- int icsz_mask = 0x0000001f;
struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
assert(fman_ccsr_map_fd != -1);
- fmbm_ricp =
- in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_ricp);
- /*iceof + icsz*/
- fdoff = ((fmbm_ricp & iceof_mask) >> 16) * 16 +
- (fmbm_ricp & icsz_mask) * 16;
+ fmbm_rebm = in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm);
+
+ fdoff = (fmbm_rebm >> FMAN_SP_EXT_BUF_MARG_START_SHIFT) & 0x1ff;
return fdoff;
}
@@ -502,12 +501,16 @@ fman_if_set_fdoff(struct fman_if *fm_if, uint32_t fd_offset)
{
struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
unsigned int *fmbm_rebm;
+ int val = 0;
+ int fmbm_mask = 0x01ff0000;
+
+ val = fd_offset << FMAN_SP_EXT_BUF_MARG_START_SHIFT;
assert(fman_ccsr_map_fd != -1);
fmbm_rebm = &((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm;
- out_be32(fmbm_rebm, in_be32(fmbm_rebm) | (fd_offset << 16));
+ out_be32(fmbm_rebm, (in_be32(fmbm_rebm) & ~fmbm_mask) | val);
}
void
@@ -536,6 +539,47 @@ fman_if_get_maxfrm(struct fman_if *fm_if)
return (in_be32(reg_maxfrm) | 0x0000FFFF);
}
+/* MSB in fmbm_rebm register
+ * 0 - If BMI cannot store the frame in a single buffer it may select a buffer
+ * of smaller size and store the frame in scatter gather (S/G) buffers
+ * 1 - Scatter gather format is not enabled for frame storage. If BMI cannot
+ * store the frame in a single buffer, the frame is discarded.
+ */
+
+int
+fman_if_get_sg_enable(struct fman_if *fm_if)
+{
+ u32 fmbm_rebm;
+
+ struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
+
+ assert(fman_ccsr_map_fd != -1);
+
+ fmbm_rebm = in_be32(&((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm);
+
+ return (fmbm_rebm & FMAN_SP_SG_DISABLE) ? 0 : 1;
+}
+
+void
+fman_if_set_sg(struct fman_if *fm_if, int enable)
+{
+ struct __fman_if *__if = container_of(fm_if, struct __fman_if, __if);
+ unsigned int *fmbm_rebm;
+ int val;
+ int fmbm_mask = FMAN_SP_SG_DISABLE;
+
+ if (enable)
+ val = 0;
+ else
+ val = FMAN_SP_SG_DISABLE;
+
+ assert(fman_ccsr_map_fd != -1);
+
+ fmbm_rebm = &((struct rx_bmi_regs *)__if->bmi_map)->fmbm_rebm;
+
+ out_be32(fmbm_rebm, (in_be32(fmbm_rebm) & ~fmbm_mask) | val);
+}
+
void
fman_if_set_dnia(struct fman_if *fm_if, uint32_t nia)
{
diff --git a/drivers/bus/dpaa/base/fman/netcfg_layer.c b/drivers/bus/dpaa/base/fman/netcfg_layer.c
index 3e956ce1..031c6f1a 100644
--- a/drivers/bus/dpaa/base/fman/netcfg_layer.c
+++ b/drivers/bus/dpaa/base/fman/netcfg_layer.c
@@ -18,11 +18,6 @@
#include <rte_dpaa_logs.h>
#include <netcfg.h>
-/* Structure contains information about all the interfaces given by user
- * on command line.
- */
-struct netcfg_interface *netcfg_interface;
-
/* This data structure contaings all configurations information
* related to usages of DPA devices.
*/
diff --git a/drivers/bus/dpaa/base/fman/of.c b/drivers/bus/dpaa/base/fman/of.c
index 1b2dbe26..a7f3174e 100644
--- a/drivers/bus/dpaa/base/fman/of.c
+++ b/drivers/bus/dpaa/base/fman/of.c
@@ -182,6 +182,11 @@ linear_dir(struct dt_dir *d)
DPAA_BUS_LOG(DEBUG, "Duplicate lphandle in %s",
d->node.node.full_name);
d->lphandle = f;
+ } else if (!strcmp(f->node.node.name, "phandle")) {
+ if (d->lphandle)
+ DPAA_BUS_LOG(DEBUG, "Duplicate lphandle in %s",
+ d->node.node.full_name);
+ d->lphandle = f;
} else if (!strcmp(f->node.node.name, "#address-cells")) {
if (d->a_cells)
DPAA_BUS_LOG(DEBUG, "Duplicate a_cells in %s",
@@ -541,3 +546,42 @@ of_device_is_compatible(const struct device_node *dev_node,
return true;
return false;
}
+
+static const void *of_get_mac_addr(const struct device_node *np,
+ const char *name)
+{
+ return of_get_property(np, name, NULL);
+}
+
+/**
+ * Search the device tree for the best MAC address to use. 'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot. For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses. Some older U-Boots only initialized 'local-mac-address'. In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ */
+const void *of_get_mac_address(const struct device_node *np)
+{
+ const void *addr;
+
+ addr = of_get_mac_addr(np, "mac-address");
+ if (addr)
+ return addr;
+
+ addr = of_get_mac_addr(np, "local-mac-address");
+ if (addr)
+ return addr;
+
+ return of_get_mac_addr(np, "address");
+}
diff --git a/drivers/bus/dpaa/base/qbman/bman_driver.c b/drivers/bus/dpaa/base/qbman/bman_driver.c
index 1381da36..b14b5905 100644
--- a/drivers/bus/dpaa/base/qbman/bman_driver.c
+++ b/drivers/bus/dpaa/base/qbman/bman_driver.c
@@ -15,9 +15,9 @@
/*
* Global variables of the max portal/pool number this bman version supported
*/
-u16 bman_ip_rev;
+static u16 bman_ip_rev;
u16 bman_pool_max;
-void *bman_ccsr_map;
+static void *bman_ccsr_map;
/*****************/
/* Portal driver */
diff --git a/drivers/bus/dpaa/base/qbman/qman.c b/drivers/bus/dpaa/base/qbman/qman.c
index 27d98cc1..7c17027f 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -625,7 +625,7 @@ fail_eqcr:
#define MAX_GLOBAL_PORTALS 8
static struct qman_portal global_portals[MAX_GLOBAL_PORTALS];
-rte_atomic16_t global_portals_used[MAX_GLOBAL_PORTALS];
+static rte_atomic16_t global_portals_used[MAX_GLOBAL_PORTALS];
static struct qman_portal *
qman_alloc_global_portal(void)
@@ -1058,7 +1058,7 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
struct qm_portal *portal = &p->p;
register struct qm_dqrr *dqrr = &portal->dqrr;
struct qm_dqrr_entry *dq[QM_DQRR_SIZE], *shadow[QM_DQRR_SIZE];
- struct qman_fq *fq[QM_DQRR_SIZE];
+ struct qman_fq *fq;
unsigned int limit = 0, rx_number = 0;
uint32_t consume = 0;
@@ -1092,14 +1092,13 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
/* SDQCR: context_b points to the FQ */
#ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
- fq[rx_number] = qman_fq_lookup_table[be32_to_cpu(
- dq[rx_number]->contextB)];
+ fq = qman_fq_lookup_table[be32_to_cpu(dq[rx_number]->contextB)];
#else
- fq[rx_number] = (void *)be32_to_cpu(
- dq[rx_number]->contextB);
+ fq = (void *)be32_to_cpu(dq[rx_number]->contextB);
#endif
- fq[rx_number]->cb.dqrr_prepare(shadow[rx_number],
- &bufs[rx_number]);
+ if (fq->cb.dqrr_prepare)
+ fq->cb.dqrr_prepare(shadow[rx_number],
+ &bufs[rx_number]);
consume |= (1 << (31 - DQRR_PTR2IDX(shadow[rx_number])));
rx_number++;
@@ -1107,7 +1106,7 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
} while (++limit < poll_limit);
if (rx_number)
- fq[0]->cb.dqrr_dpdk_pull_cb(fq, shadow, bufs, rx_number);
+ fq->cb.dqrr_dpdk_pull_cb(&fq, shadow, bufs, rx_number);
/* Consume all the DQRR enries together */
qm_out(DQRR_DCAP, (1 << 8) | consume);
@@ -2003,13 +2002,13 @@ int qman_query_congestion(struct qm_mcr_querycongestion *congestion)
return 0;
}
-int qman_set_vdq(struct qman_fq *fq, u16 num)
+int qman_set_vdq(struct qman_fq *fq, u16 num, uint32_t vdqcr_flags)
{
struct qman_portal *p = get_affine_portal();
uint32_t vdqcr;
int ret = -EBUSY;
- vdqcr = QM_VDQCR_EXACT;
+ vdqcr = vdqcr_flags;
vdqcr |= QM_VDQCR_NUMFRAMES_SET(num);
if ((fq->state != qman_fq_state_parked) &&
diff --git a/drivers/bus/dpaa/base/qbman/qman_driver.c b/drivers/bus/dpaa/base/qbman/qman_driver.c
index 07b29d55..f6ecd6b2 100644
--- a/drivers/bus/dpaa/base/qbman/qman_driver.c
+++ b/drivers/bus/dpaa/base/qbman/qman_driver.c
@@ -20,9 +20,9 @@ u16 qm_channel_caam = QMAN_CHANNEL_CAAM;
u16 qm_channel_pme = QMAN_CHANNEL_PME;
/* Ccsr map address to access ccsrbased register */
-void *qman_ccsr_map;
+static void *qman_ccsr_map;
/* The qman clock frequency */
-u32 qman_clk;
+static u32 qman_clk;
static __thread int qmfd = -1;
static __thread struct qm_portal_config qpcfg;
diff --git a/drivers/bus/dpaa/base/qbman/qman_priv.h b/drivers/bus/dpaa/base/qbman/qman_priv.h
index 9e4471e6..02f6301f 100644
--- a/drivers/bus/dpaa/base/qbman/qman_priv.h
+++ b/drivers/bus/dpaa/base/qbman/qman_priv.h
@@ -139,7 +139,6 @@ struct qm_portal_config {
#define QMAN_REV31 0x0301
#define QMAN_REV32 0x0302
extern u16 qman_ip_rev; /* 0 if uninitialised, otherwise QMAN_REVx */
-extern u32 qman_clk;
int qm_set_wpm(int wpm);
int qm_get_wpm(int *wpm);
diff --git a/drivers/bus/dpaa/dpaa_bus.c b/drivers/bus/dpaa/dpaa_bus.c
index 20462065..16fabd1b 100644
--- a/drivers/bus/dpaa/dpaa_bus.c
+++ b/drivers/bus/dpaa/dpaa_bus.c
@@ -50,7 +50,7 @@ struct rte_dpaa_bus rte_dpaa_bus;
struct netcfg_info *dpaa_netcfg;
/* define a variable to hold the portal_key, once created.*/
-pthread_key_t dpaa_portal_key;
+static pthread_key_t dpaa_portal_key;
unsigned int dpaa_svr_family;
@@ -539,6 +539,13 @@ rte_dpaa_bus_probe(void)
unsigned int svr_ver;
int probe_all = rte_dpaa_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST;
+ svr_file = fopen(DPAA_SOC_ID_FILE, "r");
+ if (svr_file) {
+ if (fscanf(svr_file, "svr:%x", &svr_ver) > 0)
+ dpaa_svr_family = svr_ver & SVR_MASK;
+ fclose(svr_file);
+ }
+
/* For each registered driver, and device, call the driver->probe */
TAILQ_FOREACH(dev, &rte_dpaa_bus.device_list, next) {
TAILQ_FOREACH(drv, &rte_dpaa_bus.driver_list, next) {
@@ -569,13 +576,6 @@ rte_dpaa_bus_probe(void)
if (!TAILQ_EMPTY(&rte_dpaa_bus.device_list))
rte_mbuf_set_platform_mempool_ops(DPAA_MEMPOOL_OPS_NAME);
- svr_file = fopen(DPAA_SOC_ID_FILE, "r");
- if (svr_file) {
- if (fscanf(svr_file, "svr:%x", &svr_ver) > 0)
- dpaa_svr_family = svr_ver & SVR_MASK;
- fclose(svr_file);
- }
-
return 0;
}
@@ -626,9 +626,7 @@ struct rte_dpaa_bus rte_dpaa_bus = {
RTE_REGISTER_BUS(FSL_DPAA_BUS_NAME, rte_dpaa_bus.bus);
-RTE_INIT(dpaa_init_log);
-static void
-dpaa_init_log(void)
+RTE_INIT(dpaa_init_log)
{
dpaa_logtype_bus = rte_log_register("bus.dpaa");
if (dpaa_logtype_bus >= 0)
diff --git a/drivers/bus/dpaa/include/compat.h b/drivers/bus/dpaa/include/compat.h
index e4b57021..92241d23 100644
--- a/drivers/bus/dpaa/include/compat.h
+++ b/drivers/bus/dpaa/include/compat.h
@@ -48,9 +48,15 @@
*/
/* Required compiler attributes */
+#ifndef __maybe_unused
#define __maybe_unused __rte_unused
+#endif
+#ifndef __always_unused
#define __always_unused __rte_unused
+#endif
+#ifndef __packed
#define __packed __rte_packed
+#endif
#define noinline __attribute__((noinline))
#define L1_CACHE_BYTES 64
diff --git a/drivers/bus/dpaa/include/fsl_fman.h b/drivers/bus/dpaa/include/fsl_fman.h
index c0ef1bff..1d1ce867 100644
--- a/drivers/bus/dpaa/include/fsl_fman.h
+++ b/drivers/bus/dpaa/include/fsl_fman.h
@@ -108,6 +108,12 @@ int fman_if_get_fdoff(struct fman_if *fm_if);
/* Set interface fd->offset value */
void fman_if_set_fdoff(struct fman_if *fm_if, uint32_t fd_offset);
+/* Get interface SG enable status value */
+int fman_if_get_sg_enable(struct fman_if *fm_if);
+
+/* Set interface SG support mode */
+void fman_if_set_sg(struct fman_if *fm_if, int enable);
+
/* Get interface Max Frame length (MTU) */
uint16_t fman_if_get_maxfrm(struct fman_if *fm_if);
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index e4ad7ae4..b18cf037 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -1332,10 +1332,11 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
* qman_set_vdq - Issue a volatile dequeue command
* @fq: Frame Queue on which the volatile dequeue command is issued
* @num: Number of Frames requested for volatile dequeue
+ * @vdqcr_flags: QM_VDQCR_EXACT flag to for VDQCR command
*
* This function will issue a volatile dequeue command to the QMAN.
*/
-int qman_set_vdq(struct qman_fq *fq, u16 num);
+int qman_set_vdq(struct qman_fq *fq, u16 num, uint32_t vdqcr_flags);
/**
* qman_dequeue - Get the DQRR entry after volatile dequeue command
diff --git a/drivers/bus/dpaa/include/of.h b/drivers/bus/dpaa/include/of.h
index 151be5a3..7ea7608f 100644
--- a/drivers/bus/dpaa/include/of.h
+++ b/drivers/bus/dpaa/include/of.h
@@ -109,6 +109,8 @@ const struct device_node *of_get_parent(const struct device_node *dev_node);
const struct device_node *of_get_next_child(const struct device_node *dev_node,
const struct device_node *prev);
+const void *of_get_mac_address(const struct device_node *np);
+
#define for_each_child_node(parent, child) \
for (child = of_get_next_child(parent, NULL); child != NULL; \
child = of_get_next_child(parent, child))
diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map b/drivers/bus/dpaa/rte_bus_dpaa_version.map
index 8d902854..7d6d6243 100644
--- a/drivers/bus/dpaa/rte_bus_dpaa_version.map
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -92,3 +92,13 @@ DPDK_18.02 {
local: *;
} DPDK_17.11;
+
+DPDK_18.08 {
+ global:
+
+ fman_if_get_sg_enable;
+ fman_if_set_sg;
+ of_get_mac_address;
+
+ local: *;
+} DPDK_18.02;
diff --git a/drivers/bus/dpaa/rte_dpaa_bus.h b/drivers/bus/dpaa/rte_dpaa_bus.h
index 8573bd6e..15dc6a4a 100644
--- a/drivers/bus/dpaa/rte_dpaa_bus.h
+++ b/drivers/bus/dpaa/rte_dpaa_bus.h
@@ -164,8 +164,7 @@ void dpaa_portal_finish(void *arg);
/** Helper for DPAA device registration from driver (eth, crypto) instance */
#define RTE_PMD_REGISTER_DPAA(nm, dpaa_drv) \
-RTE_INIT(dpaainitfn_ ##nm); \
-static void dpaainitfn_ ##nm(void) \
+RTE_INIT(dpaainitfn_ ##nm) \
{\
(dpaa_drv).driver.name = RTE_STR(nm);\
rte_dpaa_driver_register(&dpaa_drv); \
diff --git a/drivers/bus/fslmc/fslmc_bus.c b/drivers/bus/fslmc/fslmc_bus.c
index c6301b20..d2900edc 100644
--- a/drivers/bus/fslmc/fslmc_bus.c
+++ b/drivers/bus/fslmc/fslmc_bus.c
@@ -519,9 +519,7 @@ struct rte_fslmc_bus rte_fslmc_bus = {
RTE_REGISTER_BUS(FSLMC_BUS_NAME, rte_fslmc_bus.bus);
-RTE_INIT(fslmc_init_log);
-static void
-fslmc_init_log(void)
+RTE_INIT(fslmc_init_log)
{
/* Bus level logs */
dpaa2_logtype_bus = rte_log_register("bus.fslmc");
diff --git a/drivers/bus/fslmc/fslmc_logs.h b/drivers/bus/fslmc/fslmc_logs.h
index 9750b8c8..dd74cb7d 100644
--- a/drivers/bus/fslmc/fslmc_logs.h
+++ b/drivers/bus/fslmc/fslmc_logs.h
@@ -18,7 +18,7 @@ extern int dpaa2_logtype_bus;
rte_log(RTE_LOG_DEBUG, dpaa2_logtype_bus, "fslmc: %s(): " fmt "\n", \
__func__, ##args)
-#define BUS_INIT_FUNC_TRACE() DPAA2_BUS_LOG(DEBUG, " >>")
+#define BUS_INIT_FUNC_TRACE() DPAA2_BUS_DEBUG(" >>")
#define DPAA2_BUS_INFO(fmt, args...) \
DPAA2_BUS_LOG(INFO, fmt, ## args)
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index 713ec965..07145005 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -122,8 +122,7 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
p->vdq.valid_bit = QB_VALID_BIT;
p->dqrr.next_idx = 0;
p->dqrr.valid_bit = QB_VALID_BIT;
- qman_version = p->desc.qman_version;
- if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) {
+ if ((p->desc.qman_version & 0xFFFF0000) < QMAN_REV_4100) {
p->dqrr.dqrr_size = 4;
p->dqrr.reset_bug = 1;
} else {
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h b/drivers/bus/fslmc/qbman/qbman_portal.h
index 8bff0b4f..dbea22a1 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -7,7 +7,6 @@
#include "qbman_sys.h"
#include <fsl_qbman_portal.h>
-uint32_t qman_version;
#define QMAN_REV_4000 0x04000000
#define QMAN_REV_4100 0x04010000
#define QMAN_REV_4101 0x04010001
diff --git a/drivers/bus/fslmc/rte_fslmc.h b/drivers/bus/fslmc/rte_fslmc.h
index 33552b48..cea5b78f 100644
--- a/drivers/bus/fslmc/rte_fslmc.h
+++ b/drivers/bus/fslmc/rte_fslmc.h
@@ -173,8 +173,7 @@ void rte_fslmc_driver_unregister(struct rte_dpaa2_driver *driver);
/** Helper for DPAA2 device registration from driver (eth, crypto) instance */
#define RTE_PMD_REGISTER_DPAA2(nm, dpaa2_drv) \
-RTE_INIT(dpaa2initfn_ ##nm); \
-static void dpaa2initfn_ ##nm(void) \
+RTE_INIT(dpaa2initfn_ ##nm) \
{\
(dpaa2_drv).driver.name = RTE_STR(nm);\
rte_fslmc_driver_register(&dpaa2_drv); \
@@ -203,8 +202,7 @@ uint32_t rte_fslmc_get_device_count(enum rte_dpaa2_dev_type device_type);
/** Helper for DPAA2 object registration */
#define RTE_PMD_REGISTER_DPAA2_OBJECT(nm, dpaa2_obj) \
-RTE_INIT(dpaa2objinitfn_ ##nm); \
-static void dpaa2objinitfn_ ##nm(void) \
+RTE_INIT(dpaa2objinitfn_ ##nm) \
{\
(dpaa2_obj).name = RTE_STR(nm);\
rte_fslmc_object_register(&dpaa2_obj); \
diff --git a/drivers/bus/ifpga/rte_bus_ifpga.h b/drivers/bus/ifpga/rte_bus_ifpga.h
index 981bc352..51d5ae0d 100644
--- a/drivers/bus/ifpga/rte_bus_ifpga.h
+++ b/drivers/bus/ifpga/rte_bus_ifpga.h
@@ -134,9 +134,8 @@ void rte_ifpga_driver_register(struct rte_afu_driver *driver);
void rte_ifpga_driver_unregister(struct rte_afu_driver *driver);
#define RTE_PMD_REGISTER_AFU(nm, afudrv)\
-RTE_INIT(afudrvinitfn_ ##afudrv);\
static const char *afudrvinit_ ## nm ## _alias;\
-static void afudrvinitfn_ ##afudrv(void)\
+RTE_INIT(afudrvinitfn_ ##afudrv)\
{\
(afudrv).driver.name = RTE_STR(nm);\
(afudrv).driver.alias = afudrvinit_ ## nm ## _alias;\
diff --git a/drivers/bus/meson.build b/drivers/bus/meson.build
index 52c755dc..80de2d91 100644
--- a/drivers/bus/meson.build
+++ b/drivers/bus/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-drivers = ['dpaa', 'fslmc', 'ifpga', 'pci', 'vdev']
+drivers = ['dpaa', 'fslmc', 'ifpga', 'pci', 'vdev', 'vmbus']
std_deps = ['eal']
config_flag_fmt = 'RTE_LIBRTE_@0@_BUS'
driver_name_fmt = 'rte_bus_@0@'
diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 004600f1..04648ac9 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -15,7 +15,6 @@
#include <rte_memcpy.h>
#include <rte_vfio.h>
-#include "eal_private.h"
#include "eal_filesystem.h"
#include "private.h"
diff --git a/drivers/bus/pci/linux/pci_uio.c b/drivers/bus/pci/linux/pci_uio.c
index d423e4bb..a7c14421 100644
--- a/drivers/bus/pci/linux/pci_uio.c
+++ b/drivers/bus/pci/linux/pci_uio.c
@@ -282,22 +282,19 @@ int
pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
struct mapped_pci_resource *uio_res, int map_idx)
{
- int fd;
+ int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
+ int wc_activate = 0;
+
+ if (dev->driver != NULL)
+ wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE;
loc = &dev->addr;
maps = uio_res->maps;
- /* update devname for mmap */
- snprintf(devname, sizeof(devname),
- "%s/" PCI_PRI_FMT "/resource%d",
- rte_pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid,
- loc->function, res_idx);
-
/* allocate memory to keep path */
maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0);
if (maps[map_idx].path == NULL) {
@@ -309,11 +306,37 @@ pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx,
/*
* open resource file, to mmap it
*/
- fd = open(devname, O_RDWR);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+ if (wc_activate) {
+ /* update devname for mmap */
+ snprintf(devname, sizeof(devname),
+ "%s/" PCI_PRI_FMT "/resource%d_wc",
+ rte_pci_get_sysfs_path(),
+ loc->domain, loc->bus, loc->devid,
+ loc->function, res_idx);
+
+ if (access(devname, R_OK|W_OK) != -1) {
+ fd = open(devname, O_RDWR);
+ if (fd < 0)
+ RTE_LOG(INFO, EAL, "%s cannot be mapped. "
+ "Fall-back to non prefetchable mode.\n",
+ devname);
+ }
+ }
+
+ if (!wc_activate || fd < 0) {
+ snprintf(devname, sizeof(devname),
+ "%s/" PCI_PRI_FMT "/resource%d",
+ rte_pci_get_sysfs_path(),
+ loc->domain, loc->bus, loc->devid,
+ loc->function, res_idx);
+
+ /* then try to map resource file */
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
- goto error;
+ goto error;
+ }
}
/* try mapping somewhere close to the end of hugepages */
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index aeeaa9ed..686386d6 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -584,6 +584,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
dev->mem_resource[i].addr = maps[i].addr;
}
+ /* we need save vfio_dev_fd, so it can be used during release */
+ dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+
return 0;
err_vfio_dev_fd:
close(vfio_dev_fd);
@@ -603,22 +606,58 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
return pci_vfio_map_resource_secondary(dev);
}
-int
-pci_vfio_unmap_resource(struct rte_pci_device *dev)
+static struct mapped_pci_resource *
+find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
+ struct rte_pci_device *dev,
+ const char *pci_addr)
+{
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct pci_map *maps;
+ int i;
+
+ /* Get vfio_res */
+ TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+ if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
+ continue;
+ break;
+ }
+
+ if (vfio_res == NULL)
+ return vfio_res;
+
+ RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
+ pci_addr);
+
+ maps = vfio_res->maps;
+ for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+
+ /*
+ * We do not need to be aware of MSI-X table BAR mappings as
+ * when mapping. Just using current maps array is enough
+ */
+ if (maps[i].addr) {
+ RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
+ pci_addr, maps[i].addr);
+ pci_unmap_resource(maps[i].addr, maps[i].size);
+ }
+ }
+
+ return vfio_res;
+}
+
+static int
+pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
{
char pci_addr[PATH_MAX] = {0};
struct rte_pci_addr *loc = &dev->addr;
- int i, ret;
struct mapped_pci_resource *vfio_res = NULL;
struct mapped_pci_res_list *vfio_res_list;
-
- struct pci_map *maps;
+ int ret;
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
-
if (close(dev->intr_handle.fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
pci_addr);
@@ -639,13 +678,10 @@ pci_vfio_unmap_resource(struct rte_pci_device *dev)
return ret;
}
- vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
- /* Get vfio_res */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
- continue;
- break;
- }
+ vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);
+
/* if we haven't found our tailq entry, something's wrong */
if (vfio_res == NULL) {
RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
@@ -653,30 +689,56 @@ pci_vfio_unmap_resource(struct rte_pci_device *dev)
return -1;
}
- /* unmap BARs */
- maps = vfio_res->maps;
+ TAILQ_REMOVE(vfio_res_list, vfio_res, next);
- RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
- pci_addr);
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+ return 0;
+}
- /*
- * We do not need to be aware of MSI-X table BAR mappings as
- * when mapping. Just using current maps array is enough
- */
- if (maps[i].addr) {
- RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
- pci_addr, maps[i].addr);
- pci_unmap_resource(maps[i].addr, maps[i].size);
- }
+static int
+pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
+{
+ char pci_addr[PATH_MAX] = {0};
+ struct rte_pci_addr *loc = &dev->addr;
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct mapped_pci_res_list *vfio_res_list;
+ int ret;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
+ dev->intr_handle.vfio_dev_fd);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot release device\n", __func__);
+ return ret;
}
- TAILQ_REMOVE(vfio_res_list, vfio_res, next);
+ vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);
+
+ /* if we haven't found our tailq entry, something's wrong */
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+ pci_addr);
+ return -1;
+ }
return 0;
}
int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return pci_vfio_unmap_resource_primary(dev);
+ else
+ return pci_vfio_unmap_resource_secondary(dev);
+}
+
+int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
{
diff --git a/drivers/bus/pci/pci_common.c b/drivers/bus/pci/pci_common.c
index 7215aaec..7736b3f9 100644
--- a/drivers/bus/pci/pci_common.c
+++ b/drivers/bus/pci/pci_common.c
@@ -26,6 +26,7 @@
#include "private.h"
+
extern struct rte_pci_bus rte_pci_bus;
#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
@@ -69,7 +70,7 @@ pci_name_set(struct rte_pci_device *dev)
*/
if (devargs != NULL)
/* If an rte_devargs exists, the generic rte_device uses the
- * given name as its namea
+ * given name as its name.
*/
dev->device.name = dev->device.devargs->name;
else
@@ -155,17 +156,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
RTE_LOG(INFO, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id,
dev->id.device_id, dr->driver.name);
+ /*
+ * reference driver structure
+ * This needs to be before rte_pci_map_device(), as it enables to use
+ * driver flags for adjusting configuration.
+ */
+ dev->driver = dr;
+ dev->device.driver = &dr->driver;
+
if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
- if (ret != 0)
+ if (ret != 0) {
+ dev->driver = NULL;
+ dev->device.driver = NULL;
return ret;
+ }
}
- /* reference driver structure */
- dev->driver = dr;
- dev->device.driver = &dr->driver;
-
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (ret) {
@@ -255,81 +263,6 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
}
/*
- * Find the pci device specified by pci address, then invoke probe function of
- * the driver of the device.
- */
-int
-rte_pci_probe_one(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
-
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- /* update current pci device in global list, kernel bindings might have
- * changed since last time we looked at it.
- */
- if (pci_update_device(addr) < 0)
- goto err_return;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_pci_addr_cmp(&dev->addr, addr))
- continue;
-
- ret = pci_probe_all_drivers(dev);
- if (ret)
- goto err_return;
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL,
- "Requested device " PCI_PRI_FMT " cannot be used\n",
- addr->domain, addr->bus, addr->devid, addr->function);
- return -1;
-}
-
-/*
- * Detach device specified by its pci address.
- */
-int
-rte_pci_detach(const struct rte_pci_addr *addr)
-{
- struct rte_pci_device *dev = NULL;
- int ret = 0;
-
- if (addr == NULL)
- return -1;
-
- FOREACH_DEVICE_ON_PCIBUS(dev) {
- if (rte_pci_addr_cmp(&dev->addr, addr))
- continue;
-
- ret = rte_pci_detach_dev(dev);
- if (ret < 0)
- /* negative value is an error */
- goto err_return;
- if (ret > 0)
- /* positive value means driver doesn't support it */
- continue;
-
- rte_pci_remove_device(dev);
- free(dev);
- return 0;
- }
- return -1;
-
-err_return:
- RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT
- " cannot be used\n", dev->addr.domain, dev->addr.bus,
- dev->addr.devid, dev->addr.function);
- return -1;
-}
-
-/*
* Scan the content of the PCI bus, and call the probe() function for
* all registered drivers that have a matching entry in its id_table
* for discovered devices.
@@ -445,7 +378,7 @@ rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
}
/* Remove a device from PCI bus */
-void
+static void
rte_pci_remove_device(struct rte_pci_device *pci_dev)
{
TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next);
diff --git a/drivers/bus/pci/private.h b/drivers/bus/pci/private.h
index 88fa587e..8ddd03e1 100644
--- a/drivers/bus/pci/private.h
+++ b/drivers/bus/pci/private.h
@@ -33,36 +33,6 @@ rte_pci_probe(void);
int rte_pci_scan(void);
/**
- * Probe the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the probe() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to probe.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_probe_one(const struct rte_pci_addr *addr);
-
-/**
- * Close the single PCI device.
- *
- * Scan the content of the PCI bus, and find the pci device specified by pci
- * address, then call the remove() function for registered driver that has a
- * matching entry in its id_table for discovered device.
- *
- * @param addr
- * The PCI Bus-Device-Function address to close.
- * @return
- * - 0 on success.
- * - Negative on error.
- */
-int rte_pci_detach(const struct rte_pci_addr *addr);
-
-/**
* Find the name of a PCI device.
*/
void
@@ -94,16 +64,6 @@ void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev,
struct rte_pci_device *new_pci_dev);
/**
- * Remove a PCI device from the PCI Bus. This sets to NULL the bus references
- * in the PCI device object as well as the generic device object.
- *
- * @param pci_device
- * PCI device to be removed from PCI Bus
- * @return void
- */
-void rte_pci_remove_device(struct rte_pci_device *pci_device);
-
-/**
* Update a pci device object by asking the kernel for the latest information.
*
* This function is private to EAL.
@@ -117,16 +77,6 @@ void rte_pci_remove_device(struct rte_pci_device *pci_device);
int pci_update_device(const struct rte_pci_addr *addr);
/**
- * Unbind kernel driver for this device
- *
- * This function is private to EAL.
- *
- * @return
- * 0 on success, negative on error
- */
-int pci_unbind_kernel_driver(struct rte_pci_device *dev);
-
-/**
* Map the PCI resource of a PCI device in virtual memory
*
* This function is private to EAL.
diff --git a/drivers/bus/pci/rte_bus_pci.h b/drivers/bus/pci/rte_bus_pci.h
index 458e6d07..0d1955ff 100644
--- a/drivers/bus/pci/rte_bus_pci.h
+++ b/drivers/bus/pci/rte_bus_pci.h
@@ -135,6 +135,8 @@ struct rte_pci_bus {
/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */
#define RTE_PCI_DRV_NEED_MAPPING 0x0001
+/** Device needs PCI BAR mapping with enabled write combining (wc) */
+#define RTE_PCI_DRV_WC_ACTIVATE 0x0002
/** Device driver supports link state interrupt */
#define RTE_PCI_DRV_INTR_LSC 0x0008
/** Device driver supports device removal interrupt */
@@ -189,8 +191,7 @@ void rte_pci_register(struct rte_pci_driver *driver);
/** Helper for PCI device registration from driver (eth, crypto) instance */
#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \
-RTE_INIT(pciinitfn_ ##nm); \
-static void pciinitfn_ ##nm(void) \
+RTE_INIT(pciinitfn_ ##nm) \
{\
(pci_drv).driver.name = RTE_STR(nm);\
rte_pci_register(&pci_drv); \
diff --git a/drivers/bus/vdev/rte_bus_vdev.h b/drivers/bus/vdev/rte_bus_vdev.h
index f9b5eb59..9ae3eaae 100644
--- a/drivers/bus/vdev/rte_bus_vdev.h
+++ b/drivers/bus/vdev/rte_bus_vdev.h
@@ -86,9 +86,8 @@ void rte_vdev_register(struct rte_vdev_driver *driver);
void rte_vdev_unregister(struct rte_vdev_driver *driver);
#define RTE_PMD_REGISTER_VDEV(nm, vdrv)\
-RTE_INIT(vdrvinitfn_ ##vdrv);\
static const char *vdrvinit_ ## nm ## _alias;\
-static void vdrvinitfn_ ##vdrv(void)\
+RTE_INIT(vdrvinitfn_ ##vdrv)\
{\
(vdrv).driver.name = RTE_STR(nm);\
(vdrv).driver.alias = vdrvinit_ ## nm ## _alias;\
diff --git a/drivers/bus/vmbus/Makefile b/drivers/bus/vmbus/Makefile
new file mode 100644
index 00000000..deee9dd1
--- /dev/null
+++ b/drivers/bus/vmbus/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_bus_vmbus.a
+LIBABIVER := 1
+EXPORT_MAP := rte_bus_vmbus_version.map
+
+CFLAGS += -I$(SRCDIR)
+CFLAGS += -O3 $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+ifneq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),)
+SYSTEM := linux
+endif
+ifneq ($(CONFIG_RTE_EXEC_ENV_BSDAPP),)
+$(error "VMBUS not implemented for BSD yet")
+endif
+
+CFLAGS += -I$(RTE_SDK)/drivers/bus/vmbus/$(SYSTEM)
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/$(SYSTEM)app/eal
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev
+
+include $(RTE_SDK)/drivers/bus/vmbus/$(SYSTEM)/Makefile
+SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) := $(addprefix $(SYSTEM)/,$(SRCS))
+SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_common.c
+SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_channel.c vmbus_bufring.c
+SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_common_uio.c
+
+SYMLINK-$(CONFIG_RTE_LIBRTE_VMBUS)-include += rte_bus_vmbus.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VMBUS)-include += rte_vmbus_reg.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/bus/vmbus/linux/Makefile b/drivers/bus/vmbus/linux/Makefile
new file mode 100644
index 00000000..ef0d30b2
--- /dev/null
+++ b/drivers/bus/vmbus/linux/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+SRCS += vmbus_bus.c vmbus_uio.c
diff --git a/drivers/bus/vmbus/linux/vmbus_bus.c b/drivers/bus/vmbus/linux/vmbus_bus.c
new file mode 100644
index 00000000..52d6a3c0
--- /dev/null
+++ b/drivers/bus/vmbus/linux/vmbus_bus.c
@@ -0,0 +1,355 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <rte_eal.h>
+#include <rte_uuid.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_devargs.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_bus_vmbus.h>
+
+#include "eal_filesystem.h"
+#include "private.h"
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+extern struct rte_vmbus_bus rte_vmbus_bus;
+
+/* Read sysfs file to get UUID */
+static int
+parse_sysfs_uuid(const char *filename, rte_uuid_t uu)
+{
+ char buf[BUFSIZ];
+ char *cp, *in = buf;
+ FILE *f;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ VMBUS_LOG(ERR, "cannot open sysfs value %s: %s",
+ filename, strerror(errno));
+ return -1;
+ }
+
+ if (fgets(buf, sizeof(buf), f) == NULL) {
+ VMBUS_LOG(ERR, "cannot read sysfs value %s",
+ filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ cp = strchr(buf, '\n');
+ if (cp)
+ *cp = '\0';
+
+ /* strip { } notation */
+ if (buf[0] == '{') {
+ in = buf + 1;
+ cp = strchr(in, '}');
+ if (cp)
+ *cp = '\0';
+ }
+
+ if (rte_uuid_parse(in, uu) < 0) {
+ VMBUS_LOG(ERR, "%s %s not a valid UUID",
+ filename, buf);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+get_sysfs_string(const char *filename, char *buf, size_t buflen)
+{
+ char *cp;
+ FILE *f;
+
+ f = fopen(filename, "r");
+ if (f == NULL) {
+ VMBUS_LOG(ERR, "cannot open sysfs value %s:%s",
+ filename, strerror(errno));
+ return -1;
+ }
+
+ if (fgets(buf, buflen, f) == NULL) {
+ VMBUS_LOG(ERR, "cannot read sysfs value %s",
+ filename);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+
+ /* remove trailing newline */
+ cp = memchr(buf, '\n', buflen);
+ if (cp)
+ *cp = '\0';
+
+ return 0;
+}
+
+static int
+vmbus_get_uio_dev(const struct rte_vmbus_device *dev,
+ char *dstbuf, size_t buflen)
+{
+ char dirname[PATH_MAX];
+ unsigned int uio_num;
+ struct dirent *e;
+ DIR *dir;
+
+ /* Assume recent kernel where uio is in uio/uioX */
+ snprintf(dirname, sizeof(dirname),
+ SYSFS_VMBUS_DEVICES "/%s/uio", dev->device.name);
+
+ dir = opendir(dirname);
+ if (dir == NULL)
+ return -1; /* Not a UIO device */
+
+ /* take the first file starting with "uio" */
+ while ((e = readdir(dir)) != NULL) {
+ const int prefix_len = 3;
+ char *endptr;
+
+ if (strncmp(e->d_name, "uio", prefix_len) != 0)
+ continue;
+
+ /* try uio%d */
+ errno = 0;
+ uio_num = strtoull(e->d_name + prefix_len, &endptr, 10);
+ if (errno == 0 && endptr != (e->d_name + prefix_len)) {
+ snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num);
+ break;
+ }
+ }
+ closedir(dir);
+
+ if (e == NULL)
+ return -1;
+
+ return uio_num;
+}
+
+/* Check map names with kernel names */
+static const char *map_names[VMBUS_MAX_RESOURCE] = {
+ [HV_TXRX_RING_MAP] = "txrx_rings",
+ [HV_INT_PAGE_MAP] = "int_page",
+ [HV_MON_PAGE_MAP] = "monitor_page",
+ [HV_RECV_BUF_MAP] = "recv:",
+ [HV_SEND_BUF_MAP] = "send:",
+};
+
+
+/* map the resources of a vmbus device in virtual memory */
+int
+rte_vmbus_map_device(struct rte_vmbus_device *dev)
+{
+ char uioname[PATH_MAX], filename[PATH_MAX];
+ char dirname[PATH_MAX], mapname[64];
+ int i;
+
+ dev->uio_num = vmbus_get_uio_dev(dev, uioname, sizeof(uioname));
+ if (dev->uio_num < 0) {
+ VMBUS_LOG(DEBUG, "Not managed by UIO driver, skipped");
+ return 1;
+ }
+
+ /* Extract resource value */
+ for (i = 0; i < VMBUS_MAX_RESOURCE; i++) {
+ struct rte_mem_resource *res = &dev->resource[i];
+ unsigned long len, gpad = 0;
+ char *cp;
+
+ snprintf(dirname, sizeof(dirname),
+ "%s/maps/map%d", uioname, i);
+
+ snprintf(filename, sizeof(filename),
+ "%s/name", dirname);
+
+ if (get_sysfs_string(filename, mapname, sizeof(mapname)) < 0) {
+ VMBUS_LOG(ERR, "could not read %s", filename);
+ return -1;
+ }
+
+ if (strncmp(map_names[i], mapname, strlen(map_names[i])) != 0) {
+ VMBUS_LOG(ERR,
+ "unexpected resource %s (expected %s)",
+ mapname, map_names[i]);
+ return -1;
+ }
+
+ snprintf(filename, sizeof(filename),
+ "%s/size", dirname);
+ if (eal_parse_sysfs_value(filename, &len) < 0) {
+ VMBUS_LOG(ERR,
+ "could not read %s", filename);
+ return -1;
+ }
+ res->len = len;
+
+ /* both send and receive buffers have gpad in name */
+ cp = memchr(mapname, ':', sizeof(mapname));
+ if (cp)
+ gpad = strtoul(cp+1, NULL, 0);
+
+ /* put the GPAD value in physical address */
+ res->phys_addr = gpad;
+ }
+
+ return vmbus_uio_map_resource(dev);
+}
+
+void
+rte_vmbus_unmap_device(struct rte_vmbus_device *dev)
+{
+ vmbus_uio_unmap_resource(dev);
+}
+
+/* Scan one vmbus sysfs entry, and fill the devices list from it. */
+static int
+vmbus_scan_one(const char *name)
+{
+ struct rte_vmbus_device *dev, *dev2;
+ char filename[PATH_MAX];
+ char dirname[PATH_MAX];
+ unsigned long tmp;
+
+ dev = calloc(1, sizeof(*dev));
+ if (dev == NULL)
+ return -1;
+
+ dev->device.name = strdup(name);
+ if (!dev->device.name)
+ goto error;
+
+ /* sysfs base directory
+ * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df
+ * or on older kernel
+ * /sys/bus/vmbus/devices/vmbus_1
+ */
+ snprintf(dirname, sizeof(dirname), "%s/%s",
+ SYSFS_VMBUS_DEVICES, name);
+
+ /* get device id */
+ snprintf(filename, sizeof(filename), "%s/device_id", dirname);
+ if (parse_sysfs_uuid(filename, dev->device_id) < 0)
+ goto error;
+
+ /* get device class */
+ snprintf(filename, sizeof(filename), "%s/class_id", dirname);
+ if (parse_sysfs_uuid(filename, dev->class_id) < 0)
+ goto error;
+
+ /* get relid */
+ snprintf(filename, sizeof(filename), "%s/id", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ goto error;
+ dev->relid = tmp;
+
+ /* get monitor id */
+ snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ goto error;
+ dev->monitor_id = tmp;
+
+ /* get numa node (if present) */
+ snprintf(filename, sizeof(filename), "%s/numa_node",
+ dirname);
+
+ if (access(filename, R_OK) == 0) {
+ if (eal_parse_sysfs_value(filename, &tmp) < 0)
+ goto error;
+ dev->device.numa_node = tmp;
+ } else {
+ /* if no NUMA support, set default to 0 */
+ dev->device.numa_node = SOCKET_ID_ANY;
+ }
+
+ /* device is valid, add in list (sorted) */
+ VMBUS_LOG(DEBUG, "Adding vmbus device %s", name);
+
+ TAILQ_FOREACH(dev2, &rte_vmbus_bus.device_list, next) {
+ int ret;
+
+ ret = rte_uuid_compare(dev->device_id, dev2->device_id);
+ if (ret > 0)
+ continue;
+
+ if (ret < 0) {
+ vmbus_insert_device(dev2, dev);
+ } else { /* already registered */
+ VMBUS_LOG(NOTICE,
+ "%s already registered", name);
+ free(dev);
+ }
+ return 0;
+ }
+
+ vmbus_add_device(dev);
+ return 0;
+error:
+ VMBUS_LOG(DEBUG, "failed");
+
+ free(dev);
+ return -1;
+}
+
+/*
+ * Scan the content of the vmbus, and the devices in the devices list
+ */
+int
+rte_vmbus_scan(void)
+{
+ struct dirent *e;
+ DIR *dir;
+
+ dir = opendir(SYSFS_VMBUS_DEVICES);
+ if (dir == NULL) {
+ if (errno == ENOENT)
+ return 0;
+
+ VMBUS_LOG(ERR, "opendir %s failed: %s",
+ SYSFS_VMBUS_DEVICES, strerror(errno));
+ return -1;
+ }
+
+ while ((e = readdir(dir)) != NULL) {
+ if (e->d_name[0] == '.')
+ continue;
+
+ if (vmbus_scan_one(e->d_name) < 0)
+ goto error;
+ }
+ closedir(dir);
+ return 0;
+
+error:
+ closedir(dir);
+ return -1;
+}
+
+void rte_vmbus_irq_mask(struct rte_vmbus_device *device)
+{
+ vmbus_uio_irq_control(device, 1);
+}
+
+void rte_vmbus_irq_unmask(struct rte_vmbus_device *device)
+{
+ vmbus_uio_irq_control(device, 0);
+}
+
+int rte_vmbus_irq_read(struct rte_vmbus_device *device)
+{
+ return vmbus_uio_irq_read(device);
+}
diff --git a/drivers/bus/vmbus/linux/vmbus_uio.c b/drivers/bus/vmbus/linux/vmbus_uio.c
new file mode 100644
index 00000000..856c6d66
--- /dev/null
+++ b/drivers/bus/vmbus/linux/vmbus_uio.c
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <rte_bus.h>
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_bus_vmbus.h>
+#include <rte_string_fns.h>
+
+#include "private.h"
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+static void *vmbus_map_addr;
+
+/* Control interrupts */
+void vmbus_uio_irq_control(struct rte_vmbus_device *dev, int32_t onoff)
+{
+ if (write(dev->intr_handle.fd, &onoff, sizeof(onoff)) < 0) {
+ VMBUS_LOG(ERR, "cannot write to %d:%s",
+ dev->intr_handle.fd, strerror(errno));
+ }
+}
+
+int vmbus_uio_irq_read(struct rte_vmbus_device *dev)
+{
+ int32_t count;
+ int cc;
+
+ cc = read(dev->intr_handle.fd, &count, sizeof(count));
+ if (cc < (int)sizeof(count)) {
+ if (cc < 0) {
+ VMBUS_LOG(ERR, "IRQ read failed %s",
+ strerror(errno));
+ return -errno;
+ }
+ VMBUS_LOG(ERR, "can't read IRQ count");
+ return -EINVAL;
+ }
+
+ return count;
+}
+
+void
+vmbus_uio_free_resource(struct rte_vmbus_device *dev,
+ struct mapped_vmbus_resource *uio_res)
+{
+ rte_free(uio_res);
+
+ if (dev->intr_handle.uio_cfg_fd >= 0) {
+ close(dev->intr_handle.uio_cfg_fd);
+ dev->intr_handle.uio_cfg_fd = -1;
+ }
+
+ if (dev->intr_handle.fd >= 0) {
+ close(dev->intr_handle.fd);
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ }
+}
+
+int
+vmbus_uio_alloc_resource(struct rte_vmbus_device *dev,
+ struct mapped_vmbus_resource **uio_res)
+{
+ char devname[PATH_MAX]; /* contains the /dev/uioX */
+
+ /* save fd if in primary process */
+ snprintf(devname, sizeof(devname), "/dev/uio%u", dev->uio_num);
+ dev->intr_handle.fd = open(devname, O_RDWR);
+ if (dev->intr_handle.fd < 0) {
+ VMBUS_LOG(ERR, "Cannot open %s: %s",
+ devname, strerror(errno));
+ goto error;
+ }
+ dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
+
+ /* allocate the mapping details for secondary processes*/
+ *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
+ if (*uio_res == NULL) {
+ VMBUS_LOG(ERR, "cannot store uio mmap details");
+ goto error;
+ }
+
+ strlcpy((*uio_res)->path, devname, PATH_MAX);
+ rte_uuid_copy((*uio_res)->id, dev->device_id);
+
+ return 0;
+
+error:
+ vmbus_uio_free_resource(dev, *uio_res);
+ return -1;
+}
+
+static int
+find_max_end_va(const struct rte_memseg_list *msl, void *arg)
+{
+ size_t sz = msl->memseg_arr.len * msl->page_sz;
+ void *end_va = RTE_PTR_ADD(msl->base_va, sz);
+ void **max_va = arg;
+
+ if (*max_va < end_va)
+ *max_va = end_va;
+ return 0;
+}
+
+/*
+ * TODO: this should be part of memseg api.
+ * code is duplicated from PCI.
+ */
+static void *
+vmbus_find_max_end_va(void)
+{
+ void *va = NULL;
+
+ rte_memseg_list_walk(find_max_end_va, &va);
+ return va;
+}
+
+int
+vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int idx,
+ struct mapped_vmbus_resource *uio_res,
+ int flags)
+{
+ size_t size = dev->resource[idx].len;
+ struct vmbus_map *maps = uio_res->maps;
+ void *mapaddr;
+ off_t offset;
+ int fd;
+
+ /* devname for mmap */
+ fd = open(uio_res->path, O_RDWR);
+ if (fd < 0) {
+ VMBUS_LOG(ERR, "Cannot open %s: %s",
+ uio_res->path, strerror(errno));
+ return -1;
+ }
+
+ /* try mapping somewhere close to the end of hugepages */
+ if (vmbus_map_addr == NULL)
+ vmbus_map_addr = vmbus_find_max_end_va();
+
+ /* offset is special in uio it indicates which resource */
+ offset = idx * PAGE_SIZE;
+
+ mapaddr = vmbus_map_resource(vmbus_map_addr, fd, offset, size, flags);
+ close(fd);
+
+ if (mapaddr == MAP_FAILED)
+ return -1;
+
+ dev->resource[idx].addr = mapaddr;
+ vmbus_map_addr = RTE_PTR_ADD(mapaddr, size);
+
+ /* Record result of sucessful mapping for use by secondary */
+ maps[idx].addr = mapaddr;
+ maps[idx].size = size;
+
+ return 0;
+}
+
+static int vmbus_uio_map_primary(struct vmbus_channel *chan,
+ void **ring_buf, uint32_t *ring_size)
+{
+ struct mapped_vmbus_resource *uio_res;
+
+ uio_res = vmbus_uio_find_resource(chan->device);
+ if (!uio_res) {
+ VMBUS_LOG(ERR, "can not find resources!");
+ return -ENOMEM;
+ }
+
+ if (uio_res->nb_maps < VMBUS_MAX_RESOURCE) {
+ VMBUS_LOG(ERR, "VMBUS: only %u resources found!",
+ uio_res->nb_maps);
+ return -EINVAL;
+ }
+
+ *ring_size = uio_res->maps[HV_TXRX_RING_MAP].size / 2;
+ *ring_buf = uio_res->maps[HV_TXRX_RING_MAP].addr;
+ return 0;
+}
+
+static int vmbus_uio_map_subchan(const struct rte_vmbus_device *dev,
+ const struct vmbus_channel *chan,
+ void **ring_buf, uint32_t *ring_size)
+{
+ char ring_path[PATH_MAX];
+ size_t file_size;
+ struct stat sb;
+ int fd;
+
+ snprintf(ring_path, sizeof(ring_path),
+ "%s/%s/channels/%u/ring",
+ SYSFS_VMBUS_DEVICES, dev->device.name,
+ chan->relid);
+
+ fd = open(ring_path, O_RDWR);
+ if (fd < 0) {
+ VMBUS_LOG(ERR, "Cannot open %s: %s",
+ ring_path, strerror(errno));
+ return -errno;
+ }
+
+ if (fstat(fd, &sb) < 0) {
+ VMBUS_LOG(ERR, "Cannot state %s: %s",
+ ring_path, strerror(errno));
+ close(fd);
+ return -errno;
+ }
+ file_size = sb.st_size;
+
+ if (file_size == 0 || (file_size & (PAGE_SIZE - 1))) {
+ VMBUS_LOG(ERR, "incorrect size %s: %zu",
+ ring_path, file_size);
+
+ close(fd);
+ return -EINVAL;
+ }
+
+ *ring_size = file_size / 2;
+ *ring_buf = vmbus_map_resource(vmbus_map_addr, fd,
+ 0, sb.st_size, 0);
+ close(fd);
+
+ if (ring_buf == MAP_FAILED)
+ return -EIO;
+
+ vmbus_map_addr = RTE_PTR_ADD(ring_buf, file_size);
+ return 0;
+}
+
+int vmbus_uio_map_rings(struct vmbus_channel *chan)
+{
+ const struct rte_vmbus_device *dev = chan->device;
+ uint32_t ring_size;
+ void *ring_buf;
+ int ret;
+
+ /* Primary channel */
+ if (chan->subchannel_id == 0)
+ ret = vmbus_uio_map_primary(chan, &ring_buf, &ring_size);
+ else
+ ret = vmbus_uio_map_subchan(dev, chan, &ring_buf, &ring_size);
+
+ if (ret)
+ return ret;
+
+ vmbus_br_setup(&chan->txbr, ring_buf, ring_size);
+ vmbus_br_setup(&chan->rxbr, (char *)ring_buf + ring_size, ring_size);
+ return 0;
+}
+
+static int vmbus_uio_sysfs_read(const char *dir, const char *name,
+ unsigned long *val, unsigned long max_range)
+{
+ char path[PATH_MAX];
+ FILE *f;
+ int ret;
+
+ snprintf(path, sizeof(path), "%s/%s", dir, name);
+ f = fopen(path, "r");
+ if (!f) {
+ VMBUS_LOG(ERR, "can't open %s:%s",
+ path, strerror(errno));
+ return -errno;
+ }
+
+ if (fscanf(f, "%lu", val) != 1)
+ ret = -EIO;
+ else if (*val > max_range)
+ ret = -ERANGE;
+ else
+ ret = 0;
+ fclose(f);
+
+ return ret;
+}
+
+static bool vmbus_uio_ring_present(const struct rte_vmbus_device *dev,
+ uint32_t relid)
+{
+ char ring_path[PATH_MAX];
+
+ /* Check if kernel has subchannel sysfs files */
+ snprintf(ring_path, sizeof(ring_path),
+ "%s/%s/channels/%u/ring",
+ SYSFS_VMBUS_DEVICES, dev->device.name, relid);
+
+ return access(ring_path, R_OK|W_OK) == 0;
+}
+
+bool vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev,
+ const struct vmbus_channel *chan)
+{
+ return vmbus_uio_ring_present(dev, chan->relid);
+}
+
+static bool vmbus_isnew_subchannel(struct vmbus_channel *primary,
+ unsigned long id)
+{
+ const struct vmbus_channel *c;
+
+ STAILQ_FOREACH(c, &primary->subchannel_list, next) {
+ if (c->relid == id)
+ return false;
+ }
+ return true;
+}
+
+int vmbus_uio_get_subchan(struct vmbus_channel *primary,
+ struct vmbus_channel **subchan)
+{
+ const struct rte_vmbus_device *dev = primary->device;
+ char chan_path[PATH_MAX], subchan_path[PATH_MAX];
+ struct dirent *ent;
+ DIR *chan_dir;
+
+ snprintf(chan_path, sizeof(chan_path),
+ "%s/%s/channels",
+ SYSFS_VMBUS_DEVICES, dev->device.name);
+
+ chan_dir = opendir(chan_path);
+ if (!chan_dir) {
+ VMBUS_LOG(ERR, "cannot open %s: %s",
+ chan_path, strerror(errno));
+ return -errno;
+ }
+
+ while ((ent = readdir(chan_dir))) {
+ unsigned long relid, subid, monid;
+ char *endp;
+ int err;
+
+ if (ent->d_name[0] == '.')
+ continue;
+
+ errno = 0;
+ relid = strtoul(ent->d_name, &endp, 0);
+ if (*endp || errno != 0 || relid > UINT16_MAX) {
+ VMBUS_LOG(NOTICE, "not a valid channel relid: %s",
+ ent->d_name);
+ continue;
+ }
+
+ snprintf(subchan_path, sizeof(subchan_path), "%s/%lu",
+ chan_path, relid);
+ err = vmbus_uio_sysfs_read(subchan_path, "subchannel_id",
+ &subid, UINT16_MAX);
+ if (err) {
+ VMBUS_LOG(NOTICE, "invalid subchannel id %lu",
+ subid);
+ closedir(chan_dir);
+ return err;
+ }
+
+ if (subid == 0)
+ continue; /* skip primary channel */
+
+ if (!vmbus_isnew_subchannel(primary, relid))
+ continue;
+
+ if (!vmbus_uio_ring_present(dev, relid))
+ continue; /* Ring may not be ready yet */
+
+ err = vmbus_uio_sysfs_read(subchan_path, "monitor_id",
+ &monid, UINT8_MAX);
+ if (err) {
+ VMBUS_LOG(NOTICE, "invalid monitor id %lu",
+ monid);
+ return err;
+ }
+
+ err = vmbus_chan_create(dev, relid, subid, monid, subchan);
+ if (err) {
+ VMBUS_LOG(NOTICE, "subchannel setup failed");
+ return err;
+ }
+ break;
+ }
+ closedir(chan_dir);
+
+ return (ent == NULL) ? -ENOENT : 0;
+}
diff --git a/drivers/bus/vmbus/meson.build b/drivers/bus/vmbus/meson.build
new file mode 100644
index 00000000..18daabec
--- /dev/null
+++ b/drivers/bus/vmbus/meson.build
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+allow_experimental_apis = true
+
+install_headers('rte_bus_vmbus.h','rte_vmbus_reg.h')
+
+sources = files('vmbus_common.c',
+ 'vmbus_channel.c',
+ 'vmbus_bufring.c',
+ 'vmbus_common_uio.c')
+
+if host_machine.system() == 'linux'
+ sources += files('linux/vmbus_bus.c',
+ 'linux/vmbus_uio.c')
+ includes += include_directories('linux')
+else
+ build = false
+endif
diff --git a/drivers/bus/vmbus/private.h b/drivers/bus/vmbus/private.h
new file mode 100644
index 00000000..9964fc42
--- /dev/null
+++ b/drivers/bus/vmbus/private.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#ifndef _VMBUS_PRIVATE_H_
+#define _VMBUS_PRIVATE_H_
+
+#include <stdbool.h>
+#include <sys/uio.h>
+#include <rte_log.h>
+#include <rte_vmbus_reg.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+extern int vmbus_logtype_bus;
+#define VMBUS_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, vmbus_logtype_bus, "%s(): " fmt "\n", \
+ __func__, ##args)
+
+struct vmbus_br {
+ struct vmbus_bufring *vbr;
+ uint32_t dsize;
+ uint32_t windex; /* next available location */
+};
+
+#define UIO_NAME_MAX 64
+
+struct vmbus_map {
+ void *addr; /* user mmap of resource */
+ uint64_t size; /* length */
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct mapped_vmbus_resource {
+ TAILQ_ENTRY(mapped_vmbus_resource) next;
+
+ rte_uuid_t id;
+ int nb_maps;
+ struct vmbus_map maps[VMBUS_MAX_RESOURCE];
+ char path[PATH_MAX];
+};
+
+TAILQ_HEAD(mapped_vmbus_res_list, mapped_vmbus_resource);
+
+#define HV_MON_TRIG_LEN 32
+#define HV_MON_TRIG_MAX 4
+
+struct vmbus_channel {
+ STAILQ_HEAD(, vmbus_channel) subchannel_list;
+ STAILQ_ENTRY(vmbus_channel) next;
+ const struct rte_vmbus_device *device;
+
+ struct vmbus_br rxbr;
+ struct vmbus_br txbr;
+
+ uint16_t relid;
+ uint16_t subchannel_id;
+ uint8_t monitor_id;
+};
+
+#define VMBUS_MAX_CHANNELS 64
+
+int vmbus_chan_create(const struct rte_vmbus_device *device,
+ uint16_t relid, uint16_t subid, uint8_t monitor_id,
+ struct vmbus_channel **new_chan);
+
+void vmbus_add_device(struct rte_vmbus_device *vmbus_dev);
+void vmbus_insert_device(struct rte_vmbus_device *exist_vmbus_dev,
+ struct rte_vmbus_device *new_vmbus_dev);
+void vmbus_remove_device(struct rte_vmbus_device *vmbus_device);
+
+void vmbus_uio_irq_control(struct rte_vmbus_device *dev, int32_t onoff);
+int vmbus_uio_irq_read(struct rte_vmbus_device *dev);
+
+int vmbus_uio_map_resource(struct rte_vmbus_device *dev);
+void vmbus_uio_unmap_resource(struct rte_vmbus_device *dev);
+
+int vmbus_uio_alloc_resource(struct rte_vmbus_device *dev,
+ struct mapped_vmbus_resource **uio_res);
+void vmbus_uio_free_resource(struct rte_vmbus_device *dev,
+ struct mapped_vmbus_resource *uio_res);
+
+struct mapped_vmbus_resource *
+vmbus_uio_find_resource(const struct rte_vmbus_device *dev);
+int vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int res_idx,
+ struct mapped_vmbus_resource *uio_res,
+ int flags);
+
+void *vmbus_map_resource(void *requested_addr, int fd, off_t offset,
+ size_t size, int additional_flags);
+void vmbus_unmap_resource(void *requested_addr, size_t size);
+
+bool vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev,
+ const struct vmbus_channel *chan);
+int vmbus_uio_get_subchan(struct vmbus_channel *primary,
+ struct vmbus_channel **subchan);
+int vmbus_uio_map_rings(struct vmbus_channel *chan);
+
+void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen);
+
+/* Amount of space available for write */
+static inline uint32_t
+vmbus_br_availwrite(const struct vmbus_br *br, uint32_t windex)
+{
+ uint32_t rindex = br->vbr->rindex;
+
+ if (windex >= rindex)
+ return br->dsize - (windex - rindex);
+ else
+ return rindex - windex;
+}
+
+static inline uint32_t
+vmbus_br_availread(const struct vmbus_br *br)
+{
+ return br->dsize - vmbus_br_availwrite(br, br->vbr->windex);
+}
+
+int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen,
+ bool *need_sig);
+
+int vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen);
+
+int vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t hlen);
+
+#endif /* _VMBUS_PRIVATE_H_ */
diff --git a/drivers/bus/vmbus/rte_bus_vmbus.h b/drivers/bus/vmbus/rte_bus_vmbus.h
new file mode 100644
index 00000000..4a2c1f6f
--- /dev/null
+++ b/drivers/bus/vmbus/rte_bus_vmbus.h
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#ifndef _VMBUS_H_
+#define _VMBUS_H_
+
+/**
+ * @file
+ *
+ * VMBUS Interface
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_compat.h>
+#include <rte_uuid.h>
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+#include <rte_dev.h>
+#include <rte_vmbus_reg.h>
+
+/* Forward declarations */
+struct rte_vmbus_device;
+struct rte_vmbus_driver;
+struct rte_vmbus_bus;
+struct vmbus_channel;
+struct vmbus_mon_page;
+
+TAILQ_HEAD(rte_vmbus_device_list, rte_vmbus_device);
+TAILQ_HEAD(rte_vmbus_driver_list, rte_vmbus_driver);
+
+/* VMBus iterators */
+#define FOREACH_DEVICE_ON_VMBUS(p) \
+ TAILQ_FOREACH(p, &(rte_vmbus_bus.device_list), next)
+
+#define FOREACH_DRIVER_ON_VMBUS(p) \
+ TAILQ_FOREACH(p, &(rte_vmbus_bus.driver_list), next)
+
+/** Maximum number of VMBUS resources. */
+enum hv_uio_map {
+ HV_TXRX_RING_MAP = 0,
+ HV_INT_PAGE_MAP,
+ HV_MON_PAGE_MAP,
+ HV_RECV_BUF_MAP,
+ HV_SEND_BUF_MAP
+};
+#define VMBUS_MAX_RESOURCE 5
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+ TAILQ_ENTRY(rte_vmbus_device) next; /**< Next probed VMBUS device */
+ const struct rte_vmbus_driver *driver; /**< Associated driver */
+ struct rte_device device; /**< Inherit core device */
+ rte_uuid_t device_id; /**< VMBUS device id */
+ rte_uuid_t class_id; /**< VMBUS device type */
+ uint32_t relid; /**< id for primary */
+ uint8_t monitor_id; /**< monitor page */
+ int uio_num; /**< UIO device number */
+ uint32_t *int_page; /**< VMBUS interrupt page */
+ struct vmbus_channel *primary; /**< VMBUS primary channel */
+ struct vmbus_mon_page *monitor_page; /**< VMBUS monitor page */
+
+ struct rte_intr_handle intr_handle; /**< Interrupt handle */
+ struct rte_mem_resource resource[VMBUS_MAX_RESOURCE];
+};
+
+/**
+ * Initialization function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_probe_t)(struct rte_vmbus_driver *,
+ struct rte_vmbus_device *);
+
+/**
+ * Initialization function for the driver called during hot plugging.
+ */
+typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+ TAILQ_ENTRY(rte_vmbus_driver) next; /**< Next in list. */
+ struct rte_driver driver;
+ struct rte_vmbus_bus *bus; /**< VM bus reference. */
+ vmbus_probe_t *probe; /**< Device Probe function. */
+ vmbus_remove_t *remove; /**< Device Remove function. */
+
+ const rte_uuid_t *id_table; /**< ID table. */
+};
+
+
+/**
+ * Structure describing the VM bus
+ */
+struct rte_vmbus_bus {
+ struct rte_bus bus; /**< Inherit the generic class */
+ struct rte_vmbus_device_list device_list; /**< List of devices */
+ struct rte_vmbus_driver_list driver_list; /**< List of drivers */
+};
+
+/**
+ * Scan the content of the VMBUS bus, and the devices in the devices
+ * list
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_vmbus_scan(void);
+
+/**
+ * Probe the VMBUS bus
+ *
+ * @return
+ * - 0 on success.
+ * - !0 on error.
+ */
+int rte_vmbus_probe(void);
+
+/**
+ * Map the VMBUS device resources in user space virtual memory address
+ *
+ * @param dev
+ * A pointer to a rte_vmbus_device structure describing the device
+ * to use
+ *
+ * @return
+ * 0 on success, negative on error and positive if no driver
+ * is found for the device.
+ */
+int rte_vmbus_map_device(struct rte_vmbus_device *dev);
+
+/**
+ * Unmap this device
+ *
+ * @param dev
+ * A pointer to a rte_vmbus_device structure describing the device
+ * to use
+ */
+void rte_vmbus_unmap_device(struct rte_vmbus_device *dev);
+
+/**
+ * Get connection to primary VMBUS channel
+ *
+ * @param device
+ * A pointer to a rte_vmbus_device structure describing the device
+ * @param chan
+ * A pointer to a VMBUS channel pointer that will be filled.
+ * @return
+ * - 0 Success; channel opened.
+ * - -ENOMEM: Not enough memory available.
+ * - -EINVAL: Regions could not be mapped.
+ */
+int rte_vmbus_chan_open(struct rte_vmbus_device *device,
+ struct vmbus_channel **chan);
+
+/**
+ * Free connection to VMBUS channel
+ *
+ * @param chan
+ * VMBUS channel
+ */
+void rte_vmbus_chan_close(struct vmbus_channel *chan);
+
+/**
+ * Gets the maximum number of channels supported on device
+ *
+ * @param device
+ * A pointer to a rte_vmbus_device structure describing the device
+ * @return
+ * Number of channels available.
+ */
+int rte_vmbus_max_channels(const struct rte_vmbus_device *device);
+
+/**
+ * Get a connection to new secondary vmbus channel
+ *
+ * @param primary
+ * A pointer to primary VMBUS channel
+ * @param chan
+ * A pointer to a secondary VMBUS channel pointer that will be filled.
+ * @return
+ * - 0 Success; channel opened.
+ * - -ENOMEM: Not enough memory available.
+ * - -EINVAL: Regions could not be mapped.
+ */
+int rte_vmbus_subchan_open(struct vmbus_channel *primary,
+ struct vmbus_channel **new_chan);
+
+/**
+ * Disable IRQ for device
+ *
+ * @param device
+ * VMBUS device
+ */
+void rte_vmbus_irq_mask(struct rte_vmbus_device *device);
+
+/**
+ * Enable IRQ for device
+ *
+ * @param device
+ * VMBUS device
+ */
+void rte_vmbus_irq_unmask(struct rte_vmbus_device *device);
+
+/**
+ * Read (and wait) for IRQ
+ *
+ * @param device
+ * VMBUS device
+ */
+int rte_vmbus_irq_read(struct rte_vmbus_device *device);
+
+/**
+ * Test if channel is empty
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @return
+ * Return true if no data present in incoming ring.
+ */
+bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel);
+
+/**
+ * Send the specified buffer on the given channel
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @param type
+ * Type of packet that is being send e.g. negotiate, time
+ * packet etc.
+ * @param data
+ * Pointer to the buffer to send
+ * @param dlen
+ * Number of bytes of data to send
+ * @param xact
+ * Identifier of the request
+ * @param flags
+ * Message type inband, rxbuf, gpa
+ * @param need_sig
+ * Is host signal tx is required (optional)
+ *
+ * Sends data in buffer directly to hyper-v via the vmbus
+ */
+int rte_vmbus_chan_send(struct vmbus_channel *channel, uint16_t type,
+ void *data, uint32_t dlen,
+ uint64_t xact, uint32_t flags, bool *need_sig);
+
+/**
+ * Explicitly signal host that data is available
+ *
+ * @param
+ * Pointer to vmbus_channel structure.
+ *
+ * Used when batching multiple sends and only signaling host
+ * after the last send.
+ */
+void rte_vmbus_chan_signal_tx(const struct vmbus_channel *channel);
+
+/* Structure for scatter/gather I/O */
+struct iova_list {
+ rte_iova_t addr;
+ uint32_t len;
+};
+#define MAX_PAGE_BUFFER_COUNT 32
+
+/**
+ * Send a scattered buffer on the given channel
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @param type
+ * Type of packet that is being send e.g. negotiate, time
+ * packet etc.
+ * @param gpa
+ * Array of buffers to send
+ * @param gpacnt
+ * Number of elements in iov
+ * @param data
+ * Pointer to the buffer additional data to send
+ * @param dlen
+ * Maximum size of what the the buffer will hold
+ * @param xact
+ * Identifier of the request
+ * @param flags
+ * Message type inband, rxbuf, gpa
+ * @param need_sig
+ * Is host signal tx is required (optional)
+ *
+ * Sends data in buffer directly to hyper-v via the vmbus
+ */
+int rte_vmbus_chan_send_sglist(struct vmbus_channel *channel,
+ struct vmbus_gpa gpa[], uint32_t gpacnt,
+ void *data, uint32_t dlen,
+ uint64_t xact, bool *need_sig);
+/**
+ * Receive response to request on the given channel
+ * skips the channel header.
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @param data
+ * Pointer to the buffer you want to receive the data into.
+ * @param len
+ * Pointer to size of receive buffer (in/out)
+ * @param
+ * Pointer to received transaction_id
+ * @return
+ * On success, returns 0
+ * On failure, returns negative errno.
+ */
+int rte_vmbus_chan_recv(struct vmbus_channel *chan,
+ void *data, uint32_t *len,
+ uint64_t *request_id);
+
+/**
+ * Receive response to request on the given channel
+ * includes the channel header.
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @param data
+ * Pointer to the buffer you want to receive the data into.
+ * @param len
+ * Pointer to size of receive buffer (in/out)
+ * @return
+ * On success, returns number of bytes read.
+ * On failure, returns negative errno.
+ */
+int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
+ void *data, uint32_t *len);
+
+/**
+ * Notify host of bytes read (after recv_raw)
+ * Signals host if required.
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @param bytes_read
+ * Number of bytes read since last signal
+ */
+void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read);
+
+/**
+ * Determine sub channel index of the given channel
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ * @return
+ * Sub channel index (0 for primary)
+ */
+uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be registered.
+ */
+void rte_vmbus_register(struct rte_vmbus_driver *driver);
+
+/**
+ * For debug dump contents of ring buffer.
+ *
+ * @param channel
+ * Pointer to vmbus_channel structure.
+ */
+void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan);
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ * A pointer to a rte_vmbus_driver structure describing the driver
+ * to be unregistered.
+ */
+void rte_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+/** Helper for VMBUS device registration from driver instance */
+#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \
+ RTE_INIT(vmbusinitfn_ ##nm); \
+ static void vmbusinitfn_ ##nm(void) \
+ { \
+ (vmbus_drv).driver.name = RTE_STR(nm); \
+ rte_vmbus_register(&vmbus_drv); \
+ } \
+ RTE_PMD_EXPORT_NAME(nm, __COUNTER__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VMBUS_H_ */
diff --git a/drivers/bus/vmbus/rte_bus_vmbus_version.map b/drivers/bus/vmbus/rte_bus_vmbus_version.map
new file mode 100644
index 00000000..dabb9203
--- /dev/null
+++ b/drivers/bus/vmbus/rte_bus_vmbus_version.map
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+DPDK_18.08 {
+ global:
+
+ rte_vmbus_chan_close;
+ rte_vmbus_chan_open;
+ rte_vmbus_chan_recv;
+ rte_vmbus_chan_recv_raw;
+ rte_vmbus_chan_rx_empty;
+ rte_vmbus_chan_send;
+ rte_vmbus_chan_send_sglist;
+ rte_vmbus_chan_signal_read;
+ rte_vmbus_chan_signal_tx;
+ rte_vmbus_irq_mask;
+ rte_vmbus_irq_read;
+ rte_vmbus_irq_unmask;
+ rte_vmbus_map_device;
+ rte_vmbus_max_channels;
+ rte_vmbus_probe;
+ rte_vmbus_register;
+ rte_vmbus_scan;
+ rte_vmbus_sub_channel_index;
+ rte_vmbus_subchan_open;
+ rte_vmbus_unmap_device;
+ rte_vmbus_unregister;
+
+ local: *;
+};
diff --git a/drivers/bus/vmbus/rte_vmbus_reg.h b/drivers/bus/vmbus/rte_vmbus_reg.h
new file mode 100644
index 00000000..f5a0693d
--- /dev/null
+++ b/drivers/bus/vmbus/rte_vmbus_reg.h
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#ifndef _VMBUS_REG_H_
+#define _VMBUS_REG_H_
+
+/*
+ * Hyper-V SynIC message format.
+ */
+#define VMBUS_MSG_DSIZE_MAX 240
+#define VMBUS_MSG_SIZE 256
+
+struct vmbus_message {
+ uint32_t type; /* HYPERV_MSGTYPE_ */
+ uint8_t dsize; /* data size */
+ uint8_t flags; /* VMBUS_MSGFLAG_ */
+ uint16_t rsvd;
+ uint64_t id;
+ uint8_t data[VMBUS_MSG_DSIZE_MAX];
+} __rte_packed;
+
+#define VMBUS_MSGFLAG_PENDING 0x01
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+
+struct vmbus_mon_trig {
+ uint32_t pending;
+ uint32_t armed;
+} __rte_packed;
+
+#define VMBUS_MONTRIGS_MAX 4
+#define VMBUS_MONTRIG_LEN 32
+
+/*
+ * Hyper-V Monitor Notification Facility
+ */
+struct hyperv_mon_param {
+ uint32_t connid;
+ uint16_t evtflag_ofs;
+ uint16_t rsvd;
+} __rte_packed;
+
+struct vmbus_mon_page {
+ uint32_t state;
+ uint32_t rsvd1;
+
+ struct vmbus_mon_trig trigs[VMBUS_MONTRIGS_MAX];
+ uint8_t rsvd2[536];
+
+ uint16_t lat[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+ uint8_t rsvd3[256];
+
+ struct hyperv_mon_param
+ param[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN];
+ uint8_t rsvd4[1984];
+} __rte_packed;
+
+/*
+ * Buffer ring
+ */
+
+struct vmbus_bufring {
+ volatile uint32_t windex;
+ volatile uint32_t rindex;
+
+ /*
+ * Interrupt mask {0,1}
+ *
+ * For TX bufring, host set this to 1, when it is processing
+ * the TX bufring, so that we can safely skip the TX event
+ * notification to host.
+ *
+ * For RX bufring, once this is set to 1 by us, host will not
+ * further dispatch interrupts to us, even if there are data
+ * pending on the RX bufring. This effectively disables the
+ * interrupt of the channel to which this RX bufring is attached.
+ */
+ volatile uint32_t imask;
+
+ /*
+ * Win8 uses some of the reserved bits to implement
+ * interrupt driven flow management. On the send side
+ * we can request that the receiver interrupt the sender
+ * when the ring transitions from being full to being able
+ * to handle a message of size "pending_send_sz".
+ *
+ * Add necessary state for this enhancement.
+ */
+ volatile uint32_t pending_send;
+ uint32_t reserved1[12];
+
+ union {
+ struct {
+ uint32_t feat_pending_send_sz:1;
+ };
+ uint32_t value;
+ } feature_bits;
+
+ /* Pad it to PAGE_SIZE so that data starts on page boundary */
+ uint8_t reserved2[4028];
+
+ /*
+ * Ring data starts here + RingDataStartOffset
+ * !!! DO NOT place any fields below this !!!
+ */
+ uint8_t data[0];
+} __rte_packed;
+
+/*
+ * Channel packets
+ */
+
+/* Channel packet flags */
+#define VMBUS_CHANPKT_TYPE_INBAND 0x0006
+#define VMBUS_CHANPKT_TYPE_RXBUF 0x0007
+#define VMBUS_CHANPKT_TYPE_GPA 0x0009
+#define VMBUS_CHANPKT_TYPE_COMP 0x000b
+
+#define VMBUS_CHANPKT_FLAG_NONE 0
+#define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */
+
+#define VMBUS_CHANPKT_SIZE_SHIFT 3
+#define VMBUS_CHANPKT_SIZE_ALIGN (1 << VMBUS_CHANPKT_SIZE_SHIFT)
+#define VMBUS_CHANPKT_HLEN_MIN \
+ (sizeof(struct vmbus_chanpkt_hdr) >> VMBUS_CHANPKT_SIZE_SHIFT)
+
+static inline uint32_t
+vmbus_chanpkt_getlen(uint16_t pktlen)
+{
+ return (uint32_t)pktlen << VMBUS_CHANPKT_SIZE_SHIFT;
+}
+
+/*
+ * GPA stuffs.
+ */
+struct vmbus_gpa_range {
+ uint32_t len;
+ uint32_t ofs;
+ uint64_t page[0];
+} __rte_packed;
+
+/* This is actually vmbus_gpa_range.gpa_page[1] */
+struct vmbus_gpa {
+ uint32_t len;
+ uint32_t ofs;
+ uint64_t page;
+} __rte_packed;
+
+struct vmbus_chanpkt_hdr {
+ uint16_t type; /* VMBUS_CHANPKT_TYPE_ */
+ uint16_t hlen; /* header len, in 8 bytes */
+ uint16_t tlen; /* total len, in 8 bytes */
+ uint16_t flags; /* VMBUS_CHANPKT_FLAG_ */
+ uint64_t xactid;
+} __rte_packed;
+
+static inline uint32_t
+vmbus_chanpkt_datalen(const struct vmbus_chanpkt_hdr *pkt)
+{
+ return vmbus_chanpkt_getlen(pkt->tlen)
+ - vmbus_chanpkt_getlen(pkt->hlen);
+}
+
+struct vmbus_chanpkt {
+ struct vmbus_chanpkt_hdr hdr;
+} __rte_packed;
+
+struct vmbus_rxbuf_desc {
+ uint32_t len;
+ uint32_t ofs;
+} __rte_packed;
+
+struct vmbus_chanpkt_rxbuf {
+ struct vmbus_chanpkt_hdr hdr;
+ uint16_t rxbuf_id;
+ uint16_t rsvd;
+ uint32_t rxbuf_cnt;
+ struct vmbus_rxbuf_desc rxbuf[];
+} __rte_packed;
+
+struct vmbus_chanpkt_sglist {
+ struct vmbus_chanpkt_hdr hdr;
+ uint32_t rsvd;
+ uint32_t gpa_cnt;
+ struct vmbus_gpa gpa[];
+} __rte_packed;
+
+/*
+ * Channel messages
+ * - Embedded in vmbus_message.msg_data, e.g. response and notification.
+ * - Embedded in hypercall_postmsg_in.hc_data, e.g. request.
+ */
+
+#define VMBUS_CHANMSG_TYPE_CHOFFER 1 /* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHRESCIND 2 /* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHREQUEST 3 /* REQ */
+#define VMBUS_CHANMSG_TYPE_CHOFFER_DONE 4 /* NOTE */
+#define VMBUS_CHANMSG_TYPE_CHOPEN 5 /* REQ */
+#define VMBUS_CHANMSG_TYPE_CHOPEN_RESP 6 /* RESP */
+#define VMBUS_CHANMSG_TYPE_CHCLOSE 7 /* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_CONN 8 /* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_SUBCONN 9 /* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_CONNRESP 10 /* RESP */
+#define VMBUS_CHANMSG_TYPE_GPADL_DISCONN 11 /* REQ */
+#define VMBUS_CHANMSG_TYPE_GPADL_DISCONNRESP 12 /* RESP */
+#define VMBUS_CHANMSG_TYPE_CHFREE 13 /* REQ */
+#define VMBUS_CHANMSG_TYPE_CONNECT 14 /* REQ */
+#define VMBUS_CHANMSG_TYPE_CONNECT_RESP 15 /* RESP */
+#define VMBUS_CHANMSG_TYPE_DISCONNECT 16 /* REQ */
+#define VMBUS_CHANMSG_TYPE_MAX 22
+
+struct vmbus_chanmsg_hdr {
+ uint32_t type; /* VMBUS_CHANMSG_TYPE_ */
+ uint32_t rsvd;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CONNECT */
+struct vmbus_chanmsg_connect {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t ver;
+ uint32_t rsvd;
+ uint64_t evtflags;
+ uint64_t mnf1;
+ uint64_t mnf2;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CONNECT_RESP */
+struct vmbus_chanmsg_connect_resp {
+ struct vmbus_chanmsg_hdr hdr;
+ uint8_t done;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHREQUEST */
+struct vmbus_chanmsg_chrequest {
+ struct vmbus_chanmsg_hdr hdr;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_DISCONNECT */
+struct vmbus_chanmsg_disconnect {
+ struct vmbus_chanmsg_hdr hdr;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOPEN */
+struct vmbus_chanmsg_chopen {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+ uint32_t openid;
+ uint32_t gpadl;
+ uint32_t vcpuid;
+ uint32_t txbr_pgcnt;
+#define VMBUS_CHANMSG_CHOPEN_UDATA_SIZE 120
+ uint8_t udata[VMBUS_CHANMSG_CHOPEN_UDATA_SIZE];
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOPEN_RESP */
+struct vmbus_chanmsg_chopen_resp {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+ uint32_t openid;
+ uint32_t status;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_GPADL_CONN */
+struct vmbus_chanmsg_gpadl_conn {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+ uint32_t gpadl;
+ uint16_t range_len;
+ uint16_t range_cnt;
+ struct vmbus_gpa_range range;
+} __rte_packed;
+
+#define VMBUS_CHANMSG_GPADL_CONN_PGMAX 26
+
+/* VMBUS_CHANMSG_TYPE_GPADL_SUBCONN */
+struct vmbus_chanmsg_gpadl_subconn {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t msgno;
+ uint32_t gpadl;
+ uint64_t gpa_page[];
+} __rte_packed;
+
+#define VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX 28
+
+/* VMBUS_CHANMSG_TYPE_GPADL_CONNRESP */
+struct vmbus_chanmsg_gpadl_connresp {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+ uint32_t gpadl;
+ uint32_t status;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHCLOSE */
+struct vmbus_chanmsg_chclose {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_GPADL_DISCONN */
+struct vmbus_chanmsg_gpadl_disconn {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+ uint32_t gpadl;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHFREE */
+struct vmbus_chanmsg_chfree {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHRESCIND */
+struct vmbus_chanmsg_chrescind {
+ struct vmbus_chanmsg_hdr hdr;
+ uint32_t chanid;
+} __rte_packed;
+
+/* VMBUS_CHANMSG_TYPE_CHOFFER */
+struct vmbus_chanmsg_choffer {
+ struct vmbus_chanmsg_hdr hdr;
+ rte_uuid_t chtype;
+ rte_uuid_t chinst;
+ uint64_t chlat; /* unit: 100ns */
+ uint32_t chrev;
+ uint32_t svrctx_sz;
+ uint16_t chflags;
+ uint16_t mmio_sz; /* unit: MB */
+ uint8_t udata[120];
+ uint16_t subidx;
+ uint16_t rsvd;
+ uint32_t chanid;
+ uint8_t montrig;
+ uint8_t flags1; /* VMBUS_CHOFFER_FLAG1_ */
+ uint16_t flags2;
+ uint32_t connid;
+} __rte_packed;
+
+#define VMBUS_CHOFFER_FLAG1_HASMNF 0x01
+
+#endif /* !_VMBUS_REG_H_ */
diff --git a/drivers/bus/vmbus/vmbus_bufring.c b/drivers/bus/vmbus/vmbus_bufring.c
new file mode 100644
index 00000000..c8800160
--- /dev/null
+++ b/drivers/bus/vmbus/vmbus_bufring.c
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/uio.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_pause.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+/* Increase bufring index by inc with wraparound */
+static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
+{
+ idx += inc;
+ if (idx >= sz)
+ idx -= sz;
+
+ return idx;
+}
+
+void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
+{
+ br->vbr = buf;
+ br->windex = br->vbr->windex;
+ br->dsize = blen - sizeof(struct vmbus_bufring);
+}
+
+/*
+ * When we write to the ring buffer, check if the host needs to be
+ * signaled.
+ *
+ * The contract:
+ * - The host guarantees that while it is draining the TX bufring,
+ * it will set the br_imask to indicate it does not need to be
+ * interrupted when new data are added.
+ * - The host guarantees that it will completely drain the TX bufring
+ * before exiting the read loop. Further, once the TX bufring is
+ * empty, it will clear the br_imask and re-check to see if new
+ * data have arrived.
+ */
+static inline bool
+vmbus_txbr_need_signal(const struct vmbus_br *tbr, uint32_t old_windex)
+{
+ rte_smp_mb();
+ if (tbr->vbr->imask)
+ return false;
+
+ rte_smp_rmb();
+
+ /*
+ * This is the only case we need to signal when the
+ * ring transitions from being empty to non-empty.
+ */
+ return old_windex == tbr->vbr->rindex;
+}
+
+static inline uint32_t
+vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
+ const void *src0, uint32_t cplen)
+{
+ uint8_t *br_data = tbr->vbr->data;
+ uint32_t br_dsize = tbr->dsize;
+ const uint8_t *src = src0;
+
+ /* XXX use double mapping like Linux kernel? */
+ if (cplen > br_dsize - windex) {
+ uint32_t fraglen = br_dsize - windex;
+
+ /* Wrap-around detected */
+ memcpy(br_data + windex, src, fraglen);
+ memcpy(br_data, src + fraglen, cplen - fraglen);
+ } else {
+ memcpy(br_data + windex, src, cplen);
+ }
+
+ return vmbus_br_idxinc(windex, cplen, br_dsize);
+}
+
+/*
+ * Write scattered channel packet to TX bufring.
+ *
+ * The offset of this channel packet is written as a 64bits value
+ * immediately after this channel packet.
+ *
+ * The write goes through three stages:
+ * 1. Reserve space in ring buffer for the new data.
+ * Writer atomically moves priv_write_index.
+ * 2. Copy the new data into the ring.
+ * 3. Update the tail of the ring (visible to host) that indicates
+ * next read location. Writer updates write_index
+ */
+int
+vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen,
+ bool *need_sig)
+{
+ struct vmbus_bufring *vbr = tbr->vbr;
+ uint32_t ring_size = tbr->dsize;
+ uint32_t old_windex, next_windex, windex, total;
+ uint64_t save_windex;
+ int i;
+
+ total = 0;
+ for (i = 0; i < iovlen; i++)
+ total += iov[i].iov_len;
+ total += sizeof(save_windex);
+
+ /* Reserve space in ring */
+ do {
+ uint32_t avail;
+
+ /* Get current free location */
+ old_windex = tbr->windex;
+
+ /* Prevent compiler reordering this with calculation */
+ rte_compiler_barrier();
+
+ avail = vmbus_br_availwrite(tbr, old_windex);
+
+ /* If not enough space in ring, then tell caller. */
+ if (avail <= total)
+ return -EAGAIN;
+
+ next_windex = vmbus_br_idxinc(old_windex, total, ring_size);
+
+ /* Atomic update of next write_index for other threads */
+ } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));
+
+ /* Space from old..new is now reserved */
+ windex = old_windex;
+ for (i = 0; i < iovlen; i++) {
+ windex = vmbus_txbr_copyto(tbr, windex,
+ iov[i].iov_base, iov[i].iov_len);
+ }
+
+ /* Set the offset of the current channel packet. */
+ save_windex = ((uint64_t)old_windex) << 32;
+ windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+ sizeof(save_windex));
+
+ /* The region reserved should match region used */
+ RTE_ASSERT(windex == next_windex);
+
+ /* Ensure that data is available before updating host index */
+ rte_smp_wmb();
+
+ /* Checkin for our reservation. wait for our turn to update host */
+ while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
+ rte_pause();
+
+ /* If host had read all data before this, then need to signal */
+ *need_sig |= vmbus_txbr_need_signal(tbr, old_windex);
+ return 0;
+}
+
+static inline uint32_t
+vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
+ void *dst0, size_t cplen)
+{
+ const uint8_t *br_data = rbr->vbr->data;
+ uint32_t br_dsize = rbr->dsize;
+ uint8_t *dst = dst0;
+
+ if (cplen > br_dsize - rindex) {
+ uint32_t fraglen = br_dsize - rindex;
+
+ /* Wrap-around detected. */
+ memcpy(dst, br_data + rindex, fraglen);
+ memcpy(dst + fraglen, br_data, cplen - fraglen);
+ } else {
+ memcpy(dst, br_data + rindex, cplen);
+ }
+
+ return vmbus_br_idxinc(rindex, cplen, br_dsize);
+}
+
+/* Copy data from receive ring but don't change index */
+int
+vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
+{
+ uint32_t avail;
+
+ /*
+ * The requested data and the 64bits channel packet
+ * offset should be there at least.
+ */
+ avail = vmbus_br_availread(rbr);
+ if (avail < dlen + sizeof(uint64_t))
+ return -EAGAIN;
+
+ vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
+ return 0;
+}
+
+/*
+ * Copy data from receive ring and change index
+ * NOTE:
+ * We assume (dlen + skip) == sizeof(channel packet).
+ */
+int
+vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
+{
+ struct vmbus_bufring *vbr = rbr->vbr;
+ uint32_t br_dsize = rbr->dsize;
+ uint32_t rindex;
+
+ if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
+ return -EAGAIN;
+
+ /* Record where host was when we started read (for debug) */
+ rbr->windex = rbr->vbr->windex;
+
+ /*
+ * Copy channel packet from RX bufring.
+ */
+ rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
+ rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
+
+ /*
+ * Discard this channel packet's 64bits offset, which is useless to us.
+ */
+ rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
+
+ /* Update the read index _after_ the channel packet is fetched. */
+ rte_compiler_barrier();
+
+ vbr->rindex = rindex;
+
+ return 0;
+}
diff --git a/drivers/bus/vmbus/vmbus_channel.c b/drivers/bus/vmbus/vmbus_channel.c
new file mode 100644
index 00000000..cc5f3e83
--- /dev/null
+++ b/drivers/bus/vmbus/vmbus_channel.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/uio.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+static inline void
+vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
+{
+ /* Use GCC builtin which atomic does atomic OR operation */
+ __sync_or_and_fetch(addr, mask);
+}
+
+static inline void
+vmbus_send_interrupt(const struct rte_vmbus_device *dev, uint32_t relid)
+{
+ uint32_t *int_addr;
+ uint32_t int_mask;
+
+ int_addr = dev->int_page + relid / 32;
+ int_mask = 1u << (relid % 32);
+
+ vmbus_sync_set_bit(int_addr, int_mask);
+}
+
+static inline void
+vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id)
+{
+ uint32_t *monitor_addr, monitor_mask;
+ unsigned int trigger_index;
+
+ trigger_index = monitor_id / HV_MON_TRIG_LEN;
+ monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
+
+ monitor_addr = &dev->monitor_page->trigs[trigger_index].pending;
+ vmbus_sync_set_bit(monitor_addr, monitor_mask);
+}
+
+static void
+vmbus_set_event(const struct rte_vmbus_device *dev,
+ const struct vmbus_channel *chan)
+{
+ vmbus_send_interrupt(dev, chan->relid);
+ vmbus_set_monitor(dev, chan->monitor_id);
+}
+
+/*
+ * Notify host that there are data pending on our TX bufring.
+ *
+ * Since this in userspace, rely on the monitor page.
+ * Can't do a hypercall from userspace.
+ */
+void
+rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
+{
+ const struct rte_vmbus_device *dev = chan->device;
+ const struct vmbus_br *tbr = &chan->txbr;
+
+ /* Make sure all updates are done before signaling host */
+ rte_smp_wmb();
+
+ /* If host is ignoring interrupts? */
+ if (tbr->vbr->imask)
+ return;
+
+ vmbus_set_event(dev, chan);
+}
+
+
+/* Do a simple send directly using transmit ring. */
+int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
+ void *data, uint32_t dlen,
+ uint64_t xactid, uint32_t flags, bool *need_sig)
+{
+ struct vmbus_chanpkt pkt;
+ unsigned int pktlen, pad_pktlen;
+ const uint32_t hlen = sizeof(pkt);
+ bool send_evt = false;
+ uint64_t pad = 0;
+ struct iovec iov[3];
+ int error;
+
+ pktlen = hlen + dlen;
+ pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
+
+ pkt.hdr.type = type;
+ pkt.hdr.flags = flags;
+ pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.xactid = xactid;
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = hlen;
+ iov[1].iov_base = data;
+ iov[1].iov_len = dlen;
+ iov[2].iov_base = &pad;
+ iov[2].iov_len = pad_pktlen - pktlen;
+
+ error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
+
+ /*
+ * caller sets need_sig to non-NULL if it will handle
+ * signaling if required later.
+ * if need_sig is NULL, signal now if needed.
+ */
+ if (need_sig)
+ *need_sig |= send_evt;
+ else if (error == 0 && send_evt)
+ rte_vmbus_chan_signal_tx(chan);
+ return error;
+}
+
+/* Do a scatter/gather send where the descriptor points to data. */
+int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
+ struct vmbus_gpa sg[], uint32_t sglen,
+ void *data, uint32_t dlen,
+ uint64_t xactid, bool *need_sig)
+{
+ struct vmbus_chanpkt_sglist pkt;
+ unsigned int pktlen, pad_pktlen, hlen;
+ bool send_evt = false;
+ struct iovec iov[4];
+ uint64_t pad = 0;
+ int error;
+
+ hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
+ pktlen = hlen + dlen;
+ pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
+
+ pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
+ pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
+ pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
+ pkt.hdr.xactid = xactid;
+ pkt.rsvd = 0;
+ pkt.gpa_cnt = sglen;
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt);
+ iov[1].iov_base = sg;
+ iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
+ iov[2].iov_base = data;
+ iov[2].iov_len = dlen;
+ iov[3].iov_base = &pad;
+ iov[3].iov_len = pad_pktlen - pktlen;
+
+ error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
+
+ /* if caller is batching, just propagate the status */
+ if (need_sig)
+ *need_sig |= send_evt;
+ else if (error == 0 && send_evt)
+ rte_vmbus_chan_signal_tx(chan);
+ return error;
+}
+
+bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
+{
+ const struct vmbus_br *br = &channel->rxbr;
+
+ return br->vbr->rindex == br->vbr->windex;
+}
+
+/* Signal host after reading N bytes */
+void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
+{
+ struct vmbus_br *rbr = &chan->rxbr;
+ uint32_t write_sz, pending_sz;
+
+ /* No need for signaling on older versions */
+ if (!rbr->vbr->feature_bits.feat_pending_send_sz)
+ return;
+
+ /* Make sure reading of pending happens after new read index */
+ rte_mb();
+
+ pending_sz = rbr->vbr->pending_send;
+ if (!pending_sz)
+ return;
+
+ rte_smp_rmb();
+ write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);
+
+ /* If there was space before then host was not blocked */
+ if (write_sz - bytes_read > pending_sz)
+ return;
+
+ /* If pending write will not fit */
+ if (write_sz <= pending_sz)
+ return;
+
+ vmbus_set_event(chan->device, chan);
+}
+
+int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
+ uint64_t *request_id)
+{
+ struct vmbus_chanpkt_hdr pkt;
+ uint32_t dlen, hlen, bufferlen = *len;
+ int error;
+
+ *len = 0;
+
+ error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
+ if (error)
+ return error;
+
+ if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+ VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
+ /* XXX this channel is dead actually. */
+ return -EIO;
+ }
+
+ if (unlikely(pkt.hlen > pkt.tlen)) {
+ VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
+ pkt.hlen, pkt.tlen);
+ return -EIO;
+ }
+
+ /* Length are in quad words */
+ hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
+ dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
+ *len = dlen;
+
+ /* If caller buffer is not large enough */
+ if (unlikely(dlen > bufferlen))
+ return -ENOBUFS;
+
+ if (request_id)
+ *request_id = pkt.xactid;
+
+ /* Read data and skip packet header */
+ error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
+ if (error)
+ return error;
+
+ rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
+ return 0;
+}
+
+/* TODO: replace this with inplace ring buffer (no copy) */
+int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
+ void *data, uint32_t *len)
+{
+ struct vmbus_chanpkt_hdr pkt;
+ uint32_t dlen, bufferlen = *len;
+ int error;
+
+ error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
+ if (error)
+ return error;
+
+ if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+ VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
+ /* XXX this channel is dead actually. */
+ return -EIO;
+ }
+
+ if (unlikely(pkt.hlen > pkt.tlen)) {
+ VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
+ pkt.hlen, pkt.tlen);
+ return -EIO;
+ }
+
+ /* Length are in quad words */
+ dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
+ *len = dlen;
+
+ /* If caller buffer is not large enough */
+ if (unlikely(dlen > bufferlen))
+ return -ENOBUFS;
+
+ /* Read data and skip packet header */
+ error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
+ if (error)
+ return error;
+
+ /* Return the number of bytes read */
+ return dlen + sizeof(uint64_t);
+}
+
+int vmbus_chan_create(const struct rte_vmbus_device *device,
+ uint16_t relid, uint16_t subid, uint8_t monitor_id,
+ struct vmbus_channel **new_chan)
+{
+ struct vmbus_channel *chan;
+ int err;
+
+ chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
+ device->device.numa_node);
+ if (!chan)
+ return -ENOMEM;
+
+ STAILQ_INIT(&chan->subchannel_list);
+ chan->device = device;
+ chan->subchannel_id = subid;
+ chan->relid = relid;
+ chan->monitor_id = monitor_id;
+ *new_chan = chan;
+
+ err = vmbus_uio_map_rings(chan);
+ if (err) {
+ rte_free(chan);
+ return err;
+ }
+
+ return 0;
+}
+
+/* Setup the primary channel */
+int rte_vmbus_chan_open(struct rte_vmbus_device *device,
+ struct vmbus_channel **new_chan)
+{
+ int err;
+
+ err = vmbus_chan_create(device, device->relid, 0,
+ device->monitor_id, new_chan);
+ if (!err)
+ device->primary = *new_chan;
+
+ return err;
+}
+
+int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
+{
+ if (vmbus_uio_subchannels_supported(device, device->primary))
+ return VMBUS_MAX_CHANNELS;
+ else
+ return 1;
+}
+
+/* Setup secondary channel */
+int rte_vmbus_subchan_open(struct vmbus_channel *primary,
+ struct vmbus_channel **new_chan)
+{
+ struct vmbus_channel *chan;
+ int err;
+
+ err = vmbus_uio_get_subchan(primary, &chan);
+ if (err)
+ return err;
+
+ STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
+ *new_chan = chan;
+ return 0;
+}
+
+uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
+{
+ return chan->subchannel_id;
+}
+
+void rte_vmbus_chan_close(struct vmbus_channel *chan)
+{
+ const struct rte_vmbus_device *device = chan->device;
+ struct vmbus_channel *primary = device->primary;
+
+ if (chan != primary)
+ STAILQ_REMOVE(&primary->subchannel_list, chan,
+ vmbus_channel, next);
+
+ rte_free(chan);
+}
+
+static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
+{
+ const struct vmbus_bufring *vbr = br->vbr;
+ struct vmbus_chanpkt_hdr pkt;
+
+ fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
+ id, vbr->windex, vbr->rindex, vbr->imask,
+ vbr->pending_send, vbr->feature_bits.value);
+ fprintf(f, " size=%u avail write=%u read=%u\n",
+ br->dsize, vmbus_br_availwrite(br, vbr->windex),
+ vmbus_br_availread(br));
+
+ if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
+ fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
+ pkt.type,
+ pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
+ pkt.flags, pkt.xactid);
+}
+
+void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
+{
+ fprintf(f, "channel[%u] relid=%u monitor=%u\n",
+ chan->subchannel_id, chan->relid, chan->monitor_id);
+ vmbus_dump_ring(f, "rxbr", &chan->rxbr);
+ vmbus_dump_ring(f, "txbr", &chan->txbr);
+}
diff --git a/drivers/bus/vmbus/vmbus_common.c b/drivers/bus/vmbus/vmbus_common.c
new file mode 100644
index 00000000..c7165ad5
--- /dev/null
+++ b/drivers/bus/vmbus/vmbus_common.c
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+int vmbus_logtype_bus;
+extern struct rte_vmbus_bus rte_vmbus_bus;
+
+/* map a particular resource from a file */
+void *
+vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+ int flags)
+{
+ void *mapaddr;
+
+ /* Map the memory resource of device */
+ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | flags, fd, offset);
+ if (mapaddr == MAP_FAILED) {
+ VMBUS_LOG(ERR,
+ "mmap(%d, %p, %zu, %ld) failed: %s",
+ fd, requested_addr, size, (long)offset,
+ strerror(errno));
+ }
+ return mapaddr;
+}
+
+/* unmap a particular resource */
+void
+vmbus_unmap_resource(void *requested_addr, size_t size)
+{
+ if (requested_addr == NULL)
+ return;
+
+ /* Unmap the VMBUS memory resource of device */
+ if (munmap(requested_addr, size)) {
+ VMBUS_LOG(ERR, "munmap(%p, 0x%lx) failed: %s",
+ requested_addr, (unsigned long)size,
+ strerror(errno));
+ } else
+ VMBUS_LOG(DEBUG, " VMBUS memory unmapped at %p",
+ requested_addr);
+}
+
+/**
+ * Match the VMBUS driver and device using UUID table
+ *
+ * @param drv
+ * VMBUS driver from which ID table would be extracted
+ * @param pci_dev
+ * VMBUS device to match against the driver
+ * @return
+ * true for successful match
+ * false for unsuccessful match
+ */
+static bool
+vmbus_match(const struct rte_vmbus_driver *dr,
+ const struct rte_vmbus_device *dev)
+{
+ const rte_uuid_t *id_table;
+
+ for (id_table = dr->id_table; !rte_uuid_is_null(*id_table); ++id_table) {
+ if (rte_uuid_compare(*id_table, dev->class_id) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * If device ID match, call the devinit() function of the driver.
+ */
+static int
+vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+ struct rte_vmbus_device *dev)
+{
+ char guid[RTE_UUID_STRLEN];
+ int ret;
+
+ if (!vmbus_match(dr, dev))
+ return 1; /* not supported */
+
+ rte_uuid_unparse(dev->device_id, guid, sizeof(guid));
+ VMBUS_LOG(INFO, "VMBUS device %s on NUMA socket %i",
+ guid, dev->device.numa_node);
+
+ /* TODO add blacklisted */
+
+ /* map resources for device */
+ ret = rte_vmbus_map_device(dev);
+ if (ret != 0)
+ return ret;
+
+ /* reference driver structure */
+ dev->driver = dr;
+ dev->device.driver = &dr->driver;
+
+ if (dev->device.numa_node < 0) {
+ VMBUS_LOG(WARNING, " Invalid NUMA socket, default to 0");
+ dev->device.numa_node = 0;
+ }
+
+ /* call the driver probe() function */
+ VMBUS_LOG(INFO, " probe driver: %s", dr->driver.name);
+ ret = dr->probe(dr, dev);
+ if (ret) {
+ dev->driver = NULL;
+ rte_vmbus_unmap_device(dev);
+ }
+
+ return ret;
+}
+
+/*
+ * IF device class GUID mathces, call the probe function of
+ * registere drivers for the vmbus device.
+ * Return -1 if initialization failed,
+ * and 1 if no driver found for this device.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+ struct rte_vmbus_driver *dr;
+ int rc;
+
+ /* Check if a driver is already loaded */
+ if (dev->driver != NULL) {
+ VMBUS_LOG(DEBUG, "VMBUS driver already loaded");
+ return 0;
+ }
+
+ FOREACH_DRIVER_ON_VMBUS(dr) {
+ rc = vmbus_probe_one_driver(dr, dev);
+ if (rc < 0) /* negative is an error */
+ return -1;
+
+ if (rc > 0) /* positive driver doesn't support it */
+ continue;
+
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Scan the vmbus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+int
+rte_vmbus_probe(void)
+{
+ struct rte_vmbus_device *dev;
+ size_t probed = 0, failed = 0;
+ char ubuf[RTE_UUID_STRLEN];
+
+ FOREACH_DEVICE_ON_VMBUS(dev) {
+ probed++;
+
+ rte_uuid_unparse(dev->device_id, ubuf, sizeof(ubuf));
+
+ /* TODO: add whitelist/blacklist */
+
+ if (vmbus_probe_all_drivers(dev) < 0) {
+ VMBUS_LOG(NOTICE,
+ "Requested device %s cannot be used", ubuf);
+ rte_errno = errno;
+ failed++;
+ }
+ }
+
+ return (probed && probed == failed) ? -1 : 0;
+}
+
+static int
+vmbus_parse(const char *name, void *addr)
+{
+ rte_uuid_t guid;
+ int ret;
+
+ ret = rte_uuid_parse(name, guid);
+ if (ret == 0 && addr)
+ memcpy(addr, &guid, sizeof(guid));
+
+ return ret;
+}
+
+/* register vmbus driver */
+void
+rte_vmbus_register(struct rte_vmbus_driver *driver)
+{
+ VMBUS_LOG(DEBUG,
+ "Registered driver %s", driver->driver.name);
+
+ TAILQ_INSERT_TAIL(&rte_vmbus_bus.driver_list, driver, next);
+ driver->bus = &rte_vmbus_bus;
+}
+
+/* unregister vmbus driver */
+void
+rte_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+ TAILQ_REMOVE(&rte_vmbus_bus.driver_list, driver, next);
+ driver->bus = NULL;
+}
+
+/* Add a device to VMBUS bus */
+void
+vmbus_add_device(struct rte_vmbus_device *vmbus_dev)
+{
+ TAILQ_INSERT_TAIL(&rte_vmbus_bus.device_list, vmbus_dev, next);
+}
+
+/* Insert a device into a predefined position in VMBUS bus */
+void
+vmbus_insert_device(struct rte_vmbus_device *exist_vmbus_dev,
+ struct rte_vmbus_device *new_vmbus_dev)
+{
+ TAILQ_INSERT_BEFORE(exist_vmbus_dev, new_vmbus_dev, next);
+}
+
+/* Remove a device from VMBUS bus */
+void
+vmbus_remove_device(struct rte_vmbus_device *vmbus_dev)
+{
+ TAILQ_REMOVE(&rte_vmbus_bus.device_list, vmbus_dev, next);
+}
+
+/* VMBUS doesn't support hotplug */
+static struct rte_device *
+vmbus_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+ const void *data)
+{
+ struct rte_vmbus_device *dev;
+
+ FOREACH_DEVICE_ON_VMBUS(dev) {
+ if (start && &dev->device == start) {
+ start = NULL;
+ continue;
+ }
+ if (cmp(&dev->device, data) == 0)
+ return &dev->device;
+ }
+
+ return NULL;
+}
+
+
+struct rte_vmbus_bus rte_vmbus_bus = {
+ .bus = {
+ .scan = rte_vmbus_scan,
+ .probe = rte_vmbus_probe,
+ .find_device = vmbus_find_device,
+ .parse = vmbus_parse,
+ },
+ .device_list = TAILQ_HEAD_INITIALIZER(rte_vmbus_bus.device_list),
+ .driver_list = TAILQ_HEAD_INITIALIZER(rte_vmbus_bus.driver_list),
+};
+
+RTE_REGISTER_BUS(vmbus, rte_vmbus_bus.bus);
+
+RTE_INIT(vmbus_init_log)
+{
+ vmbus_logtype_bus = rte_log_register("bus.vmbus");
+ if (vmbus_logtype_bus >= 0)
+ rte_log_set_level(vmbus_logtype_bus, RTE_LOG_NOTICE);
+}
diff --git a/drivers/bus/vmbus/vmbus_common_uio.c b/drivers/bus/vmbus/vmbus_common_uio.c
new file mode 100644
index 00000000..5ddd36ab
--- /dev/null
+++ b/drivers/bus/vmbus/vmbus_common_uio.c
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+static struct rte_tailq_elem vmbus_tailq = {
+ .name = "VMBUS_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(vmbus_tailq)
+
+static int
+vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
+{
+ int fd, i;
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list
+ = RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+
+ /* skip this element if it doesn't match our UUID */
+ if (rte_uuid_compare(uio_res->id, dev->device_id) != 0)
+ continue;
+
+ /* open /dev/uioX */
+ fd = open(uio_res->path, O_RDWR);
+ if (fd < 0) {
+ VMBUS_LOG(ERR, "Cannot open %s: %s",
+ uio_res->path, strerror(errno));
+ return -1;
+ }
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ void *mapaddr;
+
+ mapaddr = vmbus_map_resource(uio_res->maps[i].addr,
+ fd, 0,
+ uio_res->maps[i].size, 0);
+
+ if (mapaddr == uio_res->maps[i].addr)
+ continue;
+
+ VMBUS_LOG(ERR,
+ "Cannot mmap device resource file %s to address: %p",
+ uio_res->path, uio_res->maps[i].addr);
+
+ if (mapaddr != MAP_FAILED)
+ /* unmap addr wrongly mapped */
+ vmbus_unmap_resource(mapaddr,
+ (size_t)uio_res->maps[i].size);
+
+ /* unmap addrs correctly mapped */
+ while (--i >= 0)
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ (size_t)uio_res->maps[i].size);
+
+ close(fd);
+ return -1;
+ }
+
+ /* fd is not needed in slave process, close it */
+ close(fd);
+ return 0;
+ }
+
+ VMBUS_LOG(ERR, "Cannot find resource for device");
+ return 1;
+}
+
+static int
+vmbus_uio_map_primary(struct rte_vmbus_device *dev)
+{
+ int i, ret;
+ struct mapped_vmbus_resource *uio_res = NULL;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+ /* allocate uio resource */
+ ret = vmbus_uio_alloc_resource(dev, &uio_res);
+ if (ret)
+ return ret;
+
+ /* Map the resources */
+ for (i = 0; i < VMBUS_MAX_RESOURCE; i++) {
+ /* skip empty BAR */
+ if (dev->resource[i].len == 0)
+ continue;
+
+ ret = vmbus_uio_map_resource_by_index(dev, i, uio_res, 0);
+ if (ret)
+ goto error;
+ }
+
+ uio_res->nb_maps = i;
+
+ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+ return 0;
+error:
+ while (--i >= 0) {
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ (size_t)uio_res->maps[i].size);
+ }
+ vmbus_uio_free_resource(dev, uio_res);
+ return -1;
+}
+
+
+struct mapped_vmbus_resource *
+vmbus_uio_find_resource(const struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+ if (dev == NULL)
+ return NULL;
+
+ TAILQ_FOREACH(uio_res, uio_res_list, next) {
+ /* skip this element if it doesn't match our VMBUS address */
+ if (rte_uuid_compare(uio_res->id, dev->device_id) == 0)
+ return uio_res;
+ }
+ return NULL;
+}
+
+/* map the VMBUS resource of a VMBUS device in virtual memory */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ int ret;
+
+ /* TODO: handle rescind */
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.uio_cfg_fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+ /* secondary processes - use already recorded details */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ ret = vmbus_uio_map_secondary(dev);
+ else
+ ret = vmbus_uio_map_primary(dev);
+
+ if (ret != 0)
+ return ret;
+
+ uio_res = vmbus_uio_find_resource(dev);
+ if (!uio_res) {
+ VMBUS_LOG(ERR, "can not find resources!");
+ return -EIO;
+ }
+
+ if (uio_res->nb_maps <= HV_MON_PAGE_MAP) {
+ VMBUS_LOG(ERR, "VMBUS: only %u resources found!",
+ uio_res->nb_maps);
+ return -EINVAL;
+ }
+
+ dev->int_page = (uint32_t *)((char *)uio_res->maps[HV_INT_PAGE_MAP].addr
+ + (PAGE_SIZE >> 1));
+ dev->monitor_page = uio_res->maps[HV_MON_PAGE_MAP].addr;
+ return 0;
+}
+
+static void
+vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
+{
+ int i;
+
+ if (uio_res == NULL)
+ return;
+
+ for (i = 0; i != uio_res->nb_maps; i++) {
+ vmbus_unmap_resource(uio_res->maps[i].addr,
+ (size_t)uio_res->maps[i].size);
+ }
+}
+
+/* unmap the VMBUS resource of a VMBUS device in virtual memory */
+void
+vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
+{
+ struct mapped_vmbus_resource *uio_res;
+ struct mapped_vmbus_res_list *uio_res_list =
+ RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+ if (dev == NULL)
+ return;
+
+ /* find an entry for the device */
+ uio_res = vmbus_uio_find_resource(dev);
+ if (uio_res == NULL)
+ return;
+
+ /* secondary processes - just free maps */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return vmbus_uio_unmap(uio_res);
+
+ TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+ /* unmap all resources */
+ vmbus_uio_unmap(uio_res);
+
+ /* free uio resource */
+ rte_free(uio_res);
+
+ /* close fd if in primary process */
+ close(dev->intr_handle.fd);
+ if (dev->intr_handle.uio_cfg_fd >= 0) {
+ close(dev->intr_handle.uio_cfg_fd);
+ dev->intr_handle.uio_cfg_fd = -1;
+ }
+
+ dev->intr_handle.fd = -1;
+ dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+}