/*- * BSD LICENSE * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200 #include #else #include #endif #include #include #include #include #include #include #include #include "xen_vhost.h" /* xenstore handle */ static struct xs_handle *xs = NULL; /* gntdev file descriptor to map grant pages */ static int d_fd = -1; /* * The grant node format in xenstore for vring/mpool is like: * idx#_rx_vring_gref = "gref1#, gref2#, gref3#" * idx#_mempool_gref = "gref1#, gref2#, gref3#" * each gref# is the grant reference for a shared page. * In each shared page, we store the grant_node_item items. */ struct grant_node_item { uint32_t gref; uint32_t pfn; } __attribute__((packed)); int cmdline_parse_etheraddr(void *tk, const char *srcbuf, void *res, unsigned ressize); /* Map grant ref refid at addr_ori*/ static void * xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex) { struct ioctl_gntdev_map_grant_ref arg; void *addr = NULL; int pg_sz = getpagesize(); arg.count = 1; arg.refs[0].domid = domid; arg.refs[0].ref = refid; int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg); if (rv) { RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__, domid, refid, strerror(errno)); return NULL; } if (addr_ori == NULL) addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED, d_fd, arg.index); else addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, d_fd, arg.index); if (addr == MAP_FAILED) { RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__, domid, refid, strerror(errno)); return NULL; } if (pindex) *pindex = arg.index; return addr; } /* Unmap one grant ref, and munmap must be called before this */ static int xen_unmap_grant_ref(uint64_t index) { struct ioctl_gntdev_unmap_grant_ref arg; int rv; arg.count = 1; arg.index = index; rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg); if (rv) { RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 "unmap failed\n", __func__, index); return -1; } return 0; } /* * Reserve a virtual address space. * On success, returns the pointer. On failure, returns NULL. */ static void * get_xen_virtual(size_t size, size_t page_sz) { void *addr; uintptr_t aligned_addr; addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { RTE_LOG(ERR, XENHOST, "failed get a virtual area\n"); return NULL; } aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz); munmap(addr, aligned_addr - (uintptr_t)addr); munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr); addr = (void *)(aligned_addr); return addr; } static void free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused) { if (addr) munmap(addr, size); } /* * Returns val str in xenstore. * @param path * Full path string for key * @return * Pointer to Val str, NULL on failure */ static char * xen_read_node(char *path, uint32_t *len) { char *buf; buf = xs_read(xs, XBT_NULL, path, len); return buf; } static int cal_pagenum(struct xen_gnt *gnt) { unsigned int i; /* * the items in the page are in the format of * gref#,pfn#,...,gref#,pfn# * FIXME, 0 is reserved by system, use it as terminator. */ for (i = 0; i < (PAGE_PFNNUM) / 2; i++) { if (gnt->gref_pfn[i * 2].gref <= 0) break; } return i; } /* Frees memory allocated to a grant node */ static void xen_free_gntnode(struct xen_gntnode *gntnode) { if (gntnode == NULL) return; free(gntnode->gnt_info); free(gntnode); } /* * Parse a grant node. * @param domid * Guest domain id. * @param path * Full path string for a grant node, like for the following (key, val) pair * idx#_mempool_gref = "gref#, gref#, gref#" * path = 'local/domain/domid/control/dpdk/idx#_mempool_gref' * gref# is a shared page contain packed (gref,pfn) entries * @return * Returns the pointer to xen_gntnode */ static struct xen_gntnode * parse_gntnode(int dom_id, char *path) { char **gref_list = NULL; uint32_t i, len, gref_num; void *addr = NULL; char *buf = NULL; struct xen_gntnode *gntnode = NULL; struct xen_gnt *gnt = NULL; int pg_sz = getpagesize(); char *end; uint64_t index; if ((buf = xen_read_node(path, &len)) == NULL) goto err; gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *)); if (gref_list == NULL) goto err; gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE, XEN_GREF_SPLITTOKEN); if (gref_num == 0) { RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__); goto err; } gntnode = calloc(1, sizeof(struct xen_gntnode)); gnt = calloc(gref_num, sizeof(struct xen_gnt)); if (gnt == NULL || gntnode == NULL) goto err; for (i = 0; i < gref_num; i++) { errno = 0; gnt[i].gref = strtol(gref_list[i], &end, 0); if (errno != 0 || end == NULL || end == gref_list[i] || (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) { RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__); goto err; } addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index); if (addr == NULL) { RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref); goto err; } RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr); memcpy(gnt[i].gref_pfn, addr, pg_sz); if (munmap(addr, pg_sz)) { RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref); goto err; } if (xen_unmap_grant_ref(index)) { RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref); goto err; } } gntnode->gnt_num = gref_num; gntnode->gnt_info = gnt; free(buf); free(gref_list); return gntnode; err: free(gnt); free(gntnode); free(gref_list); free(buf); return NULL; } /* * This function maps grant node of vring or mbuf pool to a continous virtual address space, * and returns mapped address, pfn array, index array * @param gntnode * Pointer to grant node * @param domid * Guest domain id * @param ppfn * Pointer to pfn array, caller should free this array * @param pgs * Pointer to number of pages * @param ppindex * Pointer to index array, used to release grefs when to free this node * @return * Pointer to mapped virtual address, NULL on failure */ static void * map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex) { struct xen_gnt *gnt; uint32_t i, j; size_t total_pages = 0; void *addr; uint32_t *pfn; uint64_t *pindex; uint32_t pfn_num = 0; int pg_sz; if (gntnode == NULL) return NULL; pg_sz = getpagesize(); for (i = 0; i < gntnode->gnt_num; i++) { gnt = gntnode->gnt_info + i; total_pages += cal_pagenum(gnt); } if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) { RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__); return NULL; } pfn = calloc(total_pages, (size_t)sizeof(uint32_t)); pindex = calloc(total_pages, (size_t)sizeof(uint64_t)); if (pfn == NULL || pindex == NULL) { free_xen_virtual(addr, total_pages * pg_sz, pg_sz); free(pfn); free(pindex); return NULL; } RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1)); for (i = 0; i < gntnode->gnt_num; i++) { gnt = gntnode->gnt_info + i; for (j = 0; j < (PAGE_PFNNUM) / 2; j++) { if ((gnt->gref_pfn[j * 2].gref) <= 0) goto _end; /*alternative: batch map, or through libxc*/ if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz), domid, gnt->gref_pfn[j * 2].gref, &pindex[pfn_num]) == NULL) { goto mmap_failed; } pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num; pfn_num++; } } mmap_failed: if (pfn_num) munmap(addr, pfn_num * pg_sz); for (i = 0; i < pfn_num; i++) { xen_unmap_grant_ref(pindex[i]); } free(pindex); free(pfn); return NULL; _end: if (ppindex) *ppindex = pindex; else free(pindex); if (ppfn) *ppfn = pfn; else free(pfn); if (pgs) *pgs = total_pages; return addr; } static int parse_mpool_va(struct xen_mempool *mempool) { char path[PATH_MAX] = {0}; char *buf; uint32_t len; char *end; int ret = -1; errno = 0; snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX, mempool->dom_id, mempool->pool_idx); if((buf = xen_read_node(path, &len)) == NULL) goto out; mempool->gva = (void *)strtoul(buf, &end, 16); if (errno != 0 || end == NULL || end == buf || *end != '\0') { mempool->gva = NULL; goto out; } ret = 0; out: free(buf); return ret; } /* * map mbuf pool */ static int map_mempoolnode(struct xen_gntnode *gntnode, struct xen_mempool *mempool) { if (gntnode == NULL || mempool == NULL) return -1; mempool->hva = map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex); RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva); if (mempool->hva) return 0; else { return -1; } } void cleanup_mempool(struct xen_mempool *mempool) { int pg_sz = getpagesize(); uint32_t i; if (mempool->hva) munmap(mempool->hva, mempool->mempfn_num * pg_sz); mempool->hva = NULL; if (mempool->pindex) { RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n", __func__, mempool->dom_id, mempool->pool_idx, mempool->mempfn_num); for (i = 0; i < mempool->mempfn_num; i ++) { xen_unmap_grant_ref(mempool->pindex[i]); } } mempool->pindex = NULL; free(mempool->mempfn_tbl); mempool->mempfn_tbl = NULL; } /* * process mempool node idx#_mempool_gref, idx = 0, 1, 2... * untill we encounter a node that doesn't exist. */ int parse_mempoolnode(struct xen_guest *guest) { uint32_t i, len; char path[PATH_MAX] = {0}; struct xen_gntnode *gntnode = NULL; struct xen_mempool *mempool = NULL; char *buf; bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0])); guest->pool_num = 0; while (1) { /* check if null terminated */ snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX, guest->dom_id, guest->pool_num); if ((buf = xen_read_node(path, &len)) != NULL) { /* this node exists */ free(buf); } else { if (guest->pool_num == 0) { RTE_LOG(ERR, PMD, "no mempool found\n"); return -1; } break; } mempool = &guest->mempool[guest->pool_num]; mempool->dom_id = guest->dom_id; mempool->pool_idx = guest->pool_num; RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path); gntnode = parse_gntnode(guest->dom_id, path); if (gntnode == NULL) goto err; if (parse_mpool_va(mempool)) goto err; RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path); if (map_mempoolnode(gntnode, mempool)) goto err; xen_free_gntnode(gntnode); guest->pool_num++; } return 0; err: if (gntnode) xen_free_gntnode(gntnode); for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) { cleanup_mempool(&guest->mempool[i]); } /* reinitialise mempool */ bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0])); return -1; } static int xen_map_vringflag(struct xen_vring *vring) { char path[PATH_MAX] = {0}; char *buf; uint32_t len,gref; int pg_sz = getpagesize(); char *end; snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX, vring->dom_id, vring->virtio_idx); if((buf = xen_read_node(path, &len)) == NULL) goto err; errno = 0; gref = strtol(buf, &end, 0); if (errno != 0 || end == NULL || end == buf) { goto err; } vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index); if (vring->flag == NULL || *vring->flag == 0) goto err; free(buf); return 0; err: free(buf); if (vring->flag) { munmap(vring->flag, pg_sz); vring->flag = NULL; xen_unmap_grant_ref(vring->flag_index); } return -1; } static int xen_map_rxvringnode(struct xen_gntnode *gntnode, struct xen_vring *vring) { vring->rxvring_addr = map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex); RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr); if (vring->rxvring_addr) return 0; else return -1; } static int xen_map_txvringnode(struct xen_gntnode *gntnode, struct xen_vring *vring) { vring->txvring_addr = map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex); RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr); if (vring->txvring_addr) return 0; else return -1; } void cleanup_vring(struct xen_vring *vring) { int pg_sz = getpagesize(); uint32_t i; RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx); if (vring->rxvring_addr) { munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz); RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n", __func__, vring->rxvring_addr, RTE_PTR_ADD(vring->rxvring_addr, vring->rxpfn_num * pg_sz - 1)); } vring->rxvring_addr = NULL; if (vring->rx_pindex) { RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num); for (i = 0; i < vring->rxpfn_num; i++) { xen_unmap_grant_ref(vring->rx_pindex[i]); } } vring->rx_pindex = NULL; free(vring->rxpfn_tbl); vring->rxpfn_tbl = NULL; if (vring->txvring_addr) { munmap(vring->txvring_addr, vring->txpfn_num * pg_sz); RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n", __func__, vring->txvring_addr, RTE_PTR_ADD(vring->txvring_addr, vring->txpfn_num * pg_sz - 1)); } vring->txvring_addr = NULL; if (vring->tx_pindex) { RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num); for (i = 0; i < vring->txpfn_num; i++) { xen_unmap_grant_ref(vring->tx_pindex[i]); } } vring->tx_pindex = NULL; free(vring->txpfn_tbl); vring->txpfn_tbl = NULL; if (vring->flag) { if (!munmap((void *)vring->flag, pg_sz)) RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag); if (!xen_unmap_grant_ref(vring->flag_index)) RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index); } vring->flag = NULL; return; } static int xen_parse_etheraddr(struct xen_vring *vring) { char path[PATH_MAX] = {0}; char *buf; uint32_t len; int ret = -1; snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX, vring->dom_id, vring->virtio_idx); if ((buf = xen_read_node(path, &len)) == NULL) goto out; if (cmdline_parse_etheraddr(NULL, buf, &vring->addr, sizeof(vring->addr)) < 0) goto out; ret = 0; out: free(buf); return ret; } int parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx) { char path[PATH_MAX] = {0}; struct xen_gntnode *rx_gntnode = NULL; struct xen_gntnode *tx_gntnode = NULL; struct xen_vring *vring = NULL; /*check if null terminated */ snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX, guest->dom_id, virtio_idx); RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path); rx_gntnode = parse_gntnode(guest->dom_id, path); if (rx_gntnode == NULL) goto err; /*check if null terminated */ snprintf(path, sizeof(path), XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX, guest->dom_id, virtio_idx); RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path); tx_gntnode = parse_gntnode(guest->dom_id, path); if (tx_gntnode == NULL) goto err; vring = &guest->vring[virtio_idx]; bzero(vring, sizeof(*vring)); vring->dom_id = guest->dom_id; vring->virtio_idx = virtio_idx; if (xen_parse_etheraddr(vring) != 0) goto err; RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path); if (xen_map_rxvringnode(rx_gntnode, vring) != 0) goto err; RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path); if (xen_map_txvringnode(tx_gntnode, vring) != 0) goto err; if (xen_map_vringflag(vring) != 0) goto err; guest->vring_num++; xen_free_gntnode(rx_gntnode); xen_free_gntnode(tx_gntnode); return 0; err: if (rx_gntnode) xen_free_gntnode(rx_gntnode); if (tx_gntnode) xen_free_gntnode(tx_gntnode); if (vring) { cleanup_vring(vring); bzero(vring, sizeof(*vring)); } return -1; } /* * Open xen grant dev driver * @return * 0 on success, -1 on failure. */ static int xen_grant_init(void) { d_fd = open(XEN_GNTDEV_FNAME, O_RDWR); return d_fd == -1? (-1): (0); } /* * Initialise xenstore handle and open grant dev driver. * @return * 0 on success, -1 on failure. */ int xenhost_init(void) { xs = xs_daemon_open(); if (xs == NULL) { rte_panic("failed initialize xen daemon handler"); return -1; } if (xen_grant_init()) return -1; return 0; }