| author | Damjan Marion <damarion@cisco.com> | 2018-09-30 18:26:20 +0200 |
|---|---|---|
| committer | Damjan Marion <dmarion@me.com> | 2018-10-23 14:21:10 +0000 |
| commit | 68b4da67deb2e8ca224bb5abaeb9dbc7ae8e378c (patch) | |
| tree | cd1ee2c463aefdb31c73665eafb876568054f49e /src/vlib/linux | |
| parent | fc3b8b8ad08d2d4cc375149ecdc10c37d4a80940 (diff) | |
Numa-aware, growable physical memory allocator (pmalloc)
Change-Id: Ic4c46bc733afae8bf0d8146623ed15633928de30
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vlib/linux')
| -rw-r--r-- | src/vlib/linux/pci.c | 34 |
| -rwxr-xr-x | src/vlib/linux/physmem.c | 307 |
| -rw-r--r-- | src/vlib/linux/vfio.c | 68 |
| -rw-r--r-- | src/vlib/linux/vfio.h | 4 |
4 files changed, 68 insertions, 345 deletions
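The pci.c hunks in the diff below expose two driver-facing helpers, `vlib_pci_supports_virtual_addr_dma()` and `vlib_pci_map_dma()`, which let a driver DMA-map physmem by virtual address when the device sits behind a VFIO IOMMU. A minimal usage sketch follows; only the two helpers come from this patch, while the wrapper name and its `ring_base` argument are illustrative.

```c
/* Sketch of how a PCI driver might use the VA-DMA helpers added in
 * src/vlib/linux/pci.c below. Only vlib_pci_supports_virtual_addr_dma()
 * and vlib_pci_map_dma() are from this patch; my_driver_dma_setup() and
 * ring_base are hypothetical. */
#include <vlib/vlib.h>
#include <vlib/pci/pci.h>

static clib_error_t *
my_driver_dma_setup (vlib_main_t * vm, vlib_pci_dev_handle_t h,
                     void *ring_base)
{
  /* On VFIO devices with an IOMMU, physmem can be DMA-mapped by virtual
     address; otherwise vlib_pci_map_dma() is a no-op and the driver has
     to fall back to physical addresses. */
  if (vlib_pci_supports_virtual_addr_dma (vm, h))
    return vlib_pci_map_dma (vm, h, ring_base);

  return 0;
}
```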
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index b55fb5042f7..0e2241b0e58 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -951,6 +951,21 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
       linux_pci_vfio_unmask_intx (vm, p);
     }
 
+  if (p->supports_va_dma)
+    {
+      vlib_buffer_pool_t *bp;
+      /* *INDENT-OFF* */
+      vec_foreach (bp, buffer_main.buffer_pools)
+	{
+	  u32 i;
+	  vlib_physmem_map_t *pm;
+	  pm = vlib_physmem_get_map (vm, bp->physmem_map_index);
+	  for (i = 0; i < pm->n_pages; i++)
+	    vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size));
+	}
+      /* *INDENT-ON* */
+    }
+
   if (r && r->init_function)
     err = r->init_function (lpm->vlib_main, p->handle);
 
@@ -1092,6 +1107,25 @@ vlib_pci_map_region_fixed (vlib_main_t * vm, vlib_pci_dev_handle_t h,
 }
 
 clib_error_t *
+vlib_pci_map_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h, void *ptr)
+{
+  linux_pci_device_t *p = linux_pci_get_device (h);
+
+  if (!p->supports_va_dma)
+    return 0;
+
+  return vfio_map_physmem_page (vm, ptr);
+}
+
+int
+vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h)
+{
+  linux_pci_device_t *p = linux_pci_get_device (h);
+
+  return p->supports_va_dma != 0;
+}
+
+clib_error_t *
 vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
 		      pci_device_id_t ids[], vlib_pci_dev_handle_t * handle)
 {
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
deleted file mode 100755
index 90b0f8cab3d..00000000000
--- a/src/vlib/linux/physmem.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * physmem.c: Unix physical memory
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/mount.h>
-#include <sys/mman.h>
-#include <sys/fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <vppinfra/linux/syscall.h>
-#include <vppinfra/linux/sysfs.h>
-#include <vlib/vlib.h>
-#include <vlib/physmem.h>
-#include <vlib/unix/unix.h>
-#include <vlib/pci/pci.h>
-#include <vlib/linux/vfio.h>
-
-static void *
-unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
-			    uword n_bytes, uword alignment)
-{
-  vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
-  uword lo_offset, hi_offset;
-  uword *to_free = 0;
-
-  if (pr->heap == 0)
-    return 0;
-
-  /* IO memory is always at least cache aligned. */
-  alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
-
-  while (1)
-    {
-#if USE_DLMALLOC == 0
-
-      mheap_get_aligned (pr->heap, n_bytes,
-			 /* align */ alignment,
-			 /* align offset */ 0,
-			 &lo_offset);
-#else
-      lo_offset = (uword) mspace_get_aligned (pr->heap, n_bytes,
-					      alignment, ~0ULL /* offset */ );
-      if (lo_offset == 0)
-	lo_offset = ~0ULL;
-#endif
-
-      /* Allocation failed? */
-      if (lo_offset == ~0)
-	break;
-
-      /* Make sure allocation does not span DMA physical chunk boundary. */
-      hi_offset = lo_offset + n_bytes - 1;
-
-      if (((pointer_to_uword (pr->heap) + lo_offset) >> pr->log2_page_size) ==
-	  ((pointer_to_uword (pr->heap) + hi_offset) >> pr->log2_page_size))
-	break;
-
-      /* Allocation would span chunk boundary, queue it to be freed as soon as
-         we find suitable chunk. */
-      vec_add1 (to_free, lo_offset);
-    }
-
-  if (to_free != 0)
-    {
-      uword i;
-      for (i = 0; i < vec_len (to_free); i++)
-	{
-#if USE_DLMALLOC == 0
-	  mheap_put (pr->heap, to_free[i]);
-#else
-	  mspace_put_no_offset (pr->heap, (void *) to_free[i]);
-#endif
-	}
-      vec_free (to_free);
-    }
-
-#if USE_DLMALLOC == 0
-  return lo_offset != ~0 ? (void *) (pr->heap + lo_offset) : 0;
-#else
-  return lo_offset != ~0 ? (void *) lo_offset : 0;
-#endif
-}
-
-static void
-unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x)
-{
-  vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
-  /* Return object to region's heap. */
-#if USE_DLMALLOC == 0
-  mheap_put (pr->heap, x - pr->heap);
-#else
-  mspace_put_no_offset (pr->heap, x);
-#endif
-}
-
-static clib_error_t *
-unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
-			   u8 numa_node, u32 flags,
-			   vlib_physmem_region_index_t * idx)
-{
-  vlib_physmem_main_t *vpm = &physmem_main;
-  vlib_physmem_region_t *pr;
-  clib_error_t *error = 0;
-  clib_mem_vm_alloc_t alloc = { 0 };
-  int i;
-
-  pool_get (vpm->regions, pr);
-
-  if ((pr - vpm->regions) >= 256)
-    {
-      error = clib_error_return (0, "maximum number of regions reached");
-      goto error;
-    }
-
-  alloc.name = name;
-  alloc.size = size;
-  alloc.numa_node = numa_node;
-
-  alloc.flags = (flags & VLIB_PHYSMEM_F_SHARED) ?
-    CLIB_MEM_VM_F_SHARED : CLIB_MEM_VM_F_LOCKED;
-
-  if ((flags & VLIB_PHYSMEM_F_HUGETLB))
-    {
-      alloc.flags |= CLIB_MEM_VM_F_HUGETLB;
-      alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC;
-      alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE;
-    }
-  else
-    {
-      alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER;
-    }
-
-  error = clib_mem_vm_ext_alloc (&alloc);
-  if (error)
-    goto error;
-
-  pr->index = pr - vpm->regions;
-  pr->flags = flags;
-  pr->fd = alloc.fd;
-  pr->mem = alloc.addr;
-  pr->log2_page_size = alloc.log2_page_size;
-  pr->n_pages = alloc.n_pages;
-  pr->size = (u64) pr->n_pages << (u64) pr->log2_page_size;
-  pr->page_mask = (1ull << pr->log2_page_size) - 1;
-  pr->numa_node = numa_node;
-  pr->name = format (0, "%s%c", name, 0);
-
-  for (i = 0; i < pr->n_pages; i++)
-    {
-      void *ptr = pr->mem + ((u64) i << pr->log2_page_size);
-      int node;
-      if ((move_pages (0, 1, &ptr, 0, &node, 0) == 0) && (numa_node != node))
-	{
-	  clib_warning ("physmem page for region \'%s\' allocated on the"
-			" wrong numa node (requested %u actual %u)",
-			pr->name, pr->numa_node, node, i);
-	  break;
-	}
-    }
-
-  pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
-					  pr->n_pages);
-
-  linux_vfio_dma_map_regions (vm);
-
-  if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)
-    {
-#if USE_DLMALLOC == 0
-      pr->heap = mheap_alloc_with_flags (pr->mem, pr->size,
-					 /* Don't want mheap mmap/munmap with IO memory. */
-					 MHEAP_FLAG_DISABLE_VM |
-					 MHEAP_FLAG_THREAD_SAFE);
-#else
-      pr->heap = create_mspace_with_base (pr->mem, pr->size, 1 /* locked */ );
-      mspace_disable_expand (pr->heap);
-#endif
-    }
-
-  *idx = pr->index;
-
-  goto done;
-
-error:
-  clib_memset (pr, 0, sizeof (*pr));
-  pool_put (vpm->regions, pr);
-
-done:
-  return error;
-}
-
-static void
-unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx)
-{
-  vlib_physmem_main_t *vpm = &physmem_main;
-  vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
-
-  if (pr->fd > 0)
-    close (pr->fd);
-  munmap (pr->mem, pr->size);
-  vec_free (pr->name);
-  pool_put (vpm->regions, pr);
-}
-
-clib_error_t *
-unix_physmem_init (vlib_main_t * vm)
-{
-  vlib_physmem_main_t *vpm = &physmem_main;
-  clib_error_t *error = 0;
-  u64 *pt = 0;
-
-  /* Avoid multiple calls. */
-  if (vm->os_physmem_alloc_aligned)
-    return error;
-
-  /* check if pagemap is accessible */
-  pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1);
-  if (pt[0])
-    vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP;
-  vec_free (pt);
-
-  if ((error = linux_vfio_init (vm)))
-    return error;
-
-  vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
-  vm->os_physmem_free = unix_physmem_free;
-  vm->os_physmem_region_alloc = unix_physmem_region_alloc;
-  vm->os_physmem_region_free = unix_physmem_region_free;
-
-  return error;
-}
-
-static clib_error_t *
-show_physmem (vlib_main_t * vm,
-	      unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-  vlib_physmem_main_t *vpm = &physmem_main;
-  vlib_physmem_region_t *pr;
-
-  /* *INDENT-OFF* */
-  pool_foreach (pr, vpm->regions, (
-    {
-      vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d "
-		       "numa-node %u fd %d\n",
-		       pr->index, pr->name, (1 << (pr->log2_page_size -10)),
-		       pr->n_pages, pr->numa_node, pr->fd);
-      if (pr->heap)
-	vlib_cli_output (vm, "  %U", format_mheap, pr->heap, /* verbose */ 1);
-      else
-	vlib_cli_output (vm, "  no heap\n");
-    }));
-  /* *INDENT-ON* */
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_physmem_command, static) = {
-  .path = "show physmem",
-  .short_help = "Show physical memory allocation",
-  .function = show_physmem,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/linux/vfio.c b/src/vlib/linux/vfio.c
index e72f10388ca..d300a683dd7 100644
--- a/src/vlib/linux/vfio.c
+++ b/src/vlib/linux/vfio.c
@@ -34,52 +34,46 @@ linux_vfio_main_t vfio_main;
 
-static int
-vfio_map_regions (vlib_main_t * vm, int fd)
+clib_error_t *
+vfio_map_physmem_page (vlib_main_t * vm, void *addr)
 {
-  vlib_physmem_main_t *vpm = &physmem_main;
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
   linux_vfio_main_t *lvm = &vfio_main;
-  vlib_physmem_region_t *pr;
   struct vfio_iommu_type1_dma_map dm = { 0 };
-  int i;
+  uword log2_page_size = vpm->pmalloc_main->log2_page_sz;
+  uword physmem_start = pointer_to_uword (vpm->pmalloc_main->base);
+
+  if (lvm->container_fd == -1)
+    return clib_error_return (0, "No cointainer fd");
+
+  u32 page_index = vlib_physmem_get_page_index (vm, addr);
+
+  if (clib_bitmap_get (lvm->physmem_pages_mapped, page_index))
+    {
+      vlib_log_debug (lvm->log_default, "map DMA va:%p page:%u already "
+		      "mapped", addr, page_index);
+      return 0;
+    }
 
   dm.argsz = sizeof (struct vfio_iommu_type1_dma_map);
   dm.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+  dm.vaddr = physmem_start + (page_index << log2_page_size);
+  dm.size = 1ULL << log2_page_size;
+  dm.iova = dm.vaddr;
+  vlib_log_debug (lvm->log_default, "map DMA page:%u va:0x%lx iova:%lx "
+		  "size:0x%lx", page_index, dm.vaddr, dm.iova, dm.size);
 
-  /* *INDENT-OFF* */
-  pool_foreach (pr, vpm->regions,
+  if (ioctl (lvm->container_fd, VFIO_IOMMU_MAP_DMA, &dm) == -1)
     {
-      vec_foreach_index (i, pr->page_table)
-	{
-	  int rv;
-	  dm.vaddr = pointer_to_uword (pr->mem) + ((u64)i << pr->log2_page_size);
-	  dm.size = 1ull << pr->log2_page_size;
-	  dm.iova = dm.vaddr;
-	  vlib_log_debug (lvm->log_default, "map DMA va:0x%lx iova:%lx "
-			  "size:0x%lx", dm.vaddr, dm.iova, dm.size);
-
-	  if ((rv = ioctl (fd, VFIO_IOMMU_MAP_DMA, &dm)) &&
-	      errno != EINVAL)
-	    {
-	      vlib_log_err (lvm->log_default, "map DMA va:0x%lx iova:%lx "
-			    "size:0x%lx failed, error %s (errno %d)",
-			    dm.vaddr, dm.iova, dm.size, strerror (errno),
-			    errno);
-	      return rv;
-	    }
-	}
-    });
-  /* *INDENT-ON* */
-  return 0;
-}
-
-void
-linux_vfio_dma_map_regions (vlib_main_t * vm)
-{
-  linux_vfio_main_t *lvm = &vfio_main;
+      vlib_log_err (lvm->log_default, "map DMA page:%u va:0x%lx iova:%lx "
+		    "size:0x%lx failed, error %s (errno %d)", page_index,
+		    dm.vaddr, dm.iova, dm.size, strerror (errno), errno);
+      return clib_error_return_unix (0, "physmem DMA map failed");
+    }
-
-  if (lvm->container_fd != -1)
-    vfio_map_regions (vm, lvm->container_fd);
+  lvm->physmem_pages_mapped = clib_bitmap_set (lvm->physmem_pages_mapped,
+					       page_index, 1);
+  return 0;
 }
 
 static linux_pci_vfio_iommu_group_t *
diff --git a/src/vlib/linux/vfio.h b/src/vlib/linux/vfio.h
index aae8e3c6ee7..c1d815664c9 100644
--- a/src/vlib/linux/vfio.h
+++ b/src/vlib/linux/vfio.h
@@ -36,6 +36,8 @@ typedef struct
   /* iommu group pool index by group id hash */
   uword *iommu_pool_index_by_group;
 
+  clib_bitmap_t *physmem_pages_mapped;
+
   /* logging */
   vlib_log_class_t log_default;
 } linux_vfio_main_t;
@@ -43,7 +45,7 @@ typedef struct
 extern linux_vfio_main_t vfio_main;
 
 clib_error_t *linux_vfio_init (vlib_main_t * vm);
-void linux_vfio_dma_map_regions (vlib_main_t * vm);
+clib_error_t *vfio_map_physmem_page (vlib_main_t * vm, void *addr);
 clib_error_t *linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr,
 					      int *fd, int *is_noiommu);
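The vfio.c rewrite above maps physmem one page at a time and records mapped pages in the new `physmem_pages_mapped` bitmap, which is what allows the growable pmalloc heap to add pages after a device has been attached without double-mapping anything. A sketch of the per-map loop, modelled on the `add_device_vfio()` hunk; only `vlib_physmem_get_map()` and `vfio_map_physmem_page()` come from this change, the wrapper name is hypothetical.

```c
/* Sketch: DMA-map every page of one physmem map, mirroring the loop the
 * patch adds to add_device_vfio(). map_whole_physmem_map() is an
 * illustrative name, not part of the change. */
#include <vlib/vlib.h>
#include <vlib/linux/vfio.h>

static clib_error_t *
map_whole_physmem_map (vlib_main_t * vm, u32 physmem_map_index)
{
  vlib_physmem_map_t *pm = vlib_physmem_get_map (vm, physmem_map_index);
  clib_error_t *err;
  u32 i;

  for (i = 0; i < pm->n_pages; i++)
    {
      /* Pages already present in the physmem_pages_mapped bitmap are
         skipped inside vfio_map_physmem_page(), so re-running this loop
         after the pmalloc heap grows is safe. */
      err = vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size));
      if (err)
        return err;
    }
  return 0;
}
```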