aboutsummaryrefslogtreecommitdiffstats
path: root/src/vlib
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-09-30 18:26:20 +0200
committerDamjan Marion <dmarion@me.com>2018-10-23 14:21:10 +0000
commit68b4da67deb2e8ca224bb5abaeb9dbc7ae8e378c (patch)
treecd1ee2c463aefdb31c73665eafb876568054f49e /src/vlib
parentfc3b8b8ad08d2d4cc375149ecdc10c37d4a80940 (diff)
Numa-aware, growable physical memory allocator (pmalloc)
Change-Id: Ic4c46bc733afae8bf0d8146623ed15633928de30 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vlib')
-rw-r--r--src/vlib/CMakeLists.txt2
-rw-r--r--src/vlib/buffer.c75
-rw-r--r--src/vlib/buffer.h16
-rw-r--r--src/vlib/buffer_funcs.h5
-rw-r--r--src/vlib/linux/pci.c34
-rwxr-xr-xsrc/vlib/linux/physmem.c307
-rw-r--r--src/vlib/linux/vfio.c68
-rw-r--r--src/vlib/linux/vfio.h4
-rw-r--r--src/vlib/main.c2
-rw-r--r--src/vlib/main.h19
-rw-r--r--src/vlib/pci/pci.h5
-rwxr-xr-xsrc/vlib/physmem.c150
-rw-r--r--src/vlib/physmem.h32
-rw-r--r--src/vlib/physmem_funcs.h113
-rwxr-xr-xsrc/vlib/unix/main.c1
-rw-r--r--src/vlib/unix/unix.h2
16 files changed, 292 insertions, 543 deletions
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt
index 72c73f3c2d8..2a6cbd54da2 100644
--- a/src/vlib/CMakeLists.txt
+++ b/src/vlib/CMakeLists.txt
@@ -39,7 +39,6 @@ add_vpp_library(vlib
i2c.c
init.c
linux/pci.c
- linux/physmem.c
linux/vfio.c
log.c
main.c
@@ -47,6 +46,7 @@ add_vpp_library(vlib
node_cli.c
node_format.c
pci/pci.c
+ physmem.c
threads.c
threads_cli.c
trace.c
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index ee76290e25a..711b83514f4 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -470,26 +470,10 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm,
}
+/* Hand out one buffer-sized chunk from the physmem map backing pool bp,
+   asking for cache-line alignment.  The result of
+   vlib_physmem_alloc_from_map () is returned unchanged; the caller treats
+   0 as "no buffer available". */
static_always_inline void *
-vlib_buffer_pool_get_buffer (vlib_buffer_pool_t * bp)
+vlib_buffer_pool_get_buffer (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
- uword slot, page, addr;
-
- if (PREDICT_FALSE (bp->n_elts == bp->n_used))
- {
- clib_spinlock_unlock (&bp->lock);
- return 0;
- }
- slot = bp->next_clear;
- bp->bitmap = clib_bitmap_set (bp->bitmap, slot, 1);
- bp->next_clear = clib_bitmap_next_clear (bp->bitmap, slot + 1);
- bp->n_used++;
-
- page = slot / bp->buffers_per_page;
- slot -= page * bp->buffers_per_page;
-
- addr = bp->start + (page << bp->log2_page_size) + slot * bp->buffer_size;
-
- return uword_to_pointer (addr, void *);
+  u32 map_index = bp->physmem_map_index;
+  uword n_bytes = bp->buffer_size;
+
+  return vlib_physmem_alloc_from_map (vm, map_index, n_bytes,
+                                      CLIB_CACHE_LINE_BYTES);
}
/* Make sure free list has at least given number of free buffers. */
@@ -533,7 +517,7 @@ vlib_buffer_fill_free_list_internal (vlib_main_t * vm,
clib_spinlock_lock (&bp->lock);
while (n_alloc < n)
{
- if ((b = vlib_buffer_pool_get_buffer (bp)) == 0)
+ if ((b = vlib_buffer_pool_get_buffer (vm, bp)) == 0)
goto done;
n_alloc += 1;
@@ -866,14 +850,13 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
}
u8
-vlib_buffer_pool_create (vlib_main_t * vm, vlib_physmem_region_index_t pri,
- u16 buffer_size)
+vlib_buffer_register_physmem_map (vlib_main_t * vm, u32 physmem_map_index)
{
vlib_buffer_main_t *bm = &buffer_main;
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, pri);
vlib_buffer_pool_t *p;
- uword start = pointer_to_uword (pr->mem);
- uword size = pr->size;
+ vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index);
+ uword start = pointer_to_uword (m->base);
+ uword size = m->n_pages << m->log2_page_size;
if (bm->buffer_mem_size == 0)
{
@@ -903,18 +886,8 @@ vlib_buffer_pool_create (vlib_main_t * vm, vlib_physmem_region_index_t pri,
vec_add2 (bm->buffer_pools, p, 1);
p->start = start;
p->size = size;
- p->physmem_region = pri;
-
- if (buffer_size == 0)
- goto done;
+ p->physmem_map_index = physmem_map_index;
- p->log2_page_size = pr->log2_page_size;
- p->buffer_size = buffer_size;
- p->buffers_per_page = (1ull << pr->log2_page_size) / p->buffer_size;
- p->n_elts = p->buffers_per_page * pr->n_pages;
- p->n_used = 0;
- clib_spinlock_init (&p->lock);
-done:
ASSERT (p - bm->buffer_pools < 256);
return p - bm->buffer_pools;
}
@@ -983,8 +956,9 @@ clib_error_t *
vlib_buffer_main_init (struct vlib_main_t * vm)
{
vlib_buffer_main_t *bm = &buffer_main;
- vlib_physmem_region_index_t pri;
clib_error_t *error;
+ u32 physmem_map_index;
+ u8 pool_index;
if (vlib_buffer_callbacks)
{
@@ -1003,25 +977,18 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
&vlib_buffer_delete_free_list_internal;
clib_spinlock_init (&bm->buffer_known_hash_lockp);
- /* allocate default region */
- error = vlib_physmem_region_alloc (vm, "buffers",
- vlib_buffer_physmem_sz, 0,
- VLIB_PHYSMEM_F_SHARED |
- VLIB_PHYSMEM_F_HUGETLB, &pri);
+ if ((error = vlib_physmem_shared_map_create (vm, "buffers",
+ vlib_buffer_physmem_sz, 1,
+ &physmem_map_index)))
+ return error;
- if (error == 0)
- goto done;
+ pool_index = vlib_buffer_register_physmem_map (vm, physmem_map_index);
+ vlib_buffer_pool_t *bp = vlib_buffer_pool_get (pool_index);
+ clib_spinlock_init (&bp->lock);
+ bp->buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES +
+ sizeof (vlib_buffer_t);
- clib_error_free (error);
-
- error = vlib_physmem_region_alloc (vm, "buffers",
- vlib_buffer_physmem_sz, 0,
- VLIB_PHYSMEM_F_SHARED, &pri);
-done:
- if (error == 0)
- vlib_buffer_pool_create (vm, pri, sizeof (vlib_buffer_t) +
- VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES);
- return error;
+ return 0;
}
static clib_error_t *
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index 0d24779e52b..f9750ecf3e1 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -412,16 +412,9 @@ typedef struct
uword start;
uword size;
uword log2_page_size;
- vlib_physmem_region_index_t physmem_region;
-
+ u32 physmem_map_index;
+ u32 buffer_size;
u32 *buffers;
-
- u16 buffer_size;
- uword buffers_per_page;
- uword n_elts;
- uword n_used;
- uword next_clear;
- uword *bitmap;
clib_spinlock_t lock;
} vlib_buffer_pool_t;
@@ -466,9 +459,8 @@ vlib_buffer_pool_get (u8 buffer_pool_index)
return vec_elt_at_index (bm->buffer_pools, buffer_pool_index);
}
-u8 vlib_buffer_pool_create (struct vlib_main_t * vm,
- vlib_physmem_region_index_t region,
- u16 buffer_size);
+u8 vlib_buffer_register_physmem_map (struct vlib_main_t * vm,
+ u32 physmem_map_index);
clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm);
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 1110c206e52..6106b7984a2 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -317,10 +317,7 @@ vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
+/* Address of the data area of buffer b as translated by
+   vlib_physmem_get_pa (). */
always_inline uword
vlib_buffer_get_pa (vlib_main_t * vm, vlib_buffer_t * b)
{
- vlib_buffer_main_t *bm = &buffer_main;
- vlib_buffer_pool_t *pool = vec_elt_at_index (bm->buffer_pools,
- b->buffer_pool_index);
- return vlib_physmem_virtual_to_physical (vm, pool->physmem_region, b->data);
+  void *data = b->data;
+
+  return vlib_physmem_get_pa (vm, data);
}
always_inline uword
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index b55fb5042f7..0e2241b0e58 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -951,6 +951,21 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
linux_pci_vfio_unmask_intx (vm, p);
}
+  if (p->supports_va_dma)
+    {
+      vlib_buffer_pool_t *bp;
+      /* Map every page of every buffer pool's physmem map for DMA.
+         NOTE(review): the clib_error_t returned by vfio_map_physmem_page ()
+         is dropped here - confirm whether failures should be propagated. */
+      /* *INDENT-OFF* */
+      vec_foreach (bp, buffer_main.buffer_pools)
+        {
+          uword i;
+          vlib_physmem_map_t *pm;
+          pm = vlib_physmem_get_map (vm, bp->physmem_map_index);
+          /* i is a uword so the byte offset is computed at pointer width;
+             a u32 shift would wrap for offsets of 4GB and above. */
+          for (i = 0; i < pm->n_pages; i++)
+            vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size));
+        }
+      /* *INDENT-ON* */
+    }
+
if (r && r->init_function)
err = r->init_function (lpm->vlib_main, p->handle);
@@ -1092,6 +1107,25 @@ vlib_pci_map_region_fixed (vlib_main_t * vm, vlib_pci_dev_handle_t h,
}
+/* Map the physmem page containing ptr for DMA on behalf of device h.
+   Nothing is mapped (and 0 is returned) when the device's supports_va_dma
+   flag is clear; otherwise the result of vfio_map_physmem_page () is
+   returned. */
clib_error_t *
+vlib_pci_map_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h, void *ptr)
+{
+  linux_pci_device_t *dev = linux_pci_get_device (h);
+  clib_error_t *err = 0;
+
+  if (dev->supports_va_dma)
+    err = vfio_map_physmem_page (vm, ptr);
+
+  return err;
+}
+
+/* Return 1 when the supports_va_dma flag of device h is set, 0 otherwise. */
+int
+vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h)
+{
+  linux_pci_device_t *dev = linux_pci_get_device (h);
+
+  return dev->supports_va_dma ? 1 : 0;
+}
+
+clib_error_t *
vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
pci_device_id_t ids[], vlib_pci_dev_handle_t * handle)
{
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
deleted file mode 100755
index 90b0f8cab3d..00000000000
--- a/src/vlib/linux/physmem.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * physmem.c: Unix physical memory
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/mount.h>
-#include <sys/mman.h>
-#include <sys/fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <vppinfra/linux/syscall.h>
-#include <vppinfra/linux/sysfs.h>
-#include <vlib/vlib.h>
-#include <vlib/physmem.h>
-#include <vlib/unix/unix.h>
-#include <vlib/pci/pci.h>
-#include <vlib/linux/vfio.h>
-
-static void *
-unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- uword n_bytes, uword alignment)
-{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- uword lo_offset, hi_offset;
- uword *to_free = 0;
-
- if (pr->heap == 0)
- return 0;
-
- /* IO memory is always at least cache aligned. */
- alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
-
- while (1)
- {
-#if USE_DLMALLOC == 0
-
- mheap_get_aligned (pr->heap, n_bytes,
- /* align */ alignment,
- /* align offset */ 0,
- &lo_offset);
-#else
- lo_offset = (uword) mspace_get_aligned (pr->heap, n_bytes,
- alignment, ~0ULL /* offset */ );
- if (lo_offset == 0)
- lo_offset = ~0ULL;
-#endif
-
- /* Allocation failed? */
- if (lo_offset == ~0)
- break;
-
- /* Make sure allocation does not span DMA physical chunk boundary. */
- hi_offset = lo_offset + n_bytes - 1;
-
- if (((pointer_to_uword (pr->heap) + lo_offset) >> pr->log2_page_size) ==
- ((pointer_to_uword (pr->heap) + hi_offset) >> pr->log2_page_size))
- break;
-
- /* Allocation would span chunk boundary, queue it to be freed as soon as
- we find suitable chunk. */
- vec_add1 (to_free, lo_offset);
- }
-
- if (to_free != 0)
- {
- uword i;
- for (i = 0; i < vec_len (to_free); i++)
- {
-#if USE_DLMALLOC == 0
- mheap_put (pr->heap, to_free[i]);
-#else
- mspace_put_no_offset (pr->heap, (void *) to_free[i]);
-#endif
- }
- vec_free (to_free);
- }
-
-#if USE_DLMALLOC == 0
- return lo_offset != ~0 ? (void *) (pr->heap + lo_offset) : 0;
-#else
- return lo_offset != ~0 ? (void *) lo_offset : 0;
-#endif
-}
-
-static void
-unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x)
-{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- /* Return object to region's heap. */
-#if USE_DLMALLOC == 0
- mheap_put (pr->heap, x - pr->heap);
-#else
- mspace_put_no_offset (pr->heap, x);
-#endif
-}
-
-static clib_error_t *
-unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
- u8 numa_node, u32 flags,
- vlib_physmem_region_index_t * idx)
-{
- vlib_physmem_main_t *vpm = &physmem_main;
- vlib_physmem_region_t *pr;
- clib_error_t *error = 0;
- clib_mem_vm_alloc_t alloc = { 0 };
- int i;
-
- pool_get (vpm->regions, pr);
-
- if ((pr - vpm->regions) >= 256)
- {
- error = clib_error_return (0, "maximum number of regions reached");
- goto error;
- }
-
- alloc.name = name;
- alloc.size = size;
- alloc.numa_node = numa_node;
-
- alloc.flags = (flags & VLIB_PHYSMEM_F_SHARED) ?
- CLIB_MEM_VM_F_SHARED : CLIB_MEM_VM_F_LOCKED;
-
- if ((flags & VLIB_PHYSMEM_F_HUGETLB))
- {
- alloc.flags |= CLIB_MEM_VM_F_HUGETLB;
- alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC;
- alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE;
- }
- else
- {
- alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER;
- }
-
- error = clib_mem_vm_ext_alloc (&alloc);
- if (error)
- goto error;
-
- pr->index = pr - vpm->regions;
- pr->flags = flags;
- pr->fd = alloc.fd;
- pr->mem = alloc.addr;
- pr->log2_page_size = alloc.log2_page_size;
- pr->n_pages = alloc.n_pages;
- pr->size = (u64) pr->n_pages << (u64) pr->log2_page_size;
- pr->page_mask = (1ull << pr->log2_page_size) - 1;
- pr->numa_node = numa_node;
- pr->name = format (0, "%s%c", name, 0);
-
- for (i = 0; i < pr->n_pages; i++)
- {
- void *ptr = pr->mem + ((u64) i << pr->log2_page_size);
- int node;
- if ((move_pages (0, 1, &ptr, 0, &node, 0) == 0) && (numa_node != node))
- {
- clib_warning ("physmem page for region \'%s\' allocated on the"
- " wrong numa node (requested %u actual %u)",
- pr->name, pr->numa_node, node, i);
- break;
- }
- }
-
- pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
- pr->n_pages);
-
- linux_vfio_dma_map_regions (vm);
-
- if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)
- {
-#if USE_DLMALLOC == 0
- pr->heap = mheap_alloc_with_flags (pr->mem, pr->size,
- /* Don't want mheap mmap/munmap with IO memory. */
- MHEAP_FLAG_DISABLE_VM |
- MHEAP_FLAG_THREAD_SAFE);
-#else
- pr->heap = create_mspace_with_base (pr->mem, pr->size, 1 /* locked */ );
- mspace_disable_expand (pr->heap);
-#endif
- }
-
- *idx = pr->index;
-
- goto done;
-
-error:
- clib_memset (pr, 0, sizeof (*pr));
- pool_put (vpm->regions, pr);
-
-done:
- return error;
-}
-
-static void
-unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx)
-{
- vlib_physmem_main_t *vpm = &physmem_main;
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
-
- if (pr->fd > 0)
- close (pr->fd);
- munmap (pr->mem, pr->size);
- vec_free (pr->name);
- pool_put (vpm->regions, pr);
-}
-
-clib_error_t *
-unix_physmem_init (vlib_main_t * vm)
-{
- vlib_physmem_main_t *vpm = &physmem_main;
- clib_error_t *error = 0;
- u64 *pt = 0;
-
- /* Avoid multiple calls. */
- if (vm->os_physmem_alloc_aligned)
- return error;
-
- /* check if pagemap is accessible */
- pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1);
- if (pt[0])
- vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP;
- vec_free (pt);
-
- if ((error = linux_vfio_init (vm)))
- return error;
-
- vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
- vm->os_physmem_free = unix_physmem_free;
- vm->os_physmem_region_alloc = unix_physmem_region_alloc;
- vm->os_physmem_region_free = unix_physmem_region_free;
-
- return error;
-}
-
-static clib_error_t *
-show_physmem (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_physmem_main_t *vpm = &physmem_main;
- vlib_physmem_region_t *pr;
-
- /* *INDENT-OFF* */
- pool_foreach (pr, vpm->regions, (
- {
- vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d "
- "numa-node %u fd %d\n",
- pr->index, pr->name, (1 << (pr->log2_page_size -10)),
- pr->n_pages, pr->numa_node, pr->fd);
- if (pr->heap)
- vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1);
- else
- vlib_cli_output (vm, " no heap\n");
- }));
- /* *INDENT-ON* */
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_physmem_command, static) = {
- .path = "show physmem",
- .short_help = "Show physical memory allocation",
- .function = show_physmem,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/linux/vfio.c b/src/vlib/linux/vfio.c
index e72f10388ca..d300a683dd7 100644
--- a/src/vlib/linux/vfio.c
+++ b/src/vlib/linux/vfio.c
@@ -34,52 +34,46 @@
linux_vfio_main_t vfio_main;
-static int
-vfio_map_regions (vlib_main_t * vm, int fd)
+/* Map the physmem page that contains addr into the VFIO container for DMA,
+   using the page's virtual address as IOVA.  Pages already recorded in
+   physmem_pages_mapped are skipped.
+   Returns 0 on success or when the page was already mapped, and an error
+   when there is no container fd or the VFIO_IOMMU_MAP_DMA ioctl fails. */
+clib_error_t *
+vfio_map_physmem_page (vlib_main_t * vm, void *addr)
{
- vlib_physmem_main_t *vpm = &physmem_main;
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
linux_vfio_main_t *lvm = &vfio_main;
- vlib_physmem_region_t *pr;
struct vfio_iommu_type1_dma_map dm = { 0 };
- int i;
+  uword log2_page_size = vpm->pmalloc_main->log2_page_sz;
+  uword physmem_start = pointer_to_uword (vpm->pmalloc_main->base);
+
+  if (lvm->container_fd == -1)
+    return clib_error_return (0, "No container fd");
+
+  u32 page_index = vlib_physmem_get_page_index (vm, addr);
+
+  if (clib_bitmap_get (lvm->physmem_pages_mapped, page_index))
+    {
+      vlib_log_debug (lvm->log_default, "map DMA va:%p page:%u already "
+                      "mapped", addr, page_index);
+      return 0;
+    }
dm.argsz = sizeof (struct vfio_iommu_type1_dma_map);
dm.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+  /* Widen page_index before shifting: the shift is otherwise evaluated in
+     32 bits and wraps for pages 4GB or more past the physmem base. */
+  dm.vaddr = physmem_start + ((uword) page_index << log2_page_size);
+  dm.size = 1ULL << log2_page_size;
+  dm.iova = dm.vaddr;
+  vlib_log_debug (lvm->log_default, "map DMA page:%u va:0x%lx iova:%lx "
+                  "size:0x%lx", page_index, dm.vaddr, dm.iova, dm.size);
- /* *INDENT-OFF* */
- pool_foreach (pr, vpm->regions,
+  if (ioctl (lvm->container_fd, VFIO_IOMMU_MAP_DMA, &dm) == -1)
{
- vec_foreach_index (i, pr->page_table)
- {
- int rv;
- dm.vaddr = pointer_to_uword (pr->mem) + ((u64)i << pr->log2_page_size);
- dm.size = 1ull << pr->log2_page_size;
- dm.iova = dm.vaddr;
- vlib_log_debug (lvm->log_default, "map DMA va:0x%lx iova:%lx "
- "size:0x%lx", dm.vaddr, dm.iova, dm.size);
-
- if ((rv = ioctl (fd, VFIO_IOMMU_MAP_DMA, &dm)) &&
- errno != EINVAL)
- {
- vlib_log_err (lvm->log_default, "map DMA va:0x%lx iova:%lx "
- "size:0x%lx failed, error %s (errno %d)",
- dm.vaddr, dm.iova, dm.size, strerror (errno),
- errno);
- return rv;
- }
- }
- });
- /* *INDENT-ON* */
- return 0;
-}
-
-void
-linux_vfio_dma_map_regions (vlib_main_t * vm)
-{
- linux_vfio_main_t *lvm = &vfio_main;
+      vlib_log_err (lvm->log_default, "map DMA page:%u va:0x%lx iova:%lx "
+                    "size:0x%lx failed, error %s (errno %d)", page_index,
+                    dm.vaddr, dm.iova, dm.size, strerror (errno), errno);
+      return clib_error_return_unix (0, "physmem DMA map failed");
+    }
- if (lvm->container_fd != -1)
- vfio_map_regions (vm, lvm->container_fd);
+  /* Remember the page so later calls for the same page are no-ops. */
+  lvm->physmem_pages_mapped = clib_bitmap_set (lvm->physmem_pages_mapped,
+                                               page_index, 1);
+  return 0;
}
static linux_pci_vfio_iommu_group_t *
diff --git a/src/vlib/linux/vfio.h b/src/vlib/linux/vfio.h
index aae8e3c6ee7..c1d815664c9 100644
--- a/src/vlib/linux/vfio.h
+++ b/src/vlib/linux/vfio.h
@@ -36,6 +36,8 @@ typedef struct
/* iommu group pool index by group id hash */
uword *iommu_pool_index_by_group;
+ clib_bitmap_t *physmem_pages_mapped;
+
/* logging */
vlib_log_class_t log_default;
} linux_vfio_main_t;
@@ -43,7 +45,7 @@ typedef struct
extern linux_vfio_main_t vfio_main;
clib_error_t *linux_vfio_init (vlib_main_t * vm);
-void linux_vfio_dma_map_regions (vlib_main_t * vm);
+clib_error_t *vfio_map_physmem_page (vlib_main_t * vm, void *addr);
clib_error_t *linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr,
int *fd, int *is_noiommu);
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 14d89141ff8..a6ad4032dae 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1756,7 +1756,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
if (!vm->name)
vm->name = "VLIB";
- if ((error = unix_physmem_init (vm)))
+ if ((error = vlib_physmem_init (vm)))
{
clib_error_report (error);
goto done;
diff --git a/src/vlib/main.h b/src/vlib/main.h
index ddc14df5360..7c34fb6528d 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -118,23 +118,8 @@ typedef struct vlib_main_t
/* Pool of buffer free lists. */
vlib_buffer_free_list_t *buffer_free_list_pool;
- /* Allocate/free buffer memory for DMA transfers, descriptor rings, etc.
- buffer memory is guaranteed to be cache-aligned. */
-
- clib_error_t *(*os_physmem_region_alloc) (struct vlib_main_t * vm,
- char *name, u32 size,
- u8 numa_node, u32 flags,
- vlib_physmem_region_index_t *
- idx);
-
- void (*os_physmem_region_free) (struct vlib_main_t * vm,
- vlib_physmem_region_index_t idx);
-
- void *(*os_physmem_alloc_aligned) (struct vlib_main_t * vm,
- vlib_physmem_region_index_t idx,
- uword n_bytes, uword alignment);
- void (*os_physmem_free) (struct vlib_main_t * vm,
- vlib_physmem_region_index_t idx, void *x);
+ /* physical memory main structure. */
+ vlib_physmem_main_t physmem_main;
/* Node graph main structure. */
vlib_node_main_t node_main;
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
index 3d5cd405c89..e0eacf42948 100644
--- a/src/vlib/pci/pci.h
+++ b/src/vlib/pci/pci.h
@@ -293,6 +293,11 @@ clib_error_t *vlib_pci_enable_msix_irq (vlib_main_t * vm,
clib_error_t *vlib_pci_disable_msix_irq (vlib_main_t * vm,
vlib_pci_dev_handle_t h, u16 start,
u16 count);
+clib_error_t *vlib_pci_map_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h,
+ void *ptr);
+
+int vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm,
+ vlib_pci_dev_handle_t h);
unformat_function_t unformat_vlib_pci_addr;
format_function_t format_vlib_pci_addr;
diff --git a/src/vlib/physmem.c b/src/vlib/physmem.c
new file mode 100755
index 00000000000..e2d88922f56
--- /dev/null
+++ b/src/vlib/physmem.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <vppinfra/linux/syscall.h>
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+#include <vlib/physmem.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vlib/linux/vfio.h>
+
+/* Create a shared pmalloc arena called name, of size bytes, on numa_node
+   and register it as a physmem map.
+   On success *map_index receives the pool index of the new map and 0 is
+   returned.  If the arena cannot be created the pmalloc error is returned
+   and *map_index is left untouched. */
+clib_error_t *
+vlib_physmem_shared_map_create (vlib_main_t * vm, char *name, uword size,
+                                u32 numa_node, u32 * map_index)
+{
+  clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
+  vlib_physmem_map_t *map;
+  clib_pmalloc_arena_t *a;
+  void *va;
+  uword i;
+
+  va = clib_pmalloc_create_shared_arena (pm, name, size, numa_node);
+
+  if (va == 0)
+    return clib_error_return (0, "%U", format_clib_error,
+                              clib_pmalloc_last_error (pm));
+
+  a = clib_pmalloc_get_arena (pm, va);
+
+  pool_get (vpm->maps, map);
+  *map_index = map->index = map - vpm->maps;
+  map->base = va;
+  map->fd = a->fd;
+  map->n_pages = a->n_pages;
+  map->log2_page_size = a->log2_page_sz;
+  /* pool_get () does not clear the element; start from an empty vector
+     before appending to it below. */
+  map->page_table = 0;
+
+  for (i = 0; i < a->n_pages; i++)
+    {
+      /* i is a uword so the offset is computed at pointer width; an int
+         shift overflows once the offset reaches 2GB. */
+      u8 *page_va = (u8 *) va + (i << a->log2_page_sz);
+      uword pa = clib_pmalloc_get_pa (pm, page_va);
+
+      /* maybe iova: no address was reported, so fall back to the page's
+         own virtual address rather than the arena base */
+      if (pa == 0)
+        pa = pointer_to_uword (page_va);
+
+      vec_add1 (map->page_table, pa);
+    }
+
+  return 0;
+}
+
+/* Look up a physmem map by its index in the maps pool. */
+vlib_physmem_map_t *
+vlib_physmem_get_map (vlib_main_t * vm, u32 index)
+{
+  vlib_physmem_map_t *maps = vm->physmem_main.maps;
+
+  return pool_elt_at_index (maps, index);
+}
+
+/* Physmem setup: probe whether the pagemap yields an address, initialise
+   VFIO and create the pmalloc main structure.
+   Returns 0 on success or the error reported by linux_vfio_init (). */
+clib_error_t *
+vlib_physmem_init (vlib_main_t * vm)
+{
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
+  clib_error_t *error = 0;
+  u64 *pt = 0;
+  void *p;
+
+  /* check if pagemap is accessible; the lookup may hand back no vector at
+     all, so guard against dereferencing a null pointer */
+  pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1);
+  if (pt && pt[0])
+    vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP;
+  vec_free (pt);
+
+  if ((error = linux_vfio_init (vm)))
+    return error;
+
+  /* The pmalloc main structure lives in a zeroed, cache-line aligned
+     allocation referenced from the physmem main. */
+  p = clib_mem_alloc_aligned (sizeof (clib_pmalloc_main_t),
+                              CLIB_CACHE_LINE_BYTES);
+  memset (p, 0, sizeof (clib_pmalloc_main_t));
+  vpm->pmalloc_main = (clib_pmalloc_main_t *) p;
+  clib_pmalloc_init (vpm->pmalloc_main, 0);
+
+  return error;
+}
+
+/* CLI handler for "show physmem": prints the pmalloc state through
+   format_pmalloc.  The optional keywords "verbose"/"v" select verbosity 1
+   and "detail"/"d" select verbosity 2; parsing stops at the first
+   unrecognised token. */
+static clib_error_t *
+show_physmem (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  u32 verbose = 0;
+
+  if (unformat_user (input, unformat_line_input, line_input))
+    {
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line_input, "verbose")
+              || unformat (line_input, "v"))
+            verbose = 1;
+          else if (unformat (line_input, "detail")
+                   || unformat (line_input, "d"))
+            verbose = 2;
+          else
+            break;
+        }
+      unformat_free (line_input);
+    }
+
+  vlib_cli_output (vm, " %U", format_pmalloc, vpm->pmalloc_main, verbose);
+  return 0;
+}
+
+/* Registers the "show physmem" CLI command; the help text lists the
+   optional keywords accepted by show_physmem (). */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_physmem_command, static) = {
+  .path = "show physmem",
+  .short_help = "show physmem [verbose | detail]",
+  .function = show_physmem,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/physmem.h b/src/vlib/physmem.h
index 2f54938ff10..3e73a1b03f0 100644
--- a/src/vlib/physmem.h
+++ b/src/vlib/physmem.h
@@ -40,41 +40,27 @@
#ifndef included_vlib_physmem_h
#define included_vlib_physmem_h
-typedef u8 vlib_physmem_region_index_t;
+#include <vppinfra/pmalloc.h>
typedef struct
{
- vlib_physmem_region_index_t index;
- void *mem;
- uword size;
+ int index;	/* index of this map in vlib_physmem_main_t.maps */
int fd;
- u8 log2_page_size;
- u16 n_pages;
- u32 page_mask;
-
- void *heap;
- u32 flags;
-#define VLIB_PHYSMEM_F_INIT_MHEAP (1 << 0)
-#define VLIB_PHYSMEM_F_HUGETLB (1 << 1)
-#define VLIB_PHYSMEM_F_SHARED (1 << 2)
-
- u8 numa_node;
- u64 *page_table;
- u8 *name;
-} vlib_physmem_region_t;
-
-
+ void *base;	/* start address of the backing pmalloc arena */
+ u32 n_pages;	/* number of pages, copied from the arena */
+ uword *page_table;	/* vector with one address per page, filled at map creation */
+ u32 log2_page_size;	/* log2 of the page size, copied from the arena */
+} vlib_physmem_map_t;
typedef struct
{
u32 flags;
#define VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP (1 << 0)
#define VLIB_PHYSMEM_MAIN_F_HAVE_IOMMU (1 << 1)
- vlib_physmem_region_t *regions;
+ vlib_physmem_map_t *maps;	/* pool of maps, added by vlib_physmem_shared_map_create () */
+ clib_pmalloc_main_t *pmalloc_main;	/* allocated and initialised in vlib_physmem_init () */
} vlib_physmem_main_t;
-extern vlib_physmem_main_t physmem_main;
-
#endif /* included_vlib_physmem_h */
/*
diff --git a/src/vlib/physmem_funcs.h b/src/vlib/physmem_funcs.h
index bff66aa5726..0082f85c70d 100644
--- a/src/vlib/physmem_funcs.h
+++ b/src/vlib/physmem_funcs.h
@@ -40,115 +40,62 @@
#ifndef included_vlib_physmem_funcs_h
#define included_vlib_physmem_funcs_h
-always_inline vlib_physmem_region_t *
-vlib_physmem_get_region (vlib_main_t * vm, u8 index)
-{
- vlib_physmem_main_t *vpm = &physmem_main;
- return pool_elt_at_index (vpm->regions, index);
-}
-
-always_inline u64
-vlib_physmem_offset_to_physical (vlib_main_t * vm,
- vlib_physmem_region_index_t idx, uword o)
-{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- uword page_index = o >> pr->log2_page_size;
- ASSERT (o < pr->size);
- ASSERT (pr->page_table[page_index] != 0);
- return (vec_elt (pr->page_table, page_index) + (o & pr->page_mask));
-}
-
-always_inline int
-vlib_physmem_is_virtual (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- uword p)
-{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- return p >= pointer_to_uword (pr->mem)
- && p < (pointer_to_uword (pr->mem) + pr->size);
-}
-
-always_inline uword
-vlib_physmem_offset_of (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- void *p)
-{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- uword a = pointer_to_uword (p);
- uword o;
+clib_error_t *vlib_physmem_init (vlib_main_t * vm);
+clib_error_t *vlib_physmem_shared_map_create (vlib_main_t * vm, char *name,
+ uword size, u32 numa_node,
+ u32 * map_index);
- ASSERT (vlib_physmem_is_virtual (vm, idx, a));
- o = a - pointer_to_uword (pr->mem);
-
- /* Offset must fit in 32 bits. */
- ASSERT ((uword) o == a - pointer_to_uword (pr->mem));
-
- return o;
-}
+vlib_physmem_map_t *vlib_physmem_get_map (vlib_main_t * vm, u32 index);
+/* Allocate n_bytes from the pmalloc allocator with the given alignment;
+   returns whatever clib_pmalloc_alloc_aligned () returns. */
always_inline void *
-vlib_physmem_at_offset (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- uword offset)
+vlib_physmem_alloc_aligned (vlib_main_t * vm, uword n_bytes, uword alignment)
{
- vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
- ASSERT (offset < pr->size);
- return uword_to_pointer (pointer_to_uword (pr->mem) + offset, void *);
+  return clib_pmalloc_alloc_aligned (vm->physmem_main.pmalloc_main, n_bytes,
+                                     alignment);
}
+/* Convenience allocator: I/O memory gets cache line alignment unless the
+   caller asks otherwise through vlib_physmem_alloc_aligned (). */
always_inline void *
-vlib_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- clib_error_t ** error,
- uword n_bytes, uword alignment)
+vlib_physmem_alloc (vlib_main_t * vm, uword n_bytes)
{
- void *r = vm->os_physmem_alloc_aligned (vm, idx, n_bytes, alignment);
- if (!r)
- *error =
- clib_error_return (0, "failed to allocate %wd bytes of I/O memory",
- n_bytes);
- else
- *error = 0;
- return r;
+  const uword alignment = CLIB_CACHE_LINE_BYTES;
+
+  return vlib_physmem_alloc_aligned (vm, n_bytes, alignment);
}
-/* By default allocate I/O memory with cache line alignment. */
+/* Allocate n_bytes from the arena behind the map physmem_map_index.
+   The requested alignment is honoured, with a cache line as the minimum;
+   returns whatever clib_pmalloc_alloc_from_arena () returns. */
always_inline void *
-vlib_physmem_alloc (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- clib_error_t ** error, uword n_bytes)
+vlib_physmem_alloc_from_map (vlib_main_t * vm, u32 physmem_map_index,
+                             uword n_bytes, uword alignment)
{
- return vlib_physmem_alloc_aligned (vm, idx, error, n_bytes,
- CLIB_CACHE_LINE_BYTES);
+  clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
+  vlib_physmem_map_t *map = vlib_physmem_get_map (vm, physmem_map_index);
+
+  /* Pass the caller's alignment through instead of a fixed cache line;
+     smaller requests are still rounded up to a cache line. */
+  return clib_pmalloc_alloc_from_arena (pm, map->base, n_bytes,
+                                        clib_max (alignment,
+                                                  CLIB_CACHE_LINE_BYTES));
}
+/* Hand p back to the pmalloc allocator; a null p is ignored. */
always_inline void
-vlib_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx,
- void *mem)
+vlib_physmem_free (vlib_main_t * vm, void *p)
{
- if (mem)
- vm->os_physmem_free (vm, idx, mem);
+  if (p == 0)
+    return;
+
+  clib_pmalloc_free (vm->physmem_main.pmalloc_main, p);
}
+/* Page index of mem as reported by clib_pmalloc_get_page_index (). */
always_inline u64
-vlib_physmem_virtual_to_physical (vlib_main_t * vm,
- vlib_physmem_region_index_t idx, void *mem)
+vlib_physmem_get_page_index (vlib_main_t * vm, void *mem)
{
- vlib_physmem_main_t *vpm = &physmem_main;
- vlib_physmem_region_t *pr = pool_elt_at_index (vpm->regions, idx);
- uword o = mem - pr->mem;
- return vlib_physmem_offset_to_physical (vm, idx, o);
+  return clib_pmalloc_get_page_index (vm->physmem_main.pmalloc_main, mem);
}
-
-always_inline clib_error_t *
-vlib_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
- u8 numa_node, u32 flags,
- vlib_physmem_region_index_t * idx)
+/* Address translation of mem as reported by clib_pmalloc_get_pa (). */
+always_inline u64
+vlib_physmem_get_pa (vlib_main_t * vm, void *mem)
{
- return vm->os_physmem_region_alloc (vm, name, size, numa_node, flags, idx);
+  return clib_pmalloc_get_pa (vm->physmem_main.pmalloc_main, mem);
}
-always_inline void
-vlib_physmem_region_free (struct vlib_main_t *vm,
- vlib_physmem_region_index_t idx)
+/* Wrap the most recent pmalloc error in a new clib error for the caller,
+   instead of a fixed "unknown error" text.
+   NOTE(review): assumes clib_pmalloc_last_error () is safe to format when
+   no error has been recorded yet - confirm against pmalloc. */
+always_inline clib_error_t *
+vlib_physmem_last_error (struct vlib_main_t * vm)
{
- vm->os_physmem_region_free (vm, idx);
+  clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
+
+  return clib_error_return (0, "%U", format_clib_error,
+                            clib_pmalloc_last_error (pm));
}
#endif /* included_vlib_physmem_funcs_h */
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 45b12cd5b77..08f0506fbc2 100755
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -61,7 +61,6 @@ char *vlib_default_runtime_dir = "vlib";
unix_main_t unix_main;
clib_file_main_t file_main;
-vlib_physmem_main_t physmem_main;
static clib_error_t *
unix_main_init (vlib_main_t * vm)
diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h
index 7856e5b7df7..e71b0bac6a5 100644
--- a/src/vlib/unix/unix.h
+++ b/src/vlib/unix/unix.h
@@ -126,8 +126,6 @@ unix_save_error (unix_main_t * um, clib_error_t * error)
/* Main function for Unix VLIB. */
int vlib_unix_main (int argc, char *argv[]);
-clib_error_t *unix_physmem_init (vlib_main_t * vm);
-
/* Set prompt for CLI. */
void vlib_unix_cli_set_prompt (char *prompt);