From 3b64d6334b4e8d0759cff043a55042f88d1ccb0e Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 12:26:12 +0200 Subject: vlib: move linux-specific code to vlib/linux Change-Id: Id79d2c2be7a98e15416a537c890a8f2dd6d4464d Signed-off-by: Damjan Marion --- src/vlib/linux/physmem.c | 411 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 src/vlib/linux/physmem.c (limited to 'src/vlib/linux/physmem.c') diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c new file mode 100644 index 00000000..6731295c --- /dev/null +++ b/src/vlib/linux/physmem.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * physmem.c: Unix physical memory + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static void * +unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, + uword n_bytes, uword alignment) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + uword lo_offset, hi_offset; + uword *to_free = 0; + + if (pr->heap == 0) + return 0; + + /* IO memory is always at least cache aligned. */ + alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); + + while (1) + { + mheap_get_aligned (pr->heap, n_bytes, + /* align */ alignment, + /* align offset */ 0, + &lo_offset); + + /* Allocation failed? */ + if (lo_offset == ~0) + break; + + if (pr->flags & VLIB_PHYSMEM_F_FAKE) + break; + + /* Make sure allocation does not span DMA physical chunk boundary. */ + hi_offset = lo_offset + n_bytes - 1; + + if ((lo_offset >> pr->log2_page_size) == + (hi_offset >> pr->log2_page_size)) + break; + + /* Allocation would span chunk boundary, queue it to be freed as soon as + we find suitable chunk. */ + vec_add1 (to_free, lo_offset); + } + + if (to_free != 0) + { + uword i; + for (i = 0; i < vec_len (to_free); i++) + mheap_put (pr->heap, to_free[i]); + vec_free (to_free); + } + + return lo_offset != ~0 ? 
pr->heap + lo_offset : 0; +} + +static void +unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + /* Return object to region's heap. */ + mheap_put (pr->heap, x - pr->heap); +} + +static u64 +get_page_paddr (int fd, uword addr) +{ + int pagesize = sysconf (_SC_PAGESIZE); + u64 seek, pagemap = 0; + + seek = ((u64) addr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + { + clib_unix_warning ("lseek to 0x%llx", seek); + return 0; + } + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + { + clib_unix_warning ("read ptbits"); + return 0; + } + if ((pagemap & (1ULL << 63)) == 0) + return 0; + + pagemap &= pow2_mask (55); + + return pagemap * pagesize; +} + +static clib_error_t * +unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, + u8 numa_node, u32 flags, + vlib_physmem_region_index_t * idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + clib_error_t *error = 0; + int pagemap_fd = -1; + u8 *mount_dir = 0; + u8 *filename = 0; + struct stat st; + int old_mpol; + int mmap_flags; + struct bitmask *old_mask = numa_allocate_nodemask (); + + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) + return clib_error_return (0, "not allowed"); + + pool_get (vpm->regions, pr); + + if ((pr - vpm->regions) >= 256) + { + error = clib_error_return (0, "maximum number of regions reached"); + goto error; + } + + pr->index = pr - vpm->regions; + pr->fd = -1; + pr->flags = flags; + + if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) + == -1) + { + error = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + { + error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); + goto error; + } + + mount_dir = format (0, "%s/physmem_region%d%c", 
+ vlib_unix_get_runtime_dir (), pr->index, 0); + filename = format (0, "%s/mem%c", mount_dir, 0); + + unlink ((char *) mount_dir); + + error = vlib_unix_recursive_mkdir ((char *) mount_dir); + if (error) + goto error; + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + error = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + error = clib_error_return_unix (0, "open"); + goto error; + } + + mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + } + else + { + if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) + return clib_error_return_unix (0, "memfd_create"); + + if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + error = + clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + goto error; + } + mmap_flags = MAP_SHARED; + } + + if (fstat (pr->fd, &st)) + { + error = clib_error_return_unix (0, "fstat"); + goto error; + } + + pr->log2_page_size = min_log2 (st.st_blksize); + pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; + size = pr->n_pages * (1 << pr->log2_page_size); + + if ((ftruncate (pr->fd, size)) == -1) + { + error = clib_error_return_unix (0, "ftruncate length: %d", size); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + error = vlib_sysfs_prealloc_hugepages (numa_node, + 1 << (pr->log2_page_size - 10), + pr->n_pages); + if (error) + goto error; + } + + numa_set_preferred (numa_node); + + pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); + + if (pr->mem == MAP_FAILED) + { + pr->mem = 0; + error = clib_error_return_unix (0, "mmap"); + goto error; + } + + if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) + { + error = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + pr->size = pr->n_pages << pr->log2_page_size; + pr->page_mask = (1 << pr->log2_page_size) - 1; + pr->numa_node = numa_node; + pr->name 
= format (0, "%s", name); + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + void *ptr = pr->mem + (i << pr->log2_page_size); + int node; + move_pages (0, 1, &ptr, 0, &node, 0); + if (numa_node != node) + { + clib_warning + ("physmem page for region \'%s\' allocated on the wrong" + " numa node (requested %u actual %u)", pr->name, + pr->numa_node, node, i); + break; + } + } + } + + if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) + { + pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, + /* Don't want mheap mmap/munmap with IO memory. */ + MHEAP_FLAG_DISABLE_VM | + MHEAP_FLAG_THREAD_SAFE); + fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); + } + + if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) + { + vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); + } + + *idx = pr->index; + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + uword vaddr = + pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); + u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); + vec_add1 (pr->page_table, page_paddr); + } + } + + goto done; + +error: + if (pr->fd > -1) + close (pr->fd); + + if (pr->mem) + munmap (pr->mem, size); + + memset (pr, 0, sizeof (*pr)); + pool_put (vpm->regions, pr); + +done: + if (mount_dir) + { + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + vec_free (mount_dir); + } + numa_free_cpumask (old_mask); + vec_free (filename); + if (pagemap_fd > -1) + close (pagemap_fd); + return error; +} + +static void +unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + + if (pr->fd > 0) + close (pr->fd); + munmap (pr->mem, pr->size); + vec_free (pr->name); + pool_put (vpm->regions, pr); +} + +clib_error_t * +unix_physmem_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + /* Avoid 
multiple calls. */ + if (vm->os_physmem_alloc_aligned) + return error; + + vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; + vm->os_physmem_free = unix_physmem_free; + vm->os_physmem_region_alloc = unix_physmem_region_alloc; + vm->os_physmem_region_free = unix_physmem_region_free; + + return error; +} + +static clib_error_t * +show_physmem (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + + /* *INDENT-OFF* */ + pool_foreach (pr, vpm->regions, ( + { + vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d " + "numa-node %u fd %d\n", + pr->index, pr->name, (1 << (pr->log2_page_size -10)), + pr->n_pages, pr->numa_node, pr->fd); + if (pr->heap) + vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1); + else + vlib_cli_output (vm, " no heap\n"); + })); + /* *INDENT-ON* */ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_physmem_command, static) = { + .path = "show physmem", + .short_help = "Show physical memory allocation", + .function = show_physmem, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 816122e303efb5012f6897bd53ff8fe28cd8fa1c Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 19:21:00 +0200 Subject: physmem: make fake allocation non-fatal if we cannot pick numa node Change-Id: I563c043ed82e3ef199fc3d47931108f31cc01728 Signed-off-by: Damjan Marion --- src/vlib/linux/physmem.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'src/vlib/linux/physmem.c') diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index 6731295c..fddff2ea 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -170,8 +170,13 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, if (get_mempolicy (&old_mpol, 
old_mask->maskp, old_mask->size + 1, NULL, 0) == -1) { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + error = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + else + old_mpol = -1; } if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) @@ -246,7 +251,8 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - numa_set_preferred (numa_node); + if (old_mpol != -1) + numa_set_preferred (numa_node); pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); @@ -257,7 +263,8 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) + if (old_mpol != -1 && + set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) { error = clib_error_return_unix (0, "set_mempolicy"); goto error; -- cgit 1.2.3-korg From d9226b25f145c64e5bc4a38c3fee7e9b2eaac2de Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 12 Sep 2017 15:34:17 +0200 Subject: physmem: remove debug leftovers Change-Id: I5a5dc0794d3398e749b64b07dfd1e2fc2230089b Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/cli.c | 1 - src/vlib/linux/physmem.c | 1 - 2 files changed, 2 deletions(-) (limited to 'src/vlib/linux/physmem.c') diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index fe1c41c2..aeeb772d 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -357,7 +357,6 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, "name=\"%s\" available = %7d allocated = %7d total = %7d\n", rmp->name, (u32) count, (u32) free_count, (u32) (count + free_count)); - rte_mempool_dump (stderr, rmp); } else { diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index fddff2ea..d8c5dc9b 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -300,7 +300,6 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, 
u32 size, /* Don't want mheap mmap/munmap with IO memory. */ MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE); - fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); } if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) -- cgit 1.2.3-korg From 01914ce45729833cec88c65689de9a0336cd40cc Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 14 Sep 2017 19:04:50 +0200 Subject: vppinfra: add clib_mem_vm_ext_alloc function Change-Id: Iff33694fc42cc3bcc73cf1372339053a6365039c Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 6 +- src/plugins/memif/memif.c | 21 ++- src/vlib.am | 5 +- src/vlib/linux/pci.c | 25 ++-- src/vlib/linux/physmem.c | 192 ++++-------------------- src/vlib/linux/syscall.h | 58 -------- src/vlib/linux/sysfs.c | 250 ------------------------------- src/vlib/linux/sysfs.h | 44 ------ src/vlib/threads.c | 6 +- src/vlib/threads_cli.c | 6 +- src/vnet/devices/af_packet/af_packet.c | 4 +- src/vppinfra.am | 5 +- src/vppinfra/linux/mem.c | 260 +++++++++++++++++++++++++++++++++ src/vppinfra/linux/syscall.h | 56 +++++++ src/vppinfra/linux/sysfs.c | 250 +++++++++++++++++++++++++++++++ src/vppinfra/linux/sysfs.h | 46 ++++++ src/vppinfra/mem.h | 94 ++++++++++-- src/vppinfra/vm_linux_kernel.h | 78 ---------- src/vppinfra/vm_standalone.h | 74 ---------- src/vppinfra/vm_unix.h | 106 -------------- 20 files changed, 761 insertions(+), 825 deletions(-) delete mode 100644 src/vlib/linux/syscall.h delete mode 100644 src/vlib/linux/sysfs.c delete mode 100644 src/vlib/linux/sysfs.h create mode 100644 src/vppinfra/linux/mem.c create mode 100644 src/vppinfra/linux/syscall.h create mode 100644 src/vppinfra/linux/sysfs.c create mode 100644 src/vppinfra/linux/sysfs.h delete mode 100644 src/vppinfra/vm_linux_kernel.h delete mode 100644 src/vppinfra/vm_standalone.h delete mode 100644 src/vppinfra/vm_unix.h (limited to 'src/vlib/linux/physmem.c') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 95176fb8..ee61f94e 100755 --- 
a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -1040,7 +1040,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) mem = mem_by_socket[c]; page_size = 1024; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_1g = 0; @@ -1049,7 +1049,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_error_free (e); page_size = 2; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_2m = 0; diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 8fec409a..6a609a57 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -267,6 +267,8 @@ memif_init_regions_and_queues (memif_if_t * mif) int i, j; u64 buffer_offset; memif_region_t *r; + clib_mem_vm_alloc_t alloc = { 0 }; + clib_error_t *err; vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); r = vec_elt_at_index (mif->regions, 0); @@ -279,18 +281,15 @@ memif_init_regions_and_queues (memif_if_t * mif) mif->run.buffer_size * (1 << mif->run.log2_ring_size) * (mif->run.num_s2m_rings + mif->run.num_m2s_rings); - if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + alloc.name = "memif region"; + alloc.size = r->region_size; + alloc.flags = CLIB_MEM_VM_F_SHARED; - if ((ftruncate (r->fd, r->region_size)) == -1) - return clib_error_return_unix (0, "ftruncate"); + err = 
clib_mem_vm_ext_alloc (&alloc); + if (err) + return err; - if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE, - MAP_SHARED, r->fd, 0)) == MAP_FAILED) - return clib_error_return_unix (0, "mmap"); + r->fd = alloc.fd; for (i = 0; i < mif->run.num_s2m_rings; i++) { diff --git a/src/vlib.am b/src/vlib.am index 41d68690..067e4afc 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -13,7 +13,7 @@ lib_LTLIBRARIES += libvlib.la -libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread -lnuma +libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread libvlib_la_DEPENDENCIES = libvppinfra.la BUILT_SOURCES += vlib/config.h @@ -34,7 +34,6 @@ libvlib_la_SOURCES = \ vlib/init.c \ vlib/linux/pci.c \ vlib/linux/physmem.c \ - vlib/linux/sysfs.c \ vlib/main.c \ vlib/mc.c \ vlib/node.c \ @@ -60,8 +59,6 @@ nobase_include_HEADERS += \ vlib/global_funcs.h \ vlib/i2c.h \ vlib/init.h \ - vlib/linux/sysfs.h \ - vlib/linux/syscall.h \ vlib/main.h \ vlib/mc.h \ vlib/node_funcs.h \ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 4ce19190..790f168a 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -37,10 +37,11 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include + #include #include #include -#include #include #include @@ -104,7 +105,7 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) format_vlib_pci_addr, &d->bus_address); s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = vlib_sysfs_link_to_name ((char *) s); + driver_name = clib_sysfs_link_to_name ((char *) s); vec_reset_length (s); if (driver_name && @@ -183,32 +184,32 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) vec_reset_length (s); s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); s = format (s, "%v/driver_override%c", dev_dir_name, 0); if (access ((char *) s, F_OK) == 0) { - vlib_sysfs_write ((char *) s, "%s", uio_driver_name); + clib_sysfs_write ((char *) s, "%s", uio_driver_name); clear_driver_override = 1; } else { vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, + clib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, d->device_id); } vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); if (clear_driver_override) { s = format (s, "%v/driver_override%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%c", 0); + clib_sysfs_write ((char *) s, "%c", 0); vec_reset_length (s); } @@ -602,28 +603,28 @@ scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) dev->numa_node = -1; vec_reset_length (f); f = format (f, "%v/numa_node%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); + clib_sysfs_read ((char *) f, "%u", &dev->numa_node); vec_reset_length (f); f = format 
(f, "%v/class%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_class = tmp >> 8; vec_reset_length (f); f = format (f, "%v/vendor%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->vendor_id = tmp; vec_reset_length (f); f = format (f, "%v/device%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_id = tmp; error = init_device (vm, dev, &pdev); vec_reset_length (f); f = format (f, "%v/driver%c", dev_dir_name, 0); - dev->driver_name = vlib_sysfs_link_to_name ((char *) f); + dev->driver_name = clib_sysfs_link_to_name ((char *) f); done: vec_free (f); diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index d8c5dc9b..3cc42a06 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -43,14 +43,12 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include -#include static void * unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, @@ -111,31 +109,6 @@ unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) mheap_put (pr->heap, x - pr->heap); } -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; -} - static clib_error_t * unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, u8 numa_node, u32 flags, @@ -144,13 +117,8 @@ unix_physmem_region_alloc (vlib_main_t * vm, char 
*name, u32 size, vlib_physmem_main_t *vpm = &vm->physmem_main; vlib_physmem_region_t *pr; clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); + clib_mem_vm_alloc_t alloc = { 0 }; + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) return clib_error_return (0, "not allowed"); @@ -163,113 +131,32 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; - - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - else - old_mpol = -1; - } + alloc.name = name; + alloc.size = size; + alloc.numa_node = numa_node; + alloc.flags = CLIB_MEM_VM_F_SHARED; if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC; + alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE; } else { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == 
-1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; - } - - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - if (old_mpol != -1) - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; + alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER; } - if (old_mpol != -1 && - set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } + error = clib_mem_vm_ext_alloc (&alloc); + if (error) + goto error; + pr->index = pr - vpm->regions; + pr->flags = flags; + pr->fd = alloc.fd; + pr->mem = alloc.addr; + pr->log2_page_size = alloc.log2_page_size; + pr->n_pages = alloc.n_pages; pr->size = pr->n_pages << pr->log2_page_size; pr->page_mask = (1 << pr->log2_page_size) - 1; pr->numa_node = numa_node; @@ -285,13 +172,14 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, move_pages (0, 1, &ptr, 0, &node, 0); if (numa_node != node) { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); + clib_warning ("physmem page for region \'%s\' 
allocated on the" + " wrong numa node (requested %u actual %u)", + pr->name, pr->numa_node, node, i); break; } } + pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size, + pr->n_pages); } if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) @@ -309,41 +197,13 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, *idx = pr->index; - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - goto done; error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - memset (pr, 0, sizeof (*pr)); pool_put (vpm->regions, pr); done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); return error; } diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h deleted file mode 100644 index 9e37997e..00000000 --- a/src/vlib/linux/syscall.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_linux_syscall_h -#define included_linux_syscall_h - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - - -#endif /* included_linux_syscall_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c deleted file mode 100644 index f92f9ef5..00000000 --- a/src/vlib/linux/sysfs.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include -#include -#include - -clib_error_t * -vlib_sysfs_write (char *file_name, char *fmt, ...) -{ - u8 *s; - int fd; - clib_error_t *error = 0; - - fd = open (file_name, O_WRONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - va_list va; - va_start (va, fmt); - s = va_format (0, fmt, &va); - va_end (va); - - if (write (fd, s, vec_len (s)) < 0) - error = clib_error_return_unix (0, "write `%s'", file_name); - - vec_free (s); - close (fd); - return error; -} - -clib_error_t * -vlib_sysfs_read (char *file_name, char *fmt, ...) -{ - unformat_input_t input; - u8 *s = 0; - int fd; - ssize_t sz; - uword result; - - fd = open (file_name, O_RDONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - vec_validate (s, 4095); - - sz = read (fd, s, vec_len (s)); - if (sz < 0) - { - close (fd); - vec_free (s); - return clib_error_return_unix (0, "read `%s'", file_name); - } - - _vec_len (s) = sz; - unformat_init_vector (&input, s); - - va_list va; - va_start (va, fmt); - result = va_unformat (&input, fmt, &va); - va_end (va); - - vec_free (s); - close (fd); - - if (result == 0) - return clib_error_return (0, "unformat error"); - - return 0; -} - -u8 * -vlib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} - -clib_error_t * -vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - 
error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); - vlib_sysfs_write ((char *) p, "%d", nr); - -done: - vec_free (p); - return error; -} - - -static clib_error_t * -vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, - int page_size, int *val) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, - type, 0); - error = vlib_sysfs_read ((char *) p, "%d", val); - -done: - vec_free (p); - return error; -} - -clib_error_t * -vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_surplus_hugepages 
(unsigned int numa_node, int page_size, - int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - int n, needed; - error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); - if (error) - return error; - needed = nr - n; - if (needed <= 0) - return 0; - - error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); - if (error) - return error; - clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", - needed, page_size, numa_node); - return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h deleted file mode 100644 index 14b71317..00000000 --- a/src/vlib/linux/sysfs.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef included_linux_sysfs_h -#define included_linux_sysfs_h - -clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); - -clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); - -u8 *vlib_sysfs_link_to_name (char *link); - -clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, - int page_size, int nr); -clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, - int page_size, int nr); - -#endif /* included_linux_sysfs_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 2d9ce84a..f9c7043c 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -289,7 +289,7 @@ sort_registrations_by_no_clone (void *a0, void *a1) } static uword * -vlib_sysfs_list_to_bitmap (char *filename) +clib_sysfs_list_to_bitmap (char *filename) { FILE *fp; uword *r = 0; @@ -331,9 +331,9 @@ vlib_thread_init (vlib_main_t * vm) /* get bitmaps of active cpu cores and sockets */ tm->cpu_core_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); tm->cpu_socket_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index f8d5d8f9..02bdea5c 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -15,10 +15,10 @@ #define _GNU_SOURCE #include +#include #include #include -#include #include static u8 * @@ -98,14 +98,14 @@ show_threads_fn 
(vlib_main_t * vm, u8 *p = 0; p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0); - vlib_sysfs_read ((char *) p, "%d", &core_id); + clib_sysfs_read ((char *) p, "%d", &core_id); vec_reset_length (p); p = format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, lcore, 0); - vlib_sysfs_read ((char *) p, "%d", &socket_id); + clib_sysfs_read ((char *) p, "%d", &socket_id); vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 62bb228f..32696014 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -24,9 +24,9 @@ #include #include +#include #include #include -#include #include #include @@ -75,7 +75,7 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, { s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0); - error = vlib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); + error = clib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); vec_free (s); if (error) diff --git a/src/vppinfra.am b/src/vppinfra.am index a5769a0d..daca9954 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -188,6 +188,8 @@ nobase_include_HEADERS = \ vppinfra/graph.h \ vppinfra/hash.h \ vppinfra/heap.h \ + vppinfra/linux/sysfs.h \ + vppinfra/linux/syscall.h \ vppinfra/lock.h \ vppinfra/longjmp.h \ vppinfra/macros.h \ @@ -233,7 +235,6 @@ nobase_include_HEADERS = \ vppinfra/vector_neon.h \ vppinfra/vector_sse2.h \ vppinfra/valgrind.h \ - vppinfra/vm_unix.h \ vppinfra/xxhash.h \ vppinfra/xy.h \ vppinfra/zvec.h @@ -291,6 +292,8 @@ CLIB_CORE = \ libvppinfra_la_SOURCES = \ $(CLIB_CORE) \ vppinfra/elf_clib.c \ + vppinfra/linux/mem.c \ + vppinfra/linux/sysfs.c \ vppinfra/socket.c \ vppinfra/timer.c \ vppinfra/unix-formats.c \ diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c new file mode 100644 index 00000000..665ddf61 --- /dev/null +++ 
b/src/vppinfra/linux/mem.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int +clib_mem_vm_get_log2_page_size (int fd) +{ + struct stat st = { 0 }; + if (fstat (fd, &st)) + return 0; + return min_log2 (st.st_blksize); +} + +clib_error_t * +clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a) +{ + int fd = -1; + clib_error_t *err = 0; + void *addr = 0; + u8 *filename = 0; + int mmap_flags = MAP_SHARED; + int log2_page_size; + int n_pages; + int old_mpol = -1; + u64 old_mask[16] = { 0 }; + + /* save old numa mem policy if needed */ + if (a->flags & (CLIB_MEM_VM_F_NUMA_PREFER | CLIB_MEM_VM_F_NUMA_FORCE)) + { + int rv; + rv = + get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0); + + if (rv == -1) + { + if ((a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0) + { 
+ err = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + else + old_mpol = -1; + } + } + + /* if we are creating shared segment, we need file descriptor */ + if (a->flags & CLIB_MEM_VM_F_SHARED) + { + /* if hugepages are needed we need to create mount point */ + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + char *mount_dir; + char template[] = "/tmp/hugepage_mount.XXXXXX"; + + mount_dir = mkdtemp (template); + if (mount_dir == 0) + return clib_error_return_unix (0, "mkdtemp \'%s\'", template); + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + err = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + filename = format (0, "%s/%s%c", mount_dir, a->name, 0); + + if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + err = clib_error_return_unix (0, "open"); + goto error; + } + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + mmap_flags |= MAP_LOCKED; + } + else + { + if ((fd = memfd_create (a->name, MFD_ALLOW_SEALING)) == -1) + { + err = clib_error_return_unix (0, "memfd_create"); + goto error; + } + + if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + err = clib_error_return_unix (0, "fcntl (F_ADD_SEALS)"); + goto error; + } + } + log2_page_size = clib_mem_vm_get_log2_page_size (fd); + } + else /* not CLIB_MEM_VM_F_SHARED */ + { + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + mmap_flags |= MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = 21; + } + else + { + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = min_log2 (sysconf (_SC_PAGESIZE)); + } + } + + n_pages = ((a->size - 1) >> log2_page_size) + 1; + + + if (a->flags & CLIB_MEM_VM_F_HUGETLB_PREALLOC) + { + err = clib_sysfs_prealloc_hugepages (a->numa_node, + 1 << (log2_page_size - 10), + n_pages); + if (err) + goto error; + + } + + if (fd != -1) + if ((ftruncate (fd, a->size)) == -1) + { + err = clib_error_return_unix (0, "ftruncate"); + goto error; + } + + if 
(old_mpol != -1) + { + int rv; + u64 mask[16] = { 0 }; + mask[0] = 1 << a->numa_node; + rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1); + if (rv) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + } + + addr = mmap (0, a->size, (PROT_READ | PROT_WRITE), mmap_flags, fd, 0); + if (addr == MAP_FAILED) + { + err = clib_error_return_unix (0, "mmap"); + goto error; + } + + /* re-apply old numa memory policy */ + if (old_mpol != -1 && + set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + a->log2_page_size = log2_page_size; + a->n_pages = n_pages; + a->addr = addr; + a->fd = fd; + goto done; + +error: + if (fd != -1) + close (fd); + +done: + vec_free (filename); + return err; +} + +u64 * +clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages) +{ + int pagesize = sysconf (_SC_PAGESIZE); + int fd; + int i; + u64 *r = 0; + + if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + return 0; + + for (i = 0; i < n_pages; i++) + { + u64 seek, pagemap = 0; + uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size); + seek = ((u64) vaddr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + goto done; + + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + goto done; + + if ((pagemap & (1ULL << 63)) == 0) + goto done; + + pagemap &= pow2_mask (55); + vec_add1 (r, pagemap * pagesize); + } + +done: + close (fd); + if (vec_len (r) != n_pages) + { + vec_free (r); + return 0; + } + return r; +} + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h new file mode 100644 index 00000000..f8ec5919 --- /dev/null +++ b/src/vppinfra/linux/syscall.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_syscall_h +#define included_linux_syscall_h + +#include +#include + +static inline long +set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode) +{ + return syscall (__NR_set_mempolicy, mode, nodemask, maxnode); +} + +static inline int +get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode, + void *addr, unsigned long flags) +{ + return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags); +} + +static inline long +move_pages (int pid, unsigned long count, void **pages, const int *nodes, + int *status, int flags) +{ + return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags); +} + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +#endif /* included_linux_syscall_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c new file mode 100644 index 00000000..5f611e6a --- /dev/null +++ b/src/vppinfra/linux/sysfs.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +clib_error_t * +clib_sysfs_write (char *file_name, char *fmt, ...) +{ + u8 *s; + int fd; + clib_error_t *error = 0; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + error = clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return error; +} + +clib_error_t * +clib_sysfs_read (char *file_name, char *fmt, ...) 
+{ + unformat_input_t input; + u8 *s = 0; + int fd; + ssize_t sz; + uword result; + + fd = open (file_name, O_RDONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + vec_validate (s, 4095); + + sz = read (fd, s, vec_len (s)); + if (sz < 0) + { + close (fd); + vec_free (s); + return clib_error_return_unix (0, "read `%s'", file_name); + } + + _vec_len (s) = sz; + unformat_init_vector (&input, s); + + va_list va; + va_start (va, fmt); + result = va_unformat (&input, fmt, &va); + va_end (va); + + vec_free (s); + close (fd); + + if (result == 0) + return clib_error_return (0, "unformat error"); + + return 0; +} + +u8 * +clib_sysfs_link_to_name (char *link) +{ + char *p, buffer[64]; + unformat_input_t in; + u8 *s = 0; + int r; + + r = readlink (link, buffer, sizeof (buffer) - 1); + + if (r < 0) + return 0; + + buffer[r] = 0; + p = strrchr (buffer, '/'); + + if (!p) + return 0; + + unformat_init_string (&in, p + 1, strlen (p + 1)); + if (unformat (&in, "%s", &s) != 1) + clib_unix_warning ("no string?"); + unformat_free (&in); + + return s; +} + +clib_error_t * +clib_sysfs_set_nr_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); + clib_sysfs_write ((char *) p, "%d", nr); + +done: + vec_free (p); + return 
error; +} + + +static clib_error_t * +clib_sysfs_get_xxx_hugepages (char *type, int numa_node, + int page_size, int *val) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, + type, 0); + error = clib_sysfs_read ((char *) p, "%d", val); + +done: + vec_free (p); + return error; +} + +clib_error_t * +clib_sysfs_get_free_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_nr_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_surplus_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_prealloc_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + int n, needed; + error = clib_sysfs_get_free_hugepages (numa_node, page_size, &n); + if (error) + return error; + needed = nr - n; + if (needed <= 0) + return 0; + + error = clib_sysfs_get_nr_hugepages (numa_node, page_size, &n); + if (error) + return error; + clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", + needed, page_size, numa_node); + return 
clib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h new file mode 100644 index 00000000..6c80cf95 --- /dev/null +++ b/src/vppinfra/linux/sysfs.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_sysfs_h +#define included_linux_sysfs_h + +#include + +clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...); + +clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...); + +u8 *clib_sysfs_link_to_name (char *link); + +clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node, + int page_size, int nr); +clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_free_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node, + int page_size, int nr); + +#endif /* included_linux_sysfs_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 63c5ac16..69ab8803 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -39,8 +39,11 
@@ #define _included_clib_mem_h #include +#include +#include #include /* uword, etc */ +#include #include #include #include /* memcpy, memset */ @@ -264,19 +267,90 @@ void clib_mem_usage (clib_mem_usage_t * usage); u8 *format_clib_mem_usage (u8 * s, va_list * args); -/* Include appropriate VM functions depending on whether - we are compiling for linux kernel, for Unix or standalone. */ -#ifdef CLIB_LINUX_KERNEL -#include -#endif +/* Allocate virtual address space. */ +always_inline void * +clib_mem_vm_alloc (uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE; -#ifdef CLIB_UNIX -#include +#ifdef MAP_ANONYMOUS + flags |= MAP_ANONYMOUS; #endif -#ifdef CLIB_STANDALONE -#include -#endif + mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void +clib_mem_vm_free (void *addr, uword size) +{ + munmap (addr, size); +} + +always_inline void * +clib_mem_vm_unmap (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + /* To unmap we "map" with no protection. If we actually called + munmap then other callers could steal the address space. By + changing to PROT_NONE the kernel can free up the pages which is + really what we want "unmap" to mean. */ + mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void * +clib_mem_vm_map (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +typedef struct +{ +#define CLIB_MEM_VM_F_SHARED (1 << 0) +#define CLIB_MEM_VM_F_HUGETLB (1 << 1) +#define CLIB_MEM_VM_F_NUMA_PREFER (1 << 2) +#define CLIB_MEM_VM_F_NUMA_FORCE (1 << 3) +#define CLIB_MEM_VM_F_HUGETLB_PREALLOC (1 << 4) + u32 flags; /**< vm allocation flags: +
CLIB_MEM_VM_F_SHARED: request shared memory, file + descriptor will be provided on successful allocation. +
CLIB_MEM_VM_F_HUGETLB: request hugepages. +
CLIB_MEM_VM_F_NUMA_PREFER: numa_node field contains valid + numa node preference. +
CLIB_MEM_VM_F_NUMA_FORCE: fail if setting numa policy fails. +
CLIB_MEM_VM_F_HUGETLB_PREALLOC: pre-allocate hugepages if + number of available pages is not sufficient. + */ + char *name; /**< Name for memory allocation, set by caller. */ + uword size; /**< Allocation size, set by caller. */ + int numa_node; /**< numa node preference. Valid if CLIB_MEM_VM_F_NUMA_PREFER set. */ + void *addr; /**< Pointer to allocated memory, set on successful allocation. */ + int fd; /**< File descriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set. */ + int log2_page_size; /* Page size in log2 format, set on successful allocation. */ + int n_pages; /* Number of pages. */ +} clib_mem_vm_alloc_t; + +clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a); +int clib_mem_vm_get_log2_page_size (int fd); +u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages); + #include /* clib_panic */ diff --git a/src/vppinfra/vm_linux_kernel.h b/src/vppinfra/vm_linux_kernel.h deleted file mode 100644 index fd9e6148..00000000 --- a/src/vppinfra/vm_linux_kernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_linux_kernel_h -#define included_vm_linux_kernel_h - -#include -#include /* for GFP_* */ -#include /* for PAGE_KERNEL */ - -/* Allocate virtual address space. 
*/ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return vmalloc (size); -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - vfree (addr); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_linux_kernel_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_standalone.h b/src/vppinfra/vm_standalone.h deleted file mode 100644 index 2cd431bc..00000000 --- a/src/vppinfra/vm_standalone.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_standalone_h -#define included_vm_standalone_h - -/* Stubs for standalone "system" which has no VM support. */ - -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return 0; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_standalone_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_unix.h b/src/vppinfra/vm_unix.h deleted file mode 100644 index 07e86516..00000000 --- a/src/vppinfra/vm_unix.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_unix_h -#define included_vm_unix_h - -#include -#include - -/* Allocate virtual address space. */ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE; - -#ifdef MAP_ANONYMOUS - flags |= MAP_ANONYMOUS; -#endif - - mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - munmap (addr, size); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - /* To unmap we "map" with no protection. If we actually called - munmap then other callers could steal the address space. 
By - changing to PROT_NONE the kernel can free up the pages which is - really what we want "unmap" to mean. */ - mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -#endif /* included_vm_unix_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From b2bcad6238b7e8a669ae29c74079eb9bb9fbb694 Mon Sep 17 00:00:00 2001 From: Chris Luke Date: Mon, 18 Sep 2017 08:51:22 -0400 Subject: Fixes for issues Coverity has reported (VPP-972) 177117: fstat() returns -1 on error; the code is checking for any positive value instead 175142: final return could never be reached; simple refactoring 175235,175236: Warning suppressed with an explicit cast to (void) 174817: Final return couldn't be reached; if is_in_order is 0 then 'rv' is already returned above 172095,172093: If id_is_set does not get set to 1, then return 0 has already been invoked 174405: Re-kill this (nothing sets rv) 171136: Looks like a cmd line flag to set test_bytes was missing; added it, and refactored the argc/argv processing to avoid two other potential segv's 176813: Add range checking for term width/height. First stab at a reasonable range is 1-512 for both.
175350: Fix implicit casting in shift operation 174272: Not a c+p error; try using a coverity annotation to ignore it 174273,175320: Annotated FORWARD_NULL Change-Id: I58d0f860fc2209f59f8d1b6b344d631b8d429ace Signed-off-by: Chris Luke --- src/uri/sock_test_client.c | 1 + src/uri/uri_socket_test.c | 24 ++++++++++++++++++++++-- src/uri/vppcom.c | 8 ++++++-- src/vlib/linux/physmem.c | 2 +- src/vlib/unix/cli.c | 30 ++++++++++++++++++++++++++++++ src/vnet/mpls/mpls_api.c | 2 +- src/vnet/session/session.c | 5 +---- src/vnet/session/session_cli.c | 37 +++++++------------------------------ src/vnet/session/session_lookup.c | 8 ++++---- src/vppinfra/linux/mem.c | 2 +- 10 files changed, 74 insertions(+), 45 deletions(-) (limited to 'src/vlib/linux/physmem.c') diff --git a/src/uri/sock_test_client.c b/src/uri/sock_test_client.c index ab8e5a0e..151c90b2 100644 --- a/src/uri/sock_test_client.c +++ b/src/uri/sock_test_client.c @@ -429,6 +429,7 @@ exit_client (void) tsock = &scm->test_socket[i]; tsock->cfg.test = SOCK_TEST_TYPE_EXIT; + /* coverity[COPY_PASTE_ERROR] */ if (ctrl->cfg.verbose) { printf ("\nCLIENT (fd %d): Sending exit cfg to server...\n", diff --git a/src/uri/uri_socket_test.c b/src/uri/uri_socket_test.c index 5f7084d5..4469b03d 100644 --- a/src/uri/uri_socket_test.c +++ b/src/uri/uri_socket_test.c @@ -36,8 +36,6 @@ main (int argc, char *argv[]) if (argc >= 3) { - bytes = ((long) atoi (argv[4])) << 20; - no_echo = atoi (argv[3]); portno = atoi (argv[2]); server = gethostbyname (argv[1]); if (server == NULL) @@ -45,6 +43,28 @@ main (int argc, char *argv[]) clib_unix_warning ("gethostbyname"); exit (1); } + + argc -= 3; + argv += 3; + + if (argc) + { + bytes = ((long) atoi (argv[0])) << 20; + argc--; + argv++; + } + if (argc) + { + no_echo = atoi (argv[0]); + argc--; + argv++; + } + if (argc) + { + test_bytes = atoi (argv[0]); + argc--; + argv++; + } } else { diff --git a/src/uri/vppcom.c b/src/uri/vppcom.c index 8a8a806c..c7ae0ea5 100644 --- a/src/uri/vppcom.c +++ 
b/src/uri/vppcom.c @@ -1478,7 +1478,7 @@ vppcom_cfg_read (char *conf_fname) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - unformat_user (input, unformat_line_input, line_input); + (void) unformat_user (input, unformat_line_input, line_input); unformat_skip_white_space (line_input); if (unformat (line_input, "vppcom {")) @@ -2359,12 +2359,14 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map, clib_bitmap_get (vcm->ex_bitmap, session_index) && (rv < 0)) { // TBD: clib_warning + /* coverity[FORWARD_NULL] */ clib_bitmap_set_no_check (except_map, session_index, 1); bits_set++; } else if (rv > 0) { // TBD: clib_warning + /* coverity[FORWARD_NULL] */ clib_bitmap_set_no_check (read_map, session_index, 1); bits_set++; } @@ -2387,9 +2389,10 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map, rv = vppcom_session_write_ready (session, session_index); clib_spinlock_unlock (&vcm->sessions_lockp); - if (rv > 0) + if (rv > 0 ) { // TBD: clib_warning + /* coverity[FORWARD_NULL] */ clib_bitmap_set_no_check (write_map, session_index, 1); bits_set++; } @@ -2415,6 +2418,7 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map, if (rv < 0) { // TBD: clib_warning + /* coverity[FORWARD_NULL] */ clib_bitmap_set_no_check (except_map, session_index, 1); bits_set++; } diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index 3cc42a06..6d3f7c55 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -157,7 +157,7 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, pr->mem = alloc.addr; pr->log2_page_size = alloc.log2_page_size; pr->n_pages = alloc.n_pages; - pr->size = pr->n_pages << pr->log2_page_size; + pr->size = (u64) pr->n_pages << (u64) pr->log2_page_size; pr->page_mask = (1 << pr->log2_page_size) - 1; pr->numa_node = numa_node; pr->name = format (0, "%s", name); diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 1567cc2a..1624ce38 100644 --- a/src/vlib/unix/cli.c +++ 
b/src/vlib/unix/cli.c @@ -91,6 +91,15 @@ * protocol message. This is a saftey measure. */ #define UNIX_CLI_MAX_DEPTH_TELNET 24 +/** Minimum terminal width we will accept */ +#define UNIX_CLI_MIN_TERMINAL_WIDTH 1 +/** Maximum terminal width we will accept */ +#define UNIX_CLI_MAX_TERMINAL_WIDTH 512 +/** Minimum terminal height we will accept */ +#define UNIX_CLI_MIN_TERMINAL_HEIGHT 1 +/** Maximum terminal height we will accept */ +#define UNIX_CLI_MAX_TERMINAL_HEIGHT 512 + /** Unix standard in */ #define UNIX_CLI_STDIN_FD 0 @@ -1164,10 +1173,21 @@ unix_cli_process_telnet (unix_main_t * um, /* Window size */ if (i != 8) /* check message is correct size */ break; + cf->width = clib_net_to_host_u16 (*((u16 *) (input_vector + 3))); + if (cf->width > UNIX_CLI_MAX_TERMINAL_WIDTH) + cf->width = UNIX_CLI_MAX_TERMINAL_WIDTH; + if (cf->width < UNIX_CLI_MIN_TERMINAL_WIDTH) + cf->width = UNIX_CLI_MIN_TERMINAL_WIDTH; + cf->height = clib_net_to_host_u16 (*((u16 *) (input_vector + 5))); + if (cf->height > UNIX_CLI_MAX_TERMINAL_HEIGHT) + cf->height = UNIX_CLI_MAX_TERMINAL_HEIGHT; + if (cf->height < UNIX_CLI_MIN_TERMINAL_HEIGHT) + cf->height = UNIX_CLI_MIN_TERMINAL_HEIGHT; + /* reindex pager buffer */ unix_cli_pager_reindex (cf); /* redraw page */ @@ -2539,8 +2559,18 @@ unix_cli_resize_interrupt (int signum) /* We can't trust ws.XXX... 
*/ return; } + cf->width = ws.ws_col; + if (cf->width > UNIX_CLI_MAX_TERMINAL_WIDTH) + cf->width = UNIX_CLI_MAX_TERMINAL_WIDTH; + if (cf->width < UNIX_CLI_MIN_TERMINAL_WIDTH) + cf->width = UNIX_CLI_MIN_TERMINAL_WIDTH; + cf->height = ws.ws_row; + if (cf->height > UNIX_CLI_MAX_TERMINAL_HEIGHT) + cf->height = UNIX_CLI_MAX_TERMINAL_HEIGHT; + if (cf->height < UNIX_CLI_MIN_TERMINAL_HEIGHT) + cf->height = UNIX_CLI_MIN_TERMINAL_HEIGHT; /* Reindex the pager buffer */ unix_cli_pager_reindex (cf); diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index 988c2c98..762c40ff 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -96,7 +96,7 @@ vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) else mpls_table_delete (ntohl (mp->mt_table_id), 1); - rv = (rv == 0) ? vnm->api_errno : rv; + // NB: Nothing sets rv; none of the above returns an error REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY); } diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 4544f9a0..792e6612 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -267,10 +267,7 @@ stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b, } } - if (is_in_order) - return enqueued; - - return 0; + return enqueued; } /** Check if we have space in rx fifo to push more bytes */ diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index d9f516be..8c30a1df 100755 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -127,13 +127,8 @@ unformat_stream_session_id (unformat_input_t * input, va_list * args) *is_ip4 = 0; tuple_is_set = 1; } - else - return 0; - - if (tuple_is_set) - return 1; - return 0; + return tuple_is_set; } uword @@ -144,21 +139,12 @@ unformat_stream_session (unformat_input_t * input, va_list * args) u8 proto = ~0; ip46_address_t lcl, rmt; u32 lcl_port = 0, rmt_port = 0; - u8 is_ip4 = 0, s_type = ~0, id_is_set = 0; + u8 is_ip4 = 0, s_type = ~0; - if (unformat 
(input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt, - &lcl_port, &rmt_port, &is_ip4)) - { - id_is_set = 1; - } - else + if (!unformat (input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt, + &lcl_port, &rmt_port, &is_ip4)) return 0; - if (!id_is_set) - { - return 0; - } - s_type = session_type_from_proto_and_ip (proto, is_ip4); if (is_ip4) s = stream_session_lookup4 (&lcl.ip4, &rmt.ip4, @@ -185,21 +171,12 @@ unformat_transport_connection (unformat_input_t * input, va_list * args) u8 proto = ~0; ip46_address_t lcl, rmt; u32 lcl_port = 0, rmt_port = 0; - u8 is_ip4 = 0, s_type = ~0, id_is_set = 0; + u8 is_ip4 = 0, s_type = ~0; - if (unformat (input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt, - &lcl_port, &rmt_port, &is_ip4)) - { - id_is_set = 1; - } - else + if (!unformat (input, "%U", unformat_stream_session_id, &proto, &lcl, &rmt, + &lcl_port, &rmt_port, &is_ip4)) return 0; - if (!id_is_set) - { - return 0; - } - proto = (proto == (u8) ~ 0) ? suggested_proto : proto; if (proto == (u8) ~ 0) return 0; diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 0f9abf9a..4487b1c3 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -233,15 +233,15 @@ stream_session_half_open_table_add (transport_connection_t * tc, u64 value) { make_v4_ss_kv_from_tc (&kv4, tc); kv4.value = value; - clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4, - 1 /* is_add */ ); + (void) clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4, + 1 /* is_add */ ); } else { make_v6_ss_kv_from_tc (&kv6, tc); kv6.value = value; - clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6, - 1 /* is_add */ ); + (void) clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6, + 1 /* is_add */ ); } } diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index 665ddf61..df46763a 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -49,7 +49,7 @@ int clib_mem_vm_get_log2_page_size (int 
fd) { struct stat st = { 0 }; - if (fstat (fd, &st)) + if (fstat (fd, &st) == -1) return 0; return min_log2 (st.st_blksize); } -- cgit 1.2.3-korg