diff options
Diffstat (limited to 'src/svm')
-rw-r--r-- | src/svm/memfd.c | 137 | ||||
-rw-r--r-- | src/svm/memfd.h | 194 | ||||
-rw-r--r-- | src/svm/svm.c | 193 | ||||
-rw-r--r-- | src/svm/svm_common.h | 2 | ||||
-rw-r--r-- | src/svm/svmdb.c | 4 |
5 files changed, 437 insertions, 93 deletions
diff --git a/src/svm/memfd.c b/src/svm/memfd.c
new file mode 100644
index 00000000000..9fe487db788
--- /dev/null
+++ b/src/svm/memfd.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "memfd.h"
+
+/*
+ * Create, size and map a new memfd segment, then set up its shared
+ * header and allocation heap.
+ *
+ * memfd->memfd_size must be non-zero. When memfd->requested_va is set,
+ * the segment is mapped MAP_FIXED at that address plus an offset of
+ * 0..15 pages taken from the cpu tick counter.
+ *
+ * Returns 0 on success, or a negative MEMFD_API_ERROR_* value.
+ */
+int
+memfd_master_init (memfd_private_t * memfd, u32 master_index)
+{
+  int flags;
+  memfd_shared_header_t *sh;
+  u64 ticks = clib_cpu_time_now ();
+  u64 randomize_baseva;
+  void *oldheap;
+
+  if (memfd->memfd_size == 0)
+    return MEMFD_API_ERROR_NO_SIZE;
+
+  ASSERT (vec_c_string_is_terminated (memfd->name));
+  /* format () does not NUL-terminate; memfd_create () and %s need it */
+  memfd->name = format (0, "memfd svm region %d%c", master_index, 0);
+
+  memfd->fd = memfd_create ((char *) memfd->name, MFD_ALLOW_SEALING);
+  if (memfd->fd < 0)
+    {
+      clib_unix_warning ("create segment '%s'", memfd->name);
+      return MEMFD_API_ERROR_CREATE_FAILURE;
+    }
+
+  if ((ftruncate (memfd->fd, memfd->memfd_size)) == -1)
+    {
+      clib_unix_warning ("set memfd size");
+      close (memfd->fd);
+      return MEMFD_API_ERROR_SET_SIZE;
+    }
+
+  /* Best effort: a failed seal is reported but is not fatal */
+  if ((fcntl (memfd->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
+    clib_unix_warning ("fcntl (F_ADD_SEALS, F_SEAL_SHRINK)");
+
+  flags = MAP_SHARED;
+  if (memfd->requested_va)
+    flags |= MAP_FIXED;
+
+  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
+
+  if (memfd->requested_va)
+    memfd->requested_va += randomize_baseva;
+
+  sh = memfd->sh =
+    (memfd_shared_header_t *) mmap ((void *) memfd->requested_va,
+                                    memfd->memfd_size, PROT_READ | PROT_WRITE,
+                                    flags, memfd->fd, 0);
+
+  if (memfd->sh == MAP_FAILED)
+    {
+      clib_unix_warning ("mmap");
+      close (memfd->fd);
+      return MEMFD_API_ERROR_MMAP;
+    }
+
+  memfd->my_pid = getpid ();
+  sh->master_pid = memfd->my_pid;
+  sh->memfd_size = memfd->memfd_size;
+  /* The first page holds the shared header; the heap gets the rest */
+  sh->heap = mheap_alloc_with_flags
+    (((u8 *) sh) + MMAP_PAGESIZE, memfd->memfd_size - MMAP_PAGESIZE,
+     MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE);
+
+  sh->memfd_va = pointer_to_uword (sh);
+  sh->master_index = master_index;
+
+  /* Copy the name into the segment's own heap */
+  oldheap = memfd_push_heap (sh);
+  sh->name = format (0, "%s%c", memfd->name, 0);
+  memfd_pop_heap (oldheap);
+
+  memfd->i_am_master = 1;
+
+  /* The application has to set sh->ready... */
+  return 0;
+}
+
+/*
+ * Map an existing memfd segment at the address its master recorded.
+ *
+ * Subtly different than svm_slave_init. The caller
+ * needs to acquire a usable file descriptor for the memfd segment
+ * e.g. via vppinfra/socket.c:default_socket_recvmsg
+ *
+ * Returns 0 on success, or MEMFD_API_ERROR_MMAP (fd is closed).
+ */
+
+int
+memfd_slave_init (memfd_private_t * memfd)
+{
+  memfd_shared_header_t *sh;
+
+  memfd->i_am_master = 0;
+
+  /* Map the segment once, to look at the shared header */
+  sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+                      memfd->fd, 0);
+  if (sh == MAP_FAILED)
+    {
+      clib_unix_warning ("slave research mmap");
+      close (memfd->fd);
+      return MEMFD_API_ERROR_MMAP;
+    }
+
+  memfd->requested_va = (u64) sh->memfd_va;
+  memfd->memfd_size = sh->memfd_size;
+  munmap (sh, MMAP_PAGESIZE);
+
+  /* Map again, full size, at the master's address */
+  sh = memfd->sh =
+    (void *) mmap ((void *) memfd->requested_va, memfd->memfd_size,
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_FIXED, memfd->fd, 0);
+
+  if (sh == MAP_FAILED)
+    {
+      clib_unix_warning ("slave final mmap");
+      close (memfd->fd);
+      return MEMFD_API_ERROR_MMAP;
+    }
+  sh->slave_pid = getpid ();
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/memfd.h b/src/svm/memfd.h new file mode 100644 index 00000000000..3ed4a9ab067 --- /dev/null +++ b/src/svm/memfd.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2017 Cisco and/or its
affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_memfd_h__
+#define __included_memfd_h__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+/* DGMS, memfd syscall not in glibc... */
+#include <vppinfra/linux/syscall.h>
+
+#ifndef MMAP_PAGESIZE
+#define MMAP_PAGESIZE (clib_mem_get_page_size())
+#endif
+
+#define MEMFD_N_OPAQUE 7
+
+typedef struct
+{
+  /* Spin-lock */
+  volatile u32 lock;
+  volatile u32 owner_pid;
+  int recursion_count;
+  u32 tag;                      /* for debugging */
+
+  /* The allocation arena */
+  void *heap;
+
+  /* Segment must be mapped at this address, or no supper */
+  u64 memfd_va;
+  /* The actual mmap size */
+  u64 memfd_size;
+  u32 master_pid;
+  u32 slave_pid;
+  u8 *name;
+  void *opaque[MEMFD_N_OPAQUE];
+
+  /* Set when the master application thinks it's time to make the donuts */
+  volatile u32 ready;
+
+  /* Needed to make unique MAC addresses, etc. */
+  u32 master_index;
+} memfd_shared_header_t;
+
+typedef struct
+{
+  memfd_shared_header_t *sh;    /* mapped shared header */
+  int fd;                       /* memfd segment file descriptor */
+  u64 memfd_size;               /* segment size in bytes */
+  u32 my_pid;                   /* set by memfd_master_init */
+  u32 vlib_hw_if_index;
+  uword requested_va;           /* mmap address; 0 = kernel's choice */
+  int i_am_master;              /* 1 = master, 0 = slave */
+  u32 per_interface_next_index;
+  u32 *rx_queue;
+  u8 *name;
+} memfd_private_t;
+
+always_inline void
+memfd_lock (memfd_shared_header_t * h, u32 my_pid, u32 tag)
+{
+  if (h->owner_pid == my_pid)
+    {
+      h->recursion_count++;     /* already ours: just nest */
+      return;
+    }
+
+  while (__sync_lock_test_and_set (&h->lock, 1))
+    ;                           /* spin until the lock word was 0 */
+
+  h->owner_pid = my_pid;
+  h->recursion_count = 1;
+  h->tag = tag;
+}
+
+always_inline void
+memfd_lock_non_recursive (memfd_shared_header_t * h, u32 tag)
+{
+  while (__sync_lock_test_and_set (&h->lock, 1))
+    ;                           /* spin until the lock word was 0 */
+
+  h->tag = tag;
+}
+
+always_inline void
+memfd_unlock (memfd_shared_header_t * h)
+{
+  if (--h->recursion_count == 0)
+    {
+      h->owner_pid = 0;
+      h->tag = 0;
+      CLIB_MEMORY_BARRIER ();   /* publish the stores before releasing */
+      h->lock = 0;
+    }
+}
+
+always_inline void
+memfd_unlock_non_recursive (memfd_shared_header_t * h)
+{
+  h->tag = 0;
+  CLIB_MEMORY_BARRIER ();       /* publish the store before releasing */
+  h->lock = 0;
+}
+
+static inline void *
+memfd_push_heap (memfd_shared_header_t * sh)
+{
+  u8 *oldheap;
+  oldheap = clib_mem_set_heap (sh->heap);
+  return ((void *) oldheap);    /* hand back to memfd_pop_heap */
+}
+
+static inline void
+memfd_pop_heap (void *oldheap)
+{
+  clib_mem_set_heap (oldheap);
+}
+
+#define foreach_memfd_api_error \
+_(NO_NAME, "No shared segment name", -100) \
+_(NO_SIZE, "Size not set (master)", -101) \
+_(CREATE_FAILURE, "Create failed", -102) \
+_(SET_SIZE, "Set size failed", -103) \
+_(MMAP, "mmap failed", -104) \
+_(SLAVE_TIMEOUT, "Slave map timeout", -105)
+
+typedef enum
+{
+#define _(n,s,c) MEMFD_API_ERROR_##n = c,
+  foreach_memfd_api_error
+#undef _
+} memfd_api_error_enum_t;
+
+#define MEMFD_API_ERROR_NO_NAME (-100)  /* same value as the table entry */
+
+int memfd_master_init (memfd_private_t * memfd, u32 master_index);
+int memfd_slave_init (memfd_private_t * memfd);
+void memfd_delete (memfd_private_t * memfd);
+
+/* These do not belong here, but the original keeps running around... */
+/* $$$$ work w/ Damjan to fix properly */
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+#define MFD_ALLOW_SEALING 0x0002U
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001      /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002    /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004      /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008     /* prevent writes */
+
+#endif /* __included_memfd_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm.c b/src/svm/svm.c index f97803cd7af..c54f9730094 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -434,6 +434,107 @@ shm_name_from_svm_map_region_args (svm_map_region_args_t * a) return (shm_name); } +void +svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp) +{ + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + int nbits, words, bit; + int overhead_space; + void *oldheap; + uword data_base; + ASSERT (rp); + int rv; + + memset (rp, 0, sizeof (*rp)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("mutexattr_setpshared"); + + if (pthread_mutex_init (&rp->mutex, &attr)) + clib_unix_warning ("mutex_init"); + + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + + if (pthread_cond_init (&rp->condvar, &cattr)) + clib_unix_warning ("cond_init"); + + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("condattr_destroy"); + + region_lock (rp, 1); + + rp->virtual_base = a->baseva; + rp->virtual_size = 
a->size; + + rp->region_heap = + mheap_alloc_with_flags (uword_to_pointer + (a->baseva + MMAP_PAGESIZE, void *), + (a->pvt_heap_size != + 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + MHEAP_FLAG_DISABLE_VM); + oldheap = svm_push_pvt_heap (rp); + + rp->region_name = (char *) format (0, "%s%c", a->name, 0); + vec_add1 (rp->client_pids, getpid ()); + + nbits = rp->virtual_size / MMAP_PAGESIZE; + + ASSERT (nbits > 0); + rp->bitmap_size = nbits; + words = (nbits + BITS (uword) - 1) / BITS (uword); + vec_validate (rp->bitmap, words - 1); + + overhead_space = MMAP_PAGESIZE /* header */ + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + + bit = 0; + data_base = (uword) rp->virtual_base; + + if (a->flags & SVM_FLAGS_NODATA) + rp->flags |= SVM_FLAGS_NEED_DATA_INIT; + + do + { + clib_bitmap_set_no_check (rp->bitmap, bit, 1); + bit++; + overhead_space -= MMAP_PAGESIZE; + data_base += MMAP_PAGESIZE; + } + while (overhead_space > 0); + + rp->data_base = (void *) data_base; + + /* + * Note: although the POSIX spec guarantees that only one + * process enters this block, we have to play games + * to hold off clients until e.g. 
the mutex is ready + */ + rp->version = SVM_VERSION; + + /* setup the data portion of the region */ + + rv = svm_data_region_create (a, rp); + if (rv) + { + clib_warning ("data_region_create: %d", rv); + } + + region_unlock (rp); + + svm_pop_heap (oldheap); +} + /* * svm_map_region */ @@ -442,15 +543,10 @@ svm_map_region (svm_map_region_args_t * a) { int svm_fd; svm_region_t *rp; - pthread_mutexattr_t attr; - pthread_condattr_t cattr; int deadman = 0; u8 junk = 0; void *oldheap; - int overhead_space; int rv; - uword data_base; - int nbits, words, bit; int pid_holding_region_lock; u8 *shm_name; int dead_region_recovery = 0; @@ -502,93 +598,8 @@ svm_map_region (svm_map_region_args_t * a) return (0); } close (svm_fd); - memset (rp, 0, sizeof (*rp)); - - if (pthread_mutexattr_init (&attr)) - clib_unix_warning ("mutexattr_init"); - - if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) - clib_unix_warning ("mutexattr_setpshared"); - - if (pthread_mutex_init (&rp->mutex, &attr)) - clib_unix_warning ("mutex_init"); - if (pthread_mutexattr_destroy (&attr)) - clib_unix_warning ("mutexattr_destroy"); - - if (pthread_condattr_init (&cattr)) - clib_unix_warning ("condattr_init"); - - if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) - clib_unix_warning ("condattr_setpshared"); - - if (pthread_cond_init (&rp->condvar, &cattr)) - clib_unix_warning ("cond_init"); - - if (pthread_condattr_destroy (&cattr)) - clib_unix_warning ("condattr_destroy"); - - region_lock (rp, 1); - - rp->virtual_base = a->baseva; - rp->virtual_size = a->size; - - rp->region_heap = - mheap_alloc_with_flags (uword_to_pointer - (a->baseva + MMAP_PAGESIZE, void *), - (a->pvt_heap_size != - 0) ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, - MHEAP_FLAG_DISABLE_VM); - oldheap = svm_push_pvt_heap (rp); - - rp->region_name = (char *) format (0, "%s%c", a->name, 0); - vec_add1 (rp->client_pids, getpid ()); - - nbits = rp->virtual_size / MMAP_PAGESIZE; - - ASSERT (nbits > 0); - rp->bitmap_size = nbits; - words = (nbits + BITS (uword) - 1) / BITS (uword); - vec_validate (rp->bitmap, words - 1); - - overhead_space = MMAP_PAGESIZE /* header */ + - ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); - - bit = 0; - data_base = (uword) rp->virtual_base; - - if (a->flags & SVM_FLAGS_NODATA) - rp->flags |= SVM_FLAGS_NEED_DATA_INIT; - - do - { - clib_bitmap_set_no_check (rp->bitmap, bit, 1); - bit++; - overhead_space -= MMAP_PAGESIZE; - data_base += MMAP_PAGESIZE; - } - while (overhead_space > 0); - - rp->data_base = (void *) data_base; - - /* - * Note: although the POSIX spec guarantees that only one - * process enters this block, we have to play games - * to hold off clients until e.g. 
the mutex is ready - */ - rp->version = SVM_VERSION; - - /* setup the data portion of the region */ - - rv = svm_data_region_create (a, rp); - if (rv) - { - clib_warning ("data_region_create: %d", rv); - } - - region_unlock (rp); - - svm_pop_heap (oldheap); + svm_region_init_mapped_region (a, rp); return ((void *) rp); } diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h index ea3ec87a212..a7160286a44 100644 --- a/src/svm/svm_common.h +++ b/src/svm/svm_common.h @@ -112,6 +112,8 @@ typedef struct void *svm_region_find_or_create (svm_map_region_args_t * a); void svm_region_init (void); +void svm_region_init_mapped_region (svm_map_region_args_t * a, + svm_region_t * rp); int svm_region_init_chroot (const char *root_path); void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); void svm_region_init_args (svm_map_region_args_t * a); diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c index 043b0924d84..03aa1f17494 100644 --- a/src/svm/svmdb.c +++ b/src/svm/svmdb.c @@ -456,7 +456,7 @@ svmdb_local_serialize_strings (svmdb_client_t * client, char *filename) goto out; } - serialize_open_unix_file_descriptor (sm, fd); + serialize_open_clib_file_descriptor (sm, fd); region_lock (client->db_rp, 20); @@ -512,7 +512,7 @@ svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename) goto out; } - unserialize_open_unix_file_descriptor (sm, fd); + unserialize_open_clib_file_descriptor (sm, fd); region_lock (client->db_rp, 21); oldheap = svm_push_data_heap (client->db_rp); |