diff options
Diffstat (limited to 'src/svm')
-rw-r--r-- | src/svm/dir.dox | 21 | ||||
-rw-r--r-- | src/svm/persist.c | 258 | ||||
-rw-r--r-- | src/svm/ssvm.c | 178 | ||||
-rw-r--r-- | src/svm/ssvm.h | 155 | ||||
-rw-r--r-- | src/svm/svm.c | 1237 | ||||
-rw-r--r-- | src/svm/svm.h | 207 | ||||
-rw-r--r-- | src/svm/svm_test.c | 79 | ||||
-rw-r--r-- | src/svm/svmdb.c | 671 | ||||
-rw-r--r-- | src/svm/svmdb.h | 135 | ||||
-rw-r--r-- | src/svm/svmdbtool.c | 537 | ||||
-rw-r--r-- | src/svm/svmtool.c | 528 |
11 files changed, 4006 insertions, 0 deletions
diff --git a/src/svm/dir.dox b/src/svm/dir.dox new file mode 100644 index 00000000000..83246979ca8 --- /dev/null +++ b/src/svm/dir.dox @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2016 Comcast Cable Communications Management, LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Doxygen directory documentation */ +/** +@dir +@brief Shared virtual memory allocation library. +*/ diff --git a/src/svm/persist.c b/src/svm/persist.c new file mode 100644 index 00000000000..023c596b9cf --- /dev/null +++ b/src/svm/persist.c @@ -0,0 +1,258 @@ +/* + *------------------------------------------------------------------ + * persist.c - persistent data structure storage test / demo code + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/serialize.h>
+#include <svmdb.h>
+
+/* Program-wide state: the handle of the mapped svmdb database. */
+typedef struct
+{
+  svmdb_client_t *c;
+} persist_main_t;
+
+persist_main_t persist_main;
+
+/* Second-level demo object: two NUL-terminated strings. */
+typedef struct
+{
+  u8 *string1;
+  u8 *string2;
+} demo_struct2_t;
+
+/* Top-level demo object: a name plus a pointer to the second level. */
+typedef struct
+{
+  demo_struct2_t *demo2;
+  u8 *name;
+} demo_struct1_t;
+
+/*
+ * persist_malloc - data structures in persistent shared memory, all the time
+ *
+ * Looks up the "demo1_location" variable in the database. When it is
+ * missing, builds a demo_struct1_t / demo_struct2_t pair (and their
+ * strings) on the shared-memory data heap and records the root under that
+ * name. When it is present, prints the stored strings to stdout.
+ *
+ * Always returns 0 (no error).
+ */
+clib_error_t *
+persist_malloc (persist_main_t * pm)
+{
+  demo_struct2_t *demo2;
+  demo_struct1_t *demo1;
+  time_t starttime = time (0);
+  char *datestring = ctime (&starttime);
+  void *oldheap;
+
+  /* Get back the root pointer */
+  demo1 = svmdb_local_get_variable_reference
+    (pm->c, SVMDB_NAMESPACE_VEC, "demo1_location");
+
+  /* It doesn't exist yet: create our data structures */
+  if (demo1 == 0)
+    {
+      /* If you want MP / thread safety, lock the region... */
+      pthread_mutex_lock (&pm->c->db_rp->mutex);
+
+      /* Switch to the shared memory region heap */
+      oldheap = svm_push_data_heap (pm->c->db_rp);
+
+      /* Allocate the top-level structure as a single element vector */
+      vec_validate (demo1, 0);
+
+      /* Allocate the next-level structure as a plain old memory obj */
+      demo2 = clib_mem_alloc (sizeof (*demo2));
+
+      demo1->demo2 = demo2;
+      demo1->name = format (0, "My name is Ishmael%c", 0);
+      demo2->string1 = format (0, "Here is string1%c", 0);
+      demo2->string2 = format (0, "Born at %s%c", datestring, 0);
+
+      /* Back to the process-private heap */
+      svm_pop_heap (oldheap);
+      pthread_mutex_unlock (&pm->c->db_rp->mutex);
+
+      /*
+       * Set the root pointer. Note: this guy switches heaps, locks, etc.
+       * We allocated demo1 as a vector to make this "just work..."
+       *
+       * The last argument is the size of one vector element (compare the
+       * sizeof (u8) used for the u8 checkpoint vector below), so it has
+       * to be sizeof (*demo1). sizeof (demo1) is only the size of a
+       * pointer and would describe half of the structure on LP64.
+       */
+      svmdb_local_set_vec_variable (pm->c, "demo1_location",
+                                    demo1, sizeof (*demo1));
+
+    }
+  else
+    {
+      /* retrieve and print data from shared memory */
+      demo2 = demo1->demo2;
+      fformat (stdout, "name: %s\n", demo1->name);
+      fformat (stdout, "demo2 location: %llx\n", demo2);
+      fformat (stdout, "string1: %s\n", demo2->string1);
+      fformat (stdout, "string2: %s\n", demo2->string2);
+    }
+  return 0;
+}
+
+/*
+ * unserialize_demo1 - rebuild a demo_struct1_t from a checkpoint.
+ *
+ * The single variadic argument is a demo_struct1_t ** that receives the
+ * newly allocated top-level object. The three strings are read back in
+ * the order serialize_demo1 wrote them: name, string1, string2.
+ */
+void
+unserialize_demo1 (serialize_main_t * sm, va_list * args)
+{
+  demo_struct1_t **result = va_arg (*args, demo_struct1_t **);
+  demo_struct1_t *demo1;
+  demo_struct2_t *demo2;
+
+  /* Allocate data structures in process private memory */
+  demo1 = clib_mem_alloc (sizeof (*demo1));
+  demo2 = clib_mem_alloc (sizeof (*demo2));
+  demo1->demo2 = demo2;
+
+  /* retrieve data from shared memory checkpoint */
+  unserialize_cstring (sm, (char **) &demo1->name);
+  unserialize_cstring (sm, (char **) &demo2->string1);
+  unserialize_cstring (sm, (char **) &demo2->string2);
+  *result = demo1;
+}
+
+/*
+ * serialize_demo1 - write a demo_struct1_t into a checkpoint.
+ *
+ * The single variadic argument is the demo_struct1_t * to serialize.
+ * Emits name, string1 and string2 as C strings, in that order.
+ */
+void
+serialize_demo1 (serialize_main_t * sm, va_list * args)
+{
+  demo_struct1_t *demo1 = va_arg (*args, demo_struct1_t *);
+  demo_struct2_t *demo2 = demo1->demo2;
+
+  serialize_cstring (sm, (char *) demo1->name);
+  serialize_cstring (sm, (char *) demo2->string1);
+  serialize_cstring (sm, (char *) demo2->string2);
+}
+
+/*
+ * persist_serialize - serialize / unserialize variant
+ *
+ * Looks up the "demo1_checkpoint" u8 vector in the database. When it is
+ * missing, builds the demo objects in process-private memory, serializes
+ * them and stores the resulting byte vector under that name. When it is
+ * present, unserializes it into fresh private objects and prints them.
+ *
+ * Always returns 0 (no error).
+ */
+clib_error_t *
+persist_serialize (persist_main_t * pm)
+{
+  u8 *checkpoint;
+  serialize_main_t sm;
+
+  demo_struct2_t *demo2;
+  demo_struct1_t *demo1;
+  time_t starttime = time (0);
+  char *datestring = ctime (&starttime);
+
+  /* Get back the root pointer */
+  checkpoint = svmdb_local_get_vec_variable (pm->c, "demo1_checkpoint",
+                                             sizeof (u8));
+
+  /* It doesn't exist yet: create our data structures */
+  if (checkpoint == 0)
+    {
+      /*
+       * Allocate data structures in process-private memory.
+       * demo1 is a plain object here, sized by its own type; it must not
+       * be handed to vec_validate, which expects a vector (or 0).
+       */
+      demo1 = clib_mem_alloc (sizeof (*demo1));
+      demo2 = clib_mem_alloc (sizeof (*demo2));
+
+      demo1->demo2 = demo2;
+      demo1->name = format (0, "My name is Ishmael%c", 0);
+      demo2->string1 = format (0, "Here is string1%c", 0);
+      demo2->string2 = format (0, "Born at %s%c", datestring, 0);
+
+      /* Create checkpoint (checkpoint is 0 here, so start a new vector) */
+      serialize_open_vector (&sm, checkpoint);
+      serialize (&sm, serialize_demo1, demo1);
+      checkpoint = serialize_close_vector (&sm);
+
+      /* Copy checkpoint into shared memory */
+      svmdb_local_set_vec_variable (pm->c, "demo1_checkpoint",
+                                    checkpoint, sizeof (u8));
+      /* Toss the process-private-memory original.. */
+      vec_free (checkpoint);
+    }
+  else
+    {
+      /* Open the checkpoint */
+      unserialize_open_data (&sm, checkpoint, vec_len (checkpoint));
+      unserialize (&sm, unserialize_demo1, &demo1);
+
+      /* Toss the process-private-memory checkpoint copy */
+      vec_free (checkpoint);
+
+      /* Off we go... */
+      demo2 = demo1->demo2;
+      fformat (stdout, "name: %s\n", demo1->name);
+      fformat (stdout, "demo2 location: %llx\n", demo2);
+      fformat (stdout, "string1: %s\n", demo2->string1);
+      fformat (stdout, "string2: %s\n", demo2->string2);
+    }
+  return 0;
+}
+
+
+/*
+ * main - map the "/ptest" database and run the demos named on the command
+ * line ("malloc" and/or "serialize", in the order given).
+ *
+ * Exits with status 1 after reporting the error if an unknown keyword is
+ * seen; otherwise returns 0.
+ */
+int
+main (int argc, char **argv)
+{
+  unformat_input_t _input, *input = &_input;
+  persist_main_t *pm = &persist_main;
+  clib_error_t *error = 0;
+
+  /* Make a 4mb database arena, chroot so it's truly private */
+  pm->c = svmdb_map_chroot_size ("/ptest", 4 << 20);
+
+  ASSERT (pm->c);
+
+  unformat_init_command_line (input, argv);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "malloc"))
+        error = persist_malloc (pm);
+      else if (unformat (input, "serialize"))
+        error = persist_serialize (pm);
+      else
+        {
+          error = clib_error_return (0, "Unknown flavor '%U'",
+                                     format_unformat_error, input);
+          break;
+        }
+    }
+
+  /* Unmap before reporting so the database is released on every path */
+  svmdb_unmap (pm->c);
+
+  if (error)
+    {
+      clib_error_report (error);
+      exit (1);
+    }
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c
new file mode 100644
index 00000000000..6f409eb68b6
--- /dev/null
+++ b/src/svm/ssvm.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ssvm.h"
+
+/*
+ * ssvm_master_init - create and map a shared segment as its master.
+ *
+ * ssvm->name and ssvm->ssvm_size must be set by the caller; a non-zero
+ * ssvm->requested_va asks for a fixed mapping address (a small random
+ * page offset is added to it). On success the shared header and a
+ * thread-safe heap occupying everything after the first page are set up,
+ * ssvm->sh points at the mapping and ssvm->i_am_master is 1.
+ *
+ * Returns 0 on success or a negative SSVM_API_ERROR_* code.
+ */
+int
+ssvm_master_init (ssvm_private_t * ssvm, u32 master_index)
+{
+  int ssvm_fd;
+  u8 *ssvm_filename;
+  u8 junk = 0;
+  int flags;
+  ssvm_shared_header_t *sh;
+  u64 ticks = clib_cpu_time_now ();
+  u64 randomize_baseva;
+  void *oldheap;
+
+  if (ssvm->ssvm_size == 0)
+    return SSVM_API_ERROR_NO_SIZE;
+
+  /* Remove any stale segment so the O_EXCL create below can succeed */
+  ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0);
+
+  unlink ((char *) ssvm_filename);
+
+  vec_free (ssvm_filename);
+
+  ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR | O_CREAT | O_EXCL, 0777);
+
+  if (ssvm_fd < 0)
+    {
+      clib_unix_warning ("create segment '%s'", ssvm->name);
+      return SSVM_API_ERROR_CREATE_FAILURE;
+    }
+
+  /* Size the backing object: seek to ssvm_size and write a single byte */
+  if (lseek (ssvm_fd, ssvm->ssvm_size, SEEK_SET) < 0)
+    {
+      clib_unix_warning ("lseek");
+      close (ssvm_fd);
+      return SSVM_API_ERROR_SET_SIZE;
+    }
+
+  if (write (ssvm_fd, &junk, 1) != 1)
+    {
+      clib_unix_warning ("set ssvm size");
+      close (ssvm_fd);
+      return SSVM_API_ERROR_SET_SIZE;
+    }
+
+  flags = MAP_SHARED;
+  if (ssvm->requested_va)
+    flags |= MAP_FIXED;
+
+  /* 0..15 pages of offset, applied only when a base va was requested */
+  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
+
+  if (ssvm->requested_va)
+    ssvm->requested_va += randomize_baseva;
+
+  sh = ssvm->sh =
+    (ssvm_shared_header_t *) mmap ((void *) ssvm->requested_va,
+                                   ssvm->ssvm_size, PROT_READ | PROT_WRITE,
+                                   flags, ssvm_fd, 0);
+
+  if (ssvm->sh == MAP_FAILED)
+    {
+      clib_unix_warning ("mmap");
+      close (ssvm_fd);
+      return SSVM_API_ERROR_MMAP;
+    }
+
+  /* The mapping stays valid after the descriptor is closed */
+  close (ssvm_fd);
+
+  ssvm->my_pid = getpid ();
+  sh->master_pid = ssvm->my_pid;
+  sh->ssvm_size = ssvm->ssvm_size;
+  /* First page holds the header; the heap takes the remainder */
+  sh->heap = mheap_alloc_with_flags
+    (((u8 *) sh) + MMAP_PAGESIZE, ssvm->ssvm_size - MMAP_PAGESIZE,
+     MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE);
+
+  /* Published so the slave can map the segment at the same address */
+  sh->ssvm_va = pointer_to_uword (sh);
+  sh->master_index = master_index;
+
+  /* The segment name lives on the shared heap */
+  oldheap = ssvm_push_heap (sh);
+  sh->name = format (0, "%s%c", ssvm->name, 0);
+  ssvm_pop_heap (oldheap);
+
+  ssvm->i_am_master = 1;
+
+  /* The application has to set sh->ready... */
+  return 0;
+}
+
+/*
+ * ssvm_slave_init - attach to a segment created by ssvm_master_init.
+ *
+ * Waits up to timeout_in_seconds (polling once per second) for the named
+ * segment to appear with a non-zero size and then for the master to set
+ * sh->ready; the two waits share the one timeout budget. The header page
+ * is mapped first to learn the master's address and size, then the whole
+ * segment is remapped MAP_FIXED at that address.
+ *
+ * On success ssvm->sh, ssvm->requested_va and ssvm->ssvm_size are filled
+ * in and sh->slave_pid is set. Returns 0 on success or a negative
+ * SSVM_API_ERROR_* code. The descriptor is closed on every path.
+ */
+int
+ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds)
+{
+  struct stat stat;
+  int ssvm_fd = -1;
+  ssvm_shared_header_t *sh;
+
+  ssvm->i_am_master = 0;
+
+  /* Phase 1: wait for the segment to exist and to have been sized */
+  while (timeout_in_seconds-- > 0)
+    {
+      if (ssvm_fd < 0)
+        ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR, 0777);
+
+      if (ssvm_fd >= 0 && fstat (ssvm_fd, &stat) == 0 && stat.st_size > 0)
+        goto map_it;
+
+      /* Not there (or still empty): back off instead of spinning */
+      sleep (1);
+    }
+  if (ssvm_fd >= 0)
+    close (ssvm_fd);
+  clib_warning ("slave timeout");
+  return SSVM_API_ERROR_SLAVE_TIMEOUT;
+
+map_it:
+  sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+                      ssvm_fd, 0);
+  if (sh == MAP_FAILED)
+    {
+      clib_unix_warning ("slave research mmap");
+      close (ssvm_fd);
+      return SSVM_API_ERROR_MMAP;
+    }
+
+  /*
+   * Phase 2: wait for the master to declare the segment ready. Check at
+   * least once, then sleep between polls so the remaining budget is
+   * spent in seconds rather than burned in a tight loop.
+   */
+  for (;;)
+    {
+      if (sh->ready)
+        goto re_map_it;
+      if (timeout_in_seconds-- <= 0)
+        break;
+      sleep (1);
+    }
+  close (ssvm_fd);
+  munmap (sh, MMAP_PAGESIZE);
+  clib_warning ("slave timeout 2");
+  return SSVM_API_ERROR_SLAVE_TIMEOUT;
+
+re_map_it:
+  /* Adopt the master's address and size, then drop the probe mapping */
+  ssvm->requested_va = (u64) sh->ssvm_va;
+  ssvm->ssvm_size = sh->ssvm_size;
+  munmap (sh, MMAP_PAGESIZE);
+
+  sh = ssvm->sh = (void *) mmap ((void *) ssvm->requested_va, ssvm->ssvm_size,
+                                 PROT_READ | PROT_WRITE,
+                                 MAP_SHARED | MAP_FIXED, ssvm_fd, 0);
+
+  if (sh == MAP_FAILED)
+    {
+      clib_unix_warning ("slave final mmap");
+      close (ssvm_fd);
+      return SSVM_API_ERROR_MMAP;
+    }
+
+  /* As in the master: the mapping outlives the descriptor */
+  close (ssvm_fd);
+
+  sh->slave_pid = getpid ();
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h
new file mode 100644
index 00000000000..9e61b9a0827
--- /dev/null
+++ b/src/svm/ssvm.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ssvm_h__
+#define __included_ssvm_h__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#define MMAP_PAGESIZE (4<<10)
+#define SSVM_N_OPAQUE 7
+
+/* Header placed at the start of every shared segment. */
+typedef struct
+{
+  /* Spin-lock word, current holder, and nesting depth of that holder */
+  volatile u32 lock;
+  volatile u32 owner_pid;
+  int recursion_count;
+  /* Caller-supplied marker recorded with the lock, for debugging */
+  u32 tag;
+
+  /* The allocation arena */
+  void *heap;
+
+  /* Address the segment has to be mapped at */
+  u64 ssvm_va;
+  /* Size of the mapping */
+  u64 ssvm_size;
+  u32 master_pid;
+  u32 slave_pid;
+  u8 *name;
+  void *opaque[SSVM_N_OPAQUE];
+
+  /* Set by the master application once the segment may be used */
+  volatile u32 ready;
+
+  /* Needed to make unique MAC addresses, etc. */
+  u32 master_index;
+} ssvm_shared_header_t;
+
+/* Per-process bookkeeping for one shared segment. */
+typedef struct
+{
+  ssvm_shared_header_t *sh;
+  u64 ssvm_size;
+  u32 my_pid;
+  u32 vlib_hw_if_index;
+  u8 *name;
+  uword requested_va;
+  int i_am_master;
+  u32 per_interface_next_index;
+  u32 *rx_queue;
+} ssvm_private_t;
+
+/*
+ * Take the segment spin-lock on behalf of my_pid.
+ * If owner_pid already equals my_pid only the nesting count is bumped;
+ * otherwise spin on the lock word, then record owner, depth 1 and tag.
+ */
+always_inline void
+ssvm_lock (ssvm_shared_header_t * h, u32 my_pid, u32 tag)
+{
+  if (h->owner_pid != my_pid)
+    {
+      while (__sync_lock_test_and_set (&h->lock, 1))
+        ;
+
+      h->owner_pid = my_pid;
+      h->recursion_count = 1;
+      h->tag = tag;
+    }
+  else
+    {
+      h->recursion_count++;
+    }
+}
+
+/*
+ * Undo one ssvm_lock. The lock word is released only when the nesting
+ * count drops to zero; owner and tag are cleared before the barrier.
+ */
+always_inline void
+ssvm_unlock (ssvm_shared_header_t * h)
+{
+  h->recursion_count--;
+  if (h->recursion_count != 0)
+    return;
+
+  h->owner_pid = 0;
+  h->tag = 0;
+  CLIB_MEMORY_BARRIER ();
+  h->lock = 0;
+}
+
+/* Make the segment heap current; returns the heap that was current. */
+static inline void *
+ssvm_push_heap (ssvm_shared_header_t * sh)
+{
+  return clib_mem_set_heap (sh->heap);
+}
+
+/* Reinstate a heap previously returned by ssvm_push_heap. */
+static inline void
+ssvm_pop_heap (void *oldheap)
+{
+  clib_mem_set_heap (oldheap);
+}
+
+/* Error table: _(symbol suffix, description, numeric code) */
+#define foreach_ssvm_api_error                  \
+_(NO_NAME, "No shared segment name", -10)       \
+_(NO_SIZE, "Size not set (master)", -11)        \
+_(CREATE_FAILURE, "Create failed", -12)         \
+_(SET_SIZE, "Set size failed", -13)             \
+_(MMAP, "mmap failed", -14)                     \
+_(SLAVE_TIMEOUT, "Slave map timeout", -15)
+
+typedef enum
+{
+#define _(n,s,c) SSVM_API_ERROR_##n = c,
+  foreach_ssvm_api_error
+#undef _
+} ssvm_api_error_enum_t;
+
+#define SSVM_API_ERROR_NO_NAME (-10)
+
+int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index);
+int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds);
+
+#endif /* __included_ssvm_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/svm/svm.c b/src/svm/svm.c
new file mode 100644
index 00000000000..e4ca98e1ed2
--- /dev/null
+++ b/src/svm/svm.c
@@ -0,0 +1,1237 @@
+/*
+ *------------------------------------------------------------------
+ * svm.c - shared VM allocation,
mmap(...MAP_FIXED...) + * library + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#include "svm.h" + +static svm_region_t *root_rp; +static int root_rp_refcount; + +#define MAXLOCK 2 +static pthread_mutex_t *mutexes_held[MAXLOCK]; +static int nheld; + +svm_region_t * +svm_get_root_rp (void) +{ + return root_rp; +} + +#define MUTEX_DEBUG + +static void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif + ASSERT (nheld < MAXLOCK); + /* + * Keep score of held mutexes so we can try to exit + * cleanly if the world comes to an end at the worst possible + * moment + */ + mutexes_held[nheld++] = &rp->mutex; +} + +static void +region_unlock (svm_region_t * rp) +{ + int i, j; +#ifdef 
MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + + for (i = nheld - 1; i >= 0; i--) + { + if (mutexes_held[i] == &rp->mutex) + { + for (j = i; j < MAXLOCK - 1; j++) + mutexes_held[j] = mutexes_held[j + 1]; + nheld--; + goto found; + } + } + ASSERT (0); + +found: + CLIB_MEMORY_BARRIER (); + pthread_mutex_unlock (&rp->mutex); +} + + +static u8 * +format_svm_flags (u8 * s, va_list * args) +{ + uword f = va_arg (*args, uword); + + if (f & SVM_FLAGS_MHEAP) + s = format (s, "MHEAP "); + if (f & SVM_FLAGS_FILE) + s = format (s, "FILE "); + if (f & SVM_FLAGS_NODATA) + s = format (s, "NODATA "); + if (f & SVM_FLAGS_NEED_DATA_INIT) + s = format (s, "INIT "); + + return (s); +} + +static u8 * +format_svm_size (u8 * s, va_list * args) +{ + uword size = va_arg (*args, uword); + + if (size >= (1 << 20)) + { + s = format (s, "(%d mb)", size >> 20); + } + else if (size >= (1 << 10)) + { + s = format (s, "(%d kb)", size >> 10); + } + else + { + s = format (s, "(%d bytes)", size); + } + return (s); +} + +u8 * +format_svm_region (u8 * s, va_list * args) +{ + svm_region_t *rp = va_arg (*args, svm_region_t *); + int verbose = va_arg (*args, int); + int i; + uword lo, hi; + + s = format (s, "%s: base va 0x%x size 0x%x %U\n", + rp->region_name, rp->virtual_base, + rp->virtual_size, format_svm_size, rp->virtual_size); + s = format (s, " user_ctx 0x%x, bitmap_size %d\n", + rp->user_ctx, rp->bitmap_size); + + if (verbose) + { + s = format (s, " flags: 0x%x %U\n", rp->flags, + format_svm_flags, rp->flags); + s = format (s, + " region_heap 0x%x data_base 0x%x data_heap 0x%x\n", + rp->region_heap, rp->data_base, rp->data_heap); + } + + s = format (s, " %d clients, pids: ", vec_len (rp->client_pids)); + + for (i = 0; i < vec_len (rp->client_pids); i++) + s = format (s, "%d ", rp->client_pids[i]); + + s = format (s, "\n"); + + if (verbose) + { + lo = hi = ~0; + + s = format (s, " VM in use: "); + + for (i = 0; i < rp->bitmap_size; i++) + { + if 
(clib_bitmap_get_no_check (rp->bitmap, i) != 0) + { + if (lo == ~0) + { + hi = lo = rp->virtual_base + i * MMAP_PAGESIZE; + } + else + { + hi = rp->virtual_base + i * MMAP_PAGESIZE; + } + } + else + { + if (lo != ~0) + { + hi = rp->virtual_base + i * MMAP_PAGESIZE - 1; + s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi, + (hi - lo) >> 10); + lo = hi = ~0; + } + } + } + s = format (s, " rgn heap stats: %U", format_mheap, + rp->region_heap, 0); + if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) + { + s = format (s, "\n data heap stats: %U", format_mheap, + rp->data_heap, 1); + } + s = format (s, "\n"); + } + + return (s); +} + +/* + * rnd_pagesize + * Round to a pagesize multiple, presumably 4k works + */ +static u64 +rnd_pagesize (u64 size) +{ + u64 rv; + + rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1); + return (rv); +} + +/* + * svm_data_region_setup + */ +static int +svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp) +{ + int fd; + u8 junk = 0; + uword map_size; + + map_size = rp->virtual_size - (MMAP_PAGESIZE + + (a->pvt_heap_size ? 
a->pvt_heap_size : + SVM_PVT_MHEAP_SIZE)); + + if (a->flags & SVM_FLAGS_FILE) + { + struct stat statb; + + fd = open (a->backing_file, O_RDWR | O_CREAT, 0777); + + if (fd < 0) + { + clib_unix_warning ("open"); + return -1; + } + + if (fstat (fd, &statb) < 0) + { + clib_unix_warning ("fstat"); + close (fd); + return -2; + } + + if (statb.st_mode & S_IFREG) + { + if (statb.st_size == 0) + { + if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) + { + clib_unix_warning ("seek region size"); + close (fd); + return -3; + } + if (write (fd, &junk, 1) != 1) + { + clib_unix_warning ("set region size"); + close (fd); + return -3; + } + } + else + { + map_size = rnd_pagesize (statb.st_size); + } + } + else + { + map_size = a->backing_mmap_size; + } + + ASSERT (map_size <= rp->virtual_size - + (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE)); + + if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return -3; + } + close (fd); + rp->backing_file = (char *) format (0, "%s\0", a->backing_file); + rp->flags |= SVM_FLAGS_FILE; + } + + if (a->flags & SVM_FLAGS_MHEAP) + { + rp->data_heap = + mheap_alloc_with_flags ((void *) (rp->data_base), map_size, + MHEAP_FLAG_DISABLE_VM); + rp->flags |= SVM_FLAGS_MHEAP; + } + return 0; +} + +static int +svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp) +{ + int fd; + u8 junk = 0; + uword map_size; + struct stat statb; + + map_size = rp->virtual_size - + (MMAP_PAGESIZE + + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)); + + if (a->flags & SVM_FLAGS_FILE) + { + + fd = open (a->backing_file, O_RDWR, 0777); + + if (fd < 0) + { + clib_unix_warning ("open"); + return -1; + } + + if (fstat (fd, &statb) < 0) + { + clib_unix_warning ("fstat"); + close (fd); + return -2; + } + + if (statb.st_mode & S_IFREG) + { + if (statb.st_size == 0) + { + if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) + { + clib_unix_warning ("seek region size"); + close (fd); + return -3; + } + if (write (fd, &junk, 1) != 1) + { + clib_unix_warning ("set region size"); + close (fd); + return -3; + } + } + else + { + map_size = rnd_pagesize (statb.st_size); + } + } + else + { + map_size = a->backing_mmap_size; + } + + ASSERT (map_size <= rp->virtual_size + - (MMAP_PAGESIZE + + + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE))); + + if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return -3; + } + close (fd); + } + return 0; +} + +u8 * +shm_name_from_svm_map_region_args (svm_map_region_args_t * a) +{ + u8 *path; + u8 *shm_name; + u8 *split_point; + u8 *mkdir_arg = 0; + int root_path_offset = 0; + int name_offset = 0; + + if (a->root_path) + { + /* Tolerate present or absent slashes */ + if (a->root_path[0] == '/') + root_path_offset++; + + /* create the root_path under /dev/shm + iterate through path creating directories */ + + path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0); + split_point = path + 1; + vec_add1 (mkdir_arg, '-'); + + while (*split_point) + { + while (*split_point && *split_point != '/') + { + vec_add1 (mkdir_arg, *split_point); + split_point++; + } + vec_add1 (mkdir_arg, 0); + + /* ready to descend another level */ + mkdir_arg[vec_len (mkdir_arg) - 1] = '-'; + split_point++; + } + vec_free (mkdir_arg); + vec_free (path); + + if (a->name[0] == '/') + name_offset = 1; + + shm_name = format (0, "/%s-%s%c", a->root_path, + 
&a->name[name_offset], 0); + } + else + shm_name = format (0, "%s%c", a->name, 0); + return (shm_name); +} + +/* + * svm_map_region + */ +void * +svm_map_region (svm_map_region_args_t * a) +{ + int svm_fd; + svm_region_t *rp; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + int deadman = 0; + u8 junk = 0; + void *oldheap; + int overhead_space; + int rv; + uword data_base; + int nbits, words, bit; + int pid_holding_region_lock; + u8 *shm_name; + int dead_region_recovery = 0; + int time_left; + struct stat stat; + struct timespec ts, tsrem; + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] map region %s", getpid (), a->name); + + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + ASSERT (a->name); + + shm_name = shm_name_from_svm_map_region_args (a); + + svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777); + + if (svm_fd >= 0) + { + if (fchmod (svm_fd, 0770) < 0) + clib_unix_warning ("segment chmod"); + /* This turns out to fail harmlessly if the client starts first */ + if (fchown (svm_fd, a->uid, a->gid) < 0) + clib_unix_warning ("segment chown [ok if client starts first]"); + + vec_free (shm_name); + + if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1) + { + clib_warning ("seek region size"); + close (svm_fd); + return (0); + } + if (write (svm_fd, &junk, 1) != 1) + { + clib_warning ("set region size"); + close (svm_fd); + return (0); + } + + rp = mmap ((void *) a->baseva, a->size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + clib_unix_warning ("mmap create"); + close (svm_fd); + return (0); + } + close (svm_fd); + memset (rp, 0, sizeof (*rp)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("mutexattr_setpshared"); + + if (pthread_mutex_init (&rp->mutex, &attr)) + clib_unix_warning ("mutex_init"); + + if (pthread_mutexattr_destroy (&attr)) + 
clib_unix_warning ("mutexattr_destroy"); + + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + + if (pthread_cond_init (&rp->condvar, &cattr)) + clib_unix_warning ("cond_init"); + + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("condattr_destroy"); + + region_lock (rp, 1); + + rp->virtual_base = a->baseva; + rp->virtual_size = a->size; + + rp->region_heap = + mheap_alloc_with_flags ((void *) (a->baseva + MMAP_PAGESIZE), + (a->pvt_heap_size != 0) ? + a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + MHEAP_FLAG_DISABLE_VM); + oldheap = svm_push_pvt_heap (rp); + + rp->region_name = (char *) format (0, "%s%c", a->name, 0); + vec_add1 (rp->client_pids, getpid ()); + + nbits = rp->virtual_size / MMAP_PAGESIZE; + + ASSERT (nbits > 0); + rp->bitmap_size = nbits; + words = (nbits + BITS (uword) - 1) / BITS (uword); + vec_validate (rp->bitmap, words - 1); + + overhead_space = MMAP_PAGESIZE /* header */ + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + + bit = 0; + data_base = (uword) rp->virtual_base; + + if (a->flags & SVM_FLAGS_NODATA) + rp->flags |= SVM_FLAGS_NEED_DATA_INIT; + + do + { + clib_bitmap_set_no_check (rp->bitmap, bit, 1); + bit++; + overhead_space -= MMAP_PAGESIZE; + data_base += MMAP_PAGESIZE; + } + while (overhead_space > 0); + + rp->data_base = (void *) data_base; + + /* + * Note: although the POSIX spec guarantees that only one + * process enters this block, we have to play games + * to hold off clients until e.g. 
the mutex is ready + */ + rp->version = SVM_VERSION; + + /* setup the data portion of the region */ + + rv = svm_data_region_create (a, rp); + if (rv) + { + clib_warning ("data_region_create: %d", rv); + } + + region_unlock (rp); + + svm_pop_heap (oldheap); + + return ((void *) rp); + } + else + { + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + vec_free (shm_name); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + return (0); + } + + time_left = 20; + while (1) + { + if (0 != fstat (svm_fd, &stat)) + { + clib_warning ("fstat failed: %d", errno); + close (svm_fd); + return (0); + } + if (stat.st_size > 0) + { + break; + } + if (0 == time_left) + { + clib_warning ("waiting for resize of shm file timed out"); + close (svm_fd); + return (0); + } + ts.tv_sec = 0; + ts.tv_nsec = 100000000; + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + time_left--; + } + + rp = mmap (0, MMAP_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + return (0); + } + /* + * We lost the footrace to create this region; make sure + * the winner has crossed the finish line. + */ + while (rp->version == 0 && deadman++ < 5) + { + sleep (1); + } + + /* + * <bleep>-ed? 
+ */ + if (rp->version == 0) + { + clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION); + close (svm_fd); + munmap (rp, a->size); + return (0); + } + /* Remap now that the region has been placed */ + a->baseva = rp->virtual_base; + a->size = rp->virtual_size; + munmap (rp, MMAP_PAGESIZE); + + rp = (void *) mmap ((void *) a->baseva, a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (svm_fd); + return (0); + } + + if ((uword) rp != rp->virtual_base) + { + clib_warning ("mmap botch"); + } + + /* + * Try to fix the region mutex if it is held by + * a dead process + */ + pid_holding_region_lock = rp->mutex_owner_pid; + if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0) + { + clib_warning + ("region %s mutex held by dead pid %d, tag %d, force unlock", + rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag); + /* owner pid is nonexistent */ + rp->mutex.__data.__owner = 0; + rp->mutex.__data.__lock = 0; + dead_region_recovery = 1; + } + + if (dead_region_recovery) + clib_warning ("recovery: attempt to re-lock region"); + + region_lock (rp, 2); + oldheap = svm_push_pvt_heap (rp); + vec_add1 (rp->client_pids, getpid ()); + + if (dead_region_recovery) + clib_warning ("recovery: attempt svm_data_region_map"); + + rv = svm_data_region_map (a, rp); + if (rv) + { + clib_warning ("data_region_map: %d", rv); + } + + if (dead_region_recovery) + clib_warning ("unlock and continue"); + + region_unlock (rp); + + svm_pop_heap (oldheap); + + return ((void *) rp); + + } + return 0; /* NOTREACHED */ +} + +static void +svm_mutex_cleanup (void) +{ + int i; + for (i = 0; i < nheld; i++) + { + pthread_mutex_unlock (mutexes_held[i]); + } +} + +static void +svm_region_init_internal (svm_map_region_args_t * a) +{ + svm_region_t *rp; + u64 ticks = clib_cpu_time_now (); + uword randomize_baseva; + + /* guard against klutz calls */ + if (root_rp) + return; 
+ + root_rp_refcount++; + + atexit (svm_mutex_cleanup); + + /* Randomize the shared-VM base at init time */ + if (MMAP_PAGESIZE <= (4 << 10)) + randomize_baseva = (ticks & 15) * MMAP_PAGESIZE; + else + randomize_baseva = (ticks & 3) * MMAP_PAGESIZE; + + a->baseva += randomize_baseva; + + rp = svm_map_region (a); + ASSERT (rp); + + region_lock (rp, 3); + + /* Set up the main region data structures */ + if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) + { + svm_main_region_t *mp = 0; + void *oldheap; + + rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT); + + oldheap = svm_push_pvt_heap (rp); + vec_validate (mp, 0); + mp->name_hash = hash_create_string (0, sizeof (uword)); + mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0; + rp->data_base = mp; + svm_pop_heap (oldheap); + } + region_unlock (rp); + root_rp = rp; +} + +void +svm_region_init (void) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = 0; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = 0; + a->gid = 0; + + svm_region_init_internal (a); +} + +void +svm_region_init_chroot (char *root_path) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = root_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = 0; + a->gid = 0; + + svm_region_init_internal (a); +} + +void +svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = root_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = uid; + a->gid = gid; + + svm_region_init_internal (a); +} + +void +svm_region_init_args (svm_map_region_args_t * a) +{ + svm_region_init_internal 
(a); +} + +void * +svm_region_find_or_create (svm_map_region_args_t * a) +{ + svm_main_region_t *mp; + svm_region_t *rp; + uword need_nbits; + int index, i; + void *oldheap; + uword *p; + u8 *name; + svm_subregion_t *subp; + + ASSERT (root_rp); + + a->size += MMAP_PAGESIZE + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + a->size = rnd_pagesize (a->size); + + region_lock (root_rp, 4); + oldheap = svm_push_pvt_heap (root_rp); + mp = root_rp->data_base; + + ASSERT (mp); + + /* Map the named region from the correct chroot environment */ + a->root_path = (char *) mp->root_path; + + /* + * See if this region is already known. If it is, we're + * almost done... + */ + p = hash_get_mem (mp->name_hash, a->name); + + if (p) + { + rp = svm_map_region (a); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return rp; + } + + /* Create the region. */ + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + + need_nbits = a->size / MMAP_PAGESIZE; + + index = 1; /* $$$ fixme, figure out how many bit to really skip */ + + /* + * Scan the virtual space allocation bitmap, looking for a large + * enough chunk + */ + do + { + if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0) + { + for (i = 0; i < (need_nbits - 1); i++) + { + if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1) + { + index = index + i; + goto next; + } + } + break; + } + index++; + next:; + } + while (index < root_rp->bitmap_size); + + /* Completely out of VM? */ + if (index >= root_rp->bitmap_size) + { + clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)", + root_rp->region_name, a->size, a->size); + svm_pop_heap (oldheap); + region_unlock (root_rp); + return 0; + } + + /* + * Mark virtual space allocated + */ +#if CLIB_DEBUG > 1 + clib_warning ("set %d bits at index %d", need_nbits, index); +#endif + + for (i = 0; i < need_nbits; i++) + { + clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1); + } + + /* Place this region where it goes... 
*/ + a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE; + + rp = svm_map_region (a); + + pool_get (mp->subregions, subp); + name = format (0, "%s%c", a->name, 0); + subp->subregion_name = name; + + hash_set_mem (mp->name_hash, name, subp - mp->subregions); + + svm_pop_heap (oldheap); + + region_unlock (root_rp); + + return (rp); +} + +/* + * svm_region_unmap + * + * Let go of the indicated region. If the calling process + * is the last customer, throw it away completely. + * The root region mutex guarantees atomicity with respect to + * a new region client showing up at the wrong moment. + */ +void +svm_region_unmap (void *rp_arg) +{ + int i, mypid = getpid (); + int nclients_left; + void *oldheap; + uword virtual_base, virtual_size; + svm_region_t *rp = rp_arg; + char *name; + + /* + * If we take a signal while holding one or more shared-memory + * mutexes, we may end up back here from an otherwise + * benign exit handler. Bail out to avoid a recursive + * mutex screw-up. + */ + if (nheld) + return; + + ASSERT (rp); + ASSERT (root_rp); + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] unmap region %s", getpid (), rp->region_name); + + region_lock (root_rp, 5); + region_lock (rp, 6); + + oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */ + + /* Remove the caller from the list of mappers */ + for (i = 0; i < vec_len (rp->client_pids); i++) + { + if (rp->client_pids[i] == mypid) + { + vec_delete (rp->client_pids, 1, i); + goto found; + } + } + clib_warning ("pid %d AWOL", mypid); + +found: + + svm_pop_heap (oldheap); + + nclients_left = vec_len (rp->client_pids); + virtual_base = rp->virtual_base; + virtual_size = rp->virtual_size; + + if (nclients_left == 0) + { + int index, nbits, i; + svm_main_region_t *mp; + uword *p; + svm_subregion_t *subp; + + /* Kill the region, last guy on his way out */ + + oldheap = svm_push_pvt_heap (root_rp); + name = vec_dup (rp->region_name); + + virtual_base = rp->virtual_base; + virtual_size = rp->virtual_size; + 
+ /* Figure out which bits to clear in the root region bitmap */ + index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE; + + nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE; + +#if CLIB_DEBUG > 1 + clib_warning ("clear %d bits at index %d", nbits, index); +#endif + /* Give back the allocated VM */ + for (i = 0; i < nbits; i++) + { + clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0); + } + + mp = root_rp->data_base; + + p = hash_get_mem (mp->name_hash, name); + + /* Better never happen ... */ + if (p == NULL) + { + region_unlock (rp); + region_unlock (root_rp); + svm_pop_heap (oldheap); + clib_warning ("Region name '%s' not found?", name); + return; + } + + /* Remove from the root region subregion pool */ + subp = mp->subregions + p[0]; + pool_put (mp->subregions, subp); + + hash_unset_mem (mp->name_hash, name); + + vec_free (name); + + region_unlock (rp); + shm_unlink (rp->region_name); + munmap ((void *) virtual_base, virtual_size); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return; + } + + region_unlock (rp); + region_unlock (root_rp); + + munmap ((void *) virtual_base, virtual_size); +} + +/* + * svm_region_exit + * There is no clean way to unlink the + * root region when all clients go away, + * so remove the pid entry and call it a day. + */ +void +svm_region_exit () +{ + void *oldheap; + int i, mypid = getpid (); + uword virtual_base, virtual_size; + + /* It felt so nice we did it twice... */ + if (root_rp == 0) + return; + + if (--root_rp_refcount > 0) + return; + + /* + * If we take a signal while holding one or more shared-memory + * mutexes, we may end up back here from an otherwise + * benign exit handler. Bail out to avoid a recursive + * mutex screw-up. 
+ */ + if (nheld) + return; + + region_lock (root_rp, 7); + oldheap = svm_push_pvt_heap (root_rp); + + virtual_base = root_rp->virtual_base; + virtual_size = root_rp->virtual_size; + + for (i = 0; i < vec_len (root_rp->client_pids); i++) + { + if (root_rp->client_pids[i] == mypid) + { + vec_delete (root_rp->client_pids, 1, i); + goto found; + } + } + clib_warning ("pid %d AWOL", mypid); + +found: + + region_unlock (root_rp); + svm_pop_heap (oldheap); + + root_rp = 0; + munmap ((void *) virtual_base, virtual_size); +} + +void +svm_client_scan_this_region_nolock (svm_region_t * rp) +{ + int j; + int mypid = getpid (); + void *oldheap; + + for (j = 0; j < vec_len (rp->client_pids); j++) + { + if (mypid == rp->client_pids[j]) + continue; + if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) + { + clib_warning ("%s: cleanup ghost pid %d", + rp->region_name, rp->client_pids[j]); + /* nb: client vec in rp->region_heap */ + oldheap = svm_push_pvt_heap (rp); + vec_delete (rp->client_pids, 1, j); + j--; + svm_pop_heap (oldheap); + } + } +} + + +/* + * Scan svm regions for dead clients + */ +void +svm_client_scan (char *root_path) +{ + int i, j; + svm_main_region_t *mp; + svm_map_region_args_t *a = 0; + svm_region_t *root_rp; + svm_region_t *rp; + svm_subregion_t *subp; + u8 *name = 0; + u8 **svm_names = 0; + void *oldheap; + int mypid = getpid (); + + vec_validate (a, 0); + + svm_region_init_chroot (root_path); + + root_rp = svm_get_root_rp (); + + pthread_mutex_lock (&root_rp->mutex); + + mp = root_rp->data_base; + + for (j = 0; j < vec_len (root_rp->client_pids); j++) + { + if (mypid == root_rp->client_pids[j]) + continue; + if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0)) + { + clib_warning ("%s: cleanup ghost pid %d", + root_rp->region_name, root_rp->client_pids[j]); + /* nb: client vec in root_rp->region_heap */ + oldheap = svm_push_pvt_heap (root_rp); + vec_delete (root_rp->client_pids, 1, j); + j--; + svm_pop_heap (oldheap); + } + } + 
+ /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. + */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + vec_validate (a, 0); + a->root_path = root_path; + a->name = (char *) svm_names[i]; + rp = svm_region_find_or_create (a); + if (rp) + { + pthread_mutex_lock (&rp->mutex); + + svm_client_scan_this_region_nolock (rp); + + pthread_mutex_unlock (&rp->mutex); + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + vec_free (a); + } + vec_free (svm_names); + + svm_region_exit (); + + vec_free (a); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm.h b/src/svm/svm.h new file mode 100644 index 00000000000..0b87dbcbc64 --- /dev/null +++ b/src/svm/svm.h @@ -0,0 +1,207 @@ +/* + *------------------------------------------------------------------ + * svm.h - shared VM allocation, mmap(...MAP_FIXED...) + * brain police + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*------------------------------------------------------------------
*/

#ifndef __included_svm_h__
#define __included_svm_h__

#include <pthread.h>
#include <vppinfra/clib.h>
#include <vppinfra/mem.h>

/* Page size used for all region size / bitmap arithmetic */
#define MMAP_PAGESIZE (clib_mem_get_page_size())

#define SVM_VERSION ((1<<16) | 1)	/* set to declare region ready. */

/* Values for svm_region_t.flags / svm_map_region_args_t.flags */
#define SVM_FLAGS_MHEAP (1<<0)	/* region contains an mheap */
#define SVM_FLAGS_FILE  (1<<1)	/* region backed by one or more files */
#define SVM_FLAGS_NODATA (1<<2)	/* region will be further subdivided */
#define SVM_FLAGS_NEED_DATA_INIT (1<<3)	/* data_base not set up yet; cleared by svm_region_init_internal */

/* Default private-heap size when svm_map_region_args_t.pvt_heap_size is 0 */
#define SVM_PVT_MHEAP_SIZE (128<<10)	/* region's private mheap (128k) */

/*
 * Header that sits at the start of every mapped region.
 */
typedef struct svm_region_
{
  volatile uword version;	/* 0 is treated as "not ready" by the mapper */
  pthread_mutex_t mutex;	/* region lock (see region_lock / region_unlock) */
  pthread_cond_t condvar;
  int mutex_owner_pid;		/* in case of trouble */
  int mutex_owner_tag;		/* numeric tag passed to region_lock() */
  uword flags;			/* SVM_FLAGS_* */
  uword virtual_base;		/* base of the region object */
  uword virtual_size;		/* length passed to munmap() on unmap / exit */
  void *region_heap;		/* heap selected by svm_push_pvt_heap() */
  void *data_base;		/* data portion base address */
  void *data_heap;		/* data heap, if any */
  volatile void *user_ctx;	/* user context pointer */
  /* stuff allocated in the region's heap */
  uword bitmap_size;		/* nbits in virtual alloc bitmap */
  uword *bitmap;		/* the bitmap */
  char *region_name;
  char *backing_file;
  char **filenames;
  uword *client_pids;		/* pids of the processes that have the region mapped */
  /* pad */

  /* next page:
   * (64K) clib heap for the region itself
   *   NOTE(review): "64K" looks stale; SVM_PVT_MHEAP_SIZE above is 128k
   *   and is what svm_region_find_or_create() adds to the size - confirm.
   *
   * data_base -> whatever is in this region
   */

} svm_region_t;

/*
 * Arguments for svm_region_find_or_create() / svm_region_init_args().
 * Note that svm_region_find_or_create() modifies size, baseva and
 * root_path in place.
 */
typedef struct svm_map_region_args_
{
  char *root_path;		/* NULL means use the truly global arena */
  char *name;			/* region name, e.g. SVM_GLOBAL_REGION_NAME */
  u64 baseva;			/* requested base virtual address */
  u64 size;			/* requested size in bytes */
  u64 pvt_heap_size;		/* 0 selects SVM_PVT_MHEAP_SIZE */
  uword flags;			/* SVM_FLAGS_* */
  char *backing_file;
  uword backing_mmap_size;
  /* uid, gid to own the svm region(s) */
  int uid;
  int gid;
} svm_map_region_args_t;


/*
 * Memory shared across all router instances. Packet buffers, etc
 * Base should be "out of the way," and size should be big enough to
 * cover everything we plan to put here.
+ */ +#define SVM_GLOBAL_REGION_BASEVA 0x30000000 +#define SVM_GLOBAL_REGION_SIZE (64<<20) +#define SVM_GLOBAL_REGION_NAME "/global_vm" + +/* + * Memory shared across individual router instances. + */ +#define SVM_OVERLAY_REGION_BASEVA \ + (SVM_GLOBAL_REGION_BASEVA + SVM_GLOBAL_REGION_SIZE) +#define SVM_OVERLAY_REGION_SIZE (1<<20) +#define SVM_OVERLAY_REGION_BASENAME "/overlay_vm" + +typedef struct +{ + u8 *subregion_name; +} svm_subregion_t; + +typedef struct +{ + svm_subregion_t *subregions; /* subregion pool */ + uword *name_hash; + u8 *root_path; +} svm_main_region_t; + + +void *svm_region_find_or_create (svm_map_region_args_t * a); +void svm_region_init (void); +void svm_region_init_chroot (char *root_path); +void svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid); +void svm_region_init_args (svm_map_region_args_t * a); +void svm_region_exit (void); +void svm_region_unmap (void *rp_arg); +void svm_client_scan (char *root_path); +void svm_client_scan_this_region_nolock (svm_region_t * rp); +u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); + +static inline void * +svm_mem_alloc (svm_region_t * rp, uword size) +{ + u8 *oldheap; + ASSERT (rp->flags & SVM_FLAGS_MHEAP); + u8 *rv; + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + rv = clib_mem_alloc (size); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + return (rv); +} + +static inline void * +svm_mem_alloc_aligned_at_offset (svm_region_t * rp, + uword size, uword align, uword offset) +{ + u8 *oldheap; + ASSERT (rp->flags & SVM_FLAGS_MHEAP); + u8 *rv; + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + rv = clib_mem_alloc_aligned_at_offset (size, align, offset, + 1 /* yes, call os_out_of_memory */ ); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + return (rv); +} + +static inline void +svm_mem_free (svm_region_t * rp, void *ptr) +{ + u8 *oldheap; + ASSERT (rp->flags & 
SVM_FLAGS_MHEAP); + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + clib_mem_free (ptr); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + +} + +static inline void * +svm_push_pvt_heap (svm_region_t * rp) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (rp->region_heap); + return ((void *) oldheap); +} + +static inline void * +svm_push_data_heap (svm_region_t * rp) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (rp->data_heap); + return ((void *) oldheap); +} + +static inline void +svm_pop_heap (void *oldheap) +{ + clib_mem_set_heap (oldheap); +} + +u8 *format_svm_region (u8 * s, va_list * args); + +svm_region_t *svm_get_root_rp (void); + +#endif /* __included_svm_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_test.c b/src/svm/svm_test.c new file mode 100644 index 00000000000..ab0b9e248e6 --- /dev/null +++ b/src/svm/svm_test.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + *------------------------------------------------------------------ + * svm_test.c -- brain police + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> + +#include "svm.h" + + +int +main (int argc, char **argv) +{ + svm_region_t *root_rp, *rp; + svm_map_region_args_t *a = 0; + + vec_validate (a, 0); + + root_rp = svm_region_init (); + + ASSERT (root_rp); + + a->name = "/qvnet"; + a->size = (4 << 10); + + rp = svm_region_find_or_create (root_rp, a); + + ASSERT (rp); + + *((u32 *) rp->data_base) = 0xdeadbeef; + svm_region_unmap (root_rp, rp); + + fformat (stdout, "exiting...\n"); + + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c new file mode 100644 index 00000000000..03dfe7c33d3 --- /dev/null +++ b/src/svm/svmdb.c @@ -0,0 +1,671 @@ +/* + *------------------------------------------------------------------ + * svmdb.c -- simple shared memory database + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <vppinfra/serialize.h> + +#include "svmdb.h" + +static void local_set_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, + u8 * var, u8 * val, u32 elsize); + +always_inline void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif +} + +always_inline void +region_unlock (svm_region_t * rp) +{ +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + pthread_mutex_unlock (&rp->mutex); +} + +svmdb_client_t * +svmdb_map (svmdb_map_args_t * dba) +{ + svmdb_client_t *client = 0; + svm_map_region_args_t *a = 0; + svm_region_t *db_rp; + void *oldheap; + svmdb_shm_hdr_t *hp = 0; + + vec_validate (client, 0); + vec_validate (a, 0); + + svm_region_init_chroot_uid_gid (dba->root_path, dba->uid, dba->gid); + + a->root_path = dba->root_path; + a->name = "/db"; + a->size = 
dba->size ? dba->size : SVMDB_DEFAULT_SIZE; + a->flags = SVM_FLAGS_MHEAP; + a->uid = dba->uid; + a->gid = dba->gid; + + db_rp = client->db_rp = svm_region_find_or_create (a); + + ASSERT (db_rp); + + vec_free (a); + + region_lock (client->db_rp, 10); + /* Has someone else set up the shared-memory variable table? */ + if (db_rp->user_ctx) + { + client->shm = (void *) db_rp->user_ctx; + client->pid = getpid (); + region_unlock (client->db_rp); + ASSERT (client->shm->version == SVMDB_SHM_VERSION); + return (client); + } + /* Nope, it's our problem... */ + + /* Add a bogus client (pid=0) so the svm won't be deallocated */ + oldheap = svm_push_pvt_heap (db_rp); + vec_add1 (client->db_rp->client_pids, 0); + svm_pop_heap (oldheap); + + oldheap = svm_push_data_heap (db_rp); + + vec_validate (hp, 0); + hp->version = SVMDB_SHM_VERSION; + hp->namespaces[SVMDB_NAMESPACE_STRING] + = hash_create_string (0, sizeof (uword)); + hp->namespaces[SVMDB_NAMESPACE_VEC] + = hash_create_string (0, sizeof (uword)); + + db_rp->user_ctx = hp; + client->shm = hp; + + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + client->pid = getpid (); + + return (client); +} + +void +svmdb_unmap (svmdb_client_t * client) +{ + ASSERT (client); + + if (!svm_get_root_rp ()) + return; + + svm_region_unmap ((void *) client->db_rp); + svm_region_exit (); + vec_free (client); +} + +static void +notify_value (svmdb_value_t * v, svmdb_action_t a) +{ + int i; + int rv; + union sigval sv; + u32 value; + u32 *dead_registrations = 0; + + svmdb_notify_t *np; + + for (i = 0; i < vec_len (v->notifications); i++) + { + np = vec_elt_at_index (v->notifications, i); + if (np->action == a) + { + value = (np->action << 28) | (np->opaque); + sv.sival_ptr = (void *) (uword) value; + do + { + rv = 0; + if (sigqueue (np->pid, np->signum, sv) == 0) + break; + rv = errno; + } + while (rv == EAGAIN); + if (rv == 0) + continue; + vec_add1 (dead_registrations, i); + } + } + + for (i = 0; i < vec_len (dead_registrations); i++) 
+ { + np = vec_elt_at_index (v->notifications, dead_registrations[i]); + clib_warning ("dead reg pid %d sig %d action %d opaque %x", + np->pid, np->signum, np->action, np->opaque); + vec_delete (v->notifications, 1, dead_registrations[i]); + } + vec_free (dead_registrations); +} + +int +svmdb_local_add_del_notification (svmdb_client_t * client, + svmdb_notification_args_t * a) +{ + uword *h; + void *oldheap; + hash_pair_t *hp; + svmdb_shm_hdr_t *shm; + u8 *dummy_value = 0; + svmdb_value_t *value; + svmdb_notify_t *np; + int i; + int rv = 0; + + ASSERT (a->elsize); + + region_lock (client->db_rp, 18); + shm = client->shm; + oldheap = svm_push_data_heap (client->db_rp); + + h = shm->namespaces[a->nspace]; + + hp = hash_get_pair_mem (h, a->var); + if (hp == 0) + { + local_set_variable_nolock (client, a->nspace, (u8 *) a->var, + dummy_value, a->elsize); + /* might have moved */ + h = shm->namespaces[a->nspace]; + hp = hash_get_pair_mem (h, a->var); + ASSERT (hp); + } + + value = pool_elt_at_index (shm->values, hp->value[0]); + + for (i = 0; i < vec_len (value->notifications); i++) + { + np = vec_elt_at_index (value->notifications, i); + if ((np->pid == client->pid) + && (np->signum == a->signum) + && (np->action == a->action) && (np->opaque == a->opaque)) + { + if (a->add_del == 0 /* delete */ ) + { + vec_delete (value->notifications, 1, i); + goto out; + } + else + { /* add */ + clib_warning + ("%s: ignore dup reg pid %d signum %d action %d opaque %x", + a->var, client->pid, a->signum, a->action, a->opaque); + rv = -2; + goto out; + } + } + } + if (a->add_del == 0) + { + rv = -3; + goto out; + } + + vec_add2 (value->notifications, np, 1); + np->pid = client->pid; + np->signum = a->signum; + np->action = a->action; + np->opaque = a->opaque; + +out: + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + return rv; +} + + +static void +local_unset_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, char *var) +{ + uword *h; + svmdb_value_t 
*oldvalue; + hash_pair_t *hp; + + h = client->shm->namespaces[namespace]; + hp = hash_get_pair_mem (h, var); + if (hp) + { + oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]); + if (vec_len (oldvalue->notifications)) + notify_value (oldvalue, SVMDB_ACTION_UNSET); + /* zero length value means unset */ + _vec_len (oldvalue->value) = 0; + } + client->shm->namespaces[namespace] = h; +} + +void +svmdb_local_unset_string_variable (svmdb_client_t * client, char *var) +{ + void *oldheap; + + region_lock (client->db_rp, 11); + oldheap = svm_push_data_heap (client->db_rp); + local_unset_variable_nolock (client, SVMDB_NAMESPACE_STRING, var); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +static void +local_set_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, + u8 * var, u8 * val, u32 elsize) +{ + uword *h; + hash_pair_t *hp; + u8 *name; + svmdb_shm_hdr_t *shm; + + shm = client->shm; + h = shm->namespaces[namespace]; + hp = hash_get_pair_mem (h, var); + if (hp) + { + svmdb_value_t *oldvalue; + oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]); + vec_alloc (oldvalue->value, vec_len (val) * elsize); + clib_memcpy (oldvalue->value, val, vec_len (val) * elsize); + _vec_len (oldvalue->value) = vec_len (val); + notify_value (oldvalue, SVMDB_ACTION_SET); + } + else + { + svmdb_value_t *newvalue; + pool_get (shm->values, newvalue); + memset (newvalue, 0, sizeof (*newvalue)); + newvalue->elsize = elsize; + vec_alloc (newvalue->value, vec_len (val) * elsize); + clib_memcpy (newvalue->value, val, vec_len (val) * elsize); + _vec_len (newvalue->value) = vec_len (val); + name = format (0, "%s%c", var, 0); + hash_set_mem (h, name, newvalue - shm->values); + } + shm->namespaces[namespace] = h; +} + +void +svmdb_local_set_string_variable (svmdb_client_t * client, + char *var, char *val) +{ + void *oldheap; + + region_lock (client->db_rp, 12); + oldheap = svm_push_data_heap (client->db_rp); + + local_unset_variable_nolock 
(client, SVMDB_NAMESPACE_STRING, var); + + local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING, + (u8 *) var, (u8 *) val, 1 /* elsize */ ); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +static u8 * +local_get_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, u8 * var) +{ + uword *h; + uword *p; + svmdb_shm_hdr_t *shm; + svmdb_value_t *oldvalue; + + shm = client->shm; + h = shm->namespaces[namespace]; + p = hash_get_mem (h, var); + if (p) + { + oldvalue = pool_elt_at_index (shm->values, p[0]); + notify_value (oldvalue, SVMDB_ACTION_GET); + return (oldvalue->value); + } + return 0; +} + +void * +svmdb_local_get_variable_reference (svmdb_client_t * client, + svmdb_namespace_t namespace, char *var) +{ + u8 *rv; + + region_lock (client->db_rp, 19); + rv = local_get_variable_nolock (client, namespace, (u8 *) var); + region_unlock (client->db_rp); + return (void *) rv; +} + +char * +svmdb_local_get_string_variable (svmdb_client_t * client, char *var) +{ + u8 *rv = 0; + + region_lock (client->db_rp, 13); + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_STRING, (u8 *) var); + + if (rv && vec_len (rv)) + { + rv = format (0, "%s", rv); + vec_add1 (rv, 0); + } + region_unlock (client->db_rp); + return ((char *) rv); +} + +void +svmdb_local_dump_strings (svmdb_client_t * client) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm = client->shm; + + region_lock (client->db_rp, 14); + + h = client->shm->namespaces[SVMDB_NAMESPACE_STRING]; + + /* *INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + + fformat(stdout, "%s: %s\n", key, + vec_len(v->value) ? 
v->value : (u8 *)"(nil)"); + })); + /* *INDENT-ON* */ + region_unlock (client->db_rp); +} + +int +svmdb_local_serialize_strings (svmdb_client_t * client, char *filename) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm = client->shm; + serialize_main_t _sm, *sm = &_sm; + clib_error_t *error = 0; + u8 *sanitized_name = 0; + int fd = 0; + + if (strstr (filename, "..") || index (filename, '/')) + { + error = clib_error_return (0, "Illegal characters in filename '%s'", + filename); + goto out; + } + + sanitized_name = format (0, "/tmp/%s%c", filename, 0); + + fd = creat ((char *) sanitized_name, 0644); + + if (fd < 0) + { + error = clib_error_return_unix (0, "Create '%s'", sanitized_name); + goto out; + } + + serialize_open_unix_file_descriptor (sm, fd); + + region_lock (client->db_rp, 20); + + h = client->shm->namespaces[SVMDB_NAMESPACE_STRING]; + + serialize_likely_small_unsigned_integer (sm, hash_elts (h)); + + /* *INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + + /* Omit names with nil values */ + if (vec_len(v->value)) + { + serialize_cstring (sm, (char *)key); + serialize_cstring (sm, (char *)v->value); + } + })); + /* *INDENT-ON* */ + region_unlock (client->db_rp); + + serialize_close (sm); + +out: + if (fd > 0 && close (fd) < 0) + error = clib_error_return_unix (0, "close fd %d", fd); + + if (error) + { + clib_error_report (error); + return -1; + } + return 0; +} + +int +svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename) +{ + serialize_main_t _sm, *sm = &_sm; + void *oldheap; + clib_error_t *error = 0; + u8 *key, *value; + int fd = 0; + u32 nelts; + int i; + + fd = open (filename, O_RDONLY); + + if (fd < 0) + { + error = clib_error_return_unix (0, "Failed to open '%s'", filename); + goto out; + } + + unserialize_open_unix_file_descriptor (sm, fd); + + region_lock (client->db_rp, 21); + oldheap = svm_push_data_heap (client->db_rp); + + nelts = 
unserialize_likely_small_unsigned_integer (sm); + + for (i = 0; i < nelts; i++) + { + unserialize_cstring (sm, (char **) &key); + unserialize_cstring (sm, (char **) &value); + local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING, + key, value, 1 /* elsize */ ); + vec_free (key); + vec_free (value); + } + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + + serialize_close (sm); + +out: + if (fd > 0 && close (fd) < 0) + error = clib_error_return_unix (0, "close fd %d", fd); + + if (error) + { + clib_error_report (error); + return -1; + } + return 0; +} + +void +svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var) +{ + void *oldheap; + + region_lock (client->db_rp, 15); + oldheap = svm_push_data_heap (client->db_rp); + local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +void +svmdb_local_set_vec_variable (svmdb_client_t * client, + char *var, void *val_arg, u32 elsize) +{ + u8 *val = (u8 *) val_arg; + void *oldheap; + + region_lock (client->db_rp, 16); + oldheap = svm_push_data_heap (client->db_rp); + + local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var); + local_set_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var, + val, elsize); + + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +void * +svmdb_local_get_vec_variable (svmdb_client_t * client, char *var, u32 elsize) +{ + u8 *rv = 0; + u8 *copy = 0; + + region_lock (client->db_rp, 17); + + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var); + + if (rv && vec_len (rv)) + { + /* Make a copy in process-local memory */ + vec_alloc (copy, vec_len (rv) * elsize); + clib_memcpy (copy, rv, vec_len (rv) * elsize); + _vec_len (copy) = vec_len (rv); + region_unlock (client->db_rp); + return (copy); + } + region_unlock (client->db_rp); + return (0); +} + +void +svmdb_local_dump_vecs (svmdb_client_t * client) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm; 
+ + region_lock (client->db_rp, 17); + shm = client->shm; + + h = client->shm->namespaces[SVMDB_NAMESPACE_VEC]; + + /* *INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + (void) fformat(stdout, "%s:\n %U (%.2f)\n", key, + format_hex_bytes, v->value, + vec_len(v->value)*v->elsize, ((f64 *)(v->value))[0]); + })); + /* *INDENT-ON* */ + + region_unlock (client->db_rp); +} + +void * +svmdb_local_find_or_add_vec_variable (svmdb_client_t * client, + char *var, u32 nbytes) +{ + void *oldheap; + u8 *rv = 0; + + region_lock (client->db_rp, 18); + oldheap = svm_push_data_heap (client->db_rp); + + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var); + + if (rv) + { + goto out; + } + else + { + uword *h; + u8 *name; + svmdb_shm_hdr_t *shm; + svmdb_value_t *newvalue; + + shm = client->shm; + h = shm->namespaces[SVMDB_NAMESPACE_VEC]; + + pool_get (shm->values, newvalue); + memset (newvalue, 0, sizeof (*newvalue)); + newvalue->elsize = 1; + vec_alloc (newvalue->value, nbytes); + _vec_len (newvalue->value) = nbytes; + name = format (0, "%s%c", var, 0); + hash_set_mem (h, name, newvalue - shm->values); + shm->namespaces[SVMDB_NAMESPACE_VEC] = h; + rv = newvalue->value; + } + +out: + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + return (rv); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdb.h b/src/svm/svmdb.h new file mode 100644 index 00000000000..e02628a0bb9 --- /dev/null +++ b/src/svm/svmdb.h @@ -0,0 +1,135 @@ +/* + *------------------------------------------------------------------ + * svmdb.h - shared VM database + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __included_svmdb_h__ +#define __included_svmdb_h__ + +#include "svm.h" + +typedef enum +{ + SVMDB_ACTION_ILLEGAL = 0, + SVMDB_ACTION_GET, /* not clear why anyone would care */ + SVMDB_ACTION_SET, + SVMDB_ACTION_UNSET, +} svmdb_action_t; + +typedef struct +{ + int pid; + int signum; + u32 action:4; + u32 opaque:28; +} svmdb_notify_t; + +typedef struct +{ + u8 *value; + svmdb_notify_t *notifications; + u32 elsize; +} svmdb_value_t; + +typedef enum +{ + SVMDB_NAMESPACE_STRING = 0, + SVMDB_NAMESPACE_VEC, + SVMDB_N_NAMESPACES, +} svmdb_namespace_t; + +typedef struct +{ + uword version; + /* pool of values */ + svmdb_value_t *values; + uword *namespaces[SVMDB_N_NAMESPACES]; +} svmdb_shm_hdr_t; + +#define SVMDB_SHM_VERSION 2 + +typedef struct +{ + int flags; + int pid; + svm_region_t *db_rp; + svmdb_shm_hdr_t *shm; +} svmdb_client_t; + +typedef struct +{ + int add_del; + svmdb_namespace_t nspace; + char *var; + u32 elsize; + int signum; + u32 action:4; + u32 opaque:28; +} svmdb_notification_args_t; + +typedef struct +{ + char *root_path; + uword size; + u32 uid; + u32 gid; +} svmdb_map_args_t; + +/* + * Must be a reasonable number, several mb smaller than + * SVM_GLOBAL_REGION_SIZE, or no donut for you... 
+ */ +#define SVMDB_DEFAULT_SIZE (4<<20) + +svmdb_client_t *svmdb_map (svmdb_map_args_t *); + +void svmdb_unmap (svmdb_client_t * client); +void svmdb_local_unset_string_variable (svmdb_client_t * client, char *var); +void svmdb_local_set_string_variable (svmdb_client_t * client, + char *var, char *val); +char *svmdb_local_get_string_variable (svmdb_client_t * client, char *var); +void *svmdb_local_get_variable_reference (svmdb_client_t * client, + svmdb_namespace_t ns, char *var); + +void svmdb_local_dump_strings (svmdb_client_t * client); + +void svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var); +void svmdb_local_set_vec_variable (svmdb_client_t * client, + char *var, void *val, u32 elsize); +void *svmdb_local_get_vec_variable (svmdb_client_t * client, char *var, + u32 elsize); +void svmdb_local_dump_vecs (svmdb_client_t * client); + +int svmdb_local_add_del_notification (svmdb_client_t * client, + svmdb_notification_args_t * args); + +void *svmdb_local_find_or_add_vec_variable (svmdb_client_t * client, + char *var, u32 nbytes); + +int svmdb_local_serialize_strings (svmdb_client_t * client, char *filename); +int svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename); + + +#endif /* __included_svmdb_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdbtool.c b/src/svm/svmdbtool.c new file mode 100644 index 00000000000..a0af15fcbbf --- /dev/null +++ b/src/svm/svmdbtool.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <pwd.h> +#include <grp.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <vppinfra/serialize.h> +#include "svmdb.h" + +typedef struct +{ + svmdb_map_args_t map_args; + int uid, gid; + uword size; +} svmdbtool_main_t; + +svmdbtool_main_t svmdbtool_main; + +static inline svmdb_map_args_t * +map_arg_setup (char *chroot_path) +{ + svmdbtool_main_t *sm = &svmdbtool_main; + svmdb_map_args_t *ma = &sm->map_args; + + memset (ma, 0, sizeof (*ma)); + ma->root_path = chroot_path; + ma->size = sm->size; + ma->uid = sm->uid; + ma->gid = sm->gid; + return ma; +} + +static void +get_string (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + char *rv; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + rv = svmdb_local_get_string_variable (c, (char *) vbl); + + fformat (stdout, "%s\n", rv ? 
rv : "UNSET"); + vec_free (rv); + svmdb_unmap (c); +} + +static void +set_string (char *chroot_path, u8 * vbl, u8 * value) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_set_string_variable (c, (char *) vbl, (char *) value); + svmdb_unmap (c); +} + +static void +unset_string (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_unset_string_variable (c, (char *) vbl); + svmdb_unmap (c); +} + +static void +dump_strings (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_dump_strings (c); + svmdb_unmap (c); +} + +static void +serialize_strings (char *chroot_path, char *filename) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + (void) svmdb_local_serialize_strings (c, filename); + svmdb_unmap (c); +} + +static void +unserialize_strings (char *chroot_path, char *filename) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + (void) svmdb_local_unserialize_strings (c, filename); + svmdb_unmap (c); +} + +static void +test_vlib_vec_rate (char *chroot_path, f64 vr) +{ + svmdb_client_t *c; + f64 *tv = 0; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + vec_add1 (tv, vr); + + svmdb_local_set_vec_variable (c, "vlib_vector_rate", (char *) tv, + sizeof (*tv)); + svmdb_unmap (c); + + vec_free (tv); +} + + + +static void +test_vec (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + u64 *tv = 0; + int i; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + /* my amp goes to 11 */ + for (i = 0; i < 11; i++) + { + vec_add1 (tv, i); + } + + svmdb_local_set_vec_variable (c, (char *) vbl, (char *) tv, sizeof (tv[0])); + 
svmdb_unmap (c); + + vec_free (tv); +} + +static void +fake_install (char *chroot_path, u8 * add_value) +{ + svmdb_client_t *c; + u8 *v = 0; + u8 **values = 0; + u8 *oldvalue; + u8 *value; + int nitems = 0, i; + serialize_main_t m; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + oldvalue = svmdb_local_get_vec_variable (c, "installed_sw", 1); + if (oldvalue) + { + unserialize_open_data (&m, oldvalue, vec_len (oldvalue)); + nitems = unserialize_likely_small_unsigned_integer (&m); + for (i = 0; i < nitems; i++) + { + unserialize_cstring (&m, (char **) &value); + vec_add1 (values, value); + } + vec_free (v); + } + nitems++; + value = format (0, "%s%c", add_value, 0); + + vec_add1 (values, value); + + fformat (stdout, "Resulting installed_sw vector:\n"); + + serialize_open_vector (&m, v); + serialize_likely_small_unsigned_integer (&m, vec_len (values)); + for (i = 0; i < vec_len (values); i++) + { + fformat (stdout, "%s\n", values[i]); + serialize_cstring (&m, (char *) values[i]); + } + + v = serialize_close_vector (&m); + + svmdb_local_set_vec_variable (c, "installed_sw", v, sizeof (v[0])); + svmdb_unmap (c); + + for (i = 0; i < vec_len (values); i++) + vec_free (values[i]); + vec_free (values); +} + +static void +sigaction_handler (int signum, siginfo_t * i, void *notused) +{ + u32 action, opaque; + + action = (u32) (uword) i->si_ptr; + action >>= 28; + opaque = (u32) (uword) i->si_ptr; + opaque &= ~(0xF0000000); + + clib_warning ("signal %d, action %d, opaque %x", signum, action, opaque); +} + +static void +test_reg (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_notification_args_t args; + svmdb_notification_args_t *a = &args; + struct sigaction sa; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = sigaction_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction (SIGUSR2, &sa, 0) < 0) + { + clib_unix_warning ("sigaction"); + return; + } + + memset 
(a, 0, sizeof (*a)); + + c = svmdb_map (ma); + + a->add_del = 1 /* add */ ; + a->nspace = SVMDB_NAMESPACE_STRING; + a->var = (char *) vbl; + a->elsize = 1; + a->signum = SIGUSR2; + a->action = SVMDB_ACTION_GET; + a->opaque = 0x0eadbeef; + + svmdb_local_add_del_notification (c, a); + + (void) svmdb_local_get_string_variable (c, (char *) vbl); + + a->add_del = 0; /* del */ + svmdb_local_add_del_notification (c, a); + + + + svmdb_unmap (c); +} + +static void +unset_vec (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_local_unset_vec_variable (c, (char *) vbl); + svmdb_unmap (c); +} + +static void +dump_vecs (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_local_dump_vecs (c); + svmdb_unmap (c); +} + +static void +crash_test (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + clib_warning ("Grab region mutex and crash deliberately!"); + c->db_rp->mutex_owner_pid = getpid (); + c->db_rp->mutex_owner_tag = -13; + pthread_mutex_lock (&c->db_rp->mutex); + + abort (); +} + +static void +map_with_size (char *chroot_path, uword size) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + svmdbtool_main.size = size; + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_unmap (c); +} + +int +main (int argc, char **argv) +{ + unformat_input_t input; + int parsed = 0; + u8 *vbl = 0, *value = 0; + char *chroot_path = 0; + u8 *chroot_path_u8; + u8 *filename; + uword size; + f64 vr; + int uid, gid, rv; + struct passwd _pw, *pw; + struct group _grp, *grp; + char *s, buf[128]; + + svmdbtool_main.uid = geteuid (); + svmdbtool_main.gid = getegid (); + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&input, "get-string %s", 
&vbl)) + { + get_string (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "set-string %s %s", &vbl, &value)) + { + set_string (chroot_path, vbl, value); + vec_free (vbl); + vec_free (value); + parsed++; + } + else if (unformat (&input, "unset-string %s", &vbl)) + { + unset_string (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "dump-strings")) + { + dump_strings (chroot_path); + parsed++; + } + else if (unformat (&input, "unset-vec %s", &vbl)) + { + unset_vec (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "dump-vecs")) + { + dump_vecs (chroot_path); + parsed++; + } + else if (unformat (&input, "test-vec %s", &vbl)) + { + test_vec (chroot_path, vbl); + // vec_free(vbl); + parsed++; + } + else if (unformat (&input, "vlib-vec-rate %f", &vr)) + { + test_vlib_vec_rate (chroot_path, vr); + parsed++; + } + else if (unformat (&input, "test-reg %s", &vbl)) + { + test_reg (chroot_path, vbl); + parsed++; + } + else if (unformat (&input, "crash-test")) + { + crash_test (chroot_path); + } + else if (unformat (&input, "chroot %s", &chroot_path_u8)) + { + chroot_path = (char *) chroot_path_u8; + } + else if (unformat (&input, "fake-install %s", &value)) + { + fake_install (chroot_path, value); + parsed++; + } + else if (unformat (&input, "size %d", &size)) + { + map_with_size (chroot_path, size); + parsed++; + } + else if (unformat (&input, "uid %d", &uid)) + svmdbtool_main.uid = uid; + else if (unformat (&input, "gid %d", &gid)) + svmdbtool_main.gid = gid; + else if (unformat (&input, "uid %s", &s)) + { + /* lookup the username */ + pw = NULL; + rv = getpwnam_r (s, &_pw, buf, sizeof (buf), &pw); + if (rv < 0) + { + fformat (stderr, "cannot fetch username %s", s); + exit (1); + } + if (pw == NULL) + { + fformat (stderr, "username %s does not exist", s); + exit (1); + } + vec_free (s); + svmdbtool_main.uid = pw->pw_uid; + } + else if (unformat (&input, "gid %s", &s)) + { + /* 
lookup the group name */ + grp = NULL; + rv = getgrnam_r (s, &_grp, buf, sizeof (buf), &grp); + if (rv != 0) + { + fformat (stderr, "cannot fetch group %s", s); + exit (1); + } + if (grp == NULL) + { + fformat (stderr, "group %s does not exist", s); + exit (1); + } + vec_free (s); + svmdbtool_main.gid = grp->gr_gid; + } + else if (unformat (&input, "serialize-strings %s", &filename)) + { + vec_add1 (filename, 0); + serialize_strings (chroot_path, (char *) filename); + parsed++; + } + else if (unformat (&input, "unserialize-strings %s", &filename)) + { + vec_add1 (filename, 0); + unserialize_strings (chroot_path, (char *) filename); + parsed++; + } + else + { + break; + } + } + + unformat_free (&input); + + if (!parsed) + { + fformat (stdout, "%s: get-string <name> | set-string <name> <value>\n", + argv[0]); + fformat (stdout, " unset-string <name> | dump-strings\n"); + fformat (stdout, " test-vec <name> |\n"); + fformat (stdout, " unset-vec <name> | dump-vecs\n"); + fformat (stdout, " chroot <prefix> [uid <nnn-or-userid>]\n"); + fformat (stdout, " [gid <nnn-or-group-name>]\n"); + } + + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c new file mode 100644 index 00000000000..b319551425c --- /dev/null +++ b/src/svm/svmtool.c @@ -0,0 +1,528 @@ +/* + *------------------------------------------------------------------ + * svmtool.c + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#include "svm.h" + + + +/* + * format_all_svm_regions + * Maps / unmaps regions. Do NOT call from client code! + */ +u8 * +format_all_svm_regions (u8 * s, va_list * args) +{ + int verbose = va_arg (*args, int); + svm_region_t *root_rp = svm_get_root_rp (); + svm_main_region_t *mp; + svm_subregion_t *subp; + svm_region_t *rp; + svm_map_region_args_t *a = 0; + u8 **svm_names = 0; + u8 *name = 0; + int i; + + ASSERT (root_rp); + + pthread_mutex_lock (&root_rp->mutex); + + s = format (s, "%U", format_svm_region, root_rp, verbose); + + mp = root_rp->data_base; + + /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. 
+ */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + vec_validate (a, 0); + a->name = (char *) svm_names[i]; + rp = svm_region_find_or_create (a); + if (rp) + { + pthread_mutex_lock (&rp->mutex); + s = format (s, "%U", format_svm_region, rp, verbose); + pthread_mutex_unlock (&rp->mutex); + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + vec_free (a); + } + vec_free (svm_names); + return (s); +} + +void +show (char *chroot_path, int verbose) +{ + svm_map_region_args_t *a = 0; + + vec_validate (a, 0); + + svm_region_init_chroot (chroot_path); + + fformat (stdout, "My pid is %d\n", getpid ()); + + fformat (stdout, "%U", format_all_svm_regions, verbose); + + svm_region_exit (); + + vec_free (a); +} + + +static void * +svm_map_region_nolock (svm_map_region_args_t * a) +{ + int svm_fd; + svm_region_t *rp; + int deadman = 0; + u8 *shm_name; + + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + + shm_name = shm_name_from_svm_map_region_args (a); + + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + return (0); + } + vec_free (shm_name); + + rp = mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + return (0); + } + /* + * We lost the footrace to create this region; make sure + * the winner has crossed the finish line. + */ + while (rp->version == 0 && deadman++ < 5) + { + sleep (1); + } + + /* + * <bleep>-ed? 
+ */ + if (rp->version == 0) + { + clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION); + munmap (rp, MMAP_PAGESIZE); + return (0); + } + /* Remap now that the region has been placed */ + a->baseva = rp->virtual_base; + a->size = rp->virtual_size; + munmap (rp, MMAP_PAGESIZE); + + rp = (void *) mmap ((void *) a->baseva, a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + return (0); + } + + if ((uword) rp != rp->virtual_base) + { + clib_warning ("mmap botch"); + } + + if (pthread_mutex_trylock (&rp->mutex)) + { + clib_warning ("rp->mutex LOCKED by pid %d, tag %d, cleared...", + rp->mutex_owner_pid, rp->mutex_owner_tag); + memset (&rp->mutex, 0, sizeof (rp->mutex)); + + } + else + { + clib_warning ("mutex OK...\n"); + pthread_mutex_unlock (&rp->mutex); + } + + return ((void *) rp); +} + +/* + * rnd_pagesize + * Round to a pagesize multiple, presumably 4k works + */ +static u64 +rnd_pagesize (u64 size) +{ + u64 rv; + + rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1); + return (rv); +} + +#define MUTEX_DEBUG + +always_inline void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif +} + +always_inline void +region_unlock (svm_region_t * rp) +{ +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + pthread_mutex_unlock (&rp->mutex); +} + + +static void * +svm_existing_region_map_nolock (void *root_arg, svm_map_region_args_t * a) +{ + svm_region_t *root_rp = root_arg; + svm_main_region_t *mp; + svm_region_t *rp; + void *oldheap; + uword *p; + + a->size += MMAP_PAGESIZE + + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + a->size = rnd_pagesize (a->size); + + region_lock (root_rp, 4); + oldheap = svm_push_pvt_heap (root_rp); + mp = root_rp->data_base; + + ASSERT (mp); + + p = hash_get_mem (mp->name_hash, a->name); + + if (p) + { + rp = svm_map_region_nolock (a); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return rp; + } + return 0; + +} + +static void +trace (char *chroot_path, char *name, int enable_disable) +{ + svm_map_region_args_t *a = 0; + svm_region_t *db_rp; + void *oldheap; + + vec_validate (a, 0); + + svm_region_init_chroot (chroot_path); + + a->name = name; + a->size = 1 << 20; + a->flags = SVM_FLAGS_MHEAP; + + db_rp = svm_region_find_or_create (a); + + ASSERT (db_rp); + + region_lock (db_rp, 20); + + oldheap = svm_push_data_heap (db_rp); + + mheap_trace (db_rp->data_heap, enable_disable); + + svm_pop_heap (oldheap); + region_unlock (db_rp); + + svm_region_unmap ((void *) db_rp); + svm_region_exit (); + vec_free (a); +} + + + +static void +subregion_repair (char *chroot_path) +{ + int i; + svm_main_region_t *mp; + svm_map_region_args_t a; + svm_region_t *root_rp; + svm_region_t *rp; + svm_subregion_t *subp; + u8 *name = 0; + u8 **svm_names = 0; + + svm_region_init_chroot (chroot_path); + root_rp = svm_get_root_rp (); + + pthread_mutex_lock (&root_rp->mutex); + + mp = root_rp->data_base; + + /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. 
+ */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + memset (&a, 0, sizeof (a)); + a.root_path = chroot_path; + a.name = (char *) svm_names[i]; + fformat (stdout, "Checking %s region...\n", a.name); + rp = svm_existing_region_map_nolock (root_rp, &a); + if (rp) + { + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + } + vec_free (svm_names); +} + +void +repair (char *chroot_path, int crash_root_region) +{ + svm_region_t *root_rp = 0; + svm_map_region_args_t *a = 0; + void *svm_map_region (svm_map_region_args_t * a); + int svm_fd; + u8 *shm_name; + + fformat (stdout, "our pid: %d\n", getpid ()); + + vec_validate (a, 0); + + a->root_path = chroot_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + + shm_name = shm_name_from_svm_map_region_args (a); + + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + goto out; + } + + vec_free (shm_name); + + root_rp = mmap (0, MMAP_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (root_rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + goto out; + } + + /* Remap now that the region has been placed */ + clib_warning ("remap to 0x%x", root_rp->virtual_base); + + a->baseva = root_rp->virtual_base; + a->size = root_rp->virtual_size; + munmap (root_rp, MMAP_PAGESIZE); + + root_rp = (void *) mmap ((void *) a->baseva, a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) root_rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + goto out; + } + + close (svm_fd); + + if ((uword) root_rp != root_rp->virtual_base) + { + clib_warning ("mmap botch"); + goto out; + } + + if 
(pthread_mutex_trylock (&root_rp->mutex)) + { + clib_warning ("root_rp->mutex LOCKED by pid %d, tag %d, cleared...", + root_rp->mutex_owner_pid, root_rp->mutex_owner_tag); + memset (&root_rp->mutex, 0, sizeof (root_rp->mutex)); + goto out; + } + else + { + clib_warning ("root_rp->mutex OK...\n"); + pthread_mutex_unlock (&root_rp->mutex); + } + +out: + vec_free (a); + /* + * Now that the root region is known to be OK, + * fix broken subregions + */ + subregion_repair (chroot_path); + + if (crash_root_region) + { + clib_warning ("Leaving root region locked on purpose..."); + pthread_mutex_lock (&root_rp->mutex); + root_rp->mutex_owner_pid = getpid (); + root_rp->mutex_owner_tag = 99; + } + svm_region_exit (); +} + +int +main (int argc, char **argv) +{ + unformat_input_t input; + int parsed = 0; + char *name; + char *chroot_path = 0; + u8 *chroot_u8; + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&input, "show-verbose")) + { + show (chroot_path, 1); + parsed++; + } + else if (unformat (&input, "show")) + { + show (chroot_path, 0); + parsed++; + } + else if (unformat (&input, "client-scan")) + { + svm_client_scan (chroot_path); + parsed++; + } + else if (unformat (&input, "repair")) + { + repair (chroot_path, 0 /* fix it */ ); + parsed++; + } + else if (unformat (&input, "crash")) + { + repair (chroot_path, 1 /* crash it */ ); + parsed++; + } + else if (unformat (&input, "trace-on %s", &name)) + { + trace (chroot_path, name, 1); + parsed++; + } + else if (unformat (&input, "trace-off %s", &name)) + { + trace (chroot_path, name, 0); + parsed++; + } + else if (unformat (&input, "chroot %s", &chroot_u8)) + { + chroot_path = (char *) chroot_u8; + } + else + { + break; + } + } + + unformat_free (&input); + + if (!parsed) + { + fformat (stdout, + "%s: show | show-verbose | client-scan | trace-on <region-name>\n", + argv[0]); + fformat (stdout, " trace-off <region-name>\n"); + } + exit (0); 
+} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |