Diffstat (limited to 'src/svm')
 -rw-r--r--  src/svm/dir.dox            |   21
 -rw-r--r--  src/svm/persist.c          |  258
 -rw-r--r--  src/svm/ssvm.c             |  212
 -rw-r--r--  src/svm/ssvm.h             |  176
 -rw-r--r--  src/svm/svm.c              | 1268
 -rw-r--r--  src/svm/svm.h              |  107
 -rw-r--r--  src/svm/svm_common.h       |  135
 -rw-r--r--  src/svm/svm_fifo.c         |  838
 -rw-r--r--  src/svm/svm_fifo.h         |  228
 -rw-r--r--  src/svm/svm_fifo_segment.c |  643
 -rw-r--r--  src/svm/svm_fifo_segment.h |  136
 -rw-r--r--  src/svm/svm_test.c         |   79
 -rw-r--r--  src/svm/svmdb.c            |  676
 -rw-r--r--  src/svm/svmdb.h            |  135
 -rw-r--r--  src/svm/svmdbtool.c        |  537
 -rw-r--r--  src/svm/svmtool.c          |  528
 -rw-r--r--  src/svm/test_svm_fifo1.c   |  357
17 files changed, 6334 insertions, 0 deletions
diff --git a/src/svm/dir.dox b/src/svm/dir.dox new file mode 100644 index 00000000..83246979 --- /dev/null +++ b/src/svm/dir.dox @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2016 Comcast Cable Communications Management, LLC. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Doxygen directory documentation */ +/** +@dir +@brief Shared virtual memory allocation library. +*/ diff --git a/src/svm/persist.c b/src/svm/persist.c new file mode 100644 index 00000000..023c596b --- /dev/null +++ b/src/svm/persist.c @@ -0,0 +1,258 @@ +/* + *------------------------------------------------------------------ + * persist.c - persistent data structure storage test / demo code + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <vppinfra/serialize.h> +#include <svmdb.h> + +typedef struct +{ + svmdb_client_t *c; +} persist_main_t; + +persist_main_t persist_main; + +typedef struct +{ + u8 *string1; + u8 *string2; +} demo_struct2_t; + +typedef struct +{ + demo_struct2_t *demo2; + u8 *name; +} demo_struct1_t; + +/* + * Data structures in persistent shared memory, all the time + */ +clib_error_t * +persist_malloc (persist_main_t * pm) +{ + demo_struct2_t *demo2; + demo_struct1_t *demo1; + time_t starttime = time (0); + char *datestring = ctime (&starttime); + void *oldheap; + + /* Get back the root pointer */ + demo1 = svmdb_local_get_variable_reference + (pm->c, SVMDB_NAMESPACE_VEC, "demo1_location"); + + /* It doesnt exist create our data structures */ + if (demo1 == 0) + { + /* If you want MP / thread safety, lock the region... 
*/ + pthread_mutex_lock (&pm->c->db_rp->mutex); + + /* Switch to the shared memory region heap */ + oldheap = svm_push_data_heap (pm->c->db_rp); + + /* Allocate the top-level structure as a single element vector */ + vec_validate (demo1, 0); + + /* Allocate the next-level structure as a plain old memory obj */ + demo2 = clib_mem_alloc (sizeof (*demo2)); + + demo1->demo2 = demo2; + demo1->name = format (0, "My name is Ishmael%c", 0); + demo2->string1 = format (0, "Here is string1%c", 0); + demo2->string2 = format (0, "Born at %s%c", datestring, 0); + + /* Back to the process-private heap */ + svm_pop_heap (oldheap); + pthread_mutex_unlock (&pm->c->db_rp->mutex); + + /* + * Set the root pointer. Note: this guy switches heaps, locks, etc. + * We allocated demo1 as a vector to make this "just work..." + */ + svmdb_local_set_vec_variable (pm->c, "demo1_location", + demo1, sizeof (demo1)); + + } + else + { + /* retrieve and print data from shared memory */ + demo2 = demo1->demo2; + fformat (stdout, "name: %s\n", demo1->name); + fformat (stdout, "demo2 location: %llx\n", demo2); + fformat (stdout, "string1: %s\n", demo2->string1); + fformat (stdout, "string2: %s\n", demo2->string2); + } + return 0; +} + +void +unserialize_demo1 (serialize_main_t * sm, va_list * args) +{ + demo_struct1_t **result = va_arg (*args, demo_struct1_t **); + demo_struct1_t *demo1; + demo_struct2_t *demo2; + + /* Allocate data structures in process private memory */ + demo1 = clib_mem_alloc (sizeof (*demo1)); + demo2 = clib_mem_alloc (sizeof (*demo2)); + demo1->demo2 = demo2; + + /* retrieve data from shared memory checkpoint */ + unserialize_cstring (sm, (char **) &demo1->name); + unserialize_cstring (sm, (char **) &demo2->string1); + unserialize_cstring (sm, (char **) &demo2->string2); + *result = demo1; +} + +void +serialize_demo1 (serialize_main_t * sm, va_list * args) +{ + demo_struct1_t *demo1 = va_arg (*args, demo_struct1_t *); + demo_struct2_t *demo2 = demo1->demo2; + + serialize_cstring (sm, (char *) demo1->name); + serialize_cstring (sm, (char *) demo2->string1); + serialize_cstring (sm, (char *) demo2->string2); +} + +/* Serialize / unserialize variant */ +clib_error_t * +persist_serialize (persist_main_t * pm) +{ + u8 *checkpoint; + serialize_main_t sm; + + demo_struct2_t *demo2; + demo_struct1_t *demo1; + time_t starttime = time (0); + char *datestring = ctime (&starttime); + + /* Get back the root pointer */ + checkpoint = svmdb_local_get_vec_variable (pm->c, "demo1_checkpoint", + sizeof (u8)); + + /* It doesnt exist create our data structures */ + if (checkpoint == 0) + { + /* Allocate data structures in process-private memory */ + demo1 = clib_mem_alloc (sizeof (*demo2)); + vec_validate (demo1, 0); + demo2 = clib_mem_alloc (sizeof (*demo2)); + + demo1->demo2 = demo2; + demo1->name = format (0, "My name is Ishmael%c", 0); + demo2->string1 = format (0, "Here is string1%c", 0); + demo2->string2 = format (0, "Born at %s%c", datestring, 0); + + /* Create checkpoint */ + serialize_open_vector (&sm, checkpoint); + serialize (&sm, serialize_demo1, demo1); + checkpoint = serialize_close_vector (&sm); + + /* Copy checkpoint into shared memory */ + svmdb_local_set_vec_variable (pm->c, "demo1_checkpoint", + checkpoint, sizeof (u8)); + /* Toss the process-private-memory original.. 
*/ + vec_free (checkpoint); + } + else + { + /* Open the checkpoint */ + unserialize_open_data (&sm, checkpoint, vec_len (checkpoint)); + unserialize (&sm, unserialize_demo1, &demo1); + + /* Toss the process-private-memory checkpoint copy */ + vec_free (checkpoint); + + /* Off we go... */ + demo2 = demo1->demo2; + fformat (stdout, "name: %s\n", demo1->name); + fformat (stdout, "demo2 location: %llx\n", demo2); + fformat (stdout, "string1: %s\n", demo2->string1); + fformat (stdout, "string2: %s\n", demo2->string2); + } + return 0; +} + + +int +main (int argc, char **argv) +{ + unformat_input_t _input, *input = &_input; + persist_main_t *pm = &persist_main; + clib_error_t *error = 0; + + /* Make a 4mb database arena, chroot so it's truly private */ + pm->c = svmdb_map_chroot_size ("/ptest", 4 << 20); + + ASSERT (pm->c); + + unformat_init_command_line (input, argv); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "malloc")) + error = persist_malloc (pm); + else if (unformat (input, "serialize")) + error = persist_serialize (pm); + else + { + error = clib_error_return (0, "Unknown flavor '%U'", + format_unformat_error, input); + break; + } + } + + svmdb_unmap (pm->c); + + if (error) + { + clib_error_report (error); + exit (1); + } + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c new file mode 100644 index 00000000..c04982de --- /dev/null +++ b/src/svm/ssvm.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ssvm.h" +#include "svm_common.h" + +int +ssvm_master_init (ssvm_private_t * ssvm, u32 master_index) +{ + svm_main_region_t *smr = svm_get_root_rp ()->data_base; + int ssvm_fd; + u8 *ssvm_filename; + u8 junk = 0; + int flags; + ssvm_shared_header_t *sh; + u64 ticks = clib_cpu_time_now (); + u64 randomize_baseva; + void *oldheap; + + if (ssvm->ssvm_size == 0) + return SSVM_API_ERROR_NO_SIZE; + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] creating segment '%s'", getpid (), ssvm->name); + + ASSERT (vec_c_string_is_terminated (ssvm->name)); + ssvm_filename = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + unlink ((char *) ssvm_filename); + + vec_free (ssvm_filename); + + ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR | O_CREAT | O_EXCL, 0777); + + if (ssvm_fd < 0) + { + clib_unix_warning ("create segment '%s'", ssvm->name); + return SSVM_API_ERROR_CREATE_FAILURE; + } + + if (fchmod (ssvm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0) + clib_unix_warning ("ssvm segment chmod"); + if (fchown (ssvm_fd, smr->uid, smr->gid) < 0) + clib_unix_warning ("ssvm segment chown"); + + if (lseek (ssvm_fd, ssvm->ssvm_size, SEEK_SET) < 0) + { + clib_unix_warning ("lseek"); + close (ssvm_fd); + return SSVM_API_ERROR_SET_SIZE; + } + + if (write (ssvm_fd, &junk, 1) != 1) + { + clib_unix_warning ("set ssvm size"); + close (ssvm_fd); + return SSVM_API_ERROR_SET_SIZE; + } + + flags = MAP_SHARED; + if (ssvm->requested_va) + flags |= MAP_FIXED; + + randomize_baseva = (ticks & 15) * MMAP_PAGESIZE; + + if (ssvm->requested_va) + ssvm->requested_va += randomize_baseva; + + sh = ssvm->sh = + (ssvm_shared_header_t *) mmap ((void *) ssvm->requested_va, + ssvm->ssvm_size, PROT_READ | PROT_WRITE, + flags, ssvm_fd, 0); + + if (ssvm->sh == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (ssvm_fd); + return SSVM_API_ERROR_MMAP; + } + + close (ssvm_fd); + + ssvm->my_pid = getpid (); + sh->master_pid = ssvm->my_pid; + sh->ssvm_size = ssvm->ssvm_size; + sh->heap = mheap_alloc_with_flags + (((u8 *) sh) + MMAP_PAGESIZE, ssvm->ssvm_size - MMAP_PAGESIZE, + MHEAP_FLAG_DISABLE_VM | MHEAP_FLAG_THREAD_SAFE); + + sh->ssvm_va = pointer_to_uword (sh); + sh->master_index = master_index; + + oldheap = ssvm_push_heap (sh); + sh->name = format (0, "%s%c", ssvm->name, 0); + ssvm_pop_heap (oldheap); + + ssvm->i_am_master = 1; + + /* The application has to set set sh->ready... 
*/ + return 0; +} + +int +ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds) +{ + struct stat stat; + int ssvm_fd = -1; + ssvm_shared_header_t *sh; + + ASSERT (vec_c_string_is_terminated (ssvm->name)); + ssvm->i_am_master = 0; + + while (timeout_in_seconds-- > 0) + { + if (ssvm_fd < 0) + ssvm_fd = shm_open ((char *) ssvm->name, O_RDWR, 0777); + if (ssvm_fd < 0) + { + sleep (1); + continue; + } + if (fstat (ssvm_fd, &stat) < 0) + { + sleep (1); + continue; + } + + if (stat.st_size > 0) + goto map_it; + } + clib_warning ("slave timeout"); + return SSVM_API_ERROR_SLAVE_TIMEOUT; + +map_it: + sh = (void *) mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + ssvm_fd, 0); + if (sh == MAP_FAILED) + { + clib_unix_warning ("slave research mmap"); + close (ssvm_fd); + return SSVM_API_ERROR_MMAP; + } + + while (timeout_in_seconds-- > 0) + { + if (sh->ready) + goto re_map_it; + } + close (ssvm_fd); + munmap (sh, MMAP_PAGESIZE); + clib_warning ("slave timeout 2"); + return SSVM_API_ERROR_SLAVE_TIMEOUT; + +re_map_it: + ssvm->requested_va = (u64) sh->ssvm_va; + ssvm->ssvm_size = sh->ssvm_size; + munmap (sh, MMAP_PAGESIZE); + + sh = ssvm->sh = (void *) mmap ((void *) ssvm->requested_va, ssvm->ssvm_size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, ssvm_fd, 0); + + if (sh == MAP_FAILED) + { + clib_unix_warning ("slave final mmap"); + close (ssvm_fd); + return SSVM_API_ERROR_MMAP; + } + sh->slave_pid = getpid (); + return 0; +} + +void +ssvm_delete (ssvm_private_t * ssvm) +{ + u8 *fn; + + fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] unlinking ssvm (%s) backing file '%s'", getpid (), + ssvm->name, fn); + + /* Throw away the backing file */ + if (unlink ((char *) fn) < 0) + clib_unix_warning ("unlink segment '%s'", ssvm->name); + + vec_free (fn); + vec_free (ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h new file mode 100644 index 00000000..8466e155 --- /dev/null +++ b/src/svm/ssvm.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ssvm_h__ +#define __included_ssvm_h__ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + +#define SSVM_N_OPAQUE 7 + +typedef struct +{ + /* Spin-lock */ + volatile u32 lock; + volatile u32 owner_pid; + int recursion_count; + u32 tag; /* for debugging */ + + /* The allocation arena */ + void *heap; + + /* Segment must be mapped at this address, or no supper */ + u64 ssvm_va; + /* The actual mmap size */ + u64 ssvm_size; + u32 master_pid; + u32 slave_pid; + u8 *name; + void *opaque[SSVM_N_OPAQUE]; + + /* Set when the master application thinks it's time to make the donuts */ + volatile u32 ready; + + /* Needed to make unique MAC addresses, etc. */ + u32 master_index; +} ssvm_shared_header_t; + +typedef struct +{ + ssvm_shared_header_t *sh; + u64 ssvm_size; + u32 my_pid; + u32 vlib_hw_if_index; + u8 *name; + uword requested_va; + int i_am_master; + u32 per_interface_next_index; + u32 *rx_queue; +} ssvm_private_t; + +always_inline void +ssvm_lock (ssvm_shared_header_t * h, u32 my_pid, u32 tag) +{ + if (h->owner_pid == my_pid) + { + h->recursion_count++; + return; + } + + while (__sync_lock_test_and_set (&h->lock, 1)) + ; + + h->owner_pid = my_pid; + h->recursion_count = 1; + h->tag = tag; +} + +always_inline void +ssvm_lock_non_recursive (ssvm_shared_header_t * h, u32 tag) +{ + while (__sync_lock_test_and_set (&h->lock, 1)) + ; + + h->tag = tag; +} + +always_inline void +ssvm_unlock (ssvm_shared_header_t * h) +{ + if (--h->recursion_count == 0) + { + h->owner_pid = 0; + h->tag = 0; + CLIB_MEMORY_BARRIER (); + h->lock = 0; + } +} + +always_inline void +ssvm_unlock_non_recursive (ssvm_shared_header_t * h) +{ + h->tag = 0; + CLIB_MEMORY_BARRIER (); + h->lock = 0; +} + +static inline void * +ssvm_push_heap (ssvm_shared_header_t * sh) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (sh->heap); + return ((void *) oldheap); +} + +static inline void +ssvm_pop_heap (void *oldheap) +{ + clib_mem_set_heap (oldheap); +} + +#define foreach_ssvm_api_error \ +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) + +typedef enum +{ +#define _(n,s,c) SSVM_API_ERROR_##n = c, + foreach_ssvm_api_error +#undef _ +} ssvm_api_error_enum_t; + +#define SSVM_API_ERROR_NO_NAME (-10) + +int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index); +int ssvm_slave_init (ssvm_private_t * ssvm, int timeout_in_seconds); +void ssvm_delete (ssvm_private_t * ssvm); + +#endif /* __included_ssvm_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm.c b/src/svm/svm.c new file mode 100644 index 00000000..f97803cd --- /dev/null +++ b/src/svm/svm.c @@ -0,0 +1,1268 @@ +/* + *------------------------------------------------------------------ + * svm.c 
- shared VM allocation, mmap(...MAP_FIXED...) + * library + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#include "svm.h" + +static svm_region_t *root_rp; +static int root_rp_refcount; + +#define MAXLOCK 2 +static pthread_mutex_t *mutexes_held[MAXLOCK]; +static int nheld; + +svm_region_t * +svm_get_root_rp (void) +{ + return root_rp; +} + +#define MUTEX_DEBUG + +static void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif + ASSERT (nheld < MAXLOCK); + /* + * Keep score of held mutexes so we can try to exit + * cleanly if the world comes to an end at the worst possible + * moment + */ + mutexes_held[nheld++] = &rp->mutex; +} + +static void +region_unlock (svm_region_t * rp) +{ + int i, j; +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + + for (i = nheld - 1; i >= 0; i--) + { + if (mutexes_held[i] == &rp->mutex) + { + for (j = i; j < MAXLOCK - 1; j++) + mutexes_held[j] = mutexes_held[j + 1]; + nheld--; + goto found; + } + } + ASSERT (0); + +found: + CLIB_MEMORY_BARRIER (); + pthread_mutex_unlock (&rp->mutex); +} + + +static u8 * +format_svm_flags (u8 * s, va_list * args) +{ + uword f = va_arg (*args, uword); + + if (f & SVM_FLAGS_MHEAP) + s = format (s, "MHEAP "); + if (f & SVM_FLAGS_FILE) + s = format (s, "FILE "); + if (f & SVM_FLAGS_NODATA) + s = format (s, "NODATA "); + if (f & SVM_FLAGS_NEED_DATA_INIT) + s = format (s, "INIT "); + + return (s); +} + +static u8 * +format_svm_size (u8 * s, va_list * args) +{ + uword size = va_arg (*args, uword); + + if (size >= (1 << 20)) + { + s = format (s, "(%d mb)", size >> 20); + } + else if (size >= (1 << 10)) + { + s = format (s, "(%d kb)", size >> 10); + } + else + { + s = format (s, "(%d bytes)", size); + } + return (s); +} + +u8 * +format_svm_region (u8 * s, va_list * args) +{ + svm_region_t *rp = va_arg (*args, svm_region_t *); + int verbose = va_arg (*args, int); + int i; + uword lo, hi; + + s = format (s, "%s: base va 0x%x size 0x%x %U\n", + rp->region_name, rp->virtual_base, + rp->virtual_size, format_svm_size, rp->virtual_size); + s = format (s, " user_ctx 0x%x, bitmap_size %d\n", + rp->user_ctx, rp->bitmap_size); + + if (verbose) + { + s = format (s, " flags: 0x%x %U\n", rp->flags, + format_svm_flags, 
rp->flags); + s = format (s, + " region_heap 0x%x data_base 0x%x data_heap 0x%x\n", + rp->region_heap, rp->data_base, rp->data_heap); + } + + s = format (s, " %d clients, pids: ", vec_len (rp->client_pids)); + + for (i = 0; i < vec_len (rp->client_pids); i++) + s = format (s, "%d ", rp->client_pids[i]); + + s = format (s, "\n"); + + if (verbose) + { + lo = hi = ~0; + + s = format (s, " VM in use: "); + + for (i = 0; i < rp->bitmap_size; i++) + { + if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) + { + if (lo == ~0) + { + hi = lo = rp->virtual_base + i * MMAP_PAGESIZE; + } + else + { + hi = rp->virtual_base + i * MMAP_PAGESIZE; + } + } + else + { + if (lo != ~0) + { + hi = rp->virtual_base + i * MMAP_PAGESIZE - 1; + s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi, + (hi - lo) >> 10); + lo = hi = ~0; + } + } + } + s = format (s, " rgn heap stats: %U", format_mheap, + rp->region_heap, 0); + if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) + { + s = format (s, "\n data heap stats: %U", format_mheap, + rp->data_heap, 1); + } + s = format (s, "\n"); + } + + return (s); +} + +/* + * rnd_pagesize + * Round to a pagesize multiple, presumably 4k works + */ +static u64 +rnd_pagesize (u64 size) +{ + u64 rv; + + rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1); + return (rv); +} + +/* + * svm_data_region_setup + */ +static int +svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp) +{ + int fd; + u8 junk = 0; + uword map_size; + + map_size = rp->virtual_size - (MMAP_PAGESIZE + + (a->pvt_heap_size ? a->pvt_heap_size : + SVM_PVT_MHEAP_SIZE)); + + if (a->flags & SVM_FLAGS_FILE) + { + struct stat statb; + + fd = open (a->backing_file, O_RDWR | O_CREAT, 0777); + + if (fd < 0) + { + clib_unix_warning ("open"); + return -1; + } + + if (fstat (fd, &statb) < 0) + { + clib_unix_warning ("fstat"); + close (fd); + return -2; + } + + if (statb.st_mode & S_IFREG) + { + if (statb.st_size == 0) + { + if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) + { + clib_unix_warning ("seek region size"); + close (fd); + return -3; + } + if (write (fd, &junk, 1) != 1) + { + clib_unix_warning ("set region size"); + close (fd); + return -3; + } + } + else + { + map_size = rnd_pagesize (statb.st_size); + } + } + else + { + map_size = a->backing_mmap_size; + } + + ASSERT (map_size <= rp->virtual_size - + (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE)); + + if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return -3; + } + close (fd); + rp->backing_file = (char *) format (0, "%s\0", a->backing_file); + rp->flags |= SVM_FLAGS_FILE; + } + + if (a->flags & SVM_FLAGS_MHEAP) + { + rp->data_heap = + mheap_alloc_with_flags ((void *) (rp->data_base), map_size, + MHEAP_FLAG_DISABLE_VM); + rp->flags |= SVM_FLAGS_MHEAP; + } + return 0; +} + +static int +svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp) +{ + int fd; + u8 junk = 0; + uword map_size; + struct stat statb; + + map_size = rp->virtual_size - + (MMAP_PAGESIZE + + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)); + + if (a->flags & SVM_FLAGS_FILE) + { + + fd = open (a->backing_file, O_RDWR, 0777); + + if (fd < 0) + { + clib_unix_warning ("open"); + return -1; + } + + if (fstat (fd, &statb) < 0) + { + clib_unix_warning ("fstat"); + close (fd); + return -2; + } + + if (statb.st_mode & S_IFREG) + { + if (statb.st_size == 0) + { + if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1) + { + clib_unix_warning ("seek region size"); + close (fd); + return -3; + } + if (write (fd, &junk, 1) != 1) + { + clib_unix_warning ("set region size"); + close (fd); + return -3; + } + } + else + { + map_size = rnd_pagesize (statb.st_size); + } + } + else + { + map_size = a->backing_mmap_size; + } + + ASSERT (map_size <= rp->virtual_size + - (MMAP_PAGESIZE + + + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE))); + + if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (fd); + return -3; + } + close (fd); + } + return 0; +} + +u8 * +shm_name_from_svm_map_region_args (svm_map_region_args_t * a) +{ + u8 *path; + u8 *shm_name; + u8 *split_point; + u8 *mkdir_arg = 0; + int root_path_offset = 0; + int name_offset = 0; + + if (a->root_path) + { + /* Tolerate present or absent slashes */ + if (a->root_path[0] == '/') + root_path_offset++; + + /* create the root_path under /dev/shm + iterate through path creating directories */ + + path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0); + split_point = path + 1; + vec_add1 (mkdir_arg, '-'); + + while (*split_point) + { + while (*split_point && *split_point != '/') + { + vec_add1 (mkdir_arg, *split_point); + split_point++; + } + vec_add1 (mkdir_arg, 0); + + /* ready to descend another level */ + mkdir_arg[vec_len (mkdir_arg) - 1] = '-'; + split_point++; + } + vec_free (mkdir_arg); + vec_free (path); + + if (a->name[0] == '/') + name_offset = 1; + + shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset], + &a->name[name_offset], 0); + } + else + shm_name = format (0, "%s%c", a->name, 0); + return (shm_name); +} + +/* + * svm_map_region + */ +void * +svm_map_region (svm_map_region_args_t * a) +{ + int svm_fd; + svm_region_t *rp; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + int deadman = 0; + u8 junk = 0; + void *oldheap; + int overhead_space; + int rv; + uword data_base; + int nbits, words, bit; + int pid_holding_region_lock; + u8 *shm_name; + int dead_region_recovery = 0; + int time_left; + struct stat stat; + struct timespec ts, tsrem; + + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + ASSERT (a->name); + + shm_name = shm_name_from_svm_map_region_args (a); + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] map region %s: shm_open (%s)", + getpid (), a->name, shm_name); + + svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777); + + if (svm_fd >= 0) + { + if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0) + clib_unix_warning ("segment chmod"); + /* This turns out to fail harmlessly if the client starts first */ + if (fchown (svm_fd, a->uid, a->gid) < 0) + clib_unix_warning ("segment chown [ok if client starts first]"); + + vec_free (shm_name); + + if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1) + { + clib_warning ("seek region size"); + close (svm_fd); + return (0); + } + if (write (svm_fd, &junk, 1) != 1) + { + clib_warning ("set region size"); + close (svm_fd); + return (0); + } + + rp = mmap (uword_to_pointer (a->baseva, void *), a->size, + PROT_READ | 
PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + clib_unix_warning ("mmap create"); + close (svm_fd); + return (0); + } + close (svm_fd); + memset (rp, 0, sizeof (*rp)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("mutexattr_setpshared"); + + if (pthread_mutex_init (&rp->mutex, &attr)) + clib_unix_warning ("mutex_init"); + + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + + if (pthread_cond_init (&rp->condvar, &cattr)) + clib_unix_warning ("cond_init"); + + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("condattr_destroy"); + + region_lock (rp, 1); + + rp->virtual_base = a->baseva; + rp->virtual_size = a->size; + + rp->region_heap = + mheap_alloc_with_flags (uword_to_pointer + (a->baseva + MMAP_PAGESIZE, void *), + (a->pvt_heap_size != + 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + MHEAP_FLAG_DISABLE_VM); + oldheap = svm_push_pvt_heap (rp); + + rp->region_name = (char *) format (0, "%s%c", a->name, 0); + vec_add1 (rp->client_pids, getpid ()); + + nbits = rp->virtual_size / MMAP_PAGESIZE; + + ASSERT (nbits > 0); + rp->bitmap_size = nbits; + words = (nbits + BITS (uword) - 1) / BITS (uword); + vec_validate (rp->bitmap, words - 1); + + overhead_space = MMAP_PAGESIZE /* header */ + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + + bit = 0; + data_base = (uword) rp->virtual_base; + + if (a->flags & SVM_FLAGS_NODATA) + rp->flags |= SVM_FLAGS_NEED_DATA_INIT; + + do + { + clib_bitmap_set_no_check (rp->bitmap, bit, 1); + bit++; + overhead_space -= MMAP_PAGESIZE; + data_base += MMAP_PAGESIZE; + } + while (overhead_space > 0); + + rp->data_base = (void *) data_base; + + /* + * Note: although the POSIX spec guarantees that only one + * process enters this block, we have to play games + * to hold off clients until e.g. the mutex is ready + */ + rp->version = SVM_VERSION; + + /* setup the data portion of the region */ + + rv = svm_data_region_create (a, rp); + if (rv) + { + clib_warning ("data_region_create: %d", rv); + } + + region_unlock (rp); + + svm_pop_heap (oldheap); + + return ((void *) rp); + } + else + { + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + vec_free (shm_name); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + return (0); + } + + time_left = 20; + while (1) + { + if (0 != fstat (svm_fd, &stat)) + { + clib_warning ("fstat failed: %d", errno); + close (svm_fd); + return (0); + } + if (stat.st_size > 0) + { + break; + } + if (0 == time_left) + { + clib_warning ("waiting for resize of shm file timed out"); + close (svm_fd); + return (0); + } + ts.tv_sec = 0; + ts.tv_nsec = 100000000; + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + time_left--; + } + + rp = mmap (0, MMAP_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + return (0); + } + /* + * We lost the footrace to create this region; make sure + * the winner has crossed the finish line. + */ + while (rp->version == 0 && deadman++ < 5) + { + sleep (1); + } + + /* + * <bleep>-ed? 
+ */ + if (rp->version == 0) + { + clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION); + close (svm_fd); + munmap (rp, a->size); + return (0); + } + /* Remap now that the region has been placed */ + a->baseva = rp->virtual_base; + a->size = rp->virtual_size; + munmap (rp, MMAP_PAGESIZE); + + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + close (svm_fd); + return (0); + } + + if ((uword) rp != rp->virtual_base) + { + clib_warning ("mmap botch"); + } + + /* + * Try to fix the region mutex if it is held by + * a dead process + */ + pid_holding_region_lock = rp->mutex_owner_pid; + if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0) + { + clib_warning + ("region %s mutex held by dead pid %d, tag %d, force unlock", + rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag); + /* owner pid is nonexistent */ + rp->mutex.__data.__owner = 0; + rp->mutex.__data.__lock = 0; + dead_region_recovery = 1; + } + + if (dead_region_recovery) + clib_warning ("recovery: attempt to re-lock region"); + + region_lock (rp, 2); + oldheap = svm_push_pvt_heap (rp); + vec_add1 (rp->client_pids, getpid ()); + + if (dead_region_recovery) + clib_warning ("recovery: attempt svm_data_region_map"); + + rv = svm_data_region_map (a, rp); + if (rv) + { + clib_warning ("data_region_map: %d", rv); + } + + if (dead_region_recovery) + clib_warning ("unlock and continue"); + + region_unlock (rp); + + svm_pop_heap (oldheap); + + return ((void *) rp); + + } + return 0; /* NOTREACHED */ +} + +static void +svm_mutex_cleanup (void) +{ + int i; + for (i = 0; i < nheld; i++) + { + pthread_mutex_unlock (mutexes_held[i]); + } +} + +static int +svm_region_init_internal (svm_map_region_args_t * a) +{ + svm_region_t *rp; + u64 ticks = clib_cpu_time_now (); + uword randomize_baseva; + + /* guard against klutz calls */ + if (root_rp) + return -1; + + root_rp_refcount++; + + atexit (svm_mutex_cleanup); + + /* Randomize the shared-VM base at init time */ + if (MMAP_PAGESIZE <= (4 << 10)) + randomize_baseva = (ticks & 15) * MMAP_PAGESIZE; + else + randomize_baseva = (ticks & 3) * MMAP_PAGESIZE; + + a->baseva += randomize_baseva; + + rp = svm_map_region (a); + if (!rp) + return -1; + + region_lock (rp, 3); + + /* Set up the main region data structures */ + if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) + { + svm_main_region_t *mp = 0; + void *oldheap; + + rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT); + + oldheap = svm_push_pvt_heap (rp); + vec_validate (mp, 0); + mp->name_hash = hash_create_string (0, sizeof (uword)); + mp->root_path = a->root_path ? 
format (0, "%s%c", a->root_path, 0) : 0; + mp->uid = a->uid; + mp->gid = a->gid; + rp->data_base = mp; + svm_pop_heap (oldheap); + } + region_unlock (rp); + root_rp = rp; + + return 0; +} + +void +svm_region_init (void) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = 0; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = 0; + a->gid = 0; + + svm_region_init_internal (a); +} + +int +svm_region_init_chroot (const char *root_path) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = root_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = 0; + a->gid = 0; + + return svm_region_init_internal (a); +} + +void +svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid) +{ + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = root_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = uid; + a->gid = gid; + + svm_region_init_internal (a); +} + +void +svm_region_init_args (svm_map_region_args_t * a) +{ + svm_region_init_internal (a); +} + +void * +svm_region_find_or_create (svm_map_region_args_t * a) +{ + svm_main_region_t *mp; + svm_region_t *rp; + uword need_nbits; + int index, i; + void *oldheap; + uword *p; + u8 *name; + svm_subregion_t *subp; + + ASSERT (root_rp); + + a->size += MMAP_PAGESIZE + + ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + a->size = rnd_pagesize (a->size); + + region_lock (root_rp, 4); + oldheap = svm_push_pvt_heap (root_rp); + mp = root_rp->data_base; + + ASSERT (mp); + + /* Map the named region from the correct chroot environment */ + if (a->root_path == NULL) + a->root_path = (char *) mp->root_path; + + /* + * See if this region is already known. If it is, we're + * almost done... + */ + p = hash_get_mem (mp->name_hash, a->name); + + if (p) + { + rp = svm_map_region (a); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return rp; + } + + /* Create the region. */ + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + + need_nbits = a->size / MMAP_PAGESIZE; + + index = 1; /* $$$ fixme, figure out how many bit to really skip */ + + /* + * Scan the virtual space allocation bitmap, looking for a large + * enough chunk + */ + do + { + if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0) + { + for (i = 0; i < (need_nbits - 1); i++) + { + if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1) + { + index = index + i; + goto next; + } + } + break; + } + index++; + next:; + } + while (index < root_rp->bitmap_size); + + /* Completely out of VM? */ + if (index >= root_rp->bitmap_size) + { + clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)", + root_rp->region_name, a->size, a->size); + svm_pop_heap (oldheap); + region_unlock (root_rp); + return 0; + } + + /* + * Mark virtual space allocated + */ +#if CLIB_DEBUG > 1 + clib_warning ("set %d bits at index %d", need_nbits, index); +#endif + + for (i = 0; i < need_nbits; i++) + { + clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1); + } + + /* Place this region where it goes... 
*/ + a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE; + + rp = svm_map_region (a); + + pool_get (mp->subregions, subp); + name = format (0, "%s%c", a->name, 0); + subp->subregion_name = name; + + hash_set_mem (mp->name_hash, name, subp - mp->subregions); + + svm_pop_heap (oldheap); + + region_unlock (root_rp); + + return (rp); +} + +void +svm_region_unlink (svm_region_t * rp) +{ + svm_map_region_args_t _a, *a = &_a; + svm_main_region_t *mp; + u8 *shm_name; + + ASSERT (root_rp); + ASSERT (rp); + ASSERT (vec_c_string_is_terminated (rp->region_name)); + + mp = root_rp->data_base; + ASSERT (mp); + + a->root_path = (char *) mp->root_path; + a->name = rp->region_name; + shm_name = shm_name_from_svm_map_region_args (a); + if (CLIB_DEBUG > 1) + clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name); + shm_unlink ((const char *) shm_name); + vec_free (shm_name); +} + +/* + * svm_region_unmap + * + * Let go of the indicated region. If the calling process + * is the last customer, throw it away completely. + * The root region mutex guarantees atomicity with respect to + * a new region client showing up at the wrong moment. + */ +void +svm_region_unmap (void *rp_arg) +{ + int i, mypid = getpid (); + int nclients_left; + void *oldheap; + uword virtual_base, virtual_size; + svm_region_t *rp = rp_arg; + char *name; + + /* + * If we take a signal while holding one or more shared-memory + * mutexes, we may end up back here from an otherwise + * benign exit handler. Bail out to avoid a recursive + * mutex screw-up. + */ + if (nheld) + return; + + ASSERT (rp); + ASSERT (root_rp); + + if (CLIB_DEBUG > 1) + clib_warning ("[%d] unmap region %s", getpid (), rp->region_name); + + region_lock (root_rp, 5); + region_lock (rp, 6); + + oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */ + + /* Remove the caller from the list of mappers */ + for (i = 0; i < vec_len (rp->client_pids); i++) + { + if (rp->client_pids[i] == mypid) + { + vec_delete (rp->client_pids, 1, i); + goto found; + } + } + clib_warning ("pid %d AWOL", mypid); + +found: + + svm_pop_heap (oldheap); + + nclients_left = vec_len (rp->client_pids); + virtual_base = rp->virtual_base; + virtual_size = rp->virtual_size; + + if (nclients_left == 0) + { + int index, nbits, i; + svm_main_region_t *mp; + uword *p; + svm_subregion_t *subp; + + /* Kill the region, last guy on his way out */ + + oldheap = svm_push_pvt_heap (root_rp); + name = vec_dup (rp->region_name); + + virtual_base = rp->virtual_base; + virtual_size = rp->virtual_size; + + /* Figure out which bits to clear in the root region bitmap */ + index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE; + + nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE; + +#if CLIB_DEBUG > 1 + clib_warning ("clear %d bits at index %d", nbits, index); +#endif + /* Give back the allocated VM */ + for (i = 0; i < nbits; i++) + { + clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0); + } + + mp = root_rp->data_base; + + p = hash_get_mem (mp->name_hash, name); + + /* Better never happen ... 
*/ + if (p == NULL) + { + region_unlock (rp); + region_unlock (root_rp); + svm_pop_heap (oldheap); + clib_warning ("Region name '%s' not found?", name); + return; + } + + /* Remove from the root region subregion pool */ + subp = mp->subregions + p[0]; + pool_put (mp->subregions, subp); + + hash_unset_mem (mp->name_hash, name); + + vec_free (name); + + region_unlock (rp); + svm_region_unlink (rp); + munmap ((void *) virtual_base, virtual_size); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return; + } + + region_unlock (rp); + region_unlock (root_rp); + + munmap ((void *) virtual_base, virtual_size); +} + +/* + * svm_region_exit + */ +void +svm_region_exit () +{ + void *oldheap; + int i, mypid = getpid (); + uword virtual_base, virtual_size; + + /* It felt so nice we did it twice... */ + if (root_rp == 0) + return; + + if (--root_rp_refcount > 0) + return; + + /* + * If we take a signal while holding one or more shared-memory + * mutexes, we may end up back here from an otherwise + * benign exit handler. Bail out to avoid a recursive + * mutex screw-up. + */ + if (nheld) + return; + + region_lock (root_rp, 7); + oldheap = svm_push_pvt_heap (root_rp); + + virtual_base = root_rp->virtual_base; + virtual_size = root_rp->virtual_size; + + for (i = 0; i < vec_len (root_rp->client_pids); i++) + { + if (root_rp->client_pids[i] == mypid) + { + vec_delete (root_rp->client_pids, 1, i); + goto found; + } + } + clib_warning ("pid %d AWOL", mypid); + +found: + + if (vec_len (root_rp->client_pids) == 0) + svm_region_unlink (root_rp); + + region_unlock (root_rp); + svm_pop_heap (oldheap); + + root_rp = 0; + munmap ((void *) virtual_base, virtual_size); +} + +void +svm_client_scan_this_region_nolock (svm_region_t * rp) +{ + int j; + int mypid = getpid (); + void *oldheap; + + for (j = 0; j < vec_len (rp->client_pids); j++) + { + if (mypid == rp->client_pids[j]) + continue; + if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) + { + clib_warning ("%s: cleanup ghost pid %d", + rp->region_name, rp->client_pids[j]); + /* nb: client vec in rp->region_heap */ + oldheap = svm_push_pvt_heap (rp); + vec_delete (rp->client_pids, 1, j); + j--; + svm_pop_heap (oldheap); + } + } +} + + +/* + * Scan svm regions for dead clients + */ +void +svm_client_scan (const char *root_path) +{ + int i, j; + svm_main_region_t *mp; + svm_map_region_args_t *a = 0; + svm_region_t *root_rp; + svm_region_t *rp; + svm_subregion_t *subp; + u8 *name = 0; + u8 **svm_names = 0; + void *oldheap; + int mypid = getpid (); + + vec_validate (a, 0); + + svm_region_init_chroot (root_path); + + root_rp = svm_get_root_rp (); + + pthread_mutex_lock (&root_rp->mutex); + + mp = root_rp->data_base; + + for (j = 0; j < vec_len (root_rp->client_pids); j++) + { + if (mypid == root_rp->client_pids[j]) + continue; + if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0)) + { + clib_warning ("%s: cleanup ghost pid %d", + root_rp->region_name, root_rp->client_pids[j]); + /* nb: client vec in root_rp->region_heap */ + oldheap = svm_push_pvt_heap (root_rp); + vec_delete (root_rp->client_pids, 1, j); + j--; + svm_pop_heap (oldheap); + } + } + + /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. 
+ */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + vec_validate (a, 0); + a->root_path = root_path; + a->name = (char *) svm_names[i]; + rp = svm_region_find_or_create (a); + if (rp) + { + pthread_mutex_lock (&rp->mutex); + + svm_client_scan_this_region_nolock (rp); + + pthread_mutex_unlock (&rp->mutex); + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + vec_free (a); + } + vec_free (svm_names); + + svm_region_exit (); + + vec_free (a); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm.h b/src/svm/svm.h new file mode 100644 index 00000000..894c3d95 --- /dev/null +++ b/src/svm/svm.h @@ -0,0 +1,107 @@ +/* + *------------------------------------------------------------------ + * svm.h - shared VM allocation, mmap(...MAP_FIXED...) + * brain police + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __included_svm_h__ +#define __included_svm_h__ + +#include <pthread.h> +#include <vppinfra/clib.h> +#include <vppinfra/mem.h> +#include <svm/svm_common.h> + +#define MMAP_PAGESIZE (clib_mem_get_page_size()) + +static inline void * +svm_mem_alloc (svm_region_t * rp, uword size) +{ + u8 *oldheap; + ASSERT (rp->flags & SVM_FLAGS_MHEAP); + u8 *rv; + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + rv = clib_mem_alloc (size); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + return (rv); +} + +static inline void * +svm_mem_alloc_aligned_at_offset (svm_region_t * rp, + uword size, uword align, uword offset) +{ + u8 *oldheap; + ASSERT (rp->flags & SVM_FLAGS_MHEAP); + u8 *rv; + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + rv = clib_mem_alloc_aligned_at_offset (size, align, offset, + 1 /* yes, call os_out_of_memory */ ); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + return (rv); +} + +static inline void +svm_mem_free (svm_region_t * rp, void *ptr) +{ + u8 *oldheap; + ASSERT (rp->flags & SVM_FLAGS_MHEAP); + + pthread_mutex_lock (&rp->mutex); + oldheap = clib_mem_set_heap (rp->data_heap); + clib_mem_free (ptr); + clib_mem_set_heap (oldheap); + pthread_mutex_unlock (&rp->mutex); + +} + +static inline void * +svm_push_pvt_heap (svm_region_t * rp) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (rp->region_heap); + return ((void *) oldheap); +} + +static inline void * +svm_push_data_heap (svm_region_t * rp) +{ + u8 *oldheap; + oldheap = clib_mem_set_heap (rp->data_heap); + return ((void *) oldheap); +} + +static inline void +svm_pop_heap (void *oldheap) +{ + clib_mem_set_heap (oldheap); +} + +#endif /* __included_svm_h__ */ + +/* + * fd.io 
coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h new file mode 100644 index 00000000..ea3ec87a --- /dev/null +++ b/src/svm/svm_common.h @@ -0,0 +1,135 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __included_svm_common_h__ +#define __included_svm_common_h__ + +#include <stdarg.h> +#include <pthread.h> +#include <vppinfra/types.h> + +#define SVM_VERSION ((1<<16) | 1) /* set to declare region ready. */ + +#define SVM_FLAGS_MHEAP (1<<0) /* region contains an mheap */ +#define SVM_FLAGS_FILE (1<<1) /* region backed by one or more files */ +#define SVM_FLAGS_NODATA (1<<2) /* region will be further subdivided */ +#define SVM_FLAGS_NEED_DATA_INIT (1<<3) + +#define SVM_PVT_MHEAP_SIZE (128<<10) /* region's private mheap (128k) */ + +typedef struct svm_region_ +{ + volatile uword version; + pthread_mutex_t mutex; + pthread_cond_t condvar; + int mutex_owner_pid; /* in case of trouble */ + int mutex_owner_tag; + uword flags; + uword virtual_base; /* base of the region object */ + uword virtual_size; + void *region_heap; + void *data_base; /* data portion base address */ + void *data_heap; /* data heap, if any */ + volatile void *user_ctx; /* user context pointer */ + /* stuff allocated in the region's heap */ + uword bitmap_size; /* nbits in virtual alloc bitmap */ + uword *bitmap; /* the bitmap */ + char *region_name; + char *backing_file; + char **filenames; + uword *client_pids; + /* pad */ + + /* next page: + * (64K) clib heap for the region itself + * + * data_base -> whatever is in this region + */ + +} svm_region_t; + +typedef struct svm_map_region_args_ +{ + const char *root_path; /* NULL means use the truly global arena */ + const char *name; + u64 baseva; + u64 size; + u64 pvt_heap_size; + uword flags; + char *backing_file; + uword backing_mmap_size; + /* uid, gid to own the svm region(s) */ + int uid; + int gid; +} svm_map_region_args_t; + + +/* + * Memory shared across all router instances. Packet buffers, etc + * Base should be "out of the way," and size should be big enough to + * cover everything we plan to put here. + */ +#define SVM_GLOBAL_REGION_BASEVA 0x30000000 +#define SVM_GLOBAL_REGION_SIZE (64<<20) +#define SVM_GLOBAL_REGION_NAME "/global_vm" + +/* + * Memory shared across individual router instances. 
+ */ +#define SVM_OVERLAY_REGION_BASEVA \ + (SVM_GLOBAL_REGION_BASEVA + SVM_GLOBAL_REGION_SIZE) +#define SVM_OVERLAY_REGION_SIZE (1<<20) +#define SVM_OVERLAY_REGION_BASENAME "/overlay_vm" + +typedef struct +{ + u8 *subregion_name; +} svm_subregion_t; + +typedef struct +{ + svm_subregion_t *subregions; /* subregion pool */ + uword *name_hash; + u8 *root_path; + int uid; + int gid; +} svm_main_region_t; + + +void *svm_region_find_or_create (svm_map_region_args_t * a); +void svm_region_init (void); +int svm_region_init_chroot (const char *root_path); +void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); +void svm_region_init_args (svm_map_region_args_t * a); +void svm_region_exit (void); +void svm_region_unmap (void *rp_arg); +void svm_client_scan (const char *root_path); +void svm_client_scan_this_region_nolock (svm_region_t * rp); +u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); +u8 *format_svm_region (u8 * s, va_list * args); + +svm_region_t *svm_get_root_rp (void); + +#endif /* __included_svm_common_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c new file mode 100644 index 00000000..42eb1ee8 --- /dev/null +++ b/src/svm/svm_fifo.c @@ -0,0 +1,838 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <svm/svm_fifo.h> +#include <vppinfra/cpu.h> + +static inline u8 +position_lt (svm_fifo_t * f, u32 a, u32 b) +{ + return (ooo_segment_distance_from_tail (f, a) + < ooo_segment_distance_from_tail (f, b)); +} + +static inline u8 +position_leq (svm_fifo_t * f, u32 a, u32 b) +{ + return (ooo_segment_distance_from_tail (f, a) + <= ooo_segment_distance_from_tail (f, b)); +} + +static inline u8 +position_gt (svm_fifo_t * f, u32 a, u32 b) +{ + return (ooo_segment_distance_from_tail (f, a) + > ooo_segment_distance_from_tail (f, b)); +} + +static inline u32 +position_diff (svm_fifo_t * f, u32 posa, u32 posb) +{ + return ooo_segment_distance_from_tail (f, posa) + - ooo_segment_distance_from_tail (f, posb); +} + +static inline u32 +ooo_segment_end_pos (svm_fifo_t * f, ooo_segment_t * s) +{ + return (s->start + s->length) % f->nitems; +} + +u8 * +format_ooo_segment (u8 * s, va_list * args) +{ + svm_fifo_t *f = va_arg (*args, svm_fifo_t *); + ooo_segment_t *seg = va_arg (*args, ooo_segment_t *); + u32 normalized_start = (seg->start + f->nitems - f->tail) % f->nitems; + s = format (s, "[%u, %u], len %u, next %d, prev %d", normalized_start, + (normalized_start + seg->length) % f->nitems, seg->length, + seg->next, seg->prev); + return s; +} + +u8 * +svm_fifo_dump_trace (u8 * s, svm_fifo_t * f) +{ +#if SVM_FIFO_TRACE + svm_fifo_trace_elem_t *seg = 0; + int i = 0; + + if (f->trace) + { + vec_foreach (seg, f->trace) + { + s = format (s, "{%u, %u, %u}, ", seg->offset, seg->len, seg->action); + i++; + if (i % 5 == 0) + s = format (s, "\n"); + } + s = format (s, "\n"); + } + return s; +#else + return 0; +#endif +} + +u8 * +svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose) +{ + int i, trace_len; + u8 *data = 0; + svm_fifo_trace_elem_t *trace; + u32 offset; + svm_fifo_t *dummy_fifo; + + if (!f) + return s; + +#if SVM_FIFO_TRACE + trace = f->trace; + trace_len = vec_len (trace); +#else + trace = 0; + trace_len = 0; +#endif + + dummy_fifo = svm_fifo_create (f->nitems); + memset (f->data, 0xFF, f->nitems); + + vec_validate (data, f->nitems); + for (i = 0; i < vec_len (data); i++) + data[i] = i; + + for (i = 0; i < trace_len; i++) + { + offset = trace[i].offset; + if (trace[i].action == 1) + { + if (verbose) + s = format (s, "adding [%u, %u]:", trace[i].offset, + (trace[i].offset + + trace[i].len) % dummy_fifo->nitems); + svm_fifo_enqueue_with_offset (dummy_fifo, trace[i].offset, + trace[i].len, &data[offset]); + } + else if (trace[i].action == 2) + { + if (verbose) + s = format (s, "adding [%u, %u]:", 0, trace[i].len); + svm_fifo_enqueue_nowait (dummy_fifo, trace[i].len, &data[offset]); + } + else if (!no_read) + { + if (verbose) + s = format (s, "read: %u", trace[i].len); + svm_fifo_dequeue_drop (dummy_fifo, trace[i].len); + } + if (verbose) + s = format (s, "%U", format_svm_fifo, dummy_fifo, 1); + } + + s = format (s, "result: %U", format_svm_fifo, dummy_fifo, 1); + + return s; +} + +u8 * +format_ooo_list (u8 * s, va_list * args) +{ + svm_fifo_t *f = va_arg (*args, svm_fifo_t *); + u32 ooo_segment_index = f->ooos_list_head; + ooo_segment_t *seg; + + while (ooo_segment_index != OOO_SEGMENT_INVALID_INDEX) + { + seg = pool_elt_at_index (f->ooo_segments, ooo_segment_index); + s = format (s, " %U\n", format_ooo_segment, f, seg); + ooo_segment_index = seg->next; + } + + return s; +} + +u8 * +format_svm_fifo (u8 * s, va_list * args) +{ + svm_fifo_t *f = va_arg (*args, svm_fifo_t *); + int verbose = va_arg (*args, int); + + s = format (s, "cursize %u nitems %u has_event %d\n", + f->cursize, 
f->nitems, f->has_event); + s = format (s, " head %d tail %d\n", f->head, f->tail); + + if (verbose > 1) + s = format + (s, " server session %d thread %d client session %d thread %d\n", + f->master_session_index, f->master_thread_index, + f->client_session_index, f->client_thread_index); + + if (verbose) + { + s = format (s, " ooo pool %d active elts newest %u\n", + pool_elts (f->ooo_segments), f->ooos_newest); + if (svm_fifo_has_ooo_data (f)) + s = format (s, " %U", format_ooo_list, f, verbose); + } + return s; +} + +/** create an svm fifo, in the current heap. Fails vs blow up the process */ +svm_fifo_t * +svm_fifo_create (u32 data_size_in_bytes) +{ + svm_fifo_t *f; + u32 rounded_data_size; + + /* always round fifo data size to the next highest power-of-two */ + rounded_data_size = (1 << (max_log2 (data_size_in_bytes))); + f = clib_mem_alloc_aligned_or_null (sizeof (*f) + rounded_data_size, + CLIB_CACHE_LINE_BYTES); + if (f == 0) + return 0; + + memset (f, 0, sizeof (*f)); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + f->refcnt = 1; + return (f); +} + +void +svm_fifo_free (svm_fifo_t * f) +{ + ASSERT (f->refcnt > 0); + + if (--f->refcnt == 0) + { + pool_free (f->ooo_segments); + clib_mem_free (f); + } +} + +always_inline ooo_segment_t * +ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) +{ + ooo_segment_t *s; + + pool_get (f->ooo_segments, s); + + s->start = start; + s->length = length; + + s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; + + return s; +} + +always_inline void +ooo_segment_del (svm_fifo_t * f, u32 index) +{ + ooo_segment_t *cur, *prev = 0, *next = 0; + cur = pool_elt_at_index (f->ooo_segments, index); + + if (cur->next != OOO_SEGMENT_INVALID_INDEX) + { + next = pool_elt_at_index (f->ooo_segments, cur->next); + next->prev = cur->prev; + } + + if (cur->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, cur->prev); + prev->next = cur->next; + } + else + { + f->ooos_list_head = cur->next; + } + + pool_put (f->ooo_segments, cur); +} + +/** + * Add segment to fifo's out-of-order segment list. Takes care of merging + * adjacent segments and removing overlapping ones. + */ +static void +ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) +{ + ooo_segment_t *s, *new_s, *prev, *next, *it; + u32 new_index, s_end_pos, s_index; + u32 normalized_position, normalized_end_position; + + ASSERT (offset + length <= ooo_segment_distance_from_tail (f, f->head)); + normalized_position = (f->tail + offset) % f->nitems; + normalized_end_position = (f->tail + offset + length) % f->nitems; + + f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; + + if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) + { + s = ooo_segment_new (f, normalized_position, length); + f->ooos_list_head = s - f->ooo_segments; + f->ooos_newest = f->ooos_list_head; + return; + } + + /* Find first segment that starts after new segment */ + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + while (s->next != OOO_SEGMENT_INVALID_INDEX + && position_lt (f, s->start, normalized_position)) + s = pool_elt_at_index (f->ooo_segments, s->next); + + /* If we have a previous and we overlap it, use it as starting point */ + prev = ooo_segment_get_prev (f, s); + if (prev + && position_leq (f, normalized_position, ooo_segment_end_pos (f, prev))) + { + s = prev; + s_end_pos = ooo_segment_end_pos (f, s); + + /* Since we have previous, normalized start position cannot be smaller + * than prev->start. 
Check tail */ + ASSERT (position_lt (f, s->start, normalized_position)); + goto check_tail; + } + + s_index = s - f->ooo_segments; + s_end_pos = ooo_segment_end_pos (f, s); + + /* No overlap, add before current segment */ + if (position_lt (f, normalized_end_position, s->start)) + { + new_s = ooo_segment_new (f, normalized_position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + new_s->prev = s->prev; + prev = pool_elt_at_index (f->ooo_segments, new_s->prev); + prev->next = new_index; + } + else + { + /* New head */ + f->ooos_list_head = new_index; + } + + new_s->next = s_index; + s->prev = new_index; + f->ooos_newest = new_index; + return; + } + /* No overlap, add after current segment */ + else if (position_gt (f, normalized_position, s_end_pos)) + { + new_s = ooo_segment_new (f, normalized_position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + /* Needs to be last */ + ASSERT (s->next == OOO_SEGMENT_INVALID_INDEX); + + new_s->prev = s_index; + s->next = new_index; + f->ooos_newest = new_index; + + return; + } + + /* + * Merge needed + */ + + /* Merge at head */ + if (position_lt (f, normalized_position, s->start)) + { + s->start = normalized_position; + s->length = position_diff (f, s_end_pos, s->start); + f->ooos_newest = s - f->ooo_segments; + } + +check_tail: + + /* Overlapping tail */ + if (position_gt (f, normalized_end_position, s_end_pos)) + { + s->length = position_diff (f, normalized_end_position, s->start); + + /* Remove the completely overlapped segments in the tail */ + it = ooo_segment_next (f, s); + while (it && position_leq (f, ooo_segment_end_pos (f, it), + normalized_end_position)) + { + next = ooo_segment_next (f, it); + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* If partial overlap with last, merge */ + if (it && position_leq (f, it->start, normalized_end_position)) + { + s->length = position_diff (f, ooo_segment_end_pos (f, it), + s->start); + ooo_segment_del (f, it - f->ooo_segments); + } + f->ooos_newest = s - f->ooo_segments; + } +} + +/** + * Removes segments that can now be enqueued because the fifo's tail has + * advanced. Returns the number of bytes added to tail. + */ +static int +ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) +{ + ooo_segment_t *s; + u32 index, bytes = 0; + i32 diff; + + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + diff = ooo_segment_distance_to_tail (f, s->start); + + ASSERT (diff != n_bytes_enqueued); + + if (diff > n_bytes_enqueued) + return 0; + + /* If last tail update overlaps one/multiple ooo segments, remove them */ + while (0 <= diff && diff < n_bytes_enqueued) + { + index = s - f->ooo_segments; + + /* Segment end is beyond the tail. 
Advance tail and remove segment */ + if (s->length > diff) + { + bytes = s->length - diff; + f->tail += bytes; + f->tail %= f->nitems; + ooo_segment_del (f, index); + break; + } + + /* If we have next go on */ + if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + s = pool_elt_at_index (f->ooo_segments, s->next); + diff = ooo_segment_distance_to_tail (f, s->start); + ooo_segment_del (f, index); + } + /* End of search */ + else + { + ooo_segment_del (f, index); + break; + } + } + + ASSERT (bytes <= f->nitems); + return bytes; +} + +static int +svm_fifo_enqueue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + /* read cursize, which can only increase while we're working */ + cursize = svm_fifo_max_dequeue (f); + f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; + + if (PREDICT_FALSE (cursize == f->nitems)) + return -2; /* fifo stuffed */ + + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (nitems - cursize) < max_bytes ? + (nitems - cursize) : max_bytes; + + if (PREDICT_TRUE (copy_from_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->tail) < total_copy_bytes) + ? (nitems - f->tail) : total_copy_bytes; + + clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes); + f->tail += first_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes, + second_copy_bytes); + f->tail += second_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + } + } + else + { + ASSERT (0); + + /* Account for a zero-copy enqueue done elsewhere */ + ASSERT (max_bytes <= (nitems - cursize)); + f->tail += max_bytes; + f->tail = f->tail % nitems; + total_copy_bytes = max_bytes; + } + + svm_fifo_trace_add (f, f->head, total_copy_bytes, 2); + + /* Any out-of-order segments to collect? */ + if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX)) + total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes); + + /* Atomically increase the queue length */ + ASSERT (cursize + total_copy_bytes <= nitems); + __sync_fetch_and_add (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) \ + { return fn (f, max_bytes, copy_from_here);} + +static int +svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes, + u8 * copy_from_here) +{ + return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here); +} + +foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE, + svm_fifo_enqueue_nowait_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma); + +int +svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) +{ +#if CLIB_DEBUG > 0 + return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here); +#else + static int (*fp) (svm_fifo_t *, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select (); + + return (*fp) (f, max_bytes, copy_from_here); +#endif +} + +/** + * Enqueue a future segment. 
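+ * Data is copied to (tail + offset) % nitems and recorded in the
+ * out-of-order segment list; the tail only advances later, when in-order
+ * enqueues let ooo_segment_try_collect absorb the segment.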
+ * + * Two choices: either copies the entire segment, or copies nothing + * Returns 0 of the entire segment was copied + * Returns -1 if none of the segment was copied due to lack of space + */ +static int +svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f, + u32 offset, + u32 required_bytes, + u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems, normalized_offset; + + f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; + + /* read cursize, which can only increase while we're working */ + cursize = svm_fifo_max_dequeue (f); + nitems = f->nitems; + + ASSERT (required_bytes < nitems); + + normalized_offset = (f->tail + offset) % nitems; + + /* Will this request fit? */ + if ((required_bytes + offset) > (nitems - cursize)) + return -1; + + svm_fifo_trace_add (f, offset, required_bytes, 1); + + ooo_segment_add (f, offset, required_bytes); + + /* Number of bytes we're going to copy */ + total_copy_bytes = required_bytes; + + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - normalized_offset) < total_copy_bytes) + ? (nitems - normalized_offset) : total_copy_bytes; + + clib_memcpy (&f->data[normalized_offset], copy_from_here, first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + normalized_offset += first_copy_bytes; + normalized_offset %= nitems; + + ASSERT (normalized_offset == 0); + + clib_memcpy (&f->data[normalized_offset], + copy_from_here + first_copy_bytes, second_copy_bytes); + } + + return (0); +} + + +int +svm_fifo_enqueue_with_offset (svm_fifo_t * f, + u32 offset, + u32 required_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_with_offset_internal (f, offset, required_bytes, + copy_from_here); +} + + +static int +svm_fifo_dequeue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + /* read cursize, which can only increase while we're working */ + cursize = svm_fifo_max_dequeue (f); + if (PREDICT_FALSE (cursize == 0)) + return -2; /* nothing in the fifo */ + + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->head) < total_copy_bytes) + ? (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + f->head += first_copy_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, + &f->data[f->head], second_copy_bytes); + f->head += second_copy_bytes; + f->head = (f->head == nitems) ? 
0 : f->head; + } + } + else + { + ASSERT (0); + /* Account for a zero-copy dequeue done elsewhere */ + ASSERT (max_bytes <= cursize); + f->head += max_bytes; + f->head = f->head % nitems; + cursize -= max_bytes; + total_copy_bytes = max_bytes; + } + + ASSERT (f->head <= nitems); + ASSERT (cursize >= total_copy_bytes); + __sync_fetch_and_sub (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +static int +svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) +{ + return svm_fifo_dequeue_internal (f, max_bytes, copy_here); +} + +#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, \ + u8 * copy_here) \ + { return fn (f, max_bytes, copy_here);} + +foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE, + svm_fifo_dequeue_nowait_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma); + +int +svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) +{ +#if CLIB_DEBUG > 0 + return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here); +#else + static int (*fp) (svm_fifo_t *, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select (); + + return (*fp) (f, max_bytes, copy_here); +#endif +} + +static int +svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, + u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems, real_head; + + /* read cursize, which can only increase while we're working */ + cursize = svm_fifo_max_dequeue (f); + if (PREDICT_FALSE (cursize < relative_offset)) + return -2; /* nothing in the fifo */ + + nitems = f->nitems; + real_head = f->head + relative_offset; + real_head = real_head >= nitems ? real_head - nitems : real_head; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize - relative_offset < max_bytes) ? + cursize - relative_offset : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = + ((nitems - real_head) < total_copy_bytes) ? 
+ (nitems - real_head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[real_head], first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, &f->data[0], + second_copy_bytes); + } + } + return total_copy_bytes; +} + +#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 relative_offset, u32 max_bytes, \ + u8 * copy_here) \ + { return fn (f, relative_offset, max_bytes, copy_here);} + +foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma); + +int +svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, + u8 * copy_here) +{ +#if CLIB_DEBUG > 0 + return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here); +#else + static int (*fp) (svm_fifo_t *, u32, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_peek_ma_multiarch_select (); + + return (*fp) (f, relative_offset, max_bytes, copy_here); +#endif +} + +int +svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes) +{ + u32 total_drop_bytes, first_drop_bytes, second_drop_bytes; + u32 cursize, nitems; + + /* read cursize, which can only increase while we're working */ + cursize = svm_fifo_max_dequeue (f); + if (PREDICT_FALSE (cursize == 0)) + return -2; /* nothing in the fifo */ + + nitems = f->nitems; + + /* Number of bytes we're going to drop */ + total_drop_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + svm_fifo_trace_add (f, f->tail, total_drop_bytes, 3); + + /* Number of bytes in first copy segment */ + first_drop_bytes = + ((nitems - f->head) < total_drop_bytes) ? + (nitems - f->head) : total_drop_bytes; + f->head += first_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second drop segment, if any */ + second_drop_bytes = total_drop_bytes - first_drop_bytes; + if (second_drop_bytes) + { + f->head += second_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + } + + ASSERT (f->head <= nitems); + ASSERT (cursize >= total_drop_bytes); + __sync_fetch_and_sub (&f->cursize, total_drop_bytes); + + return total_drop_bytes; +} + +u32 +svm_fifo_number_ooo_segments (svm_fifo_t * f) +{ + return pool_elts (f->ooo_segments); +} + +ooo_segment_t * +svm_fifo_first_ooo_segment (svm_fifo_t * f) +{ + return pool_elt_at_index (f->ooo_segments, f->ooos_list_head); +} + +/** + * Set fifo pointers to requested offset + */ +void +svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer) +{ + f->head = f->tail = pointer % f->nitems; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h new file mode 100644 index 00000000..84901d02 --- /dev/null +++ b/src/svm/svm_fifo.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ssvm_fifo_h__ +#define __included_ssvm_fifo_h__ + +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <pthread.h> + +/** Out-of-order segment */ +typedef struct +{ + u32 next; /**< Next linked-list element pool index */ + u32 prev; /**< Previous linked-list element pool index */ + + u32 start; /**< Start of segment, normalized*/ + u32 length; /**< Length of segment */ +} ooo_segment_t; + +format_function_t format_ooo_segment; +format_function_t format_ooo_list; + +#define SVM_FIFO_TRACE (0) +#define OOO_SEGMENT_INVALID_INDEX ((u32)~0) + +typedef struct +{ + u32 offset; + u32 len; + u32 action; +} svm_fifo_trace_elem_t; + +typedef struct _svm_fifo +{ + volatile u32 cursize; /**< current fifo size */ + u32 nitems; + CLIB_CACHE_LINE_ALIGN_MARK (end_cursize); + + volatile u32 has_event; /**< non-zero if deq event exists */ + + /* Backpointers */ + u32 master_session_index; + u32 client_session_index; + u8 master_thread_index; + u8 client_thread_index; + u32 segment_manager; + CLIB_CACHE_LINE_ALIGN_MARK (end_shared); + u32 head; + CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); + + /* producer */ + u32 tail; + + ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ + u32 ooos_list_head; /**< Head of out-of-order linked-list */ + u32 ooos_newest; /**< Last segment to have been updated */ + struct _svm_fifo *next; /**< next in freelist/active chain */ + struct _svm_fifo *prev; /**< prev in active chain */ +#if SVM_FIFO_TRACE + svm_fifo_trace_elem_t *trace; +#endif + u32 freelist_index; /**< aka log2(allocated_size) - const. */ + i8 refcnt; /**< reference count */ + CLIB_CACHE_LINE_ALIGN_MARK (data); +} svm_fifo_t; + +#if SVM_FIFO_TRACE +#define svm_fifo_trace_add(_f, _s, _l, _t) \ +{ \ + svm_fifo_trace_elem_t *trace_elt; \ + vec_add2(_f->trace, trace_elt, 1); \ + trace_elt->offset = _s; \ + trace_elt->len = _l; \ + trace_elt->action = _t; \ +} +#else +#define svm_fifo_trace_add(_f, _s, _l, _t) +#endif + +u8 *svm_fifo_dump_trace (u8 * s, svm_fifo_t * f); +u8 *svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose); + +static inline u32 +svm_fifo_max_dequeue (svm_fifo_t * f) +{ + return f->cursize; +} + +static inline u32 +svm_fifo_max_enqueue (svm_fifo_t * f) +{ + return f->nitems - svm_fifo_max_dequeue (f); +} + +static inline u8 +svm_fifo_has_ooo_data (svm_fifo_t * f) +{ + return f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX; +} + +/** + * Sets fifo event flag. + * + * @return 1 if flag was not set. + */ +always_inline u8 +svm_fifo_set_event (svm_fifo_t * f) +{ + /* Probably doesn't need to be atomic. Still, better avoid surprises */ + return __sync_lock_test_and_set (&f->has_event, 1) == 0; +} + +/** + * Unsets fifo event flag. + */ +always_inline void +svm_fifo_unset_event (svm_fifo_t * f) +{ + /* Probably doesn't need to be atomic. 
Still, better avoid surprises */ + __sync_lock_release (&f->has_event); +} + +svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes); +void svm_fifo_free (svm_fifo_t * f); + +int svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, + u8 * copy_from_here); +int svm_fifo_enqueue_with_offset (svm_fifo_t * f, u32 offset, + u32 required_bytes, u8 * copy_from_here); +int svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here); + +int svm_fifo_peek (svm_fifo_t * f, u32 offset, u32 max_bytes, u8 * copy_here); +int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes); +u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); +ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); +void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer); + +format_function_t format_svm_fifo; + +always_inline ooo_segment_t * +svm_fifo_newest_ooo_segment (svm_fifo_t * f) +{ + if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX) + return 0; + return pool_elt_at_index (f->ooo_segments, f->ooos_newest); +} + +always_inline void +svm_fifo_newest_ooo_segment_reset (svm_fifo_t * f) +{ + f->ooos_newest = OOO_SEGMENT_INVALID_INDEX; +} + +always_inline u32 +ooo_segment_distance_from_tail (svm_fifo_t * f, u32 pos) +{ + /* Ambiguous. Assumption is that ooo segments don't touch tail */ + if (PREDICT_FALSE (pos == f->tail && f->tail == f->head)) + return f->nitems; + + return (((f->nitems + pos) - f->tail) % f->nitems); +} + +always_inline u32 +ooo_segment_distance_to_tail (svm_fifo_t * f, u32 pos) +{ + return (((f->nitems + f->tail) - pos) % f->nitems); +} + +always_inline u32 +ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ooo_segment_distance_from_tail (f, s->start); +} + +always_inline u32 +ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ooo_segment_distance_from_tail (f, s->start) + s->length; +} + +always_inline u32 +ooo_segment_length (svm_fifo_t * f, ooo_segment_t * s) +{ + return s->length; +} + +always_inline ooo_segment_t * +ooo_segment_get_prev (svm_fifo_t * f, ooo_segment_t * s) +{ + if (s->prev == OOO_SEGMENT_INVALID_INDEX) + return 0; + return pool_elt_at_index (f->ooo_segments, s->prev); +} + +always_inline ooo_segment_t * +ooo_segment_next (svm_fifo_t * f, ooo_segment_t * s) +{ + if (s->next == OOO_SEGMENT_INVALID_INDEX) + return 0; + return pool_elt_at_index (f->ooo_segments, s->next); +} + +#endif /* __included_ssvm_fifo_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c new file mode 100644 index 00000000..da2b7935 --- /dev/null +++ b/src/svm/svm_fifo_segment.c @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <svm/svm_fifo_segment.h> + +svm_fifo_segment_main_t svm_fifo_segment_main; + +static void +allocate_new_fifo_chunk (svm_fifo_segment_header_t * fsh, + u32 data_size_in_bytes, int chunk_size) +{ + int freelist_index; + u32 size; + u8 *fifo_space; + u32 rounded_data_size; + svm_fifo_t *f; + int i; + + rounded_data_size = (1 << (max_log2 (data_size_in_bytes))); + freelist_index = max_log2 (rounded_data_size) + - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE); + + /* Calculate space requirement $$$ round-up data_size_in_bytes */ + size = (sizeof (*f) + rounded_data_size) * chunk_size; + + /* Allocate fifo space. May fail. */ + fifo_space = clib_mem_alloc_aligned_at_offset + (size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ , + 0 /* os_out_of_memory */ ); + + /* Out of space.. */ + if (fifo_space == 0) + return; + + /* Carve fifo space */ + f = (svm_fifo_t *) fifo_space; + for (i = 0; i < chunk_size; i++) + { + f->freelist_index = freelist_index; + f->next = fsh->free_fifos[freelist_index]; + fsh->free_fifos[freelist_index] = f; + fifo_space += sizeof (*f) + rounded_data_size; + f = (svm_fifo_t *) fifo_space; + } +} + +static void +preallocate_fifo_pairs (svm_fifo_segment_private_t * s, + svm_fifo_segment_create_args_t * a) +{ + svm_fifo_segment_header_t *fsh = s->h; + u32 rx_fifo_size, tx_fifo_size, pairs_to_allocate; + u32 rx_rounded_data_size, tx_rounded_data_size, pair_size; + svm_fifo_t *f; + u8 *rx_fifo_space, *tx_fifo_space; + int rx_freelist_index, tx_freelist_index; + int i; + + /* Parameter check */ + if (a->rx_fifo_size == 0 || a->tx_fifo_size == 0 + || a->preallocated_fifo_pairs == 0) + return; + + if (a->rx_fifo_size < FIFO_SEGMENT_MIN_FIFO_SIZE || + a->rx_fifo_size > FIFO_SEGMENT_MAX_FIFO_SIZE) + { + clib_warning ("rx fifo_size out of range %d", a->rx_fifo_size); + return; + } + + if (a->tx_fifo_size < FIFO_SEGMENT_MIN_FIFO_SIZE || + a->tx_fifo_size > FIFO_SEGMENT_MAX_FIFO_SIZE) + { + clib_warning ("tx fifo_size out of range %d", a->rx_fifo_size); + return; + } + + rx_rounded_data_size = (1 << (max_log2 (a->rx_fifo_size))); + + rx_freelist_index = max_log2 (a->rx_fifo_size) + - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE); + + tx_rounded_data_size = (1 << (max_log2 (a->rx_fifo_size))); + + tx_freelist_index = max_log2 (a->tx_fifo_size) + - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE); + + /* Calculate space requirements */ + pair_size = 2 * sizeof (*f) + rx_rounded_data_size + tx_rounded_data_size; + pairs_to_allocate = clib_min (s->ssvm.ssvm_size / pair_size, + a->preallocated_fifo_pairs); + rx_fifo_size = (sizeof (*f) + rx_rounded_data_size) * pairs_to_allocate; + tx_fifo_size = (sizeof (*f) + tx_rounded_data_size) * pairs_to_allocate; + + vec_validate_init_empty (fsh->free_fifos, + clib_max (rx_freelist_index, tx_freelist_index), + 0); + if (0) + clib_warning ("rx_fifo_size %u (%d mb), tx_fifo_size %u (%d mb)", + rx_fifo_size, rx_fifo_size >> 20, + tx_fifo_size, tx_fifo_size >> 20); + + /* Allocate rx fifo space. May fail. */ + rx_fifo_space = clib_mem_alloc_aligned_at_offset + (rx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ , + 0 /* os_out_of_memory */ ); + + /* Same for TX */ + tx_fifo_space = clib_mem_alloc_aligned_at_offset + (tx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ , + 0 /* os_out_of_memory */ ); + + /* Make sure it worked. Clean up if it didn't... 
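(free the half that was allocated, warn about the one that failed, and return without preallocating)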
*/ + if (rx_fifo_space == 0 || tx_fifo_space == 0) + { + if (rx_fifo_space) + clib_mem_free (rx_fifo_space); + else + clib_warning ("rx fifo preallocation failure: size %d npairs %d", + a->rx_fifo_size, a->preallocated_fifo_pairs); + + if (tx_fifo_space) + clib_mem_free (tx_fifo_space); + else + clib_warning ("tx fifo preallocation failure: size %d nfifos %d", + a->tx_fifo_size, a->preallocated_fifo_pairs); + return; + } + + /* Carve rx fifo space */ + f = (svm_fifo_t *) rx_fifo_space; + for (i = 0; i < pairs_to_allocate; i++) + { + f->freelist_index = rx_freelist_index; + f->next = fsh->free_fifos[rx_freelist_index]; + fsh->free_fifos[rx_freelist_index] = f; + rx_fifo_space += sizeof (*f) + rx_rounded_data_size; + f = (svm_fifo_t *) rx_fifo_space; + } + /* Carve tx fifo space */ + f = (svm_fifo_t *) tx_fifo_space; + for (i = 0; i < pairs_to_allocate; i++) + { + f->freelist_index = tx_freelist_index; + f->next = fsh->free_fifos[tx_freelist_index]; + fsh->free_fifos[tx_freelist_index] = f; + tx_fifo_space += sizeof (*f) + tx_rounded_data_size; + f = (svm_fifo_t *) tx_fifo_space; + } + + /* Account for the pairs allocated */ + a->preallocated_fifo_pairs -= pairs_to_allocate; +} + +/** (master) create an svm fifo segment */ +int +svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.i_am_master = 1; + s->ssvm.my_pid = getpid (); + s->ssvm.name = format (0, "%s%c", a->segment_name, 0); + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_master_init (&s->ssvm, s - sm->segments); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Note: requested_va updated due to seg base addr randomization */ + sm->next_baseva = s->ssvm.requested_va + a->segment_size; + + sh = s->ssvm.sh; + oldheap = ssvm_push_heap (sh); + + /* Set up svm_fifo_segment shared header */ + fsh = clib_mem_alloc (sizeof (*fsh)); + memset (fsh, 0, sizeof (*fsh)); + sh->opaque[0] = fsh; + s->h = fsh; + fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + preallocate_fifo_pairs (s, a); + + ssvm_pop_heap (oldheap); + + sh->ready = 1; + vec_add1 (a->new_segment_indices, s - sm->segments); + return (0); +} + +/** Create an svm fifo segment in process-private memory */ +int +svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a) +{ + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + u8 **heaps = 0; + mheap_t *heap_header; + int segment_count = 1; + u32 rnd_size = 0; + int i; + + if (a->private_segment_count && a->private_segment_size) + { + u8 *heap; + u32 pagesize = clib_mem_get_page_size (); + rnd_size = (a->private_segment_size + (pagesize - 1)) & ~pagesize; + + for (i = 0; i < a->private_segment_count; i++) + { + heap = mheap_alloc (0, rnd_size); + if (heap == 0) + { + clib_unix_warning ("mheap alloc"); + return -1; + } + heap_header = mheap_header (heap); + heap_header->flags |= MHEAP_FLAG_THREAD_SAFE; + vec_add1 (heaps, heap); + } + segment_count = a->private_segment_count; + } + + /* Allocate segments */ + for (i = 0; i < segment_count; i++) + { + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = 
rnd_size; + s->ssvm.i_am_master = 1; + s->ssvm.my_pid = getpid (); + s->ssvm.name = format (0, "%s%c", a->segment_name, 0); + s->ssvm.requested_va = ~0; + + /* Allocate a [sic] shared memory header, in process memory... */ + sh = clib_mem_alloc_aligned (sizeof (*sh), CLIB_CACHE_LINE_BYTES); + s->ssvm.sh = sh; + + memset (sh, 0, sizeof (*sh)); + sh->heap = a->private_segment_count ? heaps[i] : clib_mem_get_heap (); + + /* Set up svm_fifo_segment shared header */ + fsh = clib_mem_alloc (sizeof (*fsh)); + memset (fsh, 0, sizeof (*fsh)); + sh->opaque[0] = fsh; + s->h = fsh; + fsh->flags = FIFO_SEGMENT_F_IS_PRIVATE; + if (!a->private_segment_count) + fsh->flags |= FIFO_SEGMENT_F_IS_MAIN_HEAP; + fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + + if (a->private_segment_count) + { + if (i != 0) + fsh->flags |= FIFO_SEGMENT_F_IS_PREALLOCATED; + oldheap = clib_mem_get_heap (); + clib_mem_set_heap (sh->heap); + preallocate_fifo_pairs (s, a); + clib_mem_set_heap (oldheap); + } + sh->ready = 1; + vec_add1 (a->new_segment_indices, s - sm->segments); + } + vec_free (heaps); + return (0); +} + +/** (slave) attach to an svm fifo segment */ +int +svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.my_pid = getpid (); + s->ssvm.name = format (0, "%s%c", a->segment_name, 0); + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Fish the segment header */ + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + s->h = fsh; + + vec_add1 (a->new_segment_indices, s - sm->segments); + return (0); +} + +void +svm_fifo_segment_delete (svm_fifo_segment_private_t * s) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + if (s->h->flags & FIFO_SEGMENT_F_IS_PRIVATE) + { + /* Don't try to free vpp's heap! */ + if (!(s->h->flags & FIFO_SEGMENT_F_IS_MAIN_HEAP)) + mheap_free (s->ssvm.sh->heap); + clib_mem_free (s->ssvm.sh); + clib_mem_free (s->h); + pool_put (sm->segments, s); + } + else + { + ssvm_delete (&s->ssvm); + pool_put (sm->segments, s); + } +} + +svm_fifo_t * +svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes, + svm_fifo_segment_freelist_t list_index) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + void *oldheap; + int freelist_index; + + /* + * 4K minimum. It's not likely that anything good will happen + * with a smaller FIFO. 
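+ * Requests are bucketed by max_log2, i.e. rounded up to the next power
+ * of two; the freelist index below is max_log2(size) - max_log2(4K).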
+ */ + if (data_size_in_bytes < FIFO_SEGMENT_MIN_FIFO_SIZE || + data_size_in_bytes > FIFO_SEGMENT_MAX_FIFO_SIZE) + { + clib_warning ("fifo size out of range %d", data_size_in_bytes); + return 0; + } + + freelist_index = max_log2 (data_size_in_bytes) + - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE); + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + ssvm_lock_non_recursive (sh, 1); + oldheap = ssvm_push_heap (sh); + + switch (list_index) + { + case FIFO_SEGMENT_RX_FREELIST: + case FIFO_SEGMENT_TX_FREELIST: + vec_validate_init_empty (fsh->free_fifos, freelist_index, 0); + + f = fsh->free_fifos[freelist_index]; + if (PREDICT_FALSE (f == 0)) + { + allocate_new_fifo_chunk (fsh, data_size_in_bytes, + FIFO_SEGMENT_ALLOC_CHUNK_SIZE); + f = fsh->free_fifos[freelist_index]; + } + if (PREDICT_TRUE (f != 0)) + { + fsh->free_fifos[freelist_index] = f->next; + /* (re)initialize the fifo, as in svm_fifo_create */ + memset (f, 0, sizeof (*f)); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + f->refcnt = 1; + f->freelist_index = freelist_index; + goto found; + } + /* FALLTHROUGH */ + case FIFO_SEGMENT_FREELIST_NONE: + break; + + default: + clib_warning ("ignore bogus freelist %d", list_index); + break; + } + + /* Note: this can fail, in which case: create another segment */ + f = svm_fifo_create (data_size_in_bytes); + if (PREDICT_FALSE (f == 0)) + { + ssvm_pop_heap (oldheap); + ssvm_unlock_non_recursive (sh); + return (0); + } + f->freelist_index = freelist_index; + +found: + /* If rx_freelist add to active fifos list. When cleaning up segment, + * we need a list of active sessions that should be disconnected. Since + * both rx and tx fifos keep pointers to the session, it's enough to track + * only one. */ + if (list_index == FIFO_SEGMENT_RX_FREELIST) + { + if (fsh->fifos) + { + fsh->fifos->prev = f; + f->next = fsh->fifos; + } + fsh->fifos = f; + } + fsh->n_active_fifos++; + + ssvm_pop_heap (oldheap); + ssvm_unlock_non_recursive (sh); + return (f); +} + +void +svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f, + svm_fifo_segment_freelist_t list_index) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + int freelist_index; + + ASSERT (f->refcnt > 0); + + if (--f->refcnt > 0) + return; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + freelist_index = f->freelist_index; + + ASSERT (freelist_index < vec_len (fsh->free_fifos)); + + ssvm_lock_non_recursive (sh, 2); + oldheap = ssvm_push_heap (sh); + + switch (list_index) + { + case FIFO_SEGMENT_RX_FREELIST: + /* Remove from active list */ + if (f->prev) + f->prev->next = f->next; + else + fsh->fifos = f->next; + if (f->next) + f->next->prev = f->prev; + /* Fall through: we add only rx fifos to active pool */ + case FIFO_SEGMENT_TX_FREELIST: + /* Add to free list */ + f->next = fsh->free_fifos[freelist_index]; + f->prev = 0; + fsh->free_fifos[freelist_index] = f; + break; + case FIFO_SEGMENT_FREELIST_NONE: + break; + + default: + clib_warning ("ignore bogus freelist %d", list_index); + break; + } + + if (CLIB_DEBUG) + { + f->master_session_index = ~0; + f->master_thread_index = ~0; + } + + fsh->n_active_fifos--; + ssvm_pop_heap (oldheap); + ssvm_unlock_non_recursive (sh); +} + +void +svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + + sm->next_baseva = baseva; + sm->timeout_in_seconds = timeout_in_seconds; +} + +u32 +svm_fifo_segment_index 
(svm_fifo_segment_private_t * s) +{ + return s - svm_fifo_segment_main.segments; +} + +/** + * Retrieve svm segments pool. Used only for debug purposes. + */ +svm_fifo_segment_private_t * +svm_fifo_segment_segments_pool (void) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + return sm->segments; +} + +/** + * Get number of active fifos + */ +u32 +svm_fifo_segment_num_fifos (svm_fifo_segment_private_t * fifo_segment) +{ + return fifo_segment->h->n_active_fifos; +} + +u32 +svm_fifo_segment_num_free_fifos (svm_fifo_segment_private_t * fifo_segment, + u32 fifo_size_in_bytes) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + int i; + u32 count = 0, rounded_data_size, freelist_index; + + sh = fifo_segment->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + /* Count all free fifos? */ + if (fifo_size_in_bytes == ~0) + { + for (i = 0; i < vec_len (fsh->free_fifos); i++) + { + f = fsh->free_fifos[i]; + if (f == 0) + continue; + + while (f) + { + f = f->next; + count++; + } + } + return count; + } + + rounded_data_size = (1 << (max_log2 (fifo_size_in_bytes))); + freelist_index = max_log2 (rounded_data_size) + - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE); + + if (freelist_index > vec_len (fsh->free_fifos)) + return 0; + + f = fsh->free_fifos[freelist_index]; + if (f == 0) + return 0; + + while (f) + { + f = f->next; + count++; + } + return count; +} + +/** + * Segment format function + */ +u8 * +format_svm_fifo_segment (u8 * s, va_list * args) +{ + svm_fifo_segment_private_t *sp + = va_arg (*args, svm_fifo_segment_private_t *); + int verbose = va_arg (*args, int); + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + int i; + u32 count; + uword indent = format_get_indent (s) + 2; + + sh = sp->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + s = format (s, "%USegment Heap: %U\n", format_white_space, indent, + format_mheap, sh->heap, verbose); + s = format (s, "%U segment has %u active fifos\n", + format_white_space, indent, svm_fifo_segment_num_fifos (sp)); + + for (i = 0; i < vec_len (fsh->free_fifos); i++) + { + f = fsh->free_fifos[i]; + if (f == 0) + continue; + count = 0; + while (f) + { + f = f->next; + count++; + } + + s = format (s, "%U%-5u Kb: %u free", + format_white_space, indent + 2, + 1 << (i + max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE) - 10), + count); + } + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h new file mode 100644 index 00000000..5b771328 --- /dev/null +++ b/src/svm/svm_fifo_segment.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ssvm_fifo_segment_h__ +#define __included_ssvm_fifo_segment_h__ + +#include <svm/svm_fifo.h> +#include <svm/ssvm.h> +#include <vppinfra/lock.h> + +typedef enum +{ + FIFO_SEGMENT_FREELIST_NONE = -1, + FIFO_SEGMENT_RX_FREELIST = 0, + FIFO_SEGMENT_TX_FREELIST, + FIFO_SEGMENT_N_FREELISTS +} svm_fifo_segment_freelist_t; + +#define FIFO_SEGMENT_MIN_FIFO_SIZE 4096 +#define FIFO_SEGMENT_MAX_FIFO_SIZE (8<<20) /* 8mb max fifo size */ +#define FIFO_SEGMENT_ALLOC_CHUNK_SIZE 32 /* Allocation quantum */ + +#define FIFO_SEGMENT_F_IS_PRIVATE 1 << 0 /* Private segment */ +#define FIFO_SEGMENT_F_IS_MAIN_HEAP 1 << 1 /* Segment is main heap */ +#define FIFO_SEGMENT_F_IS_PREALLOCATED 1 << 2 /* Segment is preallocated */ + +typedef struct +{ + svm_fifo_t *fifos; /**< Linked list of active RX fifos */ + u8 *segment_name; /**< Segment name */ + svm_fifo_t **free_fifos; /**< Freelists, by fifo size */ + u32 n_active_fifos; /**< Number of active fifos */ + u8 flags; /**< Segment flags */ +} svm_fifo_segment_header_t; + +typedef struct +{ + ssvm_private_t ssvm; + svm_fifo_segment_header_t *h; +} svm_fifo_segment_private_t; + +typedef struct +{ + volatile u32 lock; + + /** pool of segments */ + svm_fifo_segment_private_t *segments; + /* Where to put the next one */ + u64 next_baseva; + u32 timeout_in_seconds; +} svm_fifo_segment_main_t; + +extern svm_fifo_segment_main_t svm_fifo_segment_main; + +typedef struct +{ + char *segment_name; + u32 segment_size; + u32 *new_segment_indices; + u32 rx_fifo_size; + u32 tx_fifo_size; + u32 preallocated_fifo_pairs; + u32 private_segment_count; + u32 private_segment_size; +} svm_fifo_segment_create_args_t; + +static inline svm_fifo_segment_private_t * +svm_fifo_segment_get_segment (u32 segment_index) +{ + svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main; + return vec_elt_at_index (ssm->segments, segment_index); +} + +static inline u8 +svm_fifo_segment_has_fifos (svm_fifo_segment_private_t * fifo_segment) +{ + return fifo_segment->h->fifos != 0; +} + +static inline svm_fifo_t * +svm_fifo_segment_get_fifo_list (svm_fifo_segment_private_t * fifo_segment) +{ + return fifo_segment->h->fifos; +} + +#define foreach_ssvm_fifo_segment_api_error \ +_(OUT_OF_SPACE, "Out of space in segment", -200) + +typedef enum +{ +#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c, + foreach_ssvm_fifo_segment_api_error +#undef _ +} ssvm_fifo_segment_api_error_enum_t; + +int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a); +int svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t + * a); +int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a); +void svm_fifo_segment_delete (svm_fifo_segment_private_t * s); + +svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes, + svm_fifo_segment_freelist_t index); +void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, + svm_fifo_t * f, + svm_fifo_segment_freelist_t index); +void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds); +u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s); +u32 svm_fifo_segment_num_fifos (svm_fifo_segment_private_t * fifo_segment); +u32 svm_fifo_segment_num_free_fifos (svm_fifo_segment_private_t * + fifo_segment, u32 fifo_size_in_bytes); + +svm_fifo_segment_private_t *svm_fifo_segment_segments_pool (void); +format_function_t format_svm_fifo_segment; + +#endif /* __included_ssvm_fifo_segment_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") 
+ * End: + */ diff --git a/src/svm/svm_test.c b/src/svm/svm_test.c new file mode 100644 index 00000000..ab0b9e24 --- /dev/null +++ b/src/svm/svm_test.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * svm_test.c -- brain police + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> + +#include "svm.h" + + +int +main (int argc, char **argv) +{ + svm_region_t *root_rp, *rp; + svm_map_region_args_t *a = 0; + + vec_validate (a, 0); + + root_rp = svm_region_init (); + + ASSERT (root_rp); + + a->name = "/qvnet"; + a->size = (4 << 10); + + rp = svm_region_find_or_create (root_rp, a); + + ASSERT (rp); + + *((u32 *) rp->data_base) = 0xdeadbeef; + svm_region_unmap (root_rp, rp); + + fformat (stdout, "exiting...\n"); + + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c new file mode 100644 index 00000000..043b0924 --- /dev/null +++ b/src/svm/svmdb.c @@ -0,0 +1,676 @@ +/* + *------------------------------------------------------------------ + * svmdb.c -- simple shared memory database + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <vppinfra/serialize.h> + +#include "svmdb.h" + +static void local_set_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, + u8 * var, u8 * val, u32 elsize); + +always_inline void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif +} + +always_inline void +region_unlock (svm_region_t * rp) +{ +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + pthread_mutex_unlock (&rp->mutex); +} + +svmdb_client_t * +svmdb_map (svmdb_map_args_t * dba) +{ + svmdb_client_t *client = 0; + svm_map_region_args_t *a = 0; + svm_region_t *db_rp; + void *oldheap; + svmdb_shm_hdr_t *hp = 0; + + vec_validate (client, 0); + vec_validate (a, 0); + + svm_region_init_chroot_uid_gid (dba->root_path, dba->uid, dba->gid); + + a->root_path = dba->root_path; + a->name = "/db"; + a->size = dba->size ? dba->size : SVMDB_DEFAULT_SIZE; + a->flags = SVM_FLAGS_MHEAP; + a->uid = dba->uid; + a->gid = dba->gid; + + db_rp = client->db_rp = svm_region_find_or_create (a); + + ASSERT (db_rp); + + vec_free (a); + + region_lock (client->db_rp, 10); + /* Has someone else set up the shared-memory variable table? */ + if (db_rp->user_ctx) + { + client->shm = (void *) db_rp->user_ctx; + client->pid = getpid (); + region_unlock (client->db_rp); + ASSERT (client->shm->version == SVMDB_SHM_VERSION); + return (client); + } + /* Nope, it's our problem... 
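first mapper: create the shared header and the string/vec namespace hash tables below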
*/ + + if (CLIB_DEBUG > 2) + { + /* Add a bogus client (pid=0) so the svm won't be deallocated */ + clib_warning + ("[%d] adding fake client (pid=0) so '%s' won't be unlinked", + getpid (), db_rp->region_name); + oldheap = svm_push_pvt_heap (db_rp); + vec_add1 (client->db_rp->client_pids, 0); + svm_pop_heap (oldheap); + } + oldheap = svm_push_data_heap (db_rp); + + vec_validate (hp, 0); + hp->version = SVMDB_SHM_VERSION; + hp->namespaces[SVMDB_NAMESPACE_STRING] + = hash_create_string (0, sizeof (uword)); + hp->namespaces[SVMDB_NAMESPACE_VEC] + = hash_create_string (0, sizeof (uword)); + + db_rp->user_ctx = hp; + client->shm = hp; + + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + client->pid = getpid (); + + return (client); +} + +void +svmdb_unmap (svmdb_client_t * client) +{ + ASSERT (client); + + if (!svm_get_root_rp ()) + return; + + svm_region_unmap ((void *) client->db_rp); + svm_region_exit (); + vec_free (client); +} + +static void +notify_value (svmdb_value_t * v, svmdb_action_t a) +{ + int i; + int rv; + union sigval sv; + u32 value; + u32 *dead_registrations = 0; + + svmdb_notify_t *np; + + for (i = 0; i < vec_len (v->notifications); i++) + { + np = vec_elt_at_index (v->notifications, i); + if (np->action == a) + { + value = (np->action << 28) | (np->opaque); + sv.sival_ptr = (void *) (uword) value; + do + { + rv = 0; + if (sigqueue (np->pid, np->signum, sv) == 0) + break; + rv = errno; + } + while (rv == EAGAIN); + if (rv == 0) + continue; + vec_add1 (dead_registrations, i); + } + } + + for (i = 0; i < vec_len (dead_registrations); i++) + { + np = vec_elt_at_index (v->notifications, dead_registrations[i]); + clib_warning ("dead reg pid %d sig %d action %d opaque %x", + np->pid, np->signum, np->action, np->opaque); + vec_delete (v->notifications, 1, dead_registrations[i]); + } + vec_free (dead_registrations); +} + +int +svmdb_local_add_del_notification (svmdb_client_t * client, + svmdb_notification_args_t * a) +{ + uword *h; + void *oldheap; + hash_pair_t *hp; + svmdb_shm_hdr_t *shm; + u8 *dummy_value = 0; + svmdb_value_t *value; + svmdb_notify_t *np; + int i; + int rv = 0; + + ASSERT (a->elsize); + + region_lock (client->db_rp, 18); + shm = client->shm; + oldheap = svm_push_data_heap (client->db_rp); + + h = shm->namespaces[a->nspace]; + + hp = hash_get_pair_mem (h, a->var); + if (hp == 0) + { + local_set_variable_nolock (client, a->nspace, (u8 *) a->var, + dummy_value, a->elsize); + /* might have moved */ + h = shm->namespaces[a->nspace]; + hp = hash_get_pair_mem (h, a->var); + ASSERT (hp); + } + + value = pool_elt_at_index (shm->values, hp->value[0]); + + for (i = 0; i < vec_len (value->notifications); i++) + { + np = vec_elt_at_index (value->notifications, i); + if ((np->pid == client->pid) + && (np->signum == a->signum) + && (np->action == a->action) && (np->opaque == a->opaque)) + { + if (a->add_del == 0 /* delete */ ) + { + vec_delete (value->notifications, 1, i); + goto out; + } + else + { /* add */ + clib_warning + ("%s: ignore dup reg pid %d signum %d action %d opaque %x", + a->var, client->pid, a->signum, a->action, a->opaque); + rv = -2; + goto out; + } + } + } + if (a->add_del == 0) + { + rv = -3; + goto out; + } + + vec_add2 (value->notifications, np, 1); + np->pid = client->pid; + np->signum = a->signum; + np->action = a->action; + np->opaque = a->opaque; + +out: + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + return rv; +} + + +static void +local_unset_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, char *var) +{ 
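+  /* Truncate rather than delete: UNSET observers are notified and the
+   * value vector is emptied, but the hash entry and value slot are kept
+   * for reuse by a later set. */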
+ uword *h; + svmdb_value_t *oldvalue; + hash_pair_t *hp; + + h = client->shm->namespaces[namespace]; + hp = hash_get_pair_mem (h, var); + if (hp) + { + oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]); + if (vec_len (oldvalue->notifications)) + notify_value (oldvalue, SVMDB_ACTION_UNSET); + /* zero length value means unset */ + _vec_len (oldvalue->value) = 0; + } + client->shm->namespaces[namespace] = h; +} + +void +svmdb_local_unset_string_variable (svmdb_client_t * client, char *var) +{ + void *oldheap; + + region_lock (client->db_rp, 11); + oldheap = svm_push_data_heap (client->db_rp); + local_unset_variable_nolock (client, SVMDB_NAMESPACE_STRING, var); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +static void +local_set_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, + u8 * var, u8 * val, u32 elsize) +{ + uword *h; + hash_pair_t *hp; + u8 *name; + svmdb_shm_hdr_t *shm; + + shm = client->shm; + h = shm->namespaces[namespace]; + hp = hash_get_pair_mem (h, var); + if (hp) + { + svmdb_value_t *oldvalue; + oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]); + vec_alloc (oldvalue->value, vec_len (val) * elsize); + clib_memcpy (oldvalue->value, val, vec_len (val) * elsize); + _vec_len (oldvalue->value) = vec_len (val); + notify_value (oldvalue, SVMDB_ACTION_SET); + } + else + { + svmdb_value_t *newvalue; + pool_get (shm->values, newvalue); + memset (newvalue, 0, sizeof (*newvalue)); + newvalue->elsize = elsize; + vec_alloc (newvalue->value, vec_len (val) * elsize); + clib_memcpy (newvalue->value, val, vec_len (val) * elsize); + _vec_len (newvalue->value) = vec_len (val); + name = format (0, "%s%c", var, 0); + hash_set_mem (h, name, newvalue - shm->values); + } + shm->namespaces[namespace] = h; +} + +void +svmdb_local_set_string_variable (svmdb_client_t * client, + char *var, char *val) +{ + void *oldheap; + + region_lock (client->db_rp, 12); + oldheap = svm_push_data_heap (client->db_rp); + + local_unset_variable_nolock (client, SVMDB_NAMESPACE_STRING, var); + + local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING, + (u8 *) var, (u8 *) val, 1 /* elsize */ ); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +static u8 * +local_get_variable_nolock (svmdb_client_t * client, + svmdb_namespace_t namespace, u8 * var) +{ + uword *h; + uword *p; + svmdb_shm_hdr_t *shm; + svmdb_value_t *oldvalue; + + shm = client->shm; + h = shm->namespaces[namespace]; + p = hash_get_mem (h, var); + if (p) + { + oldvalue = pool_elt_at_index (shm->values, p[0]); + notify_value (oldvalue, SVMDB_ACTION_GET); + return (oldvalue->value); + } + return 0; +} + +void * +svmdb_local_get_variable_reference (svmdb_client_t * client, + svmdb_namespace_t namespace, char *var) +{ + u8 *rv; + + region_lock (client->db_rp, 19); + rv = local_get_variable_nolock (client, namespace, (u8 *) var); + region_unlock (client->db_rp); + return (void *) rv; +} + +char * +svmdb_local_get_string_variable (svmdb_client_t * client, char *var) +{ + u8 *rv = 0; + + region_lock (client->db_rp, 13); + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_STRING, (u8 *) var); + + if (rv && vec_len (rv)) + { + rv = format (0, "%s", rv); + vec_add1 (rv, 0); + } + region_unlock (client->db_rp); + return ((char *) rv); +} + +void +svmdb_local_dump_strings (svmdb_client_t * client) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm = client->shm; + + region_lock (client->db_rp, 14); + + h = client->shm->namespaces[SVMDB_NAMESPACE_STRING]; + + /* 
*INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + + fformat(stdout, "%s: %s\n", key, + vec_len(v->value) ? v->value : (u8 *)"(nil)"); + })); + /* *INDENT-ON* */ + region_unlock (client->db_rp); +} + +int +svmdb_local_serialize_strings (svmdb_client_t * client, char *filename) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm = client->shm; + serialize_main_t _sm, *sm = &_sm; + clib_error_t *error = 0; + u8 *sanitized_name = 0; + int fd = 0; + + if (strstr (filename, "..") || index (filename, '/')) + { + error = clib_error_return (0, "Illegal characters in filename '%s'", + filename); + goto out; + } + + sanitized_name = format (0, "/tmp/%s%c", filename, 0); + + fd = creat ((char *) sanitized_name, 0644); + + if (fd < 0) + { + error = clib_error_return_unix (0, "Create '%s'", sanitized_name); + goto out; + } + + serialize_open_unix_file_descriptor (sm, fd); + + region_lock (client->db_rp, 20); + + h = client->shm->namespaces[SVMDB_NAMESPACE_STRING]; + + serialize_likely_small_unsigned_integer (sm, hash_elts (h)); + + /* *INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + + /* Omit names with nil values */ + if (vec_len(v->value)) + { + serialize_cstring (sm, (char *)key); + serialize_cstring (sm, (char *)v->value); + } + })); + /* *INDENT-ON* */ + region_unlock (client->db_rp); + + serialize_close (sm); + +out: + if (fd > 0 && close (fd) < 0) + error = clib_error_return_unix (0, "close fd %d", fd); + + if (error) + { + clib_error_report (error); + return -1; + } + return 0; +} + +int +svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename) +{ + serialize_main_t _sm, *sm = &_sm; + void *oldheap; + clib_error_t *error = 0; + u8 *key, *value; + int fd = 0; + u32 nelts; + int i; + + fd = open (filename, O_RDONLY); + + if (fd < 0) + { + error = clib_error_return_unix (0, "Failed to open '%s'", filename); + goto out; + } + + unserialize_open_unix_file_descriptor (sm, fd); + + region_lock (client->db_rp, 21); + oldheap = svm_push_data_heap (client->db_rp); + + nelts = unserialize_likely_small_unsigned_integer (sm); + + for (i = 0; i < nelts; i++) + { + unserialize_cstring (sm, (char **) &key); + unserialize_cstring (sm, (char **) &value); + local_set_variable_nolock (client, SVMDB_NAMESPACE_STRING, + key, value, 1 /* elsize */ ); + vec_free (key); + vec_free (value); + } + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + + serialize_close (sm); + +out: + if (fd > 0 && close (fd) < 0) + error = clib_error_return_unix (0, "close fd %d", fd); + + if (error) + { + clib_error_report (error); + return -1; + } + return 0; +} + +void +svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var) +{ + void *oldheap; + + region_lock (client->db_rp, 15); + oldheap = svm_push_data_heap (client->db_rp); + local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var); + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +void +svmdb_local_set_vec_variable (svmdb_client_t * client, + char *var, void *val_arg, u32 elsize) +{ + u8 *val = (u8 *) val_arg; + void *oldheap; + + region_lock (client->db_rp, 16); + oldheap = svm_push_data_heap (client->db_rp); + + local_unset_variable_nolock (client, SVMDB_NAMESPACE_VEC, var); + local_set_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var, + val, elsize); + + svm_pop_heap (oldheap); + region_unlock (client->db_rp); +} + +void * +svmdb_local_get_vec_variable (svmdb_client_t * 
client, char *var, u32 elsize) +{ + u8 *rv = 0; + u8 *copy = 0; + + region_lock (client->db_rp, 17); + + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var); + + if (rv && vec_len (rv)) + { + /* Make a copy in process-local memory */ + vec_alloc (copy, vec_len (rv) * elsize); + clib_memcpy (copy, rv, vec_len (rv) * elsize); + _vec_len (copy) = vec_len (rv); + region_unlock (client->db_rp); + return (copy); + } + region_unlock (client->db_rp); + return (0); +} + +void +svmdb_local_dump_vecs (svmdb_client_t * client) +{ + uword *h; + u8 *key; + u32 value; + svmdb_shm_hdr_t *shm; + + region_lock (client->db_rp, 17); + shm = client->shm; + + h = client->shm->namespaces[SVMDB_NAMESPACE_VEC]; + + /* *INDENT-OFF* */ + hash_foreach_mem(key, value, h, + ({ + svmdb_value_t *v = pool_elt_at_index (shm->values, value); + (void) fformat(stdout, "%s:\n %U (%.2f)\n", key, + format_hex_bytes, v->value, + vec_len(v->value)*v->elsize, ((f64 *)(v->value))[0]); + })); + /* *INDENT-ON* */ + + region_unlock (client->db_rp); +} + +void * +svmdb_local_find_or_add_vec_variable (svmdb_client_t * client, + char *var, u32 nbytes) +{ + void *oldheap; + u8 *rv = 0; + + region_lock (client->db_rp, 18); + oldheap = svm_push_data_heap (client->db_rp); + + rv = local_get_variable_nolock (client, SVMDB_NAMESPACE_VEC, (u8 *) var); + + if (rv) + { + goto out; + } + else + { + uword *h; + u8 *name; + svmdb_shm_hdr_t *shm; + svmdb_value_t *newvalue; + + shm = client->shm; + h = shm->namespaces[SVMDB_NAMESPACE_VEC]; + + pool_get (shm->values, newvalue); + memset (newvalue, 0, sizeof (*newvalue)); + newvalue->elsize = 1; + vec_alloc (newvalue->value, nbytes); + _vec_len (newvalue->value) = nbytes; + name = format (0, "%s%c", var, 0); + hash_set_mem (h, name, newvalue - shm->values); + shm->namespaces[SVMDB_NAMESPACE_VEC] = h; + rv = newvalue->value; + } + +out: + svm_pop_heap (oldheap); + region_unlock (client->db_rp); + return (rv); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdb.h b/src/svm/svmdb.h new file mode 100644 index 00000000..e35be8aa --- /dev/null +++ b/src/svm/svmdb.h @@ -0,0 +1,135 @@ +/* + *------------------------------------------------------------------ + * svmdb.h - shared VM database + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __included_svmdb_h__ +#define __included_svmdb_h__ + +#include "svm.h" + +typedef enum +{ + SVMDB_ACTION_ILLEGAL = 0, + SVMDB_ACTION_GET, /* not clear why anyone would care */ + SVMDB_ACTION_SET, + SVMDB_ACTION_UNSET, +} svmdb_action_t; + +typedef struct +{ + int pid; + int signum; + u32 action:4; + u32 opaque:28; +} svmdb_notify_t; + +typedef struct +{ + u8 *value; + svmdb_notify_t *notifications; + u32 elsize; +} svmdb_value_t; + +typedef enum +{ + SVMDB_NAMESPACE_STRING = 0, + SVMDB_NAMESPACE_VEC, + SVMDB_N_NAMESPACES, +} svmdb_namespace_t; + +typedef struct +{ + uword version; + /* pool of values */ + svmdb_value_t *values; + uword *namespaces[SVMDB_N_NAMESPACES]; +} svmdb_shm_hdr_t; + +#define SVMDB_SHM_VERSION 2 + +typedef struct +{ + int flags; + int pid; + svm_region_t *db_rp; + svmdb_shm_hdr_t *shm; +} svmdb_client_t; + +typedef struct +{ + int add_del; + svmdb_namespace_t nspace; + char *var; + u32 elsize; + int signum; + u32 action:4; + u32 opaque:28; +} svmdb_notification_args_t; + +typedef struct +{ + const char *root_path; + uword size; + u32 uid; + u32 gid; +} svmdb_map_args_t; + +/* + * Must be a reasonable number, several mb smaller than + * SVM_GLOBAL_REGION_SIZE, or no donut for you... + */ +#define SVMDB_DEFAULT_SIZE (4<<20) + +svmdb_client_t *svmdb_map (svmdb_map_args_t *); + +void svmdb_unmap (svmdb_client_t * client); +void svmdb_local_unset_string_variable (svmdb_client_t * client, char *var); +void svmdb_local_set_string_variable (svmdb_client_t * client, + char *var, char *val); +char *svmdb_local_get_string_variable (svmdb_client_t * client, char *var); +void *svmdb_local_get_variable_reference (svmdb_client_t * client, + svmdb_namespace_t ns, char *var); + +void svmdb_local_dump_strings (svmdb_client_t * client); + +void svmdb_local_unset_vec_variable (svmdb_client_t * client, char *var); +void svmdb_local_set_vec_variable (svmdb_client_t * client, + char *var, void *val, u32 elsize); +void *svmdb_local_get_vec_variable (svmdb_client_t * client, char *var, + u32 elsize); +void svmdb_local_dump_vecs (svmdb_client_t * client); + +int svmdb_local_add_del_notification (svmdb_client_t * client, + svmdb_notification_args_t * args); + +void *svmdb_local_find_or_add_vec_variable (svmdb_client_t * client, + char *var, u32 nbytes); + +int svmdb_local_serialize_strings (svmdb_client_t * client, char *filename); +int svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename); + + +#endif /* __included_svmdb_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmdbtool.c b/src/svm/svmdbtool.c new file mode 100644 index 00000000..a0af15fc --- /dev/null +++ b/src/svm/svmdbtool.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <pwd.h> +#include <grp.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> +#include <vppinfra/serialize.h> +#include "svmdb.h" + +typedef struct +{ + svmdb_map_args_t map_args; + int uid, gid; + uword size; +} svmdbtool_main_t; + +svmdbtool_main_t svmdbtool_main; + +static inline svmdb_map_args_t * +map_arg_setup (char *chroot_path) +{ + svmdbtool_main_t *sm = &svmdbtool_main; + svmdb_map_args_t *ma = &sm->map_args; + + memset (ma, 0, sizeof (*ma)); + ma->root_path = chroot_path; + ma->size = sm->size; + ma->uid = sm->uid; + ma->gid = sm->gid; + return ma; +} + +static void +get_string (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + char *rv; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + rv = svmdb_local_get_string_variable (c, (char *) vbl); + + fformat (stdout, "%s\n", rv ? rv : "UNSET"); + vec_free (rv); + svmdb_unmap (c); +} + +static void +set_string (char *chroot_path, u8 * vbl, u8 * value) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_set_string_variable (c, (char *) vbl, (char *) value); + svmdb_unmap (c); +} + +static void +unset_string (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_unset_string_variable (c, (char *) vbl); + svmdb_unmap (c); +} + +static void +dump_strings (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + svmdb_local_dump_strings (c); + svmdb_unmap (c); +} + +static void +serialize_strings (char *chroot_path, char *filename) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + (void) svmdb_local_serialize_strings (c, filename); + svmdb_unmap (c); +} + +static void +unserialize_strings (char *chroot_path, char *filename) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + (void) svmdb_local_unserialize_strings (c, filename); + svmdb_unmap (c); +} + +static void +test_vlib_vec_rate (char *chroot_path, f64 vr) +{ + svmdb_client_t *c; + f64 *tv = 0; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + vec_add1 (tv, vr); + + svmdb_local_set_vec_variable (c, "vlib_vector_rate", (char *) tv, + sizeof (*tv)); + svmdb_unmap (c); + + vec_free (tv); +} + + + +static void +test_vec (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + u64 *tv = 0; + int i; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + /* my amp goes to 11 */ + for (i = 0; i < 11; i++) + { + vec_add1 (tv, i); + } + + svmdb_local_set_vec_variable (c, (char *) vbl, (char *) tv, sizeof (tv[0])); + svmdb_unmap (c); + + vec_free (tv); +} + +static void +fake_install (char *chroot_path, u8 * add_value) +{ + svmdb_client_t *c; + u8 *v = 0; + u8 **values = 0; + u8 *oldvalue; + u8 *value; + int nitems = 
0, i; + serialize_main_t m; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + oldvalue = svmdb_local_get_vec_variable (c, "installed_sw", 1); + if (oldvalue) + { + unserialize_open_data (&m, oldvalue, vec_len (oldvalue)); + nitems = unserialize_likely_small_unsigned_integer (&m); + for (i = 0; i < nitems; i++) + { + unserialize_cstring (&m, (char **) &value); + vec_add1 (values, value); + } + vec_free (v); + } + nitems++; + value = format (0, "%s%c", add_value, 0); + + vec_add1 (values, value); + + fformat (stdout, "Resulting installed_sw vector:\n"); + + serialize_open_vector (&m, v); + serialize_likely_small_unsigned_integer (&m, vec_len (values)); + for (i = 0; i < vec_len (values); i++) + { + fformat (stdout, "%s\n", values[i]); + serialize_cstring (&m, (char *) values[i]); + } + + v = serialize_close_vector (&m); + + svmdb_local_set_vec_variable (c, "installed_sw", v, sizeof (v[0])); + svmdb_unmap (c); + + for (i = 0; i < vec_len (values); i++) + vec_free (values[i]); + vec_free (values); +} + +static void +sigaction_handler (int signum, siginfo_t * i, void *notused) +{ + u32 action, opaque; + + action = (u32) (uword) i->si_ptr; + action >>= 28; + opaque = (u32) (uword) i->si_ptr; + opaque &= ~(0xF0000000); + + clib_warning ("signal %d, action %d, opaque %x", signum, action, opaque); +} + +static void +test_reg (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_notification_args_t args; + svmdb_notification_args_t *a = &args; + struct sigaction sa; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = sigaction_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction (SIGUSR2, &sa, 0) < 0) + { + clib_unix_warning ("sigaction"); + return; + } + + memset (a, 0, sizeof (*a)); + + c = svmdb_map (ma); + + a->add_del = 1 /* add */ ; + a->nspace = SVMDB_NAMESPACE_STRING; + a->var = (char *) vbl; + a->elsize = 1; + a->signum = SIGUSR2; + a->action = SVMDB_ACTION_GET; + a->opaque = 0x0eadbeef; + + svmdb_local_add_del_notification (c, a); + + (void) svmdb_local_get_string_variable (c, (char *) vbl); + + a->add_del = 0; /* del */ + svmdb_local_add_del_notification (c, a); + + + + svmdb_unmap (c); +} + +static void +unset_vec (char *chroot_path, u8 * vbl) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_local_unset_vec_variable (c, (char *) vbl); + svmdb_unmap (c); +} + +static void +dump_vecs (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_local_dump_vecs (c); + svmdb_unmap (c); +} + +static void +crash_test (char *chroot_path) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + clib_warning ("Grab region mutex and crash deliberately!"); + c->db_rp->mutex_owner_pid = getpid (); + c->db_rp->mutex_owner_tag = -13; + pthread_mutex_lock (&c->db_rp->mutex); + + abort (); +} + +static void +map_with_size (char *chroot_path, uword size) +{ + svmdb_client_t *c; + svmdb_map_args_t *ma; + + svmdbtool_main.size = size; + ma = map_arg_setup (chroot_path); + + c = svmdb_map (ma); + + svmdb_unmap (c); +} + +int +main (int argc, char **argv) +{ + unformat_input_t input; + int parsed = 0; + u8 *vbl = 0, *value = 0; + char *chroot_path = 0; + u8 *chroot_path_u8; + u8 *filename; + uword size; + f64 vr; + int uid, gid, rv; + struct passwd _pw, *pw; + struct group _grp, *grp; + char *s, 
buf[128]; + + svmdbtool_main.uid = geteuid (); + svmdbtool_main.gid = getegid (); + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&input, "get-string %s", &vbl)) + { + get_string (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "set-string %s %s", &vbl, &value)) + { + set_string (chroot_path, vbl, value); + vec_free (vbl); + vec_free (value); + parsed++; + } + else if (unformat (&input, "unset-string %s", &vbl)) + { + unset_string (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "dump-strings")) + { + dump_strings (chroot_path); + parsed++; + } + else if (unformat (&input, "unset-vec %s", &vbl)) + { + unset_vec (chroot_path, vbl); + vec_free (vbl); + parsed++; + } + else if (unformat (&input, "dump-vecs")) + { + dump_vecs (chroot_path); + parsed++; + } + else if (unformat (&input, "test-vec %s", &vbl)) + { + test_vec (chroot_path, vbl); + // vec_free(vbl); + parsed++; + } + else if (unformat (&input, "vlib-vec-rate %f", &vr)) + { + test_vlib_vec_rate (chroot_path, vr); + parsed++; + } + else if (unformat (&input, "test-reg %s", &vbl)) + { + test_reg (chroot_path, vbl); + parsed++; + } + else if (unformat (&input, "crash-test")) + { + crash_test (chroot_path); + } + else if (unformat (&input, "chroot %s", &chroot_path_u8)) + { + chroot_path = (char *) chroot_path_u8; + } + else if (unformat (&input, "fake-install %s", &value)) + { + fake_install (chroot_path, value); + parsed++; + } + else if (unformat (&input, "size %d", &size)) + { + map_with_size (chroot_path, size); + parsed++; + } + else if (unformat (&input, "uid %d", &uid)) + svmdbtool_main.uid = uid; + else if (unformat (&input, "gid %d", &gid)) + svmdbtool_main.gid = gid; + else if (unformat (&input, "uid %s", &s)) + { + /* lookup the username */ + pw = NULL; + rv = getpwnam_r (s, &_pw, buf, sizeof (buf), &pw); + if (rv < 0) + { + fformat (stderr, "cannot fetch username %s", s); + exit (1); + } + if (pw == NULL) + { + fformat (stderr, "username %s does not exist", s); + exit (1); + } + vec_free (s); + svmdbtool_main.uid = pw->pw_uid; + } + else if (unformat (&input, "gid %s", &s)) + { + /* lookup the group name */ + grp = NULL; + rv = getgrnam_r (s, &_grp, buf, sizeof (buf), &grp); + if (rv != 0) + { + fformat (stderr, "cannot fetch group %s", s); + exit (1); + } + if (grp == NULL) + { + fformat (stderr, "group %s does not exist", s); + exit (1); + } + vec_free (s); + svmdbtool_main.gid = grp->gr_gid; + } + else if (unformat (&input, "serialize-strings %s", &filename)) + { + vec_add1 (filename, 0); + serialize_strings (chroot_path, (char *) filename); + parsed++; + } + else if (unformat (&input, "unserialize-strings %s", &filename)) + { + vec_add1 (filename, 0); + unserialize_strings (chroot_path, (char *) filename); + parsed++; + } + else + { + break; + } + } + + unformat_free (&input); + + if (!parsed) + { + fformat (stdout, "%s: get-string <name> | set-string <name> <value>\n", + argv[0]); + fformat (stdout, " unset-string <name> | dump-strings\n"); + fformat (stdout, " test-vec <name> |\n"); + fformat (stdout, " unset-vec <name> | dump-vecs\n"); + fformat (stdout, " chroot <prefix> [uid <nnn-or-userid>]\n"); + fformat (stdout, " [gid <nnn-or-group-name>]\n"); + } + + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c new file mode 100644 index 
00000000..01ae4221 --- /dev/null +++ b/src/svm/svmtool.c @@ -0,0 +1,528 @@ +/* + *------------------------------------------------------------------ + * svmtool.c + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#include "svm.h" + + + +/* + * format_all_svm_regions + * Maps / unmaps regions. Do NOT call from client code! + */ +u8 * +format_all_svm_regions (u8 * s, va_list * args) +{ + int verbose = va_arg (*args, int); + svm_region_t *root_rp = svm_get_root_rp (); + svm_main_region_t *mp; + svm_subregion_t *subp; + svm_region_t *rp; + svm_map_region_args_t *a = 0; + u8 **svm_names = 0; + u8 *name = 0; + int i; + + ASSERT (root_rp); + + pthread_mutex_lock (&root_rp->mutex); + + s = format (s, "%U", format_svm_region, root_rp, verbose); + + mp = root_rp->data_base; + + /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. 
+ */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + vec_validate (a, 0); + a->name = (char *) svm_names[i]; + rp = svm_region_find_or_create (a); + if (rp) + { + pthread_mutex_lock (&rp->mutex); + s = format (s, "%U", format_svm_region, rp, verbose); + pthread_mutex_unlock (&rp->mutex); + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + vec_free (a); + } + vec_free (svm_names); + return (s); +} + +void +show (char *chroot_path, int verbose) +{ + svm_map_region_args_t *a = 0; + + vec_validate (a, 0); + + svm_region_init_chroot (chroot_path); + + fformat (stdout, "My pid is %d\n", getpid ()); + + fformat (stdout, "%U", format_all_svm_regions, verbose); + + svm_region_exit (); + + vec_free (a); +} + + +static void * +svm_map_region_nolock (svm_map_region_args_t * a) +{ + int svm_fd; + svm_region_t *rp; + int deadman = 0; + u8 *shm_name; + + ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size); + + shm_name = shm_name_from_svm_map_region_args (a); + + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + return (0); + } + vec_free (shm_name); + + rp = mmap (0, MMAP_PAGESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + return (0); + } + /* + * We lost the footrace to create this region; make sure + * the winner has crossed the finish line. + */ + while (rp->version == 0 && deadman++ < 5) + { + sleep (1); + } + + /* + * <bleep>-ed? + */ + if (rp->version == 0) + { + clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION); + munmap (rp, MMAP_PAGESIZE); + return (0); + } + /* Remap now that the region has been placed */ + a->baseva = rp->virtual_base; + a->size = rp->virtual_size; + munmap (rp, MMAP_PAGESIZE); + + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + return (0); + } + + if ((uword) rp != rp->virtual_base) + { + clib_warning ("mmap botch"); + } + + if (pthread_mutex_trylock (&rp->mutex)) + { + clib_warning ("rp->mutex LOCKED by pid %d, tag %d, cleared...", + rp->mutex_owner_pid, rp->mutex_owner_tag); + memset (&rp->mutex, 0, sizeof (rp->mutex)); + + } + else + { + clib_warning ("mutex OK...\n"); + pthread_mutex_unlock (&rp->mutex); + } + + return ((void *) rp); +} + +/* + * rnd_pagesize + * Round to a pagesize multiple, presumably 4k works + */ +static u64 +rnd_pagesize (u64 size) +{ + u64 rv; + + rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1); + return (rv); +} + +#define MUTEX_DEBUG + +always_inline void +region_lock (svm_region_t * rp, int tag) +{ + pthread_mutex_lock (&rp->mutex); +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = getpid (); + rp->mutex_owner_tag = tag; +#endif +} + +always_inline void +region_unlock (svm_region_t * rp) +{ +#ifdef MUTEX_DEBUG + rp->mutex_owner_pid = 0; + rp->mutex_owner_tag = 0; +#endif + pthread_mutex_unlock (&rp->mutex); +} + + +static void * +svm_existing_region_map_nolock (void *root_arg, svm_map_region_args_t * a) +{ + svm_region_t *root_rp = root_arg; + svm_main_region_t *mp; + svm_region_t *rp; + void *oldheap; + uword *p; + + a->size += MMAP_PAGESIZE + + (a->pvt_heap_size ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE); + a->size = rnd_pagesize (a->size); + + region_lock (root_rp, 4); + oldheap = svm_push_pvt_heap (root_rp); + mp = root_rp->data_base; + + ASSERT (mp); + + p = hash_get_mem (mp->name_hash, a->name); + + if (p) + { + rp = svm_map_region_nolock (a); + region_unlock (root_rp); + svm_pop_heap (oldheap); + return rp; + } + return 0; + +} + +static void +trace (char *chroot_path, char *name, int enable_disable) +{ + svm_map_region_args_t *a = 0; + svm_region_t *db_rp; + void *oldheap; + + vec_validate (a, 0); + + svm_region_init_chroot (chroot_path); + + a->name = name; + a->size = 1 << 20; + a->flags = SVM_FLAGS_MHEAP; + + db_rp = svm_region_find_or_create (a); + + ASSERT (db_rp); + + region_lock (db_rp, 20); + + oldheap = svm_push_data_heap (db_rp); + + mheap_trace (db_rp->data_heap, enable_disable); + + svm_pop_heap (oldheap); + region_unlock (db_rp); + + svm_region_unmap ((void *) db_rp); + svm_region_exit (); + vec_free (a); +} + + + +static void +subregion_repair (char *chroot_path) +{ + int i; + svm_main_region_t *mp; + svm_map_region_args_t a; + svm_region_t *root_rp; + svm_region_t *rp; + svm_subregion_t *subp; + u8 *name = 0; + u8 **svm_names = 0; + + svm_region_init_chroot (chroot_path); + root_rp = svm_get_root_rp (); + + pthread_mutex_lock (&root_rp->mutex); + + mp = root_rp->data_base; + + /* + * Snapshoot names, can't hold root rp mutex across + * find_or_create. + */ + /* *INDENT-OFF* */ + pool_foreach (subp, mp->subregions, ({ + name = vec_dup (subp->subregion_name); + vec_add1(svm_names, name); + })); + /* *INDENT-ON* */ + + pthread_mutex_unlock (&root_rp->mutex); + + for (i = 0; i < vec_len (svm_names); i++) + { + memset (&a, 0, sizeof (a)); + a.root_path = chroot_path; + a.name = (char *) svm_names[i]; + fformat (stdout, "Checking %s region...\n", a.name); + rp = svm_existing_region_map_nolock (root_rp, &a); + if (rp) + { + svm_region_unmap (rp); + vec_free (svm_names[i]); + } + } + vec_free (svm_names); +} + +void +repair (char *chroot_path, int crash_root_region) +{ + svm_region_t *root_rp = 0; + svm_map_region_args_t *a = 0; + void *svm_map_region (svm_map_region_args_t * a); + int svm_fd; + u8 *shm_name; + + fformat (stdout, "our pid: %d\n", getpid ()); + + vec_validate (a, 0); + + a->root_path = chroot_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = SVM_GLOBAL_REGION_BASEVA; + a->size = SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + + shm_name = shm_name_from_svm_map_region_args (a); + + svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777); + + if (svm_fd < 0) + { + perror ("svm_region_map(mmap open)"); + goto out; + } + + vec_free (shm_name); + + root_rp = mmap (0, MMAP_PAGESIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0); + + if (root_rp == (svm_region_t *) MAP_FAILED) + { + close (svm_fd); + clib_warning ("mmap"); + goto out; + } + + /* Remap now that the region has been placed */ + clib_warning ("remap to 0x%x", root_rp->virtual_base); + + a->baseva = root_rp->virtual_base; + a->size = root_rp->virtual_size; + munmap (root_rp, MMAP_PAGESIZE); + + root_rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, svm_fd, 0); + if ((uword) root_rp == (uword) MAP_FAILED) + { + clib_unix_warning ("mmap"); + goto out; + } + + close (svm_fd); + + if ((uword) root_rp != root_rp->virtual_base) + { + clib_warning ("mmap botch"); + goto out; + } + + if (pthread_mutex_trylock (&root_rp->mutex)) + { + clib_warning ("root_rp->mutex LOCKED by pid %d, tag %d, 
cleared...", + root_rp->mutex_owner_pid, root_rp->mutex_owner_tag); + memset (&root_rp->mutex, 0, sizeof (root_rp->mutex)); + goto out; + } + else + { + clib_warning ("root_rp->mutex OK...\n"); + pthread_mutex_unlock (&root_rp->mutex); + } + +out: + vec_free (a); + /* + * Now that the root region is known to be OK, + * fix broken subregions + */ + subregion_repair (chroot_path); + + if (crash_root_region) + { + clib_warning ("Leaving root region locked on purpose..."); + pthread_mutex_lock (&root_rp->mutex); + root_rp->mutex_owner_pid = getpid (); + root_rp->mutex_owner_tag = 99; + } + svm_region_exit (); +} + +int +main (int argc, char **argv) +{ + unformat_input_t input; + int parsed = 0; + char *name; + char *chroot_path = 0; + u8 *chroot_u8; + + unformat_init_command_line (&input, argv); + + while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&input, "show-verbose")) + { + show (chroot_path, 1); + parsed++; + } + else if (unformat (&input, "show")) + { + show (chroot_path, 0); + parsed++; + } + else if (unformat (&input, "client-scan")) + { + svm_client_scan (chroot_path); + parsed++; + } + else if (unformat (&input, "repair")) + { + repair (chroot_path, 0 /* fix it */ ); + parsed++; + } + else if (unformat (&input, "crash")) + { + repair (chroot_path, 1 /* crash it */ ); + parsed++; + } + else if (unformat (&input, "trace-on %s", &name)) + { + trace (chroot_path, name, 1); + parsed++; + } + else if (unformat (&input, "trace-off %s", &name)) + { + trace (chroot_path, name, 0); + parsed++; + } + else if (unformat (&input, "chroot %s", &chroot_u8)) + { + chroot_path = (char *) chroot_u8; + } + else + { + break; + } + } + + unformat_free (&input); + + if (!parsed) + { + fformat (stdout, + "%s: show | show-verbose | client-scan | trace-on <region-name>\n", + argv[0]); + fformat (stdout, " trace-off <region-name>\n"); + } + exit (0); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c new file mode 100644 index 00000000..3bdca949 --- /dev/null +++ b/src/svm/test_svm_fifo1.c @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "svm_fifo_segment.h" + +clib_error_t * +hello_world (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + clib_error_t *error = 0; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + a->rx_fifo_size = 4096; + a->tx_fifo_size = 4096; + a->preallocated_fifo_pairs = 4; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]); + + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data); + + if (!memcmp (retrieved_data, test_data, vec_len (test_data))) + error = clib_error_return (0, "data test OK, got '%s'", retrieved_data); + else + error = clib_error_return (0, "data test FAIL!"); + + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); + + return error; +} + +clib_error_t * +master (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]); + + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + svm_fifo_enqueue_nowait (f, vec_len (test_data), test_data); + + return clib_error_return (0, "master (enqueue) done"); +} + +clib_error_t * +mempig (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + svm_fifo_t **flist = 0; + int rv; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]); + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); + } + + _vec_len (flist) = 0; + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo 
(sp, 4096, FIFO_SEGMENT_RX_FREELIST); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); + } + + return 0; +} + +clib_error_t * +offset (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u32 *test_data = 0; + u32 *recovered_data = 0; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]); + + f = svm_fifo_segment_alloc_fifo (sp, 200 << 10, FIFO_SEGMENT_RX_FREELIST); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + for (i = 0; i < (3 * 1024); i++) + vec_add1 (test_data, i); + + /* Enqueue the first 1024 u32's */ + svm_fifo_enqueue_nowait (f, 4096 /* bytes to enqueue */ , + (u8 *) test_data); + + /* Enqueue the third 1024 u32's 2048 ahead of the current tail */ + svm_fifo_enqueue_with_offset (f, 4096, 4096, (u8 *) & test_data[2048]); + + /* Enqueue the second 1024 u32's at the current tail */ + svm_fifo_enqueue_nowait (f, 4096 /* bytes to enqueue */ , + (u8 *) & test_data[1024]); + + vec_validate (recovered_data, (3 * 1024) - 1); + + svm_fifo_dequeue_nowait (f, 3 * 4096, (u8 *) recovered_data); + + for (i = 0; i < (3 * 1024); i++) + { + if (recovered_data[i] != test_data[i]) + { + clib_warning ("[%d] expected %d recovered %d", i, + test_data[i], recovered_data[i]); + return clib_error_return (0, "offset test FAILED"); + } + } + + return clib_error_return (0, "offset test OK"); +} + +clib_error_t * +slave (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + + rv = svm_fifo_segment_attach (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv); + + sp = svm_fifo_segment_get_segment (a->new_segment_indices[0]); + sh = sp->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + /* might wanna wait.. 
*/ + f = fsh->fifos; + + /* Lazy bastards united */ + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + { + svm_fifo_dequeue_nowait (f, vec_len (retrieved_data), retrieved_data); + if (memcmp (retrieved_data, test_data, vec_len (retrieved_data))) + return clib_error_return (0, "retrieved data incorrect, '%s'", + retrieved_data); + } + + return clib_error_return (0, "slave (dequeue) done"); +} + + +int +test_ssvm_fifo1 (unformat_input_t * input) +{ + clib_error_t *error = 0; + int verbose = 0; + int test_id = 0; + + svm_fifo_segment_init (0x200000000ULL, 20); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "master")) + test_id = 1; + else if (unformat (input, "slave")) + test_id = 2; + else if (unformat (input, "mempig")) + test_id = 3; + else if (unformat (input, "offset")) + test_id = 4; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto out; + } + } + + switch (test_id) + { + case 0: + error = hello_world (verbose); + break; + + case 1: + error = master (verbose); + break; + + case 2: + error = slave (verbose); + break; + + case 3: + error = mempig (verbose); + break; + + case 4: + error = offset (verbose); + break; + + default: + error = clib_error_return (0, "test id %d unknown", test_id); + break; + } + +out: + if (error) + clib_error_report (error); + + return 0; +} + + + +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int r; + + unformat_init_command_line (&i, argv); + r = test_ssvm_fifo1 (&i); + unformat_free (&i); + return r; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |
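
For orientation, the svmdb client API declared in svmdb.h and exercised by svmdbtool.c reduces to a short map / set / get / unmap sequence. The sketch below is a minimal illustration using only calls that appear in this diff; the variable names ("example_var", "example_rate") and values are placeholders, error handling is omitted, string and vector values are built as clib vectors (local_set_variable_nolock sizes its copy with vec_len on the value), and size 0 is passed to svmdb_map as svmdbtool does when no size argument is given.

#include <unistd.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include "svmdb.h"

int
main (int argc, char **argv)
{
  svmdb_map_args_t _ma, *ma = &_ma;
  svmdb_client_t *c;
  u8 *val;
  char *s;
  f64 *tv = 0, *copy;

  /* Map (or create) the shared database region; root_path 0 = no chroot */
  memset (ma, 0, sizeof (*ma));
  ma->root_path = 0;
  ma->size = 0;			/* default size, as svmdbtool passes */
  ma->uid = geteuid ();
  ma->gid = getegid ();
  c = svmdb_map (ma);

  /* String namespace: the value is a clib vector, NUL included */
  val = format (0, "hello%c", 0);
  svmdb_local_set_string_variable (c, "example_var", (char *) val);
  vec_free (val);

  /* Read back a process-local, NUL-terminated copy */
  s = svmdb_local_get_string_variable (c, "example_var");
  fformat (stdout, "example_var = %s\n", s ? s : "UNSET");
  vec_free (s);

  /* Vector namespace: one f64 element, elsize = sizeof (f64) */
  vec_add1 (tv, 1.5);
  svmdb_local_set_vec_variable (c, "example_rate", tv, sizeof (tv[0]));
  vec_free (tv);

  copy = svmdb_local_get_vec_variable (c, "example_rate", sizeof (f64));
  if (copy)
    {
      fformat (stdout, "example_rate = %.2f\n", copy[0]);
      vec_free (copy);
    }

  svmdb_unmap (c);
  return 0;
}

Both getters hand back process-local copies made under the region mutex (see svmdb_local_get_string_variable and svmdb_local_get_vec_variable above), so the caller releases them with vec_free; svmdb_local_get_variable_reference, by contrast, returns a pointer into the shared region itself, which is why persist.c uses it to recover its root pointer.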