diff options
author | Dave Barach <dave@barachs.net> | 2017-09-10 15:04:27 -0400 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-10-03 11:03:47 +0000 |
commit | 59b2565cd91a67ced650739f36129650830211ac (patch) | |
tree | 1ae3b8d69d7952500b07186169fb31e0f72ae04e /src/vlibmemory | |
parent | 35ffa3e8f6b032f6e324234d495f769049d8feea (diff) |
Repair vlib API socket server
- Teach vpp_api_test to send/receive API messages over sockets
- Add memfd-based shared memory
- Add api messages to create memfd-based shared memory segments
- vpp_api_test supports both socket and shared memory segment connections
- vpp_api_test can pivot from socket to shared-memory API messaging
- add socket client support to libvlibclient.so
- dead client reaper sends ping messages, container-friendly
- dead client reaper falls back to kill (<pid>, 0) liveness checking
  if e.g. a Python app goes silent for tens of seconds
- handle ping messages in python client support code
- teach show api ring about pairwise shared-memory segments
- fix ip probing of already resolved destinations (VPP-998)
We'll need this work to implement proper host-stack client isolation
Change-Id: Ic23b65f75c854d0393d9a2e9d6b122a9551be769
Signed-off-by: Dave Barach <dave@barachs.net>
Signed-off-by: Dave Wallace <dwallacelf@gmail.com>
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vlibmemory')
-rw-r--r-- | src/vlibmemory/api.h | 1 | ||||
-rw-r--r-- | src/vlibmemory/api_common.h | 121 | ||||
-rw-r--r-- | src/vlibmemory/memclnt.api | 57 | ||||
-rw-r--r-- | src/vlibmemory/memory_client.c | 65 | ||||
-rw-r--r-- | src/vlibmemory/memory_shared.c | 175 | ||||
-rw-r--r-- | src/vlibmemory/memory_vlib.c | 717 | ||||
-rw-r--r-- | src/vlibmemory/socket_client.c | 240 | ||||
-rw-r--r-- | src/vlibmemory/socksvr_vlib.c | 719 |
8 files changed, 1824 insertions, 271 deletions
diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index 2a1438fde3c..5c32f5308de 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -50,6 +50,7 @@ vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch) } void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); + #endif /* included_vlibmemory_api_h */ /* diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h index 19daecdfb6a..63a7e5e4188 100644 --- a/src/vlibmemory/api_common.h +++ b/src/vlibmemory/api_common.h @@ -19,6 +19,7 @@ #define included_vlibmemory_api_common_h #include <svm/svm_common.h> +#include <vppinfra/file.h> #include <vlibapi/api_common.h> #include <vlibmemory/unix_shared_memory_queue.h> @@ -44,18 +45,17 @@ typedef struct ring_alloc_ /* * Initializers for the (shared-memory) rings - * _(size, n). Note: each msg has an 8 byte header. - * Might want to change that to an index sometime. + * _(size, n). Note: each msg has space for a header. */ #define foreach_vl_aring_size \ -_(64+8, 1024) \ -_(256+8, 128) \ -_(1024+8, 64) +_(64+sizeof(ring_alloc_t), 1024) \ +_(256+sizeof(ring_alloc_t), 128) \ +_(1024+sizeof(ring_alloc_t), 64) #define foreach_clnt_aring_size \ -_(1024+8, 1024) \ -_(2048+8, 128) \ -_(4096+8, 8) + _(1024+sizeof(ring_alloc_t), 1024) \ + _(2048+sizeof(ring_alloc_t), 128) \ + _(4096+sizeof(ring_alloc_t), 8) typedef struct vl_shmem_hdr_ { @@ -83,7 +83,6 @@ typedef struct vl_shmem_hdr_ /* Number of garbage-collected messages */ u32 garbage_collects; - } vl_shmem_hdr_t; #define VL_SHM_VERSION 2 @@ -123,10 +122,114 @@ int vl_client_connect_to_vlib (const char *svm_name, const char *client_name, int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, const char *client_name, int rx_queue_size); +int vl_client_connect_to_vlib_no_map (const char *svm_name, + const char *client_name, + int rx_queue_size); u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 
data_length); u32 vl_api_memclnt_create_internal (char *, unix_shared_memory_queue_t *); +void vl_init_shmem (svm_region_t * vlib_rp, int is_vlib, + int is_private_region); +void vl_client_install_client_message_handlers (void); + +/* API messages over sockets */ + +extern vlib_node_registration_t memclnt_node; +extern volatile int **vl_api_queue_cursizes; + +/* Events sent to the memclnt process */ +#define QUEUE_SIGNAL_EVENT 1 +#define SOCKET_READ_EVENT 2 + +#define API_SOCKET_FILE "/run/vpp-api.sock" + +typedef struct +{ + clib_file_t *clib_file; + vl_api_registration_t *regp; + u8 *data; +} vl_socket_args_for_process_t; + +typedef struct +{ + /* Server port number */ + u8 *socket_name; + + /* By default, localhost... */ + u32 bind_address; + + /* + * (listen, server, client) registrations. Shared memory + * registrations are in shared memory + */ + vl_api_registration_t *registration_pool; + /* + * Chain-drag variables, so message API handlers + * (generally) don't know whether they're talking to a socket + * or to a shared-memory connection. + */ + vl_api_registration_t *current_rp; + clib_file_t *current_uf; + /* One input buffer, shared across all sockets */ + i8 *input_buffer; + + /* pool of process args for socket clients */ + vl_socket_args_for_process_t *process_args; + + /* Listen for API connections here */ + clib_socket_t socksvr_listen_socket; +} socket_main_t; + +extern socket_main_t socket_main; + +typedef struct +{ + int socket_fd; + /* Temporarily disable the connection, so we can keep it around... 
*/ + int socket_enable; + + clib_socket_t client_socket; + + u32 socket_buffer_size; + u8 *socket_tx_buffer; + u8 *socket_rx_buffer; + u32 socket_tx_nbytes; + int control_pings_outstanding; +} socket_client_main_t; + +extern socket_client_main_t socket_client_main; + +#define SOCKET_CLIENT_DEFAULT_BUFFER_SIZE 4096 + +void socksvr_add_pending_output (struct clib_file *uf, + struct vl_api_registration_ *cf, + u8 * buffer, uword buffer_bytes); + +void vl_free_socket_registration_index (u32 pool_index); +void vl_socket_process_msg (struct clib_file *uf, + struct vl_api_registration_ *rp, i8 * input_v); +clib_error_t *vl_socket_read_ready (struct clib_file *uf); +void vl_socket_add_pending_output (struct clib_file *uf, + struct vl_api_registration_ *rp, + u8 * buffer, uword buffer_bytes); +void vl_socket_add_pending_output_no_flush (struct clib_file *uf, + struct vl_api_registration_ *rp, + u8 * buffer, uword buffer_bytes); +clib_error_t *vl_socket_write_ready (struct clib_file *uf); +void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); +u32 sockclnt_open_index (char *client_name, char *hostname, int port); +void sockclnt_close_index (u32 index); +void vl_client_msg_api_send (vl_api_registration_t * cm, u8 * elem); +vl_api_registration_t *sockclnt_get_registration (u32 index); +void vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, + i8 * input_v); + +int +vl_socket_client_connect (socket_client_main_t * scm, char *socket_path, + char *client_name, u32 socket_buffer_size); +void vl_socket_client_read_reply (socket_client_main_t * scm); +void vl_socket_client_enable_disable (socket_client_main_t * scm, int enable); #endif /* included_vlibmemory_api_common_h */ diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index 32e51407949..94c99ad5d45 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -112,3 +112,60 @@ manual_print define trace_plugin_msg_ids u16 first_msg_id; u16 last_msg_id; }; + +/* + 
* Create a socket client registration. + */ +define sockclnt_create { + u8 name[64]; /* for show, find by name, whatever */ + u32 context; /* opaque value to be returned in the reply */ +}; + +define sockclnt_create_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* handle by which vlib knows this client */ + u32 index; /* index, used e.g. by API trace replay */ + u32 context; /* opaque value from the create request */ +}; + +/* + * Delete a client registration + */ +define sockclnt_delete { + u32 index; /* index, used e.g. by API trace replay */ + u64 handle; /* handle by which vlib knows this client */ +}; + +define sockclnt_delete_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* in case the client wonders */ +}; + +/* + * Ask vpp for a memfd shared segment + */ +define memfd_segment_create { + u32 client_index; + u32 context; + u64 requested_size; +}; + +/* + * Reply + */ +define memfd_segment_create_reply +{ + u32 context; + i32 retval; + u32 master_fd; +}; + +/* + * Memory client ping / response + * Only sent on inactive connections + */ +autoreply define memclnt_keepalive +{ + u32 client_index; + u32 context; +}; diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index a162d6bb27c..3f8b799f41f 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -319,21 +319,37 @@ vl_client_disconnect (void) } } +/** + * Stave off the binary API dead client reaper + * Only sent to inactive clients + */ +static void +vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp) +{ + vl_api_memclnt_keepalive_reply_t *rmp; + api_main_t *am; + vl_shmem_hdr_t *shmem_hdr; + + am = &api_main; + shmem_hdr = am->shmem_hdr; + + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp); +} + #define 
foreach_api_msg \ _(RX_THREAD_EXIT, rx_thread_exit) \ _(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ -_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) +_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) \ +_(MEMCLNT_KEEPALIVE, memclnt_keepalive) -int -vl_client_api_map (const char *region_name) +void +vl_client_install_client_message_handlers (void) { - int rv; - - if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) - { - return rv; - } #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ @@ -344,6 +360,18 @@ vl_client_api_map (const char *region_name) sizeof(vl_api_##n##_t), 1); foreach_api_msg; #undef _ +} + + +int +vl_client_api_map (const char *region_name) +{ + int rv; + + if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) + return rv; + + vl_client_install_client_message_handlers (); return 0; } @@ -356,12 +384,12 @@ vl_client_api_unmap (void) static int connect_to_vlib_internal (const char *svm_name, const char *client_name, - int rx_queue_size, int want_pthread) + int rx_queue_size, int want_pthread, int do_map) { int rv = 0; memory_client_main_t *mm = &memory_client_main; - if ((rv = vl_client_api_map (svm_name))) + if (do_map && (rv = vl_client_api_map (svm_name))) { clib_warning ("vl_client_api map rv %d", rv); return rv; @@ -393,7 +421,8 @@ vl_client_connect_to_vlib (const char *svm_name, const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, - 1 /* want pthread */ ); + 1 /* want pthread */ , + 1 /* do map */ ); } int @@ -402,7 +431,17 @@ vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, - 0 /* want pthread */ ); + 0 /* want pthread */ , + 1 /* do map */ ); +} + +int +vl_client_connect_to_vlib_no_map (const char *svm_name, + const char *client_name, int rx_queue_size) +{ + return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, + 1 /* want pthread */ , + 0 
/* dont map */ ); } void diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 8c6469080d7..021c54ef953 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -39,6 +39,10 @@ #include <vlibmemory/vl_memory_api_h.h> #undef vl_typedefs +socket_main_t socket_main; + +#define DEBUG_MESSAGE_BUFFER_OVERRUN 0 + static inline void * vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) { @@ -52,6 +56,10 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) shmem_hdr = am->shmem_hdr; +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + nbytes += 4; +#endif + if (shmem_hdr == 0) { clib_warning ("shared memory header NULL"); @@ -172,7 +180,16 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) pthread_mutex_unlock (&am->vlib_rp->mutex); out: +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + nbytes -= 4; + u32 *overrun; + overrun = (u32 *) (rv->data + nbytes - sizeof (msgbuf_t)); + *overrun = 0x1badbabe; + } +#endif rv->data_len = htonl (nbytes - sizeof (msgbuf_t)); + return (rv->data); } @@ -231,11 +248,27 @@ vl_msg_api_free (void *a) { rv->q = 0; rv->gc_mark_timestamp = 0; +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + u32 *overrun; + overrun = (u32 *) (rv->data + ntohl (rv->data_len)); + ASSERT (*overrun == 0x1badbabe); + } +#endif return; } pthread_mutex_lock (&am->vlib_rp->mutex); oldheap = svm_push_data_heap (am->vlib_rp); + +#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0 + { + u32 *overrun; + overrun = (u32 *) (rv->data + ntohl (rv->data_len)); + ASSERT (*overrun == 0x1badbabe); + } +#endif + clib_mem_free (rv); svm_pop_heap (oldheap); pthread_mutex_unlock (&am->vlib_rp->mutex); @@ -329,17 +362,91 @@ vl_set_api_pvt_heap_size (u64 size) am->api_pvt_heap_size = size; } +void +vl_init_shmem (svm_region_t * vlib_rp, int is_vlib, int is_private_region) +{ + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = 0; + u32 vlib_input_queue_length; + void *oldheap; + ASSERT (vlib_rp); + + /* 
$$$$ need private region config parameters */ + + oldheap = svm_push_data_heap (vlib_rp); + + vec_validate (shmem_hdr, 0); + shmem_hdr->version = VL_SHM_VERSION; + + /* vlib main input queue */ + vlib_input_queue_length = 1024; + if (am->vlib_input_queue_length) + vlib_input_queue_length = am->vlib_input_queue_length; + + shmem_hdr->vl_input_queue = + unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword), + getpid (), am->vlib_signal); + + /* Set up the msg ring allocator */ +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->vl_rings, _rp); \ + } while (0); + + foreach_vl_aring_size; +#undef _ + +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->client_rings, _rp); \ + } while (0); + + foreach_clnt_aring_size; +#undef _ + + if (is_private_region == 0) + { + am->shmem_hdr = shmem_hdr; + am->vlib_rp = vlib_rp; + am->our_pid = getpid (); + if (is_vlib) + am->shmem_hdr->vl_pid = am->our_pid; + } + else + shmem_hdr->vl_pid = am->our_pid; + + svm_pop_heap (oldheap); + + /* + * After absolutely everything that a client might see is set up, + * declare the shmem region valid + */ + vlib_rp->user_ctx = shmem_hdr; + + pthread_mutex_unlock (&vlib_rp->mutex); +} + + int vl_map_shmem (const char *region_name, int is_vlib) { svm_map_region_args_t _a, *a = &_a; svm_region_t *vlib_rp, *root_rp; - void *oldheap; - vl_shmem_hdr_t *shmem_hdr = 0; api_main_t *am = &api_main; int i, rv; struct timespec ts, tsrem; - u32 vlib_input_queue_length; char *vpe_api_region_suffix = "-vpe-api"; memset (a, 0, sizeof (*a)); @@ -472,65 +579,8 @@ vl_map_shmem (const char *region_name, int is_vlib) } /* Nope, it's our problem... 
*/ + vl_init_shmem (vlib_rp, 1 /* is vlib */ , 0 /* is_private_region */ ); - oldheap = svm_push_data_heap (vlib_rp); - - vec_validate (shmem_hdr, 0); - shmem_hdr->version = VL_SHM_VERSION; - - /* vlib main input queue */ - vlib_input_queue_length = 1024; - if (am->vlib_input_queue_length) - vlib_input_queue_length = am->vlib_input_queue_length; - - shmem_hdr->vl_input_queue = - unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword), - getpid (), am->vlib_signal); - - /* Set up the msg ring allocator */ -#define _(sz,n) \ - do { \ - ring_alloc_t _rp; \ - _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ - _rp.size = (sz); \ - _rp.nitems = n; \ - _rp.hits = 0; \ - _rp.misses = 0; \ - vec_add1(shmem_hdr->vl_rings, _rp); \ - } while (0); - - foreach_vl_aring_size; -#undef _ - -#define _(sz,n) \ - do { \ - ring_alloc_t _rp; \ - _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ - _rp.size = (sz); \ - _rp.nitems = n; \ - _rp.hits = 0; \ - _rp.misses = 0; \ - vec_add1(shmem_hdr->client_rings, _rp); \ - } while (0); - - foreach_clnt_aring_size; -#undef _ - - am->shmem_hdr = shmem_hdr; - am->vlib_rp = vlib_rp; - am->our_pid = getpid (); - if (is_vlib) - am->shmem_hdr->vl_pid = am->our_pid; - - svm_pop_heap (oldheap); - - /* - * After absolutely everything that a client might see is set up, - * declare the shmem region valid - */ - vlib_rp->user_ctx = shmem_hdr; - - pthread_mutex_unlock (&vlib_rp->mutex); vec_add1 (am->mapped_shmem_regions, vlib_rp); return 0; } @@ -638,6 +688,9 @@ vl_api_client_index_to_registration_internal (u32 handle) vl_api_registration_t * vl_api_client_index_to_registration (u32 index) { + if (PREDICT_FALSE (socket_main.current_rp != 0)) + return socket_main.current_rp; + return (vl_api_client_index_to_registration_internal (index)); } diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index d305ea619aa..c9b3183f592 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ 
-96,17 +96,7 @@ vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a, #include <vlibmemory/vl_memory_api_h.h> #undef vl_endianfun -void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) - __attribute__ ((weak)); - -void -vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) -{ - static int count; - - if (count++ < 5) - clib_warning ("need to link against -lvlibsocket, msg not sent!"); -} +extern void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) @@ -117,7 +107,7 @@ vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) } else { - vl_msg_api_send_shmem (rp->vl_input_queue, elem); + vl_msg_api_send_shmem (rp->vl_input_queue, (u8 *) & elem); } } @@ -196,6 +186,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) int rv = 0; void *oldheap; api_main_t *am = &api_main; + u8 *serialized_message_table_in_shmem; /* * This is tortured. Maintain a vlib-address-space private @@ -235,6 +226,8 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) memset (regp, 0, sizeof (*regp)); regp->registration_type = REGISTRATION_TYPE_SHMEM; regp->vl_api_registration_pool_index = regpp - am->vl_clients; + regp->vlib_rp = svm; + regp->shmem_hdr = am->shmem_hdr; q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword) mp->input_queue; @@ -242,11 +235,11 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) regp->name = format (0, "%s", mp->name); vec_add1 (regp->name, 0); + serialized_message_table_in_shmem = vl_api_serialize_message_table (am, 0); + pthread_mutex_unlock (&svm->mutex); svm_pop_heap (oldheap); - ASSERT (am->serialized_message_table_in_shmem); - rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY); rp->handle = (uword) regp; @@ -255,8 +248,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) am->shmem_hdr->application_restarts); rp->context = mp->context; rp->response = 
ntohl (rv); - rp->message_table = - pointer_to_uword (am->serialized_message_table_in_shmem); + rp->message_table = pointer_to_uword (serialized_message_table_in_shmem); vl_msg_api_send_shmem (q, (u8 *) & rp); } @@ -313,11 +305,15 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) if (!pool_is_free (am->vl_clients, regpp)) { + int i; regp = *regpp; svm = am->vlib_rp; + int private_registration = 0; - /* $$$ check the input queue for e.g. punted sf's */ - + /* + * Note: the API message handling path will set am->vlib_rp + * as appropriate for pairwise / private memory segments + */ rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY); rp->handle = mp->handle; @@ -333,18 +329,56 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) return; } + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + /* Is this a pairwise / private API segment? */ + if (am->vlib_private_rps[i] == svm) + { + /* Note: account for the memfd header page */ + u64 virtual_base = svm->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = svm->virtual_size + MMAP_PAGESIZE; + + /* + * Kill the registration pool element before we make + * the index vanish forever + */ + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + + vec_delete (am->vlib_private_rps, 1, i); + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + private_registration = 1; + break; + } + } + /* No dangling references, please */ *regpp = 0; - pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index); - - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - /* Poison the old registration */ - memset (regp, 0xF1, sizeof (*regp)); - clib_mem_free (regp); - pthread_mutex_unlock 
(&svm->mutex); - svm_pop_heap (oldheap); + if (private_registration == 0) + { + pool_put_index (am->vl_clients, + regp->vl_api_registration_pool_index); + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + /* Poison the old registration */ + memset (regp, 0xF1, sizeof (*regp)); + clib_mem_free (regp); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + /* + * These messages must be freed manually, since they're set up + * as "bounce" messages. In the private_registration == 1 case, + * we kill the shared-memory segment which contains the message + * with munmap. + */ + vl_msg_api_free (mp); + } } else { @@ -392,10 +426,54 @@ out: vl_msg_api_send_shmem (q, (u8 *) & rmp); } -#define foreach_vlib_api_msg \ -_(MEMCLNT_CREATE, memclnt_create) \ -_(MEMCLNT_DELETE, memclnt_delete) \ -_(GET_FIRST_MSG_ID, get_first_msg_id) +/** + * client answered a ping, stave off the grim reaper... + */ + +void + vl_api_memclnt_keepalive_reply_t_handler + (vl_api_memclnt_keepalive_reply_t * mp) +{ + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + + regp = vl_api_client_index_to_registration (mp->context); + if (regp) + { + regp->last_heard = vlib_time_now (vm); + regp->unanswered_pings = 0; + } + else + clib_warning ("BUG: anonymous memclnt_keepalive_reply"); +} + +/** + * We can send ourselves these messages if someone uses the + * builtin binary api test tool... 
+ */ +static void +vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp) +{ + vl_api_memclnt_keepalive_reply_t *rmp; + api_main_t *am; + vl_shmem_hdr_t *shmem_hdr; + + am = &api_main; + shmem_hdr = am->shmem_hdr; + + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY); + rmp->context = mp->context; + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_vlib_api_msg \ +_(MEMCLNT_CREATE, memclnt_create) \ +_(MEMCLNT_DELETE, memclnt_delete) \ +_(GET_FIRST_MSG_ID, get_first_msg_id) \ +_(MEMCLNT_KEEPALIVE, memclnt_keepalive) \ +_(MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply) /* * vl_api_init @@ -404,6 +482,7 @@ static int memory_api_init (const char *region_name) { int rv; + api_main_t *am = &api_main; vl_msg_api_msg_config_t cfg; vl_msg_api_msg_config_t *c = &cfg; @@ -428,6 +507,13 @@ memory_api_init (const char *region_name) foreach_vlib_api_msg; #undef _ + /* + * special-case freeing of memclnt_delete messages, so we can + * simply munmap pairwise / private API segments... + */ + am->message_bounce[VL_API_MEMCLNT_DELETE] = 1; + am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE_REPLY] = 1; + return 0; } @@ -474,6 +560,203 @@ send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id) vl_msg_api_send_shmem (q, (u8 *) & mp); } +static void +send_memclnt_keepalive (vl_api_registration_t * regp, f64 now) +{ + vl_api_memclnt_keepalive_t *mp; + unix_shared_memory_queue_t *q; + api_main_t *am = &api_main; + svm_region_t *save_vlib_rp = am->vlib_rp; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + + q = regp->vl_input_queue; + + /* + * If the queue head is moving, assume that the client is processing + * messages and skip the ping. This heuristic may fail if the queue + * is in the same position as last time, net of wrapping; in which + * case, the client will receive a keepalive. 
+ */ + if (regp->last_queue_head != q->head) + { + regp->last_heard = now; + regp->unanswered_pings = 0; + regp->last_queue_head = q->head; + return; + } + + /* + * push/pop shared memory segment, so this routine + * will work with "normal" as well as "private segment" + * memory clients.. + */ + + am->vlib_rp = regp->vlib_rp; + am->shmem_hdr = regp->shmem_hdr; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MEMCLNT_KEEPALIVE); + mp->context = mp->client_index = + vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); + + regp->unanswered_pings++; + + /* Failure-to-send due to a stuffed queue is absolutely expected */ + if (unix_shared_memory_queue_add (q, (u8 *) & mp, 1 /* nowait */ )) + vl_msg_api_free (mp); + + am->vlib_rp = save_vlib_rp; + am->shmem_hdr = save_shmem_hdr; +} + +static void +dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now) +{ + + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + static u32 *dead_indices; + static u32 *confused_indices; + + vec_reset_length (dead_indices); + vec_reset_length (confused_indices); + + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp) + { + /* If we haven't heard from this client recently... 
*/ + if (regp->last_heard < (now - 10.0)) + { + if (regp->unanswered_pings == 2) + { + unix_shared_memory_queue_t *q; + q = regp->vl_input_queue; + if (kill (q->consumer_pid, 0) >=0) + { + clib_warning ("REAPER: lazy binary API client '%s'", + regp->name); + regp->unanswered_pings = 0; + regp->last_heard = now; + } + else + { + clib_warning ("REAPER: binary API client '%s' died", + regp->name); + vec_add1(dead_indices, regpp - am->vl_clients); + } + } + else + send_memclnt_keepalive (regp, now); + } + else + regp->unanswered_pings = 0; + } + else + { + clib_warning ("NULL client registration index %d", + regpp - am->vl_clients); + vec_add1 (confused_indices, regpp - am->vl_clients); + } + })); + /* *INDENT-ON* */ + /* This should "never happen," but if it does, fix it... */ + if (PREDICT_FALSE (vec_len (confused_indices) > 0)) + { + int i; + for (i = 0; i < vec_len (confused_indices); i++) + { + pool_put_index (am->vl_clients, confused_indices[i]); + } + } + + if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + { + int i; + svm_region_t *svm; + void *oldheap; + + /* Allow the application to clean up its registrations */ + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + u32 handle; + + handle = vl_msg_api_handle_from_index_and_epoch + (dead_indices[i], shm->application_restarts); + (void) call_reaper_functions (handle); + } + } + + svm = am->vlib_rp; + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + /* Is this a pairwise SVM segment? 
*/ + if ((*regpp)->vlib_rp != svm) + { + int i; + svm_region_t *dead_rp = (*regpp)->vlib_rp; + /* Note: account for the memfd header page */ + u64 virtual_base = dead_rp->virtual_base - MMAP_PAGESIZE; + u64 virtual_size = dead_rp->virtual_size + MMAP_PAGESIZE; + + /* For horizontal scaling, add a hash table... */ + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + if (am->vlib_private_rps[i] == dead_rp) + { + vec_delete (am->vlib_private_rps, 1, i); + goto found; + } + clib_warning ("private rp %llx AWOL", dead_rp); + + found: + /* Kill it, accounting for the memfd header page */ + if (munmap ((void *) virtual_base, virtual_size) < 0) + clib_unix_warning ("munmap"); + /* Reset the queue-length-address cache */ + vec_reset_length (vl_api_queue_cursizes); + } + else + { + /* Poison the old registration */ + memset (*regpp, 0xF3, sizeof (**regpp)); + clib_mem_free (*regpp); + } + /* no dangling references, please */ + *regpp = 0; + } + else + { + svm_pop_heap (oldheap); + clib_warning ("Duplicate free, client index %d", + regpp - am->vl_clients); + oldheap = svm_push_data_heap (svm); + } + } + + svm_client_scan_this_region_nolock (am->vlib_rp); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + for (i = 0; i < vec_len (dead_indices); i++) + pool_put_index (am->vl_clients, dead_indices[i]); + } +} + + static uword memclnt_process (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) @@ -487,17 +770,29 @@ memclnt_process (vlib_main_t * vm, f64 dead_client_scan_time; f64 sleep_time, start_time; f64 vector_rate; + clib_error_t *socksvr_api_init (vlib_main_t * vm); + clib_error_t *error; int i; - u8 *serialized_message_table = 0; - svm_region_t *svm; - void *oldheap; + vl_socket_args_for_process_t *a; + uword event_type; + uword *event_data = 0; + int private_segment_rotor = 0; + svm_region_t *vlib_rp; + f64 now; vlib_set_queue_signal_callback (vm, memclnt_queue_callback); if ((rv = memory_api_init (am->region_name)) < 0) { - clib_warning 
("memory_api_init returned %d, wait for godot...", rv); - vlib_process_suspend (vm, 1e70); + clib_warning ("memory_api_init returned %d, quitting...", rv); + return 0; + } + + if ((error = socksvr_api_init (vm))) + { + clib_error_report (error); + clib_warning ("socksvr_api_init failed, quitting..."); + return 0; } shm = am->shmem_hdr; @@ -510,8 +805,8 @@ memclnt_process (vlib_main_t * vm, if (e) clib_error_report (e); - sleep_time = 20.0; - dead_client_scan_time = vlib_time_now (vm) + 20.0; + sleep_time = 10.0; + dead_client_scan_time = vlib_time_now (vm) + 10.0; /* * Send plugin message range messages for each plugin we loaded @@ -524,26 +819,17 @@ memclnt_process (vlib_main_t * vm, } /* - * Snapshoot the api message table. - */ - serialized_message_table = vl_api_serialize_message_table (am, 0); - - svm = am->vlib_rp; - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - - am->serialized_message_table_in_shmem = vec_dup (serialized_message_table); - - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - - /* * Save the api message table snapshot, if configured */ if (am->save_msg_table_filename) { int fd, rv; u8 *chroot_file; + u8 *serialized_message_table; + + /* + * Snapshoot the api message table. 
+ */ if (strstr ((char *) am->save_msg_table_filename, "..") || index ((char *) am->save_msg_table_filename, '/')) { @@ -561,6 +847,9 @@ memclnt_process (vlib_main_t * vm, clib_unix_warning ("creat"); goto skip_save; } + + serialized_message_table = vl_api_serialize_message_table (am, 0); + rv = write (fd, serialized_message_table, vec_len (serialized_message_table)); @@ -572,15 +861,14 @@ memclnt_process (vlib_main_t * vm, clib_unix_warning ("close"); vec_free (chroot_file); + vec_free (serialized_message_table); } skip_save: - vec_free (serialized_message_table); /* $$$ pay attention to frame size, control CPU usage */ while (1) { - uword event_type __attribute__ ((unused)); i8 *headp; int need_broadcast; @@ -665,104 +953,89 @@ skip_save: } } - event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time); - vm->queue_signal_pending = 0; - vlib_process_get_events (vm, 0 /* event_data */ ); - - if (vlib_time_now (vm) > dead_client_scan_time) + /* + * see if we have any private api shared-memory segments + * If so, push required context variables, and process + * a message. 
+ */ + if (PREDICT_FALSE (vec_len (am->vlib_private_rps))) { - vl_api_registration_t **regpp; - vl_api_registration_t *regp; - unix_shared_memory_queue_t *q; - static u32 *dead_indices; - static u32 *confused_indices; + unix_shared_memory_queue_t *save_vlib_input_queue = q; + vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr; + svm_region_t *save_vlib_rp = am->vlib_rp; - vec_reset_length (dead_indices); - vec_reset_length (confused_indices); + vlib_rp = am->vlib_rp = am->vlib_private_rps[private_segment_rotor]; - /* *INDENT-OFF* */ - pool_foreach (regpp, am->vl_clients, - ({ - regp = *regpp; - if (regp) - { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - vec_add1(dead_indices, regpp - am->vl_clients); - } - } - else - { - clib_warning ("NULL client registration index %d", - regpp - am->vl_clients); - vec_add1 (confused_indices, regpp - am->vl_clients); - } - })); - /* *INDENT-ON* */ - /* This should "never happen," but if it does, fix it... */ - if (PREDICT_FALSE (vec_len (confused_indices) > 0)) - { - int i; - for (i = 0; i < vec_len (confused_indices); i++) - { - pool_put_index (am->vl_clients, confused_indices[i]); - } - } + am->shmem_hdr = (void *) vlib_rp->user_ctx; + q = am->shmem_hdr->vl_input_queue; - if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + pthread_mutex_lock (&q->mutex); + if (q->cursize > 0) { - int i; - svm_region_t *svm; - void *oldheap; + headp = (i8 *) (q->data + sizeof (uword) * q->head); + clib_memcpy (&mp, headp, sizeof (uword)); - /* Allow the application to clean up its registrations */ - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - u32 handle; - - handle = vl_msg_api_handle_from_index_and_epoch - (dead_indices[i], shm->application_restarts); - (void) call_reaper_functions (handle); - } - } + q->head++; + need_broadcast = (q->cursize == q->maxsize / 2); + q->cursize--; - svm = am->vlib_rp; - pthread_mutex_lock (&svm->mutex); - 
oldheap = svm_push_data_heap (svm); + if (PREDICT_FALSE (q->head == q->maxsize)) + q->head = 0; + pthread_mutex_unlock (&q->mutex); + if (need_broadcast) + (void) pthread_cond_broadcast (&q->condvar); - for (i = 0; i < vec_len (dead_indices); i++) - { - regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); - if (regpp) - { - /* Poison the old registration */ - memset (*regpp, 0xF3, sizeof (**regpp)); - clib_mem_free (*regpp); - /* no dangling references, please */ - *regpp = 0; - } - else - { - svm_pop_heap (oldheap); - clib_warning ("Duplicate free, client index %d", - regpp - am->vl_clients); - oldheap = svm_push_data_heap (svm); - } - } + pthread_mutex_unlock (&q->mutex); - svm_client_scan_this_region_nolock (am->vlib_rp); + vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node); + } + else + pthread_mutex_unlock (&q->mutex); - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - for (i = 0; i < vec_len (dead_indices); i++) - pool_put_index (am->vl_clients, dead_indices[i]); + q = save_vlib_input_queue; + am->shmem_hdr = save_shmem_hdr; + am->vlib_rp = save_vlib_rp; + + private_segment_rotor++; + if (private_segment_rotor >= vec_len (am->vlib_private_rps)) + private_segment_rotor = 0; + } + + vlib_process_wait_for_event_or_clock (vm, sleep_time); + vec_reset_length (event_data); + event_type = vlib_process_get_events (vm, &event_data); + now = vlib_time_now (vm); + + switch (event_type) + { + case QUEUE_SIGNAL_EVENT: + vm->queue_signal_pending = 0; + break; + + case SOCKET_READ_EVENT: + for (i = 0; i < vec_len (event_data); i++) + { + a = pool_elt_at_index (socket_main.process_args, event_data[i]); + vl_api_socket_process_msg (a->clib_file, a->regp, + (i8 *) a->data); + vec_free (a->data); + pool_put (socket_main.process_args, a); } + break; - dead_client_scan_time = vlib_time_now (vm) + 20.0; + /* Timeout... 
*/ + case -1: + break; + + default: + clib_warning ("unknown event type %d", event_type); + break; + } + + if (now > dead_client_scan_time) + { + dead_client_scan (am, shm, now); + dead_client_scan_time = vlib_time_now (vm) + 10.0; } if (TRACE_VLIB_MEMORY_QUEUE) @@ -785,11 +1058,12 @@ skip_save: return 0; } /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (memclnt_node,static) = { - .function = memclnt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "api-rx-from-ring", - .state = VLIB_NODE_STATE_DISABLED, +VLIB_REGISTER_NODE (memclnt_node) = +{ + .function = memclnt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "api-rx-from-ring", + .state = VLIB_NODE_STATE_DISABLED, }; /* *INDENT-ON* */ @@ -865,14 +1139,17 @@ VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = }; /* *INDENT-ON* */ +volatile int **vl_api_queue_cursizes; + static void memclnt_queue_callback (vlib_main_t * vm) { - static volatile int *cursizep; + int i; + api_main_t *am = &api_main; - if (PREDICT_FALSE (cursizep == 0)) + if (PREDICT_FALSE (vec_len (vl_api_queue_cursizes) != + 1 + vec_len (am->vlib_private_rps))) { - api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; unix_shared_memory_queue_t *q; @@ -882,15 +1159,30 @@ memclnt_queue_callback (vlib_main_t * vm) q = shmem_hdr->vl_input_queue; if (q == 0) return; - cursizep = &q->cursize; + + vec_add1 (vl_api_queue_cursizes, &q->cursize); + + for (i = 0; i < vec_len (am->vlib_private_rps); i++) + { + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + + shmem_hdr = (void *) vlib_rp->user_ctx; + q = shmem_hdr->vl_input_queue; + vec_add1 (vl_api_queue_cursizes, &q->cursize); + } } - if (*cursizep >= 1) + for (i = 0; i < vec_len (vl_api_queue_cursizes); i++) { - vm->queue_signal_pending = 1; - vm->api_queue_nonempty = 1; - vlib_process_signal_event (vm, memclnt_node.index, - /* event_type */ 0, /* event_data */ 0); + if (*vl_api_queue_cursizes[i]) + { + vm->queue_signal_pending = 1; + vm->api_queue_nonempty = 1; + 
vlib_process_signal_event (vm, memclnt_node.index, + /* event_type */ QUEUE_SIGNAL_EVENT, + /* event_data */ 0); + break; + } } } @@ -971,13 +1263,55 @@ setup_memclnt_exit (vlib_main_t * vm) VLIB_INIT_FUNCTION (setup_memclnt_exit); +u8 * +format_api_message_rings (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_shmem_hdr_t *shmem_hdr = va_arg (*args, vl_shmem_hdr_t *); + int main_segment = va_arg (*args, int); + ring_alloc_t *ap; + int i; + + if (shmem_hdr == 0) + return format (s, "%8s %8s %8s %8s %8s\n", + "Owner", "Size", "Nitems", "Hits", "Misses"); + + ap = shmem_hdr->vl_rings; + + for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "vlib", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + ap = shmem_hdr->client_rings; + + for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + { + s = format (s, "%8s %8d %8d %8d %8d\n", + "clnt", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + if (main_segment) + { + s = format (s, "%d ring miss fallback allocations\n", am->ring_misses); + s = format + (s, + "%d application restarts, %d reclaimed msgs, %d garbage collects\n", + shmem_hdr->application_restarts, shmem_hdr->restart_reclaims, + shmem_hdr->garbage_collects); + } + return s; +} + static clib_error_t * vl_api_ring_command (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cli_cmd) { int i; - ring_alloc_t *ap; vl_shmem_hdr_t *shmem_hdr; api_main_t *am = &api_main; @@ -989,34 +1323,38 @@ vl_api_ring_command (vlib_main_t * vm, return 0; } - vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n", - "Owner", "Size", "Nitems", "Hits", "Misses"); - - ap = shmem_hdr->vl_rings; + vlib_cli_output (vm, "Main API segment rings:"); - for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) - { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "vlib", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; - } + vlib_cli_output (vm, "%U", format_api_message_rings, am, + 0 /* 
print header */ , 0 /* notused */ ); - ap = shmem_hdr->client_rings; + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 1 /* main segment */ ); - for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + for (i = 0; i < vec_len (am->vlib_private_rps); i++) { - vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", - "clnt", ap->size, ap->nitems, ap->hits, ap->misses); - ap++; + svm_region_t *vlib_rp = am->vlib_private_rps[i]; + shmem_hdr = (void *) vlib_rp->user_ctx; + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + + /* For horizontal scaling, add a hash table... */ + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp && regp->vlib_rp == vlib_rp) + { + vlib_cli_output (vm, "%s segment rings:", regp->name); + goto found; + } + })); + /* *INDENT-ON* */ + found: + vlib_cli_output (vm, "%U", format_api_message_rings, am, + shmem_hdr, 0 /* main segment */ ); } - vlib_cli_output (vm, "%d ring miss fallback allocations\n", - am->ring_misses); - - vlib_cli_output - (vm, "%d application restarts, %d reclaimed msgs, %d garbage collects\n", - shmem_hdr->application_restarts, - shmem_hdr->restart_reclaims, shmem_hdr->garbage_collects); return 0; } @@ -1051,15 +1389,13 @@ vl_api_client_command (vlib_main_t * vm, if (regp) { - q = regp->vl_input_queue; - if (kill (q->consumer_pid, 0) < 0) - { - health = "DEAD"; - } + if (regp->unanswered_pings > 0) + health = "questionable"; else - { - health = "alive"; - } + health = "OK"; + + q = regp->vl_input_queue; + vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n", regp->name, q->consumer_pid, q->cursize, q, health); @@ -1306,6 +1642,7 @@ vlibmemory_init (vlib_main_t * vm) { api_main_t *am = &api_main; svm_map_region_args_t _a, *a = &_a; + clib_error_t *error; memset (a, 0, sizeof (*a)); a->root_path = am->root_path; @@ -1321,7 +1658,10 @@ vlibmemory_init (vlib_main_t * vm) 0) ? 
am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE; svm_region_init_args (a); - return 0; + + error = vlib_call_init_function (vm, vlibsocket_init); + + return error; } VLIB_INIT_FUNCTION (vlibmemory_init); @@ -2227,7 +2567,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, /* Load the serialized message table from the table dump */ - error = unserialize_open_unix_file (sm, (char *) filename); + error = unserialize_open_clib_file (sm, (char *) filename); if (error) return error; @@ -2251,7 +2591,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, if (compare_current) { /* Append the current message table */ - u8 *tblv = vec_dup (am->serialized_message_table_in_shmem); + u8 *tblv = vl_api_serialize_message_table (am, 0); serialize_open_vector (sm, tblv); unserialize_integer (sm, &nmsgs, sizeof (u32)); @@ -2268,6 +2608,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm, item->crc = extract_crc (name_and_crc); item->which = 1; /* current_image */ } + vec_free (tblv); } /* Sort the table. */ diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c new file mode 100644 index 00000000000..8519e7f5f7c --- /dev/null +++ b/src/vlibmemory/socket_client.c @@ -0,0 +1,240 @@ +/* + *------------------------------------------------------------------ + * socket_client.c - API message handling over sockets, client code. + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <setjmp.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <netinet/in.h> +#include <signal.h> +#include <pthread.h> +#include <unistd.h> +#include <time.h> +#include <fcntl.h> +#include <string.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/hash.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/fifo.h> +#include <vppinfra/time.h> +#include <vppinfra/mheap.h> +#include <vppinfra/heap.h> +#include <vppinfra/pool.h> +#include <vppinfra/format.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlibmemory/api.h> + +#include <vlibmemory/vl_memory_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
clib_warning (__VA_ARGS__) +#define vl_printfun +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_printfun + +socket_client_main_t socket_client_main; + +/* Debug aid */ +u32 vl (void *p) __attribute__ ((weak)); +u32 +vl (void *p) +{ + return vec_len (p); +} + +void +vl_socket_client_read_reply (socket_client_main_t * scm) +{ + int n, current_rx_index; + msgbuf_t *mbp; + + if (scm->socket_fd == 0 || scm->socket_enable == 0) + return; + + mbp = 0; + + while (1) + { + current_rx_index = vec_len (scm->socket_rx_buffer); + while (vec_len (scm->socket_rx_buffer) < + sizeof (*mbp) + 2 /* msg id */ ) + { + vec_validate (scm->socket_rx_buffer, current_rx_index + + scm->socket_buffer_size - 1); + _vec_len (scm->socket_rx_buffer) = current_rx_index; + n = read (scm->socket_fd, scm->socket_rx_buffer + current_rx_index, + scm->socket_buffer_size); + if (n < 0) + { + clib_unix_warning ("socket_read"); + return; + } + _vec_len (scm->socket_rx_buffer) += n; + } + +#if CLIB_DEBUG > 1 + if (n > 0) + clib_warning ("read %d bytes", n); +#endif + + if (mbp == 0) + mbp = (msgbuf_t *) (scm->socket_rx_buffer); + + if (vec_len (scm->socket_rx_buffer) >= ntohl (mbp->data_len) + + sizeof (*mbp)) + { + vl_msg_api_socket_handler ((void *) (mbp->data)); + + if (vec_len (scm->socket_rx_buffer) == ntohl (mbp->data_len) + + sizeof (*mbp)) + _vec_len (scm->socket_rx_buffer) = 0; + else + vec_delete (scm->socket_rx_buffer, ntohl (mbp->data_len) + + sizeof (*mbp), 0); + mbp = 0; + + /* Quit if we're out of data, and not expecting a ping reply */ + if (vec_len (scm->socket_rx_buffer) == 0 + && scm->control_pings_outstanding == 0) + break; + } + } +} + +int +vl_socket_client_connect (socket_client_main_t * scm, char *socket_path, + char *client_name, u32 socket_buffer_size) +{ + char buffer[256]; + char *rdptr; + int n, total_bytes; + vl_api_sockclnt_create_reply_t *rp; + vl_api_sockclnt_create_t *mp; + clib_socket_t *sock = &scm->client_socket; + msgbuf_t *mbp; + clib_error_t *error; + + /* Already 
connected? */ + if (scm->socket_fd) + return (-2); + + /* bogus call? */ + if (socket_path == 0 || client_name == 0) + return (-3); + + sock->config = socket_path; + sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET; + + error = clib_socket_init (sock); + + if (error) + { + clib_error_report (error); + return (-1); + } + + scm->socket_fd = sock->fd; + + mbp = (msgbuf_t *) buffer; + mbp->q = 0; + mbp->data_len = ntohl (sizeof (*mp)); + mbp->gc_mark_timestamp = 0; + + mp = (vl_api_sockclnt_create_t *) mbp->data; + mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_CREATE); + strncpy ((char *) mp->name, client_name, sizeof (mp->name) - 1); + mp->name[sizeof (mp->name) - 1] = 0; + mp->context = 0xfeedface; + + n = write (scm->socket_fd, mbp, sizeof (*mbp) + ntohl (mbp->data_len)); + if (n < 0) + { + clib_unix_warning ("socket write (msg)"); + return (-1); + } + + memset (buffer, 0, sizeof (buffer)); + + total_bytes = 0; + rdptr = buffer; + do + { + n = read (scm->socket_fd, rdptr, sizeof (buffer) - (rdptr - buffer)); + if (n < 0) + { + clib_unix_warning ("socket read"); + } + total_bytes += n; + rdptr += n; + } + while (total_bytes < sizeof (vl_api_sockclnt_create_reply_t) + + sizeof (msgbuf_t)); + + rp = (vl_api_sockclnt_create_reply_t *) (buffer + sizeof (msgbuf_t)); + if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_CREATE_REPLY) + { + clib_warning ("connect reply got msg id %d\n", ntohs (rp->_vl_msg_id)); + return (-1); + } + + /* allocate tx, rx buffers */ + scm->socket_buffer_size = socket_buffer_size ? 
socket_buffer_size : + SOCKET_CLIENT_DEFAULT_BUFFER_SIZE; + vec_validate (scm->socket_tx_buffer, scm->socket_buffer_size - 1); + vec_validate (scm->socket_rx_buffer, scm->socket_buffer_size - 1); + _vec_len (scm->socket_rx_buffer) = 0; + scm->socket_enable = 1; + + return (0); +} + +void +vl_socket_client_disconnect (socket_client_main_t * scm) +{ + if (scm->socket_fd && (close (scm->socket_fd) < 0)) + clib_unix_warning ("close"); + scm->socket_fd = 0; +} + +void +vl_socket_client_enable_disable (socket_client_main_t * scm, int enable) +{ + scm->socket_enable = enable; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/socksvr_vlib.c b/src/vlibmemory/socksvr_vlib.c new file mode 100644 index 00000000000..1a263e7bf37 --- /dev/null +++ b/src/vlibmemory/socksvr_vlib.c @@ -0,0 +1,719 @@ +/* + *------------------------------------------------------------------ + * socksvr_vlib.c + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <sys/ioctl.h> +#include <vppinfra/byte_order.h> +#include <svm/memfd.h> + +#include <fcntl.h> +#include <sys/stat.h> + +#include <vlibmemory/api.h> + +#include <vlibmemory/vl_memory_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_printfun + +/* instantiate all the endian swap functions we know about */ +#define vl_endianfun +#include <vlibmemory/vl_memory_api_h.h> +#undef vl_endianfun + +void +dump_socket_clients (vlib_main_t * vm, api_main_t * am) +{ + vl_api_registration_t *reg; + socket_main_t *sm = &socket_main; + clib_file_main_t *fm = &file_main; + clib_file_t *f; + + /* + * Must have at least one active client, not counting the + * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket + */ + if (pool_elts (sm->registration_pool) < 2) + return; + + vlib_cli_output (vm, "Socket clients"); + vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc"); + /* *INDENT-OFF* */ + pool_foreach (reg, sm->registration_pool, + ({ + if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) { + f = pool_elt_at_index (fm->file_pool, reg->clib_file_index); + vlib_cli_output (vm, "%16s %8d", + reg->name, f->file_descriptor); + } + })); +/* *INDENT-ON* */ +} + +void +vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) +{ + u16 msg_id = ntohs (*(u16 *) elem); + api_main_t *am = &api_main; + msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data)); +#if CLIB_DEBUG > 1 + u32 output_length; +#endif + clib_file_t *cf = rp->clib_file_index + file_main.file_pool; + + ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); + + if (msg_id >= 
vec_len (am->api_trace_cfg)) + { + clib_warning ("id out of range: %d", msg_id); + vl_msg_api_free ((void *) elem); + return; + } + + /* Add the msgbuf_t to the output vector */ + vl_socket_add_pending_output_no_flush (cf, + rp->vl_api_registration_pool_index + + socket_main.registration_pool, + (u8 *) mb, sizeof (*mb)); + /* Send the message */ + vl_socket_add_pending_output (cf, + rp->vl_api_registration_pool_index + + socket_main.registration_pool, + elem, ntohl (mb->data_len)); + +#if CLIB_DEBUG > 1 + output_length = sizeof (*mb) + ntohl (mb->data_len); + clib_warning ("wrote %u bytes to fd %d", output_length, + cf->file_descriptor); +#endif + + vl_msg_api_free ((void *) elem); +} + +void +vl_free_socket_registration_index (u32 pool_index) +{ + int i; + vl_api_registration_t *rp; + if (pool_is_free_index (socket_main.registration_pool, pool_index)) + { + clib_warning ("main pool index %d already free", pool_index); + return; + } + rp = pool_elt_at_index (socket_main.registration_pool, pool_index); + + ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE); + for (i = 0; i < vec_len (rp->additional_fds_to_close); i++) + if (close (rp->additional_fds_to_close[i]) < 0) + clib_unix_warning ("close"); + vec_free (rp->additional_fds_to_close); + vec_free (rp->name); + vec_free (rp->unprocessed_input); + vec_free (rp->output_vector); + rp->registration_type = REGISTRATION_TYPE_FREE; + pool_put (socket_main.registration_pool, rp); +} + +void +vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, + i8 * input_v) +{ + msgbuf_t *mbp = (msgbuf_t *) input_v; + + u8 *the_msg = (u8 *) (mbp->data); + socket_main.current_uf = uf; + socket_main.current_rp = rp; + vl_msg_api_socket_handler (the_msg); + socket_main.current_uf = 0; + socket_main.current_rp = 0; +} + +clib_error_t * +vl_socket_read_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_registration_t *rp; + int n; + i8 *msg_buffer = 0; + 
u8 *data_for_process; + u32 msg_len; + u32 save_input_buffer_length = vec_len (socket_main.input_buffer); + vl_socket_args_for_process_t *a; + msgbuf_t *mbp; + int mbp_set = 0; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + + n = read (uf->file_descriptor, socket_main.input_buffer, + vec_len (socket_main.input_buffer)); + + if (n <= 0 && errno != EAGAIN) + { + clib_file_del (fm, uf); + + if (!pool_is_free (socket_main.registration_pool, rp)) + { + u32 index = rp - socket_main.registration_pool; + vl_free_socket_registration_index (index); + } + else + { + clib_warning ("client index %d already free?", + rp->vl_api_registration_pool_index); + } + return 0; + } + + _vec_len (socket_main.input_buffer) = n; + + /* + * Look for bugs here. This code is tricky because + * data read from a stream socket does not honor message + * boundaries. In the case of a long message (>4K bytes) + * we have to do (at least) 2 reads, etc. + */ + do + { + if (vec_len (rp->unprocessed_input)) + { + vec_append (rp->unprocessed_input, socket_main.input_buffer); + msg_buffer = rp->unprocessed_input; + } + else + { + msg_buffer = socket_main.input_buffer; + mbp_set = 0; + } + + if (mbp_set == 0) + { + /* Any chance that we have a complete message? */ + if (vec_len (msg_buffer) <= sizeof (msgbuf_t)) + goto save_and_split; + + mbp = (msgbuf_t *) msg_buffer; + msg_len = ntohl (mbp->data_len); + mbp_set = 1; + } + + /* We don't have the entire message yet. 
*/ + if (mbp_set == 0 + || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer)) + { + save_and_split: + /* if we were using the input buffer save the fragment */ + if (msg_buffer == socket_main.input_buffer) + { + ASSERT (vec_len (rp->unprocessed_input) == 0); + vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1); + clib_memcpy (rp->unprocessed_input, msg_buffer, + vec_len (msg_buffer)); + _vec_len (rp->unprocessed_input) = vec_len (msg_buffer); + } + _vec_len (socket_main.input_buffer) = save_input_buffer_length; + return 0; + } + + data_for_process = (u8 *) vec_dup (msg_buffer); + _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t)); + pool_get (socket_main.process_args, a); + a->clib_file = uf; + a->regp = rp; + a->data = data_for_process; + + vlib_process_signal_event (vm, memclnt_node.index, + SOCKET_READ_EVENT, + a - socket_main.process_args); + if (n > (msg_len + sizeof (*mbp))) + vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0); + else + _vec_len (msg_buffer) = 0; + n -= msg_len + sizeof (msgbuf_t); + msg_len = 0; + mbp_set = 0; + } + while (n > 0); + + _vec_len (socket_main.input_buffer) = save_input_buffer_length; + + return 0; +} + +void +vl_socket_add_pending_output (clib_file_t * uf, + vl_api_registration_t * rp, + u8 * buffer, uword buffer_bytes) +{ + clib_file_main_t *fm = &file_main; + + vec_add (rp->output_vector, buffer, buffer_bytes); + if (vec_len (rp->output_vector) > 0) + { + int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (!skip_update) + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +void +vl_socket_add_pending_output_no_flush (clib_file_t * uf, + vl_api_registration_t * rp, + u8 * buffer, uword buffer_bytes) +{ + vec_add (rp->output_vector, buffer, buffer_bytes); +} + +static void +socket_del_pending_output (clib_file_t * uf, + vl_api_registration_t * rp, uword n_bytes) +{ + clib_file_main_t *fm = &file_main; + + vec_delete 
(rp->output_vector, n_bytes, 0); + if (vec_len (rp->output_vector) <= 0) + { + int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (!skip_update) + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +clib_error_t * +vl_socket_write_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + vl_api_registration_t *rp; + int n; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + + /* Flush output vector. */ + n = write (uf->file_descriptor, + rp->output_vector, vec_len (rp->output_vector)); + + if (n < 0) + { +#if DEBUG > 2 + clib_warning ("write error, close the file...\n"); +#endif + clib_file_del (fm, uf); + + vl_free_socket_registration_index (rp - socket_main.registration_pool); + return 0; + } + + else if (n > 0) + socket_del_pending_output (uf, rp, n); + + return 0; +} + +clib_error_t * +vl_socket_error_ready (clib_file_t * uf) +{ + vl_api_registration_t *rp; + clib_file_main_t *fm = &file_main; + + rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); + clib_file_del (fm, uf); + vl_free_socket_registration_index (rp - socket_main.registration_pool); + + return 0; +} + +void +socksvr_file_add (clib_file_main_t * fm, int fd) +{ + vl_api_registration_t *rp; + clib_file_t template = { 0 }; + + pool_get (socket_main.registration_pool, rp); + memset (rp, 0, sizeof (*rp)); + + template.read_function = vl_socket_read_ready; + template.write_function = vl_socket_write_ready; + template.error_function = vl_socket_error_ready; + template.file_descriptor = fd; + template.private_data = rp - socket_main.registration_pool; + + rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER; + rp->vl_api_registration_pool_index = rp - socket_main.registration_pool; + rp->clib_file_index = clib_file_add (fm, &template); +} + +static clib_error_t * +socksvr_accept_ready (clib_file_t * uf) +{ + clib_file_main_t *fm = &file_main; + socket_main_t *sm = 
&socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_socket_t client; + clib_error_t *error; + + error = clib_socket_accept (sock, &client); + + if (error) + return error; + + socksvr_file_add (fm, client.fd); + return 0; +} + +static clib_error_t * +socksvr_bogus_write (clib_file_t * uf) +{ + clib_warning ("why am I here?"); + return 0; +} + +/* + * vl_api_sockclnt_create_t_handler + */ +void +vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp) +{ + vl_api_registration_t *regp; + vl_api_sockclnt_create_reply_t *rp; + int rv = 1; + + regp = socket_main.current_rp; + + ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER); + + regp->name = format (0, "%s%c", mp->name, 0); + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY); + rp->handle = (uword) regp; + rp->index = (uword) regp->vl_api_registration_pool_index; + rp->context = mp->context; + rp->response = htonl (rv); + + vl_msg_api_send (regp, (u8 *) rp); +} + +/* + * vl_api_sockclnt_delete_t_handler + */ +void +vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) +{ + vl_api_registration_t *regp; + vl_api_sockclnt_delete_reply_t *rp; + + if (!pool_is_free_index (socket_main.registration_pool, mp->index)) + { + regp = pool_elt_at_index (socket_main.registration_pool, mp->index); + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY); + rp->handle = mp->handle; + rp->response = htonl (1); + + vl_msg_api_send (regp, (u8 *) rp); + + clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index); + + vl_free_socket_registration_index (mp->index); + } + else + { + clib_warning ("unknown client ID %d", mp->index); + } +} + +static clib_error_t * +send_fd_msg (int socket_fd, int fd_to_share) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + char *msg = "memfd"; + int rv; + + iov[0].iov_base = msg; + iov[0].iov_len = strlen 
(msg); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int)); + + rv = sendmsg (socket_fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + return 0; +} + +/* + * Create a memory-fd segment. + */ +void +vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp) +{ + vl_api_memfd_segment_create_reply_t *rmp; + api_main_t *am = &api_main; + clib_file_t *cf; + memfd_private_t _memfd_private, *memfd = &_memfd_private; + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + svm_map_region_args_t _args, *a = &_args; + svm_region_t *vlib_rp; + int rv; + + regp = vl_api_client_index_to_registration (mp->client_index); + + if (regp == 0) + { + clib_warning ("API client disconnected"); + return; + } + + if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER) + { + rv = -31; /* VNET_API_ERROR_INVALID_REGISTRATION */ + goto reply; + } + + memset (memfd, 0, sizeof (*memfd)); + + /* Embed in api_main_t */ + memfd->memfd_size = mp->requested_size; + memfd->requested_va = 0ULL; + memfd->i_am_master = 1; + memfd->name = format (0, "%s%c", regp->name, 0); + + /* Set up a memfd segment of the requested size */ + rv = memfd_master_init (memfd, mp->client_index); + + if (rv) + goto reply; + + /* Remember to close this fd when the socket connection goes away */ + vec_add1 (regp->additional_fds_to_close, memfd->fd); + + /* And create a plausible svm_region in it */ + memset (a, 0, sizeof (*a)); + a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE; + a->size = memfd->memfd_size - MMAP_PAGESIZE; + /* $$$$ might want a different config parameter */ + a->pvt_heap_size = am->api_pvt_heap_size; + a->flags = SVM_FLAGS_MHEAP; + 
svm_region_init_mapped_region (a, (svm_region_t *) a->baseva); + + vlib_rp = (svm_region_t *) a->baseva; + + /* + * Part deux, initialize the svm_region_t shared-memory header + * api allocation rings, and so on. + */ + vl_init_shmem (vlib_rp, 1 /* is_vlib (dont-care) */ , 1 /* is_private */ ); + + vec_add1 (am->vlib_private_rps, vlib_rp); + + memfd->sh->ready = 1; + + /* Recompute the set of input queues to poll in memclnt_process */ + vec_reset_length (vl_api_queue_cursizes); + +reply: + + /* send the reply message */ + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + + vl_msg_api_send (regp, (u8 *) rmp); + + if (rv != 0) + return; + + /* + * We need the reply message to make it out the back door + * before we send the magic fd message. + */ + vlib_process_suspend (vm, 11e-6); + + cf = file_main.file_pool + regp->clib_file_index; + + /* send the magic "here's your sign (aka fd)" socket message */ + send_fd_msg (cf->file_descriptor, memfd->fd); +} + +#define foreach_vlib_api_msg \ +_(SOCKCLNT_CREATE, sockclnt_create) \ +_(SOCKCLNT_DELETE, sockclnt_delete) \ +_(MEMFD_SEGMENT_CREATE, memfd_segment_create) + +clib_error_t * +socksvr_api_init (vlib_main_t * vm) +{ + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; + vl_api_registration_t *rp; + vl_msg_api_msg_config_t cfg; + vl_msg_api_msg_config_t *c = &cfg; + socket_main_t *sm = &socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_error_t *error; + + /* If not explicitly configured, do not bind/enable, etc. 
*/ + if (sm->socket_name == 0) + return 0; + +#define _(N,n) do { \ + c->id = VL_API_##N; \ + c->name = #n; \ + c->handler = vl_api_##n##_t_handler; \ + c->cleanup = vl_noop_handler; \ + c->endian = vl_api_##n##_t_endian; \ + c->print = vl_api_##n##_t_print; \ + c->size = sizeof(vl_api_##n##_t); \ + c->traced = 1; /* trace, so these msgs print */ \ + c->replay = 0; /* don't replay client create/delete msgs */ \ + c->message_bounce = 0; /* don't bounce this message */ \ + vl_msg_api_config(c);} while (0); + + foreach_vlib_api_msg; +#undef _ + + vec_resize (sm->input_buffer, 4096); + + sock->config = (char *) sm->socket_name; + + /* mkdir of file socket, only under /run */ + if (strncmp (sock->config, "/run", 4) == 0) + { + u8 *tmp = format (0, "%s", sock->config); + int i = vec_len (tmp); + while (i && tmp[--i] != '/') + ; + + tmp[i] = 0; + + if (i) + vlib_unix_recursive_mkdir ((char *) tmp); + vec_free (tmp); + } + + sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET | + CLIB_SOCKET_F_ALLOW_GROUP_WRITE; + error = clib_socket_init (sock); + if (error) + return error; + + pool_get (sm->registration_pool, rp); + memset (rp, 0, sizeof (*rp)); + + rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN; + + template.read_function = socksvr_accept_ready; + template.write_function = socksvr_bogus_write; + template.file_descriptor = sock->fd; + template.private_data = rp - sm->registration_pool; + + rp->clib_file_index = clib_file_add (fm, &template); + return 0; +} + +static clib_error_t * +socket_exit (vlib_main_t * vm) +{ + clib_file_main_t *fm = &file_main; + socket_main_t *sm = &socket_main; + vl_api_registration_t *rp; + + /* Defensive driving in case something wipes out early */ + if (sm->registration_pool) + { + u32 index; + /* *INDENT-OFF* */ + pool_foreach (rp, sm->registration_pool, ({ + clib_file_del (fm, fm->file_pool + rp->clib_file_index); + index = rp->vl_api_registration_pool_index; + vl_free_socket_registration_index (index); + })); +/* 
*INDENT-ON* */ + } + + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit); + +static clib_error_t * +socksvr_config (vlib_main_t * vm, unformat_input_t * input) +{ + socket_main_t *sm = &socket_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "socket-name %s", &sm->socket_name)) + ; + else if (unformat (input, "default")) + { + sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0); + } + else + { + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + } + return 0; +} + +VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr"); + +clib_error_t * +vlibsocket_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (vlibsocket_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |