summary | refs | log | tree | commit | diff | stats
path: root/src/vlibmemory
diff options
context:
space:
mode:
author: Dave Barach <dave@barachs.net>, 2017-09-10 15:04:27 -0400
committer: Damjan Marion <dmarion.lists@gmail.com>, 2017-10-03 11:03:47 +0000
commit: 59b2565cd91a67ced650739f36129650830211ac (patch)
tree: 1ae3b8d69d7952500b07186169fb31e0f72ae04e /src/vlibmemory
parent: 35ffa3e8f6b032f6e324234d495f769049d8feea (diff)
Repair vlib API socket server
- Teach vpp_api_test to send/receive API messages over sockets
- Add memfd-based shared memory
- Add api messages to create memfd-based shared memory segments
- vpp_api_test supports both socket and shared memory segment connections
- vpp_api_test pivot from socket to shared memory API messaging
- add socket client support to libvlibclient.so
- dead client reaper sends ping messages, container-friendly
- dead client reaper falls back to kill (<pid>, 0) live checking
  if e.g. a python app goes silent for tens of seconds
- handle ping messages in python client support code
- teach show api ring about pairwise shared-memory segments
- fix ip probing of already resolved destinations (VPP-998)

We'll need this work to implement proper host-stack client isolation

Change-Id: Ic23b65f75c854d0393d9a2e9d6b122a9551be769
Signed-off-by: Dave Barach <dave@barachs.net>
Signed-off-by: Dave Wallace <dwallacelf@gmail.com>
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vlibmemory')
-rw-r--r-- src/vlibmemory/api.h | 1
-rw-r--r-- src/vlibmemory/api_common.h | 121
-rw-r--r-- src/vlibmemory/memclnt.api | 57
-rw-r--r-- src/vlibmemory/memory_client.c | 65
-rw-r--r-- src/vlibmemory/memory_shared.c | 175
-rw-r--r-- src/vlibmemory/memory_vlib.c | 717
-rw-r--r-- src/vlibmemory/socket_client.c | 240
-rw-r--r-- src/vlibmemory/socksvr_vlib.c | 719
8 files changed, 1824 insertions, 271 deletions
diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h
index 2a1438fde3c..5c32f5308de 100644
--- a/src/vlibmemory/api.h
+++ b/src/vlibmemory/api.h
@@ -50,6 +50,7 @@ vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch)
}
void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno);
+
#endif /* included_vlibmemory_api_h */
/*
diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h
index 19daecdfb6a..63a7e5e4188 100644
--- a/src/vlibmemory/api_common.h
+++ b/src/vlibmemory/api_common.h
@@ -19,6 +19,7 @@
#define included_vlibmemory_api_common_h
#include <svm/svm_common.h>
+#include <vppinfra/file.h>
#include <vlibapi/api_common.h>
#include <vlibmemory/unix_shared_memory_queue.h>
@@ -44,18 +45,17 @@ typedef struct ring_alloc_
/*
* Initializers for the (shared-memory) rings
- * _(size, n). Note: each msg has an 8 byte header.
- * Might want to change that to an index sometime.
+ * _(size, n). Note: each msg has space for a header.
*/
#define foreach_vl_aring_size \
-_(64+8, 1024) \
-_(256+8, 128) \
-_(1024+8, 64)
+_(64+sizeof(ring_alloc_t), 1024) \
+_(256+sizeof(ring_alloc_t), 128) \
+_(1024+sizeof(ring_alloc_t), 64)
#define foreach_clnt_aring_size \
-_(1024+8, 1024) \
-_(2048+8, 128) \
-_(4096+8, 8)
+ _(1024+sizeof(ring_alloc_t), 1024) \
+ _(2048+sizeof(ring_alloc_t), 128) \
+ _(4096+sizeof(ring_alloc_t), 8)
typedef struct vl_shmem_hdr_
{
@@ -83,7 +83,6 @@ typedef struct vl_shmem_hdr_
/* Number of garbage-collected messages */
u32 garbage_collects;
-
} vl_shmem_hdr_t;
#define VL_SHM_VERSION 2
@@ -123,10 +122,114 @@ int vl_client_connect_to_vlib (const char *svm_name, const char *client_name,
int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name,
const char *client_name,
int rx_queue_size);
+int vl_client_connect_to_vlib_no_map (const char *svm_name,
+ const char *client_name,
+ int rx_queue_size);
u16 vl_client_get_first_plugin_msg_id (const char *plugin_name);
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
u32 vl_api_memclnt_create_internal (char *, unix_shared_memory_queue_t *);
+void vl_init_shmem (svm_region_t * vlib_rp, int is_vlib,
+ int is_private_region);
+void vl_client_install_client_message_handlers (void);
+
+/* API messages over sockets */
+
+extern vlib_node_registration_t memclnt_node;
+extern volatile int **vl_api_queue_cursizes;
+
+/* Events sent to the memclnt process */
+#define QUEUE_SIGNAL_EVENT 1
+#define SOCKET_READ_EVENT 2
+
+#define API_SOCKET_FILE "/run/vpp-api.sock"
+
+typedef struct
+{
+ clib_file_t *clib_file;
+ vl_api_registration_t *regp;
+ u8 *data;
+} vl_socket_args_for_process_t;
+
+typedef struct
+{
+ /* Server port number */
+ u8 *socket_name;
+
+ /* By default, localhost... */
+ u32 bind_address;
+
+ /*
+ * (listen, server, client) registrations. Shared memory
+ * registrations are in shared memory
+ */
+ vl_api_registration_t *registration_pool;
+ /*
+ * Chain-drag variables, so message API handlers
+ * (generally) don't know whether they're talking to a socket
+ * or to a shared-memory connection.
+ */
+ vl_api_registration_t *current_rp;
+ clib_file_t *current_uf;
+ /* One input buffer, shared across all sockets */
+ i8 *input_buffer;
+
+ /* pool of process args for socket clients */
+ vl_socket_args_for_process_t *process_args;
+
+ /* Listen for API connections here */
+ clib_socket_t socksvr_listen_socket;
+} socket_main_t;
+
+extern socket_main_t socket_main;
+
+typedef struct
+{
+ int socket_fd;
+ /* Temporarily disable the connection, so we can keep it around... */
+ int socket_enable;
+
+ clib_socket_t client_socket;
+
+ u32 socket_buffer_size;
+ u8 *socket_tx_buffer;
+ u8 *socket_rx_buffer;
+ u32 socket_tx_nbytes;
+ int control_pings_outstanding;
+} socket_client_main_t;
+
+extern socket_client_main_t socket_client_main;
+
+#define SOCKET_CLIENT_DEFAULT_BUFFER_SIZE 4096
+
+void socksvr_add_pending_output (struct clib_file *uf,
+ struct vl_api_registration_ *cf,
+ u8 * buffer, uword buffer_bytes);
+
+void vl_free_socket_registration_index (u32 pool_index);
+void vl_socket_process_msg (struct clib_file *uf,
+ struct vl_api_registration_ *rp, i8 * input_v);
+clib_error_t *vl_socket_read_ready (struct clib_file *uf);
+void vl_socket_add_pending_output (struct clib_file *uf,
+ struct vl_api_registration_ *rp,
+ u8 * buffer, uword buffer_bytes);
+void vl_socket_add_pending_output_no_flush (struct clib_file *uf,
+ struct vl_api_registration_ *rp,
+ u8 * buffer, uword buffer_bytes);
+clib_error_t *vl_socket_write_ready (struct clib_file *uf);
+void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem);
+u32 sockclnt_open_index (char *client_name, char *hostname, int port);
+void sockclnt_close_index (u32 index);
+void vl_client_msg_api_send (vl_api_registration_t * cm, u8 * elem);
+vl_api_registration_t *sockclnt_get_registration (u32 index);
+void vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp,
+ i8 * input_v);
+
+int
+vl_socket_client_connect (socket_client_main_t * scm, char *socket_path,
+ char *client_name, u32 socket_buffer_size);
+void vl_socket_client_read_reply (socket_client_main_t * scm);
+void vl_socket_client_enable_disable (socket_client_main_t * scm, int enable);
#endif /* included_vlibmemory_api_common_h */
diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api
index 32e51407949..94c99ad5d45 100644
--- a/src/vlibmemory/memclnt.api
+++ b/src/vlibmemory/memclnt.api
@@ -112,3 +112,60 @@ manual_print define trace_plugin_msg_ids
u16 first_msg_id;
u16 last_msg_id;
};
+
+/*
+ * Create a socket client registration.
+ */
+define sockclnt_create {
+ u8 name[64]; /* for show, find by name, whatever */
+ u32 context; /* opaque value to be returned in the reply */
+};
+
+define sockclnt_create_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* handle by which vlib knows this client */
+ u32 index; /* index, used e.g. by API trace replay */
+ u32 context; /* opaque value from the create request */
+};
+
+/*
+ * Delete a client registration
+ */
+define sockclnt_delete {
+ u32 index; /* index, used e.g. by API trace replay */
+ u64 handle; /* handle by which vlib knows this client */
+};
+
+define sockclnt_delete_reply {
+ i32 response; /* Non-negative = success */
+ u64 handle; /* in case the client wonders */
+};
+
+/*
+ * Ask vpp for a memfd shared segment
+ */
+define memfd_segment_create {
+ u32 client_index;
+ u32 context;
+ u64 requested_size;
+};
+
+/*
+ * Reply
+ */
+define memfd_segment_create_reply
+{
+ u32 context;
+ i32 retval;
+ u32 master_fd;
+};
+
+/*
+ * Memory client ping / response
+ * Only sent on inactive connections
+ */
+autoreply define memclnt_keepalive
+{
+ u32 client_index;
+ u32 context;
+};
diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c
index a162d6bb27c..3f8b799f41f 100644
--- a/src/vlibmemory/memory_client.c
+++ b/src/vlibmemory/memory_client.c
@@ -319,21 +319,37 @@ vl_client_disconnect (void)
}
}
+/**
+ * Stave off the binary API dead client reaper
+ * Only sent to inactive clients
+ */
+static void
+vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp)
+{
+ vl_api_memclnt_keepalive_reply_t *rmp;
+ api_main_t *am;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ am = &api_main;
+ shmem_hdr = am->shmem_hdr;
+
+ rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY);
+ rmp->context = mp->context;
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp);
+}
+
#define foreach_api_msg \
_(RX_THREAD_EXIT, rx_thread_exit) \
_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \
-_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply)
+_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) \
+_(MEMCLNT_KEEPALIVE, memclnt_keepalive)
-int
-vl_client_api_map (const char *region_name)
+void
+vl_client_install_client_message_handlers (void)
{
- int rv;
-
- if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0)
- {
- return rv;
- }
#define _(N,n) \
vl_msg_api_set_handlers(VL_API_##N, #n, \
@@ -344,6 +360,18 @@ vl_client_api_map (const char *region_name)
sizeof(vl_api_##n##_t), 1);
foreach_api_msg;
#undef _
+}
+
+
+int
+vl_client_api_map (const char *region_name)
+{
+ int rv;
+
+ if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0)
+ return rv;
+
+ vl_client_install_client_message_handlers ();
return 0;
}
@@ -356,12 +384,12 @@ vl_client_api_unmap (void)
static int
connect_to_vlib_internal (const char *svm_name,
const char *client_name,
- int rx_queue_size, int want_pthread)
+ int rx_queue_size, int want_pthread, int do_map)
{
int rv = 0;
memory_client_main_t *mm = &memory_client_main;
- if ((rv = vl_client_api_map (svm_name)))
+ if (do_map && (rv = vl_client_api_map (svm_name)))
{
clib_warning ("vl_client_api map rv %d", rv);
return rv;
@@ -393,7 +421,8 @@ vl_client_connect_to_vlib (const char *svm_name,
const char *client_name, int rx_queue_size)
{
return connect_to_vlib_internal (svm_name, client_name, rx_queue_size,
- 1 /* want pthread */ );
+ 1 /* want pthread */ ,
+ 1 /* do map */ );
}
int
@@ -402,7 +431,17 @@ vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name,
int rx_queue_size)
{
return connect_to_vlib_internal (svm_name, client_name, rx_queue_size,
- 0 /* want pthread */ );
+ 0 /* want pthread */ ,
+ 1 /* do map */ );
+}
+
+int
+vl_client_connect_to_vlib_no_map (const char *svm_name,
+ const char *client_name, int rx_queue_size)
+{
+ return connect_to_vlib_internal (svm_name, client_name, rx_queue_size,
+ 1 /* want pthread */ ,
+ 0 /* dont map */ );
}
void
diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c
index 8c6469080d7..021c54ef953 100644
--- a/src/vlibmemory/memory_shared.c
+++ b/src/vlibmemory/memory_shared.c
@@ -39,6 +39,10 @@
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_typedefs
+socket_main_t socket_main;
+
+#define DEBUG_MESSAGE_BUFFER_OVERRUN 0
+
static inline void *
vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null)
{
@@ -52,6 +56,10 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null)
shmem_hdr = am->shmem_hdr;
+#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0
+ nbytes += 4;
+#endif
+
if (shmem_hdr == 0)
{
clib_warning ("shared memory header NULL");
@@ -172,7 +180,16 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null)
pthread_mutex_unlock (&am->vlib_rp->mutex);
out:
+#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0
+ {
+ nbytes -= 4;
+ u32 *overrun;
+ overrun = (u32 *) (rv->data + nbytes - sizeof (msgbuf_t));
+ *overrun = 0x1badbabe;
+ }
+#endif
rv->data_len = htonl (nbytes - sizeof (msgbuf_t));
+
return (rv->data);
}
@@ -231,11 +248,27 @@ vl_msg_api_free (void *a)
{
rv->q = 0;
rv->gc_mark_timestamp = 0;
+#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0
+ {
+ u32 *overrun;
+ overrun = (u32 *) (rv->data + ntohl (rv->data_len));
+ ASSERT (*overrun == 0x1badbabe);
+ }
+#endif
return;
}
pthread_mutex_lock (&am->vlib_rp->mutex);
oldheap = svm_push_data_heap (am->vlib_rp);
+
+#if DEBUG_MESSAGE_BUFFER_OVERRUN > 0
+ {
+ u32 *overrun;
+ overrun = (u32 *) (rv->data + ntohl (rv->data_len));
+ ASSERT (*overrun == 0x1badbabe);
+ }
+#endif
+
clib_mem_free (rv);
svm_pop_heap (oldheap);
pthread_mutex_unlock (&am->vlib_rp->mutex);
@@ -329,17 +362,91 @@ vl_set_api_pvt_heap_size (u64 size)
am->api_pvt_heap_size = size;
}
+void
+vl_init_shmem (svm_region_t * vlib_rp, int is_vlib, int is_private_region)
+{
+ api_main_t *am = &api_main;
+ vl_shmem_hdr_t *shmem_hdr = 0;
+ u32 vlib_input_queue_length;
+ void *oldheap;
+ ASSERT (vlib_rp);
+
+ /* $$$$ need private region config parameters */
+
+ oldheap = svm_push_data_heap (vlib_rp);
+
+ vec_validate (shmem_hdr, 0);
+ shmem_hdr->version = VL_SHM_VERSION;
+
+ /* vlib main input queue */
+ vlib_input_queue_length = 1024;
+ if (am->vlib_input_queue_length)
+ vlib_input_queue_length = am->vlib_input_queue_length;
+
+ shmem_hdr->vl_input_queue =
+ unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword),
+ getpid (), am->vlib_signal);
+
+ /* Set up the msg ring allocator */
+#define _(sz,n) \
+ do { \
+ ring_alloc_t _rp; \
+ _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
+ _rp.size = (sz); \
+ _rp.nitems = n; \
+ _rp.hits = 0; \
+ _rp.misses = 0; \
+ vec_add1(shmem_hdr->vl_rings, _rp); \
+ } while (0);
+
+ foreach_vl_aring_size;
+#undef _
+
+#define _(sz,n) \
+ do { \
+ ring_alloc_t _rp; \
+ _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
+ _rp.size = (sz); \
+ _rp.nitems = n; \
+ _rp.hits = 0; \
+ _rp.misses = 0; \
+ vec_add1(shmem_hdr->client_rings, _rp); \
+ } while (0);
+
+ foreach_clnt_aring_size;
+#undef _
+
+ if (is_private_region == 0)
+ {
+ am->shmem_hdr = shmem_hdr;
+ am->vlib_rp = vlib_rp;
+ am->our_pid = getpid ();
+ if (is_vlib)
+ am->shmem_hdr->vl_pid = am->our_pid;
+ }
+ else
+ shmem_hdr->vl_pid = am->our_pid;
+
+ svm_pop_heap (oldheap);
+
+ /*
+ * After absolutely everything that a client might see is set up,
+ * declare the shmem region valid
+ */
+ vlib_rp->user_ctx = shmem_hdr;
+
+ pthread_mutex_unlock (&vlib_rp->mutex);
+}
+
+
int
vl_map_shmem (const char *region_name, int is_vlib)
{
svm_map_region_args_t _a, *a = &_a;
svm_region_t *vlib_rp, *root_rp;
- void *oldheap;
- vl_shmem_hdr_t *shmem_hdr = 0;
api_main_t *am = &api_main;
int i, rv;
struct timespec ts, tsrem;
- u32 vlib_input_queue_length;
char *vpe_api_region_suffix = "-vpe-api";
memset (a, 0, sizeof (*a));
@@ -472,65 +579,8 @@ vl_map_shmem (const char *region_name, int is_vlib)
}
/* Nope, it's our problem... */
+ vl_init_shmem (vlib_rp, 1 /* is vlib */ , 0 /* is_private_region */ );
- oldheap = svm_push_data_heap (vlib_rp);
-
- vec_validate (shmem_hdr, 0);
- shmem_hdr->version = VL_SHM_VERSION;
-
- /* vlib main input queue */
- vlib_input_queue_length = 1024;
- if (am->vlib_input_queue_length)
- vlib_input_queue_length = am->vlib_input_queue_length;
-
- shmem_hdr->vl_input_queue =
- unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword),
- getpid (), am->vlib_signal);
-
- /* Set up the msg ring allocator */
-#define _(sz,n) \
- do { \
- ring_alloc_t _rp; \
- _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
- _rp.size = (sz); \
- _rp.nitems = n; \
- _rp.hits = 0; \
- _rp.misses = 0; \
- vec_add1(shmem_hdr->vl_rings, _rp); \
- } while (0);
-
- foreach_vl_aring_size;
-#undef _
-
-#define _(sz,n) \
- do { \
- ring_alloc_t _rp; \
- _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \
- _rp.size = (sz); \
- _rp.nitems = n; \
- _rp.hits = 0; \
- _rp.misses = 0; \
- vec_add1(shmem_hdr->client_rings, _rp); \
- } while (0);
-
- foreach_clnt_aring_size;
-#undef _
-
- am->shmem_hdr = shmem_hdr;
- am->vlib_rp = vlib_rp;
- am->our_pid = getpid ();
- if (is_vlib)
- am->shmem_hdr->vl_pid = am->our_pid;
-
- svm_pop_heap (oldheap);
-
- /*
- * After absolutely everything that a client might see is set up,
- * declare the shmem region valid
- */
- vlib_rp->user_ctx = shmem_hdr;
-
- pthread_mutex_unlock (&vlib_rp->mutex);
vec_add1 (am->mapped_shmem_regions, vlib_rp);
return 0;
}
@@ -638,6 +688,9 @@ vl_api_client_index_to_registration_internal (u32 handle)
vl_api_registration_t *
vl_api_client_index_to_registration (u32 index)
{
+ if (PREDICT_FALSE (socket_main.current_rp != 0))
+ return socket_main.current_rp;
+
return (vl_api_client_index_to_registration_internal (index));
}
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index d305ea619aa..c9b3183f592 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -96,17 +96,7 @@ vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a,
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
-void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
- __attribute__ ((weak));
-
-void
-vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
-{
- static int count;
-
- if (count++ < 5)
- clib_warning ("need to link against -lvlibsocket, msg not sent!");
-}
+extern void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem);
void
vl_msg_api_send (vl_api_registration_t * rp, u8 * elem)
@@ -117,7 +107,7 @@ vl_msg_api_send (vl_api_registration_t * rp, u8 * elem)
}
else
{
- vl_msg_api_send_shmem (rp->vl_input_queue, elem);
+ vl_msg_api_send_shmem (rp->vl_input_queue, (u8 *) & elem);
}
}
@@ -196,6 +186,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
int rv = 0;
void *oldheap;
api_main_t *am = &api_main;
+ u8 *serialized_message_table_in_shmem;
/*
* This is tortured. Maintain a vlib-address-space private
@@ -235,6 +226,8 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
memset (regp, 0, sizeof (*regp));
regp->registration_type = REGISTRATION_TYPE_SHMEM;
regp->vl_api_registration_pool_index = regpp - am->vl_clients;
+ regp->vlib_rp = svm;
+ regp->shmem_hdr = am->shmem_hdr;
q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword)
mp->input_queue;
@@ -242,11 +235,11 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
regp->name = format (0, "%s", mp->name);
vec_add1 (regp->name, 0);
+ serialized_message_table_in_shmem = vl_api_serialize_message_table (am, 0);
+
pthread_mutex_unlock (&svm->mutex);
svm_pop_heap (oldheap);
- ASSERT (am->serialized_message_table_in_shmem);
-
rp = vl_msg_api_alloc (sizeof (*rp));
rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY);
rp->handle = (uword) regp;
@@ -255,8 +248,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
am->shmem_hdr->application_restarts);
rp->context = mp->context;
rp->response = ntohl (rv);
- rp->message_table =
- pointer_to_uword (am->serialized_message_table_in_shmem);
+ rp->message_table = pointer_to_uword (serialized_message_table_in_shmem);
vl_msg_api_send_shmem (q, (u8 *) & rp);
}
@@ -313,11 +305,15 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp)
if (!pool_is_free (am->vl_clients, regpp))
{
+ int i;
regp = *regpp;
svm = am->vlib_rp;
+ int private_registration = 0;
- /* $$$ check the input queue for e.g. punted sf's */
-
+ /*
+ * Note: the API message handling path will set am->vlib_rp
+ * as appropriate for pairwise / private memory segments
+ */
rp = vl_msg_api_alloc (sizeof (*rp));
rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY);
rp->handle = mp->handle;
@@ -333,18 +329,56 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp)
return;
}
+ /* For horizontal scaling, add a hash table... */
+ for (i = 0; i < vec_len (am->vlib_private_rps); i++)
+ {
+ /* Is this a pairwise / private API segment? */
+ if (am->vlib_private_rps[i] == svm)
+ {
+ /* Note: account for the memfd header page */
+ u64 virtual_base = svm->virtual_base - MMAP_PAGESIZE;
+ u64 virtual_size = svm->virtual_size + MMAP_PAGESIZE;
+
+ /*
+ * Kill the registration pool element before we make
+ * the index vanish forever
+ */
+ pool_put_index (am->vl_clients,
+ regp->vl_api_registration_pool_index);
+
+ vec_delete (am->vlib_private_rps, 1, i);
+ /* Kill it, accounting for the memfd header page */
+ if (munmap ((void *) virtual_base, virtual_size) < 0)
+ clib_unix_warning ("munmap");
+ /* Reset the queue-length-address cache */
+ vec_reset_length (vl_api_queue_cursizes);
+ private_registration = 1;
+ break;
+ }
+ }
+
/* No dangling references, please */
*regpp = 0;
- pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index);
-
- pthread_mutex_lock (&svm->mutex);
- oldheap = svm_push_data_heap (svm);
- /* Poison the old registration */
- memset (regp, 0xF1, sizeof (*regp));
- clib_mem_free (regp);
- pthread_mutex_unlock (&svm->mutex);
- svm_pop_heap (oldheap);
+ if (private_registration == 0)
+ {
+ pool_put_index (am->vl_clients,
+ regp->vl_api_registration_pool_index);
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+ /* Poison the old registration */
+ memset (regp, 0xF1, sizeof (*regp));
+ clib_mem_free (regp);
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+ /*
+ * These messages must be freed manually, since they're set up
+ * as "bounce" messages. In the private_registration == 1 case,
+ * we kill the shared-memory segment which contains the message
+ * with munmap.
+ */
+ vl_msg_api_free (mp);
+ }
}
else
{
@@ -392,10 +426,54 @@ out:
vl_msg_api_send_shmem (q, (u8 *) & rmp);
}
-#define foreach_vlib_api_msg \
-_(MEMCLNT_CREATE, memclnt_create) \
-_(MEMCLNT_DELETE, memclnt_delete) \
-_(GET_FIRST_MSG_ID, get_first_msg_id)
+/**
+ * client answered a ping, stave off the grim reaper...
+ */
+
+void
+ vl_api_memclnt_keepalive_reply_t_handler
+ (vl_api_memclnt_keepalive_reply_t * mp)
+{
+ vl_api_registration_t *regp;
+ vlib_main_t *vm = vlib_get_main ();
+
+ regp = vl_api_client_index_to_registration (mp->context);
+ if (regp)
+ {
+ regp->last_heard = vlib_time_now (vm);
+ regp->unanswered_pings = 0;
+ }
+ else
+ clib_warning ("BUG: anonymous memclnt_keepalive_reply");
+}
+
+/**
+ * We can send ourselves these messages if someone uses the
+ * builtin binary api test tool...
+ */
+static void
+vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp)
+{
+ vl_api_memclnt_keepalive_reply_t *rmp;
+ api_main_t *am;
+ vl_shmem_hdr_t *shmem_hdr;
+
+ am = &api_main;
+ shmem_hdr = am->shmem_hdr;
+
+ rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MEMCLNT_KEEPALIVE_REPLY);
+ rmp->context = mp->context;
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & rmp);
+}
+
+#define foreach_vlib_api_msg \
+_(MEMCLNT_CREATE, memclnt_create) \
+_(MEMCLNT_DELETE, memclnt_delete) \
+_(GET_FIRST_MSG_ID, get_first_msg_id) \
+_(MEMCLNT_KEEPALIVE, memclnt_keepalive) \
+_(MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply)
/*
* vl_api_init
@@ -404,6 +482,7 @@ static int
memory_api_init (const char *region_name)
{
int rv;
+ api_main_t *am = &api_main;
vl_msg_api_msg_config_t cfg;
vl_msg_api_msg_config_t *c = &cfg;
@@ -428,6 +507,13 @@ memory_api_init (const char *region_name)
foreach_vlib_api_msg;
#undef _
+ /*
+ * special-case freeing of memclnt_delete messages, so we can
+ * simply munmap pairwise / private API segments...
+ */
+ am->message_bounce[VL_API_MEMCLNT_DELETE] = 1;
+ am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE_REPLY] = 1;
+
return 0;
}
@@ -474,6 +560,203 @@ send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id)
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
+static void
+send_memclnt_keepalive (vl_api_registration_t * regp, f64 now)
+{
+ vl_api_memclnt_keepalive_t *mp;
+ unix_shared_memory_queue_t *q;
+ api_main_t *am = &api_main;
+ svm_region_t *save_vlib_rp = am->vlib_rp;
+ vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr;
+
+ q = regp->vl_input_queue;
+
+ /*
+ * If the queue head is moving, assume that the client is processing
+ * messages and skip the ping. This heuristic may fail if the queue
+ * is in the same position as last time, net of wrapping; in which
+ * case, the client will receive a keepalive.
+ */
+ if (regp->last_queue_head != q->head)
+ {
+ regp->last_heard = now;
+ regp->unanswered_pings = 0;
+ regp->last_queue_head = q->head;
+ return;
+ }
+
+ /*
+ * push/pop shared memory segment, so this routine
+ * will work with "normal" as well as "private segment"
+ * memory clients..
+ */
+
+ am->vlib_rp = regp->vlib_rp;
+ am->shmem_hdr = regp->shmem_hdr;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MEMCLNT_KEEPALIVE);
+ mp->context = mp->client_index =
+ vl_msg_api_handle_from_index_and_epoch
+ (regp->vl_api_registration_pool_index,
+ am->shmem_hdr->application_restarts);
+
+ regp->unanswered_pings++;
+
+ /* Failure-to-send due to a stuffed queue is absolutely expected */
+ if (unix_shared_memory_queue_add (q, (u8 *) & mp, 1 /* nowait */ ))
+ vl_msg_api_free (mp);
+
+ am->vlib_rp = save_vlib_rp;
+ am->shmem_hdr = save_shmem_hdr;
+}
+
+static void
+dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now)
+{
+
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ static u32 *dead_indices;
+ static u32 *confused_indices;
+
+ vec_reset_length (dead_indices);
+ vec_reset_length (confused_indices);
+
+ /* *INDENT-OFF* */
+ pool_foreach (regpp, am->vl_clients,
+ ({
+ regp = *regpp;
+ if (regp)
+ {
+ /* If we haven't heard from this client recently... */
+ if (regp->last_heard < (now - 10.0))
+ {
+ if (regp->unanswered_pings == 2)
+ {
+ unix_shared_memory_queue_t *q;
+ q = regp->vl_input_queue;
+ if (kill (q->consumer_pid, 0) >=0)
+ {
+ clib_warning ("REAPER: lazy binary API client '%s'",
+ regp->name);
+ regp->unanswered_pings = 0;
+ regp->last_heard = now;
+ }
+ else
+ {
+ clib_warning ("REAPER: binary API client '%s' died",
+ regp->name);
+ vec_add1(dead_indices, regpp - am->vl_clients);
+ }
+ }
+ else
+ send_memclnt_keepalive (regp, now);
+ }
+ else
+ regp->unanswered_pings = 0;
+ }
+ else
+ {
+ clib_warning ("NULL client registration index %d",
+ regpp - am->vl_clients);
+ vec_add1 (confused_indices, regpp - am->vl_clients);
+ }
+ }));
+ /* *INDENT-ON* */
+ /* This should "never happen," but if it does, fix it... */
+ if (PREDICT_FALSE (vec_len (confused_indices) > 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (confused_indices); i++)
+ {
+ pool_put_index (am->vl_clients, confused_indices[i]);
+ }
+ }
+
+ if (PREDICT_FALSE (vec_len (dead_indices) > 0))
+ {
+ int i;
+ svm_region_t *svm;
+ void *oldheap;
+
+ /* Allow the application to clean up its registrations */
+ for (i = 0; i < vec_len (dead_indices); i++)
+ {
+ regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
+ if (regpp)
+ {
+ u32 handle;
+
+ handle = vl_msg_api_handle_from_index_and_epoch
+ (dead_indices[i], shm->application_restarts);
+ (void) call_reaper_functions (handle);
+ }
+ }
+
+ svm = am->vlib_rp;
+ pthread_mutex_lock (&svm->mutex);
+ oldheap = svm_push_data_heap (svm);
+
+ for (i = 0; i < vec_len (dead_indices); i++)
+ {
+ regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
+ if (regpp)
+ {
+ /* Is this a pairwise SVM segment? */
+ if ((*regpp)->vlib_rp != svm)
+ {
+ int i;
+ svm_region_t *dead_rp = (*regpp)->vlib_rp;
+ /* Note: account for the memfd header page */
+ u64 virtual_base = dead_rp->virtual_base - MMAP_PAGESIZE;
+ u64 virtual_size = dead_rp->virtual_size + MMAP_PAGESIZE;
+
+ /* For horizontal scaling, add a hash table... */
+ for (i = 0; i < vec_len (am->vlib_private_rps); i++)
+ if (am->vlib_private_rps[i] == dead_rp)
+ {
+ vec_delete (am->vlib_private_rps, 1, i);
+ goto found;
+ }
+ clib_warning ("private rp %llx AWOL", dead_rp);
+
+ found:
+ /* Kill it, accounting for the memfd header page */
+ if (munmap ((void *) virtual_base, virtual_size) < 0)
+ clib_unix_warning ("munmap");
+ /* Reset the queue-length-address cache */
+ vec_reset_length (vl_api_queue_cursizes);
+ }
+ else
+ {
+ /* Poison the old registration */
+ memset (*regpp, 0xF3, sizeof (**regpp));
+ clib_mem_free (*regpp);
+ }
+ /* no dangling references, please */
+ *regpp = 0;
+ }
+ else
+ {
+ svm_pop_heap (oldheap);
+ clib_warning ("Duplicate free, client index %d",
+ regpp - am->vl_clients);
+ oldheap = svm_push_data_heap (svm);
+ }
+ }
+
+ svm_client_scan_this_region_nolock (am->vlib_rp);
+
+ pthread_mutex_unlock (&svm->mutex);
+ svm_pop_heap (oldheap);
+ for (i = 0; i < vec_len (dead_indices); i++)
+ pool_put_index (am->vl_clients, dead_indices[i]);
+ }
+}
+
+
static uword
memclnt_process (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * f)
@@ -487,17 +770,29 @@ memclnt_process (vlib_main_t * vm,
f64 dead_client_scan_time;
f64 sleep_time, start_time;
f64 vector_rate;
+ clib_error_t *socksvr_api_init (vlib_main_t * vm);
+ clib_error_t *error;
int i;
- u8 *serialized_message_table = 0;
- svm_region_t *svm;
- void *oldheap;
+ vl_socket_args_for_process_t *a;
+ uword event_type;
+ uword *event_data = 0;
+ int private_segment_rotor = 0;
+ svm_region_t *vlib_rp;
+ f64 now;
vlib_set_queue_signal_callback (vm, memclnt_queue_callback);
if ((rv = memory_api_init (am->region_name)) < 0)
{
- clib_warning ("memory_api_init returned %d, wait for godot...", rv);
- vlib_process_suspend (vm, 1e70);
+ clib_warning ("memory_api_init returned %d, quitting...", rv);
+ return 0;
+ }
+
+ if ((error = socksvr_api_init (vm)))
+ {
+ clib_error_report (error);
+ clib_warning ("socksvr_api_init failed, quitting...");
+ return 0;
}
shm = am->shmem_hdr;
@@ -510,8 +805,8 @@ memclnt_process (vlib_main_t * vm,
if (e)
clib_error_report (e);
- sleep_time = 20.0;
- dead_client_scan_time = vlib_time_now (vm) + 20.0;
+ sleep_time = 10.0;
+ dead_client_scan_time = vlib_time_now (vm) + 10.0;
/*
* Send plugin message range messages for each plugin we loaded
@@ -524,26 +819,17 @@ memclnt_process (vlib_main_t * vm,
}
/*
- * Snapshoot the api message table.
- */
- serialized_message_table = vl_api_serialize_message_table (am, 0);
-
- svm = am->vlib_rp;
- pthread_mutex_lock (&svm->mutex);
- oldheap = svm_push_data_heap (svm);
-
- am->serialized_message_table_in_shmem = vec_dup (serialized_message_table);
-
- pthread_mutex_unlock (&svm->mutex);
- svm_pop_heap (oldheap);
-
- /*
* Save the api message table snapshot, if configured
*/
if (am->save_msg_table_filename)
{
int fd, rv;
u8 *chroot_file;
+ u8 *serialized_message_table;
+
+ /*
+ * Snapshoot the api message table.
+ */
if (strstr ((char *) am->save_msg_table_filename, "..")
|| index ((char *) am->save_msg_table_filename, '/'))
{
@@ -561,6 +847,9 @@ memclnt_process (vlib_main_t * vm,
clib_unix_warning ("creat");
goto skip_save;
}
+
+ serialized_message_table = vl_api_serialize_message_table (am, 0);
+
rv = write (fd, serialized_message_table,
vec_len (serialized_message_table));
@@ -572,15 +861,14 @@ memclnt_process (vlib_main_t * vm,
clib_unix_warning ("close");
vec_free (chroot_file);
+ vec_free (serialized_message_table);
}
skip_save:
- vec_free (serialized_message_table);
/* $$$ pay attention to frame size, control CPU usage */
while (1)
{
- uword event_type __attribute__ ((unused));
i8 *headp;
int need_broadcast;
@@ -665,104 +953,89 @@ skip_save:
}
}
- event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time);
- vm->queue_signal_pending = 0;
- vlib_process_get_events (vm, 0 /* event_data */ );
-
- if (vlib_time_now (vm) > dead_client_scan_time)
+ /*
+ * see if we have any private api shared-memory segments
+ * If so, push required context variables, and process
+ * a message.
+ */
+ if (PREDICT_FALSE (vec_len (am->vlib_private_rps)))
{
- vl_api_registration_t **regpp;
- vl_api_registration_t *regp;
- unix_shared_memory_queue_t *q;
- static u32 *dead_indices;
- static u32 *confused_indices;
+ unix_shared_memory_queue_t *save_vlib_input_queue = q;
+ vl_shmem_hdr_t *save_shmem_hdr = am->shmem_hdr;
+ svm_region_t *save_vlib_rp = am->vlib_rp;
- vec_reset_length (dead_indices);
- vec_reset_length (confused_indices);
+ vlib_rp = am->vlib_rp = am->vlib_private_rps[private_segment_rotor];
- /* *INDENT-OFF* */
- pool_foreach (regpp, am->vl_clients,
- ({
- regp = *regpp;
- if (regp)
- {
- q = regp->vl_input_queue;
- if (kill (q->consumer_pid, 0) < 0)
- {
- vec_add1(dead_indices, regpp - am->vl_clients);
- }
- }
- else
- {
- clib_warning ("NULL client registration index %d",
- regpp - am->vl_clients);
- vec_add1 (confused_indices, regpp - am->vl_clients);
- }
- }));
- /* *INDENT-ON* */
- /* This should "never happen," but if it does, fix it... */
- if (PREDICT_FALSE (vec_len (confused_indices) > 0))
- {
- int i;
- for (i = 0; i < vec_len (confused_indices); i++)
- {
- pool_put_index (am->vl_clients, confused_indices[i]);
- }
- }
+ am->shmem_hdr = (void *) vlib_rp->user_ctx;
+ q = am->shmem_hdr->vl_input_queue;
- if (PREDICT_FALSE (vec_len (dead_indices) > 0))
+ pthread_mutex_lock (&q->mutex);
+ if (q->cursize > 0)
{
- int i;
- svm_region_t *svm;
- void *oldheap;
+ headp = (i8 *) (q->data + sizeof (uword) * q->head);
+ clib_memcpy (&mp, headp, sizeof (uword));
- /* Allow the application to clean up its registrations */
- for (i = 0; i < vec_len (dead_indices); i++)
- {
- regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
- if (regpp)
- {
- u32 handle;
-
- handle = vl_msg_api_handle_from_index_and_epoch
- (dead_indices[i], shm->application_restarts);
- (void) call_reaper_functions (handle);
- }
- }
+ q->head++;
+ need_broadcast = (q->cursize == q->maxsize / 2);
+ q->cursize--;
- svm = am->vlib_rp;
- pthread_mutex_lock (&svm->mutex);
- oldheap = svm_push_data_heap (svm);
+ if (PREDICT_FALSE (q->head == q->maxsize))
+ q->head = 0;
+ pthread_mutex_unlock (&q->mutex);
+ if (need_broadcast)
+ (void) pthread_cond_broadcast (&q->condvar);
- for (i = 0; i < vec_len (dead_indices); i++)
- {
- regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]);
- if (regpp)
- {
- /* Poison the old registration */
- memset (*regpp, 0xF3, sizeof (**regpp));
- clib_mem_free (*regpp);
- /* no dangling references, please */
- *regpp = 0;
- }
- else
- {
- svm_pop_heap (oldheap);
- clib_warning ("Duplicate free, client index %d",
- regpp - am->vl_clients);
- oldheap = svm_push_data_heap (svm);
- }
- }
+ pthread_mutex_unlock (&q->mutex);
- svm_client_scan_this_region_nolock (am->vlib_rp);
+ vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node);
+ }
+ else
+ pthread_mutex_unlock (&q->mutex);
- pthread_mutex_unlock (&svm->mutex);
- svm_pop_heap (oldheap);
- for (i = 0; i < vec_len (dead_indices); i++)
- pool_put_index (am->vl_clients, dead_indices[i]);
+ q = save_vlib_input_queue;
+ am->shmem_hdr = save_shmem_hdr;
+ am->vlib_rp = save_vlib_rp;
+
+ private_segment_rotor++;
+ if (private_segment_rotor >= vec_len (am->vlib_private_rps))
+ private_segment_rotor = 0;
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, sleep_time);
+ vec_reset_length (event_data);
+ event_type = vlib_process_get_events (vm, &event_data);
+ now = vlib_time_now (vm);
+
+ switch (event_type)
+ {
+ case QUEUE_SIGNAL_EVENT:
+ vm->queue_signal_pending = 0;
+ break;
+
+ case SOCKET_READ_EVENT:
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ a = pool_elt_at_index (socket_main.process_args, event_data[i]);
+ vl_api_socket_process_msg (a->clib_file, a->regp,
+ (i8 *) a->data);
+ vec_free (a->data);
+ pool_put (socket_main.process_args, a);
}
+ break;
- dead_client_scan_time = vlib_time_now (vm) + 20.0;
+ /* Timeout... */
+ case -1:
+ break;
+
+ default:
+ clib_warning ("unknown event type %d", event_type);
+ break;
+ }
+
+ if (now > dead_client_scan_time)
+ {
+ dead_client_scan (am, shm, now);
+ dead_client_scan_time = vlib_time_now (vm) + 10.0;
}
if (TRACE_VLIB_MEMORY_QUEUE)
@@ -785,11 +1058,12 @@ skip_save:
return 0;
}
/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (memclnt_node,static) = {
- .function = memclnt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "api-rx-from-ring",
- .state = VLIB_NODE_STATE_DISABLED,
+VLIB_REGISTER_NODE (memclnt_node) =
+{
+ .function = memclnt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "api-rx-from-ring",
+ .state = VLIB_NODE_STATE_DISABLED,
};
/* *INDENT-ON* */
@@ -865,14 +1139,17 @@ VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) =
};
/* *INDENT-ON* */
+volatile int **vl_api_queue_cursizes;
+
static void
memclnt_queue_callback (vlib_main_t * vm)
{
- static volatile int *cursizep;
+ int i;
+ api_main_t *am = &api_main;
- if (PREDICT_FALSE (cursizep == 0))
+ if (PREDICT_FALSE (vec_len (vl_api_queue_cursizes) !=
+ 1 + vec_len (am->vlib_private_rps)))
{
- api_main_t *am = &api_main;
vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
unix_shared_memory_queue_t *q;
@@ -882,15 +1159,30 @@ memclnt_queue_callback (vlib_main_t * vm)
q = shmem_hdr->vl_input_queue;
if (q == 0)
return;
- cursizep = &q->cursize;
+
+ vec_add1 (vl_api_queue_cursizes, &q->cursize);
+
+ for (i = 0; i < vec_len (am->vlib_private_rps); i++)
+ {
+ svm_region_t *vlib_rp = am->vlib_private_rps[i];
+
+ shmem_hdr = (void *) vlib_rp->user_ctx;
+ q = shmem_hdr->vl_input_queue;
+ vec_add1 (vl_api_queue_cursizes, &q->cursize);
+ }
}
- if (*cursizep >= 1)
+ for (i = 0; i < vec_len (vl_api_queue_cursizes); i++)
{
- vm->queue_signal_pending = 1;
- vm->api_queue_nonempty = 1;
- vlib_process_signal_event (vm, memclnt_node.index,
- /* event_type */ 0, /* event_data */ 0);
+ if (*vl_api_queue_cursizes[i])
+ {
+ vm->queue_signal_pending = 1;
+ vm->api_queue_nonempty = 1;
+ vlib_process_signal_event (vm, memclnt_node.index,
+ /* event_type */ QUEUE_SIGNAL_EVENT,
+ /* event_data */ 0);
+ break;
+ }
}
}
@@ -971,13 +1263,55 @@ setup_memclnt_exit (vlib_main_t * vm)
VLIB_INIT_FUNCTION (setup_memclnt_exit);
+/*
+ * format_api_message_rings
+ *
+ * Format helper. Consumes three va_args: api_main_t *, vl_shmem_hdr_t *
+ * and an int "main segment" flag. A null shmem header produces only the
+ * column-title line; otherwise one line per vlib ring and per client ring
+ * is appended, followed by the summary counters when the flag is set.
+ */
+u8 *
+format_api_message_rings (u8 * s, va_list * args)
+{
+  api_main_t *am = va_arg (*args, api_main_t *);
+  vl_shmem_hdr_t *shmem_hdr = va_arg (*args, vl_shmem_hdr_t *);
+  int main_segment = va_arg (*args, int);
+  ring_alloc_t *ring;
+  int idx;
+
+  /* No header supplied: the caller only wants the column titles */
+  if (!shmem_hdr)
+    {
+      s = format (s, "%8s %8s %8s %8s %8s\n",
+                  "Owner", "Size", "Nitems", "Hits", "Misses");
+      return s;
+    }
+
+  /* Rings owned by vlib */
+  for (idx = 0; idx < vec_len (shmem_hdr->vl_rings); idx++)
+    {
+      ring = shmem_hdr->vl_rings + idx;
+      s = format (s, "%8s %8d %8d %8d %8d\n",
+                  "vlib", ring->size, ring->nitems, ring->hits,
+                  ring->misses);
+    }
+
+  /* Rings owned by clients */
+  for (idx = 0; idx < vec_len (shmem_hdr->client_rings); idx++)
+    {
+      ring = shmem_hdr->client_rings + idx;
+      s = format (s, "%8s %8d %8d %8d %8d\n",
+                  "clnt", ring->size, ring->nitems, ring->hits,
+                  ring->misses);
+    }
+
+  if (!main_segment)
+    return s;
+
+  s = format (s, "%d ring miss fallback allocations\n", am->ring_misses);
+  s = format
+    (s,
+     "%d application restarts, %d reclaimed msgs, %d garbage collects\n",
+     shmem_hdr->application_restarts, shmem_hdr->restart_reclaims,
+     shmem_hdr->garbage_collects);
+
+  return s;
+}
+
static clib_error_t *
vl_api_ring_command (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cli_cmd)
{
int i;
- ring_alloc_t *ap;
vl_shmem_hdr_t *shmem_hdr;
api_main_t *am = &api_main;
@@ -989,34 +1323,38 @@ vl_api_ring_command (vlib_main_t * vm,
return 0;
}
- vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n",
- "Owner", "Size", "Nitems", "Hits", "Misses");
-
- ap = shmem_hdr->vl_rings;
+ vlib_cli_output (vm, "Main API segment rings:");
- for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++)
- {
- vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n",
- "vlib", ap->size, ap->nitems, ap->hits, ap->misses);
- ap++;
- }
+ vlib_cli_output (vm, "%U", format_api_message_rings, am,
+ 0 /* print header */ , 0 /* notused */ );
- ap = shmem_hdr->client_rings;
+ vlib_cli_output (vm, "%U", format_api_message_rings, am,
+ shmem_hdr, 1 /* main segment */ );
- for (i = 0; i < vec_len (shmem_hdr->client_rings); i++)
+ for (i = 0; i < vec_len (am->vlib_private_rps); i++)
{
- vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n",
- "clnt", ap->size, ap->nitems, ap->hits, ap->misses);
- ap++;
+ svm_region_t *vlib_rp = am->vlib_private_rps[i];
+ shmem_hdr = (void *) vlib_rp->user_ctx;
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+
+ /* For horizontal scaling, add a hash table... */
+ /* *INDENT-OFF* */
+ pool_foreach (regpp, am->vl_clients,
+ ({
+ regp = *regpp;
+ if (regp && regp->vlib_rp == vlib_rp)
+ {
+ vlib_cli_output (vm, "%s segment rings:", regp->name);
+ goto found;
+ }
+ }));
+ /* *INDENT-ON* */
+ found:
+ vlib_cli_output (vm, "%U", format_api_message_rings, am,
+ shmem_hdr, 0 /* main segment */ );
}
- vlib_cli_output (vm, "%d ring miss fallback allocations\n",
- am->ring_misses);
-
- vlib_cli_output
- (vm, "%d application restarts, %d reclaimed msgs, %d garbage collects\n",
- shmem_hdr->application_restarts,
- shmem_hdr->restart_reclaims, shmem_hdr->garbage_collects);
return 0;
}
@@ -1051,15 +1389,13 @@ vl_api_client_command (vlib_main_t * vm,
if (regp)
{
- q = regp->vl_input_queue;
- if (kill (q->consumer_pid, 0) < 0)
- {
- health = "DEAD";
- }
+ if (regp->unanswered_pings > 0)
+ health = "questionable";
else
- {
- health = "alive";
- }
+ health = "OK";
+
+ q = regp->vl_input_queue;
+
vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n",
regp->name, q->consumer_pid, q->cursize,
q, health);
@@ -1306,6 +1642,7 @@ vlibmemory_init (vlib_main_t * vm)
{
api_main_t *am = &api_main;
svm_map_region_args_t _a, *a = &_a;
+ clib_error_t *error;
memset (a, 0, sizeof (*a));
a->root_path = am->root_path;
@@ -1321,7 +1658,10 @@ vlibmemory_init (vlib_main_t * vm)
0) ? am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE;
svm_region_init_args (a);
- return 0;
+
+ error = vlib_call_init_function (vm, vlibsocket_init);
+
+ return error;
}
VLIB_INIT_FUNCTION (vlibmemory_init);
@@ -2227,7 +2567,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm,
/* Load the serialized message table from the table dump */
- error = unserialize_open_unix_file (sm, (char *) filename);
+ error = unserialize_open_clib_file (sm, (char *) filename);
if (error)
return error;
@@ -2251,7 +2591,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm,
if (compare_current)
{
/* Append the current message table */
- u8 *tblv = vec_dup (am->serialized_message_table_in_shmem);
+ u8 *tblv = vl_api_serialize_message_table (am, 0);
serialize_open_vector (sm, tblv);
unserialize_integer (sm, &nmsgs, sizeof (u32));
@@ -2268,6 +2608,7 @@ dump_api_table_file_command_fn (vlib_main_t * vm,
item->crc = extract_crc (name_and_crc);
item->which = 1; /* current_image */
}
+ vec_free (tblv);
}
/* Sort the table. */
diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c
new file mode 100644
index 00000000000..8519e7f5f7c
--- /dev/null
+++ b/src/vlibmemory/socket_client.c
@@ -0,0 +1,240 @@
+/*
+ *------------------------------------------------------------------
+ * socket_client.c - API message handling over sockets, client code.
+ *
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <signal.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/time.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibmemory/api.h>
+
+#include <vlibmemory/vl_memory_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* instantiate the endian swap functions */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) clib_warning (__VA_ARGS__)
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+socket_client_main_t socket_client_main;
+
+/*
+ * Debug aid: report the length of vector p.
+ * Declared weak via the prototype below.
+ */
+u32 vl (void *p) __attribute__ ((weak));
+u32
+vl (void *p)
+{
+  u32 length = vec_len (p);
+
+  return length;
+}
+
+/*
+ * vl_socket_client_read_reply
+ *
+ * Read from the client socket and hand each complete message (a msgbuf_t
+ * header followed by data_len bytes) to vl_msg_api_socket_handler.
+ *
+ * Returns when the rx buffer is drained and no control-ping replies are
+ * outstanding, when read() fails, or when the peer closes the socket.
+ * Does nothing if the socket is not connected or not enabled.
+ */
+void
+vl_socket_client_read_reply (socket_client_main_t * scm)
+{
+  int n, current_rx_index;
+  uword msg_bytes;
+  msgbuf_t *mbp;
+
+  if (scm->socket_fd == 0 || scm->socket_enable == 0)
+    return;
+
+  while (1)
+    {
+      /* Need at least the header plus a message id to make progress */
+      msg_bytes = sizeof (*mbp) + 2 /* msg id */ ;
+
+      while (1)
+        {
+          /* Once the header is in hand, wait for the whole message */
+          if (vec_len (scm->socket_rx_buffer) >= sizeof (*mbp))
+            {
+              mbp = (msgbuf_t *) (scm->socket_rx_buffer);
+              if (ntohl (mbp->data_len) + sizeof (*mbp) > msg_bytes)
+                msg_bytes = ntohl (mbp->data_len) + sizeof (*mbp);
+            }
+
+          if (vec_len (scm->socket_rx_buffer) >= msg_bytes)
+            break;
+
+          /* Make room for one more buffer-sized read at the tail */
+          current_rx_index = vec_len (scm->socket_rx_buffer);
+          vec_validate (scm->socket_rx_buffer, current_rx_index
+                        + scm->socket_buffer_size - 1);
+          _vec_len (scm->socket_rx_buffer) = current_rx_index;
+
+          n = read (scm->socket_fd,
+                    scm->socket_rx_buffer + current_rx_index,
+                    scm->socket_buffer_size);
+          if (n < 0)
+            {
+              clib_unix_warning ("socket_read");
+              return;
+            }
+          if (n == 0)
+            {
+              /* EOF: without this check the loop would spin forever */
+              clib_warning ("socket closed by peer");
+              return;
+            }
+#if CLIB_DEBUG > 1
+          clib_warning ("read %d bytes", n);
+#endif
+          _vec_len (scm->socket_rx_buffer) += n;
+        }
+
+      /* vec_validate may have moved the buffer; re-derive the header */
+      mbp = (msgbuf_t *) (scm->socket_rx_buffer);
+      msg_bytes = ntohl (mbp->data_len) + sizeof (*mbp);
+
+      vl_msg_api_socket_handler ((void *) (mbp->data));
+
+      /* Consume the message just handled */
+      if (vec_len (scm->socket_rx_buffer) == msg_bytes)
+        _vec_len (scm->socket_rx_buffer) = 0;
+      else
+        vec_delete (scm->socket_rx_buffer, msg_bytes, 0);
+
+      /* Quit if we're out of data, and not expecting a ping reply */
+      if (vec_len (scm->socket_rx_buffer) == 0
+          && scm->control_pings_outstanding == 0)
+        break;
+    }
+}
+
+/*
+ * vl_socket_client_connect
+ *
+ * Connect to the API socket at socket_path, send a sockclnt_create
+ * message carrying client_name, and wait for the create reply.
+ * On success the tx/rx buffers are allocated (socket_buffer_size bytes,
+ * or SOCKET_CLIENT_DEFAULT_BUFFER_SIZE when 0) and the socket is enabled.
+ *
+ * Returns 0 on success, -1 on socket/protocol failure, -2 if already
+ * connected, -3 if socket_path or client_name is null.
+ * After a -1 return the socket is closed and scm->socket_fd is reset,
+ * so the caller may try again.
+ */
+int
+vl_socket_client_connect (socket_client_main_t * scm, char *socket_path,
+                          char *client_name, u32 socket_buffer_size)
+{
+  char buffer[256];
+  char *rdptr;
+  int n, total_bytes;
+  vl_api_sockclnt_create_reply_t *rp;
+  vl_api_sockclnt_create_t *mp;
+  clib_socket_t *sock = &scm->client_socket;
+  msgbuf_t *mbp;
+  clib_error_t *error;
+
+  /* Already connected? */
+  if (scm->socket_fd)
+    return (-2);
+
+  /* bogus call? */
+  if (socket_path == 0 || client_name == 0)
+    return (-3);
+
+  sock->config = socket_path;
+  sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET;
+
+  error = clib_socket_init (sock);
+
+  if (error)
+    {
+      clib_error_report (error);
+      return (-1);
+    }
+
+  scm->socket_fd = sock->fd;
+
+  /* Start from a clean buffer so no stray stack bytes go on the wire */
+  memset (buffer, 0, sizeof (buffer));
+
+  mbp = (msgbuf_t *) buffer;
+  mbp->q = 0;
+  mbp->data_len = htonl (sizeof (*mp));
+  mbp->gc_mark_timestamp = 0;
+
+  mp = (vl_api_sockclnt_create_t *) mbp->data;
+  mp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE);
+  strncpy ((char *) mp->name, client_name, sizeof (mp->name) - 1);
+  mp->name[sizeof (mp->name) - 1] = 0;
+  mp->context = 0xfeedface;
+
+  n = write (scm->socket_fd, mbp, sizeof (*mbp) + ntohl (mbp->data_len));
+  if (n < 0)
+    {
+      clib_unix_warning ("socket write (msg)");
+      goto fail;
+    }
+
+  memset (buffer, 0, sizeof (buffer));
+
+  /* Accumulate the reply: msgbuf_t header + sockclnt_create_reply */
+  total_bytes = 0;
+  rdptr = buffer;
+  do
+    {
+      n = read (scm->socket_fd, rdptr, sizeof (buffer) - (rdptr - buffer));
+      if (n < 0)
+        {
+          clib_unix_warning ("socket read");
+          goto fail;
+        }
+      if (n == 0)
+        {
+          /* EOF (or reply larger than the buffer): cannot make progress */
+          clib_warning ("socket closed before connect reply arrived");
+          goto fail;
+        }
+      total_bytes += n;
+      rdptr += n;
+    }
+  while (total_bytes < sizeof (vl_api_sockclnt_create_reply_t)
+         + sizeof (msgbuf_t));
+
+  rp = (vl_api_sockclnt_create_reply_t *) (buffer + sizeof (msgbuf_t));
+  if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_CREATE_REPLY)
+    {
+      clib_warning ("connect reply got msg id %d\n", ntohs (rp->_vl_msg_id));
+      goto fail;
+    }
+
+  /* allocate tx, rx buffers */
+  scm->socket_buffer_size = socket_buffer_size ? socket_buffer_size :
+    SOCKET_CLIENT_DEFAULT_BUFFER_SIZE;
+  vec_validate (scm->socket_tx_buffer, scm->socket_buffer_size - 1);
+  vec_validate (scm->socket_rx_buffer, scm->socket_buffer_size - 1);
+  _vec_len (scm->socket_rx_buffer) = 0;
+  scm->socket_enable = 1;
+
+  return (0);
+
+fail:
+  /* Drop the half-open connection so a later connect attempt can proceed */
+  if (close (scm->socket_fd) < 0)
+    clib_unix_warning ("close");
+  scm->socket_fd = 0;
+  return (-1);
+}
+
+/*
+ * vl_socket_client_disconnect
+ *
+ * Close the client socket if one is open (warning on close failure)
+ * and mark the client as not connected.
+ */
+void
+vl_socket_client_disconnect (socket_client_main_t * scm)
+{
+  if (scm->socket_fd)
+    {
+      int rv = close (scm->socket_fd);
+
+      if (rv < 0)
+        clib_unix_warning ("close");
+    }
+
+  scm->socket_fd = 0;
+}
+
+/*
+ * vl_socket_client_enable_disable
+ *
+ * Record the caller's enable flag in the client main structure.
+ */
+void
+vl_socket_client_enable_disable (socket_client_main_t * scm, int enable)
+{
+  socket_client_main_t *client = scm;
+
+  client->socket_enable = enable;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/socksvr_vlib.c b/src/vlibmemory/socksvr_vlib.c
new file mode 100644
index 00000000000..1a263e7bf37
--- /dev/null
+++ b/src/vlibmemory/socksvr_vlib.c
@@ -0,0 +1,719 @@
+/*
+ *------------------------------------------------------------------
+ * socksvr_vlib.c
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/ioctl.h>
+#include <vppinfra/byte_order.h>
+#include <svm/memfd.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include <vlibmemory/api.h>
+
+#include <vlibmemory/vl_memory_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+/* instantiate all the endian swap functions we know about */
+#define vl_endianfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_endianfun
+
+/*
+ * dump_socket_clients
+ *
+ * Print a table (name, file descriptor) of the registrations of type
+ * REGISTRATION_TYPE_SOCKET_SERVER. Prints nothing when the registration
+ * pool holds fewer than two elements.
+ */
+void
+dump_socket_clients (vlib_main_t * vm, api_main_t * am)
+{
+  vl_api_registration_t *client;
+  clib_file_t *cf;
+
+  /*
+   * Must have at least one active client, not counting the
+   * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket
+   */
+  if (pool_elts (socket_main.registration_pool) < 2)
+    return;
+
+  vlib_cli_output (vm, "Socket clients");
+  vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc");
+  /* *INDENT-OFF* */
+  pool_foreach (client, socket_main.registration_pool,
+  ({
+    if (client->registration_type == REGISTRATION_TYPE_SOCKET_SERVER)
+      {
+        cf = pool_elt_at_index (file_main.file_pool,
+                                client->clib_file_index);
+        vlib_cli_output (vm, "%16s %8d", client->name,
+                         cf->file_descriptor);
+      }
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * vl_socket_api_send
+ *
+ * Queue an API message for transmission on a socket registration:
+ * first the msgbuf_t header that precedes elem, then the message body.
+ * The message is freed before returning, including when its id is out
+ * of range (in which case nothing is queued).
+ */
+void
+vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
+{
+  api_main_t *am = &api_main;
+  clib_file_t *cf = file_main.file_pool + rp->clib_file_index;
+  vl_api_registration_t *sock_reg =
+    socket_main.registration_pool + rp->vl_api_registration_pool_index;
+  /* elem points at msgbuf_t.data; step back to the enclosing header */
+  msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data));
+  u16 msg_id = ntohs (*(u16 *) elem);
+#if CLIB_DEBUG > 1
+  u32 output_length;
+#endif
+
+  ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
+
+  if (msg_id >= vec_len (am->api_trace_cfg))
+    {
+      clib_warning ("id out of range: %d", msg_id);
+      vl_msg_api_free ((void *) elem);
+      return;
+    }
+
+  /* Header first, without touching the file's write-ready state */
+  vl_socket_add_pending_output_no_flush (cf, sock_reg, (u8 *) mb,
+                                         sizeof (*mb));
+  /* Then the body, which also arms the write-ready notification */
+  vl_socket_add_pending_output (cf, sock_reg, elem, ntohl (mb->data_len));
+
+#if CLIB_DEBUG > 1
+  output_length = sizeof (*mb) + ntohl (mb->data_len);
+  clib_warning ("wrote %u bytes to fd %d", output_length,
+                cf->file_descriptor);
+#endif
+
+  vl_msg_api_free ((void *) elem);
+}
+
+/*
+ * vl_free_socket_registration_index
+ *
+ * Release the socket registration at pool_index: close any additional
+ * fds recorded on it, free its vectors, mark it free and return it to
+ * the pool. Warns and returns if the index is already free.
+ */
+void
+vl_free_socket_registration_index (u32 pool_index)
+{
+  vl_api_registration_t *reg;
+  int fd_index = 0;
+
+  if (pool_is_free_index (socket_main.registration_pool, pool_index))
+    {
+      clib_warning ("main pool index %d already free", pool_index);
+      return;
+    }
+
+  reg = pool_elt_at_index (socket_main.registration_pool, pool_index);
+  ASSERT (reg->registration_type != REGISTRATION_TYPE_FREE);
+
+  while (fd_index < vec_len (reg->additional_fds_to_close))
+    {
+      if (close (reg->additional_fds_to_close[fd_index]) < 0)
+        clib_unix_warning ("close");
+      fd_index++;
+    }
+
+  vec_free (reg->additional_fds_to_close);
+  vec_free (reg->name);
+  vec_free (reg->unprocessed_input);
+  vec_free (reg->output_vector);
+
+  reg->registration_type = REGISTRATION_TYPE_FREE;
+  pool_put (socket_main.registration_pool, reg);
+}
+
+/*
+ * vl_api_socket_process_msg
+ *
+ * Dispatch one socket-borne message. input_v points at a msgbuf_t; its
+ * data payload is passed to vl_msg_api_socket_handler. The current
+ * file / registration are published in socket_main for the duration of
+ * the handler call and cleared afterwards.
+ */
+void
+vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp,
+                           i8 * input_v)
+{
+  msgbuf_t *hdr = (msgbuf_t *) input_v;
+
+  socket_main.current_uf = uf;
+  socket_main.current_rp = rp;
+
+  vl_msg_api_socket_handler ((u8 *) (hdr->data));
+
+  socket_main.current_uf = 0;
+  socket_main.current_rp = 0;
+}
+
+/*
+ * vl_socket_read_ready
+ *
+ * Read callback for an API client socket. Reads into the shared
+ * socket_main.input_buffer, splits the data into msgbuf_t-framed
+ * messages and signals SOCKET_READ_EVENT to the memclnt process node
+ * for each complete one. Partial data is parked in
+ * rp->unprocessed_input until more arrives.
+ *
+ * On EOF or a read error other than EAGAIN the file and registration
+ * are deleted. Always returns 0.
+ */
+clib_error_t *
+vl_socket_read_ready (clib_file_t * uf)
+{
+  clib_file_main_t *fm = &file_main;
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_registration_t *rp;
+  int n;
+  i8 *msg_buffer = 0;
+  u8 *data_for_process;
+  u32 msg_len = 0;
+  u32 save_input_buffer_length = vec_len (socket_main.input_buffer);
+  vl_socket_args_for_process_t *a;
+  msgbuf_t *mbp;
+  int mbp_set = 0;
+
+  rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+
+  n = read (uf->file_descriptor, socket_main.input_buffer,
+            vec_len (socket_main.input_buffer));
+
+  /*
+   * errno is only meaningful when read() failed. EAGAIN means there is
+   * nothing to process right now; do not fall through and use the
+   * negative count as a vector length.
+   */
+  if (n < 0 && errno == EAGAIN)
+    return 0;
+
+  /* EOF or hard error: tear the connection down */
+  if (n <= 0)
+    {
+      clib_file_del (fm, uf);
+
+      if (!pool_is_free (socket_main.registration_pool, rp))
+        {
+          u32 index = rp - socket_main.registration_pool;
+          vl_free_socket_registration_index (index);
+        }
+      else
+        {
+          clib_warning ("client index %d already free?",
+                        rp->vl_api_registration_pool_index);
+        }
+      return 0;
+    }
+
+  _vec_len (socket_main.input_buffer) = n;
+
+  /*
+   * Look for bugs here. This code is tricky because
+   * data read from a stream socket does not honor message
+   * boundaries. In the case of a long message (>4K bytes)
+   * we have to do (at least) 2 reads, etc.
+   */
+  do
+    {
+      if (vec_len (rp->unprocessed_input))
+        {
+          vec_append (rp->unprocessed_input, socket_main.input_buffer);
+          msg_buffer = rp->unprocessed_input;
+        }
+      else
+        {
+          msg_buffer = socket_main.input_buffer;
+          mbp_set = 0;
+        }
+
+      if (mbp_set == 0)
+        {
+          /* Any chance that we have a complete message? */
+          if (vec_len (msg_buffer) <= sizeof (msgbuf_t))
+            goto save_and_split;
+
+          mbp = (msgbuf_t *) msg_buffer;
+          msg_len = ntohl (mbp->data_len);
+          mbp_set = 1;
+        }
+
+      /* We don't have the entire message yet. */
+      if (mbp_set == 0
+          || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer))
+        {
+        save_and_split:
+          /* if we were using the input buffer save the fragment */
+          if (msg_buffer == socket_main.input_buffer)
+            {
+              ASSERT (vec_len (rp->unprocessed_input) == 0);
+              vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1);
+              clib_memcpy (rp->unprocessed_input, msg_buffer,
+                           vec_len (msg_buffer));
+              _vec_len (rp->unprocessed_input) = vec_len (msg_buffer);
+            }
+          /* Restore the shared input buffer to its full length */
+          _vec_len (socket_main.input_buffer) = save_input_buffer_length;
+          return 0;
+        }
+
+      /* Hand a private copy of exactly one message to the process node */
+      data_for_process = (u8 *) vec_dup (msg_buffer);
+      _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t));
+      pool_get (socket_main.process_args, a);
+      a->clib_file = uf;
+      a->regp = rp;
+      a->data = data_for_process;
+
+      vlib_process_signal_event (vm, memclnt_node.index,
+                                 SOCKET_READ_EVENT,
+                                 a - socket_main.process_args);
+
+      /* Drop the consumed message from the front of the buffer */
+      if (n > (msg_len + sizeof (*mbp)))
+        vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0);
+      else
+        _vec_len (msg_buffer) = 0;
+      n -= msg_len + sizeof (msgbuf_t);
+      msg_len = 0;
+      mbp_set = 0;
+    }
+  while (n > 0);
+
+  _vec_len (socket_main.input_buffer) = save_input_buffer_length;
+
+  return 0;
+}
+
+/*
+ * vl_socket_add_pending_output
+ *
+ * Append buffer_bytes of buffer to the registration's output vector
+ * and, if output is pending and the file was not already flagged as
+ * having data to write, set that flag and notify the file layer.
+ */
+void
+vl_socket_add_pending_output (clib_file_t * uf,
+                              vl_api_registration_t * rp,
+                              u8 * buffer, uword buffer_bytes)
+{
+  clib_file_main_t *fm = &file_main;
+
+  vec_add (rp->output_vector, buffer, buffer_bytes);
+
+  if (vec_len (rp->output_vector) == 0)
+    return;
+
+  /* Only poke the file layer on the not-writable -> writable edge */
+  if ((uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) == 0)
+    {
+      uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+    }
+}
+
+/*
+ * vl_socket_add_pending_output_no_flush
+ *
+ * Append buffer_bytes of buffer to the registration's output vector
+ * without changing the file's flags; uf is not used here.
+ */
+void
+vl_socket_add_pending_output_no_flush (clib_file_t * uf,
+                                       vl_api_registration_t * rp,
+                                       u8 * buffer, uword buffer_bytes)
+{
+  vl_api_registration_t *reg = rp;
+
+  vec_add (reg->output_vector, buffer, buffer_bytes);
+}
+
+/*
+ * socket_del_pending_output
+ *
+ * Remove the first n_bytes from the registration's output vector.
+ * When the vector becomes empty, clear the file's data-to-write flag
+ * and, if the flag had been set, notify the file layer.
+ */
+static void
+socket_del_pending_output (clib_file_t * uf,
+                           vl_api_registration_t * rp, uword n_bytes)
+{
+  clib_file_main_t *fm = &file_main;
+
+  vec_delete (rp->output_vector, n_bytes, 0);
+
+  if (vec_len (rp->output_vector) != 0)
+    return;
+
+  /* Only poke the file layer on the writable -> not-writable edge */
+  if (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+    {
+      uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+    }
+}
+
+/*
+ * vl_socket_write_ready
+ *
+ * Write callback for an API client socket: try to write the whole
+ * pending output vector, then drop however many bytes were accepted.
+ * A negative write() result deletes the file and frees the
+ * registration. Always returns 0.
+ */
+clib_error_t *
+vl_socket_write_ready (clib_file_t * uf)
+{
+  vl_api_registration_t *reg;
+  int n_written;
+
+  reg = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+
+  /* Flush output vector. */
+  n_written = write (uf->file_descriptor,
+                     reg->output_vector, vec_len (reg->output_vector));
+
+  if (n_written > 0)
+    {
+      socket_del_pending_output (uf, reg, n_written);
+      return 0;
+    }
+
+  if (n_written == 0)
+    return 0;
+
+#if DEBUG > 2
+  clib_warning ("write error, close the file...\n");
+#endif
+  clib_file_del (&file_main, uf);
+  vl_free_socket_registration_index (reg - socket_main.registration_pool);
+
+  return 0;
+}
+
+/*
+ * vl_socket_error_ready
+ *
+ * Error callback for an API client socket: delete the file and free
+ * the registration it refers to. Always returns 0.
+ */
+clib_error_t *
+vl_socket_error_ready (clib_file_t * uf)
+{
+  vl_api_registration_t *reg =
+    pool_elt_at_index (socket_main.registration_pool, uf->private_data);
+
+  clib_file_del (&file_main, uf);
+  vl_free_socket_registration_index (reg - socket_main.registration_pool);
+
+  return 0;
+}
+
+/*
+ * socksvr_file_add
+ *
+ * Allocate a zeroed socket-server registration for fd and add a
+ * clib file for it whose read/write/error callbacks are the
+ * vl_socket_*_ready functions. The file's private_data and the
+ * registration's pool index both hold the registration's pool index.
+ */
+void
+socksvr_file_add (clib_file_main_t * fm, int fd)
+{
+  clib_file_t template = { 0 };
+  vl_api_registration_t *reg;
+  uword reg_index;
+
+  pool_get (socket_main.registration_pool, reg);
+  memset (reg, 0, sizeof (*reg));
+  reg_index = reg - socket_main.registration_pool;
+
+  reg->registration_type = REGISTRATION_TYPE_SOCKET_SERVER;
+  reg->vl_api_registration_pool_index = reg_index;
+
+  template.file_descriptor = fd;
+  template.private_data = reg_index;
+  template.read_function = vl_socket_read_ready;
+  template.write_function = vl_socket_write_ready;
+  template.error_function = vl_socket_error_ready;
+
+  reg->clib_file_index = clib_file_add (fm, &template);
+}
+
+/*
+ * socksvr_accept_ready
+ *
+ * Read callback for the listen socket: accept one connection and
+ * register the new fd as a socket-server client. Returns the accept
+ * error, if any, otherwise 0.
+ */
+static clib_error_t *
+socksvr_accept_ready (clib_file_t * uf)
+{
+  clib_socket_t client;
+  clib_error_t *error;
+
+  error = clib_socket_accept (&socket_main.socksvr_listen_socket, &client);
+  if (error == 0)
+    socksvr_file_add (&file_main, client.fd);
+
+  return error;
+}
+
+/*
+ * socksvr_bogus_write
+ *
+ * Write callback that only logs a warning; it performs no I/O and
+ * returns 0.
+ */
+static clib_error_t *
+socksvr_bogus_write (clib_file_t * uf)
+{
+  clib_error_t *no_error = 0;
+
+  clib_warning ("why am I here?");
+  return no_error;
+}
+
+/*
+ * vl_api_sockclnt_create_t_handler
+ *
+ * Handle a socket client's create request: record the client's name on
+ * the registration currently being serviced and send back a
+ * sockclnt_create_reply carrying the registration handle and index.
+ * Does nothing (beyond a warning) if no socket registration is current.
+ */
+void
+vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
+{
+  vl_api_registration_t *regp;
+  vl_api_sockclnt_create_reply_t *rp;
+  int rv = 1;
+
+  regp = socket_main.current_rp;
+
+  /* current_rp is only set while a socket message is being dispatched */
+  if (regp == 0)
+    {
+      clib_warning ("sockclnt_create with no current socket registration");
+      return;
+    }
+
+  ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER);
+
+  /* The name arrives off the wire; force termination before %s reads it */
+  mp->name[sizeof (mp->name) - 1] = 0;
+
+  /* A repeated create must not leak the previously recorded name */
+  vec_free (regp->name);
+  regp->name = format (0, "%s%c", mp->name, 0);
+
+  rp = vl_msg_api_alloc (sizeof (*rp));
+  rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY);
+  rp->handle = (uword) regp;
+  rp->index = (uword) regp->vl_api_registration_pool_index;
+  rp->context = mp->context;
+  rp->response = htonl (rv);
+
+  vl_msg_api_send (regp, (u8 *) rp);
+}
+
+/*
+ * vl_api_sockclnt_delete_t_handler
+ *
+ * Handle a socket client's delete request: send a
+ * sockclnt_delete_reply to the registration named by mp->index, then
+ * delete its clib file and free the registration. An index that is
+ * free in the registration pool only produces a warning.
+ *
+ * NOTE(review): the reply is handed to vl_msg_api_send and the file and
+ * registration are torn down immediately afterwards; confirm the reply
+ * actually reaches the client before the teardown.
+ */
+void
+vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp)
+{
+  vl_api_registration_t *regp;
+  vl_api_sockclnt_delete_reply_t *reply;
+
+  if (pool_is_free_index (socket_main.registration_pool, mp->index))
+    {
+      clib_warning ("unknown client ID %d", mp->index);
+      return;
+    }
+
+  regp = pool_elt_at_index (socket_main.registration_pool, mp->index);
+
+  reply = vl_msg_api_alloc (sizeof (*reply));
+  reply->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY);
+  reply->handle = mp->handle;
+  reply->response = htonl (1);
+
+  vl_msg_api_send (regp, (u8 *) reply);
+
+  clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index);
+
+  vl_free_socket_registration_index (mp->index);
+}
+
+/*
+ * send_fd_msg
+ *
+ * Send fd_to_share to the peer of socket_fd as SCM_RIGHTS ancillary
+ * data, accompanied by the payload bytes "memfd" (no terminating NUL).
+ * Returns 0 on success or a unix error if sendmsg fails.
+ */
+static clib_error_t *
+send_fd_msg (int socket_fd, int fd_to_share)
+{
+  struct msghdr mh = { 0 };
+  struct iovec iov[1];
+  /*
+   * The union gives the control buffer the alignment struct cmsghdr
+   * needs; a bare char array carries no such guarantee (see cmsg(3)).
+   */
+  union
+  {
+    char buf[CMSG_SPACE (sizeof (int))];
+    struct cmsghdr align;
+  } ctl;
+  struct cmsghdr *cmsg;
+  char *msg = "memfd";
+  int rv;
+
+  iov[0].iov_base = msg;
+  iov[0].iov_len = strlen (msg);
+  mh.msg_iov = iov;
+  mh.msg_iovlen = 1;
+
+  memset (&ctl, 0, sizeof (ctl));
+  mh.msg_control = ctl.buf;
+  mh.msg_controllen = sizeof (ctl.buf);
+  cmsg = CMSG_FIRSTHDR (&mh);
+  cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type = SCM_RIGHTS;
+  memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int));
+
+  rv = sendmsg (socket_fd, &mh, 0);
+  if (rv < 0)
+    return clib_error_return_unix (0, "sendmsg");
+  return 0;
+}
+
+/*
+ * Create a memory-fd segment.
+ *
+ * Only socket-server registrations may ask for one (otherwise the reply
+ * carries -31). On success a memfd segment of the requested size is
+ * created, an svm region with API rings is set up inside it and added
+ * to am->vlib_private_rps, the reply is sent, and after a short
+ * suspend the segment's fd is passed to the client over the socket.
+ */
+void
+vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp)
+{
+  vl_api_memfd_segment_create_reply_t *rmp;
+  api_main_t *am = &api_main;
+  clib_file_t *cf;
+  memfd_private_t _memfd_private, *memfd = &_memfd_private;
+  vl_api_registration_t *regp;
+  vlib_main_t *vm = vlib_get_main ();
+  svm_map_region_args_t _args, *a = &_args;
+  svm_region_t *vlib_rp;
+  clib_error_t *error;
+  int rv;
+
+  regp = vl_api_client_index_to_registration (mp->client_index);
+
+  if (regp == 0)
+    {
+      clib_warning ("API client disconnected");
+      return;
+    }
+
+  if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER)
+    {
+      rv = -31;                 /* VNET_API_ERROR_INVALID_REGISTRATION */
+      goto reply;
+    }
+
+  memset (memfd, 0, sizeof (*memfd));
+
+  /* Embed in api_main_t */
+  memfd->memfd_size = mp->requested_size;
+  memfd->requested_va = 0ULL;
+  memfd->i_am_master = 1;
+  memfd->name = format (0, "%s%c", regp->name, 0);
+
+  /* Set up a memfd segment of the requested size */
+  rv = memfd_master_init (memfd, mp->client_index);
+
+  if (rv)
+    goto reply;
+
+  /* Remember to close this fd when the socket connection goes away */
+  vec_add1 (regp->additional_fds_to_close, memfd->fd);
+
+  /* And create a plausible svm_region in it */
+  memset (a, 0, sizeof (*a));
+  a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE;
+  a->size = memfd->memfd_size - MMAP_PAGESIZE;
+  /* $$$$ might want a different config parameter */
+  a->pvt_heap_size = am->api_pvt_heap_size;
+  a->flags = SVM_FLAGS_MHEAP;
+  svm_region_init_mapped_region (a, (svm_region_t *) a->baseva);
+
+  vlib_rp = (svm_region_t *) a->baseva;
+
+  /*
+   * Part deux, initialize the svm_region_t shared-memory header
+   * api allocation rings, and so on.
+   */
+  vl_init_shmem (vlib_rp, 1 /* is_vlib (dont-care) */ , 1 /* is_private */ );
+
+  vec_add1 (am->vlib_private_rps, vlib_rp);
+
+  memfd->sh->ready = 1;
+
+  /* Recompute the set of input queues to poll in memclnt_process */
+  vec_reset_length (vl_api_queue_cursizes);
+
+reply:
+
+  /* send the reply message */
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = htonl (rv);
+
+  vl_msg_api_send (regp, (u8 *) rmp);
+
+  if (rv != 0)
+    return;
+
+  /*
+   * We need the reply message to make it out the back door
+   * before we send the magic fd message.
+   */
+  vlib_process_suspend (vm, 11e-6);
+
+  /*
+   * NOTE(review): regp was looked up before the suspend; confirm the
+   * registration cannot be freed while this process is suspended.
+   */
+  cf = file_main.file_pool + regp->clib_file_index;
+
+  /* send the magic "here's your sign (aka fd)" socket message */
+  error = send_fd_msg (cf->file_descriptor, memfd->fd);
+
+  /* Don't drop the failure on the floor: the client would wait forever */
+  if (error)
+    clib_error_report (error);
+}
+
+/*
+ * Messages handled by the socket API server. Each _(N, n) entry supplies
+ * the VL_API_<N> message id and the vl_api_<n>_t handler / endian / print
+ * symbol stem; socksvr_api_init expands the list to register them.
+ */
+#define foreach_vlib_api_msg \
+_(SOCKCLNT_CREATE, sockclnt_create) \
+_(SOCKCLNT_DELETE, sockclnt_delete) \
+_(MEMFD_SEGMENT_CREATE, memfd_segment_create)
+
+/*
+ * Register the socket API message handlers and start listening on the
+ * configured API socket.
+ *
+ * Returns 0 without doing anything when no socket name was configured.
+ * Otherwise returns the error from clib_socket_init, or 0 once the listen
+ * socket has been handed to clib_file_add.
+ */
+clib_error_t *
+socksvr_api_init (vlib_main_t * vm)
+{
+  clib_file_main_t *fm = &file_main;
+  clib_file_t template = { 0 };
+  vl_api_registration_t *rp;
+  vl_msg_api_msg_config_t cfg;
+  vl_msg_api_msg_config_t *c = &cfg;
+  socket_main_t *sm = &socket_main;
+  clib_socket_t *sock = &sm->socksvr_listen_socket;
+  clib_error_t *error;
+
+  /* If not explicitly configured, do not bind/enable, etc. */
+  if (sm->socket_name == 0)
+    return 0;
+
+  /*
+   * Start from an all-zero config so that any member the macro below
+   * does not set is not passed to vl_msg_api_config uninitialized.
+   */
+  memset (c, 0, sizeof (*c));
+
+#define _(N,n) do {                                             \
+    c->id = VL_API_##N;                                         \
+    c->name = #n;                                               \
+    c->handler = vl_api_##n##_t_handler;                        \
+    c->cleanup = vl_noop_handler;                               \
+    c->endian = vl_api_##n##_t_endian;                          \
+    c->print = vl_api_##n##_t_print;                            \
+    c->size = sizeof(vl_api_##n##_t);                           \
+    c->traced = 1; /* trace, so these msgs print */             \
+    c->replay = 0; /* don't replay client create/delete msgs */ \
+    c->message_bounce = 0; /* don't bounce this message */      \
+    vl_msg_api_config(c);} while (0);
+
+  foreach_vlib_api_msg;
+#undef _
+
+  vec_resize (sm->input_buffer, 4096);
+
+  sock->config = (char *) sm->socket_name;
+
+  /* mkdir of file socket, only under /run */
+  if (strncmp (sock->config, "/run", 4) == 0)
+    {
+      u8 *tmp = format (0, "%s", sock->config);
+      int i = vec_len (tmp);
+
+      /* Back up to the last '/', then cut the string there */
+      while (i && tmp[--i] != '/')
+	;
+
+      tmp[i] = 0;
+
+      /* i == 0 means the only '/' is the leading one: nothing to create */
+      if (i)
+	vlib_unix_recursive_mkdir ((char *) tmp);
+      vec_free (tmp);
+    }
+
+  sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET |
+    CLIB_SOCKET_F_ALLOW_GROUP_WRITE;
+  error = clib_socket_init (sock);
+  if (error)
+    return error;
+
+  /* The listen socket gets its own registration entry */
+  pool_get (sm->registration_pool, rp);
+  memset (rp, 0, sizeof (*rp));
+
+  rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN;
+
+  template.read_function = socksvr_accept_ready;
+  template.write_function = socksvr_bogus_write;
+  template.file_descriptor = sock->fd;
+  template.private_data = rp - sm->registration_pool;
+
+  rp->clib_file_index = clib_file_add (fm, &template);
+  return 0;
+}
+
+/*
+ * Main-loop exit hook: for every entry in the socket registration pool,
+ * delete its clib file and free the registration. Always returns 0.
+ */
+static clib_error_t *
+socket_exit (vlib_main_t * vm)
+{
+  socket_main_t *sm = &socket_main;
+  clib_file_main_t *fm = &file_main;
+  vl_api_registration_t *reg;
+
+  /* Defensive driving in case something wipes out early */
+  if (sm->registration_pool == 0)
+    return 0;
+
+  /* *INDENT-OFF* */
+  pool_foreach (reg, sm->registration_pool, ({
+    clib_file_del (fm, fm->file_pool + reg->clib_file_index);
+    vl_free_socket_registration_index (reg->vl_api_registration_pool_index);
+  }));
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit);
+
+/*
+ * Parse the "socksvr" startup configuration section.
+ *
+ * Accepts "socket-name <name>" to set the API socket name, or "default"
+ * to use API_SOCKET_FILE. Any other token yields an "unknown input" error.
+ */
+static clib_error_t *
+socksvr_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  socket_main_t *sm = &socket_main;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* Explicit name supplied by the operator */
+      if (unformat (input, "socket-name %s", &sm->socket_name))
+	continue;
+
+      /* Fall back to the built-in socket path */
+      if (unformat (input, "default"))
+	{
+	  sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0);
+	  continue;
+	}
+
+      return clib_error_return (0, "unknown input '%U'",
+				format_unformat_error, input);
+    }
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr");
+
+/* Init hook with nothing to set up; always reports success (0). */
+clib_error_t *
+vlibsocket_init (vlib_main_t * vm)
+{
+  clib_error_t *error = 0;
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (vlibsocket_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */