From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vlibmemory/api.h | 163 ++++ src/vlibmemory/memclnt.api | 91 ++ src/vlibmemory/memory_client.c | 283 ++++++ src/vlibmemory/memory_shared.c | 852 ++++++++++++++++++ src/vlibmemory/memory_vlib.c | 1346 +++++++++++++++++++++++++++++ src/vlibmemory/unix_shared_memory_queue.c | 324 +++++++ src/vlibmemory/unix_shared_memory_queue.h | 69 ++ src/vlibmemory/vl_memory_api_h.h | 32 + src/vlibmemory/vl_memory_msg_enum.h | 42 + 9 files changed, 3202 insertions(+) create mode 100644 src/vlibmemory/api.h create mode 100644 src/vlibmemory/memclnt.api create mode 100644 src/vlibmemory/memory_client.c create mode 100644 src/vlibmemory/memory_shared.c create mode 100644 src/vlibmemory/memory_vlib.c create mode 100644 src/vlibmemory/unix_shared_memory_queue.c create mode 100644 src/vlibmemory/unix_shared_memory_queue.h create mode 100644 src/vlibmemory/vl_memory_api_h.h create mode 100644 src/vlibmemory/vl_memory_msg_enum.h (limited to 'src/vlibmemory') diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h new file mode 100644 index 00000000..54a0a001 --- /dev/null +++ b/src/vlibmemory/api.h @@ -0,0 +1,163 @@ +/* + *------------------------------------------------------------------ + * api.h + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef included_vlibmemory_api_h +#define included_vlibmemory_api_h + +#include +#include +#include +#include +#include +#include + +/* Allocated in shared memory */ + +/* + * Ring-allocation scheme for client API messages + * + * Only one proc/thread has control of a given message buffer. + * To free a buffer allocated from one of these rings, we clear + * a field in the buffer (header), and leave. + * + * No locks, no hits, no errors... + */ +typedef struct ring_alloc_ +{ + unix_shared_memory_queue_t *rp; + u16 size; + u16 nitems; + u32 hits; + u32 misses; +} ring_alloc_t; + +/* + * Initializers for the (shared-memory) rings + * _(size, n). Note: each msg has an 8 byte header. + * Might want to change that to an index sometime. + */ +#define foreach_vl_aring_size \ +_(64+8, 1024) \ +_(256+8, 128) \ +_(1024+8, 64) + +#define foreach_clnt_aring_size \ +_(1024+8, 1024) \ +_(2048+8, 128) \ +_(4096+8, 8) + +typedef struct vl_shmem_hdr_ +{ + int version; + + /* getpid () for the VLIB client process */ + volatile int vl_pid; + + /* Client sends VLIB msgs here. */ + unix_shared_memory_queue_t *vl_input_queue; + + /* Vector of rings; one for each size. */ + + /* VLIB allocates buffers to send msgs to clients here. */ + ring_alloc_t *vl_rings; + + /* Clients allocate buffer to send msgs to VLIB here. 
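+ * One ring per size in foreach_clnt_aring_size; same allocation scheme as vl_rings above.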
*/ + ring_alloc_t *client_rings; + + /* Number of detected application restarts */ + u32 application_restarts; + + /* Number of messages reclaimed during application restart */ + u32 restart_reclaims; + +} vl_shmem_hdr_t; + +#define VL_SHM_VERSION 2 + +#define VL_API_EPOCH_MASK 0xFF +#define VL_API_EPOCH_SHIFT 8 + +static inline u32 +vl_msg_api_handle_get_epoch (u32 index) +{ + return (index & VL_API_EPOCH_MASK); +} + +static inline u32 +vl_msg_api_handle_get_index (u32 index) +{ + return (index >> VL_API_EPOCH_SHIFT); +} + +static inline u32 +vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch) +{ + u32 handle; + ASSERT (index < 0x00FFFFFF); + + handle = (index << VL_API_EPOCH_SHIFT) | (epoch & VL_API_EPOCH_MASK); + return handle; +} + +void *vl_msg_api_alloc (int nbytes); +void *vl_msg_api_alloc_or_null (int nbytes); +void *vl_msg_api_alloc_as_if_client (int nbytes); +void *vl_msg_api_alloc_as_if_client_or_null (int nbytes); +void vl_msg_api_free (void *a); +int vl_map_shmem (char *region_name, int is_vlib); +void vl_register_mapped_shmem_region (svm_region_t * rp); +void vl_unmap_shmem (void); +void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem); +void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem); +void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem); +int vl_client_connect (char *name, int ctx_quota, int input_queue_size); +void vl_client_disconnect (void); +unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index); +vl_api_registration_t *vl_api_client_index_to_registration (u32 index); +int vl_client_api_map (char *region_name); +void vl_client_api_unmap (void); +void vl_set_memory_region_name (char *name); +void vl_set_memory_root_path (char *root_path); +void vl_set_memory_uid (int uid); +void vl_set_memory_gid (int gid); +void vl_set_global_memory_baseva (u64 baseva); +void vl_set_global_memory_size (u64 size); +void vl_set_api_memory_size (u64 size); +void vl_set_global_pvt_heap_size (u64 size); +void vl_set_api_pvt_heap_size (u64 size); +void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); +void vl_client_disconnect_from_vlib (void); +int vl_client_connect_to_vlib (char *svm_name, char *client_name, + int rx_queue_size); +int vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, + char *client_name, + int rx_queue_size); +u16 vl_client_get_first_plugin_msg_id (char *plugin_name); + +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +#endif /* included_vlibmemory_api_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api new file mode 100644 index 00000000..2f654caf --- /dev/null +++ b/src/vlibmemory/memclnt.api @@ -0,0 +1,91 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Create a client registration + */ +manual_print +define memclnt_create { + i32 ctx_quota; /* requested punt context quota */ + u32 context; /* opaque value to be returned in the reply */ + u64 input_queue; /* client's queue */ + u8 name[64]; /* for show, find by name, whatever */ + u32 api_versions[8]; /* client-server pairs use as desired */ +}; + +define memclnt_create_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* handle by which vlib knows this client */ + u32 index; /* index, used e.g. by API trace replay */ + u32 context; /* opaque value from the create request */ + u64 message_table; /* serialized message table in shmem */ +}; + +/* + * Delete a client registration + */ +manual_print +define memclnt_delete { + u32 index; /* index, used e.g. by API trace replay */ + u64 handle; /* handle by which vlib knows this client */ +}; + +define memclnt_delete_reply { + i32 response; /* Non-negative = success */ + u64 handle; /* in case the client wonders */ +}; + +/* + * Client RX thread exit + */ + +define rx_thread_exit { + u8 dummy; +}; + +/* + * RPC + */ +define rpc_call { + u32 client_index; + u32 context; + u64 function; + u8 multicast; + u8 need_barrier_sync; + u8 send_reply; + u8 data[0]; +}; + +define rpc_reply { + i32 retval; + u32 context; +}; + +/* + * Lookup message-ID base by name + */ +define get_first_msg_id { + u32 client_index; + u32 context; + u8 name[64]; +}; + +define get_first_msg_id_reply { + u32 client_index; + u32 context; + i32 retval; + u16 first_msg_id; +}; diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c new file mode 100644 index 00000000..234a0a5a --- /dev/null +++ b/src/vlibmemory/memory_client.c @@ -0,0 +1,283 @@ +/* + *------------------------------------------------------------------ + * memory_client.c - API message handling, client code. + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
clib_warning (__VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +typedef struct +{ + u8 rx_thread_jmpbuf_valid; + u8 connected_to_vlib; + jmp_buf rx_thread_jmpbuf; + pthread_t rx_thread_handle; + /* Plugin message base lookup scheme */ + volatile u8 first_msg_id_reply_ready; + u16 first_msg_id_reply; +} memory_client_main_t; + +memory_client_main_t memory_client_main; + +static void * +rx_thread_fn (void *arg) +{ + unix_shared_memory_queue_t *q; + memory_client_main_t *mm = &memory_client_main; + api_main_t *am = &api_main; + + q = am->vl_input_queue; + + /* So we can make the rx thread terminate cleanly */ + if (setjmp (mm->rx_thread_jmpbuf) == 0) + { + mm->rx_thread_jmpbuf_valid = 1; + while (1) + { + vl_msg_api_queue_handler (q); + } + } + pthread_exit (0); +} + +static void +vl_api_rx_thread_exit_t_handler (vl_api_rx_thread_exit_t * mp) +{ + memory_client_main_t *mm = &memory_client_main; + vl_msg_api_free (mp); + longjmp (mm->rx_thread_jmpbuf, 1); +} + +static void +noop_handler (void *notused) +{ +} + +#define foreach_api_msg \ +_(RX_THREAD_EXIT, rx_thread_exit) + +static int +connect_to_vlib_internal (char *svm_name, char *client_name, + int rx_queue_size, int want_pthread) +{ + int rv = 0; + memory_client_main_t *mm = &memory_client_main; + + if ((rv = vl_client_api_map (svm_name))) + { + clib_warning ("vl_client_api map rv %d", rv); + return rv; + } + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_api_msg; +#undef _ + + if (vl_client_connect (client_name, 0 /* punt quota */ , + rx_queue_size /* input queue */ ) < 0) + { + vl_client_api_unmap (); + return -1; + } + + /* Start the rx queue thread */ + + if (want_pthread) + { + rv = pthread_create (&mm->rx_thread_handle, + NULL /*attr */ , rx_thread_fn, 0); + if (rv) + clib_warning ("pthread_create returned %d", rv); + } + + mm->connected_to_vlib = 1; + return 0; +} + +int +vl_client_connect_to_vlib (char *svm_name, char *client_name, + int rx_queue_size) +{ + return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, + 1 /* want pthread */ ); +} + +int +vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, char *client_name, + int rx_queue_size) +{ + return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, + 0 /* want pthread */ ); +} + +void +vl_client_disconnect_from_vlib (void) +{ + memory_client_main_t *mm = &memory_client_main; + api_main_t *am = &api_main; + uword junk; + + if (mm->rx_thread_jmpbuf_valid) + { + vl_api_rx_thread_exit_t *ep; + ep = vl_msg_api_alloc (sizeof (*ep)); + ep->_vl_msg_id = ntohs (VL_API_RX_THREAD_EXIT); + vl_msg_api_send_shmem (am->vl_input_queue, (u8 *) & ep); + pthread_join (mm->rx_thread_handle, (void **) &junk); + } + if (mm->connected_to_vlib) + { + vl_client_disconnect (); + vl_client_api_unmap (); + } + memset (mm, 0, sizeof (*mm)); +} + +static void vl_api_get_first_msg_id_reply_t_handler + (vl_api_get_first_msg_id_reply_t * mp) +{ + memory_client_main_t *mm = &memory_client_main; + i32 retval = ntohl (mp->retval); + + mm->first_msg_id_reply = (retval >= 0) ? 
ntohs (mp->first_msg_id) : ~0; + mm->first_msg_id_reply_ready = 1; +} + +u16 +vl_client_get_first_plugin_msg_id (char *plugin_name) +{ + vl_api_get_first_msg_id_t *mp; + api_main_t *am = &api_main; + memory_client_main_t *mm = &memory_client_main; + f64 timeout; + void *old_handler; + clib_time_t clib_time; + u16 rv = ~0; + + if (strlen (plugin_name) + 1 > sizeof (mp->name)) + return (rv); + + memset (&clib_time, 0, sizeof (clib_time)); + clib_time_init (&clib_time); + + /* Push this plugin's first_msg_id_reply handler */ + old_handler = am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY]; + am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = (void *) + vl_api_get_first_msg_id_reply_t_handler; + + /* Ask the data-plane for the message-ID base of the indicated plugin */ + mm->first_msg_id_reply_ready = 0; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID); + mp->client_index = am->my_client_index; + strncpy ((char *) mp->name, plugin_name, sizeof (mp->name) - 1); + + vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp); + + /* Synchronously wait for the answer */ + do + { + timeout = clib_time_now (&clib_time) + 1.0; + + while (clib_time_now (&clib_time) < timeout) + { + if (mm->first_msg_id_reply_ready == 1) + { + rv = mm->first_msg_id_reply; + goto result; + } + } + /* Restore old handler */ + am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler; + + return rv; + } + while (0); + +result: + + /* Restore the old handler */ + am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler; + + if (rv == (u16) ~ 0) + clib_warning ("plugin '%s' not registered", plugin_name); + + return rv; +} + +void +vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) +{ + clib_warning ("STUB called..."); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c new file mode 100644 index 00000000..d8d32004 --- /dev/null +++ b/src/vlibmemory/memory_shared.c @@ -0,0 +1,852 @@ +/* + *------------------------------------------------------------------ + * memclnt_shared.c - API message handling, common code for both clients + * and the vlib process itself. + * + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs +#include +#undef vl_typedefs + +static inline void * +vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) +{ + int i; + msgbuf_t *rv; + ring_alloc_t *ap; + unix_shared_memory_queue_t *q; + void *oldheap; + vl_shmem_hdr_t *shmem_hdr; + api_main_t *am = &api_main; + + shmem_hdr = am->shmem_hdr; + + if (shmem_hdr == 0) + { + clib_warning ("shared memory header NULL"); + return 0; + } + + /* account for the msgbuf_t header */ + nbytes += sizeof (msgbuf_t); + + if (shmem_hdr->vl_rings == 0) + { + clib_warning ("vl_rings NULL"); + ASSERT (0); + abort (); + } + + if (shmem_hdr->client_rings == 0) + { + clib_warning ("client_rings NULL"); + ASSERT (0); + abort (); + } + + ap = pool ? shmem_hdr->vl_rings : shmem_hdr->client_rings; + for (i = 0; i < vec_len (ap); i++) + { + /* Too big? */ + if (nbytes > ap[i].size) + { + continue; + } + + q = ap[i].rp; + if (pool == 0) + { + pthread_mutex_lock (&q->mutex); + } + rv = (msgbuf_t *) (&q->data[0] + q->head * q->elsize); + /* + * Is this item still in use? + */ + if (rv->q) + { + /* yes, loser; try next larger pool */ + ap[i].misses++; + if (pool == 0) + pthread_mutex_unlock (&q->mutex); + continue; + } + /* OK, we have a winner */ + ap[i].hits++; + /* + * Remember the source queue, although we + * don't need to know the queue to free the item. + */ + rv->q = q; + q->head++; + if (q->head == q->maxsize) + q->head = 0; + + if (pool == 0) + pthread_mutex_unlock (&q->mutex); + goto out; + } + + /* + * Request too big, or head element of all size-compatible rings + * still in use. Fall back to shared-memory malloc. + */ + am->ring_misses++; + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + if (may_return_null) + { + rv = clib_mem_alloc_or_null (nbytes); + if (PREDICT_FALSE (rv == 0)) + { + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); + return 0; + } + } + else + rv = clib_mem_alloc (nbytes); + + rv->q = 0; + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); + +out: + rv->data_len = htonl (nbytes - sizeof (msgbuf_t)); + return (rv->data); +} + +void * +vl_msg_api_alloc (int nbytes) +{ + int pool; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + + /* + * Clients use pool-0, vlib proc uses pool 1 + */ + pool = (am->our_pid == shmem_hdr->vl_pid); + return vl_msg_api_alloc_internal (nbytes, pool, 0 /* may_return_null */ ); +} + +void * +vl_msg_api_alloc_or_null (int nbytes) +{ + int pool; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + + pool = (am->our_pid == shmem_hdr->vl_pid); + return vl_msg_api_alloc_internal (nbytes, pool, 1 /* may_return_null */ ); +} + +void * +vl_msg_api_alloc_as_if_client (int nbytes) +{ + return vl_msg_api_alloc_internal (nbytes, 0, 0 /* may_return_null */ ); +} + +void * +vl_msg_api_alloc_as_if_client_or_null (int nbytes) +{ + return vl_msg_api_alloc_internal (nbytes, 0, 1 /* may_return_null */ ); +} + +void +vl_msg_api_free (void *a) +{ + msgbuf_t *rv; + void *oldheap; + api_main_t *am = &api_main; + + rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data)); + + /* + * Here's the beauty of the scheme. Only one proc/thread has + * control of a given message buffer. To free a buffer, we just clear the + * queue field, and leave. 
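+ * The next vl_msg_api_alloc from that ring sees q == 0 and reuses the slot.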
No locks, no hits, no errors... + */ + if (rv->q) + { + rv->q = 0; + return; + } + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + clib_mem_free (rv); + svm_pop_heap (oldheap); + pthread_mutex_unlock (&am->vlib_rp->mutex); +} + +static void +vl_msg_api_free_nolock (void *a) +{ + msgbuf_t *rv; + void *oldheap; + api_main_t *am = &api_main; + + rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data)); + /* + * Here's the beauty of the scheme. Only one proc/thread has + * control of a given message buffer. To free a buffer, we just clear the + * queue field, and leave. No locks, no hits, no errors... + */ + if (rv->q) + { + rv->q = 0; + return; + } + + oldheap = svm_push_data_heap (am->vlib_rp); + clib_mem_free (rv); + svm_pop_heap (oldheap); +} + +void +vl_set_memory_root_path (char *name) +{ + api_main_t *am = &api_main; + + am->root_path = name; +} + +void +vl_set_memory_uid (int uid) +{ + api_main_t *am = &api_main; + + am->api_uid = uid; +} + +void +vl_set_memory_gid (int gid) +{ + api_main_t *am = &api_main; + + am->api_gid = gid; +} + +void +vl_set_global_memory_baseva (u64 baseva) +{ + api_main_t *am = &api_main; + + am->global_baseva = baseva; +} + +void +vl_set_global_memory_size (u64 size) +{ + api_main_t *am = &api_main; + + am->global_size = size; +} + +void +vl_set_api_memory_size (u64 size) +{ + api_main_t *am = &api_main; + + am->api_size = size; +} + +void +vl_set_global_pvt_heap_size (u64 size) +{ + api_main_t *am = &api_main; + + am->global_pvt_heap_size = size; +} + +void +vl_set_api_pvt_heap_size (u64 size) +{ + api_main_t *am = &api_main; + + am->api_pvt_heap_size = size; +} + +int +vl_map_shmem (char *region_name, int is_vlib) +{ + svm_map_region_args_t _a, *a = &_a; + svm_region_t *vlib_rp, *root_rp; + void *oldheap; + vl_shmem_hdr_t *shmem_hdr = 0; + api_main_t *am = &api_main; + int i; + struct timespec ts, tsrem; + + if (is_vlib == 0) + svm_region_init_chroot (am->root_path); + + memset (a, 0, sizeof (*a)); + + a->name = region_name; + a->size = am->api_size ? am->api_size : (16 << 20); + a->flags = SVM_FLAGS_MHEAP; + a->uid = am->api_uid; + a->gid = am->api_gid; + a->pvt_heap_size = am->api_pvt_heap_size; + + vlib_rp = svm_region_find_or_create (a); + + if (vlib_rp == 0) + return (-2); + + pthread_mutex_lock (&vlib_rp->mutex); + /* Has someone else set up the shared-memory variable table? */ + if (vlib_rp->user_ctx) + { + am->shmem_hdr = (void *) vlib_rp->user_ctx; + am->our_pid = getpid (); + if (is_vlib) + { + unix_shared_memory_queue_t *q; + uword old_msg; + /* + * application restart. Reset cached pids, API message + * rings, list of clients; otherwise, various things + * fail. (e.g. 
queue non-empty notification) + */ + + /* ghosts keep the region from disappearing properly */ + svm_client_scan_this_region_nolock (vlib_rp); + am->shmem_hdr->application_restarts++; + q = am->shmem_hdr->vl_input_queue; + am->shmem_hdr->vl_pid = getpid (); + q->consumer_pid = am->shmem_hdr->vl_pid; + /* Drain the input queue, freeing msgs */ + for (i = 0; i < 10; i++) + { + if (pthread_mutex_trylock (&q->mutex) == 0) + { + pthread_mutex_unlock (&q->mutex); + goto mutex_ok; + } + ts.tv_sec = 0; + ts.tv_nsec = 10000 * 1000; /* 10 ms */ + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + } + /* Mutex buggered, "fix" it */ + memset (&q->mutex, 0, sizeof (q->mutex)); + clib_warning ("forcibly release main input queue mutex"); + + mutex_ok: + am->vlib_rp = vlib_rp; + while (unix_shared_memory_queue_sub (q, + (u8 *) & old_msg, + 1 /* nowait */ ) + != -2 /* queue underflow */ ) + { + vl_msg_api_free_nolock ((void *) old_msg); + am->shmem_hdr->restart_reclaims++; + } + pthread_mutex_unlock (&vlib_rp->mutex); + root_rp = svm_get_root_rp (); + ASSERT (root_rp); + /* Clean up the root region client list */ + pthread_mutex_lock (&root_rp->mutex); + svm_client_scan_this_region_nolock (root_rp); + pthread_mutex_unlock (&root_rp->mutex); + } + else + { + pthread_mutex_unlock (&vlib_rp->mutex); + } + am->vlib_rp = vlib_rp; + vec_add1 (am->mapped_shmem_regions, vlib_rp); + return 0; + } + /* Clients simply have to wait... */ + if (!is_vlib) + { + pthread_mutex_unlock (&vlib_rp->mutex); + + /* Wait up to 100 seconds... */ + for (i = 0; i < 10000; i++) + { + ts.tv_sec = 0; + ts.tv_nsec = 10000 * 1000; /* 10 ms */ + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + if (vlib_rp->user_ctx) + goto ready; + } + /* Clean up and leave... */ + svm_region_unmap (vlib_rp); + clib_warning ("region init fail"); + return (-2); + + ready: + am->shmem_hdr = (void *) vlib_rp->user_ctx; + am->our_pid = getpid (); + am->vlib_rp = vlib_rp; + vec_add1 (am->mapped_shmem_regions, vlib_rp); + return 0; + } + + /* Nope, it's our problem... 
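+ * We are the vlib process mapping a fresh region, so build the shared header, main input queue and message rings from scratch.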
*/ + + oldheap = svm_push_data_heap (vlib_rp); + + vec_validate (shmem_hdr, 0); + shmem_hdr->version = VL_SHM_VERSION; + + /* vlib main input queue */ + shmem_hdr->vl_input_queue = + unix_shared_memory_queue_init (1024, sizeof (uword), getpid (), + am->vlib_signal); + + /* Set up the msg ring allocator */ +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->vl_rings, _rp); \ + } while (0); + + foreach_vl_aring_size; +#undef _ + +#define _(sz,n) \ + do { \ + ring_alloc_t _rp; \ + _rp.rp = unix_shared_memory_queue_init ((n), (sz), 0, 0); \ + _rp.size = (sz); \ + _rp.nitems = n; \ + _rp.hits = 0; \ + _rp.misses = 0; \ + vec_add1(shmem_hdr->client_rings, _rp); \ + } while (0); + + foreach_clnt_aring_size; +#undef _ + + am->shmem_hdr = shmem_hdr; + am->vlib_rp = vlib_rp; + am->our_pid = getpid (); + if (is_vlib) + am->shmem_hdr->vl_pid = am->our_pid; + + svm_pop_heap (oldheap); + + /* + * After absolutely everything that a client might see is set up, + * declare the shmem region valid + */ + vlib_rp->user_ctx = shmem_hdr; + + pthread_mutex_unlock (&vlib_rp->mutex); + vec_add1 (am->mapped_shmem_regions, vlib_rp); + return 0; +} + +void +vl_register_mapped_shmem_region (svm_region_t * rp) +{ + api_main_t *am = &api_main; + + vec_add1 (am->mapped_shmem_regions, rp); +} + +void +vl_unmap_shmem (void) +{ + svm_region_t *rp; + int i; + api_main_t *am = &api_main; + + if (!svm_get_root_rp ()) + return; + + for (i = 0; i < vec_len (am->mapped_shmem_regions); i++) + { + rp = am->mapped_shmem_regions[i]; + svm_region_unmap (rp); + } + + vec_free (am->mapped_shmem_regions); + am->shmem_hdr = 0; + + svm_region_exit (); + /* $$$ more careful cleanup, valgrind run... 
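+ * For now, just free the handler / endian / print vectors below.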
*/ + vec_free (am->msg_handlers); + vec_free (am->msg_endian_handlers); + vec_free (am->msg_print_handlers); +} + +void +vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem) +{ + api_main_t *am = &api_main; + uword *trace = (uword *) elem; + + if (am->tx_trace && am->tx_trace->enabled) + vl_msg_api_trace (am, am->tx_trace, (void *) trace[0]); + + (void) unix_shared_memory_queue_add (q, elem, 0 /* nowait */ ); +} + +void +vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem) +{ + api_main_t *am = &api_main; + uword *trace = (uword *) elem; + + if (am->tx_trace && am->tx_trace->enabled) + vl_msg_api_trace (am, am->tx_trace, (void *) trace[0]); + + (void) unix_shared_memory_queue_add_nolock (q, elem); +} + +static void +vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) +{ + serialize_main_t _sm, *sm = &_sm; + api_main_t *am = &api_main; + u8 *tblv; + u32 nmsgs; + int i; + u8 *name_and_crc; + u32 msg_index; + + am->my_client_index = mp->index; + am->my_registration = (vl_api_registration_t *) (uword) mp->handle; + + /* Clean out any previous hash table (unlikely) */ + if (am->msg_index_by_name_and_crc) + { + int i; + u8 **keys = 0; + hash_pair_t *hp; + /* *INDENT-OFF* */ + hash_foreach_pair (hp, am->msg_index_by_name_and_crc, + ({ + vec_add1 (keys, (u8 *) hp->key); + })); + /* *INDENT-ON* */ + for (i = 0; i < vec_len (keys); i++) + vec_free (keys[i]); + vec_free (keys); + } + + am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); + + /* Recreate the vnet-side API message handler table */ + tblv = (u8 *) mp->message_table; + serialize_open_vector (sm, tblv); + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + hash_set_mem (am->msg_index_by_name_and_crc, name_and_crc, msg_index); + } +} + +u32 +vl_api_get_msg_index (u8 * name_and_crc) +{ + api_main_t *am = &api_main; + uword *p; + + if (am->msg_index_by_name_and_crc) + { + p = hash_get_mem (am->msg_index_by_name_and_crc, name_and_crc); + if (p) + return p[0]; + } + return ~0; +} + +int +vl_client_connect (char *name, int ctx_quota, int input_queue_size) +{ + svm_region_t *svm; + vl_api_memclnt_create_t *mp; + vl_api_memclnt_create_reply_t *rp; + unix_shared_memory_queue_t *vl_input_queue; + vl_shmem_hdr_t *shmem_hdr; + int rv = 0; + void *oldheap; + api_main_t *am = &api_main; + + if (am->my_registration) + { + clib_warning ("client %s already connected...", name); + return -1; + } + + if (am->vlib_rp == 0) + { + clib_warning ("am->vlib_rp NULL"); + return -1; + } + + svm = am->vlib_rp; + shmem_hdr = am->shmem_hdr; + + if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0) + { + clib_warning ("shmem_hdr / input queue NULL"); + return -1; + } + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + vl_input_queue = + unix_shared_memory_queue_init (input_queue_size, sizeof (uword), + getpid (), 0); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + + am->my_client_index = ~0; + am->my_registration = 0; + am->vl_input_queue = vl_input_queue; + + mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_create_t)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE); + mp->ctx_quota = ctx_quota; + mp->input_queue = (uword) vl_input_queue; + strncpy ((char *) mp->name, name, sizeof (mp->name) - 1); + + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); + + while 
(1) + { + int qstatus; + struct timespec ts, tsrem; + int i; + + /* Wait up to 10 seconds */ + for (i = 0; i < 1000; i++) + { + qstatus = unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, + 1 /* nowait */ ); + if (qstatus == 0) + goto read_one_msg; + ts.tv_sec = 0; + ts.tv_nsec = 10000 * 1000; /* 10 ms */ + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + } + /* Timeout... */ + clib_warning ("memclnt_create_reply timeout"); + return -1; + + read_one_msg: + if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_REPLY) + { + clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id)); + continue; + } + rv = clib_net_to_host_u32 (rp->response); + + vl_msg_api_handler ((void *) rp); + break; + } + return (rv); +} + +static void +vl_api_memclnt_delete_reply_t_handler (vl_api_memclnt_delete_reply_t * mp) +{ + void *oldheap; + api_main_t *am = &api_main; + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + unix_shared_memory_queue_free (am->vl_input_queue); + pthread_mutex_unlock (&am->vlib_rp->mutex); + svm_pop_heap (oldheap); + + am->my_client_index = ~0; + am->my_registration = 0; + am->vl_input_queue = 0; +} + +void +vl_client_disconnect (void) +{ + vl_api_memclnt_delete_t *mp; + vl_api_memclnt_delete_reply_t *rp; + unix_shared_memory_queue_t *vl_input_queue; + vl_shmem_hdr_t *shmem_hdr; + time_t begin; + api_main_t *am = &api_main; + + ASSERT (am->vlib_rp); + shmem_hdr = am->shmem_hdr; + ASSERT (shmem_hdr && shmem_hdr->vl_input_queue); + + vl_input_queue = am->vl_input_queue; + + mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE); + mp->index = am->my_client_index; + mp->handle = (uword) am->my_registration; + + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); + + /* + * Have to be careful here, in case the client is disconnecting + * because e.g. the vlib process died, or is unresponsive. 
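+ * Wait up to two seconds for the memclnt_delete_reply, then give up and clear the client-side state.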
+ */ + + begin = time (0); + while (1) + { + time_t now; + + now = time (0); + + if (now >= (begin + 2)) + { + clib_warning ("peer unresponsive, give up"); + am->my_client_index = ~0; + am->my_registration = 0; + am->shmem_hdr = 0; + break; + } + if (unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1) < 0) + continue; + + /* drain the queue */ + if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY) + { + vl_msg_api_handler ((void *) rp); + continue; + } + vl_msg_api_handler ((void *) rp); + break; + } +} + +static inline vl_api_registration_t * +vl_api_client_index_to_registration_internal (u32 handle) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + api_main_t *am = &api_main; + u32 index; + + index = vl_msg_api_handle_get_index (handle); + if ((am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK) + != vl_msg_api_handle_get_epoch (handle)) + { + vl_msg_api_increment_missing_client_counter (); + return 0; + } + + regpp = am->vl_clients + index; + + if (pool_is_free (am->vl_clients, regpp)) + { + vl_msg_api_increment_missing_client_counter (); + return 0; + } + regp = *regpp; + return (regp); +} + +vl_api_registration_t * +vl_api_client_index_to_registration (u32 index) +{ + return (vl_api_client_index_to_registration_internal (index)); +} + +unix_shared_memory_queue_t * +vl_api_client_index_to_input_queue (u32 index) +{ + vl_api_registration_t *regp; + api_main_t *am = &api_main; + + /* Special case: vlib trying to send itself a message */ + if (index == (u32) ~ 0) + return (am->shmem_hdr->vl_input_queue); + + regp = vl_api_client_index_to_registration_internal (index); + if (!regp) + return 0; + return (regp->vl_input_queue); +} + +#define foreach_api_client_msg \ +_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ +_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) + +int +vl_client_api_map (char *region_name) +{ + int rv; + + if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) + { + return rv; + } + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, 0 /* name */, \ + vl_api_##n##_t_handler, \ + 0/* cleanup */, 0/* endian */, 0/* print */, \ + sizeof(vl_api_##n##_t), 1); + foreach_api_client_msg; +#undef _ + return 0; +} + +void +vl_client_api_unmap (void) +{ + vl_unmap_shmem (); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c new file mode 100644 index 00000000..1d40bcb7 --- /dev/null +++ b/src/vlibmemory/memory_vlib.c @@ -0,0 +1,1346 @@ +/* + *------------------------------------------------------------------ + * memory_vlib.c + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TRACE_VLIB_MEMORY_QUEUE 0 + +#include /* enumerate all vlib messages */ + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +static inline void * +vl_api_memclnt_create_t_print (vl_api_memclnt_create_t * a, void *handle) +{ + vl_print (handle, "vl_api_memclnt_create_t:\n"); + vl_print (handle, "name: %s\n", a->name); + vl_print (handle, "input_queue: 0x%wx\n", a->input_queue); + vl_print (handle, "context: %u\n", (unsigned) a->context); + vl_print (handle, "ctx_quota: %ld\n", (long) a->ctx_quota); + return handle; +} + +static inline void * +vl_api_memclnt_delete_t_print (vl_api_memclnt_delete_t * a, void *handle) +{ + vl_print (handle, "vl_api_memclnt_delete_t:\n"); + vl_print (handle, "index: %u\n", (unsigned) a->index); + vl_print (handle, "handle: 0x%wx\n", a->handle); + return handle; +} + +/* instantiate all the endian swap functions we know about */ +#define vl_endianfun +#include +#undef vl_endianfun + +void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) + __attribute__ ((weak)); + +void +vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) +{ + static int count; + + if (count++ < 5) + clib_warning ("need to link against -lvlibsocket, msg not sent!"); +} + +void +vl_msg_api_send (vl_api_registration_t * rp, u8 * elem) +{ + if (PREDICT_FALSE (rp->registration_type > REGISTRATION_TYPE_SHMEM)) + { + vl_socket_api_send (rp, elem); + } + else + { + vl_msg_api_send_shmem (rp->vl_input_queue, elem); + } +} + +u8 * +vl_api_serialize_message_table (api_main_t * am, u8 * vector) +{ + serialize_main_t _sm, *sm = &_sm; + hash_pair_t *hp; + u32 nmsg = hash_elts (am->msg_index_by_name_and_crc); + + serialize_open_vector (sm, vector); + + /* serialize the count */ + serialize_integer (sm, nmsg, sizeof (u32)); + + hash_foreach_pair (hp, am->msg_index_by_name_and_crc, ( + { + serialize_likely_small_unsigned_integer + (sm, hp->value[0]); + serialize_cstring + (sm, + (char *) hp->key); + })); + + return serialize_close_vector (sm); +} + +/* + * vl_api_memclnt_create_t_handler + */ +void +vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + vl_api_memclnt_create_reply_t *rp; + svm_region_t *svm; + unix_shared_memory_queue_t *q; + int rv = 0; + void *oldheap; + api_main_t *am = &api_main; + u8 *serialized_message_table = 0; + + /* + * This is tortured. Maintain a vlib-address-space private + * pool of client registrations. We use the shared-memory virtual + * address of client structure as a handle, to allow direct + * manipulation of context quota vbls from the client library. + * + * This scheme causes trouble w/ API message trace replay, since + * some random VA from clib_mem_alloc() certainly won't + * occur in the Linux sim. The (very) few places + * that care need to use the pool index. + * + * Putting the registration object(s) into a pool in shared memory and + * using the pool index as a handle seems like a great idea. 
+ * Unfortunately, each and every reference to that pool would need + * to be protected by a mutex: + * + * Client VLIB + * ------ ---- + * convert pool index to + * pointer. + * + * expand pool + * + * kaboom! + */ + + pool_get (am->vl_clients, regpp); + + svm = am->vlib_rp; + + if (am->serialized_message_table_in_shmem == 0) + serialized_message_table = vl_api_serialize_message_table (am, 0); + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); + + regp = *regpp; + memset (regp, 0, sizeof (*regp)); + regp->registration_type = REGISTRATION_TYPE_SHMEM; + regp->vl_api_registration_pool_index = regpp - am->vl_clients; + + q = regp->vl_input_queue = (unix_shared_memory_queue_t *) (uword) + mp->input_queue; + + regp->name = format (0, "%s", mp->name); + vec_add1 (regp->name, 0); + if (serialized_message_table) + am->serialized_message_table_in_shmem = + vec_dup (serialized_message_table); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + + vec_free (serialized_message_table); + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY); + rp->handle = (uword) regp; + rp->index = vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); + rp->context = mp->context; + rp->response = ntohl (rv); + rp->message_table = (u64) am->serialized_message_table_in_shmem; + + vl_msg_api_send_shmem (q, (u8 *) & rp); +} + +/* Application callback to clean up leftover registrations from this client */ +int vl_api_memclnt_delete_callback (u32 client_index) __attribute__ ((weak)); + +int +vl_api_memclnt_delete_callback (u32 client_index) +{ + return 0; +} + +/* + * vl_api_memclnt_delete_t_handler + */ +void +vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + vl_api_memclnt_delete_reply_t *rp; + svm_region_t *svm; + void *oldheap; + api_main_t *am = &api_main; + u32 handle, client_index, epoch; + + handle = mp->index; + + if (vl_api_memclnt_delete_callback (handle)) + return; + + epoch = vl_msg_api_handle_get_epoch (handle); + client_index = vl_msg_api_handle_get_index (handle); + + if (epoch != (am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK)) + { + clib_warning + ("Stale clnt delete index %d old epoch %d cur epoch %d", + client_index, epoch, + (am->shmem_hdr->application_restarts & VL_API_EPOCH_MASK)); + return; + } + + regpp = am->vl_clients + client_index; + + if (!pool_is_free (am->vl_clients, regpp)) + { + regp = *regpp; + svm = am->vlib_rp; + + /* $$$ check the input queue for e.g. 
punted sf's */ + + rp = vl_msg_api_alloc (sizeof (*rp)); + rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE_REPLY); + rp->handle = mp->handle; + rp->response = 1; + + vl_msg_api_send_shmem (regp->vl_input_queue, (u8 *) & rp); + + if (client_index != regp->vl_api_registration_pool_index) + { + clib_warning ("mismatch client_index %d pool_index %d", + client_index, regp->vl_api_registration_pool_index); + vl_msg_api_free (rp); + return; + } + + /* No dangling references, please */ + *regpp = 0; + + pool_put_index (am->vl_clients, regp->vl_api_registration_pool_index); + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + /* Poison the old registration */ + memset (regp, 0xF1, sizeof (*regp)); + clib_mem_free (regp); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + } + else + { + clib_warning ("unknown client ID %d", mp->index); + } +} + +void +vl_api_get_first_msg_id_t_handler (vl_api_get_first_msg_id_t * mp) +{ + vl_api_get_first_msg_id_reply_t *rmp; + unix_shared_memory_queue_t *q; + uword *p; + api_main_t *am = &api_main; + vl_api_msg_range_t *rp; + u8 name[64]; + u16 first_msg_id = ~0; + int rv = -7; /* VNET_API_ERROR_INVALID_VALUE */ + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (!q) + return; + + if (am->msg_range_by_name == 0) + goto out; + + strncpy ((char *) name, (char *) mp->name, ARRAY_LEN (name) - 1); + + p = hash_get_mem (am->msg_range_by_name, name); + if (p == 0) + goto out; + + rp = vec_elt_at_index (am->msg_ranges, p[0]); + + first_msg_id = rp->first_msg_id; + rv = 0; + +out: + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->first_msg_id = ntohs (first_msg_id); + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +#define foreach_vlib_api_msg \ +_(MEMCLNT_CREATE, memclnt_create) \ +_(MEMCLNT_DELETE, memclnt_delete) \ +_(GET_FIRST_MSG_ID, get_first_msg_id) + +/* + * vl_api_init + */ +static int +memory_api_init (char *region_name) +{ + int rv; + vl_msg_api_msg_config_t cfg; + vl_msg_api_msg_config_t *c = &cfg; + + if ((rv = vl_map_shmem (region_name, 1 /* is_vlib */ )) < 0) + return rv; + +#define _(N,n) do { \ + c->id = VL_API_##N; \ + c->name = #n; \ + c->handler = vl_api_##n##_t_handler; \ + c->cleanup = vl_noop_handler; \ + c->endian = vl_api_##n##_t_endian; \ + c->print = vl_api_##n##_t_print; \ + c->size = sizeof(vl_api_##n##_t); \ + c->traced = 1; /* trace, so these msgs print */ \ + c->replay = 0; /* don't replay client create/delete msgs */ \ + vl_msg_api_config(c);} while (0); + + foreach_vlib_api_msg; +#undef _ + + return 0; +} + +#define foreach_histogram_bucket \ +_(400) \ +_(200) \ +_(100) \ +_(10) + +typedef enum +{ +#define _(n) SLEEP_##n##_US, + foreach_histogram_bucket +#undef _ + SLEEP_N_BUCKETS, +} histogram_index_t; + +static u64 vector_rate_histogram[SLEEP_N_BUCKETS]; + +static void memclnt_queue_callback (vlib_main_t * vm); + +static uword +memclnt_process (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + uword mp; + vl_shmem_hdr_t *shm; + unix_shared_memory_queue_t *q; + clib_error_t *e; + int rv; + api_main_t *am = &api_main; + f64 dead_client_scan_time; + f64 sleep_time, start_time; + f64 vector_rate; + + vlib_set_queue_signal_callback (vm, memclnt_queue_callback); + + if ((rv = memory_api_init (am->region_name)) < 0) + { + clib_warning ("memory_api_init returned %d, wait for godot...", rv); + vlib_process_suspend (vm, 1e70); + } + + shm = am->shmem_hdr; + 
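+ /* shared memory header and main input queue were set up by memory_api_init() above */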
ASSERT (shm); + q = shm->vl_input_queue; + ASSERT (q); + + e = vlib_call_init_exit_functions + (vm, vm->api_init_function_registrations, 1 /* call_once */ ); + if (e) + clib_error_report (e); + + sleep_time = 20.0; + dead_client_scan_time = vlib_time_now (vm) + 20.0; + + /* $$$ pay attention to frame size, control CPU usage */ + while (1) + { + uword event_type __attribute__ ((unused)); + i8 *headp; + int need_broadcast; + + /* + * There's a reason for checking the queue before + * sleeping. If the vlib application crashes, it's entirely + * possible for a client to enqueue a connect request + * during the process restart interval. + * + * Unless some force of physics causes the new incarnation + * of the application to process the request, the client will + * sit and wait for Godot... + */ + vector_rate = vlib_last_vector_length_per_node (vm); + start_time = vlib_time_now (vm); + while (1) + { + pthread_mutex_lock (&q->mutex); + if (q->cursize == 0) + { + vm->api_queue_nonempty = 0; + pthread_mutex_unlock (&q->mutex); + + if (TRACE_VLIB_MEMORY_QUEUE) + { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "q-underflow: len %d", + .format_args = "i4", + }; + /* *INDENT-ON* */ + struct + { + u32 len; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->len = 0; + } + sleep_time = 20.0; + break; + } + + headp = (i8 *) (q->data + sizeof (uword) * q->head); + clib_memcpy (&mp, headp, sizeof (uword)); + + q->head++; + need_broadcast = (q->cursize == q->maxsize / 2); + q->cursize--; + + if (PREDICT_FALSE (q->head == q->maxsize)) + q->head = 0; + pthread_mutex_unlock (&q->mutex); + if (need_broadcast) + (void) pthread_cond_broadcast (&q->condvar); + + vl_msg_api_handler_with_vm_node (am, (void *) mp, vm, node); + + /* Allow no more than 10us without a pause */ + if (vlib_time_now (vm) > start_time + 10e-6) + { + int index = SLEEP_400_US; + if (vector_rate > 40.0) + sleep_time = 400e-6; + else if (vector_rate > 20.0) + { + index = SLEEP_200_US; + sleep_time = 200e-6; + } + else if (vector_rate >= 1.0) + { + index = SLEEP_100_US; + sleep_time = 100e-6; + } + else + { + index = SLEEP_10_US; + sleep_time = 10e-6; + } + vector_rate_histogram[index] += 1; + break; + } + } + + event_type = vlib_process_wait_for_event_or_clock (vm, sleep_time); + vm->queue_signal_pending = 0; + vlib_process_get_events (vm, 0 /* event_data */ ); + + if (vlib_time_now (vm) > dead_client_scan_time) + { + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + unix_shared_memory_queue_t *q; + static u32 *dead_indices; + static u32 *confused_indices; + + vec_reset_length (dead_indices); + vec_reset_length (confused_indices); + + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + if (regp) + { + q = regp->vl_input_queue; + if (kill (q->consumer_pid, 0) < 0) + { + vec_add1(dead_indices, regpp - am->vl_clients); + } + } + else + { + clib_warning ("NULL client registration index %d", + regpp - am->vl_clients); + vec_add1 (confused_indices, regpp - am->vl_clients); + } + })); + /* *INDENT-ON* */ + /* This should "never happen," but if it does, fix it... 
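+ * by returning the NULL registration slots to the client pool.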
*/ + if (PREDICT_FALSE (vec_len (confused_indices) > 0)) + { + int i; + for (i = 0; i < vec_len (confused_indices); i++) + { + pool_put_index (am->vl_clients, confused_indices[i]); + } + } + + if (PREDICT_FALSE (vec_len (dead_indices) > 0)) + { + int i; + svm_region_t *svm; + void *oldheap; + + /* Allow the application to clean up its registrations */ + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + u32 handle; + + handle = vl_msg_api_handle_from_index_and_epoch + (dead_indices[i], shm->application_restarts); + (void) vl_api_memclnt_delete_callback (handle); + } + } + + svm = am->vlib_rp; + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + + for (i = 0; i < vec_len (dead_indices); i++) + { + regpp = pool_elt_at_index (am->vl_clients, dead_indices[i]); + if (regpp) + { + /* Poison the old registration */ + memset (*regpp, 0xF3, sizeof (**regpp)); + clib_mem_free (*regpp); + /* no dangling references, please */ + *regpp = 0; + } + else + { + svm_pop_heap (oldheap); + clib_warning ("Duplicate free, client index %d", + regpp - am->vl_clients); + oldheap = svm_push_data_heap (svm); + } + } + + svm_client_scan_this_region_nolock (am->vlib_rp); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + for (i = 0; i < vec_len (dead_indices); i++) + pool_put_index (am->vl_clients, dead_indices[i]); + } + + dead_client_scan_time = vlib_time_now (vm) + 20.0; + } + + if (TRACE_VLIB_MEMORY_QUEUE) + { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = { + .format = "q-awake: len %d", + .format_args = "i4", + }; + /* *INDENT-ON* */ + struct + { + u32 len; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->len = q->cursize; + } + } + + return 0; +} + +static clib_error_t * +vl_api_show_histogram_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cli_cmd) +{ + u64 total_counts = 0; + int i; + + for (i = 0; i < SLEEP_N_BUCKETS; i++) + { + total_counts += vector_rate_histogram[i]; + } + + if (total_counts == 0) + { + vlib_cli_output (vm, "No control-plane activity."); + return 0; + } + +#define _(n) \ + do { \ + f64 percent; \ + percent = ((f64) vector_rate_histogram[SLEEP_##n##_US]) \ + / (f64) total_counts; \ + percent *= 100.0; \ + vlib_cli_output (vm, "Sleep %3d us: %llu, %.2f%%",n, \ + vector_rate_histogram[SLEEP_##n##_US], \ + percent); \ + } while (0); + foreach_histogram_bucket; +#undef _ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) = { + .path = "show api histogram", + .short_help = "show api histogram", + .function = vl_api_show_histogram_command, +}; +/* *INDENT-ON* */ + +static clib_error_t * +vl_api_clear_histogram_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cli_cmd) +{ + int i; + + for (i = 0; i < SLEEP_N_BUCKETS; i++) + vector_rate_histogram[i] = 0; + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = { + .path = "clear api histogram", + .short_help = "clear api histogram", + .function = vl_api_clear_histogram_command, +}; +/* *INDENT-ON* */ + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memclnt_node,static) = { + .function = memclnt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "api-rx-from-ring", + .state = VLIB_NODE_STATE_DISABLED, +}; +/* *INDENT-ON* */ + +static void +memclnt_queue_callback (vlib_main_t * vm) +{ + static volatile int *cursizep; + + if (PREDICT_FALSE (cursizep == 0)) + { + api_main_t *am = 
&api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q; + + if (shmem_hdr == 0) + return; + + q = shmem_hdr->vl_input_queue; + if (q == 0) + return; + cursizep = &q->cursize; + } + + if (*cursizep >= 1) + { + vm->queue_signal_pending = 1; + vm->api_queue_nonempty = 1; + vlib_process_signal_event (vm, memclnt_node.index, + /* event_type */ 0, /* event_data */ 0); + } +} + +void +vl_enable_disable_memory_api (vlib_main_t * vm, int enable) +{ + vlib_node_set_state (vm, memclnt_node.index, + (enable + ? VLIB_NODE_STATE_POLLING + : VLIB_NODE_STATE_DISABLED)); +} + +static uword +api_rx_from_node (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + uword n_packets = frame->n_vectors; + uword n_left_from; + u32 *from; + static u8 *long_msg; + + vec_validate (long_msg, 4095); + n_left_from = frame->n_vectors; + from = vlib_frame_args (frame); + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + void *msg; + uword msg_len; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + from += 1; + n_left_from -= 1; + + msg = b0->data + b0->current_data; + msg_len = b0->current_length; + if (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + ASSERT (long_msg != 0); + _vec_len (long_msg) = 0; + vec_add (long_msg, msg, msg_len); + while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0 = vlib_get_buffer (vm, b0->next_buffer); + msg = b0->data + b0->current_data; + msg_len = b0->current_length; + vec_add (long_msg, msg, msg_len); + } + msg = long_msg; + } + vl_msg_api_handler_no_trace_no_free (msg); + } + + /* Free what we've been given. */ + vlib_buffer_free (vm, vlib_frame_args (frame), n_packets); + + return n_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (api_rx_from_node_node,static) = { + .function = api_rx_from_node, + .type = VLIB_NODE_TYPE_INTERNAL, + .vector_size = 4, + .name = "api-rx-from-node", +}; +/* *INDENT-ON* */ + +static clib_error_t * +setup_memclnt_exit (vlib_main_t * vm) +{ + atexit (vl_unmap_shmem); + return 0; +} + +VLIB_INIT_FUNCTION (setup_memclnt_exit); + + +static clib_error_t * +vl_api_ring_command (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cli_cmd) +{ + int i; + ring_alloc_t *ap; + vl_shmem_hdr_t *shmem_hdr; + api_main_t *am = &api_main; + + shmem_hdr = am->shmem_hdr; + + if (shmem_hdr == 0) + { + vlib_cli_output (vm, "Shared memory segment not initialized...\n"); + return 0; + } + + vlib_cli_output (vm, "%8s %8s %8s %8s %8s\n", + "Owner", "Size", "Nitems", "Hits", "Misses"); + + ap = shmem_hdr->vl_rings; + + for (i = 0; i < vec_len (shmem_hdr->vl_rings); i++) + { + vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", + "vlib", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + ap = shmem_hdr->client_rings; + + for (i = 0; i < vec_len (shmem_hdr->client_rings); i++) + { + vlib_cli_output (vm, "%8s %8d %8d %8d %8d\n", + "clnt", ap->size, ap->nitems, ap->hits, ap->misses); + ap++; + } + + vlib_cli_output (vm, "%d ring miss fallback allocations\n", + am->ring_misses); + + vlib_cli_output (vm, "%d application restarts, %d reclaimed msgs\n", + shmem_hdr->application_restarts, + shmem_hdr->restart_reclaims); + return 0; +} + +void dump_socket_clients (vlib_main_t * vm, api_main_t * am) + __attribute__ ((weak)); + +void +dump_socket_clients (vlib_main_t * vm, api_main_t * am) +{ +} + +static clib_error_t * +vl_api_client_command (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cli_cmd) +{ + vl_api_registration_t **regpp, *regp; + unix_shared_memory_queue_t *q; + char 
*health; + api_main_t *am = &api_main; + u32 *confused_indices = 0; + + if (!pool_elts (am->vl_clients)) + goto socket_clients; + vlib_cli_output (vm, "Shared memory clients"); + vlib_cli_output (vm, "%16s %8s %14s %18s %s", + "Name", "PID", "Queue Length", "Queue VA", "Health"); + + /* *INDENT-OFF* */ + pool_foreach (regpp, am->vl_clients, + ({ + regp = *regpp; + + if (regp) + { + q = regp->vl_input_queue; + if (kill (q->consumer_pid, 0) < 0) + { + health = "DEAD"; + } + else + { + health = "alive"; + } + vlib_cli_output (vm, "%16s %8d %14d 0x%016llx %s\n", + regp->name, q->consumer_pid, q->cursize, + q, health); + } + else + { + clib_warning ("NULL client registration index %d", + regpp - am->vl_clients); + vec_add1 (confused_indices, regpp - am->vl_clients); + } + })); + /* *INDENT-ON* */ + + /* This should "never happen," but if it does, fix it... */ + if (PREDICT_FALSE (vec_len (confused_indices) > 0)) + { + int i; + for (i = 0; i < vec_len (confused_indices); i++) + { + pool_put_index (am->vl_clients, confused_indices[i]); + } + } + vec_free (confused_indices); + + if (am->missing_clients) + vlib_cli_output (vm, "%u messages with missing clients", + am->missing_clients); +socket_clients: + dump_socket_clients (vm, am); + + return 0; +} + +static clib_error_t * +vl_api_status_command (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cli_cmd) +{ + api_main_t *am = &api_main; + + // check if rx_trace and tx_trace are not null pointers + + if (am->rx_trace == 0) + { + vlib_cli_output (vm, "RX Trace disabled\n"); + } + else + { + if (am->rx_trace->enabled == 0) + vlib_cli_output (vm, "RX Trace disabled\n"); + else + vlib_cli_output (vm, "RX Trace enabled\n"); + } + + if (am->tx_trace == 0) + { + vlib_cli_output (vm, "TX Trace disabled\n"); + } + else + { + if (am->tx_trace->enabled == 0) + vlib_cli_output (vm, "TX Trace disabled\n"); + else + vlib_cli_output (vm, "TX Trace enabled\n"); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_command, static) = { + .path = "show api", + .short_help = "Show API information", +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_ring_command, static) = { + .path = "show api ring-stats", + .short_help = "Message ring statistics", + .function = vl_api_ring_command, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_clients_command, static) = { + .path = "show api clients", + .short_help = "Client information", + .function = vl_api_client_command, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_status_command, static) = { + .path = "show api status", + .short_help = "Show API trace status", + .function = vl_api_status_command, +}; +/* *INDENT-ON* */ + +static clib_error_t * +vl_api_message_table_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cli_cmd) +{ + api_main_t *am = &api_main; + int i; + int verbose = 0; + + if (unformat (input, "verbose")) + verbose = 1; + + + if (verbose == 0) + vlib_cli_output (vm, "%-4s %s", "ID", "Name"); + else + vlib_cli_output (vm, "%-4s %-40s %6s %7s", "ID", "Name", "Bounce", + "MP-safe"); + + for (i = 1; i < vec_len (am->msg_names); i++) + { + if (verbose == 0) + { + vlib_cli_output (vm, "%-4d %s", i, + am->msg_names[i] ? am->msg_names[i] : + " [no handler]"); + } + else + { + vlib_cli_output (vm, "%-4d %-40s %6d %7d", i, + am->msg_names[i] ? 
am->msg_names[i] : + " [no handler]", am->message_bounce[i], + am->is_mp_safe[i]); + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) = { + .path = "show api message-table", + .short_help = "Message Table", + .function = vl_api_message_table_command, +}; +/* *INDENT-ON* */ + +static clib_error_t * +vl_api_trace_command (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cli_cmd) +{ + u32 nitems = 1024; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "rx nitems %u", &nitems) || unformat (input, "rx")) + goto configure; + else if (unformat (input, "tx nitems %u", &nitems) + || unformat (input, "tx")) + { + which = VL_API_TRACE_RX; + goto configure; + } + else if (unformat (input, "on rx")) + { + vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1); + } + else if (unformat (input, "on tx")) + { + vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 1); + } + else if (unformat (input, "on")) + { + vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0); + vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0); + vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0); + vl_msg_api_trace_free (am, VL_API_TRACE_RX); + vl_msg_api_trace_free (am, VL_API_TRACE_TX); + } + else if (unformat (input, "debug on")) + { + am->msg_print_flag = 1; + } + else if (unformat (input, "debug off")) + { + am->msg_print_flag = 0; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; + +configure: + if (vl_msg_api_trace_configure (am, which, nitems)) + { + vlib_cli_output (vm, "warning: trace configure error (%d, %d)", + which, nitems); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (trace, static) = { + .path = "set api-trace", + .short_help = "API trace", + .function = vl_api_trace_command, +}; +/* *INDENT-ON* */ + +clib_error_t * +vlibmemory_init (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + svm_map_region_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + a->root_path = am->root_path; + a->name = SVM_GLOBAL_REGION_NAME; + a->baseva = (am->global_baseva != 0) ? + am->global_baseva : SVM_GLOBAL_REGION_BASEVA; + a->size = (am->global_size != 0) ? am->global_size : SVM_GLOBAL_REGION_SIZE; + a->flags = SVM_FLAGS_NODATA; + a->uid = am->api_uid; + a->gid = am->api_gid; + a->pvt_heap_size = + (am->global_pvt_heap_size != + 0) ? am->global_pvt_heap_size : SVM_PVT_MHEAP_SIZE; + + svm_region_init_args (a); + return 0; +} + +VLIB_INIT_FUNCTION (vlibmemory_init); + +void +vl_set_memory_region_name (char *name) +{ + api_main_t *am = &api_main; + + am->region_name = name; +} + +static int +range_compare (vl_api_msg_range_t * a0, vl_api_msg_range_t * a1) +{ + int len0, len1, clen; + + len0 = vec_len (a0->name); + len1 = vec_len (a1->name); + clen = len0 < len1 ? 
len0 : len1; + return (strncmp ((char *) a0->name, (char *) a1->name, clen)); +} + +static u8 * +format_api_msg_range (u8 * s, va_list * args) +{ + vl_api_msg_range_t *rp = va_arg (*args, vl_api_msg_range_t *); + + if (rp == 0) + s = format (s, "%-20s%9s%9s", "Name", "First-ID", "Last-ID"); + else + s = format (s, "%-20s%9d%9d", rp->name, rp->first_msg_id, + rp->last_msg_id); + + return s; +} + +static clib_error_t * +vl_api_show_plugin_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cli_cmd) +{ + api_main_t *am = &api_main; + vl_api_msg_range_t *rp = 0; + int i; + + if (vec_len (am->msg_ranges) == 0) + { + vlib_cli_output (vm, "No plugin API message ranges configured..."); + return 0; + } + + rp = vec_dup (am->msg_ranges); + + vec_sort_with_function (rp, range_compare); + + vlib_cli_output (vm, "Plugin API message ID ranges...\n"); + vlib_cli_output (vm, "%U", format_api_msg_range, 0 /* header */ ); + + for (i = 0; i < vec_len (rp); i++) + vlib_cli_output (vm, "%U", format_api_msg_range, rp + i); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { + .path = "show api plugin", + .short_help = "show api plugin", + .function = vl_api_show_plugin_command, +}; +/* *INDENT-ON* */ + +static void +vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp) +{ + vl_api_rpc_reply_t *rmp; + int (*fp) (void *); + i32 rv = 0; + vlib_main_t *vm = vlib_get_main (); + + if (mp->function == 0) + { + rv = -1; + clib_warning ("rpc NULL function pointer"); + } + + else + { + if (mp->need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + + fp = uword_to_pointer (mp->function, int (*)(void *)); + rv = fp (mp->data); + + if (mp->need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + } + + if (mp->send_reply) + { + unix_shared_memory_queue_t *q = + vl_api_client_index_to_input_queue (mp->client_index); + if (q) + { + rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY); + rmp->context = mp->context; + rmp->retval = rv; + vl_msg_api_send_shmem (q, (u8 *) & rmp); + } + } + if (mp->multicast) + { + clib_warning ("multicast not yet implemented..."); + } +} + +static void +vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp) +{ + clib_warning ("unimplemented"); +} + +void +vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_t *mp; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q; + + /* Main thread: call the function directly */ + if (os_get_cpu_number () == 0) + { + vlib_main_t *vm = vlib_get_main (); + void (*call_fp) (void *); + + vlib_worker_thread_barrier_sync (vm); + + call_fp = fp; + call_fp (data); + + vlib_worker_thread_barrier_release (vm); + return; + } + + /* Any other thread, actually do an RPC call... */ + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length); + + memset (mp, 0, sizeof (*mp)); + clib_memcpy (mp->data, data, data_length); + mp->_vl_msg_id = ntohs (VL_API_RPC_CALL); + mp->function = pointer_to_uword (fp); + mp->need_barrier_sync = 1; + + /* + * Use the "normal" control-plane mechanism for the main thread. + * Well, almost. if the main input queue is full, we cannot + * block. Otherwise, we can expect a barrier sync timeout. 
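 * Hence the loop below: take the queue mutex with pthread_mutex_trylock (),
 * and call vlib_worker_thread_barrier_check () between attempts, so that a
 * barrier requested by the main thread can still complete while this worker
 * waits for the mutex or for queue space to open up.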
+ */ + q = shmem_hdr->vl_input_queue; + + while (pthread_mutex_trylock (&q->mutex)) + vlib_worker_thread_barrier_check (); + + while (PREDICT_FALSE (unix_shared_memory_queue_is_full (q))) + { + pthread_mutex_unlock (&q->mutex); + vlib_worker_thread_barrier_check (); + while (pthread_mutex_trylock (&q->mutex)) + vlib_worker_thread_barrier_check (); + } + + vl_msg_api_send_shmem_nolock (q, (u8 *) & mp); + + pthread_mutex_unlock (&q->mutex); +} + +#define foreach_rpc_api_msg \ +_(RPC_CALL,rpc_call) \ +_(RPC_REPLY,rpc_reply) + +static clib_error_t * +rpc_api_hookup (vlib_main_t * vm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 0 /* do not trace */); + foreach_rpc_api_msg; +#undef _ + return 0; +} + +VLIB_API_INIT_FUNCTION (rpc_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c new file mode 100644 index 00000000..25d28910 --- /dev/null +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -0,0 +1,324 @@ +/* + *------------------------------------------------------------------ + * unix_shared_memory_queue.c - unidirectional shared-memory queues + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * unix_shared_memory_queue_init + * + * nels = number of elements on the queue + * elsize = element size, presumably 4 and cacheline-size will + * be popular choices. + * coid = consumer coid, from ChannelCreate + * pid = consumer pid + * pulse_code = pulse code consumer expects + * pulse_value = pulse value consumer expects + * consumer_prio = consumer's priority, so pulses won't change + * the consumer's priority. + * + * The idea is to call this function in the queue consumer, + * and e-mail the queue pointer to the producer(s). + * + * The spp process / main thread allocates one of these + * at startup; its main input queue. The spp main input queue + * has a pointer to it in the shared memory segment header. + * + * You probably want to be on an svm data heap before calling this + * function. 
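 *
 * A minimal usage sketch (illustrative only; the element count, element
 * size, and variable names below are arbitrary, not part of the API):
 *
 *   unix_shared_memory_queue_t *q;
 *   u32 elem = 42;
 *
 *   // consumer: 1024 elements of 4 bytes each, no kick signal
 *   q = unix_shared_memory_queue_init (1024, sizeof (u32), getpid (), 0);
 *
 *   // producer: blocking add (nowait == 0)
 *   unix_shared_memory_queue_add (q, (u8 *) &elem, 0);
 *
 *   // consumer: blocking sub (nowait == 0)
 *   unix_shared_memory_queue_sub (q, (u8 *) &elem, 0);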
+ */ +unix_shared_memory_queue_t * +unix_shared_memory_queue_init (int nels, + int elsize, + int consumer_pid, + int signal_when_queue_non_empty) +{ + unix_shared_memory_queue_t *q; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + + q = clib_mem_alloc_aligned (sizeof (unix_shared_memory_queue_t) + + nels * elsize, CLIB_CACHE_LINE_BYTES); + memset (q, 0, sizeof (*q)); + + q->elsize = elsize; + q->maxsize = nels; + q->consumer_pid = consumer_pid; + q->signal_when_queue_non_empty = signal_when_queue_non_empty; + + memset (&attr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (attr)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("pthread_mutexattr_setpshared"); + if (pthread_mutex_init (&q->mutex, &attr)) + clib_unix_warning ("mutex_init"); + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + /* prints funny-looking messages in the Linux target */ + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + if (pthread_cond_init (&q->condvar, &cattr)) + clib_unix_warning ("cond_init1"); + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("cond_init2"); + + return (q); +} + +/* + * unix_shared_memory_queue_free + */ +void +unix_shared_memory_queue_free (unix_shared_memory_queue_t * q) +{ + (void) pthread_mutex_destroy (&q->mutex); + (void) pthread_cond_destroy (&q->condvar); + clib_mem_free (q); +} + +void +unix_shared_memory_queue_lock (unix_shared_memory_queue_t * q) +{ + pthread_mutex_lock (&q->mutex); +} + +void +unix_shared_memory_queue_unlock (unix_shared_memory_queue_t * q) +{ + pthread_mutex_unlock (&q->mutex); +} + +int +unix_shared_memory_queue_is_full (unix_shared_memory_queue_t * q) +{ + return q->cursize == q->maxsize; +} + +/* + * unix_shared_memory_queue_add_nolock + */ +int +unix_shared_memory_queue_add_nolock (unix_shared_memory_queue_t * q, + u8 * elem) +{ + i8 *tailp; + int need_broadcast = 0; + + if (PREDICT_FALSE (q->cursize == q->maxsize)) + { + while (q->cursize == q->maxsize) + { + (void) pthread_cond_wait (&q->condvar, &q->mutex); + } + } + + tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); + clib_memcpy (tailp, elem, q->elsize); + + q->tail++; + q->cursize++; + + need_broadcast = (q->cursize == 1); + + if (q->tail == q->maxsize) + q->tail = 0; + + if (need_broadcast) + { + (void) pthread_cond_broadcast (&q->condvar); + if (q->signal_when_queue_non_empty) + kill (q->consumer_pid, q->signal_when_queue_non_empty); + } + return 0; +} + +int +unix_shared_memory_queue_add_raw (unix_shared_memory_queue_t * q, u8 * elem) +{ + i8 *tailp; + + if (PREDICT_FALSE (q->cursize == q->maxsize)) + { + while (q->cursize == q->maxsize) + ; + } + + tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); + clib_memcpy (tailp, elem, q->elsize); + + q->tail++; + q->cursize++; + + if (q->tail == q->maxsize) + q->tail = 0; + return 0; +} + + +/* + * unix_shared_memory_queue_add + */ +int +unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, + u8 * elem, int nowait) +{ + i8 *tailp; + int need_broadcast = 0; + + if (nowait) + { + /* zero on success */ + if (pthread_mutex_trylock (&q->mutex)) + { + return (-1); + } + } + else + pthread_mutex_lock (&q->mutex); + + if (PREDICT_FALSE (q->cursize == q->maxsize)) + { + if (nowait) + { + pthread_mutex_unlock (&q->mutex); + return (-2); + } + 
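      /* Blocking mode and the queue is full: wait on the condvar until
         the consumer dequeues an element and broadcasts. */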
while (q->cursize == q->maxsize) + { + (void) pthread_cond_wait (&q->condvar, &q->mutex); + } + } + + tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); + clib_memcpy (tailp, elem, q->elsize); + + q->tail++; + q->cursize++; + + need_broadcast = (q->cursize == 1); + + if (q->tail == q->maxsize) + q->tail = 0; + + if (need_broadcast) + { + (void) pthread_cond_broadcast (&q->condvar); + if (q->signal_when_queue_non_empty) + kill (q->consumer_pid, q->signal_when_queue_non_empty); + } + pthread_mutex_unlock (&q->mutex); + + return 0; +} + +/* + * unix_shared_memory_queue_sub + */ +int +unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, + u8 * elem, int nowait) +{ + i8 *headp; + int need_broadcast = 0; + + if (nowait) + { + /* zero on success */ + if (pthread_mutex_trylock (&q->mutex)) + { + return (-1); + } + } + else + pthread_mutex_lock (&q->mutex); + + if (PREDICT_FALSE (q->cursize == 0)) + { + if (nowait) + { + pthread_mutex_unlock (&q->mutex); + return (-2); + } + while (q->cursize == 0) + { + (void) pthread_cond_wait (&q->condvar, &q->mutex); + } + } + + headp = (i8 *) (&q->data[0] + q->elsize * q->head); + clib_memcpy (elem, headp, q->elsize); + + q->head++; + if (q->cursize == q->maxsize) + need_broadcast = 1; + + q->cursize--; + + if (q->head == q->maxsize) + q->head = 0; + + if (need_broadcast) + (void) pthread_cond_broadcast (&q->condvar); + + pthread_mutex_unlock (&q->mutex); + + return 0; +} + +int +unix_shared_memory_queue_sub_raw (unix_shared_memory_queue_t * q, u8 * elem) +{ + i8 *headp; + + if (PREDICT_FALSE (q->cursize == 0)) + { + while (q->cursize == 0) + ; + } + + headp = (i8 *) (&q->data[0] + q->elsize * q->head); + clib_memcpy (elem, headp, q->elsize); + + q->head++; + q->cursize--; + + if (q->head == q->maxsize) + q->head = 0; + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h new file mode 100644 index 00000000..f758f17c --- /dev/null +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -0,0 +1,69 @@ +/* + *------------------------------------------------------------------ + * unix_shared_memory_queue.h - shared-memory queues + * + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef included_unix_shared_memory_queue_h +#define included_unix_shared_memory_queue_h + +#include +#include + +typedef struct _unix_shared_memory_queue +{ + pthread_mutex_t mutex; /* 8 bytes */ + pthread_cond_t condvar; /* 8 bytes */ + int head; + int tail; + int cursize; + int maxsize; + int elsize; + int consumer_pid; + int signal_when_queue_non_empty; + char data[0]; +} unix_shared_memory_queue_t; + +unix_shared_memory_queue_t *unix_shared_memory_queue_init (int nels, + int elsize, + int consumer_pid, + int + signal_when_queue_non_empty); +void unix_shared_memory_queue_free (unix_shared_memory_queue_t * q); +int unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, + u8 * elem, int nowait); +int unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, + u8 * elem, int nowait); +void unix_shared_memory_queue_lock (unix_shared_memory_queue_t * q); +void unix_shared_memory_queue_unlock (unix_shared_memory_queue_t * q); +int unix_shared_memory_queue_is_full (unix_shared_memory_queue_t * q); +int unix_shared_memory_queue_add_nolock (unix_shared_memory_queue_t * q, + u8 * elem); + +int unix_shared_memory_queue_sub_raw (unix_shared_memory_queue_t * q, + u8 * elem); +int unix_shared_memory_queue_add_raw (unix_shared_memory_queue_t * q, + u8 * elem); + +#endif /* included_unix_shared_memory_queue_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/vl_memory_api_h.h b/src/vlibmemory/vl_memory_api_h.h new file mode 100644 index 00000000..c1ae79b1 --- /dev/null +++ b/src/vlibmemory/vl_memory_api_h.h @@ -0,0 +1,32 @@ +/* + *------------------------------------------------------------------ + * vl_memory_api_h.h - memory API headers, in a specific order. + * + * Copyright (c) 2009-2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +/* + * Add to the bottom of the #include list, or elves will steal your + * keyboard in the middle of the night! + */ +#include + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/vl_memory_msg_enum.h b/src/vlibmemory/vl_memory_msg_enum.h new file mode 100644 index 00000000..974c0c21 --- /dev/null +++ b/src/vlibmemory/vl_memory_msg_enum.h @@ -0,0 +1,42 @@ +/* + *------------------------------------------------------------------ + * vl_memory_msg_enum.h - Our view of how to number API messages + * Clients have their own view, which has to agree with ours. + * + * Copyright (c) 2009-2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __VL_MSG_ENUM_H__ +#define __VL_MSG_ENUM_H__ + +#include + +#define vl_msg_id(n,h) n, +typedef enum +{ + VL_ILLEGAL_MESSAGE_ID = 0, +#include +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* __VL_MSG_ENUM_H__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 0691d6e9ff4388dc85e150c40bbc877bb74c02b1 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 5 Jan 2017 10:08:52 -0500 Subject: Fix uninitialized stack local, VPP-581 Sporadically messes up the client message allocation ring, by setting c->message_bounce[msg_id] non-zero. A day-1 bug, made blatantly obvious by the python API language binding for no particular reason. Manually cherry-picked from stable/1701 due to the recent tree reorganization. Change-Id: Ifa03c5487436cbe50a6204db48fd9ce4938e32bb Signed-off-by: Dave Barach --- src/vlibapi/api_shared.c | 2 ++ src/vlibmemory/memory_vlib.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'src/vlibmemory') diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 6a04fac9..18067d1d 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -691,6 +691,8 @@ vl_msg_api_set_handlers (int id, char *name, void *handler, void *cleanup, vl_msg_api_msg_config_t cfg; vl_msg_api_msg_config_t *c = &cfg; + memset (c, 0, sizeof (*c)); + c->id = id; c->name = name; c->handler = handler; diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 1d40bcb7..69f35d72 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -347,6 +347,8 @@ memory_api_init (char *region_name) vl_msg_api_msg_config_t cfg; vl_msg_api_msg_config_t *c = &cfg; + memset (c, 0, sizeof (*c)); + if ((rv = vl_map_shmem (region_name, 1 /* is_vlib */ )) < 0) return rv; @@ -360,6 +362,7 @@ memory_api_init (char *region_name) c->size = sizeof(vl_api_##n##_t); \ c->traced = 1; /* trace, so these msgs print */ \ c->replay = 0; /* don't replay client create/delete msgs */ \ + c->message_bounce = 0; /* don't bounce this message */ \ vl_msg_api_config(c);} while (0); foreach_vlib_api_msg; -- cgit 1.2.3-korg From 842b9c59cc21b3e2917aaa25069fb15addf976f1 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 9 Jan 2017 15:54:00 -0500 Subject: Self-service garbage collection for the API message allocator Change-Id: Iadc08eede15fa5978e4010bbece0232aab8b0fee Signed-off-by: Dave Barach --- src/vlibapi/api.h | 3 ++- src/vlibmemory/api.h | 3 +++ src/vlibmemory/memory_shared.c | 21 +++++++++++++++++++++ src/vlibmemory/memory_vlib.c | 7 ++++--- 4 files changed, 30 insertions(+), 4 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 970a0ee0..fcb101d7 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -124,6 +124,7 @@ typedef struct u8 *is_mp_safe; struct ring_alloc_ *arings; u32 ring_misses; + u32 garbage_collects; u32 missing_clients; vl_api_trace_t *rx_trace; vl_api_trace_t *tx_trace; @@ -212,7 +213,7 @@ typedef struct msgbuf_ { 
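  /* Message header prepended to every shared-memory API message.
     gc_mark_timestamp is stamped the first time the allocator finds this
     slot still busy; if it is still busy more than 10 seconds later, the
     message is presumed leaked and the slot is reclaimed (see
     vl_msg_api_alloc_internal below). */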
unix_shared_memory_queue_t *q; u32 data_len; - u32 pad; + u32 gc_mark_timestamp; u8 data[0]; } msgbuf_t; diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index 54a0a001..8e44c20d 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -86,6 +86,9 @@ typedef struct vl_shmem_hdr_ /* Number of messages reclaimed during application restart */ u32 restart_reclaims; + /* Number of garbage-collected messages */ + u32 garbage_collects; + } vl_shmem_hdr_t; #define VL_SHM_VERSION 2 diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index d8d32004..c41f32f7 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -95,12 +95,31 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) */ if (rv->q) { + u32 now = (u32) time (0); + + if (PREDICT_TRUE (rv->gc_mark_timestamp == 0)) + rv->gc_mark_timestamp = now; + else + { + if (now - rv->gc_mark_timestamp > 10) + { + if (CLIB_DEBUG > 0) + clib_warning ("garbage collect pool %d ring %d index %d", + pool, i, q->head); + shmem_hdr->garbage_collects++; + goto collected; + } + } + + /* yes, loser; try next larger pool */ ap[i].misses++; if (pool == 0) pthread_mutex_unlock (&q->mutex); continue; } + collected: + /* OK, we have a winner */ ap[i].hits++; /* @@ -108,6 +127,7 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) * don't need to know the queue to free the item. */ rv->q = q; + rv->gc_mark_timestamp = 0; q->head++; if (q->head == q->maxsize) q->head = 0; @@ -201,6 +221,7 @@ vl_msg_api_free (void *a) if (rv->q) { rv->q = 0; + rv->gc_mark_timestamp = 0; return; } diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 69f35d72..7d21c9dd 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -853,9 +853,10 @@ vl_api_ring_command (vlib_main_t * vm, vlib_cli_output (vm, "%d ring miss fallback allocations\n", am->ring_misses); - vlib_cli_output (vm, "%d application restarts, %d reclaimed msgs\n", - shmem_hdr->application_restarts, - shmem_hdr->restart_reclaims); + vlib_cli_output + (vm, "%d application restarts, %d reclaimed msgs, %d garbage collects\n", + shmem_hdr->application_restarts, + shmem_hdr->restart_reclaims, shmem_hdr->garbage_collects); return 0; } -- cgit 1.2.3-korg From 987e11dffecc4ec3f55ae3d2fa5d2cc40dcf91f0 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 27 Feb 2017 13:10:27 -0500 Subject: Trace plugin binary API message range allocation Change-Id: I544a5d2906548607b69f999567b92f802fddddbb Signed-off-by: Dave Barach --- src/vlibmemory/memclnt.api | 15 ++++++++ src/vlibmemory/memory_vlib.c | 82 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 89 insertions(+), 8 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index 2f654caf..0532d7b6 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -89,3 +89,18 @@ define get_first_msg_id_reply { i32 retval; u16 first_msg_id; }; + +/* + * Trace the plugin message-id allocator + * so we stand a chance of dealing with different sets of plugins + * at api trace replay time + */ + +manual_print define trace_plugin_msg_ids +{ + u32 client_index; + u32 context; + u8 plugin_name[128]; + u16 first_msg_id; + u16 last_msg_id; +}; diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 7d21c9dd..231caa58 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -70,6 +70,17 @@ vl_api_memclnt_delete_t_print 
(vl_api_memclnt_delete_t * a, void *handle) return handle; } +static inline void * +vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a, + void *handle) +{ + vl_print (handle, "vl_api_trace_plugin_msg_ids: %s first %u last %u\n", + a->plugin_name, + clib_host_to_net_u16 (a->first_msg_id), + clib_host_to_net_u16 (a->last_msg_id)); + return handle; +} + /* instantiate all the endian swap functions we know about */ #define vl_endianfun #include @@ -112,14 +123,13 @@ vl_api_serialize_message_table (api_main_t * am, u8 * vector) /* serialize the count */ serialize_integer (sm, nmsg, sizeof (u32)); - hash_foreach_pair (hp, am->msg_index_by_name_and_crc, ( - { - serialize_likely_small_unsigned_integer - (sm, hp->value[0]); - serialize_cstring - (sm, - (char *) hp->key); - })); + /* *INDENT-OFF* */ + hash_foreach_pair (hp, am->msg_index_by_name_and_crc, + ({ + serialize_likely_small_unsigned_integer (sm, hp->value[0]); + serialize_cstring (sm, (char *) hp->key); + })); + /* *INDENT-ON* */ return serialize_close_vector (sm); } @@ -389,6 +399,31 @@ static u64 vector_rate_histogram[SLEEP_N_BUCKETS]; static void memclnt_queue_callback (vlib_main_t * vm); +/* + * Callback to send ourselves a plugin numbering-space trace msg + */ +static void +send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id) +{ + vl_api_trace_plugin_msg_ids_t *mp; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr; + unix_shared_memory_queue_t *q; + + mp = vl_msg_api_alloc_as_if_client (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_TRACE_PLUGIN_MSG_IDS); + strncpy ((char *) mp->plugin_name, (char *) name, + sizeof (mp->plugin_name) - 1); + mp->first_msg_id = clib_host_to_net_u16 (first_msg_id); + mp->last_msg_id = clib_host_to_net_u16 (last_msg_id); + + q = shmem_hdr->vl_input_queue; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + static uword memclnt_process (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) @@ -402,6 +437,7 @@ memclnt_process (vlib_main_t * vm, f64 dead_client_scan_time; f64 sleep_time, start_time; f64 vector_rate; + int i; vlib_set_queue_signal_callback (vm, memclnt_queue_callback); @@ -424,6 +460,16 @@ memclnt_process (vlib_main_t * vm, sleep_time = 20.0; dead_client_scan_time = vlib_time_now (vm) + 20.0; + /* + * Send plugin message range messages for each plugin we loaded + */ + for (i = 0; i < vec_len (am->msg_ranges); i++) + { + vl_api_msg_range_t *rp = am->msg_ranges + i; + send_one_plugin_msg_ids_msg (rp->name, rp->first_msg_id, + rp->last_msg_id); + } + /* $$$ pay attention to frame size, control CPU usage */ while (1) { @@ -1320,10 +1366,20 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) pthread_mutex_unlock (&q->mutex); } +static void +vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) +{ + /* Do nothing. 
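     The traced message carries each plugin's name and first/last
     message-id, so an API trace replay can remap ids even if a different
     set of plugins is loaded.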
We just want to trace the message */ +} + + #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ _(RPC_REPLY,rpc_reply) +#define foreach_plugin_trace_msg \ +_(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) + static clib_error_t * rpc_api_hookup (vlib_main_t * vm) { @@ -1336,6 +1392,16 @@ rpc_api_hookup (vlib_main_t * vm) sizeof(vl_api_##n##_t), 0 /* do not trace */); foreach_rpc_api_msg; #undef _ + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1 /* do trace */); + foreach_plugin_trace_msg; +#undef _ return 0; } -- cgit 1.2.3-korg From 68b0fb0c620c7451ef1a6380c43c39de6614db51 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 28 Feb 2017 15:15:56 -0500 Subject: VPP-598: tcp stack initial commit Change-Id: I49e5ce0aae6e4ff634024387ceaf7dbc432a0351 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/Makefile.am | 1 + src/plugins/ioam/export-common/ioam_export.h | 2 +- src/plugins/ioam/ipfixcollector/ipfixcollector.c | 2 +- src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c | 2 +- src/plugins/snat/in2out.c | 26 +- src/plugins/snat/out2in.c | 24 +- src/scripts/vnet/tcp | 18 +- src/scripts/vnet/udp | 19 + src/scripts/vnet/uri/tcp-setup.sh | 39 + src/scripts/vnet/uri/tcp_server | 4 + src/scripts/vnet/uri/udp | 19 + src/svm.am | 10 +- src/svm/ssvm.c | 16 + src/svm/ssvm.h | 18 +- src/svm/svm_fifo.c | 568 ++++++ src/svm/svm_fifo.h | 157 ++ src/svm/svm_fifo_segment.c | 193 ++ src/svm/svm_fifo_segment.h | 89 + src/svm/test_svm_fifo1.c | 361 ++++ src/uri.am | 22 + src/uri/uri_tcp_test.c | 916 +++++++++ src/uri/uri_udp_test.c | 553 ++++++ src/uri/uri_udp_test2.c | 954 +++++++++ src/uri/uritest.c | 484 +++++ src/vlib/buffer.c | 2 +- src/vlib/buffer.h | 68 + src/vlibmemory/unix_shared_memory_queue.c | 12 +- src/vlibmemory/unix_shared_memory_queue.h | 2 +- src/vnet.am | 66 +- src/vnet/api_errno.h | 21 +- src/vnet/bfd/bfd_udp.c | 4 +- src/vnet/buffer.h | 10 + src/vnet/classify/vnet_classify.c | 4 +- src/vnet/dhcp/dhcp_proxy.h | 2 +- src/vnet/flow/flow_report.h | 2 +- src/vnet/ip/ip.h | 4 +- src/vnet/ip/ip4.h | 42 +- src/vnet/ip/ip4_forward.c | 173 +- src/vnet/ip/ip4_packet.h | 26 +- src/vnet/ip/ip6.h | 44 +- src/vnet/ip/ip6_packet.h | 26 +- src/vnet/ip/punt.c | 2 +- src/vnet/ip/tcp_packet.h | 141 -- src/vnet/ip/udp.h | 315 --- src/vnet/ip/udp_error.def | 21 - src/vnet/ip/udp_format.c | 91 - src/vnet/ip/udp_init.c | 71 - src/vnet/ip/udp_local.c | 645 ------ src/vnet/ip/udp_packet.h | 65 - src/vnet/ip/udp_pg.c | 237 --- src/vnet/ipsec/ikev2.c | 2 +- src/vnet/ipsec/ikev2_cli.c | 2 +- src/vnet/ipsec/ikev2_crypto.c | 2 +- src/vnet/lisp-cp/packets.c | 65 +- src/vnet/lisp-cp/packets.h | 45 - src/vnet/lisp-gpe/interface.c | 2 +- src/vnet/lisp-gpe/lisp_gpe.h | 4 +- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 + src/vnet/session/application.c | 343 ++++ src/vnet/session/application.h | 120 ++ src/vnet/session/application_interface.c | 459 +++++ src/vnet/session/application_interface.h | 136 ++ src/vnet/session/hashes.c | 28 + src/vnet/session/node.c | 435 ++++ src/vnet/session/session.api | 429 ++++ src/vnet/session/session.c | 1286 ++++++++++++ src/vnet/session/session.h | 380 ++++ src/vnet/session/session_api.c | 821 ++++++++ src/vnet/session/session_cli.c | 189 ++ src/vnet/session/transport.c | 64 + src/vnet/session/transport.h | 250 +++ src/vnet/tcp/tcp.c | 708 +++++++ src/vnet/tcp/tcp.h | 624 ++++++ src/vnet/tcp/tcp_error.def | 35 + src/vnet/tcp/tcp_format.c | 136 ++ 
src/vnet/tcp/tcp_input.c | 2316 ++++++++++++++++++++++ src/vnet/tcp/tcp_newreno.c | 93 + src/vnet/tcp/tcp_output.c | 1412 +++++++++++++ src/vnet/tcp/tcp_packet.h | 184 ++ src/vnet/tcp/tcp_pg.c | 236 +++ src/vnet/tcp/tcp_syn_filter4.c | 542 +++++ src/vnet/tcp/tcp_timer.h | 29 + src/vnet/udp/builtin_server.c | 239 +++ src/vnet/udp/udp.c | 342 ++++ src/vnet/udp/udp.h | 362 ++++ src/vnet/udp/udp_error.def | 21 + src/vnet/udp/udp_format.c | 91 + src/vnet/udp/udp_input.c | 314 +++ src/vnet/udp/udp_local.c | 666 +++++++ src/vnet/udp/udp_packet.h | 65 + src/vnet/udp/udp_pg.c | 237 +++ src/vnet/vnet_all_api_h.h | 1 + src/vnet/vxlan-gpe/vxlan_gpe.h | 2 +- src/vnet/vxlan/vxlan.h | 2 +- src/vpp/api/vpe.api | 1 + src/vppinfra.am | 5 + src/vppinfra/bihash_16_8.h | 103 + src/vppinfra/bihash_48_8.h | 116 ++ src/vppinfra/tw_timer_16t_1w_2048sl.c | 26 + src/vppinfra/tw_timer_16t_1w_2048sl.h | 46 + 100 files changed, 18737 insertions(+), 1874 deletions(-) create mode 100644 src/scripts/vnet/udp create mode 100755 src/scripts/vnet/uri/tcp-setup.sh create mode 100644 src/scripts/vnet/uri/tcp_server create mode 100644 src/scripts/vnet/uri/udp create mode 100644 src/svm/svm_fifo.c create mode 100644 src/svm/svm_fifo.h create mode 100644 src/svm/svm_fifo_segment.c create mode 100644 src/svm/svm_fifo_segment.h create mode 100644 src/svm/test_svm_fifo1.c create mode 100644 src/uri.am create mode 100644 src/uri/uri_tcp_test.c create mode 100644 src/uri/uri_udp_test.c create mode 100644 src/uri/uri_udp_test2.c create mode 100644 src/uri/uritest.c delete mode 100644 src/vnet/ip/tcp_packet.h delete mode 100644 src/vnet/ip/udp.h delete mode 100644 src/vnet/ip/udp_error.def delete mode 100644 src/vnet/ip/udp_format.c delete mode 100644 src/vnet/ip/udp_init.c delete mode 100644 src/vnet/ip/udp_local.c delete mode 100644 src/vnet/ip/udp_packet.h delete mode 100644 src/vnet/ip/udp_pg.c create mode 100644 src/vnet/session/application.c create mode 100644 src/vnet/session/application.h create mode 100644 src/vnet/session/application_interface.c create mode 100644 src/vnet/session/application_interface.h create mode 100644 src/vnet/session/hashes.c create mode 100644 src/vnet/session/node.c create mode 100644 src/vnet/session/session.api create mode 100644 src/vnet/session/session.c create mode 100644 src/vnet/session/session.h create mode 100644 src/vnet/session/session_api.c create mode 100644 src/vnet/session/session_cli.c create mode 100644 src/vnet/session/transport.c create mode 100644 src/vnet/session/transport.h create mode 100644 src/vnet/tcp/tcp.c create mode 100644 src/vnet/tcp/tcp.h create mode 100644 src/vnet/tcp/tcp_error.def create mode 100644 src/vnet/tcp/tcp_format.c create mode 100644 src/vnet/tcp/tcp_input.c create mode 100644 src/vnet/tcp/tcp_newreno.c create mode 100644 src/vnet/tcp/tcp_output.c create mode 100644 src/vnet/tcp/tcp_packet.h create mode 100644 src/vnet/tcp/tcp_pg.c create mode 100644 src/vnet/tcp/tcp_syn_filter4.c create mode 100644 src/vnet/tcp/tcp_timer.h create mode 100644 src/vnet/udp/builtin_server.c create mode 100644 src/vnet/udp/udp.c create mode 100644 src/vnet/udp/udp.h create mode 100644 src/vnet/udp/udp_error.def create mode 100644 src/vnet/udp/udp_format.c create mode 100644 src/vnet/udp/udp_input.c create mode 100644 src/vnet/udp/udp_local.c create mode 100644 src/vnet/udp/udp_packet.h create mode 100644 src/vnet/udp/udp_pg.c create mode 100644 src/vppinfra/bihash_16_8.h create mode 100644 src/vppinfra/bihash_48_8.h create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.c 
create mode 100644 src/vppinfra/tw_timer_16t_1w_2048sl.h (limited to 'src/vlibmemory') diff --git a/src/Makefile.am b/src/Makefile.am index 08feb29a..641707ed 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -88,6 +88,7 @@ include vlib-api.am include vnet.am include vpp.am include vpp-api-test.am +include uri.am SUBDIRS += plugins diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index e84dab0b..dd48a93b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/plugins/ioam/ipfixcollector/ipfixcollector.c b/src/plugins/ioam/ipfixcollector/ipfixcollector.c index 4ae47edc..71b934ec 100644 --- a/src/plugins/ioam/ipfixcollector/ipfixcollector.c +++ b/src/plugins/ioam/ipfixcollector/ipfixcollector.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include ipfix_collector_main_t ipfix_collector_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index b42c357c..f334c983 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index e30c913c..b4b7793d 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -689,12 +689,12 @@ snat_hairpinning (snat_main_t *sm, ip4_header_t, dst_address); ip0->checksum = ip_csum_fold (sum0); - old_dst_port0 = tcp0->ports.dst; + old_dst_port0 = tcp0->dst; if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - tcp0->ports.dst = new_dst_port0; + tcp0->dst = new_dst_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t, dst_address); @@ -872,9 +872,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1012,9 +1012,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.src; - tcp1->ports.src = s1->out2in.port; - new_port1 = tcp1->ports.src; + old_port1 = tcp1->src_port; + tcp1->src_port = s1->out2in.port; + new_port1 = tcp1->src_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -1188,9 +1188,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = s0->out2in.port; - new_port0 = tcp0->ports.src; + old_port0 = tcp0->src_port; + tcp0->src_port = s0->out2in.port; + new_port0 = tcp0->src_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1667,8 +1667,8 @@ snat_in2out_fast_static_map_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.src; - tcp0->ports.src = new_port0; + old_port0 = tcp0->src_port; + tcp0->src_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 328f5ba4..3bfc0aa3 
100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -602,9 +602,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -737,9 +737,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) { - old_port1 = tcp1->ports.dst; - tcp1->ports.dst = s1->in2out.port; - new_port1 = tcp1->ports.dst; + old_port1 = tcp1->dst_port; + tcp1->dst_port = s1->in2out.port; + new_port1 = tcp1->dst_port; sum1 = tcp1->checksum; sum1 = ip_csum_update (sum1, old_addr1, new_addr1, @@ -907,9 +907,9 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = s0->in2out.port; - new_port0 = tcp0->ports.dst; + old_port0 = tcp0->dst_port; + tcp0->dst_port = s0->in2out.port; + new_port0 = tcp0->dst_port; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, @@ -1369,8 +1369,8 @@ snat_out2in_fast_node_fn (vlib_main_t * vm, { if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) { - old_port0 = tcp0->ports.dst; - tcp0->ports.dst = new_port0; + old_port0 = tcp0->dst_port; + tcp0->dst_port = new_port0; sum0 = tcp0->checksum; sum0 = ip_csum_update (sum0, old_addr0, new_addr0, diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp index a2ee8b2d..b9c23c3a 100644 --- a/src/scripts/vnet/tcp +++ b/src/scripts/vnet/tcp @@ -1,16 +1,18 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + packet-generator new { name x - limit 1 + limit 2048 node ip4-input - size 64-64 + size 100-100 + interface loop0 no-recycle data { - TCP: 1.2.3.4 -> 5.6.7.8 - TCP: 1234 -> 5678 + TCP: 192.168.1.2 -> 192.168.1.1 + TCP: 32415 -> 80 + SYN incrementing 100 } } - -tr add pg-input 100 -ip route 5.6.7.8/32 via local -ip route 1.2.3.4/32 via local diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp new file mode 100644 index 00000000..7dda1eec --- /dev/null +++ b/src/scripts/vnet/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 192.168.1.1/8 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/scripts/vnet/uri/tcp-setup.sh b/src/scripts/vnet/uri/tcp-setup.sh new file mode 100755 index 00000000..e0b01588 --- /dev/null +++ b/src/scripts/vnet/uri/tcp-setup.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +function topo_setup +{ + ip netns add vppns1 + ip link add veth_vpp1 type veth peer name vpp1 + ip link set dev vpp1 up + ip link set dev veth_vpp1 up netns vppns1 + + ip netns exec vppns1 \ + bash -c " + ip link set dev lo up + ip addr add 6.0.1.2/24 dev veth_vpp1 + " + + ethtool --offload vpp1 rx off tx off + ip netns exec vppns1 ethtool --offload veth_vpp1 rx off tx off + +} + +function topo_clean +{ + ip link del dev veth_vpp1 &> /dev/null + ip netns del vppns1 &> /dev/null +} + +if [ "$1" == "clean" ] ; then + topo_clean + exit 0 +else + topo_setup +fi + +# to test connectivity do: +# sudo ip netns exec vppns1 telnet 6.0.1.1 1234 +# to push traffic to the 
server +# dd if=/dev/zero bs=1024K count=512 | nc 6.0.1.1 +# to listen for incoming connection from vpp +# nc -l 1234 diff --git a/src/scripts/vnet/uri/tcp_server b/src/scripts/vnet/uri/tcp_server new file mode 100644 index 00000000..7f5a86de --- /dev/null +++ b/src/scripts/vnet/uri/tcp_server @@ -0,0 +1,4 @@ +create host-interface name vpp1 +set int state host-vpp1 up +set int ip address host-vpp1 6.0.1.1/24 +trace add af-packet-input 10 diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp new file mode 100644 index 00000000..ca13b83c --- /dev/null +++ b/src/scripts/vnet/uri/udp @@ -0,0 +1,19 @@ +loop create +set int ip address loop0 10.0.0.1/32 +set int state loop0 up + +packet-generator new { + name udp + limit 512 + rate 1e4 + node ip4-input + size 100-100 + interface loop0 + no-recycle + data { + UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 + UDP: 4321 -> 1234 + length 72 + incrementing 100 + } +} diff --git a/src/svm.am b/src/svm.am index 2cd385bd..442eba8e 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,13 +13,14 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h +nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h \ + svm/svm_fifo.h svm/svm_fifo_segment.h lib_LTLIBRARIES += libsvm.la libsvmdb.la +libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svm/svm_fifo.c svm/svm_fifo_segment.c libsvm_la_LIBADD = libvppinfra.la -lrt -lpthread libsvm_la_DEPENDENCIES = libvppinfra.la -libsvm_la_SOURCES = svm/svm.c svm/ssvm.c svmtool_SOURCES = svm/svmtool.c svmtool_LDADD = libsvm.la libvppinfra.la -lpthread -lrt @@ -31,4 +32,9 @@ libsvmdb_la_SOURCES = svm/svmdb.c svmdbtool_SOURCES = svm/svmdbtool.c svmdbtool_LDADD = libsvmdb.la libsvm.la libvppinfra.la -lpthread -lrt +noinst_PROGRAMS += test_svm_fifo1 +test_svm_fifo1_SOURCES = svm/test_svm_fifo1.c +test_svm_fifo1_LDADD = libsvm.la libvppinfra.la -lpthread -lrt +test_svm_fifo1_LDFLAGS = -static + # vi:syntax=automake diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c index 6f409eb6..6cda1f27 100644 --- a/src/svm/ssvm.c +++ b/src/svm/ssvm.c @@ -169,6 +169,22 @@ re_map_it: return 0; } +void +ssvm_delete (ssvm_private_t * ssvm) +{ + u8 *fn; + + fn = format (0, "/dev/shm/%s%c", ssvm->name, 0); + + /* Throw away the backing file */ + if (unlink ((char *) fn) < 0) + clib_unix_warning ("unlink segment '%s'", ssvm->name); + + munmap ((void *) ssvm->requested_va, ssvm->ssvm_size); + vec_free (fn); +} + + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h index 9e61b9a0..bccfc164 100644 --- a/src/svm/ssvm.h +++ b/src/svm/ssvm.h @@ -38,7 +38,10 @@ #include #include -#define MMAP_PAGESIZE (4<<10) +#ifndef MMAP_PAGESIZE +#define MMAP_PAGESIZE (clib_mem_get_page_size()) +#endif + #define SSVM_N_OPAQUE 7 typedef struct @@ -125,12 +128,12 @@ ssvm_pop_heap (void *oldheap) } #define foreach_ssvm_api_error \ -_(NO_NAME, "No shared segment name", -10) \ -_(NO_SIZE, "Size not set (master)", -11) \ -_(CREATE_FAILURE, "Create failed", -12) \ -_(SET_SIZE, "Set size failed", -13) \ -_(MMAP, "mmap failed", -14) \ -_(SLAVE_TIMEOUT, "Slave map timeout", -15) +_(NO_NAME, "No shared segment name", -100) \ +_(NO_SIZE, "Size not set (master)", -101) \ +_(CREATE_FAILURE, "Create failed", -102) \ +_(SET_SIZE, "Set size failed", -103) \ +_(MMAP, "mmap failed", -104) \ +_(SLAVE_TIMEOUT, "Slave map timeout", -105) typedef enum { @@ -143,6 +146,7 @@ typedef enum int ssvm_master_init (ssvm_private_t * ssvm, u32 master_index); int ssvm_slave_init (ssvm_private_t * ssvm, int 
timeout_in_seconds); +void ssvm_delete (ssvm_private_t * ssvm); #endif /* __included_ssvm_h__ */ diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c new file mode 100644 index 00000000..11f90193 --- /dev/null +++ b/src/svm/svm_fifo.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo.h" + +/** create an svm fifo, in the current heap. Fails vs blow up the process */ +svm_fifo_t * +svm_fifo_create (u32 data_size_in_bytes) +{ + svm_fifo_t *f; + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + + f = clib_mem_alloc_aligned_or_null (sizeof (*f) + data_size_in_bytes, + CLIB_CACHE_LINE_BYTES); + if (f == 0) + return 0; + + memset (f, 0, sizeof (*f) + data_size_in_bytes); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + + memset (&attr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); + + if (pthread_mutexattr_init (&attr)) + clib_unix_warning ("mutexattr_init"); + if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("pthread_mutexattr_setpshared"); + if (pthread_mutex_init (&f->mutex, &attr)) + clib_unix_warning ("mutex_init"); + if (pthread_mutexattr_destroy (&attr)) + clib_unix_warning ("mutexattr_destroy"); + if (pthread_condattr_init (&cattr)) + clib_unix_warning ("condattr_init"); + if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED)) + clib_unix_warning ("condattr_setpshared"); + if (pthread_cond_init (&f->condvar, &cattr)) + clib_unix_warning ("cond_init1"); + if (pthread_condattr_destroy (&cattr)) + clib_unix_warning ("cond_init2"); + + return (f); +} + +always_inline ooo_segment_t * +ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) +{ + ooo_segment_t *s; + + pool_get (f->ooo_segments, s); + + s->fifo_position = start; + s->length = length; + + s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; + + return s; +} + +always_inline void +ooo_segment_del (svm_fifo_t * f, u32 index) +{ + ooo_segment_t *cur, *prev = 0, *next = 0; + cur = pool_elt_at_index (f->ooo_segments, index); + + if (cur->next != OOO_SEGMENT_INVALID_INDEX) + { + next = pool_elt_at_index (f->ooo_segments, cur->next); + next->prev = cur->prev; + } + + if (cur->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, cur->prev); + prev->next = cur->next; + } + else + { + f->ooos_list_head = cur->next; + } + + pool_put (f->ooo_segments, cur); +} + +/** + * Add segment to fifo's out-of-order segment list. Takes care of merging + * adjacent segments and removing overlapping ones. 
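 *
 * For example (offsets are relative to the current tail, lengths in
 * bytes; the values below are illustrative only): with segments
 * (10, 10) and (25, 10) already queued, adding (18, 9) overlaps both
 * and collapses the three into a single segment covering bytes 10..34.
 * At the caller level that corresponds to something like:
 *
 *   svm_fifo_enqueue_with_offset (f, pid, 10, 10, data + 10);
 *   svm_fifo_enqueue_with_offset (f, pid, 25, 10, data + 25);
 *   svm_fifo_enqueue_with_offset (f, pid, 18, 9, data + 18);
 *
 * Once in-order enqueues fill the gap up to offset 10,
 * ooo_segment_try_collect () folds the contiguous bytes into the
 * readable region.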
+ */ +static void +ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) +{ + ooo_segment_t *s, *new_s, *prev, *next, *it; + u32 new_index, position, end_offset, s_sof, s_eof, s_index; + + position = (f->tail + offset) % f->nitems; + end_offset = offset + length; + + if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) + { + s = ooo_segment_new (f, position, length); + f->ooos_list_head = s - f->ooo_segments; + f->ooos_newest = f->ooos_list_head; + return; + } + + /* Find first segment that starts after new segment */ + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + while (s->next != OOO_SEGMENT_INVALID_INDEX + && ooo_segment_offset (f, s) <= offset) + s = pool_elt_at_index (f->ooo_segments, s->next); + + s_index = s - f->ooo_segments; + s_sof = ooo_segment_offset (f, s); + s_eof = ooo_segment_end_offset (f, s); + + /* No overlap, add before current segment */ + if (end_offset < s_sof) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + new_s->prev = s->prev; + + prev = pool_elt_at_index (f->ooo_segments, new_s->prev); + prev->next = new_index; + } + else + { + /* New head */ + f->ooos_list_head = new_index; + } + + new_s->next = s - f->ooo_segments; + s->prev = new_index; + f->ooos_newest = new_index; + return; + } + /* No overlap, add after current segment */ + else if (s_eof < offset) + { + new_s = ooo_segment_new (f, position, length); + new_index = new_s - f->ooo_segments; + + /* Pool might've moved, get segment again */ + s = pool_elt_at_index (f->ooo_segments, s_index); + + if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + new_s->next = s->next; + + next = pool_elt_at_index (f->ooo_segments, new_s->next); + next->prev = new_index; + } + + new_s->prev = s - f->ooo_segments; + s->next = new_index; + f->ooos_newest = new_index; + + return; + } + + /* + * Merge needed + */ + + /* Merge at head */ + if (offset <= s_sof) + { + /* If we have a previous, check if we overlap */ + if (s->prev != OOO_SEGMENT_INVALID_INDEX) + { + prev = pool_elt_at_index (f->ooo_segments, s->prev); + + /* New segment merges prev and current. Remove previous and + * update position of current. */ + if (ooo_segment_end_offset (f, prev) >= offset) + { + s->fifo_position = prev->fifo_position; + s->length = s_eof - ooo_segment_offset (f, prev); + ooo_segment_del (f, s->prev); + } + } + else + { + s->fifo_position = position; + s->length = s_eof - ooo_segment_offset (f, s); + } + + /* The new segment's tail may cover multiple smaller ones */ + if (s_eof < end_offset) + { + /* Remove segments completely covered */ + it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, s->next) : 0; + while (it && ooo_segment_end_offset (f, it) < end_offset) + { + next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, it->next) : 0; + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* Update length. Segment's start might have changed. 
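         The merged segment now ends at the new segment's end, so recompute
         the length from the (possibly moved) start offset to end_offset.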
*/ + s->length = end_offset - ooo_segment_offset (f, s); + + /* If partial overlap with last, merge */ + if (it && ooo_segment_offset (f, it) < end_offset) + { + s->length += + it->length - (ooo_segment_offset (f, it) - end_offset); + ooo_segment_del (f, it - f->ooo_segments); + } + } + } + /* Last but overlapping previous */ + else if (s_eof <= end_offset) + { + s->length = end_offset - ooo_segment_offset (f, s); + } + /* New segment completely covered by current one */ + else + { + /* Do Nothing */ + } + + /* Most recently updated segment */ + f->ooos_newest = s - f->ooo_segments; +} + +/** + * Removes segments that can now be enqueued because the fifo's tail has + * advanced. Returns the number of bytes added to tail. + */ +static int +ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) +{ + ooo_segment_t *s; + u32 index, bytes = 0, diff; + + s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); + + /* If last tail update overlaps one/multiple ooo segments, remove them */ + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + while (0 < diff && diff < n_bytes_enqueued) + { + /* Segment end is beyond the tail. Advance tail and be done */ + if (diff < s->length) + { + f->tail += s->length - diff; + f->tail %= f->nitems; + break; + } + /* If we have next go on */ + else if (s->next != OOO_SEGMENT_INVALID_INDEX) + { + index = s - f->ooo_segments; + s = pool_elt_at_index (f->ooo_segments, s->next); + diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + ooo_segment_del (f, index); + } + /* End of search */ + else + { + break; + } + } + + /* If tail is adjacent to an ooo segment, 'consume' it */ + if (diff == 0) + { + bytes = ((f->nitems - f->cursize) >= s->length) ? s->length : + f->nitems - f->cursize; + + f->tail += bytes; + f->tail %= f->nitems; + + ooo_segment_del (f, s - f->ooo_segments); + } + + return bytes; +} + +static int +svm_fifo_enqueue_internal (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == f->nitems)) + return -2; /* fifo stuffed */ + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (nitems - cursize) < max_bytes ? + (nitems - cursize) : max_bytes; + + if (PREDICT_TRUE (copy_from_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->tail) < total_copy_bytes) + ? (nitems - f->tail) : total_copy_bytes; + + clib_memcpy (&f->data[f->tail], copy_from_here, first_copy_bytes); + f->tail += first_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (&f->data[f->tail], copy_from_here + first_copy_bytes, + second_copy_bytes); + f->tail += second_copy_bytes; + f->tail = (f->tail == nitems) ? 0 : f->tail; + } + } + else + { + /* Account for a zero-copy enqueue done elsewhere */ + ASSERT (max_bytes <= (nitems - cursize)); + f->tail += max_bytes; + f->tail = f->tail % nitems; + total_copy_bytes = max_bytes; + } + + /* Any out-of-order segments to collect? 
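     If this enqueue advanced the tail up to (or into) the first
     out-of-order segment, fold those bytes into the contiguous region
     before cursize is published below.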
*/ + if (PREDICT_FALSE (f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX)) + total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes); + + /* Atomically increase the queue length */ + __sync_fetch_and_add (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_enqueue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_internal (f, pid, max_bytes, copy_from_here); +} + +/** Enqueue a future segment. + * Two choices: either copies the entire segment, or copies nothing + * Returns 0 of the entire segment was copied + * Returns -1 if none of the segment was copied due to lack of space + */ + +static int +svm_fifo_enqueue_with_offset_internal2 (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, + u8 * copy_from_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + u32 tail_plus_offset; + + ASSERT (offset > 0); + + /* read cursize, which can only decrease while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Will this request fit? */ + if ((required_bytes + offset) > (nitems - cursize)) + return -1; + + ooo_segment_add (f, offset, required_bytes); + + /* Number of bytes we're going to copy */ + total_copy_bytes = required_bytes; + tail_plus_offset = (f->tail + offset) % nitems; + + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes) + ? (nitems - tail_plus_offset) : total_copy_bytes; + + clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + tail_plus_offset += first_copy_bytes; + tail_plus_offset %= nitems; + + ASSERT (tail_plus_offset == 0); + + clib_memcpy (&f->data[tail_plus_offset], + copy_from_here + first_copy_bytes, second_copy_bytes); + } + + return (0); +} + + +int +svm_fifo_enqueue_with_offset (svm_fifo_t * f, + int pid, + u32 offset, + u32 required_bytes, u8 * copy_from_here) +{ + return svm_fifo_enqueue_with_offset_internal2 + (f, pid, offset, required_bytes, copy_from_here); +} + + +static int +svm_fifo_dequeue_internal2 (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = ((nitems - f->head) < total_copy_bytes) + ? (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + f->head += first_copy_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, + &f->data[f->head], second_copy_bytes); + f->head += second_copy_bytes; + f->head = (f->head == nitems) ? 
0 : f->head; + } + } + else + { + /* Account for a zero-copy dequeue done elsewhere */ + ASSERT (max_bytes <= cursize); + f->head += max_bytes; + f->head = f->head % nitems; + cursize -= max_bytes; + total_copy_bytes = max_bytes; + } + + __sync_fetch_and_sub (&f->cursize, total_copy_bytes); + + return (total_copy_bytes); +} + +int +svm_fifo_dequeue_nowait (svm_fifo_t * f, + int pid, u32 max_bytes, u8 * copy_here) +{ + return svm_fifo_dequeue_internal2 (f, pid, max_bytes, copy_here); +} + +int +svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here) +{ + u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + if (PREDICT_TRUE (copy_here != 0)) + { + /* Number of bytes in first copy segment */ + first_copy_bytes = + ((nitems - f->head) < total_copy_bytes) ? + (nitems - f->head) : total_copy_bytes; + clib_memcpy (copy_here, &f->data[f->head], first_copy_bytes); + + /* Number of bytes in second copy segment, if any */ + second_copy_bytes = total_copy_bytes - first_copy_bytes; + if (second_copy_bytes) + { + clib_memcpy (copy_here + first_copy_bytes, &f->data[0], + second_copy_bytes); + } + } + return total_copy_bytes; +} + +int +svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes) +{ + u32 total_drop_bytes, first_drop_bytes, second_drop_bytes; + u32 cursize, nitems; + + if (PREDICT_FALSE (f->cursize == 0)) + return -2; /* nothing in the fifo */ + + /* read cursize, which can only increase while we're working */ + cursize = f->cursize; + nitems = f->nitems; + + /* Number of bytes we're going to drop */ + total_drop_bytes = (cursize < max_bytes) ? cursize : max_bytes; + + /* Number of bytes in first copy segment */ + first_drop_bytes = + ((nitems - f->head) < total_drop_bytes) ? + (nitems - f->head) : total_drop_bytes; + f->head += first_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + + /* Number of bytes in second drop segment, if any */ + second_drop_bytes = total_drop_bytes - first_drop_bytes; + if (second_drop_bytes) + { + f->head += second_drop_bytes; + f->head = (f->head == nitems) ? 0 : f->head; + } + + __sync_fetch_and_sub (&f->cursize, total_drop_bytes); + + return total_drop_bytes; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h new file mode 100644 index 00000000..70624b74 --- /dev/null +++ b/src/svm/svm_fifo.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_ssvm_fifo_h__ +#define __included_ssvm_fifo_h__ + +#include +#include +#include +#include +#include +#include +#include + +typedef enum +{ + SVM_FIFO_TAG_NOT_HELD = 0, + SVM_FIFO_TAG_DEQUEUE, + SVM_FIFO_TAG_ENQUEUE, +} svm_lock_tag_t; + +/** Out-of-order segment */ +typedef struct +{ + u32 next; /**< Next linked-list element pool index */ + u32 prev; /**< Previous linked-list element pool index */ + + u32 fifo_position; /**< Start of segment, normalized*/ + u32 length; /**< Length of segment */ +} ooo_segment_t; + +#define OOO_SEGMENT_INVALID_INDEX ((u32)~0) + +typedef struct +{ + pthread_mutex_t mutex; /* 8 bytes */ + pthread_cond_t condvar; /* 8 bytes */ + u32 owner_pid; + svm_lock_tag_t tag; + volatile u32 cursize; + u32 nitems; + + /* Backpointers */ + u32 server_session_index; + u32 client_session_index; + u8 server_thread_index; + u8 client_thread_index; + CLIB_CACHE_LINE_ALIGN_MARK (end_shared); + u32 head; + CLIB_CACHE_LINE_ALIGN_MARK (end_consumer); + + /* producer */ + u32 tail; + + ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ + u32 ooos_list_head; /**< Head of out-of-order linked-list */ + u32 ooos_newest; /**< Last segment to have been updated */ + + CLIB_CACHE_LINE_ALIGN_MARK (data); +} svm_fifo_t; + +static inline int +svm_fifo_lock (svm_fifo_t * f, u32 pid, u32 tag, int nowait) +{ + if (PREDICT_TRUE (nowait == 0)) + pthread_mutex_lock (&f->mutex); + else + { + if (pthread_mutex_trylock (&f->mutex)) + return -1; + } + f->owner_pid = pid; + f->tag = tag; + return 0; +} + +static inline void +svm_fifo_unlock (svm_fifo_t * f) +{ + f->owner_pid = 0; + f->tag = 0; + CLIB_MEMORY_BARRIER (); + pthread_mutex_unlock (&f->mutex); +} + +static inline u32 +svm_fifo_max_dequeue (svm_fifo_t * f) +{ + return f->cursize; +} + +static inline u32 +svm_fifo_max_enqueue (svm_fifo_t * f) +{ + return f->nitems - f->cursize; +} + +static inline u8 +svm_fifo_has_ooo_data (svm_fifo_t * f) +{ + return f->ooos_list_head != OOO_SEGMENT_INVALID_INDEX; +} + +svm_fifo_t *svm_fifo_create (u32 data_size_in_bytes); + +int svm_fifo_enqueue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_from_here); + +int svm_fifo_enqueue_with_offset (svm_fifo_t * f, int pid, + u32 offset, u32 required_bytes, + u8 * copy_from_here); + +int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, + u8 * copy_here); + +int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, + u8 * copy_here); +int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes); + +always_inline ooo_segment_t * +svm_fifo_newest_ooo_segment (svm_fifo_t * f) +{ + return f->ooo_segments + f->ooos_newest; +} + +always_inline u32 +ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position - f->tail) % f->nitems); +} + +always_inline u32 +ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s) +{ + return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems); +} + +#endif /* __included_ssvm_fifo_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c new file mode 100644 index 00000000..acabb3bd --- /dev/null +++ b/src/svm/svm_fifo_segment.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +svm_fifo_segment_main_t svm_fifo_segment_main; + +/** (master) create an svm fifo segment */ +int +svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + void *oldheap; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.i_am_master = 1; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_master_init (&s->ssvm, s - sm->segments); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Note; requested_va updated due to seg base addr randomization */ + sm->next_baseva = s->ssvm.requested_va + a->segment_size; + + sh = s->ssvm.sh; + oldheap = ssvm_push_heap (sh); + + /* Set up svm_fifo_segment shared header */ + fsh = clib_mem_alloc (sizeof (*fsh)); + memset (fsh, 0, sizeof (*fsh)); + sh->opaque[0] = fsh; + s->h = fsh; + fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + + /* Avoid vec_add1(...) failure when adding a fifo, etc. */ + vec_validate (fsh->fifos, 64); + _vec_len (fsh->fifos) = 0; + + ssvm_pop_heap (oldheap); + + sh->ready = 1; + a->new_segment_index = s - sm->segments; + return (0); +} + +/** (slave) attach to an svm fifo segment */ +int +svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a) +{ + int rv; + svm_fifo_segment_private_t *s; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + + /* Allocate a fresh segment */ + pool_get (sm->segments, s); + + memset (s, 0, sizeof (*s)); + + s->ssvm.ssvm_size = a->segment_size; + s->ssvm.my_pid = getpid (); + s->ssvm.name = (u8 *) a->segment_name; + s->ssvm.requested_va = sm->next_baseva; + + rv = ssvm_slave_init (&s->ssvm, sm->timeout_in_seconds); + + if (rv) + { + _vec_len (s) = vec_len (s) - 1; + return (rv); + } + + /* Fish the segment header */ + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + s->h = fsh; + + a->new_segment_index = s - sm->segments; + return (0); +} + +void +svm_fifo_segment_delete (svm_fifo_segment_private_t * s) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + ssvm_delete (&s->ssvm); + pool_put (sm->segments, s); +} + +svm_fifo_t * +svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes) +{ + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + void *oldheap; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + /* Note: this can fail, in which case: create another segment */ + f = svm_fifo_create (data_size_in_bytes); + if (f == 0) + { + ssvm_pop_heap (oldheap); + return (0); + } + + vec_add1 (fsh->fifos, f); + + ssvm_pop_heap (oldheap); + return (f); +} + +void +svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f) +{ + ssvm_shared_header_t *sh; + 
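  /* fsh->fifos lives in the segment's shared heap, so the lookup and
     vec_delete below run with that heap pushed.  A minimal lifecycle,
     with purely illustrative names and sizes, looks like:

       svm_fifo_segment_create_args_t a = { 0 };
       a.segment_name = "demo";                    (any unique name)
       a.segment_size = 256 << 10;
       svm_fifo_segment_create (&a);
       svm_fifo_segment_private_t *seg =
         svm_fifo_get_segment (a.new_segment_index);
       svm_fifo_t *f = svm_fifo_segment_alloc_fifo (seg, 4096);
       ...
       svm_fifo_segment_free_fifo (seg, f);        (this function)
       svm_fifo_segment_delete (seg);
  */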
svm_fifo_segment_header_t *fsh; + void *oldheap; + int i; + + sh = s->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + oldheap = ssvm_push_heap (sh); + + for (i = 0; i < vec_len (fsh->fifos); i++) + { + if (fsh->fifos[i] == f) + { + vec_delete (fsh->fifos, 1, i); + goto found; + } + } + clib_warning ("fifo 0x%llx not found in fifo table...", f); + +found: + clib_mem_free (f); + ssvm_pop_heap (oldheap); +} + +void +svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + + sm->next_baseva = baseva; + sm->timeout_in_seconds = timeout_in_seconds; +} + +u32 +svm_fifo_segment_index (svm_fifo_segment_private_t * s) +{ + return s - svm_fifo_segment_main.segments; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h new file mode 100644 index 00000000..793fa7c8 --- /dev/null +++ b/src/svm/svm_fifo_segment.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ssvm_fifo_segment_h__ +#define __included_ssvm_fifo_segment_h__ + +#include "svm_fifo.h" +#include "ssvm.h" + +typedef struct +{ + volatile svm_fifo_t **fifos; + u8 *segment_name; +} svm_fifo_segment_header_t; + +typedef struct +{ + ssvm_private_t ssvm; + svm_fifo_segment_header_t *h; +} svm_fifo_segment_private_t; + +typedef struct +{ + /** pool of segments */ + svm_fifo_segment_private_t *segments; + /* Where to put the next one */ + u64 next_baseva; + u32 timeout_in_seconds; +} svm_fifo_segment_main_t; + +extern svm_fifo_segment_main_t svm_fifo_segment_main; + +typedef struct +{ + char *segment_name; + u32 segment_size; + u32 new_segment_index; +} svm_fifo_segment_create_args_t; + +static inline svm_fifo_segment_private_t * +svm_fifo_get_segment (u32 segment_index) +{ + svm_fifo_segment_main_t *ssm = &svm_fifo_segment_main; + return vec_elt_at_index (ssm->segments, segment_index); +} + +#define foreach_ssvm_fifo_segment_api_error \ +_(OUT_OF_SPACE, "Out of space in segment", -200) + +typedef enum +{ +#define _(n,s,c) SSVM_FIFO_SEGMENT_API_ERROR_##n = c, + foreach_ssvm_fifo_segment_api_error +#undef _ +} ssvm_fifo_segment_api_error_enum_t; + +int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a); +int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a); +void svm_fifo_segment_delete (svm_fifo_segment_private_t * s); + +svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, + u32 data_size_in_bytes); +void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, + svm_fifo_t * f); + +void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds); + +u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s); + +#endif /* __included_ssvm_fifo_segment_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c new file mode 100644 index 00000000..355653df --- /dev/null +++ b/src/svm/test_svm_fifo1.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svm_fifo_segment.h" + +clib_error_t * +hello_world (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + clib_error_t *error = 0; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + + while (svm_fifo_max_enqueue (f) >= vec_len (test_data)) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + while (svm_fifo_max_dequeue (f) >= vec_len (test_data)) + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + + if (!memcmp (retrieved_data, test_data, vec_len (test_data))) + error = clib_error_return (0, "data test OK, got '%s'", retrieved_data); + else + error = clib_error_return (0, "data test FAIL!"); + + svm_fifo_segment_free_fifo (sp, f); + + return error; +} + +clib_error_t * +master (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 4096); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + svm_fifo_enqueue_nowait (f, pid, vec_len (test_data), test_data); + + return clib_error_return (0, "master (enqueue) done"); +} + +clib_error_t * +mempig (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + svm_fifo_t **flist = 0; + int rv; + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + 
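  /* mempig: allocate 4kB fifos until allocation fails (or 1000 have
     been created), free them all, then repeat; the two "created %d
     fifos" counts printed below should match if the segment heap
     reclaims freed fifos correctly. */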
a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #1: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + _vec_len (flist) = 0; + + for (i = 0; i < 1000; i++) + { + f = svm_fifo_segment_alloc_fifo (sp, 4096); + if (f == 0) + break; + vec_add1 (flist, f); + } + + fformat (stdout, "Try #2: created %d fifos...\n", vec_len (flist)); + for (i = 0; i < vec_len (flist); i++) + { + f = flist[i]; + svm_fifo_segment_free_fifo (sp, f); + } + + return 0; +} + +clib_error_t * +offset (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_t *f; + int rv; + u32 *test_data = 0; + u32 *recovered_data = 0; + int i; + int pid = getpid (); + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + a->segment_size = 256 << 10; + + rv = svm_fifo_segment_create (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_create returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + + f = svm_fifo_segment_alloc_fifo (sp, 200 << 10); + + if (f == 0) + return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); + + for (i = 0; i < (3 * 1024); i++) + vec_add1 (test_data, i); + + /* Enqueue the first 1024 u32's */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) test_data); + + /* Enqueue the third 1024 u32's 2048 ahead of the current tail */ + svm_fifo_enqueue_with_offset (f, pid, 4096, 4096, (u8 *) & test_data[2048]); + + /* Enqueue the second 1024 u32's at the current tail */ + svm_fifo_enqueue_nowait (f, pid, 4096 /* bytes to enqueue */ , + (u8 *) & test_data[1024]); + + vec_validate (recovered_data, (3 * 1024) - 1); + + svm_fifo_dequeue_nowait (f, pid, 3 * 4096, (u8 *) recovered_data); + + for (i = 0; i < (3 * 1024); i++) + { + if (recovered_data[i] != test_data[i]) + { + clib_warning ("[%d] expected %d recovered %d", i, + test_data[i], recovered_data[i]); + return clib_error_return (0, "offset test FAILED"); + } + } + + return clib_error_return (0, "offset test OK"); +} + +clib_error_t * +slave (int verbose) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *sp; + svm_fifo_segment_header_t *fsh; + svm_fifo_t *f; + ssvm_shared_header_t *sh; + int rv; + u8 *test_data; + u8 *retrieved_data = 0; + int pid = getpid (); + int i; + + memset (a, 0, sizeof (*a)); + + a->segment_name = "fifo-test1"; + + rv = svm_fifo_segment_attach (a); + + if (rv) + return clib_error_return (0, "svm_fifo_segment_attach returned %d", rv); + + sp = svm_fifo_get_segment (a->new_segment_index); + sh = sp->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + /* might wanna wait.. 
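     that is, a robust slave would poll until the master has created
     the first fifo, for instance (illustrative only):

       while (vec_len (fsh->fifos) == 0)
         ;

     whereas this test simply assumes fifos[0] already exists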
*/ + f = (svm_fifo_t *) fsh->fifos[0]; + + /* Lazy bastards united */ + test_data = format (0, "Hello world%c", 0); + vec_validate (retrieved_data, vec_len (test_data) - 1); + + for (i = 0; i < 1000; i++) + { + svm_fifo_dequeue_nowait (f, pid, vec_len (retrieved_data), + retrieved_data); + if (memcmp (retrieved_data, test_data, vec_len (retrieved_data))) + return clib_error_return (0, "retrieved data incorrect, '%s'", + retrieved_data); + } + + return clib_error_return (0, "slave (dequeue) done"); +} + + +int +test_ssvm_fifo1 (unformat_input_t * input) +{ + clib_error_t *error = 0; + int verbose = 0; + int test_id = 0; + + svm_fifo_segment_init (0x200000000ULL, 20); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "master")) + test_id = 1; + else if (unformat (input, "slave")) + test_id = 2; + else if (unformat (input, "mempig")) + test_id = 3; + else if (unformat (input, "offset")) + test_id = 4; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto out; + } + } + + switch (test_id) + { + case 0: + error = hello_world (verbose); + break; + + case 1: + error = master (verbose); + break; + + case 2: + error = slave (verbose); + break; + + case 3: + error = mempig (verbose); + break; + + case 4: + error = offset (verbose); + break; + + default: + error = clib_error_return (0, "test id %d unknown", test_id); + break; + } + +out: + if (error) + clib_error_report (error); + + return 0; +} + + + +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int r; + + unformat_init_command_line (&i, argv); + r = test_ssvm_fifo1 (&i); + unformat_free (&i); + return r; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri.am b/src/uri.am new file mode 100644 index 00000000..8cdd77c6 --- /dev/null +++ b/src/uri.am @@ -0,0 +1,22 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +noinst_PROGRAMS += uri_udp_test2 uri_tcp_test + +uri_udp_test2_SOURCES = uri/uri_udp_test2.c +uri_udp_test2_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt + +uri_tcp_test_SOURCES = uri/uri_tcp_test.c +uri_tcp_test_LDADD = libvlibmemoryclient.la libvlibapi.la libsvm.la \ + libvppinfra.la -lpthread -lm -lrt diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c new file mode 100644 index 00000000..ed5a37d8 --- /dev/null +++ b/src/uri/uri_tcp_test.c @@ -0,0 +1,916 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef struct +{ + svm_fifo_t * server_rx_fifo; + svm_fifo_t * server_tx_fifo; + + u32 vpp_session_index; + u32 vpp_session_thread; +} session_t; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, + STATE_FAILED +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 * uri; + + /* Session pool */ + session_t * sessions; + + /* Hash table for disconnect processing */ + uword * session_index_by_vpp_handles; + + /* intermediate rx buffer */ + u8 * rx_buf; + + /* URI for slave's connect */ + u8 * connect_uri; + + u32 connected_session_index; + + int i_am_master; + + /* drop all packets */ + int drop_packets; + + /* Our event queue */ + unix_shared_memory_queue_t * our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t * vpp_event_queue; + + pid_t my_pid; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* Signal variables */ + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword * error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t * segment_main; + + u8 *connect_test_data; +} uri_tcp_test_main_t; + +uri_tcp_test_main_t uri_tcp_test_main; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +int +wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + if (utm->state == STATE_FAILED) + return -1; + } + clib_warning ("timeout waiting for STATE_READY"); + return -1; +} + +static void +init_error_string_table (uri_tcp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +static void +stop_signal (int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal 
(int signum) +{ + uri_tcp_test_main_t *um = &uri_tcp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) +{ + clib_warning ("BUG"); +} + +int +connect_to_vpp (char *name) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t *mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_disconnect_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +static void +vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + session_t * session; + vl_api_reset_session_reply_t * rmp; + uword * p; + int rv = 0; + u64 key; + + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + + p = hash_get(utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index(utm->sessions, p[0]); + hash_unset(utm->session_index_by_vpp_handles, key); + pool_put(utm->sessions, session); + } + else + { + clib_warning("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +handle_fifo_event_connect_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo; + int n_read, bytes; + + rx_fifo = e->fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); + + // bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + // + // bytes_to_read = vec_len(utm->rx_buf) > bytes_to_read ? 
+ // bytes_to_read : vec_len(utm->rx_buf); + // + // buffer_offset = 0; + // while (bytes_to_read > 0) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // bytes_to_read, + // utm->rx_buf + buffer_offset); + // if (rv > 0) + // { + // bytes_to_read -= rv; + // buffer_offset += rv; + // bytes_received += rv; + // } + // } + + + // while (bytes_received < bytes_sent) + // { + // rv = svm_fifo_dequeue_nowait2 (rx_fifo, mypid, + // vec_len (utm->rx_buf), + // utm->rx_buf); + // if (rv > 0) + // { + //#if CLIB_DEBUG > 0 + // int j; + // for (j = 0; j < rv; j++) + // { + // if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + // { + // clib_warning ("error at byte %lld, 0x%x not 0x%x", + // bytes_received + j, + // utm->rx_buf[j], + // ((bytes_received + j )&0xff)); + // } + // } + //#endif + // bytes_received += (u64) rv; + // } + // } +} + +void +handle_connect_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_connect_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning("unknown event type %d", e->event_type); + break; + } +} + +void +uri_tcp_connect_send (uri_tcp_test_main_t *utm) +{ + u8 *test_data = utm->connect_test_data; + u64 bytes_sent = 0; + int rv; + int mypid = getpid(); + session_t * session; + svm_fifo_t *tx_fifo; + int buffer_offset, bytes_to_send = 0; + session_fifo_event_t evt; + static int serial_number = 0; + int i; + u32 max_chunk = 64 << 10, write; + + session = pool_elt_at_index (utm->sessions, utm->connected_session_index); + tx_fifo = session->server_tx_fifo; + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < 10; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + write = bytes_to_send > max_chunk ? 
max_chunk : bytes_to_send; + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, write, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = rv; + evt.event_id = serial_number++; + + unix_shared_memory_queue_add (utm->vpp_event_queue, (u8 *) &evt, + 0 /* do wait for mutex */); + } + } + } +} + +static void +uri_tcp_client_test (uri_tcp_test_main_t * utm) +{ + vl_api_connect_uri_t * cmp; + vl_api_disconnect_session_t *dmp; + session_t *connected_session; + int i; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl(0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + return; + } + + /* Init test data */ + vec_validate (utm->connect_test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (utm->connect_test_data); i++) + utm->connect_test_data[i] = i & 0xff; + + /* Start reader thread */ + /* handle_connect_event_queue (utm); */ + + /* Start send */ + uri_tcp_connect_send (utm); + + /* Disconnect */ + connected_session = pool_elt_at_index(utm->sessions, + utm->connected_session_index); + dmp = vl_msg_api_alloc (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = utm->my_client_index; + dmp->session_index = connected_session->vpp_session_index; + dmp->session_thread_index = connected_session->vpp_session_thread; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&dmp); +} + +void +handle_fifo_event_server_rx (uri_tcp_test_main_t *utm, session_fifo_event_t * e) +{ + svm_fifo_t * rx_fifo, * tx_fifo; + int n_read; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv, bytes; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + bytes = e->enqueue_length; + do + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, 0, vec_len(utm->rx_buf), + utm->rx_buf); + + /* Reflect if a non-drop session */ + if (!utm->drop_packets && n_read > 0) + { + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = n_read; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) &evt, 0 /* do wait for mutex */); + } + + if (n_read > 0) + bytes -= n_read; + } + while (n_read < 0 || bytes > 0); +} + +void +handle_event_queue (uri_tcp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *)e, + 0 /* nowait */); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE(utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE(utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat(stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +static void 
+vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->retval) + { + clib_warning("bind failed: %d", mp->retval); + return; + } + + if (mp->segment_name_length == 0) + { + clib_warning("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning("svm_fifo_segment_attach ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = + (unix_shared_memory_queue_t *) mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + session_t *session; + u32 session_index; + svm_fifo_t *rx_fifo, *tx_fifo; + int rv; + + if (mp->retval) + { + clib_warning ("connection failed with code: %d", mp->retval); + utm->state = STATE_FAILED; + return; + } + /* + * Attatch to segment + */ + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + utm->state = STATE_FAILED; + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT(mp->client_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + /* + * Save the queues + */ + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->client_event_queue_address; + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* + * Setup session + */ + + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + session->vpp_session_index = mp->session_index; + session->vpp_session_thread = mp->session_thread_index; + + /* Save handle */ + utm->connected_session_index = session_index; + + utm->state = STATE_READY; +} + +void +uri_tcp_bind (uri_tcp_test_main_t *utm) +{ + vl_api_bind_uri_t * bmp; + u32 fifo_size = 3 << 20; + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl(0xfeedface); + bmp->initial_segment_size = 256<<20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128<<20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&bmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t *mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl(mp->retval)); + + utm->state = STATE_START; +} + +void +uri_tcp_unbind (uri_tcp_test_main_t *utm) 
+{ + vl_api_unbind_uri_t * ump; + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&ump); +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t * rx_fifo, * tx_fifo; + session_t * session; + static f64 start_time; + u64 key; + u32 session_index; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + /* Allocate local session and set it up */ + pool_get (utm->sessions, session); + session_index = session - utm->sessions; + + rx_fifo = (svm_fifo_t *)mp->server_rx_fifo; + rx_fifo->client_session_index = session_index; + tx_fifo = (svm_fifo_t *)mp->server_tx_fifo; + tx_fifo->client_session_index = session_index; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + /* Add it to lookup table */ + key = (((u64)mp->session_thread_index) << 32) | (u64)mp->session_index; + hash_set (utm->session_index_by_vpp_handles, key, session_index); + + utm->state = STATE_READY; + + /* Stats printing */ + if (pool_elts (utm->sessions) && (pool_elts(utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts(utm->sessions), now - start_time, + (f64)pool_elts(utm->sessions) / (now - start_time)); + } + + /* Send accept reply to vpp */ + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *)&rmp); +} + +void +uri_tcp_server_test (uri_tcp_test_main_t * utm) +{ + + /* Bind to uri */ + uri_tcp_bind (utm); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + /* Enter handle event loop */ + handle_event_queue (utm); + + /* Cleanup */ + uri_tcp_unbind (utm); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(RESET_SESSION, reset_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_tcp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ +} + +int +main (int argc, char **argv) +{ + uri_tcp_test_main_t *utm = &uri_tcp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 * bind_name = (u8 *) "tcp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t * session; + int i; + int i_am_master = 1, drop_packets = 0; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header 
(heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 65536); + + utm->session_index_by_vpp_handles = + hash_create (0, sizeof(uword)); + + utm->my_pid = getpid(); + utm->configured_segment_size = 1<<20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init(0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp<<20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp<<30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "drop")) + drop_packets = 1; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + utm->drop_packets = drop_packets; + + utm->connect_uri = format (0, "tcp://6.0.1.2/1234%c", 0); + + setup_signal_handlers(); + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master? "uri_tcp_server":"uri_tcp_client") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_tcp_client_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_tcp_server_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c new file mode 100644 index 00000000..6f5284c9 --- /dev/null +++ b/src/uri/uri_udp_test.c @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
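/* vl_print is deliberately defined to nothing: the vl_api_..._t_print
   functions are still instantiated by vl_printfun below, but in this
   standalone test client they emit no output. */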
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 5.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; 
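  /* vpp identifies a session by (thread index, session index); the
     accept and disconnect handlers pack that pair into a single u64
     hash key,

       key = (((u64) mp->session_thread_index) << 32)
             | (u64) mp->session_index;

     so the disconnect handler can find the local session again. */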
+ static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset (utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 2 << 30; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp4:1234"; + mheap_t *h; + session_t *session; + int i; + + clib_mem_init (0, 256 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + 
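  /* 0x200000000ULL is the base virtual address at which fifo segments
     will be mapped and 20 is the slave-side attach timeout in seconds;
     both simply seed svm_fifo_segment_main (see svm_fifo_segment_init()
     in svm_fifo_segment.c). */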
unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->uri = format (0, "%s%c", bind_name, 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp ("uri_udp_test") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uri_udp_test2.c b/src/uri/uri_udp_test2.c new file mode 100644 index 00000000..ddfffaa6 --- /dev/null +++ b/src/uri/uri_udp_test2.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../vnet/session/application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include +#undef vl_printfun + +/* Satisfy external references when not linking with -lvlib */ +vlib_main_t vlib_global_main; +vlib_main_t **vlib_mains; + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; +} session_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* The URI we're playing with */ + u8 *uri; + + /* Session pool */ + session_t *sessions; + + /* Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + /* intermediate rx buffer */ + u8 *rx_buf; + + /* URI for connect */ + u8 *connect_uri; + + int i_am_master; + + /* Our event queue */ + unix_shared_memory_queue_t *our_event_queue; + + /* $$$ single thread only for the moment */ + unix_shared_memory_queue_t *vpp_event_queue; + + /* $$$$ hack: cut-through session index */ + volatile u32 cut_through_session_index; + + /* unique segment name counter */ + u32 unique_segment_index; + + pid_t my_pid; + + /* pthread handle */ + pthread_t cut_through_thread_handle; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + volatile int time_to_stop; + volatile int time_to_print_stats; + + u32 configured_segment_size; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; + + /* convenience */ + svm_fifo_segment_main_t *segment_main; + +} uri_udp_test_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 10000 +#else +#define NITER 4000000 +#endif + +uri_udp_test_main_t uri_udp_test_main; + +static void +stop_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_stop = 1; +} + +static void +stats_signal (int signum) +{ + uri_udp_test_main_t *um = &uri_udp_test_main; + + um->time_to_print_stats = 1; +} + +static clib_error_t * +setup_signal_handlers (void) +{ + signal (SIGINT, stats_signal); + signal (SIGQUIT, stop_signal); + signal (SIGTERM, stop_signal); + + return 0; +} + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uri_udp_test_main_t *utm = va_arg (*args, uri_udp_test_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uri_udp_test_main_t * utm, connection_state_t state) +{ +#if CLIB_DEBUG > 0 +#define TIMEOUT 600.0 +#else +#define TIMEOUT 600.0 +#endif + + f64 timeout = clib_time_now (&utm->clib_time) + TIMEOUT; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +u64 server_bytes_received, server_bytes_sent; + +static void * +cut_through_thread_fn (void *arg) +{ + session_t *s; + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + u8 *my_copy_buffer = 0; + uri_udp_test_main_t *utm = &uri_udp_test_main; + i32 actual_transfer; + int rv; + u32 buffer_offset; + + while (utm->cut_through_session_index == ~0) + ; + + s = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + vec_validate (my_copy_buffer, 64 * 1024 - 1); + + while (true) + { + /* We read from the tx fifo and write to the rx fifo */ + do + { 
+ actual_transfer = svm_fifo_dequeue_nowait (tx_fifo, 0, + vec_len (my_copy_buffer), + my_copy_buffer); + } + while (actual_transfer <= 0); + + server_bytes_received += actual_transfer; + + buffer_offset = 0; + while (actual_transfer > 0) + { + rv = svm_fifo_enqueue_nowait (rx_fifo, 0, actual_transfer, + my_copy_buffer + buffer_offset); + if (rv > 0) + { + actual_transfer -= rv; + buffer_offset += rv; + server_bytes_sent += rv; + } + + } + if (PREDICT_FALSE (utm->time_to_stop)) + break; + } + + pthread_exit (0); +} + +static void +uri_udp_slave_test (uri_udp_test_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i; + u8 *test_data = 0; + u64 bytes_received = 0, bytes_sent = 0; + i32 bytes_to_read; + int rv; + int mypid = getpid (); + f64 before, after, delta, bytes_per_second; + session_t *session; + svm_fifo_t *rx_fifo, *tx_fifo; + int buffer_offset, bytes_to_send = 0; + + vec_validate (test_data, 64 * 1024 - 1); + for (i = 0; i < vec_len (test_data); i++) + test_data[i] = i & 0xff; + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->connect_uri, vec_len (utm->connect_uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + session = pool_elt_at_index (utm->sessions, utm->cut_through_session_index); + rx_fifo = session->server_rx_fifo; + tx_fifo = session->server_tx_fifo; + + before = clib_time_now (&utm->clib_time); + + vec_validate (utm->rx_buf, vec_len (test_data) - 1); + + for (i = 0; i < NITER; i++) + { + bytes_to_send = vec_len (test_data); + buffer_offset = 0; + while (bytes_to_send > 0) + { + rv = svm_fifo_enqueue_nowait (tx_fifo, mypid, + bytes_to_send, + test_data + buffer_offset); + + if (rv > 0) + { + bytes_to_send -= rv; + buffer_offset += rv; + bytes_sent += rv; + } + } + + bytes_to_read = svm_fifo_max_dequeue (rx_fifo); + + bytes_to_read = vec_len (utm->rx_buf) > bytes_to_read ? 
+ bytes_to_read : vec_len (utm->rx_buf); + + buffer_offset = 0; + while (bytes_to_read > 0) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + bytes_to_read, + utm->rx_buf + buffer_offset); + if (rv > 0) + { + bytes_to_read -= rv; + buffer_offset += rv; + bytes_received += rv; + } + } + } + while (bytes_received < bytes_sent) + { + rv = svm_fifo_dequeue_nowait (rx_fifo, mypid, + vec_len (utm->rx_buf), utm->rx_buf); + if (rv > 0) + { +#if CLIB_DEBUG > 0 + int j; + for (j = 0; j < rv; j++) + { + if (utm->rx_buf[j] != ((bytes_received + j) & 0xff)) + { + clib_warning ("error at byte %lld, 0x%x not 0x%x", + bytes_received + j, + utm->rx_buf[j], + ((bytes_received + j) & 0xff)); + } + } +#endif + bytes_received += (u64) rv; + } + } + + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Done: %lld recv bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_received, delta, bytes_per_second); + fformat (stdout, + "Done: %lld sent bytes in %.2f seconds, %.2f bytes/sec...\n\n", + bytes_sent, delta, bytes_per_second); + fformat (stdout, + "client -> server -> client round trip: %.2f Gbit/sec \n\n", + (bytes_per_second * 8.0) / 1e9); +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + ASSERT (mp->server_event_queue_address); + + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + + utm->our_event_queue = (unix_shared_memory_queue_t *) + mp->server_event_queue_address; + + utm->state = STATE_READY; +} + +static void +vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + /* Attach to the segment vpp created */ + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", + mp->segment_name); + return; + } + clib_warning ("Mapped new segment '%s' size %d", mp->segment_name, + mp->segment_size); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + u32 segment_index; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + svm_fifo_segment_private_t *seg; + unix_shared_memory_queue_t *client_q; + vl_api_connect_uri_reply_t *rmp; + session_t *session; + int rv = 0; + + /* Create the segment */ + a->segment_name = (char *) format (0, "%d:segment%d%c", utm->my_pid, + utm->unique_segment_index++, 0); + a->segment_size = utm->configured_segment_size; + + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", a->segment_name); + rv = VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + goto send_reply; + } + + vec_add2 (utm->seg, seg, 1); + + segment_index = vec_len (sm->segments) - 1; + + memcpy (seg, sm->segments + segment_index, sizeof (utm->seg[0])); + + pool_get (utm->sessions, session); + + /* + * By construction the master's idea of the rx fifo 
ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. + */ + session->server_rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_rx_fifo); + + session->server_tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, + 128 * 1024); + ASSERT (session->server_tx_fifo); + + session->server_rx_fifo->server_session_index = session - utm->sessions; + session->server_tx_fifo->server_session_index = session - utm->sessions; + utm->cut_through_session_index = session - utm->sessions; + + rv = pthread_create (&utm->cut_through_thread_handle, + NULL /*attr */ , cut_through_thread_fn, 0); + if (rv) + { + clib_warning ("pthread_create returned %d", rv); + rv = VNET_API_ERROR_SYSCALL_ERROR_1; + } + +send_reply: + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + + rmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI_REPLY); + rmp->context = mp->context; + rmp->retval = ntohl (rv); + rmp->segment_name_length = vec_len (a->segment_name); + memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); + + vec_free (a->segment_name); + + client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + vl_msg_api_send_shmem (client_q, (u8 *) & rmp); +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +static void +vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + vl_api_accept_session_reply_t *rmp; + svm_fifo_t *rx_fifo, *tx_fifo; + session_t *session; + static f64 start_time; + u64 key; + + if (start_time == 0.0) + start_time = clib_time_now (&utm->clib_time); + + utm->vpp_event_queue = (unix_shared_memory_queue_t *) + mp->vpp_event_queue_address; + + pool_get (utm->sessions, session); + + rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo->client_session_index = session - utm->sessions; + tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo->client_session_index = session - utm->sessions; + + session->server_rx_fifo = rx_fifo; + session->server_tx_fifo = tx_fifo; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + hash_set (utm->session_index_by_vpp_handles, key, session - utm->sessions); + + utm->state = STATE_READY; + + if (pool_elts (utm->sessions) && (pool_elts (utm->sessions) % 20000) == 0) + { + f64 now = clib_time_now (&utm->clib_time); + fformat (stdout, "%d active sessions in %.2f seconds, %.2f/sec...\n", + pool_elts (utm->sessions), now - start_time, + (f64) pool_elts (utm->sessions) / (now - start_time)); + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY); + rmp->session_type = mp->session_type; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + session_t *session; + vl_api_disconnect_session_reply_t *rmp; + uword *p; + int rv = 0; + u64 key; + + key = (((u64) mp->session_thread_index) << 32) | (u64) mp->session_index; + + p = hash_get (utm->session_index_by_vpp_handles, key); + + if (p) + { + session = pool_elt_at_index (utm->sessions, p[0]); + hash_unset 
(utm->session_index_by_vpp_handles, key); + pool_put (utm->sessions, session); + } + else + { + clib_warning ("couldn't find session key %llx", key); + rv = -11; + } + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->retval = rv; + rmp->session_index = mp->session_index; + rmp->session_thread_index = mp->session_thread_index; + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp); +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + svm_fifo_segment_main_t *sm = &svm_fifo_segment_main; + uri_udp_test_main_t *utm = &uri_udp_test_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_private_t *seg; + svm_fifo_segment_header_t *fsh; + session_t *session; + u32 segment_index; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + sleep (1); + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%v') failed", mp->segment_name); + return; + } + + segment_index = vec_len (sm->segments) - 1; + + vec_add2 (utm->seg, seg, 1); + + memcpy (seg, sm->segments + segment_index, sizeof (*seg)); + sh = seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + pool_get (utm->sessions, session); + utm->cut_through_session_index = session - utm->sessions; + + session->server_rx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (session->server_rx_fifo); + session->server_tx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (session->server_tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI, connect_uri) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) + +void +uri_api_hookup (uri_udp_test_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uri_udp_test_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +handle_fifo_event_server_rx (uri_udp_test_main_t * utm, + session_fifo_event_t * e) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + int nbytes; + + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + int rv; + + rx_fifo = e->fifo; + tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo; + + do + { + nbytes = svm_fifo_dequeue_nowait (rx_fifo, 0, + vec_len (utm->rx_buf), utm->rx_buf); + } + while (nbytes <= 0); + do + { + rv = svm_fifo_enqueue_nowait (tx_fifo, 0, nbytes, utm->rx_buf); + } + while (rv == -2); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = nbytes; + evt.event_id = e->event_id; + q = utm->vpp_event_queue; + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); +} + +void +handle_event_queue (uri_udp_test_main_t * utm) +{ + session_fifo_event_t _e, *e = &_e;; + + while (1) + { + unix_shared_memory_queue_sub (utm->our_event_queue, (u8 *) e, + 0 /* nowait */ ); + switch (e->event_type) + { + case FIFO_EVENT_SERVER_RX: + handle_fifo_event_server_rx (utm, e); + break; + + case FIFO_EVENT_SERVER_EXIT: + return; + + default: + clib_warning ("unknown event type %d", e->event_type); + break; + } + if (PREDICT_FALSE (utm->time_to_stop == 1)) + break; + if (PREDICT_FALSE (utm->time_to_print_stats == 1)) + { + utm->time_to_print_stats = 0; + fformat (stdout, "%d connections\n", pool_elts (utm->sessions)); + } + } +} + +void +uri_udp_test (uri_udp_test_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->initial_segment_size = 256 << 20; /* size of initial segment */ + bmp->options[SESSION_OPTIONS_FLAGS] = + SESSION_OPTIONS_FLAGS_USE_FIFO | SESSION_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 16 << 10; + bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + handle_event_queue (utm); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_START"); + return; + } + + fformat (stdout, "Test complete...\n"); +} + +int +main (int argc, char **argv) +{ + uri_udp_test_main_t *utm = &uri_udp_test_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + u8 *bind_name = (u8 *) "udp://0.0.0.0/1234"; + u32 tmp; + mheap_t *h; + session_t *session; + int i; + int i_am_master = 1; + + clib_mem_init (0, 256 << 20); + + heap 
= clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + vec_validate (utm->rx_buf, 8192); + + utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + + utm->my_pid = getpid (); + utm->configured_segment_size = 1 << 20; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else if (unformat (a, "uri %s", &bind_name)) + ; + else if (unformat (a, "segment-size %dM", &tmp)) + utm->configured_segment_size = tmp << 20; + else if (unformat (a, "segment-size %dG", &tmp)) + utm->configured_segment_size = tmp << 30; + else if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + utm->cut_through_session_index = ~0; + utm->uri = format (0, "%s%c", bind_name, 0); + utm->i_am_master = i_am_master; + utm->segment_main = &svm_fifo_segment_main; + + utm->connect_uri = format (0, "udp://10.0.0.1/1234%c", 0); + + setup_signal_handlers (); + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uri_udp_master" : "uri_udp_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + if (i_am_master == 0) + { + uri_udp_slave_test (utm); + exit (0); + } + + /* $$$$ hack preallocation */ + for (i = 0; i < 200000; i++) + { + pool_get (utm->sessions, session); + memset (session, 0, sizeof (*session)); + } + for (i = 0; i < 200000; i++) + pool_put_index (utm->sessions, i); + + uri_udp_test (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +u32 +vl (void *p) +{ + return vec_len (p); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/uri/uritest.c b/src/uri/uritest.c new file mode 100644 index 00000000..edcdb3ad --- /dev/null +++ b/src/uri/uritest.c @@ -0,0 +1,484 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include +#undef vl_printfun + +typedef enum +{ + STATE_START, + STATE_READY, + STATE_DISCONNECTING, +} connection_state_t; + +typedef struct +{ + /* vpe input queue */ + unix_shared_memory_queue_t *vl_input_queue; + + /* API client handle */ + u32 my_client_index; + + /* role */ + int i_am_master; + + /* The URI we're playing with */ + u8 *uri; + + /* fifo segment */ + svm_fifo_segment_private_t *seg; + + svm_fifo_t *rx_fifo; + svm_fifo_t *tx_fifo; + + /* For deadman timers */ + clib_time_t clib_time; + + /* State of the connection, shared between msg RX thread and main thread */ + volatile connection_state_t state; + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; +} uritest_main_t; + +#if CLIB_DEBUG > 0 +#define NITER 1000 +#else +#define NITER 1000000 +#endif + +uritest_main_t uritest_main; + +u8 * +format_api_error (u8 * s, va_list * args) +{ + uritest_main_t *utm = va_arg (*args, uritest_main_t *); + i32 error = va_arg (*args, u32); + uword *p; + + p = hash_get (utm->error_string_by_error_number, -error); + + if (p) + s = format (s, "%s", p[0]); + else + s = format (s, "%d", error); + return s; +} + +int +wait_for_state_change (uritest_main_t * utm, connection_state_t state) +{ + f64 timeout = clib_time_now (&utm->clib_time) + 1.0; + + while (clib_time_now (&utm->clib_time) < timeout) + { + if (utm->state == state) + return 0; + } + return -1; +} + +static void +vl_api_bind_uri_reply_t_handler (vl_api_bind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + ASSERT (utm->i_am_master); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + a->segment_name = (char *) mp->segment_name; + a->segment_size = mp->segment_size; + + /* Create the segment */ + rv = svm_fifo_segment_create (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + + /* + * By construction the master's idea of the rx fifo ends up in + * fsh->fifos[0], and the master's idea of the tx fifo ends up in + * fsh->fifos[1]. 
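+ * A slave attaching to the same segment therefore sees the pair
+ * mirrored. A minimal sketch of the slave side, matching what
+ * vl_api_connect_uri_reply_t_handler does later in this file:
+ *
+ *   fsh = (svm_fifo_segment_header_t *) sh->opaque[0];
+ *   utm->rx_fifo = (svm_fifo_t *) fsh->fifos[1];  (the master's tx)
+ *   utm->tx_fifo = (svm_fifo_t *) fsh->fifos[0];  (the master's rx)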
+ */ + utm->rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->rx_fifo); + + utm->tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, 10240); + ASSERT (utm->tx_fifo); + + utm->state = STATE_READY; +} + +static void +vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + svm_fifo_segment_create_args_t _a, *a = &_a; + ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; + int rv; + + ASSERT (utm->i_am_master == 0); + + if (mp->segment_name_length == 0) + { + clib_warning ("segment_name_length zero"); + return; + } + + memset (a, 0, sizeof (*a)); + + a->segment_name = (char *) mp->segment_name; + + rv = svm_fifo_segment_attach (a); + if (rv) + { + clib_warning ("sm_fifo_segment_create ('%s') failed", mp->segment_name); + return; + } + + vec_validate (utm->seg, 0); + + memcpy (utm->seg, a->rv, sizeof (*utm->seg)); + sh = utm->seg->ssvm.sh; + fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; + + while (vec_len (fsh->fifos) < 2) + sleep (1); + + utm->rx_fifo = (svm_fifo_t *) fsh->fifos[1]; + ASSERT (utm->rx_fifo); + utm->tx_fifo = (svm_fifo_t *) fsh->fifos[0]; + ASSERT (utm->tx_fifo); + + /* security: could unlink /dev/shm/segment_name> here, maybe */ + + utm->state = STATE_READY; +} + +static void +vl_api_unbind_uri_reply_t_handler (vl_api_unbind_uri_reply_t * mp) +{ + uritest_main_t *utm = &uritest_main; + + if (mp->retval != 0) + clib_warning ("returned %d", ntohl (mp->retval)); + + utm->state = STATE_START; +} + +#define foreach_uri_msg \ +_(BIND_URI_REPLY, bind_uri_reply) \ +_(CONNECT_URI_REPLY, connect_uri_reply) \ +_(UNBIND_URI_REPLY, unbind_uri_reply) + +void +uri_api_hookup (uritest_main_t * utm) +{ +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_uri_msg; +#undef _ + +} + + +int +connect_to_vpp (char *name) +{ + uritest_main_t *utm = &uritest_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0) + return -1; + + utm->vl_input_queue = am->shmem_hdr->vl_input_queue; + utm->my_client_index = am->my_client_index; + + return 0; +} + +void +vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...) 
+{ + clib_warning ("BUG"); +} + +static void +init_error_string_table (uritest_main_t * utm) +{ + utm->error_string_by_error_number = hash_create (0, sizeof (uword)); + +#define _(n,v,s) hash_set (utm->error_string_by_error_number, -v, s); + foreach_vnet_api_error; +#undef _ + + hash_set (utm->error_string_by_error_number, 99, "Misc"); +} + +void +uritest_master (uritest_main_t * utm) +{ + vl_api_bind_uri_t *bmp; + vl_api_unbind_uri_t *ump; + int i; + u8 *test_data = 0; + u8 *reply = 0; + u32 reply_len; + int mypid = getpid (); + + for (i = 0; i < 2048; i++) + vec_add1 (test_data, 'a' + (i % 32)); + + bmp = vl_msg_api_alloc (sizeof (*bmp)); + memset (bmp, 0, sizeof (*bmp)); + + bmp->_vl_msg_id = ntohs (VL_API_BIND_URI); + bmp->client_index = utm->my_client_index; + bmp->context = ntohl (0xfeedface); + bmp->segment_size = 256 << 10; + memcpy (bmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & bmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + for (i = 0; i < NITER; i++) + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (test_data), test_data); + + vec_validate (reply, 0); + + reply_len = svm_fifo_dequeue (utm->rx_fifo, mypid, vec_len (reply), reply); + + if (reply_len != 1) + clib_warning ("reply length %d", reply_len); + + if (reply[0] == 1) + fformat (stdout, "Test OK..."); + + ump = vl_msg_api_alloc (sizeof (*ump)); + memset (ump, 0, sizeof (*ump)); + + ump->_vl_msg_id = ntohs (VL_API_UNBIND_URI); + ump->client_index = utm->my_client_index; + memcpy (ump->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & ump); + + if (wait_for_state_change (utm, STATE_START)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + fformat (stdout, "Master done...\n"); +} + +void +uritest_slave (uritest_main_t * utm) +{ + vl_api_connect_uri_t *cmp; + int i, j; + u8 *test_data = 0; + u8 *reply = 0; + u32 bytes_received = 0; + u32 actual_bytes; + int mypid = getpid (); + u8 ok; + f64 before, after, delta, bytes_per_second; + + vec_validate (test_data, 4095); + + cmp = vl_msg_api_alloc (sizeof (*cmp)); + memset (cmp, 0, sizeof (*cmp)); + + cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); + cmp->client_index = utm->my_client_index; + cmp->context = ntohl (0xfeedface); + memcpy (cmp->uri, utm->uri, vec_len (utm->uri)); + vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & cmp); + + if (wait_for_state_change (utm, STATE_READY)) + { + clib_warning ("timeout waiting for STATE_READY"); + return; + } + + ok = 1; + before = clib_time_now (&utm->clib_time); + for (i = 0; i < NITER; i++) + { + actual_bytes = svm_fifo_dequeue (utm->rx_fifo, mypid, + vec_len (test_data), test_data); + j = 0; + while (j < actual_bytes) + { + if (test_data[j] != ('a' + (bytes_received % 32))) + ok = 0; + bytes_received++; + j++; + } + if (bytes_received == NITER * 2048) + break; + } + + vec_add1 (reply, ok); + + svm_fifo_enqueue (utm->tx_fifo, mypid, vec_len (reply), reply); + after = clib_time_now (&utm->clib_time); + delta = after - before; + bytes_per_second = 0.0; + + if (delta > 0.0) + bytes_per_second = (f64) bytes_received / delta; + + fformat (stdout, + "Slave done, %d bytes in %.2f seconds, %.2f bytes/sec...\n", + bytes_received, delta, bytes_per_second); +} + +int +main (int argc, char **argv) +{ + uritest_main_t *utm = &uritest_main; + unformat_input_t _argv, *a = &_argv; + u8 *chroot_prefix; + u8 *heap; + char *bind_name = "fifo:uritest"; + mheap_t 
*h; + int i_am_master = 0; + + clib_mem_init (0, 128 << 20); + + heap = clib_mem_get_per_cpu_heap (); + h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + clib_time_init (&utm->clib_time); + init_error_string_table (utm); + svm_fifo_segment_init (0x200000000ULL, 20); + unformat_init_command_line (a, argv); + + utm->uri = format (0, "%s%c", bind_name, 0); + + while (unformat_check_input (a) != UNFORMAT_END_OF_INPUT) + { + if (unformat (a, "master")) + i_am_master = 1; + else if (unformat (a, "slave")) + i_am_master = 0; + else if (unformat (a, "chroot prefix %s", &chroot_prefix)) + { + vl_set_memory_root_path ((char *) chroot_prefix); + } + else + { + fformat (stderr, "%s: usage [master|slave]\n"); + exit (1); + } + } + + uri_api_hookup (utm); + + if (connect_to_vpp (i_am_master ? "uritest_master" : "uritest_slave") < 0) + { + svm_region_exit (); + fformat (stderr, "Couldn't connect to vpe, exiting...\n"); + exit (1); + } + + utm->i_am_master = i_am_master; + + if (i_am_master) + uritest_master (utm); + else + uritest_slave (utm); + + vl_client_disconnect_from_vlib (); + exit (0); +} + +#undef vl_api_version +#define vl_api_version(n,v) static u32 vpe_api_version = v; +#include +#undef vl_api_version + +void +vl_client_add_api_signatures (vl_api_memclnt_create_t * mp) +{ + /* + * Send the main API signature in slot 0. This bit of code must + * match the checks in ../vpe/api/api.c: vl_msg_api_version_check(). + */ + mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4f5eb09d..9f26bec7 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -360,7 +360,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, memset (f, 0, sizeof (f[0])); f->index = f - bm->buffer_free_list_pool; f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 16; + f->min_n_buffers_each_physmem_alloc = VLIB_FRAME_SIZE; f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); /* Setup free buffer template. 
*/ diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 1f723f3b..69c8c7cc 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -240,6 +240,74 @@ vlib_get_buffer_opaque2 (vlib_buffer_t * b) return (void *) b->opaque2; } +/** \brief Get pointer to the end of buffer's data + * @param b pointer to the buffer + * @return pointer to tail of packet's data + */ +always_inline u8 * +vlib_buffer_get_tail (vlib_buffer_t * b) +{ + return b->data + b->current_data + b->current_length; +} + +/** \brief Append uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) +{ + void *p = vlib_buffer_get_tail (b); + /* XXX make sure there's enough space */ + b->current_length += size; + return p; +} + +/** \brief Prepend uninitialized data to buffer + * @param b pointer to the buffer + * @param size number of uninitialized bytes + * @return pointer to beginning of uninitialized data + */ +always_inline void * +vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data -= size; + b->current_length += size; + + return vlib_buffer_get_current (b); +} + +/** \brief Make head room, typically for packet headers + * @param b pointer to the buffer + * @param size number of head room bytes + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) +{ + ASSERT (b->current_data + VLIB_BUFFER_PRE_DATA_SIZE >= size); + b->current_data += size; + return vlib_buffer_get_current (b); +} + +/** \brief Retrieve bytes from buffer head + * @param b pointer to the buffer + * @param size number of bytes to pull + * @return pointer to start of buffer (current data) + */ +always_inline void * +vlib_buffer_pull (vlib_buffer_t * b, u8 size) +{ + if (b->current_length + VLIB_BUFFER_PRE_DATA_SIZE < size) + return 0; + + void *data = vlib_buffer_get_current (b); + vlib_buffer_advance (b, size); + return data; +} + /* Forward declaration. */ struct vlib_main_t; diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c index 25d28910..e86edec3 100644 --- a/src/vlibmemory/unix_shared_memory_queue.c +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -33,18 +33,13 @@ * nels = number of elements on the queue * elsize = element size, presumably 4 and cacheline-size will * be popular choices. - * coid = consumer coid, from ChannelCreate * pid = consumer pid - * pulse_code = pulse code consumer expects - * pulse_value = pulse value consumer expects - * consumer_prio = consumer's priority, so pulses won't change - * the consumer's priority. * * The idea is to call this function in the queue consumer, * and e-mail the queue pointer to the producer(s). * - * The spp process / main thread allocates one of these - * at startup; its main input queue. The spp main input queue + * The vpp process / main thread allocates one of these + * at startup; its main input queue. The vpp main input queue * has a pointer to it in the shared memory segment header. 
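+ * A minimal usage sketch, mirroring the uri test clients added in this
+ * patch ('q' and 'elem' are placeholder names):
+ *
+ *   producer:  unix_shared_memory_queue_add (q, (u8 *) &elem, 0);
+ *   consumer:  unix_shared_memory_queue_sub (q, (u8 *) &elem, 0);
+ *
+ * passing 0 for the final wait/nowait flag, as those callers do.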
* * You probably want to be on an svm data heap before calling this @@ -70,7 +65,7 @@ unix_shared_memory_queue_init (int nels, q->signal_when_queue_non_empty = signal_when_queue_non_empty; memset (&attr, 0, sizeof (attr)); - memset (&cattr, 0, sizeof (attr)); + memset (&cattr, 0, sizeof (cattr)); if (pthread_mutexattr_init (&attr)) clib_unix_warning ("mutexattr_init"); @@ -277,6 +272,7 @@ unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, clib_memcpy (elem, headp, q->elsize); q->head++; + /* $$$$ JFC shouldn't this be == 0? */ if (q->cursize == q->maxsize) need_broadcast = 1; diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h index f758f17c..13800065 100644 --- a/src/vlibmemory/unix_shared_memory_queue.h +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -29,7 +29,7 @@ typedef struct _unix_shared_memory_queue pthread_cond_t condvar; /* 8 bytes */ int head; int tail; - int cursize; + volatile int cursize; int maxsize; int elsize; int consumer_pid; diff --git a/src/vnet.am b/src/vnet.am index 64484e18..923f61d8 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -324,11 +324,7 @@ libvnet_la_SOURCES += \ vnet/ip/ip_input_acl.c \ vnet/ip/lookup.c \ vnet/ip/ping.c \ - vnet/ip/punt.c \ - vnet/ip/udp_format.c \ - vnet/ip/udp_init.c \ - vnet/ip/udp_local.c \ - vnet/ip/udp_pg.c + vnet/ip/punt.c nobase_include_HEADERS += \ vnet/ip/format.h \ @@ -354,11 +350,7 @@ nobase_include_HEADERS += \ vnet/ip/ports.def \ vnet/ip/protocols.def \ vnet/ip/punt_error.def \ - vnet/ip/punt.h \ - vnet/ip/tcp_packet.h \ - vnet/ip/udp_error.def \ - vnet/ip/udp.h \ - vnet/ip/udp_packet.h + vnet/ip/punt.h API_FILES += vnet/ip/ip.api @@ -473,6 +465,38 @@ test_map_LDADD = libvnet.la libvppinfra.la libvlib.la \ test_map_LDFLAGS = -static endif +######################################## +# Layer 4 protocol: tcp +######################################## +libvnet_la_SOURCES += \ + vnet/tcp/tcp_format.c \ + vnet/tcp/tcp_pg.c \ + vnet/tcp/tcp_syn_filter4.c \ + vnet/tcp/tcp_output.c \ + vnet/tcp/tcp_input.c \ + vnet/tcp/tcp_newreno.c \ + vnet/tcp/tcp.c + +nobase_include_HEADERS += \ + vnet/tcp/tcp_packet.h \ + vnet/tcp/tcp_timer.h \ + vnet/tcp/tcp.h + +######################################## +# Layer 4 protocol: udp +######################################## +libvnet_la_SOURCES += \ + vnet/udp/udp.c \ + vnet/udp/udp_input.c \ + vnet/udp/builtin_server.c \ + vnet/udp/udp_format.c \ + vnet/udp/udp_local.c \ + vnet/udp/udp_pg.c + +nobase_include_HEADERS += \ + vnet/udp/udp_error.def \ + vnet/udp/udp.h \ + vnet/udp/udp_packet.h ######################################## # Tunnel protocol: gre @@ -833,6 +857,28 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/devices/ssvm/ssvm_eth.h +######################################## +# session managmeent +######################################## + +libvnet_la_SOURCES += \ + vnet/session/session.c \ + vnet/session/node.c \ + vnet/session/transport.c \ + vnet/session/application.c \ + vnet/session/session_cli.c \ + vnet/session/hashes.c \ + vnet/session/application_interface.c \ + vnet/session/session_api.c + +nobase_include_HEADERS += \ + vnet/session/session.h \ + vnet/session/application.h \ + vnet/session/transport.h \ + vnet/session/application_interface.h + +API_FILES += vnet/session/session.api + ######################################## # Linux packet interface ######################################## diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h index 8680ef7c..861a5767 100644 --- a/src/vnet/api_errno.h +++ 
b/src/vnet/api_errno.h @@ -91,14 +91,19 @@ _(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \ _(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \ _(TABLE_TOO_BIG, -99, "Table too big") \ _(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \ -_(BFD_EEXIST, -101, "Duplicate BFD object") \ -_(BFD_ENOENT, -102, "No such BFD object") \ -_(BFD_EINUSE, -103, "BFD object in use") \ -_(BFD_NOTSUPP, -104, "BFD feature not supported") \ -_(LISP_RLOC_LOCAL, -105, "RLOC address is local") \ -_(BFD_EAGAIN, -106, "BFD object cannot be manipulated at this time") \ -_(INVALID_GPE_MODE, -107, "Invalid GPE mode") \ -_(LISP_GPE_ENTRIES_PRESENT, -108, "LISP GPE entries are present") +_(BFD_EEXIST, -101, "Duplicate BFD object") \ +_(BFD_ENOENT, -102, "No such BFD object") \ +_(BFD_EINUSE, -103, "BFD object in use") \ +_(BFD_NOTSUPP, -104, "BFD feature not supported") \ +_(ADDRESS_IN_USE, -105, "Address in use") \ +_(ADDRESS_NOT_IN_USE, -106, "Address not in use") \ +_(QUEUE_FULL, -107, "Queue full") \ +_(UNKNOWN_URI_TYPE, -108, "Unknown URI type") \ +_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \ +_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \ +_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \ +_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \ +_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") typedef enum { diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c index 146faad6..cf05089b 100644 --- a/src/vnet/bfd/bfd_udp.c +++ b/src/vnet/bfd/bfd_udp.c @@ -18,12 +18,12 @@ #include #include #include -#include +#include +#include #include #include #include #include -#include #include #include #include diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index f1cc6371..3de01f2a 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -277,6 +277,16 @@ typedef struct u16 buffer_advance; } device_input_feat; + /* TCP */ + struct + { + u32 connection_index; + u32 seq_number; + u32 seq_end; + u32 ack_number; + u8 flags; + } tcp; + u32 unused[6]; }; } vnet_buffer_opaque_t; diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 6093e2ac..b651a1f1 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -695,8 +695,8 @@ int vnet_classify_add_del_table (vnet_classify_main_t * cm, } #define foreach_tcp_proto_field \ -_(src_port) \ -_(dst_port) +_(src) \ +_(dst) #define foreach_udp_proto_field \ _(src_port) \ diff --git a/src/vnet/dhcp/dhcp_proxy.h b/src/vnet/dhcp/dhcp_proxy.h index c0d79c41..4586d883 100644 --- a/src/vnet/dhcp/dhcp_proxy.h +++ b/src/vnet/dhcp/dhcp_proxy.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include typedef enum { #define dhcp_proxy_error(n,s) DHCP_PROXY_ERROR_##n, diff --git a/src/vnet/flow/flow_report.h b/src/vnet/flow/flow_report.h index 4e764377..e8ed3818 100644 --- a/src/vnet/flow/flow_report.h +++ b/src/vnet/flow/flow_report.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 02a1a963..70b4ccd8 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -50,8 +50,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index b184fbae..4e075d0f 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -309,8 +309,8 @@ ip4_compute_flow_hash (const ip4_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; @@ -334,6 +334,44 @@ u8 *format_ip4_forward_next_trace (u8 * s, va_list * args); u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); +#define IP_DF 0x4000 /* don't fragment */ + +/** + * Push IPv4 header to buffer + * + * This does not support fragmentation. + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param prot - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b, + ip4_address_t * src, ip4_address_t * dst, int proto) +{ + ip4_header_t *ih; + + /* make some room */ + ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); + + ih->ip_version_and_header_length = 0x45; + ih->tos = 0; + ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); + + /* No fragments */ + ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); + ih->ttl = 255; + ih->protocol = proto; + ih->src_address.as_u32 = src->as_u32; + ih->dst_address.as_u32 = dst->as_u32; + + ih->checksum = ip4_header_checksum (ih); + return ih; +} #endif /* included_ip_ip4_h */ /* diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 8081b34b..66d91ab6 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1478,8 +1478,18 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } -static uword -ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ + +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; @@ -1487,6 +1497,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u32 *from, *to_next, n_left_from, n_left_to_next; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1513,7 +1524,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) i32 len_diff0, len_diff1; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + u32 sw_if_index0, sw_if_index1; pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; @@ -1522,6 +1533,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 2; n_left_to_next -= 2; + next0 = next1 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -1531,14 +1544,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = 
vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; @@ -1557,6 +1574,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; + + if (head_of_feature_arc == 0) + { + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_checks; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_udp1 = proto1 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1686,6 +1710,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; + skip_checks: next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; next1 = @@ -1694,44 +1719,17 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; p1->error = error1 ? error_node->errors[error1] : 0; - enqueue_code = (next0 != next_index) + 2 * (next1 != next_index); - - if (PREDICT_FALSE (enqueue_code != 0)) + if (head_of_feature_arc) { - switch (enqueue_code) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B B or A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - vlib_put_next_frame (vm, node, next_index, - n_left_to_next); - next_index = next1; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - } - break; - } + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, + next0, next1); } while (n_left_from > 0 && n_left_to_next > 0) @@ -1746,6 +1744,7 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; load_balance_t *lb0; const dpo_id_t *dpo0; + u32 sw_if_index0; pi0 = to_next[0] = from[0]; from += 1; @@ -1753,14 +1752,18 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_to_next -= 1; + next0 = IP_LOCAL_NEXT_DROP; + p0 = vlib_get_buffer (vm, pi0); ip0 = vlib_buffer_get_current (p0); vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - fib_index0 = vec_elt 
(im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; @@ -1775,6 +1778,13 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) /* Treat IP frag packets as "experimental" protocol for now until support of IP frag reassembly is implemented */ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + + if (head_of_feature_arc == 0) + { + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + goto skip_check; + } + is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; @@ -1847,6 +1857,8 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0->dst_address.as_u32 != 0xFFFFFFFF) ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + skip_check: + next0 = lm->local_next_by_ip_protocol[proto0]; next0 = @@ -1854,18 +1866,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0->error = error0 ? error_node->errors[error0] : 0; - if (PREDICT_FALSE (next0 != next_index)) + if (head_of_feature_arc) { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - - next_index = next0; - vlib_get_next_frame (vm, node, next_index, to_next, - n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); @@ -1874,21 +1883,57 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } +static uword +ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_local_node) = { - .function = ip4_local,.name = "ip4-local",.vector_size = - sizeof (u32),.format_trace = - format_ip4_forward_next_trace,.n_next_nodes = - IP_LOCAL_N_NEXT,.next_nodes = + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = { - [IP_LOCAL_NEXT_DROP] = "error-drop", - [IP_LOCAL_NEXT_PUNT] = "error-punt", - [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",} -,}; + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +static uword +ip4_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { + .function = ip4_local_end_of_arc, + .name = "ip4-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc) + +VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { + .arc_name = 
"ip4-local", + .node_name = "ip4-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + void ip4_register_protocol (u32 protocol, u32 node_index) { diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h index 8da788b4..b2c1fcd4 100644 --- a/src/vnet/ip/ip4_packet.h +++ b/src/vnet/ip/ip4_packet.h @@ -41,7 +41,7 @@ #define included_ip4_packet_h #include /* for ip_csum_t */ -#include /* for tcp_header_t */ +#include /* for tcp_header_t */ #include /* for clib_net_to_host_u16 */ /* IP4 address which can be accessed either as 4 bytes @@ -342,10 +342,10 @@ ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) ip0->src_address.data_u32 = dst0; ip0->dst_address.data_u32 = src0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } always_inline void @@ -363,14 +363,14 @@ ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, ip0->dst_address.data_u32 = src0; ip1->dst_address.data_u32 = src1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } #endif /* included_ip4_packet_h */ diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 5456f0f2..2615fbfa 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -461,8 +461,8 @@ ip6_compute_flow_hash (const ip6_header_t * ip, b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; - t1 = is_tcp_udp ? tcp->ports.src : 0; - t2 = is_tcp_udp ? tcp->ports.dst : 0; + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? 
t2 : 0; @@ -497,6 +497,46 @@ int ip6_hbh_register_option (u8 option, int ip6_hbh_unregister_option (u8 option); void ip6_hbh_set_next_override (uword next); +/** + * Push IPv6 header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param prot - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, + ip6_address_t * src, ip6_address_t * dst, int proto) +{ + ip6_header_t *ip6h; + u16 payload_length; + + /* make some room */ + ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); + + ip6h->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6h); + + ip6h->payload_length = clib_host_to_net_u16 (payload_length); + + ip6h->hop_limit = 0xff; + ip6h->protocol = proto; + clib_memcpy (ip6h->src_address.as_u8, src->as_u8, + sizeof (ip6h->src_address)); + clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, + sizeof (ip6h->src_address)); + + return ip6h; +} + #endif /* included_ip_ip6_h */ /* diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 1e551c8b..4fd14b96 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -40,7 +40,7 @@ #ifndef included_ip6_packet_h #define included_ip6_packet_h -#include +#include #include typedef union @@ -373,10 +373,10 @@ ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { u16 src0, dst0; - src0 = tcp0->ports.src; - dst0 = tcp0->ports.dst; - tcp0->ports.src = dst0; - tcp0->ports.dst = src0; + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; } } @@ -400,14 +400,14 @@ ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, { u16 src0, dst0, src1, dst1; - src0 = tcp0->ports.src; - src1 = tcp1->ports.src; - dst0 = tcp0->ports.dst; - dst1 = tcp1->ports.dst; - tcp0->ports.src = dst0; - tcp1->ports.src = dst1; - tcp0->ports.dst = src0; - tcp1->ports.dst = src1; + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; } } diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 9c735128..48558401 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -23,7 +23,7 @@ */ #include #include -#include +#include #include #define foreach_punt_next \ diff --git a/src/vnet/ip/tcp_packet.h b/src/vnet/ip/tcp_packet.h deleted file mode 100644 index 93f73e01..00000000 --- a/src/vnet/ip/tcp_packet.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
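Illustrative usage sketch (not part of this patch): the vlib_buffer_push_ip6() helper added to src/vnet/ip/ip6.h above prepends an IPv6 header into a buffer's existing headroom and derives payload_length from the buffer chain. The wrapper name example_prepend_ip6() and the choice of IP_PROTOCOL_UDP are assumptions for illustration only.

    #include <vnet/ip/ip6.h>

    static void
    example_prepend_ip6 (vlib_main_t * vm, vlib_buffer_t * b,
                         ip6_address_t * src, ip6_address_t * dst)
    {
      ip6_header_t *ip6;

      /* The payload must already be in the buffer; the helper computes
         payload_length from the chain and writes the header in front of
         the current data pointer. */
      ip6 = vlib_buffer_push_ip6 (vm, b, src, dst, IP_PROTOCOL_UDP);

      /* The helper defaults hop_limit to 0xff; callers may override it. */
      ip6->hop_limit = 64;
    }

Later in this patch the LISP control plane drops its private pkt_push_ipv6() in favor of this helper (and its IPv4 counterpart), so the same pattern applies to any node that builds packets from scratch.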
- */ -/* - * ip4/tcp_packet.h: TCP packet format (see RFC 793) - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_tcp_packet_h -#define included_tcp_packet_h - -/* TCP flags bit 0 first. */ -#define foreach_tcp_flag \ - _ (FIN) \ - _ (SYN) \ - _ (RST) \ - _ (PSH) \ - _ (ACK) \ - _ (URG) \ - _ (ECE) \ - _ (CWR) - -enum -{ -#define _(f) TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ - TCP_N_FLAG_BITS, - -#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, - foreach_tcp_flag -#undef _ -}; - -typedef struct -{ - /* Source and destination port. */ - union - { - union - { - struct - { - u16 src, dst; - }; - u32 src_and_dst; - } ports; - struct - { - u16 src_port, dst_port; - }; - }; - - /* Sequence and acknowledgment number. */ - u32 seq_number, ack_number; - - /* Size of TCP header in 32-bit units plus 4 reserved bits. */ - u8 tcp_header_u32s_and_reserved; - - /* see foreach_tcp_flag for enumation of tcp flags. */ - u8 flags; - - /* Current window advertised by sender. - This is the number of bytes sender is willing to receive - right now. */ - u16 window; - - /* Checksum of TCP pseudo header and data. */ - u16 checksum; - - u16 urgent_pointer; -} tcp_header_t; - -always_inline int -tcp_header_bytes (tcp_header_t * t) -{ - return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32); -} - -/* TCP options. */ -typedef enum tcp_option_type -{ - TCP_OPTION_END = 0, - TCP_OPTION_NOP = 1, - TCP_OPTION_MSS = 2, - TCP_OPTION_WINDOW_SCALE = 3, - TCP_OPTION_SACK_PERMITTED = 4, - TCP_OPTION_SACK_BLOCK = 5, - TCP_OPTION_TIME_STAMP = 8, -} tcp_option_type_t; - -/* All except NOP and END have 1 byte length field. */ -typedef struct -{ - tcp_option_type_t type:8; - - /* Length of this option in bytes. */ - u8 length; -} tcp_option_with_length_t; - -#endif /* included_tcp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h deleted file mode 100644 index bad58b5d..00000000 --- a/src/vnet/ip/udp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* - * ip/udp.h: udp protocol - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
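Aside (not part of this patch): the ip4-local split shown earlier — ip4_local_inline() taking a head_of_feature_arc flag, with ip4-local-end-of-arc registered as the terminal feature — turns "ip4-local" into a feature arc started via vnet_feature_arc_start(). A hypothetical feature would hook that arc with the same VNET_FEATURE_INIT pattern used above; the node name "my-ip4-local-feature" is an assumption, and its VLIB_REGISTER_NODE (e.g. as a sibling of "ip4-local", as done for ip4-local-end-of-arc) is omitted from this sketch.

    VNET_FEATURE_INIT (my_ip4_local_feature, static) = {
      .arc_name = "ip4-local",
      .node_name = "my-ip4-local-feature",
      .runs_before = 0,  /* ordering constraints omitted in this sketch */
    };

Once registered, the feature node sees locally destined IPv4 packets after the checks performed at the head of the arc and before ip4-local-end-of-arc completes the dispatch.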
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_udp_h -#define included_udp_h - -#include -#include -#include -#include -#include -#include -#include - -typedef enum -{ -#define udp_error(n,s) UDP_ERROR_##n, -#include -#undef udp_error - UDP_N_ERROR, -} udp_error_t; - -#define foreach_udp4_dst_port \ -_ (67, dhcp_to_server) \ -_ (68, dhcp_to_client) \ -_ (500, ikev2) \ -_ (3784, bfd4) \ -_ (3785, bfd_echo4) \ -_ (4341, lisp_gpe) \ -_ (4342, lisp_cp) \ -_ (4739, ipfix) \ -_ (4789, vxlan) \ -_ (4789, vxlan6) \ -_ (4790, vxlan_gpe) \ -_ (6633, vpath_3) - - -#define foreach_udp6_dst_port \ -_ (547, dhcpv6_to_server) \ -_ (546, dhcpv6_to_client) \ -_ (3784, bfd6) \ -_ (3785, bfd_echo6) \ -_ (4341, lisp_gpe6) \ -_ (4342, lisp_cp6) \ -_ (4790, vxlan6_gpe) \ -_ (6633, vpath6_3) - -typedef enum -{ -#define _(n,f) UDP_DST_PORT_##f = n, - foreach_udp4_dst_port foreach_udp6_dst_port -#undef _ -} udp_dst_port_t; - -typedef enum -{ -#define _(n,f) UDP6_DST_PORT_##f = n, - foreach_udp6_dst_port -#undef _ -} udp6_dst_port_t; - -typedef struct -{ - /* Name (a c string). */ - char *name; - - /* GRE protocol type in host byte order. */ - udp_dst_port_t dst_port; - - /* Node which handles this type. */ - u32 node_index; - - /* Next index for this type. */ - u32 next_index; -} udp_dst_port_info_t; - -typedef enum -{ - UDP_IP6 = 0, - UDP_IP4, /* the code is full of is_ip4... */ - N_UDP_AF, -} udp_af_t; - -typedef struct -{ - udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; - - /* Hash tables mapping name/protocol to protocol info index. */ - uword *dst_port_info_by_name[N_UDP_AF]; - uword *dst_port_info_by_dst_port[N_UDP_AF]; - - /* convenience */ - vlib_main_t *vlib_main; -} udp_main_t; - -always_inline udp_dst_port_info_t * -udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) -{ - uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); - return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; -} - -format_function_t format_udp_header; -format_function_t format_udp_rx_trace; - -unformat_function_t unformat_udp_header; - -void udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, - u32 node_index, u8 is_ip4); - -void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); - -always_inline void -ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) -{ - u16 new_l0; - udp_header_t *udp0; - - if (is_ip4) - { - ip4_header_t *ip0; - ip_csum_t sum0; - u16 old_l0 = 0; - - ip0 = vlib_buffer_get_current (b0); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - ip0->checksum = ip_csum_fold (sum0); - ip0->length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - udp0->length = new_l0; - } - else - { - ip6_header_t *ip0; - int bogus0; - - ip0 = vlib_buffer_get_current (b0); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - ip0->payload_length = new_l0; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp0->length = new_l0; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - ASSERT (bogus0 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - } -} - -always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, - u8 is_ip4) -{ - vlib_buffer_advance (b0, -ec_len); - - if (is_ip4) - { - ip4_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 1); - } - else - { - ip6_header_t *ip0; - - ip0 = vlib_buffer_get_current (b0); - - /* Apply the encap string. 
*/ - clib_memcpy (ip0, ec0, ec_len); - ip_udp_fixup_one (vm, b0, 0); - } -} - -always_inline void -ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) -{ - u16 new_l0, new_l1; - udp_header_t *udp0, *udp1; - - ASSERT (_vec_len (ec0) == _vec_len (ec1)); - - vlib_buffer_advance (b0, -ec_len); - vlib_buffer_advance (b1, -ec_len); - - if (is_v4) - { - ip4_header_t *ip0, *ip1; - ip_csum_t sum0, sum1; - u16 old_l0 = 0, old_l1 = 0; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - /* fix the ing outer-IP checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; - - /* old_l0 always 0, see the rewrite setup */ - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */ ); - sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */ ); - - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - - ip0->length = new_l0; - ip1->length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - udp0->length = new_l0; - udp1->length = new_l1; - } - else - { - ip6_header_t *ip0, *ip1; - int bogus0, bogus1; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Apply the encap string. */ - clib_memcpy (ip0, ec0, ec_len); - clib_memcpy (ip1, ec1, ec_len); - - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - - sizeof (*ip0)); - new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - - sizeof (*ip1)); - ip0->payload_length = new_l0; - ip1->payload_length = new_l1; - - /* Fix UDP length */ - udp0 = (udp_header_t *) (ip0 + 1); - udp1 = (udp_header_t *) (ip1 + 1); - - udp0->length = new_l0; - udp1->length = new_l1; - - udp0->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); - udp1->checksum = - ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); - ASSERT (bogus0 == 0); - ASSERT (bogus1 == 0); - - if (udp0->checksum == 0) - udp0->checksum = 0xffff; - if (udp1->checksum == 0) - udp1->checksum = 0xffff; - } -} - -#endif /* included_udp_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_error.def b/src/vnet/ip/udp_error.def deleted file mode 100644 index bfdae0ac..00000000 --- a/src/vnet/ip/udp_error.def +++ /dev/null @@ -1,21 +0,0 @@ -/* - * udp_error.def: udp errors - * - * Copyright (c) 2013-2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
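Aside (not part of this patch): udp_register_dst_port() and udp_punt_unknown(), declared in the udp.h being relocated here, are how a protocol arranges for ip4-udp-lookup / ip6-udp-lookup to hand it packets. A minimal sketch follows; the node my_proto_node, the port number 4999, and the init function name are assumptions, and includes are omitted because the header's new location is outside this hunk.

    extern vlib_node_registration_t my_proto_node;  /* assumed, registered elsewhere */

    static clib_error_t *
    my_proto_udp_init (vlib_main_t * vm)
    {
      /* Dispatch IPv4 UDP packets with dst port 4999 (host byte order)
         to our node instead of the default ICMP port-unreachable path. */
      udp_register_dst_port (vm, 4999, my_proto_node.index, 1 /* is_ip4 */ );

      /* Optionally punt packets for unregistered ports to error-punt
         rather than answering with ICMP unreachables. */
      udp_punt_unknown (vm, 1 /* is_ip4 */ , 1 /* is_add */ );

      return 0;
    }

    VLIB_INIT_FUNCTION (my_proto_udp_init);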
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -udp_error (NONE, "no error") -udp_error (NO_LISTENER, "no listener for dst port") -udp_error (LENGTH_ERROR, "UDP packets with length errors") -udp_error (PUNT, "no listener punt") diff --git a/src/vnet/ip/udp_format.c b/src/vnet/ip/udp_format.c deleted file mode 100644 index abdf561e..00000000 --- a/src/vnet/ip/udp_format.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_format.c: udp formatting - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -/* Format UDP header. */ -u8 * -format_udp_header (u8 * s, va_list * args) -{ - udp_header_t *udp = va_arg (*args, udp_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword indent; - u32 header_bytes = sizeof (udp[0]); - - /* Nothing to do. */ - if (max_header_bytes < sizeof (udp[0])) - return format (s, "UDP header truncated"); - - indent = format_get_indent (s); - indent += 2; - - s = format (s, "UDP: %d -> %d", - clib_net_to_host_u16 (udp->src_port), - clib_net_to_host_u16 (udp->dst_port)); - - s = format (s, "\n%Ulength %d, checksum 0x%04x", - format_white_space, indent, - clib_net_to_host_u16 (udp->length), - clib_net_to_host_u16 (udp->checksum)); - - /* Recurse into next protocol layer. 
*/ - if (max_header_bytes != 0 && header_bytes < max_header_bytes) - { - ip_main_t *im = &ip_main; - tcp_udp_port_info_t *pi; - - pi = ip_get_tcp_udp_port_info (im, udp->dst_port); - - if (pi && pi->format_header) - s = format (s, "\n%U%U", - format_white_space, indent - 2, pi->format_header, - /* next protocol header */ (udp + 1), - max_header_bytes - sizeof (udp[0])); - } - - return s; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_init.c b/src/vnet/ip/udp_init.c deleted file mode 100644 index 1241ca4a..00000000 --- a/src/vnet/ip/udp_init.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_init.c: udp initialization - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -clib_error_t * -udp_init (vlib_main_t * vm) -{ - ip_main_t *im = &ip_main; - ip_protocol_info_t *pi; - clib_error_t *error; - - error = vlib_call_init_function (vm, ip_main_init); - - if (!error) - { - pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); - if (pi == 0) - return clib_error_return (0, "UDP protocol info AWOL"); - pi->format_header = format_udp_header; - pi->unformat_pg_edit = unformat_pg_udp_header; - } - - return 0; -} - -VLIB_INIT_FUNCTION (udp_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_local.c b/src/vnet/ip/udp_local.c deleted file mode 100644 index 13ab6e4f..00000000 --- a/src/vnet/ip/udp_local.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * node.c: udp packet processing - * - * Copyright (c) 2013 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -udp_main_t udp_main; - -#define foreach_udp_input_next \ - _ (PUNT, "error-punt") \ - _ (DROP, "error-drop") \ - _ (ICMP4_ERROR, "ip4-icmp-error") \ - _ (ICMP6_ERROR, "ip6-icmp-error") - -typedef enum -{ -#define _(s,n) UDP_INPUT_NEXT_##s, - foreach_udp_input_next -#undef _ - UDP_INPUT_N_NEXT, -} udp_input_next_t; - -typedef struct -{ - u16 src_port; - u16 dst_port; - u8 bound; -} udp_rx_trace_t; - -u8 * -format_udp_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); - - s = format (s, "UDP: src-port %d dst-port %d%s", - clib_net_to_host_u16 (t->src_port), - clib_net_to_host_u16 (t->dst_port), - t->bound ? "" : " (no listener)"); - return s; -} - -typedef struct -{ - /* Sparse vector mapping udp dst_port in network byte order - to next index. */ - u16 *next_by_dst_port; - u8 punt_unknown; -} udp_input_runtime_t; - -vlib_node_registration_t udp4_input_node; -vlib_node_registration_t udp6_input_node; - -always_inline uword -udp46_input_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) -{ - udp_input_runtime_t *rt = is_ip4 ? - (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) - : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); - __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; - word n_no_listener = 0; - u8 punt_unknown = rt->punt_unknown; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - udp_header_t *h0 = 0, *h1 = 0; - u32 i0, i1, dst_port0, dst_port1; - u32 advance0, advance1; - u32 error0, next0, error1, next1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - { - advance0 = sizeof (ip4_header_t); - advance1 = sizeof (ip4_header_t); - } - else - { - advance0 = sizeof (ip6_header_t); - advance1 = sizeof (ip6_header_t); - } - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b0, advance0); - h0 = vlib_buffer_get_current (b0); - error0 = next0 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > - vlib_buffer_length_in_chain (vm, b0))) - { - error0 = UDP_ERROR_LENGTH_ERROR; - next0 = UDP_INPUT_NEXT_DROP; - } - } - - if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - else - { - vlib_buffer_advance (b1, advance1); - h1 = vlib_buffer_get_current (b1); - error1 = next1 = 0; - if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > - vlib_buffer_length_in_chain (vm, b1))) - { - error1 = UDP_ERROR_LENGTH_ERROR; - next1 = UDP_INPUT_NEXT_DROP; - } - } - - /* Index sparse array with network byte order. */ - dst_port0 = (error0 == 0) ? h0->dst_port : 0; - dst_port1 = (error1 == 0) ? h1->dst_port : 0; - sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, - &i0, &i1); - next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; - next1 = (error1 == 0) ? 
vec_elt (rt->next_by_dst_port, i1) : next1; - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - - if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b1, -(word) advance1); - - if (PREDICT_FALSE (punt_unknown)) - { - b1->error = node->errors[UDP_ERROR_PUNT]; - next1 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b1, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b1, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next1 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b1->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b1, sizeof (*h1)); - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0 ? h0->src_port : 0; - tr->dst_port = h0 ? h0->dst_port : 0; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h1 ? h1->src_port : 0; - tr->dst_port = h1 ? 
h1->dst_port : 0; - tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && - next1 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - udp_header_t *h0 = 0; - u32 i0, next0; - u32 advance0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - /* ip4/6_local hands us the ip header, not the udp header */ - if (is_ip4) - advance0 = sizeof (ip4_header_t); - else - advance0 = sizeof (ip6_header_t); - - if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - goto trace_x1; - } - - vlib_buffer_advance (b0, advance0); - - h0 = vlib_buffer_get_current (b0); - - if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= - vlib_buffer_length_in_chain (vm, b0))) - { - i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); - next0 = vec_elt (rt->next_by_dst_port, i0); - - if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) - { - // move the pointer back so icmp-error can find the - // ip packet header - vlib_buffer_advance (b0, -(word) advance0); - - if (PREDICT_FALSE (punt_unknown)) - { - b0->error = node->errors[UDP_ERROR_PUNT]; - next0 = UDP_INPUT_NEXT_PUNT; - } - else if (is_ip4) - { - icmp4_error_set_vnet_buffer (b0, - ICMP4_destination_unreachable, - ICMP4_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP4_ERROR; - n_no_listener++; - } - else - { - icmp6_error_set_vnet_buffer (b0, - ICMP6_destination_unreachable, - ICMP6_destination_unreachable_port_unreachable, - 0); - next0 = UDP_INPUT_NEXT_ICMP6_ERROR; - n_no_listener++; - } - } - else - { - b0->error = node->errors[UDP_ERROR_NONE]; - // advance to the payload - vlib_buffer_advance (b0, sizeof (*h0)); - } - } - else - { - b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; - next0 = UDP_INPUT_NEXT_DROP; - } - - trace_x1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - udp_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) - { - tr->src_port = h0->src_port; - tr->dst_port = h0->dst_port; - tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && - next0 != UDP_INPUT_NEXT_ICMP6_ERROR); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, - n_no_listener); - return from_frame->n_vectors; -} - -static char *udp_error_strings[] = { -#define udp_error(n,s) s, -#include "udp_error.def" -#undef udp_error -}; - -static uword -udp4_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); -} - -static uword -udp6_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp4_input_node) = { - .function = udp4_input, - .name = "ip4-udp-lookup", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (udp6_input_node) = { - .function = udp6_input, - .name = "ip6-udp-lookup", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof (udp_input_runtime_t), - - .n_errors = UDP_N_ERROR, - .error_strings = udp_error_strings, - - .n_next_nodes = UDP_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [UDP_INPUT_NEXT_##s] = n, - foreach_udp_input_next -#undef _ - }, - - .format_buffer = format_udp_header, - .format_trace = format_udp_rx_trace, - .unformat_buffer = unformat_udp_header, -}; -/* *INDENT-ON* */ - -VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); - -static void -add_dst_port (udp_main_t * um, - udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) -{ - udp_dst_port_info_t *pi; - u32 i; - - vec_add2 (um->dst_port_infos[is_ip4], pi, 1); - i = pi - um->dst_port_infos[is_ip4]; - - pi->name = dst_port_name; - pi->dst_port = dst_port; - pi->next_index = pi->node_index = ~0; - - hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); - - if (pi->name) - hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); -} - -void -udp_register_dst_port (vlib_main_t * vm, - udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) -{ - udp_main_t *um = &udp_main; - udp_dst_port_info_t *pi; - udp_input_runtime_t *rt; - u16 *n; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - if (!pi) - { - add_dst_port (um, dst_port, 0, is_ip4); - pi = udp_get_dst_port_info (um, dst_port, is_ip4); - ASSERT (pi); - } - - pi->node_index = node_index; - pi->next_index = vlib_node_add_next (vm, - is_ip4 ? udp4_input_node.index - : udp6_input_node.index, node_index); - - /* Setup udp protocol -> next index sparse vector mapping. */ - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - n = sparse_vec_validate (rt->next_by_dst_port, - clib_host_to_net_u16 (dst_port)); - n[0] = pi->next_index; -} - -void -udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) -{ - udp_input_runtime_t *rt; - - { - clib_error_t *error = vlib_call_init_function (vm, udp_local_init); - if (error) - clib_error_report (error); - } - - rt = vlib_node_get_runtime_data - (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); - - rt->punt_unknown = is_add; -} - -/* Parse a UDP header. */ -uword -unformat_udp_header (unformat_input_t * input, va_list * args) -{ - u8 **result = va_arg (*args, u8 **); - udp_header_t *udp; - __attribute__ ((unused)) int old_length; - u16 src_port, dst_port; - - /* Allocate space for IP header. 
*/ - { - void *p; - - old_length = vec_len (*result); - vec_add2 (*result, p, sizeof (ip4_header_t)); - udp = p; - } - - memset (udp, 0, sizeof (udp[0])); - if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) - { - udp->src_port = clib_host_to_net_u16 (src_port); - udp->dst_port = clib_host_to_net_u16 (dst_port); - return 1; - } - return 0; -} - -static void -udp_setup_node (vlib_main_t * vm, u32 node_index) -{ - vlib_node_t *n = vlib_get_node (vm, node_index); - pg_node_t *pn = pg_get_node (node_index); - - n->format_buffer = format_udp_header; - n->unformat_buffer = unformat_udp_header; - pn->unformat_edit = unformat_pg_udp_header; -} - -clib_error_t * -udp_local_init (vlib_main_t * vm) -{ - udp_input_runtime_t *rt; - udp_main_t *um = &udp_main; - int i; - - { - clib_error_t *error; - error = vlib_call_init_function (vm, udp_init); - if (error) - clib_error_report (error); - } - - - for (i = 0; i < 2; i++) - { - um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); - um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); - } - - udp_setup_node (vm, udp4_input_node.index); - udp_setup_node (vm, udp6_input_node.index); - - rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); - foreach_udp4_dst_port -#undef _ - rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); - - rt->next_by_dst_port = sparse_vec_new - ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), - /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); - - rt->punt_unknown = 0; - -#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); - foreach_udp6_dst_port -#undef _ - ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); - /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */ - return 0; -} - -VLIB_INIT_FUNCTION (udp_local_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_packet.h b/src/vnet/ip/udp_packet.h deleted file mode 100644 index beea3059..00000000 --- a/src/vnet/ip/udp_packet.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -/* - * ip4/udp_packet.h: UDP packet format - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef included_udp_packet_h -#define included_udp_packet_h - -typedef struct -{ - /* Source and destination port. */ - u16 src_port, dst_port; - - /* Length of UDP header plus payload. */ - u16 length; - - /* Checksum of UDP pseudo-header and data or - zero if checksum is disabled. */ - u16 checksum; -} udp_header_t; - -#endif /* included_udp_packet_h */ - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ip/udp_pg.c b/src/vnet/ip/udp_pg.c deleted file mode 100644 index c9d8d38c..00000000 --- a/src/vnet/ip/udp_pg.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * ip/udp_pg: UDP packet-generator interface - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include /* for unformat_udp_udp_port */ - -#define UDP_PG_EDIT_LENGTH (1 << 0) -#define UDP_PG_EDIT_CHECKSUM (1 << 1) - -always_inline void -udp_pg_edit_function_inline (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, - u32 * packets, u32 n_packets, u32 flags) -{ - vlib_main_t *vm = vlib_get_main (); - u32 ip_offset, udp_offset; - - udp_offset = g->start_byte_offset; - ip_offset = (g - 1)->start_byte_offset; - - while (n_packets >= 1) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - udp_header_t *udp0; - u32 udp_len0; - - p0 = vlib_get_buffer (vm, packets[0]); - n_packets -= 1; - packets += 1; - - ip0 = (void *) (p0->data + ip_offset); - udp0 = (void *) (p0->data + udp_offset); - udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); - - if (flags & UDP_PG_EDIT_LENGTH) - udp0->length = - clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) - - ip_offset); - - /* Initialize checksum with header. */ - if (flags & UDP_PG_EDIT_CHECKSUM) - { - ip_csum_t sum0; - - sum0 = clib_mem_unaligned (&ip0->src_address, u64); - - sum0 = ip_csum_with_carry - (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); - - /* Invalidate possibly old checksum. */ - udp0->checksum = 0; - - sum0 = - ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, - sum0); - - sum0 = ~ip_csum_fold (sum0); - - /* Zero checksum means checksumming disabled. */ - sum0 = sum0 != 0 ? sum0 : 0xffff; - - udp0->checksum = sum0; - } - } -} - -static void -udp_pg_edit_function (pg_main_t * pg, - pg_stream_t * s, - pg_edit_group_t * g, u32 * packets, u32 n_packets) -{ - switch (g->edit_function_opaque) - { - case UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_LENGTH); - break; - - case UDP_PG_EDIT_CHECKSUM: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM); - break; - - case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: - udp_pg_edit_function_inline (pg, s, g, packets, n_packets, - UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); - break; - - default: - ASSERT (0); - break; - } -} - -typedef struct -{ - pg_edit_t src_port, dst_port; - pg_edit_t length; - pg_edit_t checksum; -} pg_udp_header_t; - -static inline void -pg_udp_header_init (pg_udp_header_t * p) -{ - /* Initialize fields that are not bit fields in the IP header. */ -#define _(f) pg_edit_init (&p->f, udp_header_t, f); - _(src_port); - _(dst_port); - _(length); - _(checksum); -#undef _ -} - -uword -unformat_pg_udp_header (unformat_input_t * input, va_list * args) -{ - pg_stream_t *s = va_arg (*args, pg_stream_t *); - pg_udp_header_t *p; - u32 group_index; - - p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), - &group_index); - pg_udp_header_init (p); - - /* Defaults. */ - p->checksum.type = PG_EDIT_UNSPECIFIED; - p->length.type = PG_EDIT_UNSPECIFIED; - - if (!unformat (input, "UDP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src_port, - unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) - goto error; - - /* Parse options. 
*/ - while (1) - { - if (unformat (input, "length %U", - unformat_pg_edit, unformat_pg_number, &p->length)) - ; - - else if (unformat (input, "checksum %U", - unformat_pg_edit, unformat_pg_number, &p->checksum)) - ; - - /* Can't parse input: try next protocol level. */ - else - break; - } - - { - ip_main_t *im = &ip_main; - u16 dst_port; - tcp_udp_port_info_t *pi; - - pi = 0; - if (p->dst_port.type == PG_EDIT_FIXED) - { - dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); - pi = ip_get_tcp_udp_port_info (im, dst_port); - } - - if (pi && pi->unformat_pg_edit - && unformat_user (input, pi->unformat_pg_edit, s)) - ; - - else if (!unformat_user (input, unformat_pg_payload, s)) - goto error; - - p = pg_get_edit_group (s, group_index); - if (p->checksum.type == PG_EDIT_UNSPECIFIED - || p->length.type == PG_EDIT_UNSPECIFIED) - { - pg_edit_group_t *g = pg_stream_get_group (s, group_index); - g->edit_function = udp_pg_edit_function; - g->edit_function_opaque = 0; - if (p->checksum.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; - if (p->length.type == PG_EDIT_UNSPECIFIED) - g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; - } - - return 1; - } - -error: - /* Free up any edits we may have added. */ - pg_free_edit_group (s); - return 0; -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 09209334..2c1074d8 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/ipsec/ikev2_cli.c b/src/vnet/ipsec/ikev2_cli.c index 5c88d8d4..05ed4e60 100644 --- a/src/vnet/ipsec/ikev2_cli.c +++ b/src/vnet/ipsec/ikev2_cli.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vnet/ipsec/ikev2_crypto.c b/src/vnet/ipsec/ikev2_crypto.c index c201d3eb..ca56158f 100644 --- a/src/vnet/ipsec/ikev2_crypto.c +++ b/src/vnet/ipsec/ikev2_crypto.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-cp/packets.c b/src/vnet/lisp-cp/packets.c index 3a4f421b..f24024f1 100644 --- a/src/vnet/lisp-cp/packets.c +++ b/src/vnet/lisp-cp/packets.c @@ -15,7 +15,7 @@ #include #include -#include +#include /* Returns IP ID for the packet */ /* static u16 ip_id = 0; @@ -141,61 +141,6 @@ pkt_push_udp (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, u16 dp) return uh; } -void * -pkt_push_ipv4 (vlib_main_t * vm, vlib_buffer_t * b, ip4_address_t * src, - ip4_address_t * dst, int proto) -{ - ip4_header_t *ih; - - /* make some room */ - ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t)); - - ih->ip_version_and_header_length = 0x45; - ih->tos = 0; - ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)); - - /* iph->fragment_id = clib_host_to_net_u16(get_IP_ID ()); */ - - /* TODO: decide if we allow fragments in case of control */ - ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF); - ih->ttl = 255; - ih->protocol = proto; - ih->src_address.as_u32 = src->as_u32; - ih->dst_address.as_u32 = dst->as_u32; - - ih->checksum = ip4_header_checksum (ih); - return ih; -} - -void * -pkt_push_ipv6 (vlib_main_t * vm, vlib_buffer_t * b, ip6_address_t * src, - ip6_address_t * dst, int proto) -{ - ip6_header_t *ip6h; - u16 payload_length; - - /* make some room */ - ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); - - 
ip6h->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - - /* calculate ip6 payload length */ - payload_length = vlib_buffer_length_in_chain (vm, b); - payload_length -= sizeof (*ip6h); - - ip6h->payload_length = clib_host_to_net_u16 (payload_length); - - ip6h->hop_limit = 0xff; - ip6h->protocol = proto; - clib_memcpy (ip6h->src_address.as_u8, src->as_u8, - sizeof (ip6h->src_address)); - clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8, - sizeof (ip6h->src_address)); - - return ip6h; -} - void * pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, ip_address_t * dst, u32 proto) @@ -210,12 +155,12 @@ pkt_push_ip (vlib_main_t * vm, vlib_buffer_t * b, ip_address_t * src, switch (ip_addr_version (src)) { case IP4: - return pkt_push_ipv4 (vm, b, &ip_addr_v4 (src), &ip_addr_v4 (dst), - proto); + return vlib_buffer_push_ip4 (vm, b, &ip_addr_v4 (src), + &ip_addr_v4 (dst), proto); break; case IP6: - return pkt_push_ipv6 (vm, b, &ip_addr_v6 (src), &ip_addr_v6 (dst), - proto); + return vlib_buffer_push_ip6 (vm, b, &ip_addr_v6 (src), + &ip_addr_v6 (dst), proto); break; } diff --git a/src/vnet/lisp-cp/packets.h b/src/vnet/lisp-cp/packets.h index 212a1d78..f6da3bf4 100644 --- a/src/vnet/lisp-cp/packets.h +++ b/src/vnet/lisp-cp/packets.h @@ -26,51 +26,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp, void *pkt_push_ecm_hdr (vlib_buffer_t * b); -always_inline u8 * -vlib_buffer_get_tail (vlib_buffer_t * b) -{ - return b->data + b->current_data + b->current_length; -} - -always_inline void * -vlib_buffer_put_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - void *p = vlib_buffer_get_tail (b); - b->current_length += size; - return p; -} - -always_inline void * -vlib_buffer_push_uninit (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! */ - ASSERT (b->current_data >= size); - b->current_data -= size; - b->current_length += size; - - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_make_headroom (vlib_buffer_t * b, u8 size) -{ - /* XXX should make sure there's enough space! 
*/ - b->current_data += size; - return vlib_buffer_get_current (b); -} - -always_inline void * -vlib_buffer_pull (vlib_buffer_t * b, u8 size) -{ - if (b->current_length < size) - return 0; - - void *data = vlib_buffer_get_current (b); - vlib_buffer_advance (b, size); - return data; -} - /* *INDENT-ON* */ /* diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index 13359277..292c7e6a 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/vnet/lisp-gpe/lisp_gpe.h b/src/vnet/lisp-gpe/lisp_gpe.h index c898a7da..b5a50ec6 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.h +++ b/src/vnet/lisp-gpe/lisp_gpe.h @@ -27,10 +27,12 @@ #include #include #include -#include +#include #include #include #include +#include +#include /** IP4-UDP-LISP encap header */ /* *INDENT-OFF* */ diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index 65006b81..dbcf7134 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include /** * Memory pool of all adjacencies diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c new file mode 100644 index 00000000..a561e7d1 --- /dev/null +++ b/src/vnet/session/application.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +/* + * Pool from which we allocate all applications + */ +static application_t *app_pool; + +/* + * Hash table of apps by api client index + */ +static uword *app_by_api_client_index; + +int +application_api_queue_is_full (application_t * app) +{ + unix_shared_memory_queue_t *q; + + /* builtin servers are always OK */ + if (app->api_client_index == ~0) + return 0; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + if (!q) + return 1; + + if (q->cursize == q->maxsize) + return 1; + return 0; +} + +static void +application_table_add (application_t * app) +{ + hash_set (app_by_api_client_index, app->api_client_index, app->index); +} + +static void +application_table_del (application_t * app) +{ + hash_unset (app_by_api_client_index, app->api_client_index); +} + +application_t * +application_lookup (u32 api_client_index) +{ + uword *p; + p = hash_get (app_by_api_client_index, api_client_index); + if (p) + return application_get (p[0]); + + return 0; +} + +void +application_del (application_t * app) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + void *oldheap; + session_manager_t *sm; + + if (app->mode == APP_SERVER) + { + sm = session_manager_get (app->session_manager_index); + session_manager_del (smm, sm); + } + + /* Free the event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + if (app->event_queue) + unix_shared_memory_queue_free (app->event_queue); + svm_pop_heap (oldheap); + + application_table_del (app); + + pool_put (app_pool, app); +} + +application_t * +application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + api_main_t *am = &api_main; + application_t *app; + void *oldheap; + session_manager_t *sm; + + pool_get (app_pool, app); + memset (app, 0, sizeof (*app)); + + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + /* Allocate server event queue */ + app->event_queue = + unix_shared_memory_queue_init (128 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + + /* If a server, allocate session manager */ + if (type == APP_SERVER) + { + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + app->session_manager_index = sm - smm->session_managers; + } + else if (type == APP_CLIENT) + { + /* Allocate connect session manager if needed */ + if (smm->connect_manager_index[sst] == INVALID_INDEX) + connects_session_manager_init (smm, sst); + app->session_manager_index = smm->connect_manager_index[sst]; + } + + app->mode = type; + app->index = application_get_index (app); + app->session_type = sst; + app->api_client_index = api_client_index; + app->flags = flags; + app->cb_fns = *cb_fns; + + /* Add app to lookup by api_client_index table */ + application_table_add (app); + + return app; +} + +application_t * +application_get (u32 index) +{ + return pool_elt_at_index (app_pool, index); +} + +u32 +application_get_index (application_t * app) +{ + return app - app_pool; +} + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_manager_t *sm; + 
int rv; + + sm = session_manager_get (server->session_manager_index); + + /* Add first segment */ + if ((rv = session_manager_add_first_segment (smm, sm, segment_size, + segment_name))) + { + return rv; + } + + /* Setup session manager */ + sm->add_segment_size = add_segment_size; + sm->rx_fifo_size = rx_fifo_size; + sm->tx_fifo_size = tx_fifo_size; + sm->add_segment = sm->add_segment_size != 0; + return 0; +} + +u8 * +format_application_server (u8 * s, va_list * args) +{ + application_t *srv = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + vl_api_registration_t *regp; + stream_session_t *listener; + u8 *server_name, *str, *seg_name; + u32 segment_size; + + if (srv == 0) + { + if (verbose) + s = format (s, "%-40s%-20s%-15s%-15s%-10s", "Connection", "Server", + "Segment", "API Client", "Cookie"); + else + s = format (s, "%-40s%-20s", "Connection", "Server"); + + return s; + } + + regp = vl_api_client_index_to_registration (srv->api_client_index); + if (!regp) + server_name = format (0, "%s%c", regp->name, 0); + else + server_name = regp->name; + + listener = stream_session_listener_get (srv->session_type, + srv->session_index); + str = format (0, "%U", format_stream_session, listener, verbose); + + session_manager_get_segment_info (listener->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-20s%-10d%-10d", str, server_name, + seg_name, srv->api_client_index, srv->accept_cookie); + } + else + s = format (s, "%-40s%-20s", str, server_name); + return s; +} + +u8 * +format_application_client (u8 * s, va_list * args) +{ + application_t *client = va_arg (*args, application_t *); + int verbose = va_arg (*args, int); + stream_session_t *session; + u8 *str, *seg_name; + u32 segment_size; + + if (client == 0) + { + if (verbose) + s = + format (s, "%-40s%-20s%-10s", "Connection", "Segment", + "API Client"); + else + s = format (s, "%-40s", "Connection"); + + return s; + } + + session = stream_session_get (client->session_index, client->thread_index); + str = format (0, "%U", format_stream_session, session, verbose); + + session_manager_get_segment_info (session->server_segment_index, &seg_name, + &segment_size); + if (verbose) + { + s = format (s, "%-40s%-20s%-10d%", str, seg_name, + client->api_client_index); + } + else + s = format (s, "%-40s", str); + return s; +} + +static clib_error_t * +show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + application_t *app; + int do_server = 0; + int do_client = 0; + int verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "server")) + do_server = 1; + else if (unformat (input, "client")) + do_client = 1; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + if (do_server) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_server, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_SERVER) + vlib_cli_output (vm, "%U", format_application_server, app, + verbose); + })); + /* *INDENT-ON* */ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + if (do_client) + { + if (pool_elts (app_pool)) + { + vlib_cli_output (vm, "%U", format_application_client, + 0 /* header */ , + verbose); + /* *INDENT-OFF* */ + pool_foreach (app, app_pool, + ({ + if (app->mode == APP_CLIENT) + vlib_cli_output (vm, "%U", format_application_client, app, + verbose); + })); + /* *INDENT-ON* 
*/ + } + else + vlib_cli_output (vm, "No active server bindings"); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_app_command, static) = +{ +.path = "show app",.short_help = + "show app [server|client] [verbose]",.function = show_app_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h new file mode 100644 index 00000000..027d6967 --- /dev/null +++ b/src/vnet/session/application.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VNET_SESSION_APPLICATION_H_ +#define SRC_VNET_SESSION_APPLICATION_H_ + +#include +#include + +typedef enum +{ + APP_SERVER, + APP_CLIENT +} application_type_t; + +typedef struct _stream_session_cb_vft +{ + /** Notify server of new segment */ + int (*add_segment_callback) (u32 api_client_index, const u8 * seg_name, + u32 seg_size); + + /** Notify server of newly accepted session */ + int (*session_accept_callback) (stream_session_t * new_session); + + /* Connection request callback */ + int (*session_connected_callback) (u32 api_client_index, + stream_session_t * s, u8 code); + + /** Notify app that session is closing */ + void (*session_disconnect_callback) (stream_session_t * s); + + /** Notify app that session was reset */ + void (*session_reset_callback) (stream_session_t * s); + + /* Direct RX callback, for built-in servers */ + int (*builtin_server_rx_callback) (stream_session_t * session); + + /* Redirect connection to local server */ + int (*redirect_connect_callback) (u32 api_client_index, void *mp); +} session_cb_vft_t; + +typedef struct _application +{ + /** Index in server pool */ + u32 index; + + /** Flags */ + u32 flags; + + /** Binary API connection index, ~0 if internal */ + u32 api_client_index; + + /* */ + u32 api_context; + + /** Application listens for events on this svm queue */ + unix_shared_memory_queue_t *event_queue; + + /** Stream session type */ + u8 session_type; + + /* Stream server mode: accept or connect */ + u8 mode; + + u32 session_manager_index; + + /* + * Bind/Listen specific + */ + + /** Accept cookie, for multiple session flavors ($$$ maybe) */ + u32 accept_cookie; + + /** Index of the listen session or connect session */ + u32 session_index; + + /** Session thread index for client connect sessions */ + u32 thread_index; + + /* + * Callbacks: shoulder-taps for the server/client + */ + session_cb_vft_t cb_fns; +} application_t; + +application_t *application_new (application_type_t type, session_type_t sst, + u32 api_client_index, u32 flags, + session_cb_vft_t * cb_fns); +void application_del (application_t * app); +application_t *application_get (u32 index); +application_t *application_lookup (u32 api_client_index); +u32 application_get_index (application_t * app); + +int +application_server_init (application_t * server, u32 segment_size, + u32 add_segment_size, u32 
rx_fifo_size, + u32 tx_fifo_size, u8 ** segment_name); +int application_api_queue_is_full (application_t * app); + +#endif /* SRC_VNET_SESSION_APPLICATION_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c new file mode 100644 index 00000000..0ea77fd8 --- /dev/null +++ b/src/vnet/session/application_interface.c @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include + +/** @file + VPP's application/session API bind/unbind/connect/disconnect calls +*/ + +static u8 +ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4) +{ + if (is_ip4) + return (ip46_address->ip4.as_u32 == 0); + else + return (ip46_address->as_u64[0] == 0 && ip46_address->as_u64[1] == 0); +} + +static u8 +ip_is_local (ip46_address_t * ip46_address, u8 is_ip4) +{ + fib_node_index_t fei; + fib_entry_flag_t flags; + fib_prefix_t prefix; + + /* Check if requester is local */ + if (is_ip4) + { + prefix.fp_len = 32; + prefix.fp_proto = FIB_PROTOCOL_IP4; + } + else + { + prefix.fp_len = 128; + prefix.fp_proto = FIB_PROTOCOL_IP6; + } + + clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address)); + fei = fib_table_lookup (0, &prefix); + flags = fib_entry_get_flags (fei); + + return (flags & FIB_ENTRY_FLAG_LOCAL); +} + +int +api_parse_session_handle (u64 handle, u32 * session_index, u32 * thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + *thread_index = handle & 0xFFFFFFFF; + *session_index = handle >> 32; + + if (*thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[*thread_index]; + + if (pool_is_free_index (pool, *session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +int +vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order, + session_type_t sst, u64 * options, session_cb_vft_t * cb_fns, + application_t ** app, u32 * len_seg_name, char *seg_name) +{ + u8 *segment_name = 0; + application_t *server = 0; + stream_session_t *listener; + u8 is_ip4; + + listener = + stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port_host_order), + sst); + + if (listener) + return VNET_API_ERROR_ADDRESS_IN_USE; + + if (application_lookup (api_client_index)) + { + clib_warning ("Only one bind supported for now"); + return VNET_API_ERROR_ADDRESS_IN_USE; + } + + is_ip4 = SESSION_TYPE_IP4_UDP == sst || SESSION_TYPE_IP4_TCP == sst; + if (!ip_is_zero (ip46, is_ip4) && !ip_is_local (ip46, is_ip4)) + return VNET_API_ERROR_INVALID_VALUE; + + /* Allocate and initialize stream server */ + server = application_new (APP_SERVER, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + application_server_init (server, options[SESSION_OPTIONS_SEGMENT_SIZE], + 
options[SESSION_OPTIONS_ADD_SEGMENT_SIZE], + options[SESSION_OPTIONS_RX_FIFO_SIZE], + options[SESSION_OPTIONS_TX_FIFO_SIZE], + &segment_name); + + /* Setup listen path down to transport */ + stream_session_start_listen (server->index, ip46, port_host_order); + + /* + * Return values + */ + + ASSERT (vec_len (segment_name) <= 128); + *len_seg_name = vec_len (segment_name); + memcpy (seg_name, segment_name, *len_seg_name); + *app = server; + + return 0; +} + +int +vnet_unbind_i (u32 api_client_index) +{ + application_t *server; + + /* + * Find the stream_server_t corresponding to the api client + */ + server = application_lookup (api_client_index); + if (!server) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* Clear the listener */ + stream_session_stop_listen (server->index); + application_del (server); + + return 0; +} + +int +vnet_connect_i (u32 api_client_index, u32 api_context, session_type_t sst, + ip46_address_t * ip46, u16 port, u64 * options, void *mp, + session_cb_vft_t * cb_fns) +{ + stream_session_t *listener; + application_t *server, *app; + + /* + * Figure out if connecting to a local server + */ + listener = stream_session_lookup_listener (ip46, + clib_host_to_net_u16 (port), + sst); + if (listener) + { + server = application_get (listener->app_index); + + /* + * Server is willing to have a direct fifo connection created + * instead of going through the state machine, etc. + */ + if (server->flags & SESSION_OPTIONS_FLAGS_USE_FIFO) + return server->cb_fns. + redirect_connect_callback (server->api_client_index, mp); + } + + /* Create client app */ + app = application_new (APP_CLIENT, sst, api_client_index, + options[SESSION_OPTIONS_FLAGS], cb_fns); + + app->api_context = api_context; + + /* + * Not connecting to a local server. Create regular session + */ + stream_session_open (sst, ip46, port, app->index); + + return 0; +} + +/** + * unformat a vnet URI + * + * fifo://name + * tcp://ip46-addr:port + * udp://ip46-addr:port + * + * u8 ip46_address[16]; + * u16 port_in_host_byte_order; + * stream_session_type_t sst; + * u8 *fifo_name; + * + * if (unformat (input, "%U", unformat_vnet_uri, &ip46_address, + * &sst, &port, &fifo_name)) + * etc... 
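+ *
+ * For example, "tcp://1.2.3.4/80" parses to SESSION_TYPE_IP4_TCP with
+ * port 80 and "udp://::1/53" to SESSION_TYPE_IP6_UDP. Only tcp:// and
+ * udp:// URIs are recognized below; the fifo:// form listed above is
+ * not handled by this unformat function.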
+ * + */ +uword +unformat_vnet_uri (unformat_input_t * input, va_list * args) +{ + ip46_address_t *address = va_arg (*args, ip46_address_t *); + session_type_t *sst = va_arg (*args, session_type_t *); + u16 *port = va_arg (*args, u16 *); + + if (unformat (input, "tcp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_TCP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip4_address, &address->ip4, + port)) + { + *sst = SESSION_TYPE_IP4_UDP; + return 1; + } + if (unformat (input, "udp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_UDP; + return 1; + } + if (unformat (input, "tcp://%U/%d", unformat_ip6_address, &address->ip6, + port)) + { + *sst = SESSION_TYPE_IP6_TCP; + return 1; + } + + return 0; +} + +int +parse_uri (char *uri, session_type_t * sst, ip46_address_t * addr, + u16 * port_number_host_byte_order) +{ + unformat_input_t _input, *input = &_input; + + /* Make sure */ + uri = (char *) format (0, "%s%c", uri, 0); + + /* Parse uri */ + unformat_init_string (input, uri, strlen (uri)); + if (!unformat (input, "%U", unformat_vnet_uri, addr, sst, + port_number_host_byte_order)) + { + unformat_free (input); + return VNET_API_ERROR_INVALID_VALUE; + } + unformat_free (input); + + return 0; +} + +int +vnet_bind_uri (vnet_bind_args_t * a) +{ + application_t *server = 0; + u16 port_host_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46; + int rv; + + memset (&ip46, 0, sizeof (ip46)); + rv = parse_uri (a->uri, &sst, &ip46, &port_host_order); + if (rv) + return rv; + + if ((rv = vnet_bind_i (a->api_client_index, &ip46, port_host_order, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + return 0; +} + +session_type_t +session_type_from_proto_and_ip (session_api_proto_t proto, u8 is_ip4) +{ + if (proto == SESSION_PROTO_TCP) + { + if (is_ip4) + return SESSION_TYPE_IP4_TCP; + else + return SESSION_TYPE_IP6_TCP; + } + else + { + if (is_ip4) + return SESSION_TYPE_IP4_UDP; + else + return SESSION_TYPE_IP6_UDP; + } + + return SESSION_N_TYPES; +} + +int +vnet_unbind_uri (char *uri, u32 api_client_index) +{ + u16 port_number_host_byte_order; + session_type_t sst = SESSION_N_TYPES; + ip46_address_t ip46_address; + stream_session_t *listener; + int rv; + + rv = parse_uri (uri, &sst, &ip46_address, &port_number_host_byte_order); + if (rv) + return rv; + + listener = + stream_session_lookup_listener (&ip46_address, + clib_host_to_net_u16 + (port_number_host_byte_order), sst); + + if (!listener) + return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + + /* External client? 
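+     Built-in applications use api_client_index == ~0 and skip the
+     check; for external clients make sure the binary API registration
+     still exists.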
*/ + if (api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (api_client_index)); + } + + return vnet_unbind_i (api_client_index); +} + +int +vnet_connect_uri (vnet_connect_args_t * a) +{ + ip46_address_t ip46_address; + u16 port; + session_type_t sst; + application_t *app; + int rv; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + /* Parse uri */ + rv = parse_uri (a->uri, &sst, &ip46_address, &port); + if (rv) + return rv; + + return vnet_connect_i (a->api_client_index, a->api_context, sst, + &ip46_address, port, a->options, a->mp, + a->session_cb_vft); +} + +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index) +{ + stream_session_t *session; + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + + +int +vnet_bind (vnet_bind_args_t * a) +{ + application_t *server = 0; + session_type_t sst = SESSION_N_TYPES; + int rv; + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + if ((rv = vnet_bind_i (a->api_client_index, &a->tep.ip, a->tep.port, sst, + a->options, a->session_cb_vft, &server, + &a->segment_name_length, a->segment_name))) + return rv; + + a->server_event_queue_address = (u64) server->event_queue; + a->handle = (u64) a->tep.vrf << 32 | (u64) server->session_index; + return 0; +} + +int +vnet_unbind (vnet_unbind_args_t * a) +{ + application_t *server; + + if (a->api_client_index != ~0) + { + ASSERT (vl_api_client_index_to_registration (a->api_client_index)); + } + + /* Make sure this is the right one */ + server = application_lookup (a->api_client_index); + ASSERT (server->session_index == (0xFFFFFFFF & a->handle)); + + /* TODO use handle to disambiguate namespaces/vrfs */ + return vnet_unbind_i (a->api_client_index); +} + +int +vnet_connect (vnet_connect_args_t * a) +{ + session_type_t sst; + application_t *app; + + app = application_lookup (a->api_client_index); + if (app) + { + clib_warning ("Already have a connect from this app"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + + sst = session_type_from_proto_and_ip (a->proto, a->tep.is_ip4); + return vnet_connect_i (a->api_client_index, a->api_context, sst, &a->tep.ip, + a->tep.port, a->options, a->mp, a->session_cb_vft); +} + +int +vnet_disconnect (vnet_disconnect_args_t * a) +{ + stream_session_t *session; + u32 session_index, thread_index; + + if (api_parse_session_handle (a->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return -1; + } + + session = stream_session_get (session_index, thread_index); + stream_session_disconnect (session); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h new file mode 100644 index 00000000..8d87c067 --- /dev/null +++ b/src/vnet/session/application_interface.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_uri_h__ +#define __included_uri_h__ + +#include +#include +#include +#include +#include +#include + +typedef enum _session_api_proto +{ + SESSION_PROTO_TCP, + SESSION_PROTO_UDP +} session_api_proto_t; + +typedef struct _vnet_bind_args_t +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + + u32 api_client_index; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* + * Results + */ + char *segment_name; + u32 segment_name_length; + u64 server_event_queue_address; + u64 handle; +} vnet_bind_args_t; + +typedef struct _vnet_unbind_args_t +{ + union + { + char *uri; + u64 handle; + }; + u32 api_client_index; +} vnet_unbind_args_t; + +typedef struct _vnet_connect_args +{ + union + { + char *uri; + struct + { + transport_endpoint_t tep; + session_api_proto_t proto; + }; + }; + u32 api_client_index; + u32 api_context; + u64 *options; + session_cb_vft_t *session_cb_vft; + + /* Used for redirects */ + void *mp; +} vnet_connect_args_t; + +typedef struct _vnet_disconnect_args_t +{ + u64 handle; + u32 api_client_index; +} vnet_disconnect_args_t; + +/* Bind / connect options */ +typedef enum +{ + SESSION_OPTIONS_FLAGS, + SESSION_OPTIONS_SEGMENT_SIZE, + SESSION_OPTIONS_ADD_SEGMENT_SIZE, + SESSION_OPTIONS_RX_FIFO_SIZE, + SESSION_OPTIONS_TX_FIFO_SIZE, + SESSION_OPTIONS_ACCEPT_COOKIE, + SESSION_OPTIONS_N_OPTIONS +} session_options_index_t; + +/** Server can handle delegated connect requests from local clients */ +#define SESSION_OPTIONS_FLAGS_USE_FIFO (1<<0) + +/** Server wants vpp to add segments when out of memory for fifos */ +#define SESSION_OPTIONS_FLAGS_ADD_SEGMENT (1<<1) + +#define VNET_CONNECT_REDIRECTED 123 + +int vnet_bind_uri (vnet_bind_args_t *); +int vnet_unbind_uri (char *uri, u32 api_client_index); +int vnet_connect_uri (vnet_connect_args_t * a); +int +vnet_disconnect_session (u32 client_index, u32 session_index, + u32 thread_index); + +int vnet_bind (vnet_bind_args_t * a); +int vnet_connect (vnet_connect_args_t * a); +int vnet_unbind (vnet_unbind_args_t * a); +int vnet_disconnect (vnet_disconnect_args_t * a); + +int +api_parse_session_handle (u64 handle, u32 * session_index, + u32 * thread_index); + +#endif /* __included_uri_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/hashes.c b/src/vnet/session/hashes.c new file mode 100644 index 00000000..1808dd73 --- /dev/null +++ b/src/vnet/session/hashes.c @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Generate typed init functions for multiple hash table styles... */ + +#include +#include + +#include + +#undef __included_bihash_template_h__ + +#include +#include + +#include diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c new file mode 100644 index 00000000..e467f4e9 --- /dev/null +++ b/src/vnet/session/node.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +vlib_node_registration_t session_queue_node; + +typedef struct +{ + u32 session_index; + u32 server_thread_index; +} session_queue_trace_t; + +/* packet trace format function */ +static u8 * +format_session_queue_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); + + s = format (s, "SESSION_QUEUE: session index %d, server thread index %d", + t->session_index, t->server_thread_index); + return s; +} + +vlib_node_registration_t session_queue_node; + +#define foreach_session_queue_error \ +_(TX, "Packets transmitted") \ +_(TIMER, "Timer events") + +typedef enum +{ +#define _(sym,str) SESSION_QUEUE_ERROR_##sym, + foreach_session_queue_error +#undef _ + SESSION_QUEUE_N_ERROR, +} session_queue_error_t; + +static char *session_queue_error_strings[] = { +#define _(sym,string) string, + foreach_session_queue_error +#undef _ +}; + +static u32 session_type_to_next[] = { + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, +}; + +always_inline int +session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_packets, + u8 peek_data) +{ + u32 n_trace = vlib_get_trace_count (vm, node); + u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0; + u32 n_frame_bytes, n_frames_per_evt; + transport_connection_t *tc0; + transport_proto_vft_t *transport_vft; + u32 next_index, next0, *to_next, n_left_to_next, bi0; + vlib_buffer_t *b0; + u32 rx_offset; + u16 snd_mss0; + u8 *data0; + int i; + + next_index = next0 = session_type_to_next[s0->session_type]; + + transport_vft = session_get_transport_vft (s0->session_type); + tc0 = transport_vft->get_connection (s0->connection_index, thread_index); + + /* Make sure we have space to send and there's something to dequeue */ + snd_space0 = transport_vft->send_space (tc0); + snd_mss0 = transport_vft->send_mss (tc0); + + if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0 + || snd_mss0 == 0) + return 0; + + ASSERT (e0->enqueue_length > 0); + + /* Ensure we're not 
writing more than transport window allows */ + max_len_to_snd0 = clib_min (e0->enqueue_length, snd_space0); + + if (peek_data) + { + /* Offset in rx fifo from where to peek data */ + rx_offset = transport_vft->rx_fifo_offset (tc0); + } + + /* TODO check if transport is willing to send len_to_snd0 + * bytes (Nagle) */ + + n_frame_bytes = snd_mss0 * VLIB_FRAME_SIZE; + n_frames_per_evt = ceil ((double) max_len_to_snd0 / n_frame_bytes); + + n_bufs = vec_len (smm->tx_buffers[thread_index]); + left_to_snd0 = max_len_to_snd0; + for (i = 0; i < n_frames_per_evt; i++) + { + /* Make sure we have at least one full frame of buffers ready */ + if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (smm->tx_buffers[thread_index], + n_bufs + VLIB_FRAME_SIZE - 1); + n_bufs += + vlib_buffer_alloc (vm, &smm->tx_buffers[thread_index][n_bufs], + VLIB_FRAME_SIZE); + + /* buffer shortage + * XXX 0.9 because when debugging we might not get a full frame */ + if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE)) + { + /* Keep track of how much we've dequeued and exit */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + return -1; + } + + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + } + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (left_to_snd0 && n_left_to_next) + { + /* Get free buffer */ + n_bufs--; + bi0 = smm->tx_buffers[thread_index][n_bufs]; + _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + + b0 = vlib_get_buffer (vm, bi0); + b0->error = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID + | VNET_BUFFER_LOCALLY_ORIGINATED; + b0->current_data = 0; + + /* RX on the local interface. tx in default fib */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* usual speculation, or the enqueue_x1 macro will barf */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + if (PREDICT_FALSE (n_trace > 0)) + { + session_queue_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->session_index = s0->session_index; + t0->server_thread_index = s0->thread_index; + } + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-dequeue: id %d length %d",.format_args = + "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->data[0] = e0->event_id; + ed->data[1] = e0->enqueue_length; + } + + len_to_deq0 = (left_to_snd0 < snd_mss0) ? 
left_to_snd0 : snd_mss0; + + /* Make room for headers */ + data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); + + /* Dequeue the data + * TODO 1) peek instead of dequeue + * 2) buffer chains */ + if (peek_data) + { + int n_bytes_read; + n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, s0->pid, + rx_offset, len_to_deq0, data0); + if (n_bytes_read < 0) + goto dequeue_fail; + + /* Keep track of progress locally, transport is also supposed to + * increment it independently when pushing header */ + rx_offset += n_bytes_read; + } + else + { + if (svm_fifo_dequeue_nowait (s0->server_tx_fifo, s0->pid, + len_to_deq0, data0) < 0) + goto dequeue_fail; + } + + b0->current_length = len_to_deq0; + + /* Ask transport to push header */ + transport_vft->push_header (tc0, b0); + + left_to_snd0 -= len_to_deq0; + *n_tx_packets = *n_tx_packets + 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* If we couldn't dequeue all bytes store progress */ + if (max_len_to_snd0 < e0->enqueue_length) + { + e0->enqueue_length -= max_len_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + } + return 0; + +dequeue_fail: + /* Can't read from fifo. Store event rx progress, save as partially read, + * return buff to free list and return */ + e0->enqueue_length -= max_len_to_snd0 - left_to_snd0; + vec_add1 (smm->evts_partially_read[thread_index], *e0); + + to_next -= 1; + n_left_to_next += 1; + _vec_len (smm->tx_buffers[thread_index]) += 1; + + clib_warning ("dequeue fail"); + return 0; +} + +int +session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 1); +} + +int +session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts) +{ + return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, + 0); +} + +static uword +session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_fifo_event_t *my_fifo_events, *e; + u32 n_to_dequeue; + unix_shared_memory_queue_t *q; + int n_tx_packets = 0; + u32 my_thread_index = vm->cpu_index; + int i, rv; + + /* + * Update TCP time + */ + tcp_update_time (vlib_time_now (vm), my_thread_index); + + /* + * Get vpp queue events + */ + q = smm->vpp_event_queues[my_thread_index]; + if (PREDICT_FALSE (q == 0)) + return 0; + + /* min number of events we can dequeue without blocking */ + n_to_dequeue = q->cursize; + if (n_to_dequeue == 0) + return 0; + + my_fifo_events = smm->fifo_events[my_thread_index]; + + /* If we didn't manage to process previous events try going + * over them again without dequeuing new ones. 
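+   * (Partially read events were moved back into fifo_events at the
+   * end of the previous dispatch, see the handling after the "done"
+   * label below.)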
+ * XXX: Block senders to sessions that can't keep up */ + if (vec_len (my_fifo_events) >= 100) + goto skip_dequeue; + + /* See you in the next life, don't be late */ + if (pthread_mutex_trylock (&q->mutex)) + return 0; + + for (i = 0; i < n_to_dequeue; i++) + { + vec_add2 (my_fifo_events, e, 1); + unix_shared_memory_queue_sub_raw (q, (u8 *) e); + } + + /* The other side of the connection is not polling */ + if (q->cursize < (q->maxsize / 8)) + (void) pthread_cond_broadcast (&q->condvar); + pthread_mutex_unlock (&q->mutex); + + smm->fifo_events[my_thread_index] = my_fifo_events; + +skip_dequeue: + + for (i = 0; i < n_to_dequeue; i++) + { + svm_fifo_t *f0; /* $$$ prefetch 1 ahead maybe */ + stream_session_t *s0; + u32 server_session_index0, server_thread_index0; + session_fifo_event_t *e0; + + e0 = &my_fifo_events[i]; + f0 = e0->fifo; + server_session_index0 = f0->server_session_index; + server_thread_index0 = f0->server_thread_index; + + /* $$$ add multiple event queues, per vpp worker thread */ + ASSERT (server_thread_index0 == my_thread_index); + + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + server_session_index0); + + ASSERT (s0->thread_index == my_thread_index); + + switch (e0->event_type) + { + case FIFO_EVENT_SERVER_TX: + /* Spray packets in per session type frames, since they go to + * different nodes */ + rv = (smm->session_rx_fns[s0->session_type]) (vm, node, smm, e0, s0, + my_thread_index, + &n_tx_packets); + if (rv < 0) + goto done; + + break; + + default: + clib_warning ("unhandled event type %d", e0->event_type); + } + } + +done: + + /* Couldn't process all events. Probably out of buffers */ + if (PREDICT_FALSE (i < n_to_dequeue)) + { + session_fifo_event_t *partially_read = + smm->evts_partially_read[my_thread_index]; + vec_add (partially_read, &my_fifo_events[i], n_to_dequeue - i); + vec_free (my_fifo_events); + smm->fifo_events[my_thread_index] = partially_read; + smm->evts_partially_read[my_thread_index] = 0; + } + else + { + vec_free (smm->fifo_events[my_thread_index]); + smm->fifo_events[my_thread_index] = + smm->evts_partially_read[my_thread_index]; + smm->evts_partially_read[my_thread_index] = 0; + } + + vlib_node_increment_counter (vm, session_queue_node.index, + SESSION_QUEUE_ERROR_TX, n_tx_packets); + + return n_tx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (session_queue_node) = +{ + .function = session_queue_node_fn, + .name = "session-queue", + .format_trace = format_session_queue_trace, + .type = VLIB_NODE_TYPE_INPUT, + .n_errors = ARRAY_LEN (session_queue_error_strings), + .error_strings = session_queue_error_strings, + .n_next_nodes = SESSION_QUEUE_N_NEXT, + /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */ + /* edit / add dispositions here */ + .next_nodes = + { + [SESSION_QUEUE_NEXT_DROP] = "error-drop", + [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", + [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", + [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", + [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api new file mode 100644 index 00000000..a7b28c1d --- /dev/null +++ b/src/vnet/session/session.api @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /** \brief Bind to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6] etc. + @param options - socket options, fifo sizes, etc. +*/ +define bind_uri { + u32 client_index; + u32 context; + u32 accept_cookie; + u32 initial_segment_size; + u8 uri[128]; + u64 options[16]; +}; + +/** \brief Unbind a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param uri - a URI, e.g. "tcp://0.0.0.0/0/80" [ipv4] + "tcp://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. +*/ +define unbind_uri { + u32 client_index; + u32 context; + u8 uri[128]; +}; + +/** \brief Connect to a given URI + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param accept_cookie - sender accept cookie, to identify this bind flavor + @param uri - a URI, e.g. "tcp4://0.0.0.0/0/80" + "tcp6://::/0/80" [ipv6], etc. + @param options - socket options, fifo sizes, etc. 
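+    @param client_queue_address - client's API queue address. Non-zero
+                                  when used to perform redirects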
+*/ +define connect_uri { + u32 client_index; + u32 context; + u8 uri[128]; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_uri_reply { + u32 context; + i32 retval; + u64 server_event_queue_address; + u8 segment_name_length; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_uri_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param session_index - session index; + @param session_thread_index - session thread index + @param session_type - session thread type + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_uri_reply { + u32 context; + i32 retval; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u8 session_type; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief vpp->client, please map an additional shared memory segment + @param context - sender context, to match reply w/ request + @param segment_name - +*/ +define map_another_segment { + u32 client_index; + u32 context; + u32 segment_size; + u8 segment_name[128]; +}; + +/** \brief client->vpp + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define map_another_segment_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param session_index - index of new session + @param session_thread_index - thread index of new session + @param vpp_event_queue_address - vpp's event queue address + @param session_type - type of session + +*/ +define accept_session { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 server_rx_fifo; + u64 server_tx_fifo; + u32 session_index; + u32 session_thread_index; + u64 vpp_event_queue_address; + u8 session_type; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define accept_session_reply { + u32 context; + i32 retval; + u8 session_type; + u8 session_thread_index; + u32 session_index; +}; + +/** \brief 
bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - cookie #1 from accept_session / connect_reply + @param session_thread_index - cookie #2 +*/ +define disconnect_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define disconnect_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session { + u32 client_index; + u32 context; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param session_index - session index from accept_session / connect_reply + @param session_thread_index - thread index from accept_session / + connect_reply +*/ +define reset_session_reply { + u32 client_index; + u32 context; + i32 retval; + u32 session_index; + u32 session_thread_index; +}; + +/** \brief Bind to an ip:port pair for a given transport protocol + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - bind namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param options - socket options, fifo sizes, etc. +*/ +define bind_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 options[16]; +}; + +/** \brief Unbind + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param handle - bind handle obtained from bind reply +*/ +define unbind_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief Connect to a remote peer + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf - connection namespace + @param is_ip4 - flag that is 1 if ip address family is IPv4 + @param ip - ip address + @param port - port + @param proto - protocol 0 - TCP 1 - UDP + @param client_queue_address - client's API queue address. Non-zero when + used to perform redirects + @param options - socket options, fifo sizes, etc. 
+*/ +define connect_sock { + u32 client_index; + u32 context; + u32 vrf; + u8 is_ip4; + u8 ip[16]; + u16 port; + u8 proto; + u64 client_queue_address; + u64 options[16]; +}; + +/** \brief Bind reply + @param context - sender context, to match reply w/ request + @param handle - bind handle + @param retval - return code for the request + @param event_queue_address - vpp event queue address or 0 if this + connection shouldn't send events + @param segment_name_length - length of segment name + @param segment_name - name of segment client needs to attach to +*/ +define bind_sock_reply { + u32 context; + u64 handle; + i32 retval; + u64 server_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief unbind reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request +*/ +define unbind_sock_reply { + u32 context; + i32 retval; +}; + +/** \brief vpp/server->client, connect reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - connection handle + @param server_rx_fifo - rx (vpp -> vpp-client) fifo address + @param server_tx_fifo - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address + @param client_event_queue_address - client's event queue address + @param segment_name_length - non-zero if the client needs to attach to + the fifo segment + @param segment_name - set if the client needs to attach to the segment +*/ +define connect_sock_reply { + u32 context; + i32 retval; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 client_event_queue_address; + u64 vpp_event_queue_address; + u32 segment_size; + u8 segment_name_length; + u8 segment_name[128]; +}; + +/** \brief bidirectional disconnect API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief bidirectional disconnect reply API + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param client_context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define disconnect_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client, accept this session + @param context - sender context, to match reply w/ request + @param accept_cookie - tells client which bind flavor just occurred + @param handle - session handle obtained through accept/connect + @param rx_fifo_address - rx (vpp -> vpp-client) fifo address + @param tx_fifo_address - tx (vpp-client -> vpp) fifo address + @param vpp_event_queue_address - vpp's event queue address +*/ +define accept_sock { + u32 client_index; + u32 context; + u32 accept_cookie; + u64 handle; + u64 server_rx_fifo; + u64 server_tx_fifo; + u64 vpp_event_queue_address; +}; + +/** \brief client->vpp, reply to an accept message + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param handle - session handle obtained through accept/connect +*/ +define accept_sock_reply { + u32 context; + i32 retval; + u64 handle; +}; + +/** \brief vpp->client reset session API + @param client_index - opaque cookie to identify the sender + client to 
vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock { + u32 client_index; + u32 context; + u64 handle; +}; + +/** \brief client->vpp reset session reply + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param handle - session handle obtained through accept/connect +*/ +define reset_sock_reply { + u32 client_index; + u32 context; + i32 retval; + u64 handle; +}; +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ \ No newline at end of file diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c new file mode 100644 index 00000000..539da613 --- /dev/null +++ b/src/vnet/session/session.c @@ -0,0 +1,1286 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Session and session manager + */ + +#include +#include +#include +#include +#include + +/** + * Per-type vector of transport protocol virtual function tables + */ +static transport_proto_vft_t *tp_vfts; + +session_manager_main_t session_manager_main; + +/* + * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) + * Value: (owner thread index << 32 | session_index); + */ +static void +stream_session_table_add_for_tc (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +void +stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, + u64 value) +{ + transport_connection_t *tc; + + tc = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + stream_session_table_add_for_tc (s->session_type, tc, value); +} + +static void +stream_session_half_open_table_add (u8 sst, transport_connection_t * tc, + u64 value) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 1 /* is_add */ ); + break; + default: + clib_warning 
("Session type not supported"); + ASSERT (0); + } +} + +static int +stream_session_table_del_for_tc (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + return clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + return clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } + + return 0; +} + +static int +stream_session_table_del (session_manager_main_t * smm, stream_session_t * s) +{ + transport_connection_t *ts; + + ts = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + return stream_session_table_del_for_tc (smm, s->session_type, ts); +} + +static void +stream_session_half_open_table_del (session_manager_main_t * smm, u8 sst, + transport_connection_t * tc) +{ + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +stream_session_t * +stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + make_v4_listener_kv (&kv4, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], (u32) kv4.value); + + /* Zero out the lcl ip */ + kv4.key[0] = 0; + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv4.value); + + return 0; +} + +/** Looks up a session based on the 5-tuple passed as argument. 
+ * + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) + */ +stream_session_t * +stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv4_t kv4; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + return stream_session_get_tsi (kv4.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener4 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_manager_main_t *smm = &session_manager_main; + session_kv6_t kv6; + int rv; + + make_v6_listener_kv (&kv6, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + /* Zero out the lcl ip */ + kv6.key[0] = kv6.key[1] = 0; + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); + + return 0; +} + +/* Looks up a session based on the 5-tuple passed as argument. + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) */ +stream_session_t * +stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + return stream_session_get_tsi (kv6.value, my_thread_index); + + /* If nothing is found, check if any listener is available */ + return stream_session_lookup_listener6 (lcl, lcl_port, proto); +} + +stream_session_t * +stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) +{ + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); + break; + } + return 0; +} + +static u64 +stream_session_half_open_lookup (session_manager_main_t * smm, + ip46_address_t * lcl, ip46_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_kv4_t kv4; + session_kv6_t kv6; + int rv; + + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + + if (rv == 0) + return kv4.value; + + return (u64) ~ 0; + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + + if (rv == 0) + return kv6.value; + + return (u64) ~ 0; + break; + } + 
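+  /* Unknown session type */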
return 0; +} + +transport_connection_t * +stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_tsi (kv4.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + + return 0; +} + +transport_connection_t * +stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_tsi (kv6.value, my_thread_index); + + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); + if (rv == 0) + return tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + +/** + * Allocate vpp event queue (once) per worker thread + */ +void +vpp_session_event_queue_allocate (session_manager_main_t * smm, + u32 thread_index) +{ + api_main_t *am = &api_main; + void *oldheap; + + if (smm->vpp_event_queues[thread_index] == 0) + { + /* Allocate event fifo in the /vpe-api shared-memory segment */ + oldheap = svm_push_data_heap (am->vlib_rp); + + smm->vpp_event_queues[thread_index] = + unix_shared_memory_queue_init (2048 /* nels $$$$ config */ , + sizeof (session_fifo_event_t), + 0 /* consumer pid */ , + 0 + /* (do not) send signal when queue non-empty */ + ); + + svm_pop_heap (oldheap); + } +} + +void +session_manager_get_segment_info (u32 index, u8 ** name, u32 * size) +{ + svm_fifo_segment_private_t *s; + s = svm_fifo_get_segment (index); + *name = s->h->segment_name; + *size = s->ssvm.ssvm_size; +} + +always_inline int +session_manager_add_segment_i (session_manager_main_t * smm, + session_manager_t * sm, + u32 segment_size, u8 * segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + int rv; + + memset (ca, 0, sizeof (*ca)); + + ca->segment_name = (char *) segment_name; + ca->segment_size = segment_size; + + rv = svm_fifo_segment_create (ca); + if (rv) + { + clib_warning ("svm_fifo_segment_create ('%s', %d) failed", + ca->segment_name, ca->segment_size); + vec_free (segment_name); + return -1; + } + + vec_add1 (sm->segment_indices, ca->new_segment_index); + + return 0; +} + +static int +session_manager_add_segment (session_manager_main_t * smm, + 
session_manager_t * sm) +{ + u8 *segment_name; + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + u32 add_segment_size; + u32 default_segment_size = 128 << 10; + + memset (ca, 0, sizeof (*ca)); + segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + add_segment_size = + sm->add_segment_size ? sm->add_segment_size : default_segment_size; + + return session_manager_add_segment_i (smm, sm, add_segment_size, + segment_name); +} + +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name) +{ + svm_fifo_segment_create_args_t _ca, *ca = &_ca; + memset (ca, 0, sizeof (*ca)); + *segment_name = format (0, "%d-%d%c", getpid (), + smm->unique_segment_name_counter++, 0); + return session_manager_add_segment_i (smm, sm, segment_size, *segment_name); +} + +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm) +{ + u32 *deleted_sessions = 0; + u32 *deleted_thread_indices = 0; + int i, j; + + /* Across all fifo segments used by the server */ + for (j = 0; j < vec_len (sm->segment_indices); j++) + { + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + /* Vector of fifos allocated in the segment */ + fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + /* + * Remove any residual sessions from the session lookup table + * Don't bother deleting the individual fifos, we're going to + * throw away the fifo segment in a minute. + */ + for (i = 0; i < vec_len (fifos); i++) + { + svm_fifo_t *fifo; + u32 session_index, thread_index; + stream_session_t *session; + + fifo = fifos[i]; + session_index = fifo->server_session_index; + thread_index = fifo->server_thread_index; + + session = pool_elt_at_index (smm->sessions[thread_index], + session_index); + + /* Add to the deleted_sessions vector (once!) */ + if (!session->is_deleted) + { + session->is_deleted = 1; + vec_add1 (deleted_sessions, + session - smm->sessions[thread_index]); + vec_add1 (deleted_thread_indices, thread_index); + } + } + + for (i = 0; i < vec_len (deleted_sessions); i++) + { + stream_session_t *session; + + session = + pool_elt_at_index (smm->sessions[deleted_thread_indices[i]], + deleted_sessions[i]); + + /* Instead of directly removing the session call disconnect */ + stream_session_disconnect (session); + + /* + stream_session_table_del (smm, session); + pool_put(smm->sessions[deleted_thread_indices[i]], session); + */ + } + + vec_reset_length (deleted_sessions); + vec_reset_length (deleted_thread_indices); + + /* Instead of removing the segment, test when removing the session if + * the segment can be removed + */ + /* svm_fifo_segment_delete (fifo_segment); */ + } + + vec_free (deleted_sessions); + vec_free (deleted_thread_indices); +} + +int +session_manager_allocate_session_fifos (session_manager_main_t * smm, + session_manager_t * sm, + svm_fifo_t ** server_rx_fifo, + svm_fifo_t ** server_tx_fifo, + u32 * fifo_segment_index, + u8 * added_a_segment) +{ + svm_fifo_segment_private_t *fifo_segment; + u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ; + int i; + + *added_a_segment = 0; + + /* Allocate svm fifos */ + ASSERT (vec_len (sm->segment_indices)); + +again: + for (i = 0; i < vec_len (sm->segment_indices); i++) + { + *fifo_segment_index = sm->segment_indices[i]; + fifo_segment = svm_fifo_get_segment (*fifo_segment_index); + + fifo_size = sm->rx_fifo_size; + fifo_size = (fifo_size == 0) ? 
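/*
 * A small sketch of the naming and sizing policy used when adding segments
 * above: each new fifo segment gets a process-unique name built from
 * getpid() and a running counter, and a zero add_segment_size falls back to
 * a default (128 KB in the patch).  The helpers below are illustrative only,
 * not VPP API.
 */
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

static unsigned demo_segment_name_counter;

static void
demo_next_segment_name (char *buf, size_t len)
{
  /* "<pid>-<counter>", e.g. "1234-0", "1234-1", ... */
  snprintf (buf, len, "%d-%u", (int) getpid (), demo_segment_name_counter++);
}

static uint32_t
demo_segment_size (uint32_t configured, uint32_t dflt)
{
  /* zero means "not configured": use the default */
  return configured ? configured : dflt;
}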
default_fifo_size : fifo_size; + *server_rx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + fifo_size = sm->tx_fifo_size; + fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; + *server_tx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + + if (*server_rx_fifo == 0) + { + /* This would be very odd, but handle it... */ + if (*server_tx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo); + *server_tx_fifo = 0; + } + continue; + } + if (*server_tx_fifo == 0) + { + if (*server_rx_fifo != 0) + { + svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo); + *server_rx_fifo = 0; + } + continue; + } + break; + } + + /* See if we're supposed to create another segment */ + if (*server_rx_fifo == 0) + { + if (sm->add_segment) + { + if (*added_a_segment) + { + clib_warning ("added a segment, still cant allocate a fifo"); + return SESSION_ERROR_NEW_SEG_NO_SPACE; + } + + if (session_manager_add_segment (smm, sm)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + + *added_a_segment = 1; + goto again; + } + else + return SESSION_ERROR_NO_SPACE; + } + return 0; +} + +int +stream_session_create_i (session_manager_main_t * smm, application_t * app, + transport_connection_t * tc, + stream_session_t ** ret_s) +{ + int rv; + svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; + u32 fifo_segment_index; + u32 pool_index, seg_size; + stream_session_t *s; + u64 value; + u32 thread_index = tc->thread_index; + session_manager_t *sm; + u8 segment_added; + u8 *seg_name; + + sm = session_manager_get (app->session_manager_index); + + /* Check the API queue */ + if (app->mode == APP_SERVER && application_api_queue_is_full (app)) + return SESSION_ERROR_API_QUEUE_FULL; + + if ((rv = session_manager_allocate_session_fifos (smm, sm, &server_rx_fifo, + &server_tx_fifo, + &fifo_segment_index, + &segment_added))) + return rv; + + if (segment_added && app->mode == APP_SERVER) + { + /* Send an API message to the external server, to map new segment */ + ASSERT (app->cb_fns.add_segment_callback); + + session_manager_get_segment_info (fifo_segment_index, &seg_name, + &seg_size); + if (app->cb_fns.add_segment_callback (app->api_client_index, seg_name, + seg_size)) + return VNET_API_ERROR_URI_FIFO_CREATE_FAILED; + } + + /* Create the session */ + pool_get (smm->sessions[thread_index], s); + memset (s, 0, sizeof (*s)); + + /* Initialize backpointers */ + pool_index = s - smm->sessions[thread_index]; + server_rx_fifo->server_session_index = pool_index; + server_rx_fifo->server_thread_index = thread_index; + + server_tx_fifo->server_session_index = pool_index; + server_tx_fifo->server_thread_index = thread_index; + + s->server_rx_fifo = server_rx_fifo; + s->server_tx_fifo = server_tx_fifo; + + /* Initialize state machine, such as it is... */ + s->session_type = app->session_type; + s->session_state = SESSION_STATE_CONNECTING; + s->app_index = application_get_index (app); + s->server_segment_index = fifo_segment_index; + s->thread_index = thread_index; + s->session_index = pool_index; + + /* Attach transport to session */ + s->connection_index = tc->c_index; + + /* Attach session to transport */ + tc->s_index = s->session_index; + + /* Add to the main lookup table */ + value = (((u64) thread_index) << 32) | (u64) s->session_index; + stream_session_table_add_for_tc (app->session_type, tc, value); + + *ret_s = s; + + return 0; +} + +/* + * Enqueue data for delivery to session peer. 
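/*
 * The main lookup table entry written by stream_session_create_i() above
 * packs the owning thread into the high 32 bits and the session pool index
 * into the low 32 bits (value = thread_index << 32 | session_index), which
 * is exactly what stream_session_get_tsi() later unpacks.  A self-contained
 * model of that encoding:
 */
#include <stdint.h>

static inline uint64_t
demo_make_ti_and_si (uint32_t thread_index, uint32_t session_index)
{
  return ((uint64_t) thread_index << 32) | (uint64_t) session_index;
}

static inline uint32_t
demo_ti (uint64_t v)
{
  return (uint32_t) (v >> 32);
}

static inline uint32_t
demo_si (uint64_t v)
{
  return (uint32_t) (v & 0xFFFFFFFFULL);
}

/* e.g. demo_ti (demo_make_ti_and_si (2, 7)) == 2, demo_si (...) == 7 */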
Does not notify peer of enqueue + * event but on request can queue notification events for later delivery by + * calling stream_server_flush_enqueue_events(). + * + * @param tc Transport connection which is to be enqueued data + * @param data Data to be enqueued + * @param len Length of data to be enqueued + * @param queue_event Flag to indicate if peer is to be notified or if event + * is to be queued. The former is useful when more data is + * enqueued and only one event is to be generated. + * @return Number of bytes enqueued or a negative value if enqueueing failed. + */ +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event) +{ + stream_session_t *s; + int enqueued; + + s = stream_session_get (tc->s_index, tc->thread_index); + + /* Make sure there's enough space left. We might've filled the pipes */ + if (PREDICT_FALSE (len > svm_fifo_max_enqueue (s->server_rx_fifo))) + return -1; + + enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, s->pid, len, data); + + if (queue_event) + { + /* Queue RX event on this fifo. Eventually these will need to be flushed + * by calling stream_server_flush_enqueue_events () */ + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 thread_index = s->thread_index; + u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index]; + + if (s->enqueue_epoch != my_enqueue_epoch) + { + s->enqueue_epoch = my_enqueue_epoch; + vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index], + s - smm->sessions[thread_index]); + } + } + + return enqueued; +} + +/** Check if we have space in rx fifo to push more bytes */ +u8 +stream_session_no_space (transport_connection_t * tc, u32 thread_index, + u16 data_len) +{ + stream_session_t *s = stream_session_get (tc->c_index, thread_index); + + if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY)) + return 1; + + if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo)) + return 1; + + return 0; +} + +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_peek (s->server_tx_fifo, s->pid, offset, max_bytes, buffer); +} + +u32 +stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_dequeue_drop (s->server_tx_fifo, s->pid, max_bytes); +} + +/** + * Notify session peer that new data has been enqueued. + * + * @param s Stream session for which the event is to be generated. + * @param block Flag to indicate if call should block if event queue is full. + * + * @return 0 on succes or negative number if failed to send notification. 
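/*
 * The enqueue path above queues at most one RX notification per session per
 * poll round by stamping each session with the per-thread enqueue epoch.  A
 * stripped-down model of that deduplication; names and array sizes are
 * illustrative only:
 */
#include <stdint.h>

#define DEMO_MAX_SESSIONS 4

static uint8_t demo_current_epoch;      /* bumped once per flush round */
static uint8_t demo_session_epoch[DEMO_MAX_SESSIONS];
static uint32_t demo_to_notify[DEMO_MAX_SESSIONS];
static int demo_n_to_notify;

static void
demo_mark_for_notify (uint32_t session_index)
{
  /* first enqueue in this round: remember the session exactly once */
  if (demo_session_epoch[session_index] != demo_current_epoch)
    {
      demo_session_epoch[session_index] = demo_current_epoch;
      demo_to_notify[demo_n_to_notify++] = session_index;
    }
}

static void
demo_flush_round_done (void)
{
  /* after notifications are flushed, start a new round */
  demo_n_to_notify = 0;
  demo_current_epoch++;
}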
+ */ +static int +stream_session_enqueue_notify (stream_session_t * s, u8 block) +{ + application_t *app; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + static u32 serial_number; + + if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED)) + return 0; + + /* Get session's server */ + app = application_get (s->app_index); + + /* Fabricate event */ + evt.fifo = s->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo); + + /* Add event to server's event queue */ + q = app->event_queue; + + /* Based on request block (or not) for lack of space */ + if (block || PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + return -1; + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. + */ +int +session_manager_flush_enqueue_events (u32 thread_index) +{ + session_manager_main_t *smm = &session_manager_main; + u32 *session_indices_to_enqueue; + int i, errors = 0; + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + stream_session_t *s0; + + /* Get session */ + s0 = stream_session_get (session_indices_to_enqueue[i], thread_index); + if (stream_session_enqueue_notify (s0, 0 /* don't block */ )) + { + errors++; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[thread_index] = + session_indices_to_enqueue; + + /* Increment enqueue epoch for next round */ + smm->current_enqueue_epoch[thread_index]++; + + return errors; +} + +/* + * Start listening on server's ip/port pair for requested transport. + * + * Creates a 'dummy' stream session with state LISTENING to be used in session + * lookups, prior to establishing connection. Requests transport to build + * it's own specific listening connection. 
+ */ +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *s; + transport_connection_t *tc; + application_t *srv; + u32 tci; + + srv = application_get (server_index); + + pool_get (smm->listen_sessions[srv->session_type], s); + memset (s, 0, sizeof (*s)); + + s->session_type = srv->session_type; + s->session_state = SESSION_STATE_LISTENING; + s->session_index = s - smm->listen_sessions[srv->session_type]; + s->app_index = srv->index; + + /* Transport bind/listen */ + tci = tp_vfts[srv->session_type].bind (smm->vlib_main, s->session_index, ip, + port); + + /* Attach transport to session */ + s->connection_index = tci; + tc = tp_vfts[srv->session_type].get_listener (tci); + + srv->session_index = s->session_index; + + /* Add to the main lookup table */ + stream_session_table_add_for_tc (s->session_type, tc, s->session_index); + + return 0; +} + +void +stream_session_stop_listen (u32 server_index) +{ + session_manager_main_t *smm = &session_manager_main; + stream_session_t *listener; + transport_connection_t *tc; + application_t *srv; + + srv = application_get (server_index); + listener = pool_elt_at_index (smm->listen_sessions[srv->session_type], + srv->session_index); + + tc = tp_vfts[srv->session_type].get_listener (listener->connection_index); + stream_session_table_del_for_tc (smm, listener->session_type, tc); + + tp_vfts[srv->session_type].unbind (smm->vlib_main, + listener->connection_index); + pool_put (smm->listen_sessions[srv->session_type], listener); +} + +int +connect_server_add_segment_cb (application_t * ss, char *segment_name, + u32 segment_size) +{ + /* Does exactly nothing, but die */ + ASSERT (0); + return 0; +} + +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type) +{ + session_manager_t *sm; + u32 connect_fifo_size = 8 << 10; /* Config? 
*/ + u32 default_segment_size = 1 << 20; + + pool_get (smm->session_managers, sm); + memset (sm, 0, sizeof (*sm)); + + sm->add_segment_size = default_segment_size; + sm->rx_fifo_size = connect_fifo_size; + sm->tx_fifo_size = connect_fifo_size; + sm->add_segment = 1; + + session_manager_add_segment (smm, sm); + smm->connect_manager_index[session_type] = sm - smm->session_managers; +} + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *app; + stream_session_t *new_s = 0; + u64 value; + + value = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip, + tc->lcl_port, tc->rmt_port, + tc->proto); + if (value == HALF_OPEN_LOOKUP_INVALID_VALUE) + { + clib_warning ("This can't be good!"); + return; + } + + app = application_get (value >> 32); + + if (!is_fail) + { + /* Create new session (server segments are allocated if needed) */ + if (stream_session_create_i (smm, app, tc, &new_s)) + return; + + app->session_index = stream_session_get_index (new_s); + app->thread_index = new_s->thread_index; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + } + + /* Notify client */ + app->cb_fns.session_connected_callback (app->api_client_index, new_s, + is_fail); + + /* Cleanup session lookup */ + stream_session_half_open_table_del (smm, sst, tc); +} + +void +stream_session_accept_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_accept_callback (s); +} + +/** + * Notification from transport that connection is being closed. + * + * A disconnect is sent to application but state is not removed. Once + * disconnect is acknowledged by application, session disconnect is called. + * Ultimately this leads to close being called on transport (passive close). + */ +void +stream_session_disconnect_notify (transport_connection_t * tc) +{ + application_t *server; + stream_session_t *s; + + s = stream_session_get (tc->s_index, tc->thread_index); + server = application_get (s->app_index); + server->cb_fns.session_disconnect_callback (s); +} + +/** + * Cleans up session and associated app if needed. 
+ */ +void +stream_session_delete (stream_session_t * s) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + svm_fifo_segment_private_t *fifo_segment; + application_t *app; + int rv; + + /* delete from the main lookup table */ + rv = stream_session_table_del (smm, s); + + if (rv) + clib_warning ("hash delete error, rv %d", rv); + + /* Cleanup fifo segments */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo); + + /* Cleanup app if client */ + app = application_get (s->app_index); + if (app->mode == APP_CLIENT) + { + application_del (app); + } + else if (app->mode == APP_SERVER) + { + session_manager_t *sm; + svm_fifo_segment_private_t *fifo_segment; + svm_fifo_t **fifos; + u32 fifo_index; + + sm = session_manager_get (app->session_manager_index); + + /* Delete fifo */ + fifo_segment = svm_fifo_get_segment (s->server_segment_index); + fifos = (svm_fifo_t **) fifo_segment->h->fifos; + + fifo_index = svm_fifo_segment_index (fifo_segment); + + /* Remove segment only if it holds no fifos and not the first */ + if (sm->segment_indices[0] != fifo_index && vec_len (fifos) == 0) + svm_fifo_segment_delete (fifo_segment); + } + + pool_put (smm->sessions[s->thread_index], s); +} + +/** + * Notification from transport that connection is being deleted + * + * This should be called only on previously fully established sessions. For + * instance failed connects should call stream_session_connect_notify and + * indicate that the connect has failed. + */ +void +stream_session_delete_notify (transport_connection_t * tc) +{ + stream_session_t *s; + + s = stream_session_get_if_valid (tc->s_index, tc->thread_index); + if (!s) + { + clib_warning ("Surprised!"); + return; + } + stream_session_delete (s); +} + +/** + * Notify application that connection has been reset. + */ +void +stream_session_reset_notify (transport_connection_t * tc) +{ + stream_session_t *s; + application_t *app; + s = stream_session_get (tc->s_index, tc->thread_index); + + app = application_get (s->app_index); + app->cb_fns.session_reset_callback (s); +} + +/** + * Accept a stream session. Optionally ping the server by callback. 
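/*
 * stream_session_delete() above keeps the application's first fifo segment
 * alive and removes an extra segment only once it holds no fifos.  The
 * policy, reduced to a predicate (names are illustrative):
 */
#include <stdint.h>

static int
demo_segment_can_be_deleted (uint32_t first_segment_index,
                             uint32_t segment_index, uint32_t n_fifos_left)
{
  /* never drop the first segment; drop others only when empty */
  return segment_index != first_segment_index && n_fifos_left == 0;
}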
+ */ +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify) +{ + session_manager_main_t *smm = &session_manager_main; + application_t *server; + stream_session_t *s, *listener; + + int rv; + + /* Find the server */ + listener = pool_elt_at_index (smm->listen_sessions[sst], listener_index); + server = application_get (listener->app_index); + + if ((rv = stream_session_create_i (smm, server, tc, &s))) + return rv; + + /* Allocate vpp event queue for this thread if needed */ + vpp_session_event_queue_allocate (smm, tc->thread_index); + + /* Shoulder-tap the server */ + if (notify) + { + server->cb_fns.session_accept_callback (s); + } + + return 0; +} + +void +stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order, + u32 app_index) +{ + transport_connection_t *tc; + u32 tci; + u64 value; + + /* Ask transport to open connection */ + tci = tp_vfts[sst].open (addr, port_host_byte_order); + + /* Get transport connection */ + tc = tp_vfts[sst].get_half_open (tci); + + /* Store api_client_index and transport connection index */ + value = (((u64) app_index) << 32) | (u64) tc->c_index; + + /* Add to the half-open lookup table */ + stream_session_half_open_table_add (sst, tc, value); +} + +/** + * Disconnect session and propagate to transport. This should eventually + * result in a delete notification that allows us to cleanup session state. + * Called for both active/passive disconnects. + */ +void +stream_session_disconnect (stream_session_t * s) +{ + tp_vfts[s->session_type].close (s->connection_index, s->thread_index); + s->session_state = SESSION_STATE_CLOSED; +} + +/** + * Cleanup transport and session state. + */ +void +stream_session_cleanup (stream_session_t * s) +{ + tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); + stream_session_delete (s); +} + +void +session_register_transport (u8 type, const transport_proto_vft_t * vft) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + + vec_validate (tp_vfts, type); + tp_vfts[type] = *vft; + + /* If an offset function is provided, then peek instead of dequeue */ + smm->session_rx_fns[type] = + (vft->rx_fifo_offset) ? 
session_fifo_rx_peek : session_fifo_rx_dequeue; +} + +transport_proto_vft_t * +session_get_transport_vft (u8 type) +{ + if (type >= vec_len (tp_vfts)) + return 0; + return &tp_vfts[type]; +} + +static clib_error_t * +session_manager_main_init (vlib_main_t * vm) +{ + u32 num_threads; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + session_manager_main_t *smm = &session_manager_main; + int i; + + smm->vlib_main = vm; + smm->vnet_main = vnet_get_main (); + + num_threads = 1 /* main thread */ + vtm->n_threads; + + if (num_threads < 1) + return clib_error_return (0, "n_thread_stacks not set"); + + /* $$$ config parameters */ + svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ , + 20 /* timeout in seconds */ ); + + /* configure per-thread ** vectors */ + vec_validate (smm->sessions, num_threads - 1); + vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1); + vec_validate (smm->tx_buffers, num_threads - 1); + vec_validate (smm->fifo_events, num_threads - 1); + vec_validate (smm->evts_partially_read, num_threads - 1); + vec_validate (smm->current_enqueue_epoch, num_threads - 1); + vec_validate (smm->vpp_event_queues, num_threads - 1); + + /* $$$$ preallocate hack config parameter */ + for (i = 0; i < 200000; i++) + { + stream_session_t *ss; + pool_get (smm->sessions[0], ss); + memset (ss, 0, sizeof (*ss)); + } + + for (i = 0; i < 200000; i++) + pool_put_index (smm->sessions[0], i); + + clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_session_hash, "v6 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + clib_bihash_init_16_8 (&smm->v4_half_open_hash, "v4 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&smm->v6_half_open_hash, "v6 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + for (i = 0; i < SESSION_N_TYPES; i++) + smm->connect_manager_index[i] = INVALID_INDEX; + + return 0; +} + +VLIB_INIT_FUNCTION (session_manager_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h new file mode 100644 index 00000000..cf14cca9 --- /dev/null +++ b/src/vnet/session/session.h @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
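/*
 * session_register_transport() / session_get_transport_vft() above form a
 * small dispatch table: each transport type registers a struct of function
 * pointers, and the rx path is chosen once at registration time (peek when
 * the transport provides an rx_fifo_offset function, dequeue otherwise).
 * A self-contained model of the pattern; the demo_* names are assumptions:
 */
#include <stddef.h>
#include <stdint.h>

typedef int (demo_rx_fn) (uint32_t session_index);
typedef struct
{
  uint32_t (*rx_fifo_offset) (uint32_t conn_index);     /* optional */
  /* ... other per-transport entry points ... */
} demo_vft_t;

#define DEMO_N_TYPES 4
static demo_vft_t demo_vfts[DEMO_N_TYPES];
static demo_rx_fn *demo_rx_fns[DEMO_N_TYPES];

static int demo_rx_peek (uint32_t s) { (void) s; return 0; }
static int demo_rx_dequeue (uint32_t s) { (void) s; return 0; }

static void
demo_register_transport (uint8_t type, const demo_vft_t * vft)
{
  if (type >= DEMO_N_TYPES)
    return;
  demo_vfts[type] = *vft;
  demo_rx_fns[type] = vft->rx_fifo_offset ? demo_rx_peek : demo_rx_dequeue;
}

static demo_vft_t *
demo_get_transport_vft (uint8_t type)
{
  return type < DEMO_N_TYPES ? &demo_vfts[type] : NULL;
}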
+ */ +#ifndef __included_session_h__ +#define __included_session_h__ + +#include +#include +#include +#include +#include + +#define HALF_OPEN_LOOKUP_INVALID_VALUE ((u64)~0) +#define INVALID_INDEX ((u32)~0) + +/* TODO decide how much since we have pre-data as well */ +#define MAX_HDRS_LEN 100 /* Max number of bytes for headers */ + +typedef enum +{ + FIFO_EVENT_SERVER_RX, + FIFO_EVENT_SERVER_TX, + FIFO_EVENT_TIMEOUT, + FIFO_EVENT_SERVER_EXIT, +} fifo_event_type_t; + +#define foreach_session_input_error \ +_(NO_SESSION, "No session drops") \ +_(NO_LISTENER, "No listener for dst port drops") \ +_(ENQUEUED, "Packets pushed into rx fifo") \ +_(NOT_READY, "Session not ready packets") \ +_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \ +_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \ +_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \ +_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair") \ +_(NO_SPACE, "Couldn't allocate a fifo pair") + +typedef enum +{ +#define _(sym,str) SESSION_ERROR_##sym, + foreach_session_input_error +#undef _ + SESSION_N_ERROR, +} session_error_t; + +/* Event queue input node static next indices */ +typedef enum +{ + SESSION_QUEUE_NEXT_DROP, + SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, + SESSION_QUEUE_NEXT_IP4_LOOKUP, + SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, + SESSION_QUEUE_NEXT_IP6_LOOKUP, + SESSION_QUEUE_N_NEXT, +} session_queue_next_t; + +#define foreach_session_type \ + _(IP4_TCP, ip4_tcp) \ + _(IP4_UDP, ip4_udp) \ + _(IP6_TCP, ip6_tcp) \ + _(IP6_UDP, ip6_udp) + +typedef enum +{ +#define _(A, a) SESSION_TYPE_##A, + foreach_session_type +#undef _ + SESSION_N_TYPES, +} session_type_t; + +/* + * Application session state + */ +typedef enum +{ + SESSION_STATE_LISTENING, + SESSION_STATE_CONNECTING, + SESSION_STATE_READY, + SESSION_STATE_CLOSED, + SESSION_STATE_N_STATES, +} stream_session_state_t; + +typedef CLIB_PACKED (struct + { + svm_fifo_t * fifo; + u8 event_type; + /* $$$$ for event logging */ + u16 event_id; + u32 enqueue_length; + }) session_fifo_event_t; + +typedef struct _stream_session_t +{ + /** Type */ + u8 session_type; + + /** State */ + u8 session_state; + + /** Session index in per_thread pool */ + u32 session_index; + + /** Transport specific */ + u32 connection_index; + + u8 thread_index; + + /** Application specific */ + u32 pid; + + /** fifo pointers. Once allocated, these do not move */ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + + /** To avoid n**2 "one event per frame" check */ + u8 enqueue_epoch; + + /** used during unbind processing */ + u8 is_deleted; + + /** stream server pool index */ + u32 app_index; + + /** svm segment index */ + u32 server_segment_index; +} stream_session_t; + +typedef struct _session_manager +{ + /** segments mapped by this server */ + u32 *segment_indices; + + /** Session fifo sizes. 
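/*
 * The session error list above uses the usual X-macro idiom: one list macro
 * expands both into an enum and into the matching counter strings.  A tiny
 * standalone illustration of the same technique (the entries here are made
 * up for the example):
 */
#define foreach_demo_error                    \
_(NO_SESSION, "No session drops")             \
_(FIFO_FULL, "Packets dropped, rx fifo full")

typedef enum
{
#define _(sym, str) DEMO_ERROR_##sym,
  foreach_demo_error
#undef _
  DEMO_N_ERROR,
} demo_error_t;

static const char *demo_error_strings[] = {
#define _(sym, str) str,
  foreach_demo_error
#undef _
};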
They are provided for binds and take default + * values for connects */ + u32 rx_fifo_size; + u32 tx_fifo_size; + + /** Configured additional segment size */ + u32 add_segment_size; + + /** Flag that indicates if additional segments should be created */ + u8 add_segment; +} session_manager_t; + +/* Forward definition */ +typedef struct _session_manager_main session_manager_main_t; + +typedef int + (session_fifo_rx_fn) (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, stream_session_t * s0, + u32 thread_index, int *n_tx_pkts); + +extern session_fifo_rx_fn session_fifo_rx_peek; +extern session_fifo_rx_fn session_fifo_rx_dequeue; + +struct _session_manager_main +{ + /** Lookup tables for established sessions and listeners */ + clib_bihash_16_8_t v4_session_hash; + clib_bihash_48_8_t v6_session_hash; + + /** Lookup tables for half-open sessions */ + clib_bihash_16_8_t v4_half_open_hash; + clib_bihash_48_8_t v6_half_open_hash; + + /** Per worker thread session pools */ + stream_session_t **sessions; + + /** Pool of listen sessions. Same type as stream sessions to ease lookups */ + stream_session_t *listen_sessions[SESSION_N_TYPES]; + + /** Sparse vector to map dst port to stream server */ + u16 *stream_server_by_dst_port[SESSION_N_TYPES]; + + /** per-worker enqueue epoch counters */ + u8 *current_enqueue_epoch; + + /** Per-worker thread vector of sessions to enqueue */ + u32 **session_indices_to_enqueue_by_thread; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /** Per worker-thread vector of partially read events */ + session_fifo_event_t **evts_partially_read; + + /** per-worker active event vectors */ + session_fifo_event_t **fifo_events; + + /** vpp fifo event queue */ + unix_shared_memory_queue_t **vpp_event_queues; + + /** Unique segment name counter */ + u32 unique_segment_name_counter; + + /* Connection manager used by incoming connects */ + u32 connect_manager_index[SESSION_N_TYPES]; + + session_manager_t *session_managers; + + /** Per transport rx function that can either dequeue or peek */ + session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES]; + + /* Convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +}; + +extern session_manager_main_t session_manager_main; + +/* + * Session manager function + */ +always_inline session_manager_main_t * +vnet_get_session_manager_main () +{ + return &session_manager_main; +} + +always_inline session_manager_t * +session_manager_get (u32 index) +{ + return pool_elt_at_index (session_manager_main.session_managers, index); +} + +always_inline unix_shared_memory_queue_t * +session_manager_get_vpp_event_queue (u32 thread_index) +{ + return session_manager_main.vpp_event_queues[thread_index]; +} + +always_inline session_manager_t * +connects_session_manager_get (session_manager_main_t * smm, + session_type_t session_type) +{ + return pool_elt_at_index (smm->session_managers, + smm->connect_manager_index[session_type]); +} + +void session_manager_get_segment_info (u32 index, u8 ** name, u32 * size); +int session_manager_flush_enqueue_events (u32 thread_index); +int +session_manager_add_first_segment (session_manager_main_t * smm, + session_manager_t * sm, u32 segment_size, + u8 ** segment_name); +void +session_manager_del (session_manager_main_t * smm, session_manager_t * sm); +void +connects_session_manager_init (session_manager_main_t * smm, u8 session_type); + +/* + * Stream session functions + */ + +stream_session_t *stream_session_lookup_listener4 (ip4_address_t 
* lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup4 (ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup6 (ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8, u32 thread_index); +transport_connection_t + * stream_session_lookup_transport4 (session_manager_main_t * smm, + ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +transport_connection_t + * stream_session_lookup_transport6 (session_manager_main_t * smm, + ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto, + u32 thread_index); +stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl, + u16 lcl_port, u8 proto); + +always_inline stream_session_t * +stream_session_get_tsi (u64 ti_and_si, u32 thread_index) +{ + ASSERT ((u32) (ti_and_si >> 32) == thread_index); + return pool_elt_at_index (session_manager_main.sessions[thread_index], + ti_and_si & 0xFFFFFFFFULL); +} + +always_inline stream_session_t * +stream_session_get (u64 si, u32 thread_index) +{ + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + +always_inline stream_session_t * +stream_session_get_if_valid (u64 si, u32 thread_index) +{ + if (thread_index >= vec_len (session_manager_main.sessions)) + return 0; + + if (pool_is_free_index (session_manager_main.sessions[thread_index], si)) + return 0; + + return pool_elt_at_index (session_manager_main.sessions[thread_index], si); +} + +always_inline stream_session_t * +stream_session_listener_get (u8 sst, u64 si) +{ + return pool_elt_at_index (session_manager_main.listen_sessions[sst], si); +} + +always_inline u32 +stream_session_get_index (stream_session_t * s) +{ + if (s->session_state == SESSION_STATE_LISTENING) + return s - session_manager_main.listen_sessions[s->session_type]; + + return s - session_manager_main.sessions[s->thread_index]; +} + +always_inline u32 +stream_session_max_enqueue (transport_connection_t * tc) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + return svm_fifo_max_enqueue (s->server_rx_fifo); +} + +int +stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len, + u8 queue_event); +u32 +stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, + u32 offset, u32 max_bytes); +u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); + +void +stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail); +void stream_session_accept_notify (transport_connection_t * tc); +void stream_session_disconnect_notify (transport_connection_t * tc); +void stream_session_delete_notify (transport_connection_t * tc); +void stream_session_reset_notify (transport_connection_t * tc); +int +stream_session_accept (transport_connection_t * tc, u32 listener_index, + u8 sst, u8 notify); +void stream_session_open (u8 sst, ip46_address_t * addr, + u16 port_host_byte_order, u32 api_client_index); +void stream_session_disconnect (stream_session_t * s); +void stream_session_cleanup (stream_session_t * s); +int +stream_session_start_listen (u32 server_index, ip46_address_t * ip, u16 port); +void stream_session_stop_listen (u32 server_index); + +u8 *format_stream_session (u8 * s, va_list * args); + +void session_register_transport (u8 type, const transport_proto_vft_t * 
vft); +transport_proto_vft_t *session_get_transport_vft (u8 type); + +#endif /* __included_session_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c new file mode 100644 index 00000000..9d068684 --- /dev/null +++ b/src/vnet/session/session_api.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include "application_interface.h" + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +#include + +#define foreach_session_api_msg \ +_(MAP_ANOTHER_SEGMENT_REPLY, map_another_segment_reply) \ +_(BIND_URI, bind_uri) \ +_(UNBIND_URI, unbind_uri) \ +_(CONNECT_URI, connect_uri) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ +_(ACCEPT_SESSION_REPLY, accept_session_reply) \ +_(RESET_SESSION_REPLY, reset_session_reply) \ +_(BIND_SOCK, bind_sock) \ +_(UNBIND_SOCK, unbind_sock) \ +_(CONNECT_SOCK, connect_sock) \ +_(DISCONNECT_SOCK, disconnect_sock) \ +_(DISCONNECT_SOCK_REPLY, disconnect_sock_reply) \ +_(ACCEPT_SOCK_REPLY, accept_sock_reply) \ +_(RESET_SOCK_REPLY, reset_sock_reply) \ + +static int +send_add_segment_callback (u32 api_client_index, const u8 * segment_name, + u32 segment_size) +{ + vl_api_map_another_segment_t *mp; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT); + mp->segment_size = segment_size; + strncpy ((char *) mp->segment_name, (char *) segment_name, + sizeof (mp->segment_name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_accept_uri_callback (stream_session_t * s) +{ + vl_api_accept_session_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = 
s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static void +send_session_disconnect_uri_callback (stream_session_t * s) +{ + vl_api_disconnect_session_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION); + + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static int +send_session_connected_uri_callback (u32 api_client_index, + stream_session_t * s, u8 is_fail) +{ + vl_api_connect_uri_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY); + mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + mp->session_type = s->session_type; + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +/** + * Redirect a connect_uri message to the indicated server. + * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_uri_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_uri_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. 
+ * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static u64 +make_session_handle (stream_session_t * s) +{ + return (u64) s->session_index << 32 | (u64) s->thread_index; +} + +static int +send_session_accept_callback (stream_session_t * s) +{ + vl_api_accept_sock_t *mp; + unix_shared_memory_queue_t *q, *vpp_queue; + application_t *server = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (server->api_client_index); + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SOCK); + + /* Note: session_type is the first octet in all types of sessions */ + + mp->accept_cookie = server->accept_cookie; + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + vl_msg_api_send_shmem (q, (u8 *) & mp); + + return 0; +} + +static int +send_session_connected_callback (u32 api_client_index, stream_session_t * s, + u8 is_fail) +{ + vl_api_connect_sock_reply_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_lookup (api_client_index); + u8 *seg_name; + unix_shared_memory_queue_t *vpp_queue; + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + mp = vl_msg_api_alloc (sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_SOCK_REPLY); + mp->context = app->api_context; + mp->retval = is_fail; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = (u64) s->server_rx_fifo; + mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->handle = make_session_handle (s); + mp->vpp_event_queue_address = (u64) vpp_queue; + mp->client_event_queue_address = (u64) app->event_queue; + + session_manager_get_segment_info (s->server_segment_index, &seg_name, + &mp->segment_size); + mp->segment_name_length = vec_len (seg_name); + if (mp->segment_name_length) + clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); + + /* Remove client if connect failed */ + if (is_fail) + application_del (app); + + return 0; +} + +static void +send_session_disconnect_callback (stream_session_t * s) +{ + vl_api_disconnect_sock_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SOCK); + + mp->handle = make_session_handle (s); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +/** + * Redirect a connect_uri message to the indicated server. 
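/*
 * make_session_handle() above packs the session index into the high word and
 * the owning thread into the low word of the 64-bit handle handed to API
 * clients (note: the opposite order from the thread/session value stored in
 * the main lookup table).  The inverse below mirrors that layout; it stands
 * in for api_parse_session_handle(), whose body is not shown in this patch,
 * so the split is an assumption:
 */
#include <stdint.h>

static inline uint64_t
demo_make_handle (uint32_t session_index, uint32_t thread_index)
{
  return ((uint64_t) session_index << 32) | (uint64_t) thread_index;
}

static inline void
demo_parse_handle (uint64_t handle, uint32_t * session_index,
                   uint32_t * thread_index)
{
  *session_index = (uint32_t) (handle >> 32);
  *thread_index = (uint32_t) (handle & 0xFFFFFFFFULL);
}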
+ * Only sent if the server has bound the related port with + * URI_OPTIONS_FLAGS_USE_FIFO + */ +static int +redirect_connect_callback (u32 server_api_client_index, void *mp_arg) +{ + vl_api_connect_sock_t *mp = mp_arg; + unix_shared_memory_queue_t *server_q, *client_q; + vlib_main_t *vm = vlib_get_main (); + f64 timeout = vlib_time_now (vm) + 0.5; + int rv = 0; + + server_q = vl_api_client_index_to_input_queue (server_api_client_index); + + if (!server_q) + { + rv = VNET_API_ERROR_INVALID_VALUE; + goto out; + } + + client_q = vl_api_client_index_to_input_queue (mp->client_index); + if (!client_q) + { + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto out; + } + + /* Tell the server the client's API queue address, so it can reply */ + mp->client_queue_address = (u64) client_q; + + /* + * Bounce message handlers MUST NOT block the data-plane. + * Spin waiting for the queue lock, but + */ + + while (vlib_time_now (vm) < timeout) + { + rv = + unix_shared_memory_queue_add (server_q, (u8 *) & mp, 1 /*nowait */ ); + switch (rv) + { + /* correctly enqueued */ + case 0: + return VNET_CONNECT_REDIRECTED; + + /* continue spinning, wait for pthread_mutex_trylock to work */ + case -1: + continue; + + /* queue stuffed, drop the msg */ + case -2: + rv = VNET_API_ERROR_QUEUE_FULL; + goto out; + } + } +out: + /* Dispose of the message */ + vl_msg_api_free (mp); + return rv; +} + +static session_cb_vft_t uri_session_cb_vft = { + .session_accept_callback = send_session_accept_uri_callback, + .session_disconnect_callback = send_session_disconnect_uri_callback, + .session_connected_callback = send_session_connected_uri_callback, + .add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_uri_callback +}; + +static session_cb_vft_t session_cb_vft = { + .session_accept_callback = send_session_accept_callback, + .session_disconnect_callback = send_session_disconnect_callback, + .session_connected_callback = send_session_connected_callback, + .add_segment_callback = send_add_segment_callback, + .redirect_connect_callback = redirect_connect_callback +}; + +static int +api_session_not_valid (u32 session_index, u32 thread_index) +{ + session_manager_main_t *smm = vnet_get_session_manager_main (); + stream_session_t *pool; + + if (thread_index >= vec_len (smm->sessions)) + return VNET_API_ERROR_INVALID_VALUE; + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return VNET_API_ERROR_INVALID_VALUE_2; + + return 0; +} + +static void +vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp) +{ + vl_api_bind_uri_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + _Static_assert (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &uri_session_cb_vft; + + a->options[SESSION_OPTIONS_SEGMENT_SIZE] = mp->initial_segment_size; + a->options[SESSION_OPTIONS_ACCEPT_COOKIE] = mp->accept_cookie; + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_URI_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + /* $$$$ policy? 
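/*
 * The redirect/bounce handlers above must not block the data plane, so they
 * spin on a non-blocking enqueue until a short deadline: 0 means queued,
 * -1 means the queue lock was busy (retry), -2 means the queue is full
 * (give up).  A standalone model of that loop; try_enqueue() and the clock
 * source are stand-ins, not VPP calls:
 */
#include <stdint.h>
#include <time.h>

extern int try_enqueue (void *msg);     /* assumed: 0 ok, -1 busy, -2 full */

static int
demo_redirect_with_deadline (void *msg, double timeout_sec)
{
  struct timespec ts;
  clock_gettime (CLOCK_MONOTONIC, &ts);
  double deadline = ts.tv_sec + ts.tv_nsec * 1e-9 + timeout_sec;

  for (;;)
    {
      int rv = try_enqueue (msg);
      if (rv == 0)
        return 0;               /* correctly enqueued */
      if (rv == -2)
        return -2;              /* queue stuffed, drop the msg */
      clock_gettime (CLOCK_MONOTONIC, &ts);
      if (ts.tv_sec + ts.tv_nsec * 1e-9 >= deadline)
        return -1;              /* lock never came free in time */
    }
}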
*/ + rmp->segment_size = mp->initial_segment_size; + if (segment_name_length) + { + memcpy (rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ + +} + +static void +vl_api_unbind_uri_t_handler (vl_api_unbind_uri_t * mp) +{ + vl_api_unbind_uri_reply_t *rmp; + int rv; + + rv = vnet_unbind_uri ((char *) mp->uri, mp->client_index); + + REPLY_MACRO (VL_API_UNBIND_URI_REPLY); +} + +static void +vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + a->uri = (char *) mp->uri; + a->api_client_index = mp->client_index; + a->api_context = mp->context; + a->options = mp->options; + a->session_cb_vft = &uri_session_cb_vft; + a->mp = mp; + vnet_connect_uri (a); +} + +static void +vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) +{ + vl_api_disconnect_session_reply_t *rmp; + int rv; + + rv = api_session_not_valid (mp->session_index, mp->session_thread_index); + if (!rv) + rv = vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); + + REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY); +} + +static void +vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * + mp) +{ + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + /* Disconnect has been confirmed. Confirm close to transport */ + vnet_disconnect_session (mp->client_index, mp->session_index, + mp->session_thread_index); +} + +static void +vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp) +{ + stream_session_t *s; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + { + clib_warning ("Invalid session!"); + return; + } + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + s = stream_session_get (mp->session_index, mp->session_thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp) +{ + stream_session_t *s; + int rv; + + if (api_session_not_valid (mp->session_index, mp->session_thread_index)) + return; + + s = stream_session_get (mp->session_index, mp->session_thread_index); + rv = mp->retval; + + if (rv) + { + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +static void +vl_api_map_another_segment_reply_t_handler (vl_api_map_another_segment_reply_t + * mp) +{ + clib_warning ("not implemented"); +} + +static void +vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp) +{ + vl_api_bind_sock_reply_t *rmp; + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + + STATIC_ASSERT (sizeof (u64) * SESSION_OPTIONS_N_OPTIONS <= + sizeof (mp->options), + "Out of options, fix api message definition"); + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? 
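/*
 * Both bind handlers above guard the API with a compile-time check that the
 * session-option array still fits inside the message's options field.  The
 * same check in miniature; the sizes and struct here are invented for the
 * example:
 */
#include <stdint.h>

#define DEMO_N_OPTIONS 8
typedef struct
{
  uint64_t options[16];         /* wire-format field in the API message */
} demo_bind_msg_t;

_Static_assert (sizeof (uint64_t) * DEMO_N_OPTIONS
                <= sizeof (((demo_bind_msg_t *) 0)->options),
                "Out of options, fix api message definition");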
sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + + a->api_client_index = mp->client_index; + a->options = mp->options; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &session_cb_vft; + + rv = vnet_bind_uri (a); + + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_BIND_SOCK_REPLY, ({ + rmp->retval = rv; + if (!rv) + { + rmp->segment_name_length = 0; + rmp->segment_size = mp->options[SESSION_OPTIONS_SEGMENT_SIZE]; + if (segment_name_length) + { + memcpy(rmp->segment_name, segment_name, segment_name_length); + rmp->segment_name_length = segment_name_length; + } + rmp->server_event_queue_address = a->server_event_queue_address; + } + })); + /* *INDENT-ON* */ +} + +static void +vl_api_unbind_sock_t_handler (vl_api_unbind_sock_t * mp) +{ + vl_api_unbind_sock_reply_t *rmp; + vnet_unbind_args_t _a, *a = &_a; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + rv = vnet_unbind (a); + + REPLY_MACRO (VL_API_UNBIND_SOCK_REPLY); +} + +static void +vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp) +{ + vnet_connect_args_t _a, *a = &_a; + + clib_memcpy (&a->tep.ip, mp->ip, + (mp->is_ip4 ? sizeof (ip4_address_t) : + sizeof (ip6_address_t))); + a->tep.is_ip4 = mp->is_ip4; + a->tep.port = mp->port; + a->tep.vrf = mp->vrf; + a->options = mp->options; + a->session_cb_vft = &session_cb_vft; + a->api_context = mp->context; + a->mp = mp; + + vnet_connect (a); +} + +static void +vl_api_disconnect_sock_t_handler (vl_api_disconnect_sock_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + vl_api_disconnect_sock_reply_t *rmp; + int rv; + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + rv = vnet_disconnect (a); + + REPLY_MACRO (VL_API_DISCONNECT_SOCK_REPLY); +} + +static void +vl_api_disconnect_sock_reply_t_handler (vl_api_disconnect_sock_reply_t * mp) +{ + vnet_disconnect_args_t _a, *a = &_a; + + /* Client objected to disconnecting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + a->api_client_index = mp->client_index; + a->handle = mp->handle; + + vnet_disconnect (a); +} + +static void +vl_api_reset_sock_reply_t_handler (vl_api_reset_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + /* Client objected to resetting the session, log and continue */ + if (mp->retval) + { + clib_warning ("client retval %d", mp->retval); + return; + } + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + + s = stream_session_get (session_index, thread_index); + + /* This comes as a response to a reset, transport only waiting for + * confirmation to remove connection state, no need to disconnect */ + stream_session_cleanup (s); +} + +static void +vl_api_accept_sock_reply_t_handler (vl_api_accept_sock_reply_t * mp) +{ + stream_session_t *s; + u32 session_index, thread_index; + + if (api_parse_session_handle (mp->handle, &session_index, &thread_index)) + { + clib_warning ("Invalid handle"); + return; + } + s = stream_session_get (session_index, thread_index); + + if (mp->retval) + { + /* Server isn't interested, kill the session */ + stream_session_disconnect (s); + return; + } + + s->session_state = SESSION_STATE_READY; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define 
_(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_session; +#undef _ +} + +/* + * session_api_hookup + * Add uri's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../open-repo/vlib/memclnt_vlib.c:memclnt_process() + */ +static clib_error_t * +session_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_session_api_msg; +#undef _ + + /* + * Messages which bounce off the data-plane to + * an API client. Simply tells the message handling infra not + * to free the message. + * + * Bounced message handlers MUST NOT block the data plane + */ + am->message_bounce[VL_API_CONNECT_URI] = 1; + am->message_bounce[VL_API_CONNECT_SOCK] = 1; + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (session_api_hookup); +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c new file mode 100644 index 00000000..b2943a1c --- /dev/null +++ b/src/vnet/session/session_cli.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include + +/** + * Format stream session as per the following format + * + * verbose: + * "Connection", "Rx fifo", "Tx fifo", "Session Index" + * non-verbose: + * "Connection" + */ +u8 * +format_stream_session (u8 * s, va_list * args) +{ + stream_session_t *ss = va_arg (*args, stream_session_t *); + int verbose = va_arg (*args, int); + transport_proto_vft_t *tp_vft; + u8 *str = 0; + + tp_vft = session_get_transport_vft (ss->session_type); + + if (verbose) + str = format (0, "%-20llp%-20llp%-15lld", ss->server_rx_fifo, + ss->server_tx_fifo, stream_session_get_index (ss)); + + if (ss->session_state == SESSION_STATE_READY) + { + s = format (s, "%-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else if (ss->session_state == SESSION_STATE_LISTENING) + { + s = format (s, "%-40U%v", tp_vft->format_listener, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_READY) + { + s = + format (s, "%-40U%v", tp_vft->format_half_open, ss->connection_index, + str); + } + else if (ss->session_state == SESSION_STATE_CLOSED) + { + s = format (s, "[CL] %-40U%v", tp_vft->format_connection, + ss->connection_index, ss->thread_index, str); + } + else + { + clib_warning ("Session in unknown state!"); + } + + vec_free (str); + + return s; +} + +static clib_error_t * +show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + int verbose = 0, i; + stream_session_t *pool; + stream_session_t *s; + u8 *str = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + for (i = 0; i < vec_len (smm->sessions); i++) + { + u32 once_per_pool; + pool = smm->sessions[i]; + + once_per_pool = 1; + + if (pool_elts (pool)) + { + + vlib_cli_output (vm, "Thread %d: %d active sessions", + i, pool_elts (pool)); + if (verbose) + { + if (once_per_pool) + { + str = format (str, "%-40s%-20s%-20s%-15s", + "Connection", "Rx fifo", "Tx fifo", + "Session Index"); + vlib_cli_output (vm, "%v", str); + vec_reset_length (str); + once_per_pool = 0; + } + + /* *INDENT-OFF* */ + pool_foreach (s, pool, + ({ + vlib_cli_output (vm, "%U", format_stream_session, s, verbose); + })); + /* *INDENT-ON* */ + } + } + else + vlib_cli_output (vm, "Thread %d: no active sessions", i); + } + vec_free (str); + + return 0; +} + +VLIB_CLI_COMMAND (show_uri_command, static) = +{ +.path = "show session",.short_help = "show session [verbose]",.function = + show_session_command_fn,}; + + +static clib_error_t * +clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + session_manager_main_t *smm = &session_manager_main; + u32 thread_index = 0; + u32 session_index = ~0; + stream_session_t *pool, *session; + application_t *server; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "thread %d", &thread_index)) + ; + else if (unformat (input, "session %d", &session_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (session_index == ~0) + return clib_error_return (0, "session required, but not set."); + + if (thread_index > vec_len (smm->sessions)) + return clib_error_return (0, "thread %d out of range [0-%d]", + thread_index, vec_len (smm->sessions)); + + pool = smm->sessions[thread_index]; + + if (pool_is_free_index (pool, session_index)) + return 
clib_error_return (0, "session %d not active", session_index); + + session = pool_elt_at_index (pool, session_index); + server = application_get (session->app_index); + + /* Disconnect both app and transport */ + server->cb_fns.session_disconnect_callback (session); + + return 0; +} + +VLIB_CLI_COMMAND (clear_uri_session_command, static) = +{ +.path = "clear session",.short_help = + "clear session thread session ",.function = + clear_session_command_fn,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c new file mode 100644 index 00000000..abd94ba4 --- /dev/null +++ b/src/vnet/session/transport.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +u32 +transport_endpoint_lookup (transport_endpoint_table_t *ht, ip46_address_t *ip, + u16 port) +{ + clib_bihash_kv_24_8_t kv; + int rv; + + kv.key[0] = ip->as_u64[0]; + kv.key[1] = ip->as_u64[1]; + kv.key[2] = port; + + rv = clib_bihash_search_inline_24_8 (ht, &kv); + if (rv == 0) + return kv.value; + + return TRANSPORT_ENDPOINT_INVALID_INDEX; +} + +void +transport_endpoint_table_add (transport_endpoint_table_t *ht, + transport_endpoint_t *te, u32 value) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + kv.value = value; + + clib_bihash_add_del_24_8 (ht, &kv, 1); +} + +void +transport_endpoint_table_del (transport_endpoint_table_t *ht, + transport_endpoint_t *te) +{ + clib_bihash_kv_24_8_t kv; + + kv.key[0] = te->ip.as_u64[0]; + kv.key[1] = te->ip.as_u64[1]; + kv.key[2] = te->port; + + clib_bihash_add_del_24_8 (ht, &kv, 0); +} + + + diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h new file mode 100644 index 00000000..2d4415ba --- /dev/null +++ b/src/vnet/session/transport.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef VNET_VNET_URI_TRANSPORT_H_ +#define VNET_VNET_URI_TRANSPORT_H_ + +#include +#include +#include +#include + +/* + * Protocol independent transport properties associated to a session + */ +typedef struct _transport_connection +{ + ip46_address_t rmt_ip; /**< Remote IP */ + ip46_address_t lcl_ip; /**< Local IP */ + u16 lcl_port; /**< Local port */ + u16 rmt_port; /**< Remote port */ + u8 proto; /**< Transport protocol id */ + + u32 s_index; /**< Parent session index */ + u32 c_index; /**< Connection index in transport pool */ + u8 is_ip4; /**< Flag if IP4 connection */ + u32 thread_index; /**< Worker-thread index */ + + /** Macros for 'derived classes' where base is named "connection" */ +#define c_lcl_ip connection.lcl_ip +#define c_rmt_ip connection.rmt_ip +#define c_lcl_ip4 connection.lcl_ip.ip4 +#define c_rmt_ip4 connection.rmt_ip.ip4 +#define c_lcl_ip6 connection.lcl_ip.ip6 +#define c_rmt_ip6 connection.rmt_ip.ip6 +#define c_lcl_port connection.lcl_port +#define c_rmt_port connection.rmt_port +#define c_proto connection.proto +#define c_state connection.state +#define c_s_index connection.s_index +#define c_c_index connection.c_index +#define c_is_ip4 connection.is_ip4 +#define c_thread_index connection.thread_index +} transport_connection_t; + +/* + * Transport protocol virtual function table + */ +typedef struct _transport_proto_vft +{ + /* + * Setup + */ + u32 (*bind) (vlib_main_t *, u32, ip46_address_t *, u16); + u32 (*unbind) (vlib_main_t *, u32); + int (*open) (ip46_address_t * addr, u16 port_host_byte_order); + void (*close) (u32 conn_index, u32 thread_index); + void (*cleanup) (u32 conn_index, u32 thread_index); + + /* + * Transmission + */ + u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u16 (*send_mss) (transport_connection_t * tc); + u32 (*send_space) (transport_connection_t * tc); + u32 (*rx_fifo_offset) (transport_connection_t * tc); + + /* + * Connection retrieval + */ + transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx); + transport_connection_t *(*get_listener) (u32 conn_index); + transport_connection_t *(*get_half_open) (u32 conn_index); + + /* + * Format + */ + u8 *(*format_connection) (u8 * s, va_list * args); + u8 *(*format_listener) (u8 * s, va_list * args); + u8 *(*format_half_open) (u8 * s, va_list * args); + +} transport_proto_vft_t; + +/* 16 octets */ +typedef CLIB_PACKED (struct + { + union + { + struct + { + ip4_address_t src; ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. 
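+ *
+ * For reference, the resulting key layout (16 octets, overlaid on
+ * as_u64[2] and packed into a session_kv4_t by make_v4_ss_kv () below):
+ *
+ *   bytes  0-3   src address
+ *   bytes  4-7   dst address
+ *   bytes  8-9   src port
+ *   bytes 10-11  dst port
+ *   bytes 12-15  protocol (widened to 4 octets for alignment, as noted)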
+ */ + u32 proto; + }; + u64 as_u64[2]; + }; + }) v4_connection_key_t; + +typedef CLIB_PACKED (struct + { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; ip6_address_t dst; + u16 src_port; + u16 dst_port; u32 proto; u8 unused_for_now[8]; + }; u64 as_u64[6]; + }; + }) v6_connection_key_t; + +typedef clib_bihash_kv_16_8_t session_kv4_t; +typedef clib_bihash_kv_48_8_t session_kv6_t; + +always_inline void +make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = rmt->as_u32; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v4_connection_key_t key; + memset (&key, 0, sizeof (v4_connection_key_t)); + + key.src.as_u32 = lcl->as_u32; + key.dst.as_u32 = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t) +{ + return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, + t->rmt_port, t->proto); +} + +always_inline void +make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = rmt->as_u64[0]; + key.dst.as_u64[1] = rmt->as_u64[1]; + key.src_port = lcl_port; + key.dst_port = rmt_port; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v6_connection_key_t key; + memset (&key, 0, sizeof (v6_connection_key_t)); + + key.src.as_u64[0] = lcl->as_u64[0]; + key.src.as_u64[1] = lcl->as_u64[1]; + key.dst.as_u64[0] = 0; + key.dst.as_u64[1] = 0; + key.src_port = lcl_port; + key.dst_port = 0; + key.proto = proto; + + kv->key[0] = key.as_u64[0]; + kv->key[1] = key.as_u64[1]; + kv->value = ~0ULL; +} + +always_inline void +make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) +{ + make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, + t->rmt_port, t->proto); +} + +typedef struct _transport_endpoint +{ + ip46_address_t ip; + u16 port; + u8 is_ip4; + u32 vrf; +} transport_endpoint_t; + +typedef clib_bihash_24_8_t transport_endpoint_table_t; + +#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0) + +u32 +transport_endpoint_lookup (transport_endpoint_table_t * ht, + ip46_address_t * ip, u16 port); +void transport_endpoint_table_add (transport_endpoint_table_t * ht, + transport_endpoint_t * te, u32 value); +void transport_endpoint_table_del (transport_endpoint_table_t * ht, + transport_endpoint_t * te); + +#endif /* VNET_VNET_URI_TRANSPORT_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c new file mode 100644 index 00000000..0f9b7097 --- /dev/null +++ b/src/vnet/tcp/tcp.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2016 Cisco and/or 
its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +tcp_main_t tcp_main; + +static u32 +tcp_connection_bind (vlib_main_t * vm, u32 session_index, ip46_address_t * ip, + u16 port_host_byte_order, u8 is_ip4) +{ + tcp_main_t *tm = &tcp_main; + tcp_connection_t *listener; + + pool_get (tm->listener_pool, listener); + memset (listener, 0, sizeof (*listener)); + + listener->c_c_index = listener - tm->listener_pool; + listener->c_lcl_port = clib_host_to_net_u16 (port_host_byte_order); + + if (is_ip4) + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + else + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + + listener->c_s_index = session_index; + listener->c_proto = SESSION_TYPE_IP4_TCP; + listener->state = TCP_STATE_LISTEN; + listener->c_is_ip4 = 1; + + return listener->c_c_index; +} + +u32 +tcp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 1); +} + +u32 +tcp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_host_byte_order) +{ + return tcp_connection_bind (vm, session_index, ip, port_host_byte_order, 0); + +} + +static void +tcp_session_unbind (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +tcp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +u32 +tcp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + tcp_session_unbind (listener_index); + return 0; +} + +transport_connection_t * +tcp_session_get_listener (u32 listener_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + tc = pool_elt_at_index (tm->listener_pool, listener_index); + return &tc->connection; +} + +/** + * Cleans up connection state. + * + * No notifications. + */ +void +tcp_connection_cleanup (tcp_connection_t * tc) +{ + tcp_main_t *tm = &tcp_main; + u32 tepi; + transport_endpoint_t *tep; + + /* Cleanup local endpoint if this was an active connect */ + tepi = transport_endpoint_lookup (&tm->local_endpoints_table, &tc->c_lcl_ip, + tc->c_lcl_port); + + /*XXX lock */ + if (tepi != TRANSPORT_ENDPOINT_INVALID_INDEX) + { + tep = pool_elt_at_index (tm->local_endpoints, tepi); + transport_endpoint_table_del (&tm->local_endpoints_table, tep); + pool_put (tm->local_endpoints, tep); + } + + /* Make sure all timers are cleared */ + tcp_connection_timers_reset (tc); + + /* Check if half-open */ + if (tc->state == TCP_STATE_SYN_SENT) + pool_put (tm->half_open_connections, tc); + else + pool_put (tm->connections[tc->c_thread_index], tc); +} + +/** + * Connection removal. + * + * This should be called only once connection enters CLOSED state. Note + * that it notifies the session of the removal event, so if the goal is to + * just remove the connection, call tcp_connection_cleanup instead. 
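+ *
+ * Minimal usage sketch (hypothetical caller, for illustration only):
+ *
+ *   tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
+ *   if (tc->state == TCP_STATE_CLOSED)
+ *     tcp_connection_del (tc);      // notify session layer, then cleanup
+ *   else
+ *     tcp_connection_cleanup (tc);  // silent teardown, no notification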
+ */ +void +tcp_connection_del (tcp_connection_t * tc) +{ + stream_session_delete_notify (&tc->connection); + tcp_connection_cleanup (tc); +} + +/** + * Begin connection closing procedure. + * + * If at the end the connection is not in CLOSED state, it is not removed. + * Instead, we rely on on TCP to advance through state machine to either + * 1) LAST_ACK (passive close) whereby when the last ACK is received + * tcp_connection_del is called. This notifies session of the delete and + * calls cleanup. + * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers + * and cleanup is called. + */ +void +tcp_connection_close (tcp_connection_t * tc) +{ + /* Send FIN if needed */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_CLOSE_WAIT) + tcp_send_fin (tc); + + /* Switch state */ + if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD) + tc->state = TCP_STATE_FIN_WAIT_1; + else if (tc->state == TCP_STATE_SYN_SENT) + tc->state = TCP_STATE_CLOSED; + else if (tc->state == TCP_STATE_CLOSE_WAIT) + tc->state = TCP_STATE_LAST_ACK; + + /* Half-close connections are not supported XXX */ + + if (tc->state == TCP_STATE_CLOSED) + tcp_connection_del (tc); +} + +void +tcp_session_close (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_close (tc); +} + +void +tcp_session_cleanup (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc; + tc = tcp_connection_get (conn_index, thread_index); + tcp_connection_cleanup (tc); +} + +void * +ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4) +{ + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_interface_address_t *ia = 0; + + if (is_ip4) + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm4, ia); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ , + ({ + return ip_interface_address_get_address (lm6, ia); + })); + /* *INDENT-ON* */ + } + + return 0; +} + +/** + * Allocate local port and add if successful add entry to local endpoint + * table to mark the pair as used. + */ +u16 +tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip) +{ + u8 unique = 0; + transport_endpoint_t *tep; + u32 time_now, tei; + u16 min = 1024, max = 65535, tries; /* XXX configurable ? */ + + tries = max - min; + time_now = tcp_time_now (); + + /* Start at random point or max */ + pool_get (tm->local_endpoints, tep); + clib_memcpy (&tep->ip, ip, sizeof (*ip)); + tep->port = random_u32 (&time_now) << 16; + tep->port = tep->port < min ? 
max : tep->port; + + /* Search for first free slot */ + while (tries) + { + tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip, + tep->port); + if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX) + { + unique = 1; + break; + } + + tep->port--; + + if (tep->port < min) + tep->port = max; + + tries--; + } + + if (unique) + { + transport_endpoint_table_add (&tm->local_endpoints_table, tep, + tep - tm->local_endpoints); + + return tep->port; + } + + /* Failed */ + pool_put (tm->local_endpoints, tep); + return -1; +} + +/** + * Initialize all connection timers as invalid + */ +void +tcp_connection_timers_init (tcp_connection_t * tc) +{ + int i; + + /* Set all to invalid */ + for (i = 0; i < TCP_N_TIMERS; i++) + { + tc->timers[i] = TCP_TIMER_HANDLE_INVALID; + } + + tc->rto = TCP_RTO_INIT; +} + +/** + * Stop all connection timers + */ +void +tcp_connection_timers_reset (tcp_connection_t * tc) +{ + int i; + for (i = 0; i < TCP_N_TIMERS; i++) + { + tcp_timer_reset (tc, i); + } +} + +/** Initialize tcp connection variables + * + * Should be called after having received a msg from the peer, i.e., a SYN or + * a SYNACK, such that connection options have already been exchanged. */ +void +tcp_connection_init_vars (tcp_connection_t * tc) +{ + tcp_connection_timers_init (tc); + tcp_set_snd_mss (tc); + tc->sack_sb.head = TCP_INVALID_SACK_HOLE_INDEX; + tcp_cc_init (tc); +} + +int +tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tcp_connection_t *tc; + fib_prefix_t prefix; + u32 fei, sw_if_index; + ip46_address_t lcl_addr; + u16 lcl_port; + + /* + * Find the local address and allocate port + */ + memset (&lcl_addr, 0, sizeof (lcl_addr)); + + /* Find a FIB path to the destination */ + clib_memcpy (&prefix.fp_addr, rmt_addr, sizeof (*rmt_addr)); + prefix.fp_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + prefix.fp_len = is_ip4 ? 32 : 128; + + fei = fib_table_lookup (0, &prefix); + + /* Couldn't find route to destination. Bail out. 
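+ * Nothing has been allocated yet at this point (no local port, no
+ * half-open connection), so there is nothing to roll back.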
*/ + if (fei == FIB_NODE_INDEX_INVALID) + return -1; + + sw_if_index = fib_entry_get_resolving_interface (fei); + + if (sw_if_index == (u32) ~ 0) + return -1; + + if (is_ip4) + { + ip4_address_t *ip4; + ip4 = ip_interface_get_first_ip (sw_if_index, 1); + lcl_addr.ip4.as_u32 = ip4->as_u32; + } + else + { + ip6_address_t *ip6; + ip6 = ip_interface_get_first_ip (sw_if_index, 0); + clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6)); + } + + /* Allocate source port */ + lcl_port = tcp_allocate_local_port (tm, &lcl_addr); + if (lcl_port < 1) + return -1; + + /* + * Create connection and send SYN + */ + + pool_get (tm->half_open_connections, tc); + memset (tc, 0, sizeof (*tc)); + + clib_memcpy (&tc->c_rmt_ip, rmt_addr, sizeof (ip46_address_t)); + clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t)); + tc->c_rmt_port = clib_host_to_net_u16 (rmt_port); + tc->c_lcl_port = clib_host_to_net_u16 (lcl_port); + tc->c_c_index = tc - tm->half_open_connections; + tc->c_is_ip4 = is_ip4; + + /* The other connection vars will be initialized after SYN ACK */ + tcp_connection_timers_init (tc); + + tcp_send_syn (tc); + + tc->state = TCP_STATE_SYN_SENT; + + return tc->c_c_index; +} + +int +tcp_session_open_ip4 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 1); +} + +int +tcp_session_open_ip6 (ip46_address_t * addr, u16 port) +{ + return tcp_connection_open (addr, port, 0); +} + +u8 * +format_tcp_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc; + + tc = tcp_connection_get (tci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + + return s; +} + +u8 * +format_tcp_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + tcp_connection_t *tc = tcp_connection_get (tci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "tcp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_tcp_half_open_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + tcp_connection_t *tc = tcp_half_open_connection_get (tci); + s = format 
(s, "[%s] %U:%d->%U:%d", "tcp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +transport_connection_t * +tcp_session_get_transport (u32 conn_index, u32 thread_index) +{ + tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index); + return &tc->connection; +} + +transport_connection_t * +tcp_half_open_session_get_transport (u32 conn_index) +{ + tcp_connection_t *tc = tcp_half_open_connection_get (conn_index); + return &tc->connection; +} + +u16 +tcp_session_send_mss (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tc->snd_mss; +} + +u32 +tcp_session_send_space (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return tcp_available_snd_space (tc); +} + +u32 +tcp_session_rx_fifo_offset (transport_connection_t * trans_conn) +{ + tcp_connection_t *tc = (tcp_connection_t *) trans_conn; + return (tc->snd_una_max - tc->snd_una); +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t tcp4_proto = { + .bind = tcp_session_bind_ip4, + .unbind = tcp_session_unbind_ip4, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip4, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip4, + .format_listener = format_tcp_listener_session_ip4, + .format_half_open = format_tcp_half_open_session_ip4 +}; + +const static transport_proto_vft_t tcp6_proto = { + .bind = tcp_session_bind_ip6, + .unbind = tcp_session_unbind_ip6, + .push_header = tcp_push_header, + .get_connection = tcp_session_get_transport, + .get_listener = tcp_session_get_listener, + .get_half_open = tcp_half_open_session_get_transport, + .open = tcp_session_open_ip6, + .close = tcp_session_close, + .cleanup = tcp_session_cleanup, + .send_mss = tcp_session_send_mss, + .send_space = tcp_session_send_space, + .rx_fifo_offset = tcp_session_rx_fifo_offset, + .format_connection = format_tcp_session_ip6, + .format_listener = format_tcp_listener_session_ip6, + .format_half_open = format_tcp_half_open_session_ip6 +}; +/* *INDENT-ON* */ + +void +tcp_timer_keep_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_close (tc); +} + +void +tcp_timer_establish_handler (u32 conn_index) +{ + tcp_connection_t *tc; + u8 sst; + + tc = tcp_half_open_connection_get (conn_index); + tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; + + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + sst = tc->c_is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + stream_session_connect_notify (&tc->connection, sst, 1 /* fail */ ); + + tcp_connection_cleanup (tc); +} + +void +tcp_timer_2msl_handler (u32 conn_index) +{ + u32 cpu_index = os_get_cpu_number (); + tcp_connection_t *tc; + + tc = tcp_connection_get (conn_index, cpu_index); + tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID; + + tcp_connection_del (tc); +} + +/* *INDENT-OFF* */ +static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = +{ + tcp_timer_retransmit_handler, + tcp_timer_delack_handler, + 0, + tcp_timer_keep_handler, + tcp_timer_2msl_handler, + tcp_timer_retransmit_syn_handler, + tcp_timer_establish_handler +}; +/* *INDENT-ON* */ + +static void +tcp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + /* Handle expiration */ + (*timer_expiration_handlers[timer_id]) (connection_index); + } +} + +void +tcp_initialize_timer_wheels (tcp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + vec_foreach (tw, tm->timer_wheels) + { + tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (tm->vlib_main); + } +} + +clib_error_t * +tcp_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + + tm->vlib_main = vm; + tm->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* Register with IP */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP); + if (pi == 0) + return clib_error_return (0, "TCP protocol info AWOL"); + pi->format_header = format_tcp_header; + pi->unformat_pg_edit = unformat_pg_tcp_header; + + ip4_register_protocol (IP_PROTOCOL_TCP, tcp4_input_node.index); + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_TCP, &tcp4_proto); + session_register_transport (SESSION_TYPE_IP6_TCP, &tcp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, num_threads - 1); + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + tcp_initialize_timer_wheels (tm); + + vec_validate (tm->delack_connections, num_threads - 1); + + /* Initialize clocks per tick for TCP timestamp. Used to compute + * monotonically increasing timestamps. 
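+ * With TCP_TSTAMP_RESOLUTION == TCP_TICK (10 ms), tcp_time_now ()
+ * becomes clib_cpu_time_now () * tstamp_ticks_per_clock, i.e. a
+ * monotonic counter in 10 ms units.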
*/ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / TCP_TSTAMP_RESOLUTION; + + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoint table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h new file mode 100644 index 00000000..22f00a63 --- /dev/null +++ b/src/vnet/tcp/tcp.h @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _vnet_tcp_h_ +#define _vnet_tcp_h_ + +#include +#include +#include +#include +#include +#include + +#define TCP_TICK 10e-3 /**< TCP tick period (s) */ +#define THZ 1/TCP_TICK /**< TCP tick frequency */ +#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */ +#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define TCP_MAX_OPTION_SPACE 40 + +#define TCP_DUPACK_THRESHOLD 3 +#define TCP_DEFAULT_RX_FIFO_SIZE 64 << 10 + +/** TCP FSM state definitions as per RFC793. */ +#define foreach_tcp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(LISTEN, "LISTEN") \ + _(SYN_SENT, "SYN_SENT") \ + _(SYN_RCVD, "SYN_RCVD") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(CLOSE_WAIT, "CLOSE_WAIT") \ + _(FIN_WAIT_1, "FIN_WAIT_1") \ + _(LAST_ACK, "LAST_ACK") \ + _(CLOSING, "CLOSING") \ + _(FIN_WAIT_2, "FIN_WAIT_2") \ + _(TIME_WAIT, "TIME_WAIT") + +typedef enum _tcp_state +{ +#define _(sym, str) TCP_STATE_##sym, + foreach_tcp_fsm_state +#undef _ + TCP_N_STATES +} tcp_state_t; + +format_function_t format_tcp_state; + +/** TCP timers */ +#define foreach_tcp_timer \ + _(RETRANSMIT, "RETRANSMIT") \ + _(DELACK, "DELAYED ACK") \ + _(PERSIST, "PERSIST") \ + _(KEEP, "KEEP") \ + _(2MSL, "2MSL") \ + _(RETRANSMIT_SYN, "RETRANSMIT_SYN") \ + _(ESTABLISH, "ESTABLISH") + +typedef enum _tcp_timers +{ +#define _(sym, str) TCP_TIMER_##sym, + foreach_tcp_timer +#undef _ + TCP_N_TIMERS +} tcp_timers_e; + +typedef void (timer_expiration_handler) (u32 index); + +extern timer_expiration_handler tcp_timer_delack_handler; +extern timer_expiration_handler tcp_timer_retransmit_handler; +extern timer_expiration_handler tcp_timer_retransmit_syn_handler; + +#define TCP_TIMER_HANDLE_INVALID ((u32) ~0) + +/* Timer delays as multiples of 100ms */ +#define TCP_TO_TIMER_TICK TCP_TICK*10 /* Period for converting from TCP + * ticks to timer units */ +#define TCP_DELACK_TIME 1 /* 0.1s */ +#define TCP_ESTABLISH_TIME 750 /* 75s */ +#define TCP_2MSL_TIME 300 /* 30s */ + +#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */ +#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */ +#define TCP_RTO_SYN_RETRIES 3 /* SYN retries without doubling RTO */ +#define TCP_RTO_INIT 1 * THZ /* Initial retransmit timer */ + +void tcp_update_time (f64 now, u32 thread_index); + +/** TCP connection flags */ +#define 
foreach_tcp_connection_flag \ + _(DELACK, "Delay ACK") \ + _(SNDACK, "Send ACK") \ + _(BURSTACK, "Burst ACK set") \ + _(SENT_RCV_WND0, "Sent 0 receive window") \ + _(RECOVERY, "Recovery on") \ + _(FAST_RECOVERY, "Fast Recovery on") + +typedef enum _tcp_connection_flag_bits +{ +#define _(sym, str) TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAG_BITS +} tcp_connection_flag_bits_e; + +typedef enum _tcp_connection_flag +{ +#define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT, + foreach_tcp_connection_flag +#undef _ + TCP_CONN_N_FLAGS +} tcp_connection_flags_e; + +/** TCP buffer flags */ +#define foreach_tcp_buf_flag \ + _ (ACK) /**< Sending ACK. */ \ + _ (DUPACK) /**< Sending DUPACK. */ \ + +enum +{ +#define _(f) TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ + TCP_N_BUF_BITS, +}; + +enum +{ +#define _(f) TCP_BUF_FLAG_##f = 1 << TCP_BUF_BIT_##f, + foreach_tcp_buf_flag +#undef _ +}; + +#define TCP_MAX_SACK_BLOCKS 5 /**< Max number of SACK blocks stored */ +#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0) + +typedef struct _sack_scoreboard_hole +{ + u32 next; /**< Index for next entry in linked list */ + u32 prev; /**< Index for previous entry in linked list */ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_scoreboard_hole_t; + +typedef struct _sack_scoreboard +{ + sack_scoreboard_hole_t *holes; /**< Pool of holes */ + u32 head; /**< Index to first entry */ + u32 sacked_bytes; /**< Number of bytes sacked in sb */ +} sack_scoreboard_t; + +typedef enum _tcp_cc_algorithm_type +{ + TCP_CC_NEWRENO, +} tcp_cc_algorithm_type_e; + +typedef struct _tcp_cc_algorithm tcp_cc_algorithm_t; + +typedef enum _tcp_cc_ack_t +{ + TCP_CC_ACK, + TCP_CC_DUPACK, + TCP_CC_PARTIALACK +} tcp_cc_ack_t; + +typedef struct _tcp_connection +{ + transport_connection_t connection; /**< Common transport data. First! */ + + u8 state; /**< TCP state as per tcp_state_t */ + u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ + u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ + + /* TODO RFC4898 */ + + /** Send sequence variables RFC793 */ + u32 snd_una; /**< oldest unacknowledged sequence number */ + u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ + u32 snd_wnd; /**< send window */ + u32 snd_wl1; /**< seq number used for last snd.wnd update */ + u32 snd_wl2; /**< ack number used for last snd.wnd update */ + u32 snd_nxt; /**< next seq number to be sent */ + + /** Receive sequence variables RFC793 */ + u32 rcv_nxt; /**< next sequence number expected */ + u32 rcv_wnd; /**< receive window we expect */ + + u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ + u32 iss; /**< initial sent sequence */ + u32 irs; /**< initial remote sequence */ + + /* Options */ + tcp_options_t opt; /**< TCP connection options parsed */ + u8 rcv_wscale; /**< Window scale to advertise to peer */ + u8 snd_wscale; /**< Window scale to use when sending */ + u32 tsval_recent; /**< Last timestamp received */ + u32 tsval_recent_age; /**< When last updated tstamp_recent*/ + + sack_block_t *snd_sacks; /**< Vector of SACKs to send. XXX Fixed size? 
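+ * At most 4 blocks fit in the 40 bytes of TCP option space
+ * (3 when timestamps are in use), so a small fixed bound would do.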
*/ + sack_scoreboard_t sack_sb; /**< SACK "scoreboard" that tracks holes */ + + u8 rcv_dupacks; /**< Number of DUPACKs received */ + u8 snt_dupacks; /**< Number of DUPACKs sent in a burst */ + + /* Congestion control */ + u32 cwnd; /**< Congestion window */ + u32 ssthresh; /**< Slow-start threshold */ + u32 prev_ssthresh; /**< ssthresh before congestion */ + u32 bytes_acked; /**< Bytes acknowledged by current segment */ + u32 rtx_bytes; /**< Retransmitted bytes */ + u32 tsecr_last_ack; /**< Timestamp echoed to us in last health ACK */ + tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */ + + /* RTT and RTO */ + u32 rto; /**< Retransmission timeout */ + u32 rto_boff; /**< Index for RTO backoff */ + u32 srtt; /**< Smoothed RTT */ + u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ + u32 rtt_ts; /**< Timestamp for tracked ACK */ + u32 rtt_seq; /**< Sequence number for tracked ACK */ + + u16 snd_mss; /**< Send MSS */ +} tcp_connection_t; + +struct _tcp_cc_algorithm +{ + void (*rcv_ack) (tcp_connection_t * tc); + void (*rcv_cong_ack) (tcp_connection_t * tc, tcp_cc_ack_t ack); + void (*congestion) (tcp_connection_t * tc); + void (*recovered) (tcp_connection_t * tc); + void (*init) (tcp_connection_t * tc); +}; + +#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY +#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY +#define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY) +#define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_recovery_off(tc) ((tc)->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh) + +typedef enum +{ + TCP_IP4, + TCP_IP6, + TCP_N_AF, +} tcp_af_t; + +typedef enum _tcp_error +{ +#define tcp_error(n,s) TCP_ERROR_##n, +#include +#undef tcp_error + TCP_N_ERROR, +} tcp_error_t; + +typedef struct _tcp_lookup_dispatch +{ + u8 next, error; +} tcp_lookup_dispatch_t; + +typedef struct _tcp_main +{ + /* Per-worker thread tcp connection pools */ + tcp_connection_t **connections; + + /* Pool of listeners. 
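+ * Entries are allocated in tcp_connection_bind () and retrieved via
+ * tcp_listener_get () using the index returned by bind.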
*/ + tcp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Convenience per worker-thread vector of connections to DELACK */ + u32 **delack_connections; + + /* Pool of half-open connections on which we've sent a SYN */ + tcp_connection_t *half_open_connections; + + /* Pool of local TCP endpoints */ + transport_endpoint_t *local_endpoints; + + /* Local endpoints lookup table */ + transport_endpoint_table_t local_endpoints_table; + + /* Congestion control algorithms registered */ + tcp_cc_algorithm_t *cc_algos; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} tcp_main_t; + +extern tcp_main_t tcp_main; +extern vlib_node_registration_t tcp4_input_node; +extern vlib_node_registration_t tcp6_input_node; +extern vlib_node_registration_t tcp4_output_node; +extern vlib_node_registration_t tcp6_output_node; + +always_inline tcp_main_t * +vnet_get_tcp_main () +{ + return &tcp_main; +} + +always_inline tcp_connection_t * +tcp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +always_inline tcp_connection_t * +tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) +{ + if (tcp_main.connections[thread_index] == 0) + return 0; + if (pool_is_free_index (tcp_main.connections[thread_index], conn_index)) + return 0; + return pool_elt_at_index (tcp_main.connections[thread_index], conn_index); +} + +void tcp_connection_close (tcp_connection_t * tc); +void tcp_connection_cleanup (tcp_connection_t * tc); +void tcp_connection_del (tcp_connection_t * tc); + +always_inline tcp_connection_t * +tcp_listener_get (u32 tli) +{ + return pool_elt_at_index (tcp_main.listener_pool, tli); +} + +always_inline tcp_connection_t * +tcp_half_open_connection_get (u32 conn_index) +{ + return pool_elt_at_index (tcp_main.half_open_connections, conn_index); +} + +void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b); +void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); +void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4); +void tcp_send_syn (tcp_connection_t * tc); +void tcp_send_fin (tcp_connection_t * tc); +void tcp_set_snd_mss (tcp_connection_t * tc); + +always_inline u32 +tcp_end_seq (tcp_header_t * th, u32 len) +{ + return th->seq_number + tcp_is_syn (th) + tcp_is_fin (th) + len; +} + +/* Modulo arithmetic for TCP sequence numbers */ +#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0) +#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0) +#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0) +#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0) + +/* Modulo arithmetic for timestamps */ +#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) +#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) + +always_inline u32 +tcp_flight_size (const tcp_connection_t * tc) +{ + return tc->snd_una_max - tc->snd_una - tc->sack_sb.sacked_bytes + + tc->rtx_bytes; +} + +/** + * Initial cwnd as per RFC5681 + */ +always_inline u32 +tcp_initial_cwnd (const tcp_connection_t * tc) +{ + if (tc->snd_mss > 2190) + return 2 * tc->snd_mss; + else if (tc->snd_mss > 1095) + return 3 * 
tc->snd_mss; + else + return 4 * tc->snd_mss; +} + +always_inline u32 +tcp_loss_wnd (const tcp_connection_t * tc) +{ + return tc->snd_mss; +} + +always_inline u32 +tcp_available_wnd (const tcp_connection_t * tc) +{ + return clib_min (tc->cwnd, tc->snd_wnd); +} + +always_inline u32 +tcp_available_snd_space (const tcp_connection_t * tc) +{ + u32 available_wnd = tcp_available_wnd (tc); + u32 flight_size = tcp_flight_size (tc); + + if (available_wnd <= flight_size) + return 0; + + return available_wnd - flight_size; +} + +void tcp_retransmit_first_unacked (tcp_connection_t * tc); + +void tcp_fast_retransmit (tcp_connection_t * tc); + +always_inline u32 +tcp_time_now (void) +{ + return clib_cpu_time_now () * tcp_main.tstamp_ticks_per_clock; +} + +u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); + +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes); + +void tcp_connection_timers_init (tcp_connection_t * tc); +void tcp_connection_timers_reset (tcp_connection_t * tc); + +void tcp_connection_init_vars (tcp_connection_t * tc); + +always_inline void +tcp_connection_force_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; +} + +always_inline void +tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + tc->timers[timer_id] + = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline void +tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc) +{ + /* XXX Switch to faster TW */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); +} + +always_inline void +tcp_timer_reset (tcp_connection_t * tc, u8 timer_id) +{ + if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID) + return; + + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID; +} + +always_inline void +tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval) +{ + if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->timers[timer_id]); + tc->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], + tc->c_c_index, timer_id, interval); +} + +always_inline u8 +tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) +{ + return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID; +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * hole); + +always_inline sack_scoreboard_hole_t * +scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, hole->next); + return 0; +} + +always_inline sack_scoreboard_hole_t * +scoreboard_first_hole (sack_scoreboard_t * sb) +{ + if (sb->head != TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, sb->head); + return 0; +} + +always_inline void +scoreboard_clear (sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole = scoreboard_first_hole (sb); + while ((hole = scoreboard_first_hole (sb))) + { + scoreboard_remove_hole (sb, hole); + } +} + +always_inline u32 +scoreboard_hole_bytes (sack_scoreboard_hole_t * hole) +{ + return hole->end - hole->start; +} + +always_inline void +tcp_cc_algo_register (tcp_cc_algorithm_type_e 
type, + const tcp_cc_algorithm_t * vft) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vec_validate (tm->cc_algos, type); + + tm->cc_algos[type] = *vft; +} + +always_inline tcp_cc_algorithm_t * +tcp_cc_algo_get (tcp_cc_algorithm_type_e type) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + return &tm->cc_algos[type]; +} + +void tcp_cc_init (tcp_connection_t * tc); + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number net order + * @param ack - ack number net order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, + u16 wnd) +{ + tcp_header_t *th; + + th = vlib_buffer_push_uninit (b, tcp_hdr_opts_len); + + th->src_port = sp; + th->dst_port = dp; + th->seq_number = seq; + th->ack_number = ack; + th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4; + th->flags = flags; + th->window = wnd; + th->checksum = 0; + th->urgent_pointer = 0; + return th; +} + +/** + * Push TCP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param seq - sequence number host order + * @param ack - ack number host order + * @param tcp_hdr_opts_len - header and options length in bytes + * @param flags - header flags + * @param wnd - window size + * + * @return - pointer to start of TCP header + */ +always_inline void * +vlib_buffer_push_tcp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, u32 seq, + u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd) +{ + return vlib_buffer_push_tcp_net_order (b, sp_net, dp_net, + clib_host_to_net_u32 (seq), + clib_host_to_net_u32 (ack), + tcp_hdr_opts_len, flags, + clib_host_to_net_u16 (wnd)); +} + +#endif /* _vnet_tcp_h_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def new file mode 100644 index 00000000..cff5ec13 --- /dev/null +++ b/src/vnet/tcp/tcp_error.def @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +tcp_error (NONE, "no error") +tcp_error (NO_LISTENER, "no listener for dst port") +tcp_error (LOOKUP_DROPS, "lookup drops") +tcp_error (DISPATCH, "Dispatch error") +tcp_error (ENQUEUED, "Packets pushed into rx fifo") +tcp_error (PURE_ACK, "Pure acks") +tcp_error (SYNS_RCVD, "SYNs received") +tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received") +tcp_error (NOT_READY, "Session not ready for packets") +tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") +tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space") +tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +tcp_error (SEGMENT_INVALID, "Invalid segment") +tcp_error (ACK_INVALID, "Invalid ACK") +tcp_error (ACK_DUP, "Duplicate ACK") +tcp_error (ACK_OLD, "Old ACK") +tcp_error (PKTS_SENT, "Packets sent") +tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") +tcp_error (RST_SENT, "Resets sent") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c new file mode 100644 index 00000000..7136741d --- /dev/null +++ b/src/vnet/tcp/tcp_format.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * tcp/tcp_format.c: tcp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static u8 * +format_tcp_flags (u8 * s, va_list * args) +{ + int flags = va_arg (*args, int); + +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_flag +#undef _ + return s; +} + +/* Format TCP header. */ +u8 * +format_tcp_header (u8 * s, va_list * args) +{ + tcp_header_t *tcp = va_arg (*args, tcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + uword indent; + + /* Nothing to do. 
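+ * If the buffer does not even hold a fixed-size TCP header, bail out
+ * with a short note rather than reading past the end of the data.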
*/ + if (max_header_bytes < sizeof (tcp[0])) + return format (s, "TCP header truncated"); + + indent = format_get_indent (s); + indent += 2; + header_bytes = tcp_header_bytes (tcp); + + s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src), + clib_net_to_host_u16 (tcp->dst)); + + s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent, + clib_net_to_host_u32 (tcp->seq_number), + clib_net_to_host_u32 (tcp->ack_number)); + + s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space, + indent, format_tcp_flags, tcp->flags, header_bytes); + + s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent, + clib_net_to_host_u16 (tcp->window), + clib_net_to_host_u16 (tcp->checksum)); + + +#if 0 + /* Format TCP options. */ + { + u8 *o; + u8 *option_start = (void *) (tcp + 1); + u8 *option_end = (void *) tcp + header_bytes; + + for (o = option_start; o < option_end;) + { + u32 length = o[1]; + switch (o[0]) + { + case TCP_OPTION_END: + length = 1; + o = option_end; + break; + + case TCP_OPTION_NOOP: + length = 1; + break; + + } + } + } +#endif + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, tcp->dst); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void *) tcp + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c new file mode 100644 index 00000000..daa0683b --- /dev/null +++ b/src/vnet/tcp/tcp_input.c @@ -0,0 +1,2316 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +/* All TCP nodes have the same outgoing arcs */ +#define foreach_tcp_state_next \ + _ (DROP, "error-drop") \ + _ (TCP4_OUTPUT, "tcp4-output") \ + _ (TCP6_OUTPUT, "tcp6-output") + +typedef enum _tcp_established_next +{ +#define _(s,n) TCP_ESTABLISHED_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_ESTABLISHED_N_NEXT, +} tcp_established_next_t; + +typedef enum _tcp_rcv_process_next +{ +#define _(s,n) TCP_RCV_PROCESS_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_RCV_PROCESS_N_NEXT, +} tcp_rcv_process_next_t; + +typedef enum _tcp_syn_sent_next +{ +#define _(s,n) TCP_SYN_SENT_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_SYN_SENT_N_NEXT, +} tcp_syn_sent_next_t; + +typedef enum _tcp_listen_next +{ +#define _(s,n) TCP_LISTEN_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_LISTEN_N_NEXT, +} tcp_listen_next_t; + +/* Generic, state independent indices */ +typedef enum _tcp_state_next +{ +#define _(s,n) TCP_NEXT_##s, + foreach_tcp_state_next +#undef _ + TCP_STATE_N_NEXT, +} tcp_state_next_t; + +#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \ + : TCP_NEXT_TCP6_OUTPUT) + +vlib_node_registration_t tcp4_established_node; +vlib_node_registration_t tcp6_established_node; + +/** + * Validate segment sequence number. As per RFC793: + * + * Segment Receive Test + * Length Window + * ------- ------- ------------------------------------------- + * 0 0 SEG.SEQ = RCV.NXT + * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * >0 0 not acceptable + * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND + * + * This ultimately consists in checking if segment falls within the window. + * The one important difference compared to RFC793 is that we use rcv_las, + * or the rcv_nxt at last ack sent instead of rcv_nxt since that's the + * peer's reference when computing our receive window. + * + * This accepts only segments within the window. 
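+ *
+ * Worked example (illustrative numbers): with rcv_nxt = rcv_las = 1000
+ * and rcv_wnd = 500, a segment with seq = 1100 and end_seq = 1400 is
+ * accepted (1100 >= 1000 and 1400 <= 1500), whereas one with
+ * end_seq = 1600 is rejected. The seq_leq ()/seq_geq () macros compare
+ * modulo 2^32, so the test also holds across sequence number wrap.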
+ */ +always_inline u8 +tcp_segment_in_rcv_wnd (tcp_connection_t * tc, u32 seq, u32 end_seq) +{ + return seq_leq (end_seq, tc->rcv_las + tc->rcv_wnd) + && seq_geq (seq, tc->rcv_nxt); +} + +void +tcp_options_parse (tcp_header_t * th, tcp_options_t * to) +{ + const u8 *data; + u8 opt_len, opts_len, kind; + int j; + sack_block_t b; + + opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); + data = (const u8 *) (th + 1); + + /* Zero out all flags but those set in SYN */ + to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE); + + for (; opts_len > 0; opts_len -= opt_len, data += opt_len) + { + kind = data[0]; + + /* Get options length */ + if (kind == TCP_OPTION_EOL) + break; + else if (kind == TCP_OPTION_NOOP) + opt_len = 1; + else + { + /* broken options */ + if (opts_len < 2) + break; + opt_len = data[1]; + + /* weird option length */ + if (opt_len < 2 || opt_len > opts_len) + break; + } + + /* Parse options */ + switch (kind) + { + case TCP_OPTION_MSS: + if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_MSS; + to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); + } + break; + case TCP_OPTION_WINDOW_SCALE: + if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) + { + to->flags |= TCP_OPTS_FLAG_WSCALE; + to->wscale = data[2]; + if (to->wscale > TCP_MAX_WND_SCALE) + { + clib_warning ("Illegal window scaling value: %d", + to->wscale); + to->wscale = TCP_MAX_WND_SCALE; + } + } + break; + case TCP_OPTION_TIMESTAMP: + if (opt_len == TCP_OPTION_LEN_TIMESTAMP) + { + to->flags |= TCP_OPTS_FLAG_TSTAMP; + to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); + to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); + } + break; + case TCP_OPTION_SACK_PERMITTED: + if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) + to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + break; + case TCP_OPTION_SACK_BLOCK: + /* If SACK permitted was not advertised or a SYN, break */ + if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) + break; + + /* If too short or not correctly formatted, break */ + if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) + break; + + to->flags |= TCP_OPTS_FLAG_SACK; + to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; + vec_reset_length (to->sacks); + for (j = 0; j < to->n_sack_blocks; j++) + { + b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 4 * j)); + b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 4 * j)); + vec_add1 (to->sacks, b); + } + break; + default: + /* Nothing to see here */ + continue; + } + } +} + +always_inline int +tcp_segment_check_paws (tcp_connection_t * tc) +{ + /* XXX normally test for timestamp should be lt instead of leq, but for + * local testing this is not enough */ + return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + && timestamp_lt (tc->opt.tsval, tc->tsval_recent); +} + +/** + * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19 + * + * It first verifies if segment has a wrapped sequence number (PAWS) and then + * does the processing associated to the first four steps (ignoring security + * and precedence): sequence number, rst bit and syn bit checks. + * + * @return 0 if segments passes validation. 
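+ *
+ * Hypothetical caller sketch (illustration only, not the actual node
+ * dispatch code):
+ *
+ *   if (tcp_segment_validate (vm, tc0, b0, th0, &next0))
+ *     goto done;  // stop processing this segment; next0 may already
+ *                 // point at tcp4/6-output if an ACK was scheduled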
+ */ +static int +tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, + vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0) +{ + u8 paws_failed; + + if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) + return -1; + + tcp_options_parse (th0, &tc0->opt); + + /* RFC1323: Check against wrapped sequence numbers (PAWS). If we have + * timestamp to echo and it's less than tsval_recent, drop segment + * but still send an ACK in order to retain TCP's mechanism for detecting + * and recovering from half-open connections */ + paws_failed = tcp_segment_check_paws (tc0); + if (paws_failed) + { + clib_warning ("paws failed"); + + /* If it just so happens that a segment updates tsval_recent for a + * segment over 24 days old, invalidate tsval_recent. */ + if (timestamp_lt (tc0->tsval_recent_age + TCP_PAWS_IDLE, + tcp_time_now ())) + { + /* Age isn't reset until we get a valid tsval (bsd inspired) */ + tc0->tsval_recent = 0; + } + else + { + /* Drop after ack if not rst */ + if (!tcp_rst (th0)) + { + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + return -1; + } + } + } + + /* 1st: check sequence number */ + if (!tcp_segment_in_rcv_wnd (tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end)) + { + if (!tcp_rst (th0)) + { + /* Send dup ack */ + tcp_make_ack (tc0, b0); + *next0 = tcp_next_output (tc0->c_is_ip4); + } + return -1; + } + + /* 2nd: check the RST bit */ + if (tcp_rst (th0)) + { + /* Notify session that connection has been reset. Switch + * state to closed and await for session to do the cleanup. */ + stream_session_reset_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSED; + return -1; + } + + /* 3rd: check security and precedence (skip) */ + + /* 4th: check the SYN bit */ + if (tcp_syn (th0)) + { + tcp_send_reset (b0, tc0->c_is_ip4); + return -1; + } + + /* If PAWS passed and segment in window, save timestamp */ + if (!paws_failed) + { + tc0->tsval_recent = tc0->opt.tsval; + tc0->tsval_recent_age = tcp_time_now (); + } + + return 0; +} + +always_inline int +tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0) +{ + /* SND.UNA =< SEG.ACK =< SND.NXT */ + return (seq_leq (tc0->snd_una, vnet_buffer (tb0)->tcp.ack_number) + && seq_leq (vnet_buffer (tb0)->tcp.ack_number, tc0->snd_nxt)); +} + +/** + * Compute smoothed RTT as per VJ's '88 SIGCOMM and RFC6298 + * + * Note that although the original article, srtt and rttvar are scaled + * to minimize round-off errors, here we don't. Instead, we rely on + * better precision time measurements. + * + * TODO support us rtt resolution + */ +static void +tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) +{ + int err; + + if (tc->srtt != 0) + { + err = mrtt - tc->srtt; + tc->srtt += err >> 3; + + /* XXX Drop in RTT results in RTTVAR increase and bigger RTO. + * The increase should be bound */ + tc->rttvar += (clib_abs (err) - tc->rttvar) >> 2; + } + else + { + /* First measurement. */ + tc->srtt = mrtt; + tc->rttvar = mrtt << 1; + } +} + +/** Update RTT estimate and RTO timer + * + * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK + * timing. Middle boxes are known to fiddle with TCP options so we + * should give higher priority to ACK timing. + * + * return 1 if valid rtt 0 otherwise + */ +static int +tcp_update_rtt (tcp_connection_t * tc, u32 ack) +{ + u32 mrtt = 0; + + /* Karn's rule, part 1. Don't use retransmitted segments to estimate + * RTT because they're ambiguous. 
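
tcp_estimate_rtt above is the Van Jacobson estimator of RFC6298, with the shifts (err >> 3, >> 2) standing in for alpha = 1/8 and beta = 1/4. A standalone floating-point version with the textbook constants, including the 1-second RTO floor that RFC6298 recommends (the patch itself clamps the RTO against TCP_RTO_MAX instead), is:

#include <math.h>

typedef struct
{
  double srtt;                  /* smoothed RTT, seconds */
  double rttvar;                /* RTT variance estimate */
  double rto;                   /* retransmission timeout */
} rtt_estimator_t;

/* Feed one RTT measurement (seconds) into a zero-initialized estimator. */
static void
rtt_estimator_update (rtt_estimator_t * e, double measured)
{
  if (e->srtt == 0.0)
    {
      /* First measurement: SRTT <- R, RTTVAR <- R/2 */
      e->srtt = measured;
      e->rttvar = measured / 2.0;
    }
  else
    {
      /* RTTVAR <- 3/4 RTTVAR + 1/4 |SRTT - R|, SRTT <- 7/8 SRTT + 1/8 R */
      e->rttvar = 0.75 * e->rttvar + 0.25 * fabs (e->srtt - measured);
      e->srtt = 0.875 * e->srtt + 0.125 * measured;
    }

  /* RTO <- SRTT + 4 * RTTVAR, floored at 1 second per RFC6298 */
  e->rto = e->srtt + 4.0 * e->rttvar;
  if (e->rto < 1.0)
    e->rto = 1.0;
}
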
*/ + if (tc->rtt_seq && seq_gt (ack, tc->rtt_seq) && !tc->rto_boff) + { + mrtt = tcp_time_now () - tc->rtt_ts; + tc->rtt_seq = 0; + } + + /* As per RFC7323 TSecr can be used for RTTM only if the segment advances + * snd_una, i.e., the left side of the send window: + * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't + * try to update rtt for dupacks */ + else if (tcp_opts_tstamp (&tc->opt) && tc->opt.tsecr && tc->bytes_acked) + { + mrtt = tcp_time_now () - tc->opt.tsecr; + } + + /* Ignore dubious measurements */ + if (mrtt == 0 || mrtt > TCP_RTT_MAX) + return 0; + + tcp_estimate_rtt (tc, mrtt); + + tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); + + return 1; +} + +/** + * Dequeue bytes that have been acked and while at it update RTT estimates. + */ +static void +tcp_dequeue_acked (tcp_connection_t * tc, u32 ack) +{ + /* Dequeue the newly ACKed bytes */ + stream_session_dequeue_drop (&tc->connection, tc->bytes_acked); + + /* Update rtt and rto */ + if (tcp_update_rtt (tc, ack)) + { + /* Good ACK received and valid RTT, make sure retransmit backoff is 0 */ + tc->rto_boff = 0; + } +} + +/** Check if dupack as per RFC5681 Sec. 2 */ +always_inline u8 +tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 new_snd_wnd) +{ + return ((vnet_buffer (b)->tcp.ack_number == tc->snd_una) + && seq_gt (tc->snd_una_max, tc->snd_una) + && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number) + && (new_snd_wnd == tc->snd_wnd)); +} + +void +scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + sack_scoreboard_hole_t *next, *prev; + + if (hole->next != TCP_INVALID_SACK_HOLE_INDEX) + { + next = pool_elt_at_index (sb->holes, hole->next); + next->prev = hole->prev; + } + + if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX) + { + prev = pool_elt_at_index (sb->holes, hole->prev); + prev->next = hole->next; + } + else + { + sb->head = hole->next; + } + + pool_put (sb->holes, hole); +} + +sack_scoreboard_hole_t * +scoreboard_insert_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * prev, + u32 start, u32 end) +{ + sack_scoreboard_hole_t *hole, *next; + u32 hole_index; + + pool_get (sb->holes, hole); + memset (hole, 0, sizeof (*hole)); + + hole->start = start; + hole->end = end; + hole_index = hole - sb->holes; + + if (prev) + { + hole->prev = prev - sb->holes; + hole->next = prev->next; + + if ((next = scoreboard_next_hole (sb, hole))) + next->prev = hole_index; + + prev->next = hole_index; + } + else + { + sb->head = hole_index; + hole->prev = TCP_INVALID_SACK_HOLE_INDEX; + hole->next = TCP_INVALID_SACK_HOLE_INDEX; + } + + return hole; +} + +static void +tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) +{ + sack_scoreboard_t *sb = &tc->sack_sb; + sack_block_t *blk, tmp; + sack_scoreboard_hole_t *hole, *next_hole; + u32 blk_index = 0; + int i, j; + + if (!tcp_opts_sack (tc) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) + return; + + /* Remove invalid blocks */ + vec_foreach (blk, tc->opt.sacks) + { + if (seq_lt (blk->start, blk->end) + && seq_gt (blk->start, tc->snd_una) + && seq_gt (blk->start, ack) && seq_lt (blk->end, tc->snd_nxt)) + continue; + + vec_del1 (tc->opt.sacks, blk - tc->opt.sacks); + } + + /* Add block for cumulative ack */ + if (seq_gt (ack, tc->snd_una)) + { + tmp.start = tc->snd_una; + tmp.end = ack; + vec_add1 (tc->opt.sacks, tmp); + } + + if (vec_len (tc->opt.sacks) == 0) + return; + + /* Make sure blocks are ordered */ + for (i = 0; i < vec_len (tc->opt.sacks); i++) + for (j = i; j < vec_len 
(tc->opt.sacks); j++) + if (seq_lt (tc->opt.sacks[j].start, tc->opt.sacks[i].start)) + { + tmp = tc->opt.sacks[i]; + tc->opt.sacks[i] = tc->opt.sacks[j]; + tc->opt.sacks[j] = tmp; + } + + /* If no holes, insert the first that covers all outstanding bytes */ + if (sb->head == TCP_INVALID_SACK_HOLE_INDEX) + { + scoreboard_insert_hole (sb, 0, tc->snd_una, tc->snd_una_max); + } + + /* Walk the holes with the SACK blocks */ + hole = pool_elt_at_index (sb->holes, sb->head); + while (hole && blk_index < vec_len (tc->opt.sacks)) + { + blk = &tc->opt.sacks[blk_index]; + + if (seq_leq (blk->start, hole->start)) + { + /* Block covers hole. Remove hole */ + if (seq_geq (blk->end, hole->end)) + { + next_hole = scoreboard_next_hole (sb, hole); + + /* Byte accounting */ + if (seq_lt (hole->end, ack)) + { + /* Bytes lost because snd wnd left edge advances */ + if (seq_lt (next_hole->start, ack)) + sb->sacked_bytes -= next_hole->start - hole->end; + else + sb->sacked_bytes -= ack - hole->end; + } + else + { + sb->sacked_bytes += scoreboard_hole_bytes (hole); + } + + scoreboard_remove_hole (sb, hole); + hole = next_hole; + } + /* Partial overlap */ + else + { + sb->sacked_bytes += blk->end - hole->start; + hole->start = blk->end; + blk_index++; + } + } + else + { + /* Hole must be split */ + if (seq_leq (blk->end, hole->end)) + { + sb->sacked_bytes += blk->end - blk->start; + scoreboard_insert_hole (sb, hole, blk->end, hole->end); + hole->end = blk->start - 1; + blk_index++; + } + else + { + sb->sacked_bytes += hole->end - blk->start + 1; + hole->end = blk->start - 1; + hole = scoreboard_next_hole (sb, hole); + } + } + } +} + +/** Update snd_wnd + * + * If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set + * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ +static void +tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) +{ + if (tc->snd_wl1 < seq || (tc->snd_wl1 == seq && tc->snd_wl2 <= ack)) + { + tc->snd_wnd = snd_wnd; + tc->snd_wl1 = seq; + tc->snd_wl2 = ack; + } +} + +static void +tcp_cc_congestion (tcp_connection_t * tc) +{ + tc->cc_algo->congestion (tc); +} + +static void +tcp_cc_recover (tcp_connection_t * tc) +{ + if (tcp_in_fastrecovery (tc)) + { + tc->cc_algo->recovered (tc); + tcp_recovery_off (tc); + } + else if (tcp_in_recovery (tc)) + { + tcp_recovery_off (tc); + tc->cwnd = tcp_loss_wnd (tc); + } +} + +static void +tcp_cc_rcv_ack (tcp_connection_t * tc) +{ + u8 partial_ack; + + if (tcp_in_recovery (tc)) + { + partial_ack = seq_lt (tc->snd_una, tc->snd_una_max); + if (!partial_ack) + { + /* Clear retransmitted bytes. */ + tc->rtx_bytes = 0; + tcp_cc_recover (tc); + } + else + { + /* Clear retransmitted bytes. XXX should we clear all? 
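
tcp_update_snd_wnd above applies the RFC793 SND.WL1/SND.WL2 rule using plain integer comparisons. Expressed with the same wrap-safe signed-difference comparisons used elsewhere in this file, the predicate alone would read as the sketch below (illustrative only, not the patch's code):

#include <stdint.h>

/* RFC793/RFC7323 send-window update test: update if SND.WL1 < SEG.SEQ, or
 * SND.WL1 == SEG.SEQ and SND.WL2 <= SEG.ACK, with wrap-safe comparisons. */
static inline int
snd_wnd_should_update (uint32_t snd_wl1, uint32_t snd_wl2,
                       uint32_t seq, uint32_t ack)
{
  return ((int32_t) (snd_wl1 - seq) < 0)
    || (snd_wl1 == seq && (int32_t) (snd_wl2 - ack) <= 0);
}
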
*/ + tc->rtx_bytes = 0; + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK); + + /* Retransmit first unacked segment */ + tcp_retransmit_first_unacked (tc); + } + } + else + { + tc->cc_algo->rcv_ack (tc); + } + + tc->rcv_dupacks = 0; + tc->tsecr_last_ack = tc->opt.tsecr; +} + +static void +tcp_cc_rcv_dupack (tcp_connection_t * tc, u32 ack) +{ + ASSERT (tc->snd_una == ack); + + tc->rcv_dupacks++; + if (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + { + /* RFC6582 NewReno heuristic to avoid multiple fast retransmits */ + if (tc->opt.tsecr != tc->tsecr_last_ack) + { + tc->rcv_dupacks = 0; + return; + } + + tcp_fastrecovery_on (tc); + + /* Handle congestion and dupack */ + tcp_cc_congestion (tc); + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + + tcp_fast_retransmit (tc); + + /* Post retransmit update cwnd to ssthresh and account for the + * three segments that have left the network and should've been + * buffered at the receiver */ + tc->cwnd = tc->ssthresh + TCP_DUPACK_THRESHOLD * tc->snd_mss; + } + else if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD) + { + ASSERT (tcp_in_fastrecovery (tc)); + + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + } +} + +void +tcp_cc_init (tcp_connection_t * tc) +{ + tc->cc_algo = tcp_cc_algo_get (TCP_CC_NEWRENO); + tc->cc_algo->init (tc); +} + +static int +tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_header_t * th, u32 * next, u32 * error) +{ + u32 new_snd_wnd; + + /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) then send an + * ACK, drop the segment, and return */ + if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)) + { + tcp_make_ack (tc, b); + *next = tcp_next_output (tc->c_is_ip4); + *error = TCP_ERROR_ACK_INVALID; + return -1; + } + + /* If old ACK, discard */ + if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + { + *error = TCP_ERROR_ACK_OLD; + return -1; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); + + new_snd_wnd = clib_net_to_host_u32 (th->window) << tc->snd_wscale; + + if (tcp_ack_is_dupack (tc, b, new_snd_wnd)) + { + tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); + *error = TCP_ERROR_ACK_DUP; + return -1; + } + + /* Valid ACK */ + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number; + + /* Dequeue ACKed packet and update RTT */ + tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); + + tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number, + vnet_buffer (b)->tcp.ack_number, new_snd_wnd); + + /* Updates congestion control (slow start/congestion avoidance) */ + tcp_cc_rcv_ack (tc); + + /* If everything has been acked, stop retransmit timer + * otherwise update */ + if (tc->snd_una == tc->snd_una_max) + tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); + else + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, tc->rto); + + return 0; +} + +/** + * Build SACK list as per RFC2018. + * + * Makes sure the first block contains the segment that generated the current + * ACK and the following ones are the ones most recently reported in SACK + * blocks. + * + * @param tc TCP connection for which the SACK list is updated + * @param start Start sequence number of the newest SACK block + * @param end End sequence of the newest SACK block + */ +static void +tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) +{ + sack_block_t *new_list = 0, block; + u32 n_elts; + int i; + u8 new_head = 0; + + /* If the first segment is ooo add it to the list. 
Last write might've moved + * rcv_nxt over the first segment. */ + if (seq_lt (tc->rcv_nxt, start)) + { + block.start = start; + block.end = end; + vec_add1 (new_list, block); + new_head = 1; + } + + /* Find the blocks still worth keeping. */ + for (i = 0; i < vec_len (tc->snd_sacks); i++) + { + /* Discard if: + * 1) rcv_nxt advanced beyond current block OR + * 2) Segment overlapped by the first segment, i.e., it has been merged + * into it.*/ + if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt) + || seq_leq (tc->snd_sacks[i].start, end)) + continue; + + /* Save subsequent segments to new SACK list. */ + n_elts = clib_min (vec_len (tc->snd_sacks) - i, + TCP_MAX_SACK_BLOCKS - new_head); + vec_insert_elts (new_list, &tc->snd_sacks[i], n_elts, new_head); + break; + } + + /* Replace old vector with new one */ + vec_free (tc->snd_sacks); + tc->snd_sacks = new_list; +} + +/** Enqueue data for delivery to application */ +always_inline u32 +tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + int written; + + /* Pure ACK. Update rcv_nxt and be done. */ + if (PREDICT_FALSE (data_len == 0)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + return TCP_ERROR_PURE_ACK; + } + + written = stream_session_enqueue_data (&tc->connection, + vlib_buffer_get_current (b), + data_len, 1 /* queue event */ ); + + /* Update rcv_nxt */ + if (PREDICT_TRUE (written == data_len)) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end; + } + /* If more data written than expected, account for out-of-order bytes. */ + else if (written > data_len) + { + tc->rcv_nxt = vnet_buffer (b)->tcp.seq_end + written - data_len; + + /* Send ACK confirming the update */ + tc->flags |= TCP_CONN_SNDACK; + + /* Update SACK list if need be */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* Remove SACK blocks that have been delivered */ + tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt); + } + } + else + { + ASSERT (0); + return TCP_ERROR_FIFO_FULL; + } + + return TCP_ERROR_ENQUEUED; +} + +/** Enqueue out-of-order data */ +always_inline u32 +tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, + u16 data_len) +{ + stream_session_t *s0; + u32 offset, seq; + + s0 = stream_session_get (tc->c_s_index, tc->c_thread_index); + seq = vnet_buffer (b)->tcp.seq_number; + offset = seq - tc->rcv_nxt; + + if (svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset, + data_len, vlib_buffer_get_current (b))) + return TCP_ERROR_FIFO_FULL; + + /* Update SACK list if in use */ + if (tcp_opts_sack_permitted (&tc->opt)) + { + ooo_segment_t *newest; + u32 start, end; + + /* Get the newest segment from the fifo */ + newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo); + start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest); + end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest); + + tcp_update_sack_list (tc, start, end); + } + + return TCP_ERROR_ENQUEUED; +} + +/** + * Check if ACK could be delayed. DELACK timer is set only after frame is + * processed so this can return true for a full bursts of packets. + */ +always_inline int +tcp_can_delack (tcp_connection_t * tc) +{ + /* If there's no DELACK timer set and the last window sent wasn't 0 we + * can safely delay. 
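
tcp_update_sack_list above keeps the blocks reported to the peer ordered so that the newest block always comes first, as RFC2018 requires, and discards blocks that have already been delivered or merged into the newest one. A compact standalone sketch of that bookkeeping, with a fixed three-block list and hypothetical names, is:

#include <stdint.h>
#include <string.h>

typedef struct
{
  uint32_t start, end;
} sack_block_example_t;

#define SACK_MAX_BLOCKS 3       /* what typically fits next to timestamps */

/* Prepend the newest out-of-order block and keep older blocks that are
 * neither already delivered (start <= rcv_nxt) nor overlapping the new
 * block.  Returns the updated number of blocks. */
static int
sack_list_update (sack_block_example_t * list, int n_blocks, uint32_t rcv_nxt,
                  uint32_t start, uint32_t end)
{
  sack_block_example_t out[SACK_MAX_BLOCKS];
  int i, m = 0;

  out[m].start = start;
  out[m].end = end;
  m++;

  for (i = 0; i < n_blocks && m < SACK_MAX_BLOCKS; i++)
    {
      if ((int32_t) (list[i].start - rcv_nxt) <= 0)     /* delivered */
        continue;
      if ((int32_t) (list[i].start - end) <= 0
          && (int32_t) (list[i].end - start) >= 0)      /* merged into newest */
        continue;
      out[m++] = list[i];
    }

  memcpy (list, out, m * sizeof (out[0]));
  return m;
}
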
*/ + if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK) + && (tc->flags & TCP_CONN_SENT_RCV_WND0) == 0 + && (tc->flags & TCP_CONN_SNDACK) == 0) + return 1; + + return 0; +} + +static int +tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, + u16 n_data_bytes, u32 * next0) +{ + u32 error = 0; + + /* Handle out-of-order data */ + if (PREDICT_FALSE (vnet_buffer (b)->tcp.seq_number != tc->rcv_nxt)) + { + error = tcp_session_enqueue_ooo (tc, b, n_data_bytes); + + /* Don't send more than 3 dupacks per burst + * XXX decide if this is good */ + if (tc->snt_dupacks < 3) + { + /* RFC2581: Send DUPACK for fast retransmit */ + tcp_make_ack (tc, b); + *next0 = tcp_next_output (tc->c_is_ip4); + + /* Mark as DUPACK. We may filter these in output if + * the burst fills the holes. */ + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_DUPACK; + + tc->snt_dupacks++; + } + + goto done; + } + + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. */ + error = tcp_session_enqueue_data (tc, b, n_data_bytes); + + /* Check if ACK can be delayed */ + if (tcp_can_delack (tc)) + { + /* Nothing to do for pure ACKs */ + if (n_data_bytes == 0) + goto done; + + /* If connection has not been previously marked for delay ack + * add it to the list and flag it */ + if (!tc->flags & TCP_CONN_DELACK) + { + vec_add1 (tm->delack_connections[tc->c_thread_index], + tc->c_c_index); + tc->flags |= TCP_CONN_DELACK; + } + } + else + { + /* Check if a packet has already been enqueued to output for burst. + * If yes, then drop this one, otherwise, let it pass through to + * output */ + if ((tc->flags & TCP_CONN_BURSTACK) == 0) + { + *next0 = tcp_next_output (tc->c_is_ip4); + tcp_make_ack (tc, b); + error = TCP_ERROR_ENQUEUED; + + /* TODO: maybe add counter to ensure N acks will be sent/burst */ + tc->flags |= TCP_CONN_BURSTACK; + } + } + +done: + return error; +} + +void +delack_timers_init (tcp_main_t * tm, u32 thread_index) +{ + tcp_connection_t *tc; + u32 i, *conns; + tw_timer_wheel_16t_2w_512sl_t *tw; + + tw = &tm->timer_wheels[thread_index]; + conns = tm->delack_connections[thread_index]; + for (i = 0; i < vec_len (conns); i++) + { + tc = pool_elt_at_index (tm->connections[thread_index], conns[i]); + ASSERT (0 != tc); + + tc->timers[TCP_TIMER_DELACK] + = tw_timer_start_16t_2w_512sl (tw, conns[i], + TCP_TIMER_DELACK, TCP_DELACK_TIME); + } + vec_reset_length (tm->delack_connections[thread_index]); +} + +always_inline uword +tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_ESTABLISHED_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); 
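+          /* The per-segment work below follows the RFC793 "segment arrives"
+           * processing for ESTABLISHED: parse the ip4/ip6 and TCP headers,
+           * compute seq_end (SYNs, FINs and data all consume sequence space),
+           * then run the numbered checks: sequence/RST/SYN validation (1-4),
+           * ACK processing (5), segment text (7) and FIN (8). */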
+ + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (th0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (th0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0; + + /* TODO header prediction fast path */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, th0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + if (tcp_rcv_ack (tc0, b0, th0, &next0, &error0)) + { + goto drop; + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + vlib_buffer_advance (b0, n_advance_bytes0); + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + + /* 8: check the FIN bit */ + if (tcp_fin (th0)) + { + /* Send ACK and enter CLOSE-WAIT */ + tcp_make_ack (tc0, b0); + tcp_connection_force_ack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + tc0->state = TCP_STATE_CLOSE_WAIT; + stream_session_disconnect_notify (&tc0->connection); + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + delack_timers_init (tm, my_thread_index); + + return from_frame->n_vectors; +} + +static uword +tcp4_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_established (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_established_node) = +{ + .function = tcp4_established, + .name = "tcp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR,.error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_established_node, tcp4_established); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_established_node) = +{ + .function = tcp6_established, + .name = "tcp6-established", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_ESTABLISHED_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established); + +vlib_node_registration_t tcp4_syn_sent_node; +vlib_node_registration_t tcp6_syn_sent_node; + +always_inline uword +tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, ack0, seq0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + tcp_connection_t *new_tc0; + u32 next0 = TCP_SYN_SENT_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = + tcp_half_open_connection_get (vnet_buffer (b0)-> + tcp.connection_index); + + ack0 = vnet_buffer (b0)->tcp.ack_number; + seq0 = vnet_buffer (b0)->tcp.seq_number; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + if (PREDICT_FALSE + (!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0))) + goto drop; + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = seq0 + tcp_is_syn (tcp0) + + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * 1. check the ACK bit + */ + + /* + * If the ACK bit is set + * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless + * the RST bit is set, if so drop the segment and return) + * + * and discard the segment. Return. + * If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. + */ + if (tcp_ack (tcp0)) + { + if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt) + { + if (!tcp_rst (tcp0)) + tcp_send_reset (b0, is_ip4); + + goto drop; + } + + /* Make sure ACK is valid */ + if (tc0->snd_una > ack0) + goto drop; + } + + /* + * 2. check the RST bit + */ + + if (tcp_rst (tcp0)) + { + /* If ACK is acceptable, signal client that peer is not + * willing to accept connection and drop connection*/ + if (tcp_ack (tcp0)) + { + stream_session_connect_notify (&tc0->connection, sst, + 1 /* fail */ ); + tcp_connection_cleanup (tc0); + } + goto drop; + } + + /* + * 3. check the security and precedence (skipped) + */ + + /* + * 4. check the SYN bit + */ + + /* No SYN flag. Drop. 
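+           * Otherwise this is a SYN or SYN-ACK: the half-open connection is
+           * promoted to the current thread's connection pool, its options are
+           * parsed, and the connection moves to ESTABLISHED (our SYN was
+           * ACKed) or to SYN-RCVD on a simultaneous open.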
*/ + if (!tcp_syn (tcp0)) + goto drop; + + /* Stop connection establishment and retransmit timers */ + tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH); + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + + /* Valid SYN or SYN-ACK. Move connection from half-open pool to + * current thread pool. */ + pool_get (tm->connections[my_thread_index], new_tc0); + clib_memcpy (new_tc0, tc0, sizeof (*new_tc0)); + + new_tc0->c_thread_index = my_thread_index; + + /* Cleanup half-open connection XXX lock */ + pool_put (tm->half_open_connections, tc0); + + new_tc0->rcv_nxt = vnet_buffer (b0)->tcp.seq_end; + new_tc0->irs = seq0; + + /* Parse options */ + tcp_options_parse (tcp0, &new_tc0->opt); + tcp_connection_init_vars (new_tc0); + + if (tcp_opts_tstamp (&new_tc0->opt)) + { + new_tc0->tsval_recent = new_tc0->opt.tsval; + new_tc0->tsval_recent_age = tcp_time_now (); + } + + if (tcp_opts_wscale (&new_tc0->opt)) + new_tc0->snd_wscale = new_tc0->opt.wscale; + + new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << new_tc0->snd_wscale; + new_tc0->snd_wl1 = seq0; + new_tc0->snd_wl2 = ack0; + + /* SYN-ACK: See if we can switch to ESTABLISHED state */ + if (tcp_ack (tcp0)) + { + /* Our SYN is ACKed: we have iss < ack = snd_una */ + + /* TODO Dequeue acknowledged segments if we support Fast Open */ + new_tc0->snd_una = ack0; + new_tc0->state = TCP_STATE_ESTABLISHED; + + /* Notify app that we have connection */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + /* Make sure after data segment processing ACK is sent */ + new_tc0->flags |= TCP_CONN_SNDACK; + } + /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */ + else + { + new_tc0->state = TCP_STATE_SYN_RCVD; + + /* Notify app that we have connection XXX */ + stream_session_connect_notify (&new_tc0->connection, sst, 0); + + tcp_make_synack (new_tc0, b0); + next0 = tcp_next_output (is_ip4); + + goto drop; + } + + /* Read data, if any */ + if (n_data_bytes0) + { + error0 = + tcp_segment_rcv (tm, new_tc0, b0, n_data_bytes0, &next0); + if (error0 == TCP_ERROR_PURE_ACK) + error0 = TCP_ERROR_SYN_ACKS_RCVD; + } + else + { + tcp_make_ack (new_tc0, b0); + next0 = tcp_next_output (new_tc0->c_is_ip4); + } + + drop: + + b0->error = error0 ? node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_syn_sent (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_syn_sent_rcv (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_syn_sent_node) = +{ + .function = tcp4_syn_sent, + .name = "tcp4-syn-sent", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_syn_sent_node, tcp4_syn_sent); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_syn_sent_node) = +{ + .function = tcp6_syn_sent_rcv, + .name = "tcp6-syn-sent", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_SYN_SENT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + } +,}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv); +/** + * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED + * as per RFC793 p. 64 + */ +always_inline uword +tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 n_advance_bytes0, n_data_bytes0; + u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + n_advance_bytes0 = (ip4_header_bytes (ip40) + + tcp_header_bytes (tcp0)); + n_data_bytes0 = clib_net_to_host_u16 (ip40->length) + - n_advance_bytes0; + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + n_advance_bytes0 = tcp_header_bytes (tcp0); + n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) + - n_advance_bytes0; + n_advance_bytes0 += sizeof (ip60[0]); + } + + /* SYNs, FINs and data consume sequence numbers */ + vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number + + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + n_data_bytes0; + + /* + * Special treatment for CLOSED + */ + switch (tc0->state) + { + case TCP_STATE_CLOSED: + goto drop; + break; + } + + /* + * For all other states (except LISTEN) + */ + + /* 1-4: check SEQ, RST, SYN */ + if (PREDICT_FALSE + (tcp_segment_validate (vm, tc0, b0, tcp0, &next0))) + { + error0 = TCP_ERROR_SEGMENT_INVALID; + goto drop; + } + + /* 5: check the ACK field */ + switch (tc0->state) + { + case TCP_STATE_SYN_RCVD: + /* + * If the segment acknowledgment is not acceptable, form a + * reset segment, + * + * and send it. 
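+               *
+               * "Acceptable" here means SND.UNA =< SEG.ACK =< SND.NXT, which
+               * is exactly what tcp_rcv_ack_is_acceptable() checks below.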
+ */ + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + /* Switch state to ESTABLISHED */ + tc0->state = TCP_STATE_ESTABLISHED; + + /* Initialize session variables */ + tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; + tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window) + << tc0->opt.wscale; + tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; + tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; + + /* Shoulder tap the server */ + stream_session_accept_notify (&tc0->connection); + + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + break; + case TCP_STATE_ESTABLISHED: + /* We can get packets in established state here because they + * were enqueued before state change */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + break; + case TCP_STATE_FIN_WAIT_1: + /* In addition to the processing for the ESTABLISHED state, if + * our FIN is now acknowledged then enter FIN-WAIT-2 and + * continue processing in that state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + tc0->state = TCP_STATE_FIN_WAIT_2; + /* Stop all timers, 2MSL will be set lower */ + tcp_connection_timers_reset (tc0); + break; + case TCP_STATE_FIN_WAIT_2: + /* In addition to the processing for the ESTABLISHED state, if + * the retransmission queue is empty, the user's CLOSE can be + * acknowledged ("ok") but do not delete the TCB. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + /* check if rtx queue is empty and ack CLOSE TODO */ + break; + case TCP_STATE_CLOSE_WAIT: + /* Do the same processing as for the ESTABLISHED state. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + break; + case TCP_STATE_CLOSING: + /* In addition to the processing for the ESTABLISHED state, if + * the ACK acknowledges our FIN then enter the TIME-WAIT state, + * otherwise ignore the segment. */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + /* XXX test that send queue empty */ + tc0->state = TCP_STATE_TIME_WAIT; + goto drop; + + break; + case TCP_STATE_LAST_ACK: + /* The only thing that can arrive in this state is an + * acknowledgment of our FIN. If our FIN is now acknowledged, + * delete the TCB, enter the CLOSED state, and return. */ + + if (!tcp_rcv_ack_is_acceptable (tc0, b0)) + goto drop; + + tcp_connection_del (tc0); + goto drop; + + break; + case TCP_STATE_TIME_WAIT: + /* The only thing that can arrive in this state is a + * retransmission of the remote FIN. Acknowledge it, and restart + * the 2 MSL timeout. */ + + /* TODO */ + goto drop; + break; + default: + ASSERT (0); + } + + /* 6: check the URG bit TODO */ + + /* 7: process the segment text */ + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + case TCP_STATE_FIN_WAIT_2: + error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + case TCP_STATE_TIME_WAIT: + /* This should not occur, since a FIN has been received from the + * remote side. Ignore the segment text. 
*/ + break; + } + + /* 8: check the FIN bit */ + if (!tcp_fin (tcp0)) + goto drop; + + switch (tc0->state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_SYN_RCVD: + /* Send FIN-ACK notify app and enter CLOSE-WAIT */ + tcp_connection_timers_reset (tc0); + tcp_make_finack (tc0, b0); + next0 = tcp_next_output (tc0->c_is_ip4); + stream_session_disconnect_notify (&tc0->connection); + tc0->state = TCP_STATE_CLOSE_WAIT; + break; + case TCP_STATE_CLOSE_WAIT: + case TCP_STATE_CLOSING: + case TCP_STATE_LAST_ACK: + /* move along .. */ + break; + case TCP_STATE_FIN_WAIT_1: + tc0->state = TCP_STATE_TIME_WAIT; + tcp_connection_timers_reset (tc0); + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + case TCP_STATE_FIN_WAIT_2: + /* Got FIN, send ACK! */ + tc0->state = TCP_STATE_TIME_WAIT; + tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_make_ack (tc0, b0); + next0 = tcp_next_output (is_ip4); + break; + case TCP_STATE_TIME_WAIT: + /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait + * timeout. + */ + tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + break; + } + + b0->error = error0 ? node->errors[error0] : 0; + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (my_thread_index); + if (errors) + { + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + else + vlib_node_increment_counter (vm, tcp6_established_node.index, + TCP_ERROR_EVENT_FIFO_FULL, errors); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_rcv_process (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_rcv_process_node) = +{ + .function = tcp4_rcv_process, + .name = "tcp4-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_rcv_process_node, tcp4_rcv_process); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_rcv_process_node) = +{ + .function = tcp6_rcv_process, + .name = "tcp6-rcv-process", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RCV_PROCESS_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_rcv_process_node, tcp6_rcv_process); + +vlib_node_registration_t tcp4_listen_node; +vlib_node_registration_t tcp6_listen_node; + +/** + * LISTEN state processing as per RFC 793 p. 
65 + */ +always_inline uword +tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *th0 = 0; + tcp_connection_t *lc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + tcp_connection_t *child0; + u32 error0 = TCP_ERROR_SYNS_RCVD, next0 = TCP_LISTEN_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + lc0 = tcp_listener_get (vnet_buffer (b0)->tcp.connection_index); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ip40); + } + else + { + ip60 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ip60); + } + + /* Create child session. For syn-flood protection use filter */ + + /* 1. first check for an RST */ + if (tcp_rst (th0)) + goto drop; + + /* 2. second check for an ACK */ + if (tcp_ack (th0)) + { + tcp_send_reset (b0, is_ip4); + goto drop; + } + + /* 3. check for a SYN (did that already) */ + + /* Create child session and send SYN-ACK */ + pool_get (tm->connections[my_thread_index], child0); + memset (child0, 0, sizeof (*child0)); + + child0->c_c_index = child0 - tm->connections[my_thread_index]; + child0->c_lcl_port = lc0->c_lcl_port; + child0->c_rmt_port = th0->src_port; + child0->c_is_ip4 = is_ip4; + child0->c_thread_index = my_thread_index; + + if (is_ip4) + { + child0->c_lcl_ip4.as_u32 = ip40->dst_address.as_u32; + child0->c_rmt_ip4.as_u32 = ip40->src_address.as_u32; + } + else + { + clib_memcpy (&child0->c_lcl_ip6, &ip60->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&child0->c_rmt_ip6, &ip60->src_address, + sizeof (ip6_address_t)); + } + + if (stream_session_accept (&child0->connection, lc0->c_s_index, sst, + 0 /* notify */ )) + { + error0 = TCP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + + tcp_options_parse (th0, &child0->opt); + tcp_connection_init_vars (child0); + + child0->irs = vnet_buffer (b0)->tcp.seq_number; + child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1; + child0->state = TCP_STATE_SYN_RCVD; + + /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} + * segments are used to initialize PAWS. */ + if (tcp_opts_tstamp (&child0->opt)) + { + child0->tsval_recent = child0->opt.tsval; + child0->tsval_recent_age = tcp_time_now (); + } + + /* Reuse buffer to make syn-ack and send */ + tcp_make_synack (child0, b0); + next0 = tcp_next_output (is_ip4); + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + b0->error = error0 ? 
node->errors[error0] : 0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_listen (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_listen_node) = +{ + .function = tcp4_listen, + .name = "tcp4-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_listen_node, tcp4_listen); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_listen_node) = +{ + .function = tcp6_listen, + .name = "tcp6-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_LISTEN_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_LISTEN_NEXT_##s] = n, + foreach_tcp_state_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_listen_node, tcp6_listen); + +vlib_node_registration_t tcp4_input_node; +vlib_node_registration_t tcp6_input_node; + +typedef enum _tcp_input_next +{ + TCP_INPUT_NEXT_DROP, + TCP_INPUT_NEXT_LISTEN, + TCP_INPUT_NEXT_RCV_PROCESS, + TCP_INPUT_NEXT_SYN_SENT, + TCP_INPUT_NEXT_ESTABLISHED, + TCP_INPUT_NEXT_RESET, + TCP_INPUT_N_NEXT +} tcp_input_next_t; + +#define foreach_tcp4_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp4-listen") \ + _ (RCV_PROCESS, "tcp4-rcv-process") \ + _ (SYN_SENT, "tcp4-syn-sent") \ + _ (ESTABLISHED, "tcp4-established") \ + _ (RESET, "tcp4-reset") + +#define foreach_tcp6_input_next \ + _ (DROP, "error-drop") \ + _ (LISTEN, "tcp6-listen") \ + _ (RCV_PROCESS, "tcp6-rcv-process") \ + _ (SYN_SENT, "tcp6-syn-sent") \ + _ (ESTABLISHED, "tcp6-established") \ + _ (RESET, "tcp6-reset") + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_rx_trace_t; + +const char *tcp_fsm_states[] = { +#define _(sym, str) str, + foreach_tcp_fsm_state +#undef _ +}; + +u8 * +format_tcp_state (u8 * s, va_list * args) +{ + tcp_state_t *state = va_arg (*args, tcp_state_t *); + + if (state[0] < TCP_N_STATES) + s = format (s, "%s", tcp_fsm_states[state[0]]); + else + s = format (s, "UNKNOWN"); + + return s; +} + +u8 * +format_tcp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *); + + s = format (s, "TCP: src-port %d dst-port %U%s\n", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), format_tcp_state, t->state); + + return s; +} + +#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) + +always_inline uword +tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + tcp_main_t *tm = vnet_get_tcp_main (); + session_manager_main_t 
*ssm = vnet_get_session_manager_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_header_t *tcp0 = 0; + tcp_connection_t *tc0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP; + u8 flags0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_ip4) + { + ip40 = vlib_buffer_get_current (b0); + tcp0 = ip4_next_header (ip40); + + /* lookup session */ + tc0 = + (tcp_connection_t *) stream_session_lookup_transport4 (ssm, + &ip40->dst_address, + &ip40->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP4_TCP, + my_thread_index); + } + else + { + ip60 = vlib_buffer_get_current (b0); + tcp0 = ip6_next_header (ip60); + tc0 = + (tcp_connection_t *) stream_session_lookup_transport6 (ssm, + &ip60->src_address, + &ip60->dst_address, + tcp0->src_port, + tcp0->dst_port, + SESSION_TYPE_IP6_TCP, + my_thread_index); + } + + /* Session exists */ + if (PREDICT_TRUE (0 != tc0)) + { + /* Save connection index */ + vnet_buffer (b0)->tcp.connection_index = tc0->c_c_index; + vnet_buffer (b0)->tcp.seq_number = + clib_net_to_host_u32 (tcp0->seq_number); + vnet_buffer (b0)->tcp.ack_number = + clib_net_to_host_u32 (tcp0->ack_number); + + flags0 = tcp0->flags & filter_flags; + next0 = tm->dispatch_table[tc0->state][flags0].next; + error0 = tm->dispatch_table[tc0->state][flags0].error; + + if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH)) + { + /* Overload tcp flags to store state */ + vnet_buffer (b0)->tcp.flags = tc0->state; + } + } + else + { + /* Send reset */ + next0 = TCP_INPUT_NEXT_RESET; + error0 = TCP_ERROR_NO_LISTENER; + vnet_buffer (b0)->tcp.flags = 0; + } + + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_input (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_input_node) = +{ + .function = tcp4_input, + .name = "tcp4-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp4_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_input_node, tcp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_input_node) = +{ + .function = tcp6_input, + .name = "tcp6-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [TCP_INPUT_NEXT_##s] = n, + foreach_tcp6_input_next +#undef _ + }, + .format_buffer = format_tcp_header, + .format_trace = format_tcp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_input_node, tcp6_input); +void +tcp_update_time (f64 now, u32 thread_index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + tw_timer_expire_timers_16t_2w_512sl (&tm->timer_wheels[thread_index], now); +} + +static void +tcp_dispatch_table_init (tcp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = TCP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = TCP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[TCP_STATE_##t][f].next = (n); \ + tm->dispatch_table[TCP_STATE_##t][f].error = (e); \ +} while (0) + + /* SYNs for new connections -> tcp-listen. */ + _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); + /* ACK for for a SYN-ACK -> tcp-rcv-process. */ + _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* SYN-ACK for a SYN */ + _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); + _(SYN_SENT, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, + TCP_ERROR_NONE); + /* ACK for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + /* FIN for for established connection -> tcp-established. */ + _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, + TCP_ERROR_NONE); + /* ACK or FIN-ACK to our FIN */ + _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + /* FIN in reply to our FIN from the other side */ + _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + /* FIN confirming that the peer (app) has closed */ + _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); +#undef _ +} + +clib_error_t * +tcp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + tcp_main_t *tm = vnet_get_tcp_main (); + + if ((error = vlib_call_init_function (vm, tcp_init))) + return error; + + /* Initialize dispatch table. */ + tcp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (tcp_input_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c new file mode 100644 index 00000000..856dffe4 --- /dev/null +++ b/src/vnet/tcp/tcp_newreno.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +void +newreno_congestion (tcp_connection_t * tc) +{ + tc->prev_ssthresh = tc->ssthresh; + tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); +} + +void +newreno_recovered (tcp_connection_t * tc) +{ + tc->cwnd = tc->ssthresh; +} + +void +newreno_rcv_ack (tcp_connection_t * tc) +{ + if (tcp_in_slowstart (tc)) + { + tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked); + } + else + { + /* Round up to 1 if needed */ + tc->cwnd += clib_max (tc->snd_mss * tc->snd_mss / tc->cwnd, 1); + } +} + +void +newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type) +{ + if (ack_type == TCP_CC_DUPACK) + { + tc->cwnd += tc->snd_mss; + } + else if (ack_type == TCP_CC_PARTIALACK) + { + tc->cwnd -= tc->bytes_acked; + if (tc->bytes_acked > tc->snd_mss) + tc->bytes_acked += tc->snd_mss; + } +} + +void +newreno_conn_init (tcp_connection_t * tc) +{ + tc->ssthresh = tc->snd_wnd; + tc->cwnd = tcp_initial_cwnd (tc); +} + +const static tcp_cc_algorithm_t tcp_newreno = { + .congestion = newreno_congestion, + .recovered = newreno_recovered, + .rcv_ack = newreno_rcv_ack, + .rcv_cong_ack = newreno_rcv_cong_ack, + .init = newreno_conn_init +}; + +clib_error_t * +newreno_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + tcp_cc_algo_register (TCP_CC_NEWRENO, &tcp_newreno); + + return error; +} + +VLIB_INIT_FUNCTION (newreno_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c new file mode 100644 index 00000000..dbcf1f74 --- /dev/null +++ b/src/vnet/tcp/tcp_output.c @@ -0,0 +1,1412 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
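
The NewReno callbacks defined above grow cwnd by one MSS per ACK while in slow start and by roughly mss*mss/cwnd per ACK (about one MSS per RTT) in congestion avoidance. A tiny standalone simulation of that growth, with made-up initial values, shows the two regimes:

#include <stdio.h>

int
main (void)
{
  unsigned int mss = 1460;
  unsigned int cwnd = 4 * mss;          /* arbitrary initial window */
  unsigned int ssthresh = 20 * mss;     /* arbitrary threshold */
  int ack;

  for (ack = 0; ack < 40; ack++)
    {
      if (cwnd < ssthresh)
        cwnd += mss;                    /* slow start: +1 MSS per ACK */
      else
        {
          unsigned int incr = mss * mss / cwnd;
          cwnd += incr ? incr : 1;      /* congestion avoidance */
        }
    }
  printf ("cwnd after 40 acks: %u bytes\n", cwnd);
  return 0;
}
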
+ */ + +#include +#include + +vlib_node_registration_t tcp4_output_node; +vlib_node_registration_t tcp6_output_node; + +typedef enum _tcp_output_nect +{ + TCP_OUTPUT_NEXT_DROP, + TCP_OUTPUT_NEXT_IP_LOOKUP, + TCP_OUTPUT_N_NEXT +} tcp_output_next_t; + +#define foreach_tcp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *tcp_error_strings[] = { +#define tcp_error(n,s) s, +#include +#undef tcp_error +}; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 state; +} tcp_tx_trace_t; + +u16 dummy_mtu = 400; + +u8 * +format_tcp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + s = format (s, "TBD\n"); + + return s; +} + +void +tcp_set_snd_mss (tcp_connection_t * tc) +{ + u16 snd_mss; + + /* TODO find our iface MTU */ + snd_mss = dummy_mtu; + + /* TODO cache mss and consider PMTU discovery */ + snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss; + + tc->snd_mss = snd_mss; + + if (tc->snd_mss == 0) + { + clib_warning ("snd mss is 0"); + tc->snd_mss = dummy_mtu; + } +} + +static u8 +tcp_window_compute_scale (u32 available_space) +{ + u8 wnd_scale = 0; + while (wnd_scale < TCP_MAX_WND_SCALE + && (available_space >> wnd_scale) > TCP_WND_MAX) + wnd_scale++; + return wnd_scale; +} + +/** + * Compute initial window and scale factor. As per RFC1323, window field in + * SYN and SYN-ACK segments is never scaled. + */ +u32 +tcp_initial_window_to_advertise (tcp_connection_t * tc) +{ + u32 available_space; + + /* Initial wnd for SYN. Fifos are not allocated yet. + * Use some predefined value */ + if (tc->state != TCP_STATE_SYN_RCVD) + { + return TCP_DEFAULT_RX_FIFO_SIZE; + } + + available_space = stream_session_max_enqueue (&tc->connection); + tc->rcv_wscale = tcp_window_compute_scale (available_space); + tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + + return clib_min (tc->rcv_wnd, TCP_WND_MAX); +} + +/** + * Compute and return window to advertise, scaled as per RFC1323 + */ +u32 +tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state) +{ + u32 available_space, wnd, scaled_space; + + if (state != TCP_STATE_ESTABLISHED) + return tcp_initial_window_to_advertise (tc); + + available_space = stream_session_max_enqueue (&tc->connection); + scaled_space = available_space >> tc->rcv_wscale; + + /* Need to update scale */ + if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0)) + || (scaled_space >= TCP_WND_MAX)) + tc->rcv_wscale = tcp_window_compute_scale (available_space); + + wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale); + tc->rcv_wnd = wnd; + + return wnd >> tc->rcv_wscale; +} + +/** + * Write TCP options to segment. 
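
tcp_window_compute_scale above picks the smallest shift that makes the available receive space representable in the 16-bit window field. A standalone equivalent, assuming the usual 65535 ceiling and the RFC7323 maximum shift of 14, with a few sample values:

#include <stdio.h>

/* Smallest wscale such that (space >> wscale) fits in 16 bits, capped at 14. */
static unsigned char
window_scale_for (unsigned int space)
{
  unsigned char ws = 0;
  while (ws < 14 && (space >> ws) > 65535)
    ws++;
  return ws;
}

int
main (void)
{
  printf ("%u %u %u\n",
          (unsigned) window_scale_for (60000),          /* 0: already fits */
          (unsigned) window_scale_for (1u << 20),       /* 5: 1 MB in 32 KB units */
          (unsigned) window_scale_for (1u << 24));      /* 9: 16 MB */
  return 0;
}
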
+ */ +u32 +tcp_options_write (u8 * data, tcp_options_t * opts) +{ + u32 opts_len = 0; + u32 buf, seq_len = 4; + + if (tcp_opts_mss (opts)) + { + *data++ = TCP_OPTION_MSS; + *data++ = TCP_OPTION_LEN_MSS; + buf = clib_host_to_net_u16 (opts->mss); + clib_memcpy (data, &buf, sizeof (opts->mss)); + data += sizeof (opts->mss); + opts_len += TCP_OPTION_LEN_MSS; + } + + if (tcp_opts_wscale (opts)) + { + *data++ = TCP_OPTION_WINDOW_SCALE; + *data++ = TCP_OPTION_LEN_WINDOW_SCALE; + *data++ = opts->wscale; + opts_len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_sack_permitted (opts)) + { + *data++ = TCP_OPTION_SACK_PERMITTED; + *data++ = TCP_OPTION_LEN_SACK_PERMITTED; + opts_len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + if (tcp_opts_tstamp (opts)) + { + *data++ = TCP_OPTION_TIMESTAMP; + *data++ = TCP_OPTION_LEN_TIMESTAMP; + buf = clib_host_to_net_u32 (opts->tsval); + clib_memcpy (data, &buf, sizeof (opts->tsval)); + data += sizeof (opts->tsval); + buf = clib_host_to_net_u32 (opts->tsecr); + clib_memcpy (data, &buf, sizeof (opts->tsecr)); + data += sizeof (opts->tsecr); + opts_len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack (opts)) + { + int i; + u32 n_sack_blocks = clib_min (vec_len (opts->sacks), + TCP_OPTS_MAX_SACK_BLOCKS); + + if (n_sack_blocks != 0) + { + *data++ = TCP_OPTION_SACK_BLOCK; + *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + for (i = 0; i < n_sack_blocks; i++) + { + buf = clib_host_to_net_u32 (opts->sacks[i].start); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + buf = clib_host_to_net_u32 (opts->sacks[i].end); + clib_memcpy (data, &buf, seq_len); + data += seq_len; + } + opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; + } + } + + /* Terminate TCP options */ + if (opts_len % 4) + { + *data++ = TCP_OPTION_EOL; + opts_len += TCP_OPTION_LEN_EOL; + } + + /* Pad with zeroes to a u32 boundary */ + while (opts_len % 4) + { + *data++ = TCP_OPTION_NOOP; + opts_len += TCP_OPTION_LEN_NOOP; + } + return opts_len; +} + +always_inline int +tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tcp_window_compute_scale (initial_wnd); + len += TCP_OPTION_LEN_WINDOW_SCALE; + + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = 0; + len += TCP_OPTION_LEN_TIMESTAMP; + + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags |= TCP_OPTS_FLAG_MSS; + opts->mss = dummy_mtu; /*XXX discover that */ + len += TCP_OPTION_LEN_MSS; + + if (tcp_opts_wscale (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_WSCALE; + opts->wscale = tc->rcv_wscale; + len += TCP_OPTION_LEN_WINDOW_SCALE; + } + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + + if (tcp_opts_sack_permitted (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_established_options 
(tcp_connection_t * tc, tcp_options_t * opts) +{ + u8 len = 0; + + opts->flags = 0; + + if (tcp_opts_tstamp (&tc->opt)) + { + opts->flags |= TCP_OPTS_FLAG_TSTAMP; + opts->tsval = tcp_time_now (); + opts->tsecr = tc->tsval_recent; + len += TCP_OPTION_LEN_TIMESTAMP; + } + if (tcp_opts_sack_permitted (&tc->opt)) + { + if (vec_len (tc->snd_sacks)) + { + opts->flags |= TCP_OPTS_FLAG_SACK; + opts->sacks = tc->snd_sacks; + opts->n_sack_blocks = vec_len (tc->snd_sacks); + len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks; + } + } + + /* Align to needed boundary */ + len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; + return len; +} + +always_inline int +tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, + tcp_state_t state) +{ + switch (state) + { + case TCP_STATE_ESTABLISHED: + case TCP_STATE_FIN_WAIT_1: + return tcp_make_established_options (tc, opts); + case TCP_STATE_SYN_RCVD: + return tcp_make_synack_options (tc, opts); + case TCP_STATE_SYN_SENT: + return tcp_make_syn_options (opts, + tcp_initial_window_to_advertise (tc)); + default: + clib_warning ("Not handled!"); + return 0; + } +} + +#define tcp_get_free_buffer_index(tm, bidx) \ +do { \ + u32 *my_tx_buffers, n_free_buffers; \ + u32 cpu_index = tm->vlib_main->cpu_index; \ + my_tx_buffers = tm->tx_buffers[cpu_index]; \ + if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ + { \ + n_free_buffers = 32; /* TODO config or macro */ \ + vec_validate (my_tx_buffers, n_free_buffers - 1); \ + _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ + tm->vlib_main, my_tx_buffers, n_free_buffers, \ + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ + tm->tx_buffers[cpu_index] = my_tx_buffers; \ + } \ + /* buffer shortage */ \ + if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ + return; \ + *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \ + _vec_len (my_tx_buffers) -= 1; \ +} while (0) + +always_inline void +tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_buffer_t *it = b; + do + { + it->current_data = 0; + it->current_length = 0; + it->total_length_not_including_first_buffer = 0; + } + while ((it->flags & VLIB_BUFFER_NEXT_PRESENT) + && (it = vlib_get_buffer (vm, it->next_buffer))); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +/** + * Prepare ACK + */ +void +tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, + u8 flags) +{ + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 wnd; + + wnd = tcp_window_to_advertise (tc, state); + + /* Make and write options */ + tcp_opts_len = tcp_make_established_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + /* Mark as ACK */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; +} + +/** + * Convert buffer to ACK + */ +void +tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK); + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; +} + +/** + * Convert buffer to FIN-ACK + */ +void +tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_reuse_buffer (vm, b); + 
tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN); + + /* Reset flags, make sure ack is sent */ + tc->flags = TCP_CONN_SNDACK; + vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; + + tc->snd_nxt += 1; +} + +/** + * Convert buffer to SYN-ACK + */ +void +tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + u8 tcp_opts_len, tcp_hdr_opts_len; + tcp_header_t *th; + u16 initial_wnd; + u32 time_now; + + memset (snd_opts, 0, sizeof (*snd_opts)); + + tcp_reuse_buffer (vm, b); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + tcp_opts_len = tcp_make_synack_options (tc, snd_opts); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, + TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd); + + tcp_options_write ((u8 *) (th + 1), snd_opts); + + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; + + /* Init retransmit timer */ + tcp_retransmit_timer_set (tm, tc); +} + +always_inline void +tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Default FIB for now */ + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +int +tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b0, + tcp_state_t state, u32 my_thread_index, u8 is_ip4) +{ + u8 tcp_hdr_len = sizeof (tcp_header_t); + ip4_header_t *ih4; + ip6_header_t *ih6; + tcp_header_t *th0; + ip4_address_t src_ip40; + ip6_address_t src_ip60; + u16 src_port0; + u32 tmp; + + /* Find IP and TCP headers */ + if (is_ip4) + { + ih4 = vlib_buffer_get_current (b0); + th0 = ip4_next_header (ih4); + } + else + { + ih6 = vlib_buffer_get_current (b0); + th0 = ip6_next_header (ih6); + } + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40); + src_ip40.as_u32 = ih4->src_address.as_u32; + ih4->src_address.as_u32 = ih4->dst_address.as_u32; + ih4->dst_address.as_u32 = src_ip40.as_u32; + + /* Chop the end of the pkt */ + b0->current_length += ip4_header_bytes (ih4) + tcp_hdr_len; + } + else + { + ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60); + clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t)); + clib_memcpy (&ih6->src_address, &ih6->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t)); + + /* Chop the end of the pkt */ + b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len; + } + + /* Try to determine what/why we're actually resetting and swap + * src and dst ports */ + if (state == TCP_STATE_CLOSED) + { + if (!tcp_syn (th0)) + return -1; + + tmp = clib_net_to_host_u32 (th0->seq_number); + + /* Got a SYN for no listener. 
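+       * Per RFC 793, answer with RST|ACK, SEQ = 0 and
+       * ACK = SEG.SEQ + 1 (the SYN occupies one sequence number).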
*/ + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->ack_number = clib_host_to_net_u32 (tmp + 1); + th0->seq_number = 0; + + } + else if (state >= TCP_STATE_SYN_SENT) + { + th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th0->seq_number = th0->ack_number; + th0->ack_number = 0; + } + + src_port0 = th0->src_port; + th0->src_port = th0->dst_port; + th0->dst_port = src_port0; + th0->window = 0; + th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4; + th0->urgent_pointer = 0; + + /* Compute checksum */ + if (is_ip4) + { + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4); + } + else + { + int bogus = ~0; + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus); + ASSERT (!bogus); + } + + return 0; +} + +/** + * Send reset without reusing existing buffer + */ +void +tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_len, flags = 0; + tcp_header_t *th, *pkt_th; + u32 seq, ack; + ip4_header_t *ih4, *pkt_ih4; + ip6_header_t *ih6, *pkt_ih6; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Make and write options */ + tcp_hdr_len = sizeof (tcp_header_t); + + if (is_ip4) + { + pkt_ih4 = vlib_buffer_get_current (pkt); + pkt_th = ip4_next_header (pkt_ih4); + } + else + { + pkt_ih6 = vlib_buffer_get_current (pkt); + pkt_th = ip6_next_header (pkt_ih6); + } + + if (tcp_ack (pkt_th)) + { + flags = TCP_FLAG_RST; + seq = pkt_th->ack_number; + ack = 0; + } + else + { + flags = TCP_FLAG_RST | TCP_FLAG_ACK; + seq = 0; + ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end); + } + + th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port, + seq, ack, tcp_hdr_len, flags, 0); + + /* Swap src and dst ip */ + if (is_ip4) + { + ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); + ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, + &pkt_ih4->src_address, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); + } + else + { + int bogus = ~0; + pkt_ih6 = (ip6_header_t *) (pkt_th - 1); + ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) == + 0x60); + ih6 = + vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address, + &pkt_ih6->src_address, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus); + ASSERT (!bogus); + } + + tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4); +} + +void +tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b) +{ + tcp_header_t *th = vlib_buffer_get_current (b); + + if (tc->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4, + &tc->c_rmt_ip4, IP_PROTOCOL_TCP); + th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6, + &tc->c_rmt_ip6, IP_PROTOCOL_TCP); + th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih, + &bogus); + ASSERT (!bogus); + } +} + +/** + * Send SYN + * + * Builds a SYN packet for a half-open connection and sends it to ipx_lookup. + * The packet is not forwarded through tcpx_output to avoid doing lookups + * in the half_open pool. 
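+ * The IP header is pushed here (tcp_push_ip_hdr) and the frame is handed
+ * directly to ip4-lookup/ip6-lookup via tcp_enqueue_to_ip_lookup.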
+ */ +void +tcp_send_syn (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u8 tcp_hdr_opts_len, tcp_opts_len; + tcp_header_t *th; + u32 time_now; + u16 initial_wnd; + tcp_options_t snd_opts; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + /* Set random initial sequence */ + time_now = tcp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_una_max = tc->snd_nxt = tc->iss + 1; + + initial_wnd = tcp_initial_window_to_advertise (tc); + + /* Make and write options */ + memset (&snd_opts, 0, sizeof (snd_opts)); + tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, + initial_wnd); + + tcp_options_write ((u8 *) (th + 1), &snd_opts); + + /* Measure RTT with this */ + tc->rtt_ts = tcp_time_now (); + tc->rtt_seq = tc->snd_nxt; + + /* Start retransmit trimer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); + tc->rto_boff = 0; + + /* Set the connection establishment timer */ + tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME); + + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); +} + +always_inline void +tcp_enqueue_to_output (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, u8 is_ip4) +{ + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index; + f = vlib_get_frame_to_node (vm, next_index); + + /* Enqueue the packet */ + to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, next_index, f); +} + +/** + * Send FIN + */ +void +tcp_send_fin (tcp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Leave enough space for headers */ + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + + tcp_make_finack (tc, b); + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +always_inline u8 +tcp_make_state_flags (tcp_state_t next_state) +{ + switch (next_state) + { + case TCP_STATE_ESTABLISHED: + return TCP_FLAG_ACK; + case TCP_STATE_SYN_RCVD: + return TCP_FLAG_SYN | TCP_FLAG_ACK; + case TCP_STATE_SYN_SENT: + return TCP_FLAG_SYN; + case TCP_STATE_LAST_ACK: + case TCP_STATE_FIN_WAIT_1: + return TCP_FLAG_FIN; + default: + clib_warning ("Shouldn't be here!"); + } + return 0; +} + +/** + * Push TCP header and update connection variables + */ +static void +tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, + tcp_state_t next_state) +{ + u32 advertise_wnd, data_len; + u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags; + tcp_options_t _snd_opts, *snd_opts = &_snd_opts; + tcp_header_t *th; + + data_len = b->current_length; + vnet_buffer (b)->tcp.flags = 0; + + /* Make and write options */ + memset (snd_opts, 0, sizeof (*snd_opts)); + tcp_opts_len = tcp_make_options (tc, snd_opts, next_state); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + /* Get rcv window to advertise */ + advertise_wnd = tcp_window_to_advertise (tc, next_state); + 
flags = tcp_make_state_flags (next_state); + + /* Push header and options */ + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, + tc->rcv_nxt, tcp_hdr_opts_len, flags, + advertise_wnd); + + opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts); + + ASSERT (opts_write_len == tcp_opts_len); + + /* Tag the buffer with the connection index */ + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + + tc->snd_nxt += data_len; +} + +/* Send delayed ACK when timer expires */ +void +tcp_timer_delack_handler (u32 index) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi; + + tc = tcp_connection_get (index, thread_index); + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + /* Fill in the ACK */ + tcp_make_ack (tc, b); + + tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID; + tc->flags &= ~TCP_CONN_DELACK; + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); +} + +/** Build a retransmit segment + * + * @return the number of bytes in the segment or 0 if there's nothing to + * retransmit + * */ +u32 +tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, + u32 max_bytes) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 n_bytes, offset = 0; + sack_scoreboard_hole_t *hole; + u32 hole_size; + + tcp_reuse_buffer (vm, b); + + ASSERT (tc->state == TCP_STATE_ESTABLISHED); + ASSERT (max_bytes != 0); + + if (tcp_opts_sack_permitted (&tc->opt)) + { + /* XXX get first hole not retransmitted yet */ + hole = scoreboard_first_hole (&tc->sack_sb); + if (!hole) + return 0; + + offset = hole->start - tc->snd_una; + hole_size = hole->end - hole->start; + + ASSERT (hole_size); + + if (hole_size < max_bytes) + max_bytes = hole_size; + } + else + { + if (seq_geq (tc->snd_nxt, tc->snd_una_max)) + return 0; + } + + n_bytes = stream_session_peek_bytes (&tc->connection, + vlib_buffer_get_current (b), offset, + max_bytes); + ASSERT (n_bytes != 0); + + tc->snd_nxt += n_bytes; + tcp_push_hdr_i (tc, b, tc->state); + + return n_bytes; +} + +static void +tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = tm->vlib_main; + u32 thread_index = os_get_cpu_number (); + tcp_connection_t *tc; + vlib_buffer_t *b; + u32 bi, max_bytes, snd_space; + + if (is_syn) + { + tc = tcp_half_open_connection_get (index); + } + else + { + tc = tcp_connection_get (index, thread_index); + } + + /* Make sure timer handle is set to invalid */ + tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + + /* Increment RTO backoff (also equal to number of retries) */ + tc->rto_boff += 1; + + /* Go back to first un-acked byte */ + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + if (tc->state == TCP_STATE_ESTABLISHED) + { + tcp_fastrecovery_off (tc); + + /* Exponential backoff */ + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + /* Figure out what and how many bytes we can send */ + snd_space = tcp_available_snd_space (tc); + max_bytes = clib_min (tc->snd_mss, snd_space); + tcp_prepare_retransmit_segment (tc, b, max_bytes); + + tc->rtx_bytes += max_bytes; + + /* No fancy recovery for now! 
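+       * On timeout the SACK scoreboard is simply flushed and
+       * retransmission restarts from snd_una with the exponentially
+       * backed-off RTO computed above.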
*/ + scoreboard_clear (&tc->sack_sb); + } + else + { + /* Retransmit for SYN/SYNACK */ + ASSERT (tc->state == TCP_STATE_SYN_RCVD + || tc->state == TCP_STATE_SYN_SENT); + + /* Try without increasing RTO a number of times. If this fails, + * start growing RTO exponentially */ + if (tc->rto_boff > TCP_RTO_SYN_RETRIES) + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + + vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + tcp_push_hdr_i (tc, b, tc->state); + } + + if (!is_syn) + { + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_retransmit_timer_set (tm, tc); + } + else + { + ASSERT (tc->state == TCP_STATE_SYN_SENT); + + /* This goes straight to ipx_lookup */ + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); + + /* Re-enable retransmit timer */ + tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); + } +} + +void +tcp_timer_retransmit_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 0); +} + +void +tcp_timer_retransmit_syn_handler (u32 index) +{ + tcp_timer_retransmit_handler_i (index, 1); +} + +/** + * Retansmit first unacked segment */ +void +tcp_retransmit_first_unacked (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_nxt = tc->snd_nxt; + vlib_buffer_t *b; + u32 bi; + + tc->snd_nxt = tc->snd_una; + + /* Get buffer */ + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + tcp_prepare_retransmit_segment (tc, b, tc->snd_mss); + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + tc->snd_nxt = snd_nxt; + tc->rtx_bytes += tc->snd_mss; +} + +void +tcp_fast_retransmit (tcp_connection_t * tc) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 snd_space, max_bytes, n_bytes, bi; + vlib_buffer_t *b; + + ASSERT (tcp_in_fastrecovery (tc)); + + clib_warning ("fast retransmit!"); + + /* Start resending from first un-acked segment */ + tc->snd_nxt = tc->snd_una; + + snd_space = tcp_available_snd_space (tc); + + while (snd_space) + { + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (tm->vlib_main, bi); + + max_bytes = clib_min (tc->snd_mss, snd_space); + n_bytes = tcp_prepare_retransmit_segment (tc, b, max_bytes); + + /* Nothing left to retransmit */ + if (n_bytes == 0) + return; + + tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4); + + snd_space -= n_bytes; + } + + /* If window allows, send new data */ + tc->snd_nxt = tc->snd_una_max; +} + +always_inline u32 +tcp_session_has_ooo_data (tcp_connection_t * tc) +{ + stream_session_t *s = + stream_session_get (tc->c_s_index, tc->c_thread_index); + return svm_fifo_has_ooo_data (s->server_rx_fifo); +} + +always_inline uword +tcp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + tcp_main_t *tm = vnet_get_tcp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + tcp_connection_t *tc0; + tcp_header_t *th0; + u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + 
tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, + my_thread_index); + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *ih0; + ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, + &tc0->c_rmt_ip4, IP_PROTOCOL_TCP); + th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0); + } + else + { + ip6_header_t *ih0; + int bogus = ~0; + + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, + &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); + th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0, + &bogus); + ASSERT (!bogus); + } + + /* Filter out DUPACKs if there are no OOO segments left */ + if (PREDICT_FALSE + (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) + { + tc0->snt_dupacks--; + ASSERT (tc0->snt_dupacks >= 0); + if (!tcp_session_has_ooo_data (tc0)) + { + error0 = TCP_ERROR_FILTERED_DUPACKS; + next0 = TCP_OUTPUT_NEXT_DROP; + goto done; + } + } + + /* Retransmitted SYNs do reach this but it should be harmless */ + tc0->rcv_las = tc0->rcv_nxt; + + /* Stop DELACK timer and fix flags */ + tc0->flags &= + ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK); + if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK)) + { + tcp_timer_reset (tc0, TCP_TIMER_DELACK); + } + + /* If not retransmitting + * 1) update snd_una_max (SYN, SYNACK, new data, FIN) + * 2) If we're not tracking an ACK, start tracking */ + if (seq_lt (tc0->snd_una_max, tc0->snd_nxt)) + { + tc0->snd_una_max = tc0->snd_nxt; + if (tc0->rtt_ts == 0) + { + tc0->rtt_ts = tcp_time_now (); + tc0->rtt_seq = tc0->snd_nxt; + } + } + + /* Set the retransmit timer if not set already and not + * doing a pure ACK */ + if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) + && tc0->snd_nxt != tc0->snd_una) + { + tcp_retransmit_timer_set (tm, tc0); + tc0->rto_boff = 0; + } + + /* set fib index to default and lookup node */ + /* XXX network virtualization (vrf/vni) */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + + done: + b0->error = error0 != 0 ? node->errors[error0] : 0; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +tcp4_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +tcp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +VLIB_REGISTER_NODE (tcp4_output_node) = +{ + .function = tcp4_output,.name = "tcp4-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp4_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp4_output_node, tcp4_output) +VLIB_REGISTER_NODE (tcp6_output_node) = +{ + .function = tcp6_output,.name = "tcp6-output", + /* Takes a vector of packets. 
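+   * Each element is a 32-bit buffer index; packets leave through
+   * error-drop or ip6-lookup.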
*/ + .vector_size = sizeof (u32),.n_errors = TCP_N_ERROR,.error_strings = + tcp_error_strings,.n_next_nodes = TCP_OUTPUT_N_NEXT,.next_nodes = + { +#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, + foreach_tcp6_output_next +#undef _ + } +,.format_buffer = format_tcp_header,.format_trace = format_tcp_tx_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output) u32 +tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + tcp_connection_t *tc; + + tc = (tcp_connection_t *) tconn; + tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED); + return 0; +} + +typedef enum _tcp_reset_next +{ + TCP_RESET_NEXT_DROP, + TCP_RESET_NEXT_IP_LOOKUP, + TCP_RESET_N_NEXT +} tcp_reset_next_t; + +#define foreach_tcp4_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip4-lookup") + +#define foreach_tcp6_reset_next \ + _(DROP, "error-drop") \ + _(IP_LOOKUP, "ip6-lookup") + +static uword +tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->cpu_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags, + my_thread_index, is_ip4)) + { + error0 = TCP_ERROR_LOOKUP_DROPS; + next0 = TCP_RESET_NEXT_DROP; + goto done; + } + + /* Prepare to send to IP lookup */ + vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; + next0 = TCP_RESET_NEXT_IP_LOOKUP; + + done: + b0->error = error0 != 0 ? 
node->errors[error0] : 0; + b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +tcp4_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 1); +} + +static uword +tcp6_send_reset (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return tcp46_send_reset_inline (vm, node, from_frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp4_reset_node) = { + .function = tcp4_send_reset, + .name = "tcp4-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp4_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (tcp6_reset_node) = { + .function = tcp6_send_reset, + .name = "tcp6-reset", + .vector_size = sizeof (u32), + .n_errors = TCP_N_ERROR, + .error_strings = tcp_error_strings, + .n_next_nodes = TCP_RESET_N_NEXT, + .next_nodes = { +#define _(s,n) [TCP_RESET_NEXT_##s] = n, + foreach_tcp6_reset_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h new file mode 100644 index 00000000..866c5fd6 --- /dev/null +++ b/src/vnet/tcp/tcp_packet.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_tcp_packet_h +#define included_tcp_packet_h + +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) /**< No more data from sender. */ \ + _ (SYN) /**< Synchronize sequence numbers. */ \ + _ (RST) /**< Reset the connection. */ \ + _ (PSH) /**< Push function. */ \ + _ (ACK) /**< Ack field significant. */ \ + _ (URG) /**< Urgent pointer field significant. */ \ + _ (ECE) /**< ECN-echo. Receiver got CE packet */ \ + _ (CWR) /**< Sender reduced congestion window */ + +enum +{ +#define _(f) TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ + TCP_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f, + foreach_tcp_flag +#undef _ +}; + +typedef struct _tcp_header +{ + union + { + struct + { + u16 src_port; /**< Source port. */ + u16 dst_port; /**< Destination port. */ + }; + struct + { + u16 src, dst; + }; + }; + + u32 seq_number; /**< Sequence number of the first data octet in this + * segment, except when SYN is present. If SYN + * is present the seq number is is the ISN and the + * first data octet is ISN+1 */ + u32 ack_number; /**< Acknowledgement number if ACK is set. 
It contains + * the value of the next sequence number the sender + * of the segment is expecting to receive. */ + u8 data_offset_and_reserved; + u8 flags; /**< Flags: see the macro above */ + u16 window; /**< Number of bytes sender is willing to receive. */ + + u16 checksum; /**< Checksum of TCP pseudo header and data. */ + u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */ +} __attribute__ ((packed)) tcp_header_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4) +#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN) +#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN) +#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST) +#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH) +#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK) +#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG) +#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE) +#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR) + +/* Flag tests that return 0 or 1 */ +#define tcp_is_syn(_th) !!((_th)->flags & TCP_FLAG_SYN) +#define tcp_is_fin(_th) !!((_th)->flags & TCP_FLAG_FIN) + +always_inline int +tcp_header_bytes (tcp_header_t * t) +{ + return tcp_doff (t) * sizeof (u32); +} + +/* + * TCP options. + */ + +typedef enum tcp_option_type +{ + TCP_OPTION_EOL = 0, /**< End of options. */ + TCP_OPTION_NOOP = 1, /**< No operation. */ + TCP_OPTION_MSS = 2, /**< Limit MSS. */ + TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */ + TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */ + TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */ + TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */ + TCP_OPTION_UTO = 28, /**< User timeout. */ + TCP_OPTION_AO = 29, /**< Authentication Option. */ +} tcp_option_type_t; + +#define foreach_tcp_options_flag \ + _ (MSS) /**< MSS advertised in SYN */ \ + _ (TSTAMP) /**< Timestamp capability advertised in SYN */ \ + _ (WSCALE) /**< Wnd scale capability advertised in SYN */ \ + _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \ + _ (SACK) /**< SACK present */ + +enum +{ +#define _(f) TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ + TCP_OPTIONS_N_FLAG_BITS, +}; + +enum +{ +#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f, + foreach_tcp_options_flag +#undef _ +}; + +typedef struct _sack_block +{ + u32 start; /**< Start sequence number */ + u32 end; /**< End sequence number */ +} sack_block_t; + +typedef struct +{ + u8 flags; /** Option flags, see above */ + + /* Received options */ + u16 mss; /**< Maximum segment size advertised by peer */ + u8 wscale; /**< Window scale advertised by peer */ + u32 tsval; /**< Peer's timestamp value */ + u32 tsecr; /**< Echoed/reflected time stamp */ + sack_block_t *sacks; /**< SACK blocks received */ + u8 n_sack_blocks; /**< Number of SACKs blocks */ +} tcp_options_t; + +/* Flag tests that return 0 or !0 */ +#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS) +#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP) +#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE) +#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK) +#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED) + +/* TCP option lengths */ +#define TCP_OPTION_LEN_EOL 1 +#define TCP_OPTION_LEN_NOOP 1 +#define TCP_OPTION_LEN_MSS 4 +#define TCP_OPTION_LEN_WINDOW_SCALE 3 +#define TCP_OPTION_LEN_SACK_PERMITTED 2 +#define TCP_OPTION_LEN_TIMESTAMP 10 +#define TCP_OPTION_LEN_SACK_BLOCK 8 + +#define TCP_WND_MAX 65535U +#define TCP_MAX_WND_SCALE 14 
/* See RFC 1323 */ +#define TCP_OPTS_ALIGN 4 +#define TCP_OPTS_MAX_SACK_BLOCKS 3 +#endif /* included_tcp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c new file mode 100644 index 00000000..dc324049 --- /dev/null +++ b/src/vnet/tcp/tcp_pg.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/tcp_pg: TCP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +/* TCP flags bit 0 first. */ +#define foreach_tcp_flag \ + _ (FIN) \ + _ (SYN) \ + _ (RST) \ + _ (PSH) \ + _ (ACK) \ + _ (URG) \ + _ (ECE) \ + _ (CWR) + +static void +tcp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, + u32 n_packets) +{ + vlib_main_t * vm = vlib_get_main(); + u32 ip_offset, tcp_offset; + + tcp_offset = g->start_byte_offset; + ip_offset = (g-1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t * p0; + ip4_header_t * ip0; + tcp_header_t * tcp0; + ip_csum_t sum0; + u32 tcp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT (p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + tcp0 = (void *) (p0->data + tcp_offset); + tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + /* Initialize checksum with header. */ + if (BITS (sum0) == 32) + { + sum0 = clib_mem_unaligned (&ip0->src_address, u32); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. 
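+       * The field must read as zero while the incremental checksum
+       * over the TCP header and payload is accumulated and folded.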
*/ + tcp0->checksum = 0; + + sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + + tcp0->checksum = ~ ip_csum_fold (sum0); + } +} + +typedef struct { + pg_edit_t src, dst; + pg_edit_t seq_number, ack_number; + pg_edit_t data_offset_and_reserved; +#define _(f) pg_edit_t f##_flag; + foreach_tcp_flag +#undef _ + pg_edit_t window; + pg_edit_t checksum; + pg_edit_t urgent_pointer; +} pg_tcp_header_t; + +static inline void +pg_tcp_header_init (pg_tcp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, tcp_header_t, f); + _ (src); + _ (dst); + _ (seq_number); + _ (ack_number); + _ (window); + _ (checksum); + _ (urgent_pointer); +#undef _ + + /* Initialize bit fields. */ +#define _(f) \ + pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \ + flags, \ + TCP_FLAG_BIT_##f, 1); + + foreach_tcp_flag +#undef _ + + pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, + data_offset_and_reserved, + 4, 4); +} + +uword +unformat_pg_tcp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t * p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), + &group_index); + pg_tcp_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->seq_number, 0); + pg_edit_set_fixed (&p->ack_number, 0); + + pg_edit_set_fixed (&p->data_offset_and_reserved, + sizeof (tcp_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->window, 4096); + pg_edit_set_fixed (&p->urgent_pointer, 0); + +#define _(f) pg_edit_set_fixed (&p->f##_flag, 0); + foreach_tcp_flag +#undef _ + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (! unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src, + unformat_pg_edit, + unformat_tcp_udp_port, &p->dst)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "window %U", + unformat_pg_edit, + unformat_pg_number, &p->window)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, + unformat_pg_number, &p->checksum)) + ; + + /* Flags. */ +#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); + foreach_tcp_flag +#undef _ + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t * im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t * pi; + + pi = 0; + if (p->dst.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (! unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t * g = pg_stream_get_group (s, group_index); + g->edit_function = tcp_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + } + + error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c new file mode 100644 index 00000000..c7605a30 --- /dev/null +++ b/src/vnet/tcp/tcp_syn_filter4.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + f64 next_reset; + f64 reset_interval; + u8 *syn_counts; +} syn_filter4_runtime_t; + +typedef struct +{ + u32 next_index; + int not_a_syn; + u8 filter_value; +} syn_filter4_trace_t; + +/* packet trace format function */ +static u8 * +format_syn_filter4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + syn_filter4_trace_t *t = va_arg (*args, syn_filter4_trace_t *); + + s = format (s, "SYN_FILTER4: next index %d, %s", + t->next_index, t->not_a_syn ? "not a syn" : "syn"); + if (t->not_a_syn == 0) + s = format (s, ", filter value %d\n", t->filter_value); + else + s = format (s, "\n"); + return s; +} + +static vlib_node_registration_t syn_filter4_node; + +#define foreach_syn_filter_error \ +_(THROTTLED, "TCP SYN packet throttle drops") \ +_(OK, "TCP SYN packets passed") + +typedef enum +{ +#define _(sym,str) SYN_FILTER_ERROR_##sym, + foreach_syn_filter_error +#undef _ + SYN_FILTER_N_ERROR, +} syn_filter_error_t; + +static char *syn_filter4_error_strings[] = { +#define _(sym,string) string, + foreach_syn_filter_error +#undef _ +}; + +typedef enum +{ + SYN_FILTER_NEXT_DROP, + SYN_FILTER_N_NEXT, +} syn_filter_next_t; + +extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local; + +static uword +syn_filter4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + syn_filter_next_t next_index; + u32 ok_syn_packets = 0; + vnet_feature_main_t *fm = &feature_main; + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index]; + syn_filter4_runtime_t *rt = (syn_filter4_runtime_t *) node->runtime_data; + f64 now = vlib_time_now (vm); + /* Shut up spurious gcc warnings. */ + u8 *c0 = 0, *c1 = 0, *c2 = 0, *c3 = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (now > rt->next_reset) + { + memset (rt->syn_counts, 0, vec_len (rt->syn_counts)); + rt->next_reset = now + rt->reset_interval; + } + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3; + u32 not_a_syn0 = 1, not_a_syn1 = 1, not_a_syn2 = 1, not_a_syn3 = 1; + u64 hash0, hash1, hash2, hash3; + + /* Prefetch next iteration. 
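+	   * Pull in the buffer headers and the first cache line of packet
+	   * data for the next four buffers to hide memory latency.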
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b1->current_config_index, + &next1, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b2->current_config_index, + &next2, 0 /* sizeof (c0[0]) */ ); + vnet_get_config_data + (&cm->config_main, &b3->current_config_index, + &next3, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace00; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace00; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace00; + } + *c0 += 1; + ok_syn_packets++; + + trace00: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 0 : *c0; + } + + /* Not TCP? */ + ip1 = vlib_buffer_get_current (b1); + if (ip1->protocol != IP_PROTOCOL_TCP) + goto trace01; + + tcp1 = ip4_next_header (ip1); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp1->flags & 0x2))) + goto trace01; + + not_a_syn1 = 0; + hash1 = clib_xxhash ((u64) ip1->src_address.as_u32); + c1 = &rt->syn_counts[hash1 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c1 >= 0x80)) + { + next1 = SYN_FILTER_NEXT_DROP; + b1->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace01; + } + *c1 += 1; + ok_syn_packets++; + + trace01: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->not_a_syn = not_a_syn1; + t->next_index = next1; + t->filter_value = not_a_syn1 ? 0 : *c1; + } + + /* Not TCP? */ + ip2 = vlib_buffer_get_current (b2); + if (ip2->protocol != IP_PROTOCOL_TCP) + goto trace02; + + tcp2 = ip4_next_header (ip2); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. 
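+	   * Test the raw flags byte instead; 0x2 is TCP_FLAG_SYN
+	   * (bit 1 of the flags octet).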
+ */ + if (PREDICT_TRUE (!(tcp2->flags & 0x2))) + goto trace02; + + not_a_syn2 = 0; + hash2 = clib_xxhash ((u64) ip2->src_address.as_u32); + c2 = &rt->syn_counts[hash2 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c2 >= 0x80)) + { + next2 = SYN_FILTER_NEXT_DROP; + b2->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace02; + } + *c2 += 1; + ok_syn_packets++; + + trace02: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b2->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b2, sizeof (*t)); + t->not_a_syn = not_a_syn2; + t->next_index = next2; + t->filter_value = not_a_syn2 ? 0 : *c2; + } + + /* Not TCP? */ + ip3 = vlib_buffer_get_current (b3); + if (ip3->protocol != IP_PROTOCOL_TCP) + goto trace03; + + tcp3 = ip4_next_header (ip3); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp3->flags & 0x2))) + goto trace03; + + not_a_syn3 = 0; + hash3 = clib_xxhash ((u64) ip3->src_address.as_u32); + c3 = &rt->syn_counts[hash3 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c3 >= 0x80)) + { + next3 = SYN_FILTER_NEXT_DROP; + b3->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace03; + } + *c3 += 1; + ok_syn_packets++; + + trace03: + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b3->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b3, sizeof (*t)); + t->not_a_syn = not_a_syn3; + t->next_index = next3; + t->filter_value = not_a_syn3 ? 0 : *c3; + } + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip4_header_t *ip0; + tcp_header_t *tcp0; + u32 not_a_syn0 = 1; + u32 hash0; + u8 *c0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vnet_get_config_data + (&cm->config_main, &b0->current_config_index, + &next0, 0 /* sizeof (c0[0]) */ ); + + /* Not TCP? */ + ip0 = vlib_buffer_get_current (b0); + if (ip0->protocol != IP_PROTOCOL_TCP) + goto trace0; + + tcp0 = ip4_next_header (ip0); + /* + * Not a SYN? + * $$$$ hack: the TCP bitfield flags seem not to compile + * correct code. + */ + if (PREDICT_TRUE (!(tcp0->flags & 0x2))) + goto trace0; + + not_a_syn0 = 0; + hash0 = clib_xxhash ((u64) ip0->src_address.as_u32); + c0 = &rt->syn_counts[hash0 & (_vec_len (rt->syn_counts) - 1)]; + if (PREDICT_FALSE (*c0 >= 0x80)) + { + next0 = SYN_FILTER_NEXT_DROP; + b0->error = node->errors[SYN_FILTER_ERROR_THROTTLED]; + goto trace0; + } + *c0 += 1; + ok_syn_packets++; + + trace0: + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + syn_filter4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->not_a_syn = not_a_syn0; + t->next_index = next0; + t->filter_value = not_a_syn0 ? 
0 : *c0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, syn_filter4_node.index, + SYN_FILTER_ERROR_OK, ok_syn_packets); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (syn_filter4_node, static) = +{ + .function = syn_filter4_node_fn, + .name = "syn-filter-4", + .vector_size = sizeof (u32), + .format_trace = format_syn_filter4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .runtime_data_bytes = sizeof (syn_filter4_runtime_t), + .n_errors = ARRAY_LEN(syn_filter4_error_strings), + .error_strings = syn_filter4_error_strings, + + .n_next_nodes = SYN_FILTER_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SYN_FILTER_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (syn_filter4_node, syn_filter4_node_fn); + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (syn_filter_4, static) = +{ + .arc_name = "ip4-local", + .node_name = "syn-filter-4", + .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), +}; +/* *INDENT-ON* */ + +int +syn_filter_enable_disable (u32 sw_if_index, int enable_disable) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw; + int rv = 0; + + /* Utterly wrong? */ + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + /* Not a physical port? */ + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + vlib_main_t *vm = vlib_get_main (); + syn_filter4_runtime_t *rt; + + rt = vlib_node_get_runtime_data (vm, syn_filter4_node.index); + vec_validate (rt->syn_counts, 1023); + /* + * Given perfect disperson / optimal hashing results: + * Allow 128k (successful) syns/sec. 1024, buckets each of which + * absorb 128 syns before filtering. Reset table once a second. + * Reality bites, lets try resetting once every 100ms. 
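+       * With 1024 buckets of 128 SYNs each, one interval admits
+       * 1024 * 128 = 131072 (~128k) SYNs; at a 100 ms interval that is
+       * up to 1280 SYNs/sec per source-hash bucket.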
+ */ + rt->reset_interval = 0.1; /* reset interval in seconds */ + } + + rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4", + sw_if_index, enable_disable, 0, 0); + + return rv; +} + +static clib_error_t * +syn_filter_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index = ~0; + int enable_disable = 1; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + rv = syn_filter_enable_disable (sw_if_index, enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return + (0, "Invalid interface, only works on physical ports"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "feature arc not found"); + + case VNET_API_ERROR_INVALID_VALUE_2: + return clib_error_return (0, "feature node not found"); + + default: + return clib_error_return (0, "syn_filter_enable_disable returned %d", + rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_content_command, static) = +{ + .path = "ip syn filter", + .short_help = "ip syn filter [disable]", + .function = syn_filter_enable_disable_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h new file mode 100644 index 00000000..fa25268c --- /dev/null +++ b/src/vnet/tcp/tcp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_tcp_timer_h__ +#define __included_tcp_timer_h__ + +#include +#include + +#endif /* __included_tcp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c new file mode 100644 index 00000000..afa66ba4 --- /dev/null +++ b/src/vnet/udp/builtin_server.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp builtin server +*/ + +#include +#include +#include + +/** per-worker built-in server copy buffers */ +u8 **copy_buffers; + +static int +builtin_session_create_callback (stream_session_t * s) +{ + /* Simple version: declare session ready-to-go... */ + s->session_state = SESSION_STATE_READY; + return 0; +} + +static void +builtin_session_disconnect_callback (stream_session_t * s) +{ + stream_session_disconnect (s); +} + +static int +builtin_server_rx_callback (stream_session_t * s) +{ + svm_fifo_t *rx_fifo, *tx_fifo; + u32 this_transfer; + int actual_transfer; + u8 *my_copy_buffer; + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + + my_copy_buffer = copy_buffers[s->thread_index]; + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + this_transfer = svm_fifo_max_enqueue (tx_fifo) + < svm_fifo_max_dequeue (rx_fifo) ? + svm_fifo_max_enqueue (tx_fifo) : svm_fifo_max_dequeue (rx_fifo); + + vec_validate (my_copy_buffer, this_transfer - 1); + _vec_len (my_copy_buffer) = this_transfer; + + actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, 0, this_transfer, + my_copy_buffer); + ASSERT (actual_transfer == this_transfer); + actual_transfer = svm_fifo_enqueue_nowait (tx_fifo, 0, this_transfer, + my_copy_buffer); + + copy_buffers[s->thread_index] = my_copy_buffer; + + /* Fabricate TX event, send to ourselves */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + /* $$$$ for event logging */ + evt.enqueue_length = actual_transfer; + evt.event_id = 0; + q = session_manager_get_vpp_event_queue (s->thread_index); + unix_shared_memory_queue_add (q, (u8 *) & evt, 0 /* do wait for mutex */ ); + + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_server = { + .session_accept_callback = builtin_session_create_callback, + .session_disconnect_callback = builtin_session_disconnect_callback, + .builtin_server_rx_callback = builtin_server_rx_callback +}; +/* *INDENT-ON* */ + +static int +bind_builtin_uri_server (u8 * uri) +{ + vnet_bind_args_t _a, *a = &_a; + char segment_name[128]; + u32 segment_name_length; + int rv; + u64 options[16]; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->uri = (char *) uri; + a->api_client_index = ~0; /* built-in server */ + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_server; + + options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */ + a->options = options; + + rv = vnet_bind_uri (a); + + return rv; +} + +static int +unbind_builtin_uri_server (u8 * uri) +{ + int rv; + + rv = vnet_unbind_uri ((char *) uri, ~0 /* client_index */ ); + + return rv; +} + +static clib_error_t * +builtin_server_init (vlib_main_t * vm) +{ + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (copy_buffers, num_threads - 1); + return 0; +} + +VLIB_INIT_FUNCTION (builtin_server_init); + +static clib_error_t * +builtin_uri_bind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, 
"uri to bind not specified..."); + + rv = bind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "bind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_bind_command, static) = +{ + .path = "builtin uri bind", + .short_help = "builtin uri bind", + .function = builtin_uri_bind_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +builtin_uri_unbind_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *uri = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "uri %s", &uri)) + ; + else + break; + } + + if (uri == 0) + return clib_error_return (0, "uri to unbind not specified..."); + + rv = unbind_builtin_uri_server (uri); + + vec_free (uri); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "unbind_uri_server returned %d", rv); + break; + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (builtin_uri_unbind_command, static) = +{ + .path = "builtin uri unbind", + .short_help = "builtin uri unbind", + .function = builtin_uri_unbind_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c new file mode 100644 index 00000000..9e740466 --- /dev/null +++ b/src/vnet/udp/udp.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + udp state machine, etc. 
+*/ + +#include +#include +#include +#include + +udp_uri_main_t udp_uri_main; + +u32 +udp_session_bind_ip4 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + memset (listener, 0, sizeof (udp_connection_t)); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32; + listener->c_proto = SESSION_TYPE_IP4_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_bind_ip6 (vlib_main_t * vm, u32 session_index, + ip46_address_t * ip, u16 port_number_host_byte_order) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + udp_connection_t *listener; + + pool_get (um->udp_listeners, listener); + listener->c_lcl_port = clib_host_to_net_u16 (port_number_host_byte_order); + clib_memcpy (&listener->c_lcl_ip6, &ip->ip6, sizeof (ip6_address_t)); + listener->c_proto = SESSION_TYPE_IP6_UDP; + udp_register_dst_port (um->vlib_main, port_number_host_byte_order, + udp4_uri_input_node.index, 0 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip4 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 1 /* is_ipv4 */ ); + return 0; +} + +u32 +udp_session_unbind_ip6 (vlib_main_t * vm, u32 listener_index) +{ + udp_connection_t *listener; + + listener = udp_listener_get (listener_index); + + /* deregister the udp_local mapping */ + udp_unregister_dst_port (vm, listener->c_lcl_port, 0 /* is_ipv4 */ ); + return 0; +} + +transport_connection_t * +udp_session_get_listener (u32 listener_index) +{ + udp_connection_t *us; + + us = udp_listener_get (listener_index); + return &us->connection; +} + +u32 +udp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + udp_connection_t *us; + u8 *data; + udp_header_t *udp; + + us = (udp_connection_t *) tconn; + + if (tconn->is_ip4) + { + ip4_header_t *ip; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip4_header_t *) ((u8 *) udp - sizeof (ip4_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + ip->src_address.as_u32 = us->c_lcl_ip4.as_u32; + ip->dst_address.as_u32 = us->c_rmt_ip4.as_u32; + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->length = clib_host_to_net_u16 (b->current_length + sizeof (*udp)); + ip->checksum = ip4_header_checksum (ip); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + return SESSION_QUEUE_NEXT_IP4_LOOKUP; + } + else + { + vlib_main_t *vm = vlib_get_main (); + ip6_header_t *ip; + u16 payload_length; + int bogus = ~0; + + data = vlib_buffer_get_current (b); + udp = (udp_header_t *) (data - sizeof (udp_header_t)); + ip = (ip6_header_t *) ((u8 *) udp - sizeof (ip6_header_t)); + + /* Build packet header, swap rx key src + dst fields */ + clib_memcpy (&ip->src_address, &us->c_lcl_ip6, sizeof (ip6_address_t)); + clib_memcpy (&ip->dst_address, &us->c_rmt_ip6, sizeof (ip6_address_t)); + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + 
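+      /* 0x6 in the top nibble is the IP version; traffic class and
+         flow label are left at zero */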
ip->hop_limit = 0xff; + ip->protocol = IP_PROTOCOL_UDP; + + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip); + + ip->payload_length = clib_host_to_net_u16 (payload_length); + + udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip, &bogus); + ASSERT (!bogus); + + udp->src_port = us->c_lcl_port; + udp->dst_port = us->c_rmt_port; + udp->length = clib_host_to_net_u16 (b->current_length); + udp->checksum = 0; + + b->current_length = sizeof (*ip) + sizeof (*udp); + + return SESSION_QUEUE_NEXT_IP6_LOOKUP; + } +} + +transport_connection_t * +udp_session_get (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + + udp_connection_t *us; + us = + pool_elt_at_index (um->udp_sessions[my_thread_index], connection_index); + return &us->connection; +} + +void +udp_session_close (u32 connection_index, u32 my_thread_index) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + pool_put_index (um->udp_sessions[my_thread_index], connection_index); +} + +u8 * +format_udp_session_ip4 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *u4; + + u4 = udp_connection_get (uci, thread_index); + + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &u4->c_lcl_ip4, clib_net_to_host_u16 (u4->c_lcl_port), + format_ip4_address, &u4->c_rmt_ip4, + clib_net_to_host_u16 (u4->c_rmt_port)); + return s; +} + +u8 * +format_udp_session_ip6 (u8 * s, va_list * args) +{ + u32 uci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + udp_connection_t *tc = udp_connection_get (uci, thread_index); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip4 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip4_address, + &tc->c_lcl_ip4, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip4_address, &tc->c_rmt_ip4, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u8 * +format_udp_listener_session_ip6 (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + udp_connection_t *tc = udp_listener_get (tci); + s = format (s, "[%s] %U:%d->%U:%d", "udp", format_ip6_address, + &tc->c_lcl_ip6, clib_net_to_host_u16 (tc->c_lcl_port), + format_ip6_address, &tc->c_rmt_ip6, + clib_net_to_host_u16 (tc->c_rmt_port)); + return s; +} + +u16 +udp_send_mss_uri (transport_connection_t * t) +{ + /* TODO figure out MTU of output interface */ + return 400; +} + +u32 +udp_send_space_uri (transport_connection_t * t) +{ + /* No constraint on TX window */ + return ~0; +} + +int +udp_open_connection (ip46_address_t * addr, u16 port) +{ + clib_warning ("Not implemented"); + return 0; +} + +/* *INDENT-OFF* */ +const static transport_proto_vft_t udp4_proto = { + .bind = udp_session_bind_ip4, + .open = udp_open_connection, + .unbind = udp_session_unbind_ip4, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip4, + .format_listener = format_udp_listener_session_ip4 +}; + +const static transport_proto_vft_t udp6_proto = { + .bind = udp_session_bind_ip6, + .open = 
udp_open_connection, + .unbind = udp_session_unbind_ip6, + .push_header = udp_push_header, + .get_connection = udp_session_get, + .get_listener = udp_session_get_listener, + .close = udp_session_close, + .send_mss = udp_send_mss_uri, + .send_space = udp_send_space_uri, + .format_connection = format_udp_session_ip6, + .format_listener = format_udp_listener_session_ip6 +}; +/* *INDENT-ON* */ + +static clib_error_t * +udp_init (vlib_main_t * vm) +{ + udp_uri_main_t *um = vnet_get_udp_main (); + ip_main_t *im = &ip_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 num_threads; + clib_error_t *error = 0; + ip_protocol_info_t *pi; + + um->vlib_main = vm; + um->vnet_main = vnet_get_main (); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + /* IP registration */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP); + if (pi == 0) + return clib_error_return (0, "UDP protocol info AWOL"); + pi->format_header = format_udp_header; + pi->unformat_pg_edit = unformat_pg_udp_header; + + + /* Register as transport with URI */ + session_register_transport (SESSION_TYPE_IP4_UDP, &udp4_proto); + session_register_transport (SESSION_TYPE_IP6_UDP, &udp6_proto); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + tm->n_threads; + vec_validate (um->udp_sessions, num_threads - 1); + + return error; +} + +VLIB_INIT_FUNCTION (udp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h new file mode 100644 index 00000000..7ab26ce9 --- /dev/null +++ b/src/vnet/udp/udp.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_udp_h__ +#define __included_udp_h__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct +{ + transport_connection_t connection; /** must be first */ + + /** ersatz MTU to limit fifo pushes to test data size */ + u32 mtu; +} udp_connection_t; + +typedef struct _udp_uri_main +{ + /* Per-worker thread udp connection pools */ + udp_connection_t **udp_sessions; + udp_connection_t *udp_listeners; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ip4_main_t *ip4_main; + ip6_main_t *ip6_main; +} udp_uri_main_t; + +extern udp_uri_main_t udp_uri_main; +extern vlib_node_registration_t udp4_uri_input_node; + +always_inline udp_uri_main_t * +vnet_get_udp_main () +{ + return &udp_uri_main; +} + +always_inline udp_connection_t * +udp_connection_get (u32 conn_index, u32 thread_index) +{ + return pool_elt_at_index (udp_uri_main.udp_sessions[thread_index], + conn_index); +} + +always_inline udp_connection_t * +udp_listener_get (u32 conn_index) +{ + return pool_elt_at_index (udp_uri_main.udp_listeners, conn_index); +} + +typedef enum +{ +#define udp_error(n,s) UDP_ERROR_##n, +#include +#undef udp_error + UDP_N_ERROR, +} udp_error_t; + +#define foreach_udp4_dst_port \ +_ (67, dhcp_to_server) \ +_ (68, dhcp_to_client) \ +_ (500, ikev2) \ +_ (3784, bfd4) \ +_ (3785, bfd_echo4) \ +_ (4341, lisp_gpe) \ +_ (4342, lisp_cp) \ +_ (4739, ipfix) \ +_ (4789, vxlan) \ +_ (4789, vxlan6) \ +_ (4790, vxlan_gpe) \ +_ (6633, vpath_3) + + +#define foreach_udp6_dst_port \ +_ (547, dhcpv6_to_server) \ +_ (546, dhcpv6_to_client) \ +_ (3784, bfd6) \ +_ (3785, bfd_echo6) \ +_ (4341, lisp_gpe6) \ +_ (4342, lisp_cp6) \ +_ (4790, vxlan6_gpe) \ +_ (6633, vpath6_3) + +typedef enum +{ +#define _(n,f) UDP_DST_PORT_##f = n, + foreach_udp4_dst_port foreach_udp6_dst_port +#undef _ +} udp_dst_port_t; + +typedef enum +{ +#define _(n,f) UDP6_DST_PORT_##f = n, + foreach_udp6_dst_port +#undef _ +} udp6_dst_port_t; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* GRE protocol type in host byte order. */ + udp_dst_port_t dst_port; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} udp_dst_port_info_t; + +typedef enum +{ + UDP_IP6 = 0, + UDP_IP4, /* the code is full of is_ip4... */ + N_UDP_AF, +} udp_af_t; + +typedef struct +{ + udp_dst_port_info_t *dst_port_infos[N_UDP_AF]; + + /* Hash tables mapping name/protocol to protocol info index. */ + uword *dst_port_info_by_name[N_UDP_AF]; + uword *dst_port_info_by_dst_port[N_UDP_AF]; + + /* convenience */ + vlib_main_t *vlib_main; +} udp_main_t; + +always_inline udp_dst_port_info_t * +udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4) +{ + uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port); + return p ? 
vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0; +} + +format_function_t format_udp_header; +format_function_t format_udp_rx_trace; + +unformat_function_t unformat_udp_header; + +void udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, + u32 node_index, u8 is_ip4); + +void +udp_unregister_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u8 is_ip4); + +void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +always_inline void +ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4) +{ + u16 new_l0; + udp_header_t *udp0; + + if (is_ip4) + { + ip4_header_t *ip0; + ip_csum_t sum0; + u16 old_l0 = 0; + + ip0 = vlib_buffer_get_current (b0); + + /* fix the ing outer-IP checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + udp0->length = new_l0; + } + else + { + ip6_header_t *ip0; + int bogus0; + + ip0 = vlib_buffer_get_current (b0); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + ip0->payload_length = new_l0; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp0->length = new_l0; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + ASSERT (bogus0 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + } +} + +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, -ec_len); + + if (is_ip4) + { + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 1); + } + else + { + ip6_header_t *ip0; + + ip0 = vlib_buffer_get_current (b0); + + /* Apply the encap string. 
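+         ec0 is expected to hold a prebuilt IPv6 + UDP rewrite of ec_len
+         bytes; ip_udp_fixup_one() below then patches the payload length
+         and UDP checksum for this buffer.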
*/ + clib_memcpy (ip0, ec0, ec_len); + ip_udp_fixup_one (vm, b0, 0); + } +} + +always_inline void +ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, + u8 * ec0, u8 * ec1, word ec_len, u8 is_v4) +{ + u16 new_l0, new_l1; + udp_header_t *udp0, *udp1; + + ASSERT (_vec_len (ec0) == _vec_len (ec1)); + + vlib_buffer_advance (b0, -ec_len); + vlib_buffer_advance (b1, -ec_len); + + if (is_v4) + { + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + u16 old_l0 = 0, old_l1 = 0; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + /* fix the ing outer-IP checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0 always 0, see the rewrite setup */ + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */ ); + + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + + ip0->length = new_l0; + ip1->length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - + sizeof (*ip0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) - + sizeof (*ip1)); + udp0->length = new_l0; + udp1->length = new_l1; + } + else + { + ip6_header_t *ip0, *ip1; + int bogus0, bogus1; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Apply the encap string. */ + clib_memcpy (ip0, ec0, ec_len); + clib_memcpy (ip1, ec1, ec_len); + + new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) + - sizeof (*ip0)); + new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) + - sizeof (*ip1)); + ip0->payload_length = new_l0; + ip1->payload_length = new_l1; + + /* Fix UDP length */ + udp0 = (udp_header_t *) (ip0 + 1); + udp1 = (udp_header_t *) (ip1 + 1); + + udp0->length = new_l0; + udp1->length = new_l1; + + udp0->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0); + udp1->checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1); + ASSERT (bogus0 == 0); + ASSERT (bogus1 == 0); + + if (udp0->checksum == 0) + udp0->checksum = 0xffff; + if (udp1->checksum == 0) + udp1->checksum = 0xffff; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif /* __included_udp_h__ */ diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def new file mode 100644 index 00000000..bfdae0ac --- /dev/null +++ b/src/vnet/udp/udp_error.def @@ -0,0 +1,21 @@ +/* + * udp_error.def: udp errors + * + * Copyright (c) 2013-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +udp_error (NONE, "no error") +udp_error (NO_LISTENER, "no listener for dst port") +udp_error (LENGTH_ERROR, "UDP packets with length errors") +udp_error (PUNT, "no listener punt") diff --git a/src/vnet/udp/udp_format.c b/src/vnet/udp/udp_format.c new file mode 100644 index 00000000..abdf561e --- /dev/null +++ b/src/vnet/udp/udp_format.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_format.c: udp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +/* Format UDP header. */ +u8 * +format_udp_header (u8 * s, va_list * args) +{ + udp_header_t *udp = va_arg (*args, udp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword indent; + u32 header_bytes = sizeof (udp[0]); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (udp[0])) + return format (s, "UDP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "UDP: %d -> %d", + clib_net_to_host_u16 (udp->src_port), + clib_net_to_host_u16 (udp->dst_port)); + + s = format (s, "\n%Ulength %d, checksum 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (udp->length), + clib_net_to_host_u16 (udp->checksum)); + + /* Recurse into next protocol layer. 
*/ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, udp->dst_port); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (udp + 1), + max_header_bytes - sizeof (udp[0])); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c new file mode 100644 index 00000000..4d509335 --- /dev/null +++ b/src/vnet/udp/udp_input.c @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include "../session/application_interface.h" + +vlib_node_registration_t udp4_uri_input_node; + +typedef struct +{ + u32 session; + u32 disposition; + u32 thread_index; +} udp4_uri_input_trace_t; + +/* packet trace format function */ +static u8 * +format_udp4_uri_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *); + + s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d", + t->session, t->disposition, t->thread_index); + return s; +} + +typedef enum +{ + UDP4_URI_INPUT_NEXT_DROP, + UDP4_URI_INPUT_N_NEXT, +} udp4_uri_input_next_t; + +static char *udp4_uri_input_error_strings[] = { +#define _(sym,string) string, + foreach_session_input_error +#undef _ +}; + +static uword +udp4_uri_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + udp4_uri_input_next_t next_index; + udp_uri_main_t *um = vnet_get_udp_main (); + session_manager_main_t *smm = vnet_get_session_manager_main (); + u32 my_thread_index = vm->cpu_index; + u8 my_enqueue_epoch; + u32 *session_indices_to_enqueue; + static u32 serial_number; + int i; + + my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index]; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = UDP4_URI_INPUT_NEXT_DROP; + u32 error0 = SESSION_ERROR_ENQUEUED; + udp_header_t *udp0; + ip4_header_t *ip0; + stream_session_t *s0; + svm_fifo_t *f0; + u16 udp_len0; + u8 *data0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* udp_local hands us a 
pointer to the udp data */ + + data0 = vlib_buffer_get_current (b0); + udp0 = (udp_header_t *) (data0 - sizeof (*udp0)); + + /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */ + ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0)); + s0 = 0; + + /* lookup session */ + s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address, + udp0->dst_port, udp0->src_port, + SESSION_TYPE_IP4_UDP, my_thread_index); + + /* no listener */ + if (PREDICT_FALSE (s0 == 0)) + { + error0 = SESSION_ERROR_NO_LISTENER; + goto trace0; + } + + f0 = s0->server_rx_fifo; + + /* established hit */ + if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY)) + { + udp_len0 = clib_net_to_host_u16 (udp0->length); + + if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0))) + { + error0 = SESSION_ERROR_FIFO_FULL; + goto trace0; + } + + svm_fifo_enqueue_nowait (f0, 0 /* pid */ , + udp_len0 - sizeof (*udp0), + (u8 *) (udp0 + 1)); + + b0->error = node->errors[SESSION_ERROR_ENQUEUED]; + + /* We need to send an RX event on this fifo */ + if (s0->enqueue_epoch != my_enqueue_epoch) + { + s0->enqueue_epoch = my_enqueue_epoch; + + vec_add1 (smm->session_indices_to_enqueue_by_thread + [my_thread_index], + s0 - smm->sessions[my_thread_index]); + } + } + /* listener hit */ + else if (s0->session_state == SESSION_STATE_LISTENING) + { + udp_connection_t *us; + int rv; + + error0 = SESSION_ERROR_NOT_READY; + + /* + * create udp transport session + */ + pool_get (um->udp_sessions[my_thread_index], us); + + us->mtu = 1024; /* $$$$ policy */ + + us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32; + us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32; + us->c_lcl_port = udp0->dst_port; + us->c_rmt_port = udp0->src_port; + us->c_proto = SESSION_TYPE_IP4_UDP; + us->c_c_index = us - um->udp_sessions[my_thread_index]; + + /* + * create stream session and attach the udp session to it + */ + rv = stream_session_accept (&us->connection, s0->session_index, + SESSION_TYPE_IP4_UDP, + 1 /*notify */ ); + if (rv) + error0 = rv; + + } + else + { + + error0 = SESSION_ERROR_NOT_READY; + goto trace0; + } + + trace0: + b0->error = node->errors[error0]; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + udp4_uri_input_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + + t->session = ~0; + if (s0) + t->session = s0 - smm->sessions[my_thread_index]; + t->disposition = error0; + t->thread_index = my_thread_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Send enqueue events */ + + session_indices_to_enqueue = + smm->session_indices_to_enqueue_by_thread[my_thread_index]; + + for (i = 0; i < vec_len (session_indices_to_enqueue); i++) + { + session_fifo_event_t evt; + unix_shared_memory_queue_t *q; + stream_session_t *s0; + application_t *server0; + + /* Get session */ + s0 = pool_elt_at_index (smm->sessions[my_thread_index], + session_indices_to_enqueue[i]); + + /* Get session's server */ + server0 = application_get (s0->app_index); + + /* Built-in server? Deliver the goods... 
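+         In-process apps register builtin_server_rx_callback and are
+         called directly; external apps instead receive a
+         FIFO_EVENT_SERVER_RX event on their event queue below.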
*/ + if (server0->cb_fns.builtin_server_rx_callback) + { + server0->cb_fns.builtin_server_rx_callback (s0); + continue; + } + + /* Fabricate event */ + evt.fifo = s0->server_rx_fifo; + evt.event_type = FIFO_EVENT_SERVER_RX; + evt.event_id = serial_number++; + evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo); + + /* Add event to server's event queue */ + q = server0->event_queue; + + /* Don't block for lack of space */ + if (PREDICT_TRUE (q->cursize < q->maxsize)) + unix_shared_memory_queue_add (server0->event_queue, (u8 *) & evt, + 0 /* do wait for mutex */ ); + else + { + vlib_node_increment_counter (vm, udp4_uri_input_node.index, + SESSION_ERROR_FIFO_FULL, 1); + } + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = "evt-enqueue: id %d length %d",.format_args = "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = evt.event_id; + ed->data[1] = evt.enqueue_length; + } + } + + vec_reset_length (session_indices_to_enqueue); + + smm->session_indices_to_enqueue_by_thread[my_thread_index] = + session_indices_to_enqueue; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (udp4_uri_input_node) = +{ + .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size = + sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (udp4_uri_input_error_strings),.error_strings = + udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",} +,}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c new file mode 100644 index 00000000..6b239f73 --- /dev/null +++ b/src/vnet/udp/udp_local.c @@ -0,0 +1,666 @@ +/* + * node.c: udp packet processing + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +udp_main_t udp_main; + +#define foreach_udp_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (ICMP4_ERROR, "ip4-icmp-error") \ + _ (ICMP6_ERROR, "ip6-icmp-error") + +typedef enum +{ +#define _(s,n) UDP_INPUT_NEXT_##s, + foreach_udp_input_next +#undef _ + UDP_INPUT_N_NEXT, +} udp_input_next_t; + +typedef struct +{ + u16 src_port; + u16 dst_port; + u8 bound; +} udp_rx_trace_t; + +u8 * +format_udp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *); + + s = format (s, "UDP: src-port %d dst-port %d%s", + clib_net_to_host_u16 (t->src_port), + clib_net_to_host_u16 (t->dst_port), + t->bound ? "" : " (no listener)"); + return s; +} + +typedef struct +{ + /* Sparse vector mapping udp dst_port in network byte order + to next index. 
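+     Unregistered ports map to SPARSE_VEC_INVALID_INDEX, which the input
+     nodes turn into a punt (when punt_unknown is set) or an ICMP
+     port-unreachable.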
*/ + u16 *next_by_dst_port; + u8 punt_unknown; +} udp_input_runtime_t; + +vlib_node_registration_t udp4_input_node; +vlib_node_registration_t udp6_input_node; + +always_inline uword +udp46_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + udp_input_runtime_t *rt = is_ip4 ? + (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index) + : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index); + __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next; + word n_no_listener = 0; + u8 punt_unknown = rt->punt_unknown; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + udp_header_t *h0 = 0, *h1 = 0; + u32 i0, i1, dst_port0, dst_port1; + u32 advance0, advance1; + u32 error0, next0, error1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + { + advance0 = sizeof (ip4_header_t); + advance1 = sizeof (ip4_header_t); + } + else + { + advance0 = sizeof (ip6_header_t); + advance1 = sizeof (ip6_header_t); + } + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b0, advance0); + h0 = vlib_buffer_get_current (b0); + error0 = next0 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) > + vlib_buffer_length_in_chain (vm, b0))) + { + error0 = UDP_ERROR_LENGTH_ERROR; + next0 = UDP_INPUT_NEXT_DROP; + } + } + + if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + else + { + vlib_buffer_advance (b1, advance1); + h1 = vlib_buffer_get_current (b1); + error1 = next1 = 0; + if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) > + vlib_buffer_length_in_chain (vm, b1))) + { + error1 = UDP_ERROR_LENGTH_ERROR; + next1 = UDP_INPUT_NEXT_DROP; + } + } + + /* Index sparse array with network byte order. */ + dst_port0 = (error0 == 0) ? h0->dst_port : 0; + dst_port1 = (error1 == 0) ? h1->dst_port : 0; + sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1, + &i0, &i1); + next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0; + next1 = (error1 == 0) ? 
vec_elt (rt->next_by_dst_port, i1) : next1; + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + + if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b1, -(word) advance1); + + if (PREDICT_FALSE (punt_unknown)) + { + b1->error = node->errors[UDP_ERROR_PUNT]; + next1 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b1, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b1, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next1 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b1->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b1, sizeof (*h1)); + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0 ? h0->src_port : 0; + tr->dst_port = h0 ? h0->dst_port : 0; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h1 ? h1->src_port : 0; + tr->dst_port = h1 ? 
h1->dst_port : 0; + tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR && + next1 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + udp_header_t *h0 = 0; + u32 i0, next0; + u32 advance0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* ip4/6_local hands us the ip header, not the udp header */ + if (is_ip4) + advance0 = sizeof (ip4_header_t); + else + advance0 = sizeof (ip6_header_t); + + if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0))) + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + goto trace_x1; + } + + vlib_buffer_advance (b0, advance0); + + h0 = vlib_buffer_get_current (b0); + + if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <= + vlib_buffer_length_in_chain (vm, b0))) + { + i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port); + next0 = vec_elt (rt->next_by_dst_port, i0); + + if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX)) + { + // move the pointer back so icmp-error can find the + // ip packet header + vlib_buffer_advance (b0, -(word) advance0); + + if (PREDICT_FALSE (punt_unknown)) + { + b0->error = node->errors[UDP_ERROR_PUNT]; + next0 = UDP_INPUT_NEXT_PUNT; + } + else if (is_ip4) + { + icmp4_error_set_vnet_buffer (b0, + ICMP4_destination_unreachable, + ICMP4_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP4_ERROR; + n_no_listener++; + } + else + { + icmp6_error_set_vnet_buffer (b0, + ICMP6_destination_unreachable, + ICMP6_destination_unreachable_port_unreachable, + 0); + next0 = UDP_INPUT_NEXT_ICMP6_ERROR; + n_no_listener++; + } + } + else + { + b0->error = node->errors[UDP_ERROR_NONE]; + // advance to the payload + vlib_buffer_advance (b0, sizeof (*h0)); + } + } + else + { + b0->error = node->errors[UDP_ERROR_LENGTH_ERROR]; + next0 = UDP_INPUT_NEXT_DROP; + } + + trace_x1: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + udp_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR]) + { + tr->src_port = h0->src_port; + tr->dst_port = h0->dst_port; + tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR && + next0 != UDP_INPUT_NEXT_ICMP6_ERROR); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER, + n_no_listener); + return from_frame->n_vectors; +} + +static char *udp_error_strings[] = { +#define udp_error(n,s) s, +#include "udp_error.def" +#undef udp_error +}; + +static uword +udp4_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +udp6_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_input_node) = { + .function = udp4_input, + .name = "ip4-udp-lookup", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp6_input_node) = { + .function = udp6_input, + .name = "ip6-udp-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof (udp_input_runtime_t), + + .n_errors = UDP_N_ERROR, + .error_strings = udp_error_strings, + + .n_next_nodes = UDP_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [UDP_INPUT_NEXT_##s] = n, + foreach_udp_input_next +#undef _ + }, + + .format_buffer = format_udp_header, + .format_trace = format_udp_rx_trace, + .unformat_buffer = unformat_udp_header, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input); + +static void +add_dst_port (udp_main_t * um, + udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4) +{ + udp_dst_port_info_t *pi; + u32 i; + + vec_add2 (um->dst_port_infos[is_ip4], pi, 1); + i = pi - um->dst_port_infos[is_ip4]; + + pi->name = dst_port_name; + pi->dst_port = dst_port; + pi->next_index = pi->node_index = ~0; + + hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i); + + if (pi->name) + hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i); +} + +void +udp_register_dst_port (vlib_main_t * vm, + udp_dst_port_t dst_port, u32 node_index, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + if (!pi) + { + add_dst_port (um, dst_port, 0, is_ip4); + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + ASSERT (pi); + } + + pi->node_index = node_index; + pi->next_index = vlib_node_add_next (vm, + is_ip4 ? udp4_input_node.index + : udp6_input_node.index, node_index); + + /* Setup udp protocol -> next index sparse vector mapping. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = pi->next_index; +} + +void +udp_unregister_dst_port (vlib_main_t * vm, udp_dst_port_t dst_port, u8 is_ip4) +{ + udp_main_t *um = &udp_main; + udp_dst_port_info_t *pi; + udp_input_runtime_t *rt; + u16 *n; + + pi = udp_get_dst_port_info (um, dst_port, is_ip4); + /* Not registered? Fagedaboudit */ + if (!pi) + return; + + /* Kill the mapping. Don't bother killing the pi, it may be back. */ + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + n = sparse_vec_validate (rt->next_by_dst_port, + clib_host_to_net_u16 (dst_port)); + n[0] = SPARSE_VEC_INVALID_INDEX; +} + +void +udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + udp_input_runtime_t *rt; + + { + clib_error_t *error = vlib_call_init_function (vm, udp_local_init); + if (error) + clib_error_report (error); + } + + rt = vlib_node_get_runtime_data + (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index); + + rt->punt_unknown = is_add; +} + +/* Parse a UDP header. 
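+   Accepts "src-port %d dst-port %d", e.g. "src-port 1234 dst-port 4789",
+   and writes a udp_header_t with those ports (in network byte order)
+   into the *result vector.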
*/ +uword +unformat_udp_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + udp_header_t *udp; + __attribute__ ((unused)) int old_length; + u16 src_port, dst_port; + + /* Allocate space for IP header. */ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + udp = p; + } + + memset (udp, 0, sizeof (udp[0])); + if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port)) + { + udp->src_port = clib_host_to_net_u16 (src_port); + udp->dst_port = clib_host_to_net_u16 (dst_port); + return 1; + } + return 0; +} + +static void +udp_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_udp_header; + n->unformat_buffer = unformat_udp_header; + pn->unformat_edit = unformat_pg_udp_header; +} + +clib_error_t * +udp_local_init (vlib_main_t * vm) +{ + udp_input_runtime_t *rt; + udp_main_t *um = &udp_main; + int i; + + { + clib_error_t *error; + error = vlib_call_init_function (vm, udp_init); + if (error) + clib_error_report (error); + } + + + for (i = 0; i < 2; i++) + { + um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword)); + um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword)); + } + + udp_setup_node (vm, udp4_input_node.index); + udp_setup_node (vm, udp6_input_node.index); + + rt = vlib_node_get_runtime_data (vm, udp4_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */); + foreach_udp4_dst_port +#undef _ + rt = vlib_node_get_runtime_data (vm, udp6_input_node.index); + + rt->next_by_dst_port = sparse_vec_new + ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]), + /* bits in index */ BITS (((udp_header_t *) 0)->dst_port)); + + rt->punt_unknown = 0; + +#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */); + foreach_udp6_dst_port +#undef _ + ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index); + /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */ + return 0; +} + +VLIB_INIT_FUNCTION (udp_local_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_packet.h b/src/vnet/udp/udp_packet.h new file mode 100644 index 00000000..beea3059 --- /dev/null +++ b/src/vnet/udp/udp_packet.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip4/udp_packet.h: UDP packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_udp_packet_h +#define included_udp_packet_h + +typedef struct +{ + /* Source and destination port. */ + u16 src_port, dst_port; + + /* Length of UDP header plus payload. */ + u16 length; + + /* Checksum of UDP pseudo-header and data or + zero if checksum is disabled. */ + u16 checksum; +} udp_header_t; + +#endif /* included_udp_packet_h */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/udp/udp_pg.c b/src/vnet/udp/udp_pg.c new file mode 100644 index 00000000..c9d8d38c --- /dev/null +++ b/src/vnet/udp/udp_pg.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/udp_pg: UDP packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include /* for unformat_udp_udp_port */ + +#define UDP_PG_EDIT_LENGTH (1 << 0) +#define UDP_PG_EDIT_CHECKSUM (1 << 1) + +always_inline void +udp_pg_edit_function_inline (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, + u32 * packets, u32 n_packets, u32 flags) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_offset, udp_offset; + + udp_offset = g->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + udp_header_t *udp0; + u32 udp_len0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_offset); + udp0 = (void *) (p0->data + udp_offset); + udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]); + + if (flags & UDP_PG_EDIT_LENGTH) + udp0->length = + clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0) + - ip_offset); + + /* Initialize checksum with header. */ + if (flags & UDP_PG_EDIT_CHECKSUM) + { + ip_csum_t sum0; + + sum0 = clib_mem_unaligned (&ip0->src_address, u64); + + sum0 = ip_csum_with_carry + (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16))); + + /* Invalidate possibly old checksum. */ + udp0->checksum = 0; + + sum0 = + ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, + sum0); + + sum0 = ~ip_csum_fold (sum0); + + /* Zero checksum means checksumming disabled. */ + sum0 = sum0 != 0 ? sum0 : 0xffff; + + udp0->checksum = sum0; + } + } +} + +static void +udp_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + switch (g->edit_function_opaque) + { + case UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_LENGTH); + break; + + case UDP_PG_EDIT_CHECKSUM: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM); + break; + + case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH: + udp_pg_edit_function_inline (pg, s, g, packets, n_packets, + UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH); + break; + + default: + ASSERT (0); + break; + } +} + +typedef struct +{ + pg_edit_t src_port, dst_port; + pg_edit_t length; + pg_edit_t checksum; +} pg_udp_header_t; + +static inline void +pg_udp_header_init (pg_udp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, udp_header_t, f); + _(src_port); + _(dst_port); + _(length); + _(checksum); +#undef _ +} + +uword +unformat_pg_udp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_udp_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t), + &group_index); + pg_udp_header_init (p); + + /* Defaults. */ + p->checksum.type = PG_EDIT_UNSPECIFIED; + p->length.type = PG_EDIT_UNSPECIFIED; + + if (!unformat (input, "UDP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src_port, + unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port)) + goto error; + + /* Parse options. 
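+      length and checksum edits are optional; whichever is left
+      unspecified is computed per packet by udp_pg_edit_function below.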
*/ + while (1) + { + if (unformat (input, "length %U", + unformat_pg_edit, unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + { + ip_main_t *im = &ip_main; + u16 dst_port; + tcp_udp_port_info_t *pi; + + pi = 0; + if (p->dst_port.type == PG_EDIT_FIXED) + { + dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO); + pi = ip_get_tcp_udp_port_info (im, dst_port); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + p = pg_get_edit_group (s, group_index); + if (p->checksum.type == PG_EDIT_UNSPECIFIED + || p->length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = udp_pg_edit_function; + g->edit_function_opaque = 0; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= UDP_PG_EDIT_LENGTH; + } + + return 1; + } + +error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 142acedc..c4075db6 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h index 1b4bc44e..e768d230 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/vnet/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * @brief VXLAN GPE header struct diff --git a/src/vnet/vxlan/vxlan.h b/src/vnet/vxlan/vxlan.h index adfa3a8e..dca1cd12 100644 --- a/src/vnet/vxlan/vxlan.h +++ b/src/vnet/vxlan/vxlan.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index 24f48293..2d6e4f37 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -38,6 +38,7 @@ * IPSEC-GRE APIs: see .../src/vnet/ipsec-gre/{ipsec_gre.api, ipsec_gre_api.c} * LISP APIs: see .../src/vnet/lisp/{lisp.api, lisp_api.c} * LISP-GPE APIs: see .../src/vnet/lisp-gpe/{lisp_gpe.api, lisp_gpe_api.c} + * SESSION APIs: .../vnet/session/{session.api session_api.c} * MPLS APIs: see .../src/vnet/mpls/{mpls.api, mpls_api.c} * SR APIs: see .../src/vnet/sr/{sr.api, sr_api.c} * DPDK APIs: see ... 
/src/vnet/devices/dpdk/{dpdk.api, dpdk_api.c} diff --git a/src/vppinfra.am b/src/vppinfra.am index 8d375958..4b9f0c29 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -157,7 +157,9 @@ nobase_include_HEADERS = \ vppinfra/asm_mips.h \ vppinfra/asm_x86.h \ vppinfra/bihash_8_8.h \ + vppinfra/bihash_16_8.h \ vppinfra/bihash_24_8.h \ + vppinfra/bihash_48_8.h \ vppinfra/bihash_template.h \ vppinfra/bihash_template.c \ vppinfra/bitmap.h \ @@ -206,6 +208,7 @@ nobase_include_HEADERS = \ vppinfra/timer.h \ vppinfra/tw_timer_2t_1w_2048sl.h \ vppinfra/tw_timer_16t_2w_512sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.h \ vppinfra/tw_timer_template.h \ vppinfra/tw_timer_template.c \ vppinfra/types.h \ @@ -261,6 +264,8 @@ CLIB_CORE = \ vppinfra/tw_timer_2t_1w_2048sl.c \ vppinfra/tw_timer_16t_2w_512sl.h \ vppinfra/tw_timer_16t_2w_512sl.c \ + vppinfra/tw_timer_16t_1w_2048sl.h \ + vppinfra/tw_timer_16t_1w_2048sl.c \ vppinfra/unformat.c \ vppinfra/vec.c \ vppinfra/vector.c \ diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h new file mode 100644 index 00000000..ce80f70e --- /dev/null +++ b/src/vppinfra/bihash_16_8.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#undef BIHASH_TYPE + +#define BIHASH_TYPE _16_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_16_8_h__ +#define __included_bihash_16_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[2]; + u64 value; +} clib_bihash_kv_16_8_t; + +static inline int +clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u32 *dp = (u32 *) & v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_16_8 (clib_bihash_kv_16_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_16_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); + + s = format (s, "key %llu %llu value %llu", v->key[0], v->key[1], v->value); + return s; +} + +static inline int +clib_bihash_key_compare_16_8 (u64 * a, u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_16_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h new file mode 100644 index 00000000..1a6e7691 --- /dev/null +++ b/src/vppinfra/bihash_48_8.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#undef BIHASH_TYPE + +#define BIHASH_TYPE _48_8 +#define BIHASH_KVP_PER_PAGE 4 + +#ifndef __included_bihash_48_8_h__ +#define __included_bihash_48_8_h__ + +#include +#include +#include +#include + +typedef struct +{ + u64 key[6]; + u64 value; +} clib_bihash_kv_48_8_t; + +static inline int +clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v) +{ + /* Free values are memset to 0xff, check a bit... 
*/ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +#if __SSE4_2__ +#ifndef __defined_crc_u32__ +#define __defined_crc_u32__ +static inline u32 +crc_u32 (u32 data, u32 value) +{ + __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data] + "rm" (data)); + return value; +} +#endif /* __defined_crc_u32__ */ + +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + const u32 *dp = (const u32 *) &v->key[0]; + u32 value = 0; + + value = crc_u32 (dp[0], value); + value = crc_u32 (dp[1], value); + value = crc_u32 (dp[2], value); + value = crc_u32 (dp[3], value); + value = crc_u32 (dp[4], value); + value = crc_u32 (dp[5], value); + value = crc_u32 (dp[6], value); + value = crc_u32 (dp[7], value); + value = crc_u32 (dp[8], value); + value = crc_u32 (dp[9], value); + value = crc_u32 (dp[10], value); + value = crc_u32 (dp[11], value); + + return value; +} +#else +static inline u64 +clib_bihash_hash_48_8 (const clib_bihash_kv_48_8_t * v) +{ + u64 tmp = v->key[0] ^ v->key[1] ^ v->key[2] ^ v->key[3] ^ v->key[4] + ^ v->key[5]; + return clib_xxhash (tmp); +} +#endif + +static inline u8 * +format_bihash_kvp_48_8 (u8 * s, va_list * args) +{ + clib_bihash_kv_48_8_t *v = va_arg (*args, clib_bihash_kv_48_8_t *); + + s = format (s, "key %llu %llu %llu %llu %llu %llu value %llu", v->key[0], + v->key[1], v->key[2], v->key[3], v->key[4], v->key[5], + v->value); + return s; +} + +static inline int +clib_bihash_key_compare_48_8 (const u64 * a, const u64 * b) +{ + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) + | (a[4] ^ b[4]) | (a[5] ^ b[5])) == 0; +} + +#undef __included_bihash_template_h__ +#include + +#endif /* __included_bihash_48_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.c b/src/vppinfra/tw_timer_16t_1w_2048sl.c new file mode 100644 index 00000000..3f342045 --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "tw_timer_16t_1w_2048sl.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h new file mode 100644 index 00000000..685ac31e --- /dev/null +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
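
Both new bihash flavors are stamped out of the shared template, so they are used the same way as the existing 8_8 variant. Below is a minimal sketch, assuming the template body (vppinfra/bihash_template.c) is included in exactly one compilation unit for this key size; the function name and sizing parameters are placeholders.

#include <vppinfra/bihash_16_8.h>
#include <vppinfra/bihash_template.c>	/* function bodies, once per image */

static void
bihash_16_8_sketch (void)
{
  clib_bihash_16_8_t h;
  clib_bihash_kv_16_8_t kv, result;

  clib_bihash_init_16_8 (&h, "example", 32 /* buckets */, 1 << 20 /* bytes */);

  kv.key[0] = 0x123456789abcdef0ULL;
  kv.key[1] = 0x0fedcba987654321ULL;
  kv.value = 42;
  clib_bihash_add_del_16_8 (&h, &kv, 1 /* is_add */);

  if (clib_bihash_search_16_8 (&h, &kv, &result) == 0)
    ASSERT (result.value == 42);
}

The 48_8 variant is identical apart from its six-word key; as its fallback hash shows, the six words are folded together before xxhash when SSE4.2 CRC32 is not available.
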
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_16t_2w_512sl_h__ +#define __included_tw_timer_16t_2w_512sl_h__ + +/* ... So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX + +#define TW_TIMER_WHEELS 1 +#define TW_SLOTS_PER_RING 2048 +#define TW_RING_SHIFT 11 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 16 +#define LOG2_TW_TIMERS_PER_OBJECT 4 +#define TW_SUFFIX _16t_1w_2048sl + +#include + +#endif /* __included_tw_timer_16t_2w_512sl_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From dfbee41b16541b51eb8f7f4d8a831ef9407fb419 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 2 Mar 2017 18:24:10 -0500 Subject: Improve api trace replay consistency checking Change-Id: I2c4b9646d53e4c008ccbe6d09c6a683c776c1f60 Signed-off-by: Dave Barach --- src/vlibapi/api.h | 3 +++ src/vlibapi/api_shared.c | 6 ++++++ src/vlibmemory/memory_vlib.c | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index b40ece15..2cbeb63c 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -189,6 +189,9 @@ typedef struct char *region_name; char *root_path; + + /* Replay in progress? 
*/ + int replay_in_progress; } api_main_t; extern api_main_t api_main; diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 79921afe..69ba10c1 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -890,6 +890,9 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, msg += size; } + if (which == REPLAY) + am->replay_in_progress = 1; + for (; i <= last_index; i++) { trace_cfg_t *cfgp; @@ -914,6 +917,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); munmap (hp, file_size); vec_free (tmpbuf); + am->replay_in_progress = 0; return; } @@ -937,6 +941,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); munmap (hp, file_size); vec_free (tmpbuf); + am->replay_in_progress = 0; return; } endian_fp = am->msg_endian_handlers[msg_id]; @@ -1038,6 +1043,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, munmap (hp, file_size); vec_free (tmpbuf); + am->replay_in_progress = 0; } u8 * diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 231caa58..3a7415c0 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1249,6 +1249,8 @@ vl_api_show_plugin_command (vlib_main_t * vm, for (i = 0; i < vec_len (rp); i++) vlib_cli_output (vm, "%U", format_api_msg_range, rp + i); + vec_free (rp); + return 0; } @@ -1369,9 +1371,37 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) static void vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) { - /* Do nothing. We just want to trace the message */ -} + api_main_t *am = &api_main; + vl_api_msg_range_t *rp; + uword *p; + /* Noop (except for tracing) during normal operation */ + if (am->replay_in_progress == 0) + return; + + p = hash_get_mem (am->msg_range_by_name, mp->plugin_name); + if (p == 0) + { + clib_warning ("WARNING: traced plugin '%s' not in current image", + mp->plugin_name); + return; + } + + rp = vec_elt_at_index (am->msg_ranges, p[0]); + if (rp->first_msg_id != clib_net_to_host_u16 (mp->first_msg_id)) + { + clib_warning ("WARNING: traced plugin '%s' first message id %d not %d", + mp->plugin_name, clib_net_to_host_u16 (mp->first_msg_id), + rp->first_msg_id); + } + + if (rp->last_msg_id != clib_net_to_host_u16 (mp->last_msg_id)) + { + clib_warning ("WARNING: traced plugin '%s' last message id %d not %d", + mp->plugin_name, clib_net_to_host_u16 (mp->last_msg_id), + rp->last_msg_id); + } +} #define foreach_rpc_api_msg \ _(RPC_CALL,rpc_call) \ -- cgit 1.2.3-korg From dfc9b7cac857a3a49555f9fc448bd2c6aa3400a6 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Mon, 6 Mar 2017 23:51:57 +0100 Subject: Python API: Synchronous mode. 
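
The trace_plugin_msg_ids handler above only does real work while a saved trace is being replayed; the new replay_in_progress flag is what lets it, or any other handler, tell the difference. A condensed sketch of that guard pattern, with a hypothetical message type standing in for a real one:

static void
vl_api_example_marker_t_handler (vl_api_example_marker_t * mp)
{
  api_main_t *am = &api_main;

  /* Noop during normal operation; the message is still traced. */
  if (am->replay_in_progress == 0)
    return;

  /* Replay-time consistency checks, e.g. comparing values recorded in
     the trace against the current image, go here. */
}
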
Change-Id: Ic8f186dbb35bb4e2e191d311cab51315a88a2d81 Signed-off-by: Ole Troan --- src/vlibmemory/memclnt.api | 17 +- src/vpp-api/python/Makefile.am | 2 +- src/vpp-api/python/pneum/pneum.c | 273 +++++++++++++++++++++++++++---- src/vpp-api/python/pneum/pneum.h | 4 +- src/vpp-api/python/vpp_papi/pneum_wrap.c | 39 ++++- src/vpp-api/python/vpp_papi/vpp_papi.py | 194 ++++++++++------------ 6 files changed, 380 insertions(+), 149 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api index 0532d7b6..c38b483c 100644 --- a/src/vlibmemory/memclnt.api +++ b/src/vlibmemory/memclnt.api @@ -48,14 +48,27 @@ define memclnt_delete_reply { u64 handle; /* in case the client wonders */ }; -/* +/* * Client RX thread exit */ - define rx_thread_exit { u8 dummy; }; +/* + * Client RX thread suspend + */ +define memclnt_rx_thread_suspend { + u8 dummy; +}; + +/* + * Client read timeout + */ +define memclnt_read_timeout { + u8 dummy; +}; + /* * RPC */ diff --git a/src/vpp-api/python/Makefile.am b/src/vpp-api/python/Makefile.am index cd8db4f6..54076822 100644 --- a/src/vpp-api/python/Makefile.am +++ b/src/vpp-api/python/Makefile.am @@ -39,7 +39,7 @@ libpneum_la_LDFLAGS = -module libpneum_la_CPPFLAGS = # TODO: Support both Python 2 and 3. -install-exec-local: +install-exec-local: $(lib_LTLIBRARIES) cd $(srcdir); \ mkdir -p $(pythondir); \ mkdir -p $(pyexecdir); \ diff --git a/src/vpp-api/python/pneum/pneum.c b/src/vpp-api/python/pneum/pneum.c index 37c8d8fe..da9d69df 100644 --- a/src/vpp-api/python/pneum/pneum.c +++ b/src/vpp-api/python/pneum/pneum.c @@ -22,9 +22,7 @@ #include #include #include -#include #include - #include #include #include @@ -35,6 +33,16 @@ #include "pneum.h" +/* + * Asynchronous mode: + * Client registers a callback. All messages are sent to the callback. + * Synchronous mode: + * Client calls blocking read(). + * Clients are expected to collate events on a queue. 
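
In synchronous mode the burden of demultiplexing moves to the caller, as the comment below notes. A minimal sketch of such a client, using only the functions declared in pneum.h; the wrapper function name is hypothetical and error handling is omitted.

static int
pneum_sync_client_sketch (char *request, int request_len)
{
  char *reply;
  int reply_len;

  /* NULL callback, so no rx thread: the client polls with pneum_read () */
  if (pneum_connect ("example-client", NULL /* chroot */, NULL, 32) != 0)
    return -1;

  pneum_write (request, request_len);

  /* Block for up to 5 seconds for the reply */
  if (pneum_read (&reply, &reply_len, 5) == 0)
    {
      /* ...decode the message; unrelated events must be queued by the caller... */
      pneum_free (reply);
    }

  return pneum_disconnect ();
}
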
+ * pneum_write() -> suspends RX thread + * pneum_read() -> resumes RX thread + */ + #define vl_typedefs /* define message structures */ #include #undef vl_typedefs @@ -47,15 +55,50 @@ vlib_main_t vlib_global_main; vlib_main_t **vlib_mains; typedef struct { - u8 rx_thread_jmpbuf_valid; u8 connected_to_vlib; - jmp_buf rx_thread_jmpbuf; pthread_t rx_thread_handle; + pthread_t timeout_thread_handle; + pthread_mutex_t queue_lock; + pthread_cond_t suspend_cv; + pthread_cond_t resume_cv; + pthread_mutex_t timeout_lock; + pthread_cond_t timeout_cv; + pthread_cond_t timeout_cancel_cv; + pthread_cond_t terminate_cv; } pneum_main_t; pneum_main_t pneum_main; - pneum_callback_t pneum_callback; +u16 read_timeout = 0; +bool rx_is_running = false; + +static void +init (void) +{ + pneum_main_t *pm = &pneum_main; + memset(pm, 0, sizeof(*pm)); + pthread_mutex_init(&pm->queue_lock, NULL); + pthread_cond_init(&pm->suspend_cv, NULL); + pthread_cond_init(&pm->resume_cv, NULL); + pthread_mutex_init(&pm->timeout_lock, NULL); + pthread_cond_init(&pm->timeout_cv, NULL); + pthread_cond_init(&pm->timeout_cancel_cv, NULL); + pthread_cond_init(&pm->terminate_cv, NULL); +} + +static void +cleanup (void) +{ + pneum_main_t *pm = &pneum_main; + pthread_cond_destroy(&pm->suspend_cv); + pthread_cond_destroy(&pm->resume_cv); + pthread_cond_destroy(&pm->timeout_cv); + pthread_cond_destroy(&pm->timeout_cancel_cv); + pthread_cond_destroy(&pm->terminate_cv); + pthread_mutex_destroy(&pm->queue_lock); + pthread_mutex_destroy(&pm->timeout_lock); + memset (pm, 0, sizeof (*pm)); +} /* * Satisfy external references when -lvlib is not available. @@ -75,11 +118,6 @@ static void pneum_api_handler (void *msg) { u16 id = ntohs(*((u16 *)msg)); - if (id == VL_API_RX_THREAD_EXIT) { - pneum_main_t *pm = &pneum_main; - vl_msg_api_free(msg); - longjmp(pm->rx_thread_jmpbuf, 1); - } msgbuf_t *msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data)); int l = ntohl(msgbuf->data_len); if (l == 0) @@ -101,16 +139,108 @@ pneum_rx_thread_fn (void *arg) q = am->vl_input_queue; - /* So we can make the rx thread terminate cleanly */ - if (setjmp(pm->rx_thread_jmpbuf) == 0) { - pm->rx_thread_jmpbuf_valid = 1; - while (1) - while (!unix_shared_memory_queue_sub(q, (u8 *)&msg, 0)) - pneum_api_handler((void *)msg); - } + while (1) + while (!unix_shared_memory_queue_sub(q, (u8 *)&msg, 0)) + { + u16 id = ntohs(*((u16 *)msg)); + switch (id) { + case VL_API_RX_THREAD_EXIT: + vl_msg_api_free((void *) msg); + /* signal waiting threads that this thread is about to terminate */ + pthread_mutex_lock(&pm->queue_lock); + pthread_cond_signal(&pm->terminate_cv); + pthread_mutex_unlock(&pm->queue_lock); + pthread_exit(0); + return 0; + break; + + case VL_API_MEMCLNT_RX_THREAD_SUSPEND: + vl_msg_api_free((void * )msg); + /* Suspend thread and signal reader */ + pthread_mutex_lock(&pm->queue_lock); + pthread_cond_signal(&pm->suspend_cv); + /* Wait for the resume signal */ + pthread_cond_wait (&pm->resume_cv, &pm->queue_lock); + pthread_mutex_unlock(&pm->queue_lock); + break; + + case VL_API_MEMCLNT_READ_TIMEOUT: + clib_warning("Received read timeout in async thread\n"); + vl_msg_api_free((void *) msg); + break; + + default: + pneum_api_handler((void *)msg); + } + } +} + +static void * +pneum_timeout_thread_fn (void *arg) +{ + vl_api_memclnt_read_timeout_t *ep; + pneum_main_t *pm = &pneum_main; + api_main_t *am = &api_main; + struct timespec ts; + struct timeval tv; + u16 timeout; + int rv; + + while (1) + { + /* Wait for poke */ + pthread_mutex_lock(&pm->timeout_lock); + 
pthread_cond_wait (&pm->timeout_cv, &pm->timeout_lock); + timeout = read_timeout; + gettimeofday(&tv, NULL); + ts.tv_sec = tv.tv_sec + timeout; + ts.tv_nsec = 0; + rv = pthread_cond_timedwait (&pm->timeout_cancel_cv, + &pm->timeout_lock, &ts); + pthread_mutex_unlock(&pm->timeout_lock); + if (rv == ETIMEDOUT) + { + ep = vl_msg_api_alloc (sizeof (*ep)); + ep->_vl_msg_id = ntohs(VL_API_MEMCLNT_READ_TIMEOUT); + vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep); + } + } pthread_exit(0); } +void +pneum_rx_suspend (void) +{ + api_main_t *am = &api_main; + pneum_main_t *pm = &pneum_main; + vl_api_memclnt_rx_thread_suspend_t *ep; + + if (!pm->rx_thread_handle) return; + pthread_mutex_lock(&pm->queue_lock); + if (rx_is_running) + { + ep = vl_msg_api_alloc (sizeof (*ep)); + ep->_vl_msg_id = ntohs(VL_API_MEMCLNT_RX_THREAD_SUSPEND); + vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep); + /* Wait for RX thread to tell us it has suspendend */ + pthread_cond_wait(&pm->suspend_cv, &pm->queue_lock); + rx_is_running = false; + } + pthread_mutex_unlock(&pm->queue_lock); +} + +void +pneum_rx_resume (void) +{ + pneum_main_t *pm = &pneum_main; + if (!pm->rx_thread_handle) return; + pthread_mutex_lock(&pm->queue_lock); + if (rx_is_running) return; + pthread_cond_signal(&pm->resume_cv); + rx_is_running = true; + pthread_mutex_unlock(&pm->queue_lock); +} + uword * pneum_msg_table_get_hash (void) { @@ -126,12 +256,13 @@ pneum_msg_table_size(void) } int -pneum_connect (char * name, char * chroot_prefix, pneum_callback_t cb, +pneum_connect (char * name, char * chroot_prefix, pneum_callback_t cb, int rx_qlen) { int rv = 0; pneum_main_t *pm = &pneum_main; + init(); if (chroot_prefix != NULL) vl_set_memory_root_path (chroot_prefix); @@ -154,6 +285,16 @@ pneum_connect (char * name, char * chroot_prefix, pneum_callback_t cb, return (-1); } pneum_callback = cb; + rx_is_running = true; + } + + /* Start read timeout thread */ + rv = pthread_create(&pm->timeout_thread_handle, NULL, + pneum_timeout_thread_fn, 0); + if (rv) { + clib_warning("pthread_create returned %d", rv); + vl_client_api_unmap(); + return (-1); } pm->connected_to_vlib = 1; @@ -167,31 +308,69 @@ pneum_disconnect (void) api_main_t *am = &api_main; pneum_main_t *pm = &pneum_main; - if (pm->rx_thread_jmpbuf_valid) { + if (!pm->connected_to_vlib) return 0; + + if (pm->rx_thread_handle) { vl_api_rx_thread_exit_t *ep; uword junk; ep = vl_msg_api_alloc (sizeof (*ep)); ep->_vl_msg_id = ntohs(VL_API_RX_THREAD_EXIT); vl_msg_api_send_shmem(am->vl_input_queue, (u8 *)&ep); - pthread_join(pm->rx_thread_handle, (void **) &junk); - } - if (pm->connected_to_vlib) { - vl_client_disconnect(); - vl_client_api_unmap(); - pneum_callback = 0; + + /* wait (with timeout) until RX thread has finished */ + struct timespec ts; + struct timeval tv; + gettimeofday(&tv, NULL); + ts.tv_sec = tv.tv_sec + 5; + ts.tv_nsec = 0; + pthread_mutex_lock(&pm->queue_lock); + int rv = pthread_cond_timedwait(&pm->terminate_cv, &pm->queue_lock, &ts); + pthread_mutex_unlock(&pm->queue_lock); + /* now join so we wait until thread has -really- finished */ + if (rv == ETIMEDOUT) + pthread_cancel(pm->rx_thread_handle); + else + pthread_join(pm->rx_thread_handle, (void **) &junk); } - memset (pm, 0, sizeof (*pm)); + if (pm->timeout_thread_handle) + pthread_cancel(pm->timeout_thread_handle); + + vl_client_disconnect(); + vl_client_api_unmap(); + pneum_callback = 0; + + cleanup(); return (0); } +static void +set_timeout (unsigned short timeout) +{ + pneum_main_t *pm = &pneum_main; + 
pthread_mutex_lock(&pm->timeout_lock); + read_timeout = timeout; + pthread_cond_signal(&pm->timeout_cv); + pthread_mutex_unlock(&pm->timeout_lock); +} + +static void +unset_timeout (void) +{ + pneum_main_t *pm = &pneum_main; + pthread_mutex_lock(&pm->timeout_lock); + pthread_cond_signal(&pm->timeout_cancel_cv); + pthread_mutex_unlock(&pm->timeout_lock); +} + int -pneum_read (char **p, int *l) +pneum_read (char **p, int *l, u16 timeout) { unix_shared_memory_queue_t *q; api_main_t *am = &api_main; pneum_main_t *pm = &pneum_main; uword msg; + msgbuf_t *msgbuf; if (!pm->connected_to_vlib) return -1; @@ -199,21 +378,48 @@ pneum_read (char **p, int *l) if (am->our_pid == 0) return (-1); + /* Poke timeout thread */ + if (timeout) + set_timeout(timeout); + q = am->vl_input_queue; int rv = unix_shared_memory_queue_sub(q, (u8 *)&msg, 0); if (rv == 0) { u16 msg_id = ntohs(*((u16 *)msg)); - msgbuf_t *msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data)); - *l = ntohl(msgbuf->data_len); - if (*l == 0) { - printf("Unregistered API message: %d\n", msg_id); - return (-1); + switch (msg_id) { + case VL_API_RX_THREAD_EXIT: + printf("Received thread exit\n"); + return -1; + case VL_API_MEMCLNT_RX_THREAD_SUSPEND: + printf("Received thread suspend\n"); + goto error; + case VL_API_MEMCLNT_READ_TIMEOUT: + printf("Received read timeout %ds\n", timeout); + goto error; + + default: + msgbuf = (msgbuf_t *)(((u8 *)msg) - offsetof(msgbuf_t, data)); + *l = ntohl(msgbuf->data_len); + if (*l == 0) { + printf("Unregistered API message: %d\n", msg_id); + goto error; + } } *p = (char *)msg; + + /* Let timeout notification thread know we're done */ + unset_timeout(); + } else { printf("Read failed with %d\n", rv); } return (rv); + + error: + vl_msg_api_free((void *) msg); + /* Client might forget to resume RX thread on failure */ + pneum_rx_resume (); + return -1; } /* @@ -241,12 +447,13 @@ pneum_write (char *p, int l) if (!pm->connected_to_vlib) return -1; if (!mp) return (-1); + memcpy(mp, p, l); mp->client_index = pneum_client_index(); q = am->shmem_hdr->vl_input_queue; rv = unix_shared_memory_queue_add(q, (u8 *)&mp, 0); if (rv != 0) { - printf("vpe_api_write fails: %d\n", rv); + clib_warning("vpe_api_write fails: %d\n", rv); /* Clear message */ pneum_free(mp); } diff --git a/src/vpp-api/python/pneum/pneum.h b/src/vpp-api/python/pneum/pneum.h index 9312eb47..c4b55ae0 100644 --- a/src/vpp-api/python/pneum/pneum.h +++ b/src/vpp-api/python/pneum/pneum.h @@ -22,11 +22,13 @@ typedef void (*pneum_callback_t)(unsigned char * data, int len); int pneum_connect(char * name, char * chroot_prefix, pneum_callback_t cb, int rx_qlen); int pneum_disconnect(void); -int pneum_read(char **data, int *l); +int pneum_read(char **data, int *l, unsigned short timeout); int pneum_write(char *data, int len); void pneum_free(void * msg); uword * pneum_msg_table_get_hash (void); int pneum_msg_table_size(void); uint32_t pneum_get_msg_index(unsigned char * name); +void pneum_rx_suspend (void); +void pneum_rx_resume (void); #endif diff --git a/src/vpp-api/python/vpp_papi/pneum_wrap.c b/src/vpp-api/python/vpp_papi/pneum_wrap.c index 748b9674..c5a7eea1 100644 --- a/src/vpp-api/python/vpp_papi/pneum_wrap.c +++ b/src/vpp-api/python/vpp_papi/pneum_wrap.c @@ -42,19 +42,19 @@ wrap_pneum_callback (unsigned char * data, int len) } static PyObject * -wrap_connect (PyObject *self, PyObject *args) +wrap_connect (PyObject *self, PyObject *args, PyObject *kw) { char * name, * chroot_prefix = NULL; - int rx_qlen=32; /* default rx queue length */ + int rx_qlen 
= 32; /* default rx queue length */ int rv; PyObject * temp = NULL; pneum_callback_t cb = NULL; - if (!PyArg_ParseTuple(args, "s|Ois:wrap_connect", - &name, &temp, &rx_qlen, &chroot_prefix)) + if (!PyArg_ParseTuple(args, "sOzi:wrap_connect", + &name, &temp, &chroot_prefix, &rx_qlen)) return (NULL); - if (temp) + if (temp != Py_None) { if (!PyCallable_Check(temp)) { @@ -82,6 +82,7 @@ wrap_disconnect (PyObject *self, PyObject *args) Py_END_ALLOW_THREADS return PyLong_FromLong(rv); } + static PyObject * wrap_write (PyObject *self, PyObject *args) { @@ -90,6 +91,7 @@ wrap_write (PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "s#", &data, &len)) return NULL; + Py_BEGIN_ALLOW_THREADS rv = pneum_write(data, len); Py_END_ALLOW_THREADS @@ -102,9 +104,12 @@ wrap_read (PyObject *self, PyObject *args) { char *data; int len, rv; + unsigned short timeout; + if (!PyArg_ParseTuple(args, "H", &timeout)) + return (NULL); Py_BEGIN_ALLOW_THREADS - rv = pneum_read(&data, &len); + rv = pneum_read(&data, &len, timeout); Py_END_ALLOW_THREADS if (rv != 0) { Py_RETURN_NONE; } @@ -113,9 +118,9 @@ wrap_read (PyObject *self, PyObject *args) #else PyObject *ret = Py_BuildValue("s#", data, len); #endif + pneum_free(data); if (!ret) { Py_RETURN_NONE; } - pneum_free(data); return ret; } @@ -147,12 +152,32 @@ wrap_msg_table (PyObject *self, PyObject *args) Py_RETURN_NONE; } +static PyObject * +wrap_suspend (PyObject *self, PyObject *args) +{ + Py_BEGIN_ALLOW_THREADS + pneum_rx_suspend(); + Py_END_ALLOW_THREADS + Py_RETURN_NONE; +} + +static PyObject * +wrap_resume (PyObject *self, PyObject *args) +{ + Py_BEGIN_ALLOW_THREADS + pneum_rx_resume(); + Py_END_ALLOW_THREADS + Py_RETURN_NONE; +} + static PyMethodDef vpp_api_Methods[] = { {"connect", wrap_connect, METH_VARARGS, "Connect to the VPP API."}, {"disconnect", wrap_disconnect, METH_VARARGS, "Disconnect from the VPP API."}, {"write", wrap_write, METH_VARARGS, "Write data to the VPP API."}, {"read", wrap_read, METH_VARARGS, "Read data from the VPP API."}, {"msg_table", wrap_msg_table, METH_VARARGS, "Get API dictionary."}, + {"suspend", wrap_suspend, METH_VARARGS, "Suspend RX thread."}, + {"resume", wrap_resume, METH_VARARGS, "Resume RX thread."}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/src/vpp-api/python/vpp_papi/vpp_papi.py b/src/vpp-api/python/vpp_papi/vpp_papi.py index 83247ffa..0c40f171 100644 --- a/src/vpp-api/python/vpp_papi/vpp_papi.py +++ b/src/vpp-api/python/vpp_papi/vpp_papi.py @@ -16,7 +16,7 @@ from __future__ import print_function import sys, os, logging, collections, struct, json, threading, glob -import atexit +import atexit, Queue logging.basicConfig(level=logging.DEBUG) import vpp_api @@ -57,7 +57,7 @@ class VPP(): provides a means to register a callback function to receive these messages in a background thread. """ - def __init__(self, apifiles = None, testmode = False): + def __init__(self, apifiles = None, testmode = False, async_thread = True): """Create a VPP API object. 
apifiles is a list of files containing API @@ -72,11 +72,15 @@ class VPP(): self.buffersize = 10000 self.connected = False self.header = struct.Struct('>HI') - self.results_lock = threading.Lock() - self.results = {} - self.timeout = 5 self.apifiles = [] self.event_callback = None + self.message_queue = Queue.Queue() + self.read_timeout = 0 + self.vpp_api = vpp_api + if async_thread: + self.event_thread = threading.Thread(target=self.thread_msg_handler) + self.event_thread.daemon = True + self.event_thread.start() if not apifiles: # Pick up API definitions from default directory @@ -346,7 +350,7 @@ class VPP(): f = self.make_function(name, i, msgdef, multipart, async) setattr(self._api, name, FuncWrapper(f)) - # olf API stuff starts here - will be removed in 17.07 + # old API stuff starts here - will be removed in 17.07 if hasattr(self, name): raise NameError( 3, "Conflicting name in JSON definition: `%s'" % name) @@ -359,6 +363,12 @@ class VPP(): raise IOError(1, 'Not connected') return vpp_api.write(str(buf)) + def _read (self): + if not self.connected: + raise IOError(1, 'Not connected') + + return vpp_api.read(self.read_timeout) + def _load_dictionary(self): self.vpp_dictionary = {} self.vpp_dictionary_maxid = 0 @@ -372,6 +382,19 @@ class VPP(): self.vpp_dictionary[name] = { 'id' : i, 'crc' : crc } self.vpp_dictionary_maxid = max(self.vpp_dictionary_maxid, i) + def connect_internal(self, name, msg_handler, chroot_prefix, rx_qlen, async): + rv = vpp_api.connect(name, msg_handler, chroot_prefix, rx_qlen) + if rv != 0: + raise IOError(2, 'Connect failed') + self.connected = True + + self._load_dictionary() + self._register_functions(async=async) + + # Initialise control ping + self.control_ping_index = self.vpp_dictionary['control_ping']['id'] + self.control_ping_msgdef = self.messages['control_ping'] + def connect(self, name, chroot_prefix = None, async = False, rx_qlen = 32): """Attach to VPP. @@ -381,22 +404,22 @@ class VPP(): rx_qlen - the length of the VPP message receive queue between client and server. """ - msg_handler = self.msg_handler_sync if not async else self.msg_handler_async - if chroot_prefix is not None: - rv = vpp_api.connect(name, msg_handler, rx_qlen, chroot_prefix) - else: - rv = vpp_api.connect(name, msg_handler, rx_qlen) + msg_handler = self.msg_handler_sync if not async \ + else self.msg_handler_async + return self.connect_internal(name, msg_handler, chroot_prefix, rx_qlen, + async) - if rv != 0: - raise IOError(2, 'Connect failed') - self.connected = True + def connect_sync (self, name, chroot_prefix = None, rx_qlen = 32): + """Attach to VPP in synchronous mode. Application must poll for events. - self._load_dictionary() - self._register_functions(async=async) + name - the name of the client. + chroot_prefix - if VPP is chroot'ed, the prefix of the jail + rx_qlen - the length of the VPP message receive queue between + client and server. + """ - # Initialise control ping - self.control_ping_index = self.vpp_dictionary['control_ping']['id'] - self.control_ping_msgdef = self.messages['control_ping'] + return self.connect_internal(name, None, chroot_prefix, rx_qlen, + async=False) def disconnect(self): """Detach from VPP.""" @@ -404,56 +427,6 @@ class VPP(): self.connected = False return rv - def results_wait(self, context): - """In a sync call, wait for the reply - - The context ID is used to pair reply to request. - """ - - # Results is filled by the background callback. It will - # raise the event when the context receives a response. 
- # Given there are two threads we have to be careful with the - # use of results and the structures under it, hence the lock. - with self.results_lock: - result = self.results[context] - ev = result['e'] - - timed_out = not ev.wait(self.timeout) - - if timed_out: - raise IOError(3, 'Waiting for reply timed out') - else: - with self.results_lock: - result = self.results[context] - del self.results[context] - return result['r'] - - def results_prepare(self, context, multi=False): - """Prep for receiving a result in response to a request msg - - context - unique context number sent in request and - returned in reply or replies - multi - true if we expect multiple messages from this - reply. - """ - - # The event is used to indicate that all results are in - new_result = { - 'e': threading.Event(), - } - if multi: - # Make it clear to the BG thread it's going to see several - # messages; messages are stored in a results array - new_result['m'] = True - new_result['r'] = [] - - new_result['e'].clear() - - # Put the prepped result structure into results, at which point - # the bg thread can also access it (hence the thread lock) - with self.results_lock: - self.results[context] = new_result - def msg_handler_sync(self, msg): """Process an incoming message from VPP in sync mode. @@ -473,32 +446,9 @@ class VPP(): if context == 0: # No context -> async notification that we feed to the callback - if self.event_callback: - self.event_callback(msgname, r) + self.message_queue.put_nowait(r) else: - # Context -> use the results structure (carefully) to find - # who we're responding to and return the message to that - # thread - with self.results_lock: - if context not in self.results: - eprint('Not expecting results for this context', context, r) - else: - result = self.results[context] - - # - # Collect results until control ping - # - - if msgname == 'control_ping_reply': - # End of a multipart - result['e'].set() - elif 'm' in self.results[context]: - # One element in a multipart - result['r'].append(r) - else: - # All of a single result - result['r'] = r - result['e'].set() + raise IOError(2, 'RPC reply message received in event handler') def decode_incoming_msg(self, msg): if not msg: @@ -556,16 +506,16 @@ class VPP(): no response within the timeout window. """ - # We need a context if not supplied, in order to get the - # response - context = kwargs.get('context', self.get_context()) - kwargs['context'] = context - - # Set up to receive a response - self.results_prepare(context, multi=multipart) + if not 'context' in kwargs: + context = self.get_context() + kwargs['context'] = context + else: + context = kwargs['context'] + kwargs['_vl_msg_id'] = i + b = self.encode(msgdef, kwargs) - # Output the message - self._call_vpp_async(i, msgdef, **kwargs) + vpp_api.suspend() + self._write(b) if multipart: # Send a ping after the request - we use its response @@ -573,9 +523,30 @@ class VPP(): self._control_ping(context) # Block until we get a reply. 
- r = self.results_wait(context) + rl = [] + while (True): + msg = self._read() + if not msg: + print('PNEUM ERROR: OH MY GOD') + raise IOError(2, 'PNEUM read failed') + + r = self.decode_incoming_msg(msg) + msgname = type(r).__name__ + if not context in r or r.context == 0 or context != r.context: + self.message_queue.put_nowait(r) + continue - return r + if not multipart: + rl = r + break + if msgname == 'control_ping_reply': + break + + rl.append(r) + + vpp_api.resume() + + return rl def _call_vpp_async(self, i, msgdef, **kwargs): """Given a message, send the message and await a reply. @@ -613,3 +584,16 @@ class VPP(): callback. """ self.event_callback = callback + + def thread_msg_handler(self): + """Python thread calling the user registerd message handler. + + This is to emulate the old style event callback scheme. Modern + clients should provide their own thread to poll the event + queue. + """ + while True: + r = self.message_queue.get() + msgname = type(r).__name__ + if self.event_callback: + self.event_callback(msgname, r) -- cgit 1.2.3-korg From 80f54e20270ed0628ee725e3e3c515731a0188f2 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 8 Mar 2017 19:08:56 -0500 Subject: vlib_mains == 0 special cases be gone Clean up spurious binary API client link dependency on libvlib.so, which managed to hide behind vlib_mains == 0 checks reached by VLIB_xxx_FUNCTION macros. Change-Id: I5df1f8ab07dca1944250e643ccf06e60a8462325 Signed-off-by: Dave Barach --- src/plugins/dpdk/ipsec/ipsec.c | 8 +- src/vlib-api.am | 4 +- src/vlib/buffer.c | 27 +- src/vlib/global_funcs.h | 2 +- src/vlib/node_cli.c | 28 +- src/vlib/node_funcs.h | 4 +- src/vlib/threads.c | 16 +- src/vlib/threads.h | 43 ++- src/vlibapi/api.h | 4 +- src/vlibapi/api_shared.c | 530 ++--------------------------------- src/vlibapi/node_serialize.c | 15 +- src/vlibmemory/memory_vlib.c | 471 +++++++++++++++++++++++++++++++ src/vnet/devices/virtio/vhost-user.c | 9 +- src/vpp-api-test.am | 2 - src/vpp/api/api.c | 1 - src/vpp/api/gmon.c | 9 +- 16 files changed, 575 insertions(+), 598 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 16bec20a..b0aaaaec 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -380,13 +380,9 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, im->cb.check_support_cb = dpdk_ipsec_check_support; im->cb.add_del_sa_sess_cb = add_del_sa_sess; - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); /* TODO cryptodev counters */ diff --git a/src/vlib-api.am b/src/vlib-api.am index c05929b1..4e1dae99 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -14,7 +14,7 @@ lib_LTLIBRARIES += libvlibmemory.la libvlibapi.la libvlibmemoryclient.la \ libvlibsocket.la -libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la +libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread libvlibmemory_la_SOURCES = \ vlibmemory/api.h \ @@ -26,7 +26,7 @@ libvlibmemory_la_SOURCES = \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h -libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlib.la 
libvlibmemory.la +libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlibapi_la_LIBADD = $(libvlibapi_la_DEPENDENCIES) libvlibapi_la_SOURCES = \ vlibapi/api.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 9f26bec7..6ba82584 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -261,7 +261,28 @@ done: return result; } -vlib_main_t **vlib_mains; +/* + * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) =1 + * and vlib_mains[0] = &vlib_global_main from the beginning of time. + * + * The only place which should ever expand vlib_mains is start_workers() + * in threads.c. It knows about the bootstrap vector. + */ +/* *INDENT-OFF* */ +static struct +{ + vec_header_t h; + vlib_main_t *vm; +} __attribute__ ((packed)) __bootstrap_vlib_main_vector + __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) = +{ + .h.len = 1, + .vm = &vlib_global_main, +}; +/* *INDENT-ON* */ + +vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm; + /* When dubugging validate that given buffers are either known allocated or known free. */ @@ -280,7 +301,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, ASSERT (os_get_cpu_number () == 0); /* smp disaster check */ - if (vlib_mains) + if (vec_len (vlib_mains) > 1) ASSERT (vm == vlib_mains[0]); is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; @@ -956,7 +977,7 @@ show_buffers (vlib_main_t * vm, do { - curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm; + curr_vm = vlib_mains[vm_index]; bm = curr_vm->buffer_main; /* *INDENT-OFF* */ diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index bbdbdef5..f51ec381 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -23,7 +23,7 @@ always_inline vlib_main_t * vlib_get_main (void) { vlib_main_t *vm; - vm = vlib_mains ? vlib_mains[os_get_cpu_number ()] : &vlib_global_main; + vm = vlib_mains[os_get_cpu_number ()]; ASSERT (vm); return vm; } diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 05d0f0b5..62ab2e64 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -248,16 +248,11 @@ show_node_runtime (vlib_main_t * vm, if (unformat (input, "max") || unformat (input, "m")) max = 1; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } /* @@ -331,7 +326,7 @@ show_node_runtime (vlib_main_t * vm, } } - if (vec_len (vlib_mains)) + if (vec_len (vlib_mains) > 1) { vlib_worker_thread_t *w = vlib_worker_threads + j; if (j > 0) @@ -404,16 +399,11 @@ clear_node_runtime (vlib_main_t * vm, vlib_main_t **stat_vms = 0, *stat_vm; vlib_node_runtime_t *r; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } vlib_worker_thread_barrier_sync (vm); diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index f49a8d6f..8ccfc438 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,7 +201,7 @@ vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index) vlib_frame_t *f; u32 cpu_index = frame_index & VLIB_CPU_MASK; u32 offset = frame_index & VLIB_OFFSET_MASK; - vm = vlib_mains ? 
vlib_mains[cpu_index] : vm; + vm = vlib_mains[cpu_index]; f = vm->heap_base + offset; return f; } @@ -213,7 +213,7 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f) ASSERT (((uword) f & VLIB_CPU_MASK) == 0); - vm = vlib_mains ? vlib_mains[f->cpu_index] : vm; + vm = vlib_mains[f->cpu_index]; i = ((u8 *) f - (u8 *) vm->heap_base); return i | f->cpu_index; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index e3ea3c9c..4676be97 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -570,9 +570,13 @@ start_workers (vlib_main_t * vm) if (n_vlib_mains > 1) { - vec_validate (vlib_mains, tm->n_vlib_mains - 1); + /* Replace hand-crafted length-1 vector with a real vector */ + vlib_mains = 0; + + vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); _vec_len (vlib_mains) = 0; - vec_add1 (vlib_mains, vm); + vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); vlib_worker_threads->wait_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); @@ -685,7 +689,7 @@ start_workers (vlib_main_t * vm) /* Packet trace buffers are guaranteed to be empty, nothing to do here */ clib_mem_set_heap (oldheap); - vec_add1 (vlib_mains, vm_clone); + vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES); vm_clone->error_main.counters = vec_dup (vlib_mains[0]->error_main.counters); @@ -805,7 +809,7 @@ vlib_worker_thread_node_runtime_update (void) ASSERT (os_get_cpu_number () == 0); - if (vec_len (vlib_mains) == 0) + if (vec_len (vlib_mains) == 1) return; vm = vlib_mains[0]; @@ -1148,7 +1152,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) f64 deadline; u32 count; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; count = vec_len (vlib_mains) - 1; @@ -1179,7 +1183,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; if (--vlib_worker_threads[0].recursion_level > 0) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 75a5a281..a032311c 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -222,30 +222,25 @@ vlib_worker_thread_barrier_check (void) } } -#define foreach_vlib_main(body) \ -do { \ - vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ - int ii; \ - \ - if (vec_len (vlib_mains) == 0) \ - vec_add1 (__vlib_mains, &vlib_global_main); \ - else \ - { \ - for (ii = 0; ii < vec_len (vlib_mains); ii++) \ - { \ - this_vlib_main = vlib_mains[ii]; \ - if (this_vlib_main) \ - vec_add1 (__vlib_mains, this_vlib_main); \ - } \ - } \ - \ - for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ - { \ - this_vlib_main = __vlib_mains[ii]; \ - /* body uses this_vlib_main... */ \ - (body); \ - } \ - vec_free (__vlib_mains); \ +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... 
*/ \ + (body); \ + } \ + vec_free (__vlib_mains); \ } while (0); #define foreach_sched_policy \ diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 2cbeb63c..87a56121 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -252,11 +252,13 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which); +void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); +void vl_msg_api_barrier_release (void) __attribute__ ((weak)); void vl_msg_api_free (void *); void vl_noop_handler (void *mp); -clib_error_t *vl_api_init (vlib_main_t * vm); void vl_msg_api_increment_missing_client_counter (void); void vl_msg_api_post_mortem_dump (void); +void vl_msg_api_post_mortem_dump_enable_disable (int enable); void vl_msg_api_register_pd_handler (void *handler, u16 msg_id_host_byte_order); int vl_msg_api_pd_handler (void *mp, int rv); diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 69ba10c1..6774e3dd 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -23,11 +23,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include @@ -36,19 +31,14 @@ #include #include -api_main_t api_main; - -void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_sync (void) -{ -} - -void vl_msg_api_barrier_release (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_release (void) -{ -} +/* *INDENT-OFF* */ +api_main_t api_main = + { + .region_name = "/unset", + .api_uid = -1, + .api_gid = -1, + }; +/* *INDENT-ON* */ void vl_msg_api_increment_missing_client_counter (void) @@ -57,14 +47,6 @@ vl_msg_api_increment_missing_client_counter (void) am->missing_clients++; } -typedef enum -{ - DUMP, - CUSTOM_DUMP, - REPLAY, - INITIALIZERS, -} vl_api_replay_t; - int vl_msg_api_rx_trace_enabled (api_main_t * am) { @@ -397,6 +379,16 @@ vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, return 0; } +void +vl_msg_api_barrier_sync (void) +{ +} + +void +vl_msg_api_barrier_release (void) +{ +} + always_inline void msg_handler_internal (api_main_t * am, void *the_msg, int trace_it, int do_it, int free_it) @@ -748,495 +740,15 @@ vl_noop_handler (void *mp) { } -clib_error_t * -vl_api_init (vlib_main_t * vm) -{ - static u8 once; - api_main_t *am = &api_main; - - if (once) - return 0; - - once = 1; - - am->region_name = "/unset"; - /* - * Eventually passed to fchown, -1 => "current user" - * instead of 0 => "root". A very fine disctinction at best. 
- */ - if (am->api_uid == 0) - am->api_uid = -1; - if (am->api_gid == 0) - am->api_gid = -1; - - return (0); -} - -void vl_msg_api_custom_dump_configure (api_main_t * am) - __attribute__ ((weak)); -void -vl_msg_api_custom_dump_configure (api_main_t * am) -{ -} - -VLIB_INIT_FUNCTION (vl_api_init); - -static void -vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, - u32 first_index, u32 last_index, - vl_api_replay_t which) -{ - vl_api_trace_file_header_t *hp; - int i, fd; - struct stat statb; - size_t file_size; - u8 *msg; - u8 endian_swap_needed = 0; - api_main_t *am = &api_main; - u8 *tmpbuf = 0; - u32 nitems; - void **saved_print_handlers = 0; - - fd = open ((char *) filename, O_RDONLY); - - if (fd < 0) - { - vlib_cli_output (vm, "Couldn't open %s\n", filename); - return; - } - - if (fstat (fd, &statb) < 0) - { - vlib_cli_output (vm, "Couldn't stat %s\n", filename); - close (fd); - return; - } - - if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) - { - vlib_cli_output (vm, "File not plausible: %s\n", filename); - close (fd); - return; - } - - file_size = statb.st_size; - file_size = (file_size + 4095) & ~(4096); - - hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) - { - vlib_cli_output (vm, "mmap failed: %s\n", filename); - close (fd); - return; - } - close (fd); - - if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) - || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) - endian_swap_needed = 1; - - if (endian_swap_needed) - nitems = ntohl (hp->nitems); - else - nitems = hp->nitems; - - if (last_index == (u32) ~ 0) - { - last_index = nitems - 1; - } - - if (first_index >= nitems || last_index >= nitems) - { - vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", - first_index, last_index, nitems - 1); - munmap (hp, file_size); - return; - } - if (hp->wrapped) - vlib_cli_output (vm, - "Note: wrapped/incomplete trace, results may vary\n"); - - if (which == CUSTOM_DUMP) - { - saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); - vl_msg_api_custom_dump_configure (am); - } - - - msg = (u8 *) (hp + 1); - - for (i = 0; i < first_index; i++) - { - trace_cfg_t *cfgp; - int size; - u16 msg_id; - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - return; - } - msg += size; - } - - if (which == REPLAY) - am->replay_in_progress = 1; - - for (; i <= last_index; i++) - { - trace_cfg_t *cfgp; - u16 *msg_idp; - u16 msg_id; - int size; - - if (which == DUMP) - vlib_cli_output (vm, "---------- trace %d -----------\n", i); - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - - /* Copy the buffer (from the read-only mmap'ed file) */ - vec_validate (tmpbuf, size - 1 + sizeof (uword)); - clib_memcpy (tmpbuf + sizeof (uword), msg, size); - memset (tmpbuf, 0xf, sizeof (uword)); - - /* - * Endian swap if needed. 
All msg data is supposed to be - * in network byte order. All msg handlers are supposed to - * know that. The generic message dumpers don't know that. - * One could fix apigen, I suppose. - */ - if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) - { - void (*endian_fp) (void *); - if (msg_id >= vec_len (am->msg_endian_handlers) - || (am->msg_endian_handlers[msg_id] == 0)) - { - vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - endian_fp = am->msg_endian_handlers[msg_id]; - (*endian_fp) (tmpbuf + sizeof (uword)); - } - - /* msg_id always in network byte order */ - if (clib_arch_is_little_endian) - { - msg_idp = (u16 *) (tmpbuf + sizeof (uword)); - *msg_idp = msg_id; - } - - switch (which) - { - case CUSTOM_DUMP: - case DUMP: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - (*print_fp) (tmpbuf + sizeof (uword), vm); - } - else - { - vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", - msg_id); - break; - } - break; - - case INITIALIZERS: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *s; - int j; - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - - vlib_cli_output (vm, "/*"); - - (*print_fp) (tmpbuf + sizeof (uword), vm); - vlib_cli_output (vm, "*/\n"); - - s = format (0, "static u8 * vl_api_%s_%d[%d] = {", - am->msg_names[msg_id], i, - am->api_trace_cfg[msg_id].size); - - for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) - { - if ((j & 7) == 0) - s = format (s, "\n "); - s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); - } - s = format (s, "\n};\n%c", 0); - vlib_cli_output (vm, (char *) s); - vec_free (s); - } - break; - - case REPLAY: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id] && cfgp->replay_enable) - { - void (*handler) (void *); - - handler = (void *) am->msg_handlers[msg_id]; - - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_sync (); - (*handler) (tmpbuf + sizeof (uword)); - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_release (); - } - else - { - if (cfgp->replay_enable) - vlib_cli_output (vm, "Skipping msg id %d: no handler\n", - msg_id); - break; - } - break; - } - - _vec_len (tmpbuf) = 0; - msg += size; - } - - if (saved_print_handlers) - { - clib_memcpy (am->msg_print_handlers, saved_print_handlers, - vec_len (am->msg_print_handlers) * sizeof (void *)); - vec_free (saved_print_handlers); - } - - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; -} - -u8 * -format_vl_msg_api_trace_status (u8 * s, va_list * args) -{ - api_main_t *am = va_arg (*args, api_main_t *); - vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); - vl_api_trace_t *tp; - char *trace_name; - - switch (which) - { - case VL_API_TRACE_TX: - tp = am->tx_trace; - trace_name = "TX trace"; - break; - - case VL_API_TRACE_RX: - tp = am->rx_trace; - trace_name = "RX trace"; - break; - - default: - abort (); - } - - if (tp == 0) - { - s = format (s, "%s: not yet configured.\n", trace_name); - return s; - } - - s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", - trace_name, vec_len (tp->traces), tp->nitems, - tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); - return s; -} static u8 post_mortem_dump_enabled; -static clib_error_t * -api_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - u32 nitems = 256 << 10; - api_main_t *am = &api_main; - vl_api_trace_which_t which = VL_API_TRACE_RX; - u8 *filename; - u32 first = 0; - u32 last = (u32) ~ 0; - FILE *fp; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - } - else if (unformat (input, "off")) - { - vl_msg_api_trace_onoff (am, which, 0); - } - else if (unformat (input, "save %s", &filename)) - { - u8 *chroot_filename; - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - return 0; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - - vec_free (filename); - - fp = fopen ((char *) chroot_filename, "w"); - if (fp == NULL) - { - vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); - return 0; - } - rv = vl_msg_api_trace_save (am, which, fp); - fclose (fp); - if (rv == -1) - vlib_cli_output (vm, "API Trace data not present\n"); - else if (rv == -2) - vlib_cli_output (vm, "File for writing is closed\n"); - else if (rv == -10) - vlib_cli_output (vm, "Error while writing header to file\n"); - else if (rv == -11) - vlib_cli_output (vm, "Error while writing trace to file\n"); - else if (rv == -12) - vlib_cli_output (vm, - "Error while writing end of buffer trace to file\n"); - else if (rv == -13) - vlib_cli_output (vm, - "Error while writing start of buffer trace to file\n"); - else if (rv < 0) - vlib_cli_output (vm, "Unkown error while saving: %d", rv); - else - vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); - vec_free (chroot_filename); - } - else if (unformat (input, "dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, DUMP); - } - else if (unformat (input, "custom-dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); - } - else if (unformat (input, "replay %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, REPLAY); - } - else if (unformat (input, "initializers %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); - } - else if (unformat (input, "tx")) - { - which = VL_API_TRACE_TX; - } - else if (unformat (input, "first %d", &first)) - { - ; - } - else if (unformat (input, "last %d", &last)) - { - ; - } - else if (unformat (input, "status")) - { - vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, - am, which); - } - else if (unformat (input, "free")) - { - vl_msg_api_trace_onoff (am, which, 0); - vl_msg_api_trace_free (am, which); - } - else if (unformat (input, "post-mortem-on")) - post_mortem_dump_enabled = 1; - else if (unformat (input, "post-mortem-off")) - post_mortem_dump_enabled = 0; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (api_trace_command, static) = { - .path = "api trace", - .short_help = - "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", - .function = api_trace_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -api_config_fn (vlib_main_t * vm, unformat_input_t * 
input) +void +vl_msg_api_post_mortem_dump_enable_disable (int enable) { - u32 nitems = 256 << 10; - vl_api_trace_which_t which = VL_API_TRACE_RX; - api_main_t *am = &api_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - post_mortem_dump_enabled = 1; - } - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; + post_mortem_dump_enabled = enable; } -VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); - void vl_msg_api_post_mortem_dump (void) { diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 4dc1a7d2..50e5c41c 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -73,16 +73,11 @@ vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, if (vec_len (stat_vms) == 0) { - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } } @@ -286,7 +281,7 @@ vlib_node_unserialize (u8 * vector) return nodes_by_thread; } -#if CLIB_DEBUG > 0 +#if TEST_CODE static clib_error_t * test_node_serialize_command_fn (vlib_main_t * vm, diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 3a7415c0..d2e05968 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -1437,6 +1439,475 @@ rpc_api_hookup (vlib_main_t * vm) VLIB_API_INIT_FUNCTION (rpc_api_hookup); +typedef enum +{ + DUMP, + CUSTOM_DUMP, + REPLAY, + INITIALIZERS, +} vl_api_replay_t; + +u8 * +format_vl_msg_api_trace_status (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); + vl_api_trace_t *tp; + char *trace_name; + + switch (which) + { + case VL_API_TRACE_TX: + tp = am->tx_trace; + trace_name = "TX trace"; + break; + + case VL_API_TRACE_RX: + tp = am->rx_trace; + trace_name = "RX trace"; + break; + + default: + abort (); + } + + if (tp == 0) + { + s = format (s, "%s: not yet configured.\n", trace_name); + return s; + } + + s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", + trace_name, vec_len (tp->traces), tp->nitems, + tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); + return s; +} + +void vl_msg_api_custom_dump_configure (api_main_t * am) + __attribute__ ((weak)); +void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +} + +static void +vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, + u32 first_index, u32 last_index, + vl_api_replay_t which) +{ + vl_api_trace_file_header_t *hp; + int i, fd; + struct stat statb; + size_t file_size; + u8 *msg; + u8 endian_swap_needed = 0; + api_main_t *am = &api_main; + u8 *tmpbuf = 0; + u32 nitems; + void **saved_print_handlers = 0; + + fd = open ((char *) filename, O_RDONLY); + + if (fd < 0) + { + vlib_cli_output (vm, "Couldn't open %s\n", filename); + return; + } + + if (fstat (fd, &statb) < 0) + { + vlib_cli_output (vm, "Couldn't stat %s\n", filename); + close (fd); + return; + } + + if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) + { + vlib_cli_output (vm, "File not plausible: %s\n", filename); + close (fd); + return; + } + + file_size = statb.st_size; + file_size = (file_size + 4095) & ~(4096); + + hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) + { + vlib_cli_output (vm, "mmap failed: %s\n", filename); + close (fd); + return; + } + close (fd); + + if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) + || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) + endian_swap_needed = 1; + + if (endian_swap_needed) + nitems = ntohl (hp->nitems); + else + nitems = hp->nitems; + + if (last_index == (u32) ~ 0) + { + last_index = nitems - 1; + } + + if (first_index >= nitems || last_index >= nitems) + { + vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", + first_index, last_index, nitems - 1); + munmap (hp, file_size); + return; + } + if (hp->wrapped) + vlib_cli_output (vm, + "Note: wrapped/incomplete trace, results may vary\n"); + + if (which == CUSTOM_DUMP) + { + saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); + vl_msg_api_custom_dump_configure (am); + } + + + msg = (u8 *) (hp + 1); + + for (i = 0; i < first_index; i++) + { + trace_cfg_t *cfgp; + int size; + u16 msg_id; + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + return; + } + msg += size; + } + + if (which == REPLAY) + am->replay_in_progress = 1; + + for (; i <= last_index; i++) + { + trace_cfg_t *cfgp; + u16 *msg_idp; + u16 msg_id; + int size; + + if (which == DUMP) + vlib_cli_output (vm, "---------- trace %d -----------\n", i); + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + + /* Copy the buffer (from the read-only mmap'ed file) */ + vec_validate (tmpbuf, size - 1 + sizeof (uword)); + clib_memcpy (tmpbuf + sizeof (uword), msg, size); + memset (tmpbuf, 0xf, sizeof (uword)); + + /* + * Endian swap if needed. All msg data is supposed to be + * in network byte order. All msg handlers are supposed to + * know that. The generic message dumpers don't know that. 
+ * One could fix apigen, I suppose. + */ + if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) + { + void (*endian_fp) (void *); + if (msg_id >= vec_len (am->msg_endian_handlers) + || (am->msg_endian_handlers[msg_id] == 0)) + { + vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + endian_fp = am->msg_endian_handlers[msg_id]; + (*endian_fp) (tmpbuf + sizeof (uword)); + } + + /* msg_id always in network byte order */ + if (clib_arch_is_little_endian) + { + msg_idp = (u16 *) (tmpbuf + sizeof (uword)); + *msg_idp = msg_id; + } + + switch (which) + { + case CUSTOM_DUMP: + case DUMP: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + (*print_fp) (tmpbuf + sizeof (uword), vm); + } + else + { + vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", + msg_id); + break; + } + break; + + case INITIALIZERS: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *s; + int j; + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + + vlib_cli_output (vm, "/*"); + + (*print_fp) (tmpbuf + sizeof (uword), vm); + vlib_cli_output (vm, "*/\n"); + + s = format (0, "static u8 * vl_api_%s_%d[%d] = {", + am->msg_names[msg_id], i, + am->api_trace_cfg[msg_id].size); + + for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) + { + if ((j & 7) == 0) + s = format (s, "\n "); + s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); + } + s = format (s, "\n};\n%c", 0); + vlib_cli_output (vm, (char *) s); + vec_free (s); + } + break; + + case REPLAY: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id] && cfgp->replay_enable) + { + void (*handler) (void *); + + handler = (void *) am->msg_handlers[msg_id]; + + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_sync (); + (*handler) (tmpbuf + sizeof (uword)); + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_release (); + } + else + { + if (cfgp->replay_enable) + vlib_cli_output (vm, "Skipping msg id %d: no handler\n", + msg_id); + break; + } + break; + } + + _vec_len (tmpbuf) = 0; + msg += size; + } + + if (saved_print_handlers) + { + clib_memcpy (am->msg_print_handlers, saved_print_handlers, + vec_len (am->msg_print_handlers) * sizeof (void *)); + vec_free (saved_print_handlers); + } + + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; +} + +static clib_error_t * +api_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 nitems = 256 << 10; + api_main_t *am = &api_main; + vl_api_trace_which_t which = VL_API_TRACE_RX; + u8 *filename; + u32 first = 0; + u32 last = (u32) ~ 0; + FILE *fp; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, which, 0); + } + else if (unformat (input, "save %s", &filename)) + { + u8 *chroot_filename; + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + return 0; + } + + chroot_filename = format (0, 
"/tmp/%s%c", filename, 0); + + vec_free (filename); + + fp = fopen ((char *) chroot_filename, "w"); + if (fp == NULL) + { + vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); + return 0; + } + rv = vl_msg_api_trace_save (am, which, fp); + fclose (fp); + if (rv == -1) + vlib_cli_output (vm, "API Trace data not present\n"); + else if (rv == -2) + vlib_cli_output (vm, "File for writing is closed\n"); + else if (rv == -10) + vlib_cli_output (vm, "Error while writing header to file\n"); + else if (rv == -11) + vlib_cli_output (vm, "Error while writing trace to file\n"); + else if (rv == -12) + vlib_cli_output (vm, + "Error while writing end of buffer trace to file\n"); + else if (rv == -13) + vlib_cli_output (vm, + "Error while writing start of buffer trace to file\n"); + else if (rv < 0) + vlib_cli_output (vm, "Unkown error while saving: %d", rv); + else + vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); + vec_free (chroot_filename); + } + else if (unformat (input, "dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, DUMP); + } + else if (unformat (input, "custom-dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); + } + else if (unformat (input, "replay %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, REPLAY); + } + else if (unformat (input, "initializers %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); + } + else if (unformat (input, "tx")) + { + which = VL_API_TRACE_TX; + } + else if (unformat (input, "first %d", &first)) + { + ; + } + else if (unformat (input, "last %d", &last)) + { + ; + } + else if (unformat (input, "status")) + { + vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, + am, which); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, which, 0); + vl_msg_api_trace_free (am, which); + } + else if (unformat (input, "post-mortem-on")) + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + else if (unformat (input, "post-mortem-off")) + vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ ); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_trace_command, static) = { + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +api_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + u32 nitems = 256 << 10; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index b6b4c04a..100ec613 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -374,8 +374,7 @@ vhost_user_rx_thread_placement () for (i = 
vum->input_cpu_first_index; i < vum->input_cpu_first_index + vum->input_cpu_count; i++) { - vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main, - vhost_user_input_node.index, + vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, VLIB_NODE_STATE_DISABLED); vec_add1 (workers, i); } @@ -406,9 +405,9 @@ vhost_user_rx_thread_placement () iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; vec_add1 (vhc->rx_queues, iaq); - vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] : - &vlib_global_main, vhost_user_input_node.index, - VLIB_NODE_STATE_POLLING); + vlib_node_set_state (vlib_mains[cpu_index], + vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); } }); /* *INDENT-ON* */ diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am index f0d5df62..ceab687c 100644 --- a/src/vpp-api-test.am +++ b/src/vpp-api-test.am @@ -34,14 +34,12 @@ vpp_json_test_SOURCES = \ vat/json_test.c vpp_api_test_LDADD = \ - libvlib.la \ libvlibmemoryclient.la \ libsvm.la \ libvatplugin.la \ libvppinfra.la \ libvlibapi.la \ libvlibmemory.la \ - libvnet.la \ -lpthread -lm -lrt -ldl -lcrypto vpp_api_test_LDFLAGS = -Wl,--export-dynamic diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 828394ed..c85dc680 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -2143,7 +2143,6 @@ vpe_api_init (vlib_main_t * vm) am->oam_events_registration_hash = hash_create (0, sizeof (uword)); am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); - vl_api_init (vm); vl_set_memory_region_name ("/vpe-api"); vl_enable_disable_memory_api (vm, 1 /* enable it */ ); diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 610f40ed..277be8c0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -122,13 +122,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Initial wait for the world to settle down */ vlib_process_suspend (vm, 5.0); - if (vec_len (vlib_mains) == 0) - vec_add1 (gm->my_vlib_mains, &vlib_global_main); - else - { - for (i = 0; i < vec_len (vlib_mains); i++) - vec_add1 (gm->my_vlib_mains, vlib_mains[i]); - } + for (i = 0; i < vec_len (vlib_mains); i++) + vec_add1 (gm->my_vlib_mains, vlib_mains[i]); while (1) { -- cgit 1.2.3-korg From b64e4e2af314e1b2bc074b12ede50ad6d96c37c0 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 14 Mar 2017 09:10:56 -0400 Subject: Clean up dead API client reaper callack scheme Change-Id: Iec3df234ca9f717d87787cefc76b73ed9ad42332 Signed-off-by: Dave Barach --- src/vlibapi/api.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/vlibmemory/memory_vlib.c | 22 +++++++++++++++------- src/vpp/api/api.c | 6 ++++-- 3 files changed, 63 insertions(+), 9 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 87a56121..a62fa644 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -112,6 +112,14 @@ typedef struct u16 last_msg_id; } vl_api_msg_range_t; +typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index); + +typedef struct _vl_msg_api_init_function_list_elt +{ + struct _vl_msg_api_init_function_list_elt *next_init_function; + vl_msg_api_init_function_t *f; +} _vl_msg_api_function_list_elt_t; + typedef struct { void (**msg_handlers) (void *); @@ -192,6 +200,10 @@ typedef struct /* Replay in progress? 
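The vl_msg_api_init_function_t and list-element types added here, together with the VLIB_DECLARE_REAPER_FUNCTION constructor macro that appears a little further down in this patch, implement the usual load-time registration idiom: each registering translation unit prepends a statically allocated element to a global singly linked list from an __attribute__((constructor)) function, and the dispatcher later walks that list. A stripped-down, stand-alone sketch of the idiom, with illustrative names rather than the VPP macros:

/* registration_sketch.c: load-time callback registration, illustrative only. */
#include <stdio.h>

typedef int (reaper_fn_t) (unsigned int client_index);

typedef struct reg_elt
{
  struct reg_elt *next;
  reaper_fn_t *f;
} reg_elt_t;

static reg_elt_t *registrations;        /* head of the singly linked list */

/* Each use prepends one statically allocated element before main() runs. */
#define REGISTER_REAPER(x)                              \
static void __attribute__ ((constructor))               \
__register_##x (void)                                    \
{                                                        \
  static reg_elt_t e;                                    \
  e.next = registrations;                                \
  e.f = &x;                                              \
  registrations = &e;                                    \
}

static int
my_reaper (unsigned int client_index)
{
  printf ("cleaning up after client %u\n", client_index);
  return 0;
}

REGISTER_REAPER (my_reaper);

static void
call_reapers (unsigned int client_index)
{
  reg_elt_t *e;
  for (e = registrations; e; e = e->next)
    (void) e->f (client_index);
}

int
main (void)
{
  call_reapers (7);             /* runs every registered reaper, newest first */
  return 0;
}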
*/ int replay_in_progress; + + /* List of API client reaper functions */ + _vl_msg_api_function_list_elt_t *reaper_function_registrations; + } api_main_t; extern api_main_t api_main; @@ -291,6 +303,38 @@ vlib_node_t **vlib_node_unserialize (u8 * vector); }) +#define _VL_MSG_API_FUNCTION_SYMBOL(x, type) \ + _vl_msg_api_##type##_function_##x + +#define VL_MSG_API_FUNCTION_SYMBOL(x) \ + _VL_MSG_API_FUNCTION_SYMBOL(x, reaper) + +#define VLIB_DECLARE_REAPER_FUNCTION(x, tag) \ +vl_msg_api_init_function_t * _VL_MSG_API_FUNCTION_SYMBOL (x, tag) = x; \ +static void __vl_msg_api_add_##tag##_function_##x (void) \ + __attribute__((__constructor__)) ; \ + \ +static void __vl_msg_api_add_##tag##_function_##x (void) \ +{ \ + api_main_t * am = &api_main; \ + static _vl_msg_api_function_list_elt_t _vl_msg_api_function; \ + _vl_msg_api_function.next_init_function \ + = am->tag##_function_registrations; \ + am->tag##_function_registrations = &_vl_msg_api_function; \ + _vl_msg_api_function.f = &x; \ +} + +#define VL_MSG_API_REAPER_FUNCTION(x) VLIB_DECLARE_REAPER_FUNCTION(x,reaper) + +/* Call reaper function with client index */ +#define vl_msg_api_call_reaper_function(ci) \ + ({ \ + extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (reaper); \ + vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (reaper); \ + clib_error_t * _error = 0; \ + _error = _f (ci); \ + }) + #endif /* included_api_h */ /* diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index d2e05968..7a536ee8 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -221,12 +221,20 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) vl_msg_api_send_shmem (q, (u8 *) & rp); } -/* Application callback to clean up leftover registrations from this client */ -int vl_api_memclnt_delete_callback (u32 client_index) __attribute__ ((weak)); - -int -vl_api_memclnt_delete_callback (u32 client_index) +static int +call_reaper_functions (u32 client_index) { + clib_error_t *error = 0; + _vl_msg_api_function_list_elt_t *i; + + i = api_main.reaper_function_registrations; + while (i) + { + error = i->f (client_index); + if (error) + clib_error_report (error); + i = i->next_init_function; + } return 0; } @@ -246,7 +254,7 @@ vl_api_memclnt_delete_t_handler (vl_api_memclnt_delete_t * mp) handle = mp->index; - if (vl_api_memclnt_delete_callback (handle)) + if (call_reaper_functions (handle)) return; epoch = vl_msg_api_handle_get_epoch (handle); @@ -621,7 +629,7 @@ memclnt_process (vlib_main_t * vm, handle = vl_msg_api_handle_from_index_and_epoch (dead_indices[i], shm->application_restarts); - (void) vl_api_memclnt_delete_callback (handle); + (void) call_reaper_functions (handle); } } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index c85dc680..673ffe56 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -164,8 +164,8 @@ static int arp_change_delete_callback (u32 pool_index, u8 * notused); static int nd_change_delete_callback (u32 pool_index, u8 * notused); /* Clean up all registrations belonging to the indicated client */ -int -vl_api_memclnt_delete_callback (u32 client_index) +static clib_error_t * +memclnt_delete_callback (u32 client_index) { vpe_api_main_t *vam = &vpe_api_main; vpe_client_registration_t *rp; @@ -186,6 +186,8 @@ vl_api_memclnt_delete_callback (u32 client_index) return 0; } +VL_MSG_API_REAPER_FUNCTION (memclnt_delete_callback); + pub_sub_handler (oam_events, OAM_EVENTS); #define RESOLUTION_EVENT 1 -- cgit 1.2.3-korg From 5c6c4bfd64722a9a2d410a3e58a817721a083702 Mon Sep 17 
00:00:00 2001 From: Dave Barach Date: Tue, 11 Apr 2017 13:12:48 -0400 Subject: move binary-api client-only routines to memory_client.c Change-Id: I0755f731b1b01e6a1a231948d498c625a2c966b7 Signed-off-by: Dave Barach --- src/vlibmemory/memory_client.c | 250 +++++++++++++++++++++++++++++++++++++++-- src/vlibmemory/memory_shared.c | 241 --------------------------------------- 2 files changed, 241 insertions(+), 250 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index 234a0a5a..25b06f65 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -104,23 +104,234 @@ vl_api_rx_thread_exit_t_handler (vl_api_rx_thread_exit_t * mp) } static void -noop_handler (void *notused) +vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) { + serialize_main_t _sm, *sm = &_sm; + api_main_t *am = &api_main; + u8 *tblv; + u32 nmsgs; + int i; + u8 *name_and_crc; + u32 msg_index; + + am->my_client_index = mp->index; + am->my_registration = (vl_api_registration_t *) (uword) mp->handle; + + /* Clean out any previous hash table (unlikely) */ + if (am->msg_index_by_name_and_crc) + { + int i; + u8 **keys = 0; + hash_pair_t *hp; + /* *INDENT-OFF* */ + hash_foreach_pair (hp, am->msg_index_by_name_and_crc, + ({ + vec_add1 (keys, (u8 *) hp->key); + })); + /* *INDENT-ON* */ + for (i = 0; i < vec_len (keys); i++) + vec_free (keys[i]); + vec_free (keys); + } + + am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); + + /* Recreate the vnet-side API message handler table */ + tblv = (u8 *) mp->message_table; + serialize_open_vector (sm, tblv); + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + hash_set_mem (am->msg_index_by_name_and_crc, name_and_crc, msg_index); + } } -#define foreach_api_msg \ -_(RX_THREAD_EXIT, rx_thread_exit) +static void +noop_handler (void *notused) +{ +} -static int -connect_to_vlib_internal (char *svm_name, char *client_name, - int rx_queue_size, int want_pthread) +int +vl_client_connect (char *name, int ctx_quota, int input_queue_size) { + svm_region_t *svm; + vl_api_memclnt_create_t *mp; + vl_api_memclnt_create_reply_t *rp; + unix_shared_memory_queue_t *vl_input_queue; + vl_shmem_hdr_t *shmem_hdr; int rv = 0; - memory_client_main_t *mm = &memory_client_main; + void *oldheap; + api_main_t *am = &api_main; - if ((rv = vl_client_api_map (svm_name))) + if (am->my_registration) + { + clib_warning ("client %s already connected...", name); + return -1; + } + + if (am->vlib_rp == 0) + { + clib_warning ("am->vlib_rp NULL"); + return -1; + } + + svm = am->vlib_rp; + shmem_hdr = am->shmem_hdr; + + if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0) + { + clib_warning ("shmem_hdr / input queue NULL"); + return -1; + } + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + vl_input_queue = + unix_shared_memory_queue_init (input_queue_size, sizeof (uword), + getpid (), 0); + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + + am->my_client_index = ~0; + am->my_registration = 0; + am->vl_input_queue = vl_input_queue; + + mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_create_t)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE); + mp->ctx_quota = ctx_quota; + mp->input_queue = (uword) vl_input_queue; + strncpy ((char *) mp->name, name, sizeof (mp->name) - 1); + + 
vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); + + while (1) + { + int qstatus; + struct timespec ts, tsrem; + int i; + + /* Wait up to 10 seconds */ + for (i = 0; i < 1000; i++) + { + qstatus = unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, + 1 /* nowait */ ); + if (qstatus == 0) + goto read_one_msg; + ts.tv_sec = 0; + ts.tv_nsec = 10000 * 1000; /* 10 ms */ + while (nanosleep (&ts, &tsrem) < 0) + ts = tsrem; + } + /* Timeout... */ + clib_warning ("memclnt_create_reply timeout"); + return -1; + + read_one_msg: + if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_REPLY) + { + clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id)); + continue; + } + rv = clib_net_to_host_u32 (rp->response); + + vl_msg_api_handler ((void *) rp); + break; + } + return (rv); +} + +static void +vl_api_memclnt_delete_reply_t_handler (vl_api_memclnt_delete_reply_t * mp) +{ + void *oldheap; + api_main_t *am = &api_main; + + pthread_mutex_lock (&am->vlib_rp->mutex); + oldheap = svm_push_data_heap (am->vlib_rp); + unix_shared_memory_queue_free (am->vl_input_queue); + pthread_mutex_unlock (&am->vlib_rp->mutex); + svm_pop_heap (oldheap); + + am->my_client_index = ~0; + am->my_registration = 0; + am->vl_input_queue = 0; +} + +void +vl_client_disconnect (void) +{ + vl_api_memclnt_delete_t *mp; + vl_api_memclnt_delete_reply_t *rp; + unix_shared_memory_queue_t *vl_input_queue; + vl_shmem_hdr_t *shmem_hdr; + time_t begin; + api_main_t *am = &api_main; + + ASSERT (am->vlib_rp); + shmem_hdr = am->shmem_hdr; + ASSERT (shmem_hdr && shmem_hdr->vl_input_queue); + + vl_input_queue = am->vl_input_queue; + + mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE); + mp->index = am->my_client_index; + mp->handle = (uword) am->my_registration; + + vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); + + /* + * Have to be careful here, in case the client is disconnecting + * because e.g. the vlib process died, or is unresponsive. 
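Both the connect and the disconnect paths in this hunk share the same defensive shape: poll the shared-memory reply queue without blocking, nap briefly between attempts, and give up after a fixed wall-clock budget so a dead or wedged vlib peer cannot hang the client forever. A minimal sketch of that bounded wait, with try_dequeue() standing in for the non-blocking unix_shared_memory_queue_sub() call (illustrative only):

/* bounded_wait_sketch.c: illustrative only; try_dequeue() stands in for
 * unix_shared_memory_queue_sub() called with the nowait flag. */
#include <stdio.h>
#include <time.h>

/* Returns 0 and fills *msg when a reply is available, -1 otherwise. */
static int
try_dequeue (void **msg)
{
  *msg = 0;
  return -1;                    /* stub: pretend the peer never answers */
}

/* Poll for a reply, napping 10 ms between attempts, for at most
 * budget_sec seconds of wall-clock time. */
static void *
wait_for_reply (int budget_sec)
{
  struct timespec nap = { .tv_sec = 0, .tv_nsec = 10 * 1000 * 1000 };
  time_t begin = time (0);
  void *msg;

  while (time (0) < begin + budget_sec)
    {
      if (try_dequeue (&msg) == 0)
        return msg;             /* reply arrived */
      nanosleep (&nap, 0);      /* do not spin on the shared-memory queue */
    }
  return 0;                     /* peer unresponsive: give up rather than hang */
}

int
main (void)
{
  if (wait_for_reply (2) == 0)
    printf ("peer unresponsive, giving up\n");
  return 0;
}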
+ */ + + begin = time (0); + while (1) + { + time_t now; + + now = time (0); + + if (now >= (begin + 2)) + { + clib_warning ("peer unresponsive, give up"); + am->my_client_index = ~0; + am->my_registration = 0; + am->shmem_hdr = 0; + break; + } + if (unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1) < 0) + continue; + + /* drain the queue */ + if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY) + { + vl_msg_api_handler ((void *) rp); + continue; + } + vl_msg_api_handler ((void *) rp); + break; + } +} + +#define foreach_api_msg \ +_(RX_THREAD_EXIT, rx_thread_exit) \ +_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ +_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) + + +int +vl_client_api_map (char *region_name) +{ + int rv; + + if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) { - clib_warning ("vl_client_api map rv %d", rv); return rv; } @@ -133,6 +344,27 @@ connect_to_vlib_internal (char *svm_name, char *client_name, sizeof(vl_api_##n##_t), 1); foreach_api_msg; #undef _ + return 0; +} + +void +vl_client_api_unmap (void) +{ + vl_unmap_shmem (); +} + +static int +connect_to_vlib_internal (char *svm_name, char *client_name, + int rx_queue_size, int want_pthread) +{ + int rv = 0; + memory_client_main_t *mm = &memory_client_main; + + if ((rv = vl_client_api_map (svm_name))) + { + clib_warning ("vl_client_api map rv %d", rv); + return rv; + } if (vl_client_connect (client_name, 0 /* punt quota */ , rx_queue_size /* input queue */ ) < 0) diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index c41f32f7..6cea5df9 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -559,52 +559,6 @@ vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem) (void) unix_shared_memory_queue_add_nolock (q, elem); } -static void -vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) -{ - serialize_main_t _sm, *sm = &_sm; - api_main_t *am = &api_main; - u8 *tblv; - u32 nmsgs; - int i; - u8 *name_and_crc; - u32 msg_index; - - am->my_client_index = mp->index; - am->my_registration = (vl_api_registration_t *) (uword) mp->handle; - - /* Clean out any previous hash table (unlikely) */ - if (am->msg_index_by_name_and_crc) - { - int i; - u8 **keys = 0; - hash_pair_t *hp; - /* *INDENT-OFF* */ - hash_foreach_pair (hp, am->msg_index_by_name_and_crc, - ({ - vec_add1 (keys, (u8 *) hp->key); - })); - /* *INDENT-ON* */ - for (i = 0; i < vec_len (keys); i++) - vec_free (keys[i]); - vec_free (keys); - } - - am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); - - /* Recreate the vnet-side API message handler table */ - tblv = (u8 *) mp->message_table; - serialize_open_vector (sm, tblv); - unserialize_integer (sm, &nmsgs, sizeof (u32)); - - for (i = 0; i < nmsgs; i++) - { - msg_index = unserialize_likely_small_unsigned_integer (sm); - unserialize_cstring (sm, (char **) &name_and_crc); - hash_set_mem (am->msg_index_by_name_and_crc, name_and_crc, msg_index); - } -} - u32 vl_api_get_msg_index (u8 * name_and_crc) { @@ -620,171 +574,6 @@ vl_api_get_msg_index (u8 * name_and_crc) return ~0; } -int -vl_client_connect (char *name, int ctx_quota, int input_queue_size) -{ - svm_region_t *svm; - vl_api_memclnt_create_t *mp; - vl_api_memclnt_create_reply_t *rp; - unix_shared_memory_queue_t *vl_input_queue; - vl_shmem_hdr_t *shmem_hdr; - int rv = 0; - void *oldheap; - api_main_t *am = &api_main; - - if (am->my_registration) - { - clib_warning ("client %s already connected...", name); - return -1; - } - - if 
(am->vlib_rp == 0) - { - clib_warning ("am->vlib_rp NULL"); - return -1; - } - - svm = am->vlib_rp; - shmem_hdr = am->shmem_hdr; - - if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0) - { - clib_warning ("shmem_hdr / input queue NULL"); - return -1; - } - - pthread_mutex_lock (&svm->mutex); - oldheap = svm_push_data_heap (svm); - vl_input_queue = - unix_shared_memory_queue_init (input_queue_size, sizeof (uword), - getpid (), 0); - pthread_mutex_unlock (&svm->mutex); - svm_pop_heap (oldheap); - - am->my_client_index = ~0; - am->my_registration = 0; - am->vl_input_queue = vl_input_queue; - - mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_create_t)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE); - mp->ctx_quota = ctx_quota; - mp->input_queue = (uword) vl_input_queue; - strncpy ((char *) mp->name, name, sizeof (mp->name) - 1); - - vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); - - while (1) - { - int qstatus; - struct timespec ts, tsrem; - int i; - - /* Wait up to 10 seconds */ - for (i = 0; i < 1000; i++) - { - qstatus = unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, - 1 /* nowait */ ); - if (qstatus == 0) - goto read_one_msg; - ts.tv_sec = 0; - ts.tv_nsec = 10000 * 1000; /* 10 ms */ - while (nanosleep (&ts, &tsrem) < 0) - ts = tsrem; - } - /* Timeout... */ - clib_warning ("memclnt_create_reply timeout"); - return -1; - - read_one_msg: - if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_REPLY) - { - clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id)); - continue; - } - rv = clib_net_to_host_u32 (rp->response); - - vl_msg_api_handler ((void *) rp); - break; - } - return (rv); -} - -static void -vl_api_memclnt_delete_reply_t_handler (vl_api_memclnt_delete_reply_t * mp) -{ - void *oldheap; - api_main_t *am = &api_main; - - pthread_mutex_lock (&am->vlib_rp->mutex); - oldheap = svm_push_data_heap (am->vlib_rp); - unix_shared_memory_queue_free (am->vl_input_queue); - pthread_mutex_unlock (&am->vlib_rp->mutex); - svm_pop_heap (oldheap); - - am->my_client_index = ~0; - am->my_registration = 0; - am->vl_input_queue = 0; -} - -void -vl_client_disconnect (void) -{ - vl_api_memclnt_delete_t *mp; - vl_api_memclnt_delete_reply_t *rp; - unix_shared_memory_queue_t *vl_input_queue; - vl_shmem_hdr_t *shmem_hdr; - time_t begin; - api_main_t *am = &api_main; - - ASSERT (am->vlib_rp); - shmem_hdr = am->shmem_hdr; - ASSERT (shmem_hdr && shmem_hdr->vl_input_queue); - - vl_input_queue = am->vl_input_queue; - - mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE); - mp->index = am->my_client_index; - mp->handle = (uword) am->my_registration; - - vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) & mp); - - /* - * Have to be careful here, in case the client is disconnecting - * because e.g. the vlib process died, or is unresponsive. 
- */ - - begin = time (0); - while (1) - { - time_t now; - - now = time (0); - - if (now >= (begin + 2)) - { - clib_warning ("peer unresponsive, give up"); - am->my_client_index = ~0; - am->my_registration = 0; - am->shmem_hdr = 0; - break; - } - if (unix_shared_memory_queue_sub (vl_input_queue, (u8 *) & rp, 1) < 0) - continue; - - /* drain the queue */ - if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY) - { - vl_msg_api_handler ((void *) rp); - continue; - } - vl_msg_api_handler ((void *) rp); - break; - } -} - static inline vl_api_registration_t * vl_api_client_index_to_registration_internal (u32 handle) { @@ -834,36 +623,6 @@ vl_api_client_index_to_input_queue (u32 index) return (regp->vl_input_queue); } -#define foreach_api_client_msg \ -_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ -_(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) - -int -vl_client_api_map (char *region_name) -{ - int rv; - - if ((rv = vl_map_shmem (region_name, 0 /* is_vlib */ )) < 0) - { - return rv; - } - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, 0 /* name */, \ - vl_api_##n##_t_handler, \ - 0/* cleanup */, 0/* endian */, 0/* print */, \ - sizeof(vl_api_##n##_t), 1); - foreach_api_client_msg; -#undef _ - return 0; -} - -void -vl_client_api_unmap (void) -{ - vl_unmap_shmem (); -} - /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg From 11b8dbf78af49d270a0e72abe7dea73eec30d85f Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 24 Apr 2017 10:46:54 -0400 Subject: "autoreply" flag: autogenerate standard xxx_reply_t messages Change-Id: I72298aaae7d172082ece3a8edea4217c11b28d79 Signed-off-by: Dave Barach --- src/examples/sample-plugin/sample/sample.api | 10 +- src/plugins/acl/acl.api | 60 +--- src/plugins/dpdk/api/dpdk.api | 35 +- src/plugins/flowperpkt/flowperpkt.api | 23 +- .../export-vxlan-gpe/vxlan_gpe_ioam_export.api | 10 +- src/plugins/ioam/export/ioam_export.api | 10 +- src/plugins/ioam/ip6/ioam_cache.api | 10 +- src/plugins/ioam/lib-pot/pot.api | 34 +- src/plugins/ioam/lib-trace/trace.api | 26 +- src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api | 82 +---- src/plugins/lb/lb.api | 21 +- src/plugins/memif/memif.api | 12 +- src/plugins/snat/snat.api | 88 +---- src/tools/vppapigen/gram.y | 3 +- src/tools/vppapigen/lex.c | 57 +++- src/tools/vppapigen/lex.h | 1 + src/tools/vppapigen/node.c | 5 + src/tools/vppapigen/node.h | 2 + src/vlibmemory/memclnt.api | 7 +- src/vlibmemory/memory_vlib.c | 8 +- src/vnet/bfd/bfd.api | 132 +------- src/vnet/classify/classify.api | 37 +-- src/vnet/cop/cop.api | 28 +- src/vnet/devices/af_packet/af_packet.api | 12 +- src/vnet/devices/netmap/netmap.api | 24 +- src/vnet/devices/virtio/vhost_user.api | 24 +- src/vnet/dhcp/dhcp.api | 38 +-- src/vnet/flow/flow.api | 32 +- src/vnet/interface.api | 108 +----- src/vnet/ip/ip.api | 108 +----- src/vnet/ipsec/ipsec.api | 224 ++----------- src/vnet/l2/l2.api | 96 +----- src/vnet/l2tp/l2tp.api | 28 +- src/vnet/lisp-cp/lisp.api | 164 +-------- src/vnet/lisp-cp/one.api | 185 +---------- src/vnet/lisp-gpe/lisp_gpe.api | 48 +-- src/vnet/map/map.api | 22 +- src/vnet/mpls/mpls.api | 26 +- src/vnet/session/session.api | 68 +--- src/vnet/span/span.api | 10 +- src/vnet/sr/sr.api | 60 +--- src/vnet/unix/tap.api | 12 +- src/vnet/vxlan/vxlan.api | 12 +- src/vpp/api/vpe.api | 367 ++------------------- 44 files changed, 271 insertions(+), 2098 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/examples/sample-plugin/sample/sample.api b/src/examples/sample-plugin/sample/sample.api index f99cdb38..d565c0b1 100644 
--- a/src/examples/sample-plugin/sample/sample.api +++ b/src/examples/sample-plugin/sample/sample.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define sample_macswap_enable_disable { +autoreply define sample_macswap_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -29,11 +29,3 @@ define sample_macswap_enable_disable { /* Interface handle */ u32 sw_if_index; }; - -define sample_macswap_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api index d981338d..3b334113 100644 --- a/src/plugins/acl/acl.api +++ b/src/plugins/acl/acl.api @@ -161,24 +161,13 @@ define acl_add_replace_reply @param acl_index - ACL index to delete */ -manual_print define acl_del +autoreply manual_print define acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_del_reply -{ - u32 context; - i32 retval; -}; - /* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ /** \brief Use acl_interface_set_acl_list instead Append/remove an ACL index to/from the list of ACLs checked for an interface @@ -190,7 +179,7 @@ define acl_del_reply @param acl_index - index of ACL for the operation */ -manual_print define acl_interface_add_del +autoreply manual_print define acl_interface_add_del { u32 client_index; u32 context; @@ -204,17 +193,6 @@ manual_print define acl_interface_add_del u32 acl_index; }; -/** \brief Reply to alter the ACL list - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the vector of input/output ACLs checked for an interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -224,7 +202,7 @@ define acl_interface_add_del_reply @param acls - vector of ACL indices */ -manual_print define acl_interface_set_acl_list +autoreply manual_print define acl_interface_set_acl_list { u32 client_index; u32 context; @@ -239,12 +217,6 @@ manual_print define acl_interface_set_acl_list @param retval 0 - no error */ -define acl_interface_set_acl_list_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the specific ACL contents or all of the ACLs' contents @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -341,24 +313,13 @@ define macip_acl_add_reply @param acl_index - MACIP ACL index to delete */ -manual_print define macip_acl_del +autoreply manual_print define macip_acl_del { u32 client_index; u32 context; u32 acl_index; }; -/** \brief Reply to delete the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add or delete a MACIP ACL to/from interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -367,7 +328,7 @@ define macip_acl_del_reply @param acl_index - MACIP ACL index */ -manual_print define macip_acl_interface_add_del +autoreply manual_print define macip_acl_interface_add_del { u32 client_index; u32 context; @@ -377,17 +338,6 @@ 
manual_print define macip_acl_interface_add_del u32 acl_index; }; -/** \brief Reply to apply/unapply the MACIP ACL - @param context - returned sender context, to match reply w/ request - @param retval 0 - no error -*/ - -define macip_acl_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump one or all defined MACIP ACLs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/dpdk/api/dpdk.api b/src/plugins/dpdk/api/dpdk.api index 21215d45..d43f8a36 100644 --- a/src/plugins/dpdk/api/dpdk.api +++ b/src/plugins/dpdk/api/dpdk.api @@ -21,7 +21,7 @@ @param pipe - pipe ID within its subport @param profile - pipe profile ID */ -define sw_interface_set_dpdk_hqos_pipe { +autoreply define sw_interface_set_dpdk_hqos_pipe { u32 client_index; u32 context; u32 sw_if_index; @@ -30,15 +30,6 @@ define sw_interface_set_dpdk_hqos_pipe { u32 profile; }; -/** \brief DPDK interface HQoS pipe profile set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_pipe_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS subport parameters set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -49,7 +40,7 @@ define sw_interface_set_dpdk_hqos_pipe_reply { @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second) @param tc_period - enforcement period for rates (measured in milliseconds) */ -define sw_interface_set_dpdk_hqos_subport { +autoreply define sw_interface_set_dpdk_hqos_subport { u32 client_index; u32 context; u32 sw_if_index; @@ -60,15 +51,6 @@ define sw_interface_set_dpdk_hqos_subport { u32 tc_period; }; -/** \brief DPDK interface HQoS subport parameters set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_subport_reply { - u32 context; - i32 retval; -}; - /** \brief DPDK interface HQoS tctbl entry set request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +59,7 @@ define sw_interface_set_dpdk_hqos_subport_reply { @param tc - traffic class (0 .. 3) @param queue - traffic class queue (0 .. 
3) */ -define sw_interface_set_dpdk_hqos_tctbl { +autoreply define sw_interface_set_dpdk_hqos_tctbl { u32 client_index; u32 context; u32 sw_if_index; @@ -86,18 +68,9 @@ define sw_interface_set_dpdk_hqos_tctbl { u32 queue; }; -/** \brief DPDK interface HQoS tctbl entry set reply - @param context - sender context, to match reply w/ request - @param retval - request return code -*/ -define sw_interface_set_dpdk_hqos_tctbl_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/plugins/flowperpkt/flowperpkt.api b/src/plugins/flowperpkt/flowperpkt.api index 1cf62c54..3ff92dca 100644 --- a/src/plugins/flowperpkt/flowperpkt.api +++ b/src/plugins/flowperpkt/flowperpkt.api @@ -12,7 +12,7 @@ @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param sw_if_index - index of the interface */ -manual_print define flowperpkt_tx_interface_add_del +autoreply manual_print define flowperpkt_tx_interface_add_del { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -28,20 +28,7 @@ manual_print define flowperpkt_tx_interface_add_del u32 sw_if_index; }; -/** \brief Reply to enable/disable per-packet IPFIX recording messages - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define flowperpkt_tx_interface_add_del_reply -{ - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; - -define flowperpkt_params +autoreply define flowperpkt_params { u32 client_index; u32 context; @@ -51,9 +38,3 @@ define flowperpkt_params u32 active_timer; /* ~0 is off, 0 is default */ u32 passive_timer; /* ~0 is off, 0 is default */ }; - -define flowperpkt_params_reply -{ - u32 context; - i32 retval; -}; diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api index 7b17c3f7..caa97e6e 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define vxlan_gpe_ioam_export_enable_disable { +autoreply define vxlan_gpe_ioam_export_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define vxlan_gpe_ioam_export_enable_disable { /* Src ip address */ }; - -define vxlan_gpe_ioam_export_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; \ No newline at end of file diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api index f22d9fc8..bb830561 100644 --- a/src/plugins/ioam/export/ioam_export.api +++ b/src/plugins/ioam/export/ioam_export.api @@ -16,7 +16,7 @@ /* Define a simple binary API to control the feature */ -define ioam_export_ip6_enable_disable { +autoreply define ioam_export_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -32,11 +32,3 @@ define ioam_export_ip6_enable_disable { /* Src ip address */ }; - -define ioam_export_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api index de50d57d..dd9c0186 100644 --- a/src/plugins/ioam/ip6/ioam_cache.api +++ b/src/plugins/ioam/ip6/ioam_cache.api @@ -16,7 +16,7 
@@ /* API to control ioam caching */ -define ioam_cache_ip6_enable_disable { +autoreply define ioam_cache_ip6_enable_disable { /* Client identifier, set from api_main.my_client_index */ u32 client_index; @@ -27,11 +27,3 @@ define ioam_cache_ip6_enable_disable { u8 is_disable; }; - -define ioam_cache_ip6_enable_disable_reply { - /* From the request */ - u32 context; - - /* Return value, zero means all OK */ - i32 retval; -}; diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api index fa2fc126..c377cde0 100644 --- a/src/plugins/ioam/lib-pot/pot.api +++ b/src/plugins/ioam/lib-pot/pot.api @@ -27,7 +27,7 @@ @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_add { +autoreply define pot_profile_add { u32 client_index; u32 context; u8 id; @@ -42,22 +42,12 @@ define pot_profile_add { u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_add_reply { - u32 context; - i32 retval; -}; - - /** \brief Proof of Transit(POT): Activate POT profile in the list @param id - id of the profile @param list_name_len - length of the name of this profile list @param list_name - name of this profile list */ -define pot_profile_activate { +autoreply define pot_profile_activate { u32 client_index; u32 context; u8 id; @@ -65,37 +55,19 @@ define pot_profile_activate { u8 list_name[0]; }; -/** \brief Proof of Transit profile activate response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_activate_reply { - u32 context; - i32 retval; -}; - /** \brief Delete POT Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param list_name_len - length of the name of the profile list @param list_name - name of profile list to delete */ -define pot_profile_del { +autoreply define pot_profile_del { u32 client_index; u32 context; u8 list_name_len; u8 list_name[0]; }; -/** \brief Proof of Transit profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pot_profile_del_reply { - u32 context; - i32 retval; -}; - /** \brief Show POT Profiles @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api index cb958325..2f45c6e2 100644 --- a/src/plugins/ioam/lib-trace/trace.api +++ b/src/plugins/ioam/lib-trace/trace.api @@ -22,7 +22,7 @@ @param trace_tsp- Timestamp resolution @param app_data - Application specific opaque */ -define trace_profile_add { +autoreply define trace_profile_add { u32 client_index; u32 context; u8 trace_type; @@ -32,37 +32,15 @@ define trace_profile_add { u32 app_data; }; -/** \brief Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_add_reply { - u32 context; - i32 retval; -}; - - - /** \brief Delete trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define trace_profile_del { +autoreply define trace_profile_del { u32 client_index; u32 context; }; -/** \brief Trace profile add / del response 
- @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define trace_profile_del_reply { - u32 context; - i32 retval; -}; - - - /** \brief Show trace Profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api index 056529a4..a6761f07 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -24,7 +24,7 @@ @param trace_enable - iOAM Trace enabled or not flag */ -define vxlan_gpe_ioam_enable { +autoreply define vxlan_gpe_ioam_enable { u32 client_index; u32 context; u16 id; @@ -33,38 +33,18 @@ define vxlan_gpe_ioam_enable { u8 trace_enable; }; -/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_enable_reply { - u32 context; - i32 retval; -}; - - /** \brief iOAM for VxLAN-GPE disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param id - profile id */ -define vxlan_gpe_ioam_disable +autoreply define vxlan_gpe_ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief vxlan_gpe_ioam disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define vxlan_gpe_ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -73,7 +53,7 @@ define vxlan_gpe_ioam_disable_reply @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_enable { +autoreply define vxlan_gpe_ioam_vni_enable { u32 client_index; u32 context; u32 vni; @@ -82,18 +62,6 @@ define vxlan_gpe_ioam_vni_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for a VNI (VXLAN-GPE) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -102,7 +70,7 @@ define vxlan_gpe_ioam_vni_enable_reply { @param remote - IPv4/6 Address of the remote VTEP */ -define vxlan_gpe_ioam_vni_disable { +autoreply define vxlan_gpe_ioam_vni_disable { u32 client_index; u32 context; u32 vni; @@ -111,19 +79,6 @@ define vxlan_gpe_ioam_vni_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_vni_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - /** \brief Enable iOAM for a VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -131,7 +86,7 @@ define vxlan_gpe_ioam_vni_disable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_enable { +autoreply define 
vxlan_gpe_ioam_transit_enable { u32 client_index; u32 context; u32 outer_fib_index; @@ -139,18 +94,6 @@ define vxlan_gpe_ioam_transit_enable { u8 is_ipv6; }; -/** \brief Reply to enable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_enable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - /** \brief Disable iOAM for VXLAN-GPE transit @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -158,7 +101,7 @@ define vxlan_gpe_ioam_transit_enable_reply { @param outer_fib_index- FIB index */ -define vxlan_gpe_ioam_transit_disable { +autoreply define vxlan_gpe_ioam_transit_disable { u32 client_index; u32 context; u32 outer_fib_index; @@ -166,16 +109,3 @@ define vxlan_gpe_ioam_transit_disable { u8 is_ipv6; }; -/** \brief Reply to disable iOAM for VXLAN-GPE transit - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param retval - return value for request - -*/ -define vxlan_gpe_ioam_transit_disable_reply { - u32 client_index; - u32 context; - i32 retval; -}; - - diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api index 39ee3c8f..32cc669b 100644 --- a/src/plugins/lb/lb.api +++ b/src/plugins/lb/lb.api @@ -8,7 +8,7 @@ @param flow_timeout - Time in seconds after which, if no packet is received for a given flow, the flow is removed from the established flow table. */ -define lb_conf +autoreply define lb_conf { u32 client_index; u32 context; @@ -18,11 +18,6 @@ define lb_conf u32 flow_timeout; }; -define lb_conf_reply { - u32 context; - i32 retval; -}; - /** \brief Add a virtual address (or prefix) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -33,7 +28,7 @@ define lb_conf_reply { for this VIP (must be power of 2). @param is_del - The VIP should be removed. */ -define lb_add_del_vip { +autoreply define lb_add_del_vip { u32 client_index; u32 context; u8 ip_prefix[16]; @@ -43,11 +38,6 @@ define lb_add_del_vip { u8 is_del; }; -define lb_add_del_vip_reply { - u32 context; - i32 retval; -}; - /** \brief Add an application server for a given VIP @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -56,7 +46,7 @@ define lb_add_del_vip_reply { @param as_address - The application server address (IPv4 in lower order 32 bits). @param is_del - The AS should be removed. 
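Every hunk in this commit follows the same recipe: delete the hand-written *_reply message and mark the request autoreply, so that vppapigen emits the reply type itself. Judging from the reply definitions being removed, the generated message carries only the message id, the echoed context, and a retval; in C it is expected to look roughly like the sketch below (an approximation of the generated shape, not vppapigen output copied verbatim):

/* Approximate C shape of an autoreply-generated reply, using lb_add_del_as
 * as the example.  Stand-in typedefs keep the sketch self-contained; the
 * real generated code uses the vppinfra types and may differ in detail. */
typedef unsigned short u16;
typedef unsigned int u32;
typedef int i32;

typedef struct __attribute__ ((packed))
{
  u16 _vl_msg_id;               /* assigned message id, network byte order */
  u32 context;                  /* echoed back from the request */
  i32 retval;                   /* zero means all OK, as in the deleted replies */
} vl_api_lb_add_del_as_reply_t;

The point of the flag, as the diffs above show, is simply to stop hand-maintaining these three-field reply messages in every .api file; clients still match on context and check retval exactly as before.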
*/ -define lb_add_del_as { +autoreply define lb_add_del_as { u32 client_index; u32 context; u8 vip_ip_prefix[16]; @@ -64,8 +54,3 @@ define lb_add_del_as { u8 as_address[16]; u8 is_del; }; - -define lb_add_del_as_reply { - u32 context; - i32 retval; -}; diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 6f946421..95e016c3 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -57,7 +57,7 @@ define memif_create_reply @param context - sender context, to match reply w/ request @param sw_if_index - software index of the interface to delete */ -define memif_delete +autoreply define memif_delete { u32 client_index; u32 context; @@ -65,16 +65,6 @@ define memif_delete u32 sw_if_index; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define memif_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Memory interface details structure @param context - sender context, to match reply w/ request (memif_dump) @param sw_if_index - index of the interface diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api index 9689f5f9..573b6753 100644 --- a/src/plugins/snat/snat.api +++ b/src/plugins/snat/snat.api @@ -29,7 +29,7 @@ @param vrf_id - VRF id of tenant, ~0 means independent of VRF @param is_add - 1 if add, 0 if delete */ -define snat_add_address_range { +autoreply define snat_add_address_range { u32 client_index; u32 context; u8 is_ip4; @@ -39,15 +39,6 @@ define snat_add_address_range { u8 is_add; }; -/** \brief Add S-NAT address range reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_address_range_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT addresses @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -77,7 +68,7 @@ define snat_address_details { @param is_inside - 1 if inside, 0 if outside @param sw_if_index - software index of the interface */ -define snat_interface_add_del_feature { +autoreply define snat_interface_add_del_feature { u32 client_index; u32 context; u8 is_add; @@ -85,15 +76,6 @@ define snat_interface_add_del_feature { u32 sw_if_index; }; -/** \brief Enable/disable S-NAT feature on the interface reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_interface_add_del_feature_reply { - u32 context; - i32 retval; -}; - /** \brief Dump interfaces with S-NAT feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -130,7 +112,7 @@ define snat_interface_details { used) @param vfr_id - VRF ID */ -define snat_add_static_mapping { +autoreply define snat_add_static_mapping { u32 client_index; u32 context; u8 is_add; @@ -145,15 +127,6 @@ define snat_add_static_mapping { u32 vrf_id; }; -/** \brief Add/delete S-NAT static mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_static_mapping_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT static mappings @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -257,21 +230,12 @@ define snat_show_config_reply @param context - sender context, to match reply w/ request @param worker_mask - S-NAT workers mask */ -define snat_set_workers { +autoreply 
define snat_set_workers { u32 client_index; u32 context; u64 worker_mask; }; -/** \brief Set S-NAT workers reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_set_workers_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT workers @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -300,7 +264,7 @@ define snat_worker_details { @param is_add - 1 if add, 0 if delete @param sw_if_index - software index of the interface */ -define snat_add_del_interface_addr { +autoreply define snat_add_del_interface_addr { u32 client_index; u32 context; u8 is_add; @@ -308,15 +272,6 @@ define snat_add_del_interface_addr { u32 sw_if_index; }; -/** \brief Add/delete S-NAT pool address from specific interfce reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_del_interface_addr_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT pool addresses interfaces @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -342,7 +297,7 @@ define snat_interface_addr_details { @param src_port - source port number @param enable - 1 if enable, 0 if disable */ -define snat_ipfix_enable_disable { +autoreply define snat_ipfix_enable_disable { u32 client_index; u32 context; u32 domain_id; @@ -350,15 +305,6 @@ define snat_ipfix_enable_disable { u8 enable; }; -/** \brief Enable/disable S-NAT IPFIX logging reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_ipfix_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief Dump S-NAT users @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -437,7 +383,7 @@ define snat_user_session_details { @param out_addr - outside IP address @param out_addr - outside IP address prefix length */ -define snat_add_det_map { +autoreply define snat_add_det_map { u32 client_index; u32 context; u8 is_add; @@ -449,15 +395,6 @@ define snat_add_det_map { u8 out_plen; }; -/** \brief Add/delete S-NAT deterministic mapping reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_add_det_map_reply { - u32 context; - i32 retval; -}; - /** \brief Get outside address and port range from inside address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -556,7 +493,7 @@ define snat_det_map_details { @param tcp_transitory - TCP transitory timeout (default 240sec) @param icmp - ICMP timeout (default 60sec) */ -define snat_det_set_timeouts { +autoreply define snat_det_set_timeouts { u32 client_index; u32 context; u32 udp; @@ -565,15 +502,6 @@ define snat_det_set_timeouts { u32 icmp; }; -/** \brief Set values of timeouts for deterministic NAT reply - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define snat_det_set_timeouts_reply { - u32 context; - i32 retval; -}; - /** \brief Get values of timeouts for deterministic NAT (seconds) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index de26af8d..9cea6023 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -38,7 +38,7 @@ void 
generate (YYSTYPE);
 
 %token NAME RPAR LPAR SEMI LBRACK RBRACK NUMBER PRIMTYPE BARF
 %token TPACKED DEFINE LCURLY RCURLY STRING UNION
 %token HELPER_STRING COMMA
-%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE
+%token NOVERSION MANUAL_PRINT MANUAL_ENDIAN TYPEONLY DONT_TRACE AUTOREPLY
 
 %%
 
@@ -64,6 +64,7 @@ flag:
     | MANUAL_ENDIAN {$$ = $1;}
     | DONT_TRACE {$$ = $1;}
     | TYPEONLY {$$ = $1;}
+    | AUTOREPLY {$$ = $1;}
     ;
 
 defn: DEFINE NAME LCURLY defbody RCURLY SEMI
diff --git a/src/tools/vppapigen/lex.c b/src/tools/vppapigen/lex.c
index 733942ad..e6358143 100644
--- a/src/tools/vppapigen/lex.c
+++ b/src/tools/vppapigen/lex.c
@@ -27,6 +27,9 @@
 #include "lex.h"
 #include "node.h"
 #include "tools/vppapigen/gram.h"
+#include
+#include
+#include
 
 FILE *ifp, *ofp, *pythonfp, *jsonfp;
 char *vlib_app_name = "vpp";
@@ -38,6 +41,9 @@ int current_filename_allocated;
 unsigned long input_crc;
 unsigned long message_crc;
 int yydebug;
+char *push_input_fifo;
+char saved_ungetc_char;
+char have_ungetc_char;
 
 /*
  * lexer variable definitions
@@ -469,9 +475,50 @@ static char namebuf [MAXNAME];
 
 static inline char getc_char (FILE *ifp)
 {
+    char rv;
+
+    if (have_ungetc_char) {
+        have_ungetc_char = 0;
+        return saved_ungetc_char;
+    }
+
+    if (clib_fifo_elts (push_input_fifo)) {
+        clib_fifo_sub1(push_input_fifo, rv);
+        return (rv & 0x7f);
+    }
     return ((char)(getc(ifp) & 0x7f));
 }
 
+u32 fe (char *fifo)
+{
+    return clib_fifo_elts (fifo);
+}
+
+static inline void
+ungetc_char (char c, FILE *ifp)
+{
+    saved_ungetc_char = c;
+    have_ungetc_char = 1;
+}
+
+void autoreply (void *np_arg)
+{
+    static u8 *s;
+    node_t *np = (node_t *)np_arg;
+    int i;
+
+    vec_reset_length (s);
+
+    s = format (0, " define %s_reply\n", (char *)(np->data[0]));
+    s = format (s, "{\n");
+    s = format (s, " u32 context;\n");
+    s = format (s, " i32 retval;\n");
+    s = format (s, "};\n");
+
+    for (i = 0; i < vec_len (s); i++)
+        clib_fifo_add1 (push_input_fifo, s[i]);
+}
+
 /*
  * yylex (well, yylex_1: The real yylex below does crc-hackery)
  */
@@ -595,7 +642,7 @@ static int yylex_1 (void)
         return (EOF);
 
     if (!isalnum (c) && c != '_') {
-        ungetc (c, ifp);
+        ungetc_char (c, ifp);
         namebuf [nameidx] = 0;
         the_lexer_state = START_STATE;
         return (name_check (namebuf, &yylval));
@@ -616,7 +663,7 @@ static int yylex_1 (void)
         return (EOF);
 
     if (!isdigit (c)) {
-        ungetc (c, ifp);
+        ungetc_char (c, ifp);
         namebuf [nameidx] = 0;
         the_lexer_state = START_STATE;
         yylval = (void *) atol(namebuf);
@@ -889,6 +936,7 @@ int yylex (void)
     case MANUAL_ENDIAN: code = 276; break;
     case TYPEONLY: code = 278; break;
     case DONT_TRACE: code = 279; break;
+    case AUTOREPLY: code = 280; break;
     case EOF: code = ~0; break; /* hysterical compatibility */
@@ -929,6 +977,7 @@ static struct keytab {
 } keytab [] =
 /* Keep the table sorted, binary search used below! */
 {
+    {"autoreply", NODE_AUTOREPLY},
     {"define", NODE_DEFINE},
     {"dont_trace", NODE_DONT_TRACE},
     {"f64", NODE_F64},
@@ -1005,6 +1054,10 @@ static int name_check (const char *s, YYSTYPE *token_value)
         *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
         return(DONT_TRACE);
 
+    case NODE_AUTOREPLY:
+        *token_value = (YYSTYPE) NODE_FLAG_AUTOREPLY;
+        return(AUTOREPLY);
+
     case NODE_NOVERSION:
         return(NOVERSION);
 
diff --git a/src/tools/vppapigen/lex.h b/src/tools/vppapigen/lex.h
index a0fdc735..275cf685 100644
--- a/src/tools/vppapigen/lex.h
+++ b/src/tools/vppapigen/lex.h
@@ -24,6 +24,7 @@
 extern int yylex (void);
 extern void yyerror (char *);
 extern int yyparse (void);
+extern void autoreply (void *);
 
 #ifndef YYSTYPE
 #define YYSTYPE void *
diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c
index 359ac9c9..9f234037 100644
--- a/src/tools/vppapigen/node.c
+++ b/src/tools/vppapigen/node.c
@@ -1050,6 +1050,11 @@ YYSTYPE set_flags(YYSTYPE a1, YYSTYPE a2)
     flags = (int)(uword) a1;
     np->flags |= flags;
+
+    /* Generate a foo_reply_t right here */
+    if (flags & NODE_FLAG_AUTOREPLY)
+        autoreply(np);
+
     return (a2);
 }
 
 /*
diff --git a/src/tools/vppapigen/node.h b/src/tools/vppapigen/node.h
index 297d6036..65bd5d10 100644
--- a/src/tools/vppapigen/node.h
+++ b/src/tools/vppapigen/node.h
@@ -53,6 +53,7 @@ enum node_subclass { /* WARNING: indices must match the vft... */
     NODE_MANUAL_PRINT,
     NODE_MANUAL_ENDIAN,
     NODE_DONT_TRACE,
+    NODE_AUTOREPLY,
 };
 
 enum passid {
@@ -84,6 +85,7 @@ typedef struct node_ {
 #define NODE_FLAG_MANUAL_ENDIAN (1<<1)
 #define NODE_FLAG_TYPEONLY (1<<3)
 #define NODE_FLAG_DONT_TRACE (1<<4)
+#define NODE_FLAG_AUTOREPLY (1<<5)
 
 typedef struct node_vft_ {
     void (*print)(struct node_ *);
diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api
index c38b483c..32e51407 100644
--- a/src/vlibmemory/memclnt.api
+++ b/src/vlibmemory/memclnt.api
@@ -72,7 +72,7 @@ define memclnt_read_timeout {
 /*
  * RPC
  */
-define rpc_call {
+autoreply define rpc_call {
     u32 client_index;
     u32 context;
     u64 function;
@@ -82,11 +82,6 @@ define rpc_call {
     u8 data[0];
 };
 
-define rpc_reply {
-    i32 retval;
-    u32 context;
-};
-
 /*
  * Lookup message-ID base by name
  */
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 7a536ee8..43574dea 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1275,7 +1275,7 @@ VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = {
 static void
 vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
 {
-  vl_api_rpc_reply_t *rmp;
+  vl_api_rpc_call_reply_t *rmp;
   int (*fp) (void *);
   i32 rv = 0;
   vlib_main_t *vm = vlib_get_main ();
@@ -1305,7 +1305,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
   if (q)
     {
       rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
-      rmp->_vl_msg_id = ntohs (VL_API_RPC_REPLY);
+      rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY);
       rmp->context = mp->context;
       rmp->retval = rv;
       vl_msg_api_send_shmem (q, (u8 *) & rmp);
@@ -1318,7 +1318,7 @@ vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
 }
 
 static void
-vl_api_rpc_reply_t_handler (vl_api_rpc_reply_t * mp)
+vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp)
 {
   clib_warning ("unimplemented");
 }
@@ -1415,7 +1415,7 @@ vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp)
 
 #define foreach_rpc_api_msg                     \
 _(RPC_CALL,rpc_call)                            \
-_(RPC_REPLY,rpc_reply)
+_(RPC_CALL_REPLY,rpc_call_reply)
 
 #define foreach_plugin_trace_msg               \
 _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api index
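Taken together, the tooling changes above wire the new keyword end to end: name_check() maps the autoreply keyword onto NODE_FLAG_AUTOREPLY, set_flags() invokes autoreply() when that flag is set, and autoreply() formats a reply definition and pushes it into push_input_fifo, from which getc_char() re-lexes it exactly as if it had been written in the .api source. For the rpc_call message converted above, the text pushed back into the lexer is essentially

    define rpc_call_reply
    {
        u32 context;
        i32 retval;
    };

which is why the hand-coded rpc_reply message is deleted and the memory_vlib.c handlers are renamed to the vl_api_rpc_call_reply_t / VL_API_RPC_CALL_REPLY identifiers the generator now emits.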
2cdcfad3..7bcaa4c3 100644 --- a/src/vnet/bfd/bfd.api +++ b/src/vnet/bfd/bfd.api @@ -18,43 +18,23 @@ @param context - sender context, to match reply w/ request @param sw_if_index - interface to use as echo source */ -define bfd_udp_set_echo_source +autoreply define bfd_udp_set_echo_source { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Set BFD feature response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_set_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete BFD echo source @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define bfd_udp_del_echo_source +autoreply define bfd_udp_del_echo_source { u32 client_index; u32 context; }; -/** \brief Delete BFD echo source response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_echo_source_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -69,7 +49,7 @@ define bfd_udp_del_echo_source_reply @param bfd_key_id - key id sent out in BFD packets (if is_authenticated) @param conf_key_id - id of already configured key (if is_authenticated) */ -define bfd_udp_add +autoreply define bfd_udp_add { u32 client_index; u32 context; @@ -85,16 +65,6 @@ define bfd_udp_add u32 conf_key_id; }; -/** \brief Add UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief Modify UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +76,7 @@ define bfd_udp_add_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param detect_mult - detect multiplier (# of packets missed before connection goes down) */ -define bfd_udp_mod +autoreply define bfd_udp_mod { u32 client_index; u32 context; @@ -119,16 +89,6 @@ define bfd_udp_mod u8 detect_mult; }; -/** \brief Modify UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete UDP BFD session on interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -137,7 +97,7 @@ define bfd_udp_mod_reply @param peer_addr - peer address @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 */ -define bfd_udp_del +autoreply define bfd_udp_del { u32 client_index; u32 context; @@ -147,16 +107,6 @@ define bfd_udp_del u8 is_ipv6; }; -/** \brief Delete UDP BFD session response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bfd_udp_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get all BFD sessions @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -206,7 +156,7 @@ define bfd_udp_session_details @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param admin_up_down - set the admin state, 1 = up, 0 = down */ -define 
bfd_udp_session_set_flags +autoreply define bfd_udp_session_set_flags { u32 client_index; u32 context; @@ -217,23 +167,13 @@ define bfd_udp_session_set_flags u8 admin_up_down; }; -/** \brief Reply to bfd_udp_session_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define bfd_udp_session_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for BFD events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_bfd_events +autoreply define want_bfd_events { u32 client_index; u32 context; @@ -241,16 +181,6 @@ define want_bfd_events u32 pid; }; -/** \brief Reply for BFD events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_bfd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - add/replace key to configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -259,7 +189,7 @@ define want_bfd_events_reply @param auth_type - authentication type (RFC 5880/4.1/Auth Type) @param key - key data */ -define bfd_auth_set_key +autoreply define bfd_auth_set_key { u32 client_index; u32 context; @@ -269,16 +199,6 @@ define bfd_auth_set_key u8 key[20]; }; -/** \brief BFD UDP - add/replace key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - delete key from configuration @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -286,23 +206,13 @@ define bfd_auth_set_key_reply @param key_len - length of key (must be non-zero) @param key - key data */ -define bfd_auth_del_key +autoreply define bfd_auth_del_key { u32 client_index; u32 context; u32 conf_key_id; }; -/** \brief BFD UDP - delete key reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_auth_del_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get a list of configured authentication keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -338,7 +248,7 @@ define bfd_auth_keys_details @param bfd_key_id - key id sent out in BFD packets @param conf_key_id - id of already configured key */ -define bfd_udp_auth_activate +autoreply define bfd_udp_auth_activate { u32 client_index; u32 context; @@ -351,16 +261,6 @@ define bfd_udp_auth_activate u32 conf_key_id; }; -/** \brief BFD UDP - activate/change authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_activate_reply -{ - u32 context; - i32 retval; -}; - /** \brief BFD UDP - deactivate authentication @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +270,7 @@ define bfd_udp_auth_activate_reply @param is_ipv6 - local_addr, peer_addr are IPv6 if non-zero, otherwise IPv4 @param is_delayed - change is applied once peer applies the change (on first received non-authenticated packet) */ -define bfd_udp_auth_deactivate +autoreply define 
bfd_udp_auth_deactivate { u32 client_index; u32 context; @@ -381,16 +281,6 @@ define bfd_udp_auth_deactivate u8 is_delayed; }; -/** \brief BFD UDP - deactivate authentication reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define bfd_udp_auth_deactivate_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api index 51ebd6c8..cacb9bed 100644 --- a/src/vnet/classify/classify.api +++ b/src/vnet/classify/classify.api @@ -92,7 +92,7 @@ define classify_add_del_table_reply VRF id if action is 1 or 2. @param match[] - for add, match value for session, required */ -define classify_add_del_session +autoreply define classify_add_del_session { u32 client_index; u32 context; @@ -106,16 +106,6 @@ define classify_add_del_session u8 match[0]; }; -/** \brief Classify add / del session response - @param context - sender context, to match reply w/ request - @param retval - return code for the add/del session request -*/ -define classify_add_del_session_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset policer classify interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +117,7 @@ define classify_add_del_session_reply Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define policer_classify_set_interface +autoreply define policer_classify_set_interface { u32 client_index; u32 context; @@ -138,16 +128,6 @@ define policer_classify_set_interface u8 is_add; }; -/** \brief Set/unset policer classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define policer_classify_set_interface_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get list of policer classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -308,7 +288,7 @@ define classify_session_details Note: User is recommeneded to use just one valid table_index per call. 
(ip4_table_index, ip6_table_index, or l2_table_index) */ -define flow_classify_set_interface { +autoreply define flow_classify_set_interface { u32 client_index; u32 context; u32 sw_if_index; @@ -317,15 +297,6 @@ define flow_classify_set_interface { u8 is_add; }; -/** \brief Set/unset flow classify interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define flow_classify_set_interface_reply { - u32 context; - i32 retval; -}; - /** \brief Get list of flow classify interfaces and tables @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -353,4 +324,4 @@ define flow_classify_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/cop/cop.api b/src/vnet/cop/cop.api index b34dae80..69316001 100644 --- a/src/vnet/cop/cop.api +++ b/src/vnet/cop/cop.api @@ -20,7 +20,7 @@ @param enable_disable - 1 => enable, 0 => disable */ -define cop_interface_enable_disable +autoreply define cop_interface_enable_disable { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define cop_interface_enable_disable u8 enable_disable; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief cop: enable/disable whitelist filtration features on an interface Note: the supplied fib_id must match in order to remove the feature! @@ -51,7 +40,7 @@ define cop_interface_enable_disable_reply @param default_cop - 1 => enable non-ip4, non-ip6 filtration 0=> disable it */ -define cop_whitelist_enable_disable +autoreply define cop_whitelist_enable_disable { u32 client_index; u32 context; @@ -62,17 +51,6 @@ define cop_whitelist_enable_disable u8 default_cop; }; -/** \brief cop: interface enable/disable junk filtration reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define cop_whitelist_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief get_node_graph - get a copy of the vpp node graph including the current set of graph arcs. 
@@ -85,4 +63,4 @@ define cop_whitelist_enable_disable_reply * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 9fb2a207..8d40ad60 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -46,7 +46,7 @@ define af_packet_create_reply @param context - sender context, to match reply w/ request @param host_if_name - interface name */ -define af_packet_delete +autoreply define af_packet_delete { u32 client_index; u32 context; @@ -54,16 +54,6 @@ define af_packet_delete u8 host_if_name[64]; }; -/** \brief Delete host-interface response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define af_packet_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/netmap/netmap.api b/src/vnet/devices/netmap/netmap.api index 377ccffd..8dc698b9 100644 --- a/src/vnet/devices/netmap/netmap.api +++ b/src/vnet/devices/netmap/netmap.api @@ -22,7 +22,7 @@ @param is_pipe - is pipe @param is_master - 0=slave, 1=master */ -define netmap_create +autoreply define netmap_create { u32 client_index; u32 context; @@ -34,22 +34,12 @@ define netmap_create u8 is_master; }; -/** \brief Create netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_create_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete netmap @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param netmap_if_name - interface name */ -define netmap_delete +autoreply define netmap_delete { u32 client_index; u32 context; @@ -57,16 +47,6 @@ define netmap_delete u8 netmap_if_name[64]; }; -/** \brief Delete netmap response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define netmap_delete_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/vnet/devices/virtio/vhost_user.api index 4f604e45..df7ce7ab 100644 --- a/src/vnet/devices/virtio/vhost_user.api +++ b/src/vnet/devices/virtio/vhost_user.api @@ -53,7 +53,7 @@ define create_vhost_user_if_reply @param sock_filename - unix socket filename, used to speak with frontend @param operation_mode - polling=0, interrupt=1, or adaptive=2 */ -define modify_vhost_user_if +autoreply define modify_vhost_user_if { u32 client_index; u32 context; @@ -65,36 +65,16 @@ define modify_vhost_user_if u8 operation_mode; }; -/** \brief vhost-user interface modify response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define modify_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief vhost-user interface delete request @param client_index - opaque cookie to identify the sender */ -define delete_vhost_user_if +autoreply define delete_vhost_user_if { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief vhost-user interface delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_vhost_user_if_reply -{ - u32 context; - i32 retval; -}; - /** \brief Vhost-user interface details structure (fix this) @param sw_if_index - index of the interface @param 
interface_name - name of interface diff --git a/src/vnet/dhcp/dhcp.api b/src/vnet/dhcp/dhcp.api index 2db85a79..eb0b070d 100644 --- a/src/vnet/dhcp/dhcp.api +++ b/src/vnet/dhcp/dhcp.api @@ -24,7 +24,7 @@ @param dhcp_server[] - server address @param dhcp_src_address[] - */ -define dhcp_proxy_config +autoreply define dhcp_proxy_config { u32 client_index; u32 context; @@ -36,16 +36,6 @@ define dhcp_proxy_config u8 dhcp_src_address[16]; }; -/** \brief DHCP Proxy config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Proxy set / unset vss request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -55,7 +45,7 @@ define dhcp_proxy_config_reply @param is_ipv6 - ip6 if non-zero, else ip4 @param is_add - set vss if non-zero, else delete */ -define dhcp_proxy_set_vss +autoreply define dhcp_proxy_set_vss { u32 client_index; u32 context; @@ -66,16 +56,6 @@ define dhcp_proxy_set_vss u8 is_add; }; -/** \brief DHCP proxy set / unset vss response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_proxy_set_vss_reply -{ - u32 context; - i32 retval; -}; - /** \brief DHCP Client config add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -86,7 +66,7 @@ define dhcp_proxy_set_vss_reply via dhcp_compl_event API message if non-zero @param pid - sender's pid */ -define dhcp_client_config +autoreply define dhcp_client_config { u32 client_index; u32 context; @@ -97,16 +77,6 @@ define dhcp_client_config u32 pid; }; -/** \brief DHCP Client config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define dhcp_client_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about a DHCP completion event @param client_index - opaque cookie to identify the sender @param pid - client pid registered to receive notification @@ -162,4 +132,4 @@ manual_endian manual_print define dhcp_proxy_details * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api index 0e0f99bf..1c5e8c5c 100644 --- a/src/vnet/flow/flow.api +++ b/src/vnet/flow/flow.api @@ -24,7 +24,7 @@ @param template_interval - number of seconds after which to resend template @param udp_checksum - UDP checksum calculation enable flag */ -define set_ipfix_exporter +autoreply define set_ipfix_exporter { u32 client_index; u32 context; @@ -37,15 +37,6 @@ define set_ipfix_exporter u8 udp_checksum; }; -/** \brief Reply to IPFIX exporter configure request - @param context - sender context which was passed in the request -*/ -define set_ipfix_exporter_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPFIX exporter dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -84,22 +75,13 @@ define ipfix_exporter_details @param domain_id - domain ID reported in IPFIX messages for classify stream @param src_port - source port of UDP session for classify stream */ -define set_ipfix_classify_stream { +autoreply define set_ipfix_classify_stream { u32 client_index; u32 context; u32 domain_id; u16 src_port; }; -/** \brief IPFIX classify stream 
configure response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define set_ipfix_classify_stream_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify stream dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -127,7 +109,7 @@ define ipfix_classify_stream_details { @param ip_version - version of IP used in the classifier table @param transport_protocol - transport protocol used in the classifier table or 255 for unspecified */ -define ipfix_classify_table_add_del { +autoreply define ipfix_classify_table_add_del { u32 client_index; u32 context; u32 table_id; @@ -136,14 +118,6 @@ define ipfix_classify_table_add_del { u8 is_add; }; -/** \brief IPFIX add classifier table response - @param context - sender context which was passed in the request -*/ -define ipfix_classify_table_add_del_reply { - u32 context; - i32 retval; -}; - /** \brief IPFIX classify tables dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 85fd73fb..9df63f18 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -6,7 +6,7 @@ @param link_up_down - Oper state sent on change event, not used in config. @param deleted - interface was deleted */ -define sw_interface_set_flags +autoreply define sw_interface_set_flags { u32 client_index; u32 context; @@ -17,23 +17,13 @@ define sw_interface_set_flags u8 deleted; }; -/** \brief Reply to sw_interface_set_flags - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_flags_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface MTU @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to set MTU on @param mtu - MTU */ -define sw_interface_set_mtu +autoreply define sw_interface_set_mtu { u32 client_index; u32 context; @@ -41,23 +31,13 @@ define sw_interface_set_mtu u16 mtu; }; -/** \brief Reply to sw_interface_set_mtu - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_set_mtu_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for interface events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param enable_disable - 1 => register for events, 0 => cancel registration @param pid - sender's pid */ -define want_interface_events +autoreply define want_interface_events { u32 client_index; u32 context; @@ -65,16 +45,6 @@ define want_interface_events u32 pid; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_interface_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface details structure (fix this) @param sw_if_index - index of the interface @param sup_sw_if_index - index of parent interface if any, else same as sw_if_index @@ -184,7 +154,7 @@ define sw_interface_dump @param address_length - address length in bytes, 4 for ip4, 16 for ip6 @param address - array of address bytes */ -define sw_interface_add_del_address +autoreply define 
sw_interface_add_del_address { u32 client_index; u32 context; @@ -196,16 +166,6 @@ define sw_interface_add_del_address u8 address[16]; }; -/** \brief Reply to sw_interface_add_del_address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_add_del_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Associate the specified interface with a fib table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +173,7 @@ define sw_interface_add_del_address_reply @param is_ipv6 - if non-zero ipv6, else ipv4 @param vrf_id - fib table/vrd id to associate the interface with */ -define sw_interface_set_table +autoreply define sw_interface_set_table { u32 client_index; u32 context; @@ -222,16 +182,6 @@ define sw_interface_set_table u32 vrf_id; }; -/** \brief Reply to sw_interface_set_table - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get VRF id assigned to interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +230,7 @@ define vnet_interface_counters @param unnumbered_sw_if_index - interface which will use the address @param is_add - if non-zero set the association, else unset it */ -define sw_interface_set_unnumbered +autoreply define sw_interface_set_unnumbered { u32 client_index; u32 context; @@ -289,38 +239,18 @@ define sw_interface_set_unnumbered u8 is_add; }; -/** \brief Set unnumbered interface add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_unnumbered_reply -{ - u32 context; - i32 retval; -}; - /** \brief Clear interface statistics @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - index of the interface to clear statistics */ -define sw_interface_clear_stats +autoreply define sw_interface_clear_stats { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply to sw_interface_clear_stats - @param context - sender context which was passed in the request - @param retval - return code of the set flags request -*/ -define sw_interface_clear_stats_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set / clear software interface tag @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -328,7 +258,7 @@ define sw_interface_clear_stats_reply @param add_del - 1 = add, 0 = delete @param tag - an ascii tag */ -define sw_interface_tag_add_del +autoreply define sw_interface_tag_add_del { u32 client_index; u32 context; @@ -337,23 +267,13 @@ define sw_interface_tag_add_del u8 tag[64]; }; -/** \brief Reply to set / clear software interface tag - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_tag_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set an interface's MAC address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface whose MAC will be set @param mac_addr - the new MAC address */ -define sw_interface_set_mac_address +autoreply define 
sw_interface_set_mac_address { u32 client_index; u32 context; @@ -361,16 +281,6 @@ define sw_interface_set_mac_address u8 mac_address[6]; }; -/** \brief Reply to setting an interface MAC address request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ -define sw_interface_set_mac_address_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 6af1714f..7097a130 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -136,7 +136,7 @@ define ip_neighbor_details { @param mac_address - l2 address of the neighbor @param dst_address - ip4 or ip6 address of the neighbor */ -define ip_neighbor_add_del +autoreply define ip_neighbor_add_del { u32 client_index; u32 context; @@ -150,16 +150,6 @@ define ip_neighbor_add_del u8 dst_address[16]; }; -/** \brief Reply for IP Neighbor add / delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_neighbor_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set the ip flow hash config for a fib request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ define ip_neighbor_add_del_reply @param proto -if non-zero include proto in flow hash @param reverse - if non-zero include reverse in flow hash */ -define set_ip_flow_hash +autoreply define set_ip_flow_hash { u32 client_index; u32 context; @@ -186,16 +176,6 @@ define set_ip_flow_hash u8 reverse; }; -/** \brief Set the ip flow hash config for a fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define set_ip_flow_hash_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -213,7 +193,7 @@ define set_ip_flow_hash_reply @param initial_count - @param initial_interval - */ -define sw_interface_ip6nd_ra_config +autoreply define sw_interface_ip6nd_ra_config { u32 client_index; u32 context; @@ -233,16 +213,6 @@ define sw_interface_ip6nd_ra_config u32 initial_interval; }; -/** \brief IPv6 router advertisement config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_config_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 router advertisement prefix config request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -272,7 +242,7 @@ define sw_interface_ip6nd_ra_config_reply preferred [ADDRCONF]. A value of all one bits (0xffffffff) represents infinity. 
*/ -define sw_interface_ip6nd_ra_prefix +autoreply define sw_interface_ip6nd_ra_prefix { u32 client_index; u32 context; @@ -289,16 +259,6 @@ define sw_interface_ip6nd_ra_prefix u32 pref_lifetime; }; -/** \brief IPv6 router advertisement prefix config response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6nd_ra_prefix_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy config @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -306,7 +266,7 @@ define sw_interface_ip6nd_ra_prefix_reply @param address - The address of the host for which to proxy for @param is_add - Adding or deleting */ -define ip6nd_proxy_add_del +autoreply define ip6nd_proxy_add_del { u32 client_index; u32 context; @@ -315,16 +275,6 @@ define ip6nd_proxy_add_del u8 address[16]; }; -/** \brief IPv6 ND proxy response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define ip6nd_proxy_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 ND proxy details returned after request @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -355,7 +305,7 @@ define ip6nd_proxy_dump @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable ip6 on interface, else disable */ -define sw_interface_ip6_enable_disable +autoreply define sw_interface_ip6_enable_disable { u32 client_index; u32 context; @@ -363,23 +313,13 @@ define sw_interface_ip6_enable_disable u8 enable; /* set to true if enable */ }; -/** \brief IPv6 interface enable / disable response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_ip6_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 set link local address on interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface to set link local on @param address[] - the new link local address */ -define sw_interface_ip6_set_link_local_address +autoreply define sw_interface_ip6_set_link_local_address { u32 client_index; u32 context; @@ -387,16 +327,6 @@ define sw_interface_ip6_set_link_local_address u8 address[16]; }; -/** \brief IPv6 set link local address on interface response - @param context - sender context, to match reply w/ request - @param retval - error code for the request -*/ -define sw_interface_ip6_set_link_local_address_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -422,7 +352,7 @@ define sw_interface_ip6_set_link_local_address_reply @param next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define ip_add_del_route +autoreply define ip_add_del_route { u32 client_index; u32 context; @@ -452,16 +382,6 @@ define ip_add_del_route u32 next_hop_out_label_stack[next_hop_n_out_labels]; }; -/** \brief Reply for add / del route request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_add_del_route_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add / del route request 
@param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +390,7 @@ define ip_add_del_route_reply FIXME */ -define ip_mroute_add_del +autoreply define ip_mroute_add_del { u32 client_index; u32 context; @@ -488,16 +408,6 @@ define ip_mroute_add_del u8 src_address[16]; }; -/** \brief Reply for add / del mroute request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_mroute_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump IP multicast fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api index ef090f84..203c5272 100644 --- a/src/vnet/ipsec/ipsec.api +++ b/src/vnet/ipsec/ipsec.api @@ -20,7 +20,7 @@ @param spd_id - SPD instance id (control plane allocated) */ -define ipsec_spd_add_del +autoreply define ipsec_spd_add_del { u32 client_index; u32 context; @@ -28,17 +28,6 @@ define ipsec_spd_add_del u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete SPD from interface @param client_index - opaque cookie to identify the sender @@ -49,7 +38,7 @@ define ipsec_spd_add_del_reply */ -define ipsec_interface_add_del_spd +autoreply define ipsec_interface_add_del_spd { u32 client_index; u32 context; @@ -59,17 +48,6 @@ define ipsec_interface_add_del_spd u32 spd_id; }; -/** \brief Reply for IPsec: Add/delete SPD from interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_interface_add_del_spd_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Policy Database entry See RFC 4301, 4.4.1.1 on how to match packet to selectors @@ -95,7 +73,7 @@ define ipsec_interface_add_del_spd_reply */ -define ipsec_spd_add_del_entry +autoreply define ipsec_spd_add_del_entry { u32 client_index; u32 context; @@ -125,17 +103,6 @@ define ipsec_spd_add_del_entry u32 sa_id; }; -/** \brief Reply for IPsec: Add/delete Security Policy Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_spd_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Add/delete Security Association Database entry @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -167,7 +134,7 @@ define ipsec_spd_add_del_entry_reply IPsec tunnel address copy mode (to support GDOI) */ -define ipsec_sad_add_del_entry +autoreply define ipsec_sad_add_del_entry { u32 client_index; u32 context; @@ -195,17 +162,6 @@ define ipsec_sad_add_del_entry u8 tunnel_dst_address[16]; }; -/** \brief Reply for IPsec: Add/delete Security Association Database entry - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sad_add_del_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPsec: Update Security Association keys @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -219,7 +175,7 @@ define ipsec_sad_add_del_entry_reply @param integrity_key - integrity keying material */ -define ipsec_sa_set_key +autoreply 
define ipsec_sa_set_key { u32 client_index; u32 context; @@ -233,17 +189,6 @@ define ipsec_sa_set_key u8 integrity_key[128]; }; -/** \brief Reply for IPsec: Update Security Association keys - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define ipsec_sa_set_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Add/delete profile @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -251,7 +196,7 @@ define ipsec_sa_set_key_reply @param name - IKEv2 profile name @param is_add - Add IKEv2 profile if non-zero, else delete */ -define ikev2_profile_add_del +autoreply define ikev2_profile_add_del { u32 client_index; u32 context; @@ -260,16 +205,6 @@ define ikev2_profile_add_del u8 is_add; }; -/** \brief Reply for IKEv2: Add/delete profile - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile authentication method @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -280,7 +215,7 @@ define ikev2_profile_add_del_reply @param data_len - Authentication data length @param data - Authentication data (for rsa-sig cert file path) */ -define ikev2_profile_set_auth +autoreply define ikev2_profile_set_auth { u32 client_index; u32 context; @@ -292,16 +227,6 @@ define ikev2_profile_set_auth u8 data[0]; }; -/** \brief Reply for IKEv2: Set IKEv2 profile authentication method - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_auth_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile local/remote identification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -312,7 +237,7 @@ define ikev2_profile_set_auth_reply @param data_len - Identification data length @param data - Identification data */ -define ikev2_profile_set_id +autoreply define ikev2_profile_set_id { u32 client_index; u32 context; @@ -324,16 +249,6 @@ define ikev2_profile_set_id u8 data[0]; }; -/** \brief Reply for IKEv2: - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_id_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 profile traffic selector parameters @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -346,7 +261,7 @@ define ikev2_profile_set_id_reply @param start_addr - The smallest address included in traffic selector @param end_addr - The largest address included in traffic selector */ -define ikev2_profile_set_ts +autoreply define ikev2_profile_set_ts { u32 client_index; u32 context; @@ -360,23 +275,13 @@ define ikev2_profile_set_ts u32 end_addr; }; -/** \brief Reply for IKEv2: Set IKEv2 profile traffic selector parameters - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_profile_set_ts_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 local RSA private key @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param key_file - Key file absolute path */ -define ikev2_set_local_key 
+autoreply define ikev2_set_local_key { u32 client_index; u32 context; @@ -384,16 +289,6 @@ define ikev2_set_local_key u8 key_file[256]; }; -/** \brief Reply for IKEv2: Set IKEv2 local key - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_local_key_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 responder interface and IP address @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -402,7 +297,7 @@ define ikev2_set_local_key_reply @param sw_if_index - interface index @param address - interface address */ -define ikev2_set_responder +autoreply define ikev2_set_responder { u32 client_index; u32 context; @@ -412,17 +307,6 @@ define ikev2_set_responder u8 address[4]; }; -/** \brief Reply for IKEv2: Set IKEv2 responder interface and IP address - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_responder_reply -{ - u32 context; - i32 retval; -}; - - /** \brief IKEv2: Set IKEv2 IKE transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -434,7 +318,7 @@ define ikev2_set_responder_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_ike_transforms +autoreply define ikev2_set_ike_transforms { u32 client_index; u32 context; @@ -446,16 +330,6 @@ define ikev2_set_ike_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 IKE transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_ike_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set IKEv2 ESP transforms in SA_INIT proposal (RFC 7296) @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -467,7 +341,7 @@ define ikev2_set_ike_transforms_reply @param dh_group - Diffie-Hellman group */ -define ikev2_set_esp_transforms +autoreply define ikev2_set_esp_transforms { u32 client_index; u32 context; @@ -479,16 +353,6 @@ define ikev2_set_esp_transforms u32 dh_group; }; -/** \brief Reply for IKEv2: Set IKEv2 ESP transforms - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_esp_transforms_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Set Child SA lifetime, limited by time and/or data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -500,7 +364,7 @@ define ikev2_set_esp_transforms_reply @param lifetime_maxdata - SA maximum life time in bytes (0 to disable) */ -define ikev2_set_sa_lifetime +autoreply define ikev2_set_sa_lifetime { u32 client_index; u32 context; @@ -512,16 +376,6 @@ define ikev2_set_sa_lifetime u64 lifetime_maxdata; }; -/** \brief Reply for IKEv2: Set Child SA lifetime - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_set_sa_lifetime_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the SA_INIT exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -529,7 +383,7 @@ define ikev2_set_sa_lifetime_reply @param name - IKEv2 profile name */ -define ikev2_initiate_sa_init +autoreply define ikev2_initiate_sa_init { 
u32 client_index; u32 context; @@ -537,16 +391,6 @@ define ikev2_initiate_sa_init u8 name[64]; }; -/** \brief Reply for IKEv2: Initiate the SA_INIT exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_sa_init_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete IKE SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -554,7 +398,7 @@ define ikev2_initiate_sa_init_reply @param ispi - IKE SA initiator SPI */ -define ikev2_initiate_del_ike_sa +autoreply define ikev2_initiate_del_ike_sa { u32 client_index; u32 context; @@ -562,16 +406,6 @@ define ikev2_initiate_del_ike_sa u64 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete IKE SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_ike_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the delete Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -579,7 +413,7 @@ define ikev2_initiate_del_ike_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_del_child_sa +autoreply define ikev2_initiate_del_child_sa { u32 client_index; u32 context; @@ -587,16 +421,6 @@ define ikev2_initiate_del_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the delete Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_del_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief IKEv2: Initiate the rekey Child SA exchange @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -604,7 +428,7 @@ define ikev2_initiate_del_child_sa_reply @param ispi - Child SA initiator SPI */ -define ikev2_initiate_rekey_child_sa +autoreply define ikev2_initiate_rekey_child_sa { u32 client_index; u32 context; @@ -612,16 +436,6 @@ define ikev2_initiate_rekey_child_sa u32 ispi; }; -/** \brief Reply for IKEv2: Initiate the rekey Child SA exchange - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ikev2_initiate_rekey_child_sa_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump ipsec policy database data @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -682,4 +496,4 @@ define ipsec_spd_details { * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api index c23eebec..db42d635 100644 --- a/src/vnet/l2/l2.api +++ b/src/vnet/l2/l2.api @@ -70,66 +70,36 @@ define l2_fib_table_dump @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ -define l2_fib_clear_table +autoreply define l2_fib_clear_table { u32 client_index; u32 context; }; -/** \brief L2 fib clear table response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_fib_clear_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush bridge domain entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge 
domain id */ -define l2fib_flush_bd +autoreply define l2fib_flush_bd { u32 client_index; u32 context; u32 bd_id; }; -/** \brief L2 FIB flush bridge domain entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_bd_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB flush interface entries @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bd_id - the entry's bridge domain id */ -define l2fib_flush_int +autoreply define l2fib_flush_int { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief L2 FIB flush interface entries response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2fib_flush_int_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 FIB add entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -140,7 +110,7 @@ define l2fib_flush_int_reply @param static_mac - @param filter_mac - */ -define l2fib_add_del +autoreply define l2fib_add_del { u32 client_index; u32 context; @@ -153,16 +123,6 @@ define l2fib_add_del u8 bvi_mac; }; -/** \brief L2 FIB add entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the add l2fib entry request -*/ -define l2fib_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 flags request !!! TODO - need more info, feature bits in l2_input.h @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -196,7 +156,7 @@ define l2_flags_reply @param bd_id - the bridge domain to create @param mac_age - mac aging time in min, 0 for disabled */ -define bridge_domain_set_mac_age +autoreply define bridge_domain_set_mac_age { u32 client_index; u32 context; @@ -204,16 +164,6 @@ define bridge_domain_set_mac_age u8 mac_age; }; -/** \brief Set bridge domain response - @param context - sender context, to match reply w/ request - @param retval - return code for the set l2 bits request -*/ -define bridge_domain_set_mac_age_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -226,7 +176,7 @@ define bridge_domain_set_mac_age_reply @param mac_age - mac aging time in min, 0 for disabled @param is_add - add or delete flag */ -define bridge_domain_add_del +autoreply define bridge_domain_add_del { u32 client_index; u32 context; @@ -240,16 +190,6 @@ define bridge_domain_add_del u8 is_add; }; -/** \brief L2 bridge domain add or delete response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bridge_domain_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 bridge domain request operational state details @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -337,7 +277,7 @@ define bridge_flags_reply @param tag1 - Needed for any push or translate vtr op @param tag2 - Needed for any push 2 or translate x-2 vtr ops */ -define l2_interface_vlan_tag_rewrite +autoreply define l2_interface_vlan_tag_rewrite { u32 client_index; u32 context; @@ -348,16 +288,6 @@ define l2_interface_vlan_tag_rewrite u32 
tag2; // second pushed tag }; -/** \brief L2 interface vlan tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_vlan_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface pbb tag rewrite configure request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -370,7 +300,7 @@ define l2_interface_vlan_tag_rewrite_reply @param b_vlanid - B-tag vlanid, needed for any push or translate qinq vtr op @param i_sid - I-tag service id, needed for any push or translate qinq vtr op */ -define l2_interface_pbb_tag_rewrite +autoreply define l2_interface_pbb_tag_rewrite { u32 client_index; u32 context; @@ -383,16 +313,6 @@ define l2_interface_pbb_tag_rewrite u32 i_sid; }; -/** \brief L2 interface pbb tag rewrite response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_pbb_tag_rewrite_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/l2tp/l2tp.api b/src/vnet/l2tp/l2tp.api index 5a5a5a48..4587a807 100644 --- a/src/vnet/l2tp/l2tp.api +++ b/src/vnet/l2tp/l2tp.api @@ -52,7 +52,7 @@ define l2tpv3_create_tunnel_reply u32 sw_if_index; }; -define l2tpv3_set_tunnel_cookies +autoreply define l2tpv3_set_tunnel_cookies { u32 client_index; u32 context; @@ -61,16 +61,6 @@ define l2tpv3_set_tunnel_cookies u64 new_remote_cookie; }; -/** \brief L2TP tunnel set cookies response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2tpv3_set_tunnel_cookies_reply -{ - u32 context; - i32 retval; -}; - define sw_if_l2tpv3_tunnel_details { u32 context; @@ -91,7 +81,7 @@ define sw_if_l2tpv3_tunnel_dump u32 context; }; -define l2tpv3_interface_enable_disable +autoreply define l2tpv3_interface_enable_disable { u32 client_index; u32 context; @@ -99,13 +89,7 @@ define l2tpv3_interface_enable_disable u32 sw_if_index; }; -define l2tpv3_interface_enable_disable_reply -{ - u32 context; - i32 retval; -}; - -define l2tpv3_set_lookup_key +autoreply define l2tpv3_set_lookup_key { u32 client_index; u32 context; @@ -113,12 +97,6 @@ define l2tpv3_set_lookup_key u8 key; }; -define l2tpv3_set_lookup_key_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") diff --git a/src/vnet/lisp-cp/lisp.api b/src/vnet/lisp-cp/lisp.api index a50a5ccb..8bed71b3 100644 --- a/src/vnet/lisp-cp/lisp.api +++ b/src/vnet/lisp-cp/lisp.api @@ -59,7 +59,7 @@ define lisp_add_del_locator_set_reply @param priority - priority of the lisp locator @param weight - weight of the lisp locator */ -define lisp_add_del_locator +autoreply define lisp_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define lisp_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete lisp eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define lisp_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define lisp_add_del_local_eid +autoreply define lisp_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 
@@ define lisp_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define lisp_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define lisp_add_del_map_server +autoreply define lisp_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define lisp_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for lisp_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define lisp_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define lisp_add_del_map_resolver +autoreply define lisp_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define lisp_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable LISP feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define lisp_enable_disable +autoreply define lisp_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable LISP PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_pitr_set_locator_set +autoreply define lisp_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define lisp_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define lisp_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable pitr */ -define lisp_use_petr +autoreply define lisp_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define lisp_use_petr u8 is_add; }; -/** \brief Reply for lisp_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return 
code -*/ -define lisp_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_lisp_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_rloc_probe_enable_disable +autoreply define lisp_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define lisp_map_register_enable_disable +autoreply define lisp_map_register_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for lisp_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of LISP map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_lisp_map_register_state_reply 0 - destination only 1 - source/destaination */ -define lisp_map_request_mode +autoreply define lisp_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for lisp_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for LISP map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define lisp_add_del_remote_mapping +autoreply manual_print manual_endian define lisp_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define lisp_add_del_remote_mapping vl_api_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for lisp_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete LISP adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define lisp_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define lisp_add_del_adjacency +autoreply define lisp_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define lisp_add_del_adjacency u8 leid_len; }; -/** \brief Reply for lisp_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** 
\brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define lisp_add_del_map_request_itr_rlocs +autoreply define lisp_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -512,12 +392,6 @@ define lisp_add_del_map_request_itr_rlocs @param retval - return code */ -define lisp_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +399,7 @@ define lisp_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define lisp_eid_table_add_del_map +autoreply define lisp_eid_table_add_del_map { u32 client_index; u32 context; @@ -535,16 +409,6 @@ define lisp_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for lisp_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define lisp_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map lisp locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api index ca82f694..2fa1edf6 100644 --- a/src/vnet/lisp-cp/one.api +++ b/src/vnet/lisp-cp/one.api @@ -59,7 +59,7 @@ define one_add_del_locator_set_reply @param priority - priority of the locator @param weight - weight of the locator */ -define one_add_del_locator +autoreply define one_add_del_locator { u32 client_index; u32 context; @@ -70,16 +70,6 @@ define one_add_del_locator u8 weight; }; -/** \brief Reply for locator add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_locator_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE eid-table @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -98,7 +88,7 @@ define one_add_del_locator_reply HMAC_SHA_256_128 2 @param key - secret key */ -define one_add_del_local_eid +autoreply define one_add_del_local_eid { u32 client_index; u32 context; @@ -112,16 +102,6 @@ define one_add_del_local_eid u8 key[64]; }; -/** \brief Reply for local_eid add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_local_eid_reply -{ - u32 context; - i32 retval; -}; - /** \brief Add/delete map server @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -129,7 +109,7 @@ define one_add_del_local_eid_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - map server IP address */ -define one_add_del_map_server +autoreply define one_add_del_map_server { u32 client_index; u32 context; @@ -138,16 +118,6 @@ define one_add_del_map_server u8 ip_address[16]; }; -/** \brief Reply for one_add_del_map_server - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_server_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map-resolver @param client_index - opaque cookie to 
identify the sender @param context - sender context, to match reply w/ request @@ -155,7 +125,7 @@ define one_add_del_map_server_reply @param is_ipv6 - if non-zero the address is ipv6, else ipv4 @param ip_address - array of address bytes */ -define one_add_del_map_resolver +autoreply define one_add_del_map_resolver { u32 client_index; u32 context; @@ -164,45 +134,25 @@ define one_add_del_map_resolver u8 ip_address[16]; }; -/** \brief Reply for map_resolver add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_map_resolver_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable ONE feature @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define one_enable_disable +autoreply define one_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable ONE PITR node @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param ls_name - locator set name @param is_add - add locator set if non-zero, else disable pitr */ -define one_pitr_set_locator_set +autoreply define one_pitr_set_locator_set { u32 client_index; u32 context; @@ -210,16 +160,6 @@ define one_pitr_set_locator_set u8 ls_name[64]; }; -/** \brief Reply for one_pitr_set_locator_set - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_pitr_set_locator_set_reply -{ - u32 context; - i32 retval; -}; - /** \brief configure or disable use of PETR @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -227,7 +167,7 @@ define one_pitr_set_locator_set_reply @param address - PETR IP address @param is_add - add locator set if non-zero, else disable PETR */ -define one_use_petr +autoreply define one_use_petr { u32 client_index; u32 context; @@ -236,16 +176,6 @@ define one_use_petr u8 is_add; }; -/** \brief Reply for one_use_petr - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_use_petr_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE PETR status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -298,45 +228,25 @@ define show_one_rloc_probe_state_reply @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_rloc_probe_enable_disable +autoreply define one_rloc_probe_enable_disable { u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_rloc_probe_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_rloc_probe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable/disable ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enable - enable if non-zero; disable otherwise */ -define one_map_register_enable_disable +autoreply define one_map_register_enable_disable 
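On the client side nothing changes functionally: a concrete reply message still arrives for each of these requests. A sketch of a reply handler, assuming the usual vl_api_<name>_reply_t type and _t_handler naming produced by the API machinery (the generated header providing the type is not shown):

    static void
    vl_api_one_map_register_enable_disable_reply_t_handler
      (vl_api_one_map_register_enable_disable_reply_t * mp)
    {
      /* retval travels in network byte order */
      i32 retval = ntohl (mp->retval);

      if (retval != 0)
        clib_warning ("one_map_register_enable_disable failed: %d", retval);
    }
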
{ u32 client_index; u32 context; u8 is_enabled; }; -/** \brief Reply for one_map_register_enable_disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_register_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get state of ONE map-register @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -366,23 +276,13 @@ define show_one_map_register_state_reply 0 - destination only 1 - source/destaination */ -define one_map_request_mode +autoreply define one_map_request_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for one_map_request_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_map_request_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for ONE map-request mode @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -430,7 +330,7 @@ typeonly manual_endian manual_print define one_remote_locator @param rloc_num - number of remote locators @param rlocs - remote locator records */ -manual_print manual_endian define one_add_del_remote_mapping +autoreply manual_print manual_endian define one_add_del_remote_mapping { u32 client_index; u32 context; @@ -448,16 +348,6 @@ manual_print manual_endian define one_add_del_remote_mapping vl_api_one_remote_locator_t rlocs[rloc_num]; }; -/** \brief Reply for one_add_del_remote_mapping - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_remote_mapping_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete ONE adjacency adjacency @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -470,7 +360,7 @@ define one_add_del_remote_mapping_reply @param reid - remote EID @param leid - local EID */ -define one_add_del_adjacency +autoreply define one_add_del_adjacency { u32 client_index; u32 context; @@ -483,23 +373,13 @@ define one_add_del_adjacency u8 leid_len; }; -/** \brief Reply for one_add_del_adjacency - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_add_del_adjacency_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete map request itr rlocs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete @param locator_set_name - locator set name */ -define one_add_del_map_request_itr_rlocs +autoreply define one_add_del_map_request_itr_rlocs { u32 client_index; u32 context; @@ -507,17 +387,6 @@ define one_add_del_map_request_itr_rlocs u8 locator_set_name[64]; }; -/** \brief Reply for one_add_del_map_request_itr_rlocs - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ - -define one_add_del_map_request_itr_rlocs_reply -{ - u32 context; - i32 retval; -}; - /** \brief map/unmap vni/bd_index to vrf @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -525,7 +394,7 @@ define one_add_del_map_request_itr_rlocs_reply @param dp_table - virtual network id/bridge domain index @param vrf - vrf */ -define one_eid_table_add_del_map +autoreply define one_eid_table_add_del_map { u32 client_index; 
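The request side is also unchanged by autoreply. A sketch of allocating and sending one of the converted requests over the shared-memory queue, using one_stats_flush from this file; api_main_t, vl_msg_api_alloc and vl_msg_api_send_shmem are the existing shared-memory primitives, while the message id constant and client_index value are assumed to come from the normal connect sequence:

    static void
    send_one_stats_flush (api_main_t * am, u32 my_client_index, u32 my_context)
    {
      vl_api_one_stats_flush_t *mp;

      mp = vl_msg_api_alloc (sizeof (*mp));
      memset (mp, 0, sizeof (*mp));
      mp->_vl_msg_id = ntohs (VL_API_ONE_STATS_FLUSH);
      mp->client_index = my_client_index;
      mp->context = my_context;   /* echoed back in the generated reply */

      vl_msg_api_send_shmem (am->shmem_hdr->vl_input_queue, (u8 *) & mp);
    }
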
u32 context; @@ -535,16 +404,6 @@ define one_eid_table_add_del_map u8 is_l2; }; -/** \brief Reply for one_eid_table_add_del_map - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define one_eid_table_add_del_map_reply -{ - u32 context; - i32 retval; -}; - /** \brief Request for map one locator status @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -901,31 +760,19 @@ define one_stats_details u32 bytes; }; -define one_stats_flush +autoreply define one_stats_flush { u32 client_index; u32 context; }; -define one_stats_flush_reply -{ - u32 context; - i32 retval; -}; - -define one_stats_enable_disable +autoreply define one_stats_enable_disable { u32 client_index; u32 context; u8 is_en; }; -define one_stats_enable_disable_reply -{ - u32 context; - i32 retval; -}; - define show_one_stats_enable_disable { u32 client_index; diff --git a/src/vnet/lisp-gpe/lisp_gpe.api b/src/vnet/lisp-gpe/lisp_gpe.api index 43a6a6cd..f79d18c1 100644 --- a/src/vnet/lisp-gpe/lisp_gpe.api +++ b/src/vnet/lisp-gpe/lisp_gpe.api @@ -43,7 +43,7 @@ typeonly manual_print manual_endian define gpe_locator @param loc_num - number of locators @param locs - array of remote locators */ -manual_print manual_endian define gpe_add_del_fwd_entry +autoreply manual_print manual_endian define gpe_add_del_fwd_entry { u32 client_index; u32 context; @@ -60,44 +60,24 @@ manual_print manual_endian define gpe_add_del_fwd_entry vl_api_gpe_locator_t locs[loc_num]; }; -/** \brief Reply for gpe_fwd_entry add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_fwd_entry_reply -{ - u32 context; - i32 retval; -}; - /** \brief enable or disable gpe protocol @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_en - enable protocol if non-zero, else disable */ -define gpe_enable_disable +autoreply define gpe_enable_disable { u32 client_index; u32 context; u8 is_en; }; -/** \brief Reply for gpe enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief add or delete gpe_iface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_add - add address if non-zero, else delete */ -define gpe_add_del_iface +autoreply define gpe_add_del_iface { u32 client_index; u32 context; @@ -107,16 +87,6 @@ define gpe_add_del_iface u32 vni; }; -/** \brief Reply for gpe_iface add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_add_del_iface_reply -{ - u32 context; - i32 retval; -}; - define gpe_fwd_entries_get { u32 client_index; @@ -163,23 +133,13 @@ manual_endian manual_print define gpe_fwd_entry_path_details @param context - sender context, to match reply w/ request @param mode - LISP (value 0) or VXLAN (value 1) */ -define gpe_set_encap_mode +autoreply define gpe_set_encap_mode { u32 client_index; u32 context; u8 mode; }; -/** \brief Reply for set_encap_mode - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define gpe_set_encap_mode_reply -{ - u32 context; - i32 retval; -}; - /** \brief get GPE encapsulation mode @param client_index - opaque cookie to identify the 
sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api index 4e4be85e..d68f13f0 100644 --- a/src/vnet/map/map.api +++ b/src/vnet/map/map.api @@ -62,22 +62,13 @@ define map_add_domain_reply @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define map_del_domain +autoreply define map_del_domain { u32 client_index; u32 context; u32 index; }; -/** \brief Reply for MAP domain del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_del_domain_reply -{ - u32 context; - i32 retval; -}; /** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber) @param client_index - opaque cookie to identify the sender @@ -87,7 +78,7 @@ define map_del_domain_reply @param ip6_dst - MAP CE IPv6 address @param psid - Rule PSID */ -define map_add_del_rule +autoreply define map_add_del_rule { u32 client_index; u32 context; @@ -97,15 +88,6 @@ define map_add_del_rule u16 psid; }; -/** \brief Reply for MAP rule add/del - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define map_add_del_rule_reply -{ - u32 context; - i32 retval; -}; /** \brief Get list of map domains @param client_index - opaque cookie to identify the sender diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index a1e1270a..c8a3ffb7 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -26,7 +26,7 @@ @param mb_address_length - Length of IP prefix @param mb_address[16] - IP prefix/ */ -define mpls_ip_bind_unbind +autoreply define mpls_ip_bind_unbind { u32 client_index; u32 context; @@ -40,16 +40,6 @@ define mpls_ip_bind_unbind u8 mb_address[16]; }; -/** \brief Reply for MPLS IP bind/unbind request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_ip_bind_unbind_reply -{ - u32 context; - i32 retval; -}; - /** \brief MPLS tunnel Add / del route @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -172,7 +162,7 @@ manual_endian manual_print define mpls_tunnel_details @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first @param next_hop_via_label - The next-hop is a resolved via a local label */ -define mpls_route_add_del +autoreply define mpls_route_add_del { u32 client_index; u32 context; @@ -199,16 +189,6 @@ define mpls_route_add_del u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels]; }; -/** \brief Reply for MPLS route add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define mpls_route_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ @@ -240,4 +220,4 @@ manual_endian manual_print define mpls_fib_details * eval: (c-set-style "gnu") * End: */ - \ No newline at end of file + diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index e207e46f..4aef09da 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -49,26 +49,17 @@ define application_attach_reply { @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request */ - define application_detach { +autoreply define application_detach { u32 client_index; u32 context; }; - /** \brief detach 
reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define application_detach_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param segment_name - */ -define map_another_segment { +autoreply define map_another_segment { u32 client_index; u32 context; u32 segment_size; @@ -83,7 +74,7 @@ define map_another_segment { "tcp://::/0/80" [ipv6] etc. @param options - socket options, fifo sizes, etc. */ -define bind_uri { +autoreply define bind_uri { u32 client_index; u32 context; u32 accept_cookie; @@ -97,7 +88,7 @@ define bind_uri { "tcp://::/0/80" [ipv6], etc. @param options - socket options, fifo sizes, etc. */ -define unbind_uri { +autoreply define unbind_uri { u32 client_index; u32 context; u8 uri[128]; @@ -122,24 +113,6 @@ define connect_uri { u64 options[16]; }; -/** \brief Bind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define bind_uri_reply { - u32 context; - i32 retval; -}; - -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_uri_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -165,15 +138,6 @@ define connect_uri_reply { u8 segment_name[128]; }; -/** \brief client->vpp - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define map_another_segment_reply { - u32 context; - i32 retval; -}; - /** \brief vpp->client, accept this session @param context - sender context, to match reply w/ request @param listener_handle - tells client which listener this pertains to @@ -290,7 +254,7 @@ define bind_sock { @param context - sender context, to match reply w/ request @param handle - bind handle obtained from bind reply */ -define unbind_sock { +autoreply define unbind_sock { u32 client_index; u32 context; u64 handle; @@ -339,15 +303,6 @@ define bind_sock_reply { u8 segment_name[128]; }; -/** \brief unbind reply - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define unbind_sock_reply { - u32 context; - i32 retval; -}; - /** \brief vpp/server->client, connect reply @param context - sender context, to match reply w/ request @param retval - return code for the request @@ -378,23 +333,14 @@ define connect_sock_reply { @param context - sender context, to match reply w/ request @param is_enable - disable session layer if 0, enable otherwise */ -define session_enable_disable { +autoreply define session_enable_disable { u32 client_index; u32 context; u8 is_enable; }; -/** \brief Reply for session enable/disable - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define session_enable_disable_reply { - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") * End: - */ \ No newline at end of file + */ diff --git a/src/vnet/span/span.api b/src/vnet/span/span.api index 4babdd83..914fd8d0 100644 --- a/src/vnet/span/span.api +++ b/src/vnet/span/span.api @@ -21,7 +21,7 @@ @param sw_if_index_to - interface where the traffic is mirrored @param state - 0 = 
disabled, 1 = rx enabled, 2 = tx enabled, 3 tx & rx enabled */ -define sw_interface_span_enable_disable { +autoreply define sw_interface_span_enable_disable { u32 client_index; u32 context; u32 sw_if_index_from; @@ -29,14 +29,6 @@ define sw_interface_span_enable_disable { u8 state; }; -/** \brief Reply to SPAN enable/disable request - @param context - sender context which was passed in the request -*/ -define sw_interface_span_enable_disable_reply { - u32 context; - i32 retval; -}; - /** \brief SPAN dump request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 5feadcb0..9e900741 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -25,7 +25,7 @@ @param fib_table FIB table in which we should install the localsid entry @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. */ -define sr_localsid_add_del +autoreply define sr_localsid_add_del { u32 client_index; u32 context; @@ -39,16 +39,6 @@ define sr_localsid_add_del u8 nh_addr[16]; }; -/** \brief IPv6 SR LocalSID add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_localsid_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy add @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -59,7 +49,7 @@ define sr_localsid_add_del_reply @param fib_table is the VRF where to install the FIB entry for the BSID @param segments is a vector of IPv6 address composing the segment list */ -define sr_policy_add +autoreply define sr_policy_add { u32 client_index; u32 context; @@ -72,16 +62,6 @@ define sr_policy_add u8 segments[0]; }; -/** \brief IPv6 SR Policy add request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_add_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -94,7 +74,7 @@ define sr_policy_add_reply @param weight is the weight of the sid list. optional. @param is_encap Mode. Encapsulation or SRH insertion. 
*/ -define sr_policy_mod +autoreply define sr_policy_mod { u32 client_index; u32 context; @@ -108,23 +88,13 @@ define sr_policy_mod u8 segments[0]; }; -/** \brief IPv6 SR Policy modification request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_mod_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param bsid is the bindingSID of the SR Policy @param index is the index of the SR policy */ -define sr_policy_del +autoreply define sr_policy_del { u32 client_index; u32 context; @@ -132,16 +102,6 @@ define sr_policy_del u32 sr_policy_index; }; -/** \brief IPv6 SR Policy deletion request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_policy_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -154,7 +114,7 @@ define sr_policy_del_reply @param sw_if_index is the incoming interface for L2 traffic @param traffic_type describes the type of traffic */ -define sr_steering_add_del +autoreply define sr_steering_add_del { u32 client_index; u32 context; @@ -168,16 +128,6 @@ define sr_steering_add_del u8 traffic_type; }; -/** \brief IPv6 SR steering add/del request response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define sr_steering_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump the list of SR LocalSIDs @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request diff --git a/src/vnet/unix/tap.api b/src/vnet/unix/tap.api index 1fd0bb09..d9fba371 100644 --- a/src/vnet/unix/tap.api +++ b/src/vnet/unix/tap.api @@ -93,23 +93,13 @@ define tap_modify_reply @param context - sender context, to match reply w/ request @param sw_if_index - interface index of existing tap interface */ -define tap_delete +autoreply define tap_delete { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Reply for tap delete request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define tap_delete_reply -{ - u32 context; - i32 retval; -}; - /** \brief Dump tap interfaces request */ define sw_interface_tap_dump { diff --git a/src/vnet/vxlan/vxlan.api b/src/vnet/vxlan/vxlan.api index 048220fb..6c331a58 100644 --- a/src/vnet/vxlan/vxlan.api +++ b/src/vnet/vxlan/vxlan.api @@ -61,7 +61,7 @@ define vxlan_tunnel_details @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass @param enable - if non-zero enable, else disable */ -define sw_interface_set_vxlan_bypass +autoreply define sw_interface_set_vxlan_bypass { u32 client_index; u32 context; @@ -69,13 +69,3 @@ define sw_interface_set_vxlan_bypass u8 is_ipv6; u8 enable; }; - -/** \brief Interface set vxlan-bypass response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vxlan_bypass_reply -{ - u32 context; - i32 retval; -}; \ No newline at end of file diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api index a4ba180d..7c07c822 100644 --- a/src/vpp/api/vpe.api +++ b/src/vpp/api/vpe.api @@ -80,7 +80,7 @@ define 
create_vlan_subif_reply @param sw_if_index - index of the interface @param enable - if non-zero enable, else disable */ -define sw_interface_set_mpls_enable +autoreply define sw_interface_set_mpls_enable { u32 client_index; u32 context; @@ -88,16 +88,6 @@ define sw_interface_set_mpls_enable u8 enable; }; -/** \brief Reply for MPLS state on an interface - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define sw_interface_set_mpls_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -106,7 +96,7 @@ define sw_interface_set_mpls_enable_reply @param low_address[4] - Low address of the Proxy ARP range @param hi_address[4] - High address of the Proxy ARP range */ -define proxy_arp_add_del +autoreply define proxy_arp_add_del { u32 client_index; u32 context; @@ -116,23 +106,13 @@ define proxy_arp_add_del u8 hi_address[4]; }; -/** \brief Reply for proxy arp add / del request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Proxy ARP add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - Which interface to enable / disable Proxy Arp on @param enable_disable - 1 to enable Proxy ARP on interface, 0 to disable */ -define proxy_arp_intfc_enable_disable +autoreply define proxy_arp_intfc_enable_disable { u32 client_index; u32 context; @@ -141,23 +121,13 @@ define proxy_arp_intfc_enable_disable u8 enable_disable; }; -/** \brief Reply for Proxy ARP interface enable / disable request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define proxy_arp_intfc_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset VRF (remove all routes etc) request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 @param vrf_id - ID of th FIB table / VRF to reset */ -define reset_vrf +autoreply define reset_vrf { u32 client_index; u32 context; @@ -165,16 +135,6 @@ define reset_vrf u32 vrf_id; }; -/** \brief Reply for Reset VRF request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define reset_vrf_reply -{ - u32 context; - i32 retval; -}; - /** \brief Is Address Reachable request - DISABLED @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -200,7 +160,7 @@ define is_address_reachable @param enable_disable - 1 = enable stats, 0 = disable @param pid - pid of process requesting stats updates */ -define want_stats +autoreply define want_stats { u32 client_index; u32 context; @@ -208,16 +168,6 @@ define want_stats u32 pid; }; -/** \brief Reply for Want Stats request - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_stats_reply -{ - u32 context; - i32 retval; -}; - typeonly manual_print manual_endian define ip4_fib_counter { u32 address; @@ -331,7 +281,7 @@ define oam_event @param enable_disable- enable if non-zero, else disable @param pid - pid of the requesting process */ -define want_oam_events +autoreply 
define want_oam_events { u32 client_index; u32 context; @@ -339,16 +289,6 @@ define want_oam_events u32 pid; }; -/** \brief Want OAM events response - @param context - sender context, to match reply w/ request - @param retval - return code for the want oam stats request -*/ -define want_oam_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief OAM add / del target request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -357,7 +297,7 @@ define want_oam_events_reply @param dst_address[] - destination address of the target @param is_add - add target if non-zero, else delete */ -define oam_add_del +autoreply define oam_add_del { u32 client_index; u32 context; @@ -367,23 +307,13 @@ define oam_add_del u8 is_add; }; -/** \brief OAM add / del target response - @param context - sender context, to match reply w/ request - @param retval - return code of the request -*/ -define oam_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Reset fib table request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param vrf_id - vrf/table id of the fib table to reset @param is_ipv6 - an ipv6 fib to reset if non-zero, else ipv4 */ -define reset_fib +autoreply define reset_fib { u32 client_index; u32 context; @@ -391,16 +321,6 @@ define reset_fib u8 is_ipv6; }; -/** \brief Reset fib response - @param context - sender context, to match reply w/ request - @param retval - return code for the reset bfib request -*/ -define reset_fib_reply -{ - u32 context; - i32 retval; -}; - /** \brief Create loopback interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -458,23 +378,13 @@ define create_loopback_instance_reply @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created */ -define delete_loopback +autoreply define delete_loopback { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete loopback interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_loopback_reply -{ - u32 context; - i32 retval; -}; - /** \brief Control ping from client to api server request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -543,7 +453,7 @@ define cli_inband_reply @param is_ipv6 - neighbor limit if non-zero, else ARP limit @param arp_neighbor_limit - the new limit, defaults are ~ 50k */ -define set_arp_neighbor_limit +autoreply define set_arp_neighbor_limit { u32 client_index; u32 context; @@ -551,16 +461,6 @@ define set_arp_neighbor_limit u32 arp_neighbor_limit; }; -/** \brief Set max allowed ARP or ip6 neighbor entries response - @param context - sender context, to match reply w/ request - @param retval - return code for request -*/ -define set_arp_neighbor_limit_reply -{ - u32 context; - i32 retval; -}; - /** \brief L2 interface patch add / del request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -568,7 +468,7 @@ define set_arp_neighbor_limit_reply @param tx_sw_if_index - transmit side interface @param is_add - if non-zero set up the interface patch, else remove it */ -define l2_patch_add_del +autoreply define l2_patch_add_del { u32 client_index; u32 context; @@ -577,23 +477,13 @@ 
define l2_patch_add_del u8 is_add; }; -/** \brief L2 interface patch add / del response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_patch_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface set vpath request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - interface used to reach neighbor @param enable - if non-zero enable, else disable */ -define sw_interface_set_vpath +autoreply define sw_interface_set_vpath { u32 client_index; u32 context; @@ -601,16 +491,6 @@ define sw_interface_set_vpath u8 enable; }; -/** \brief Interface set vpath response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define sw_interface_set_vpath_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set L2 XConnect between two interfaces request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -618,7 +498,7 @@ define sw_interface_set_vpath_reply @param tx_sw_if_index - Transmit interface index @param enable - enable xconnect if not 0, else set to L3 mode */ -define sw_interface_set_l2_xconnect +autoreply define sw_interface_set_l2_xconnect { u32 client_index; u32 context; @@ -627,16 +507,6 @@ define sw_interface_set_l2_xconnect u8 enable; }; -/** \brief Set L2 XConnect response - @param context - sender context, to match reply w/ request - @param retval - L2 XConnect request return code -*/ -define sw_interface_set_l2_xconnect_reply -{ - u32 context; - i32 retval; -}; - /** \brief Interface bridge mode request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -646,7 +516,7 @@ define sw_interface_set_l2_xconnect_reply @param shg - Shared horizon group, for bridge mode only @param enable - Enable beige mode if not 0, else set to L3 mode */ -define sw_interface_set_l2_bridge +autoreply define sw_interface_set_l2_bridge { u32 client_index; u32 context; @@ -657,16 +527,6 @@ define sw_interface_set_l2_bridge u8 enable; }; -/** \brief Interface bridge mode response - @param context - sender context, to match reply w/ request - @param retval - Bridge mode request return code -*/ -define sw_interface_set_l2_bridge_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set bridge domain ip to mac entry request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -676,7 +536,7 @@ define sw_interface_set_l2_bridge_reply @param mac_address - MAC address @param */ -define bd_ip_mac_add_del +autoreply define bd_ip_mac_add_del { u32 client_index; u32 context; @@ -687,16 +547,6 @@ define bd_ip_mac_add_del u8 mac_address[6]; }; -/** \brief Set bridge domain ip to mac entry response - @param context - sender context, to match reply w/ request - @param retval - return code for the set bridge flags request -*/ -define bd_ip_mac_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset the classification table for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -704,7 +554,7 @@ define bd_ip_mac_add_del_reply @param sw_if_index - interface to associate with the table @param table_index - index of the table, if ~0 unset the table */ -define classify_set_interface_ip_table 
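On the vpp side each handler still has to build and send the generated reply. The in-tree handlers do this through a shared REPLY_MACRO helper; the hand-rolled sketch below spells out the same steps for the message being converted here and is illustrative only:

    static void
    vl_api_classify_set_interface_ip_table_t_handler
      (vl_api_classify_set_interface_ip_table_t * mp)
    {
      vl_api_classify_set_interface_ip_table_reply_t *rmp;
      unix_shared_memory_queue_t *q;
      int rv = 0;                 /* result of the real work goes here */

      q = vl_api_client_index_to_input_queue (mp->client_index);
      if (q == 0)
        return;

      rmp = vl_msg_api_alloc (sizeof (*rmp));
      rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SET_INTERFACE_IP_TABLE_REPLY);
      rmp->context = mp->context;
      rmp->retval = ntohl (rv);
      vl_msg_api_send_shmem (q, (u8 *) & rmp);
    }
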
+autoreply define classify_set_interface_ip_table { u32 client_index; u32 context; @@ -713,16 +563,6 @@ define classify_set_interface_ip_table u32 table_index; /* ~0 => off */ }; -/** \brief Set/unset interface classification table response - @param context - sender context, to match reply w/ request - @param retval - return code -*/ -define classify_set_interface_ip_table_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set/unset l2 classification tables for an interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -731,7 +571,7 @@ define classify_set_interface_ip_table_reply @param ip6_table_index - ip6 index @param other_table_index - other index */ -define classify_set_interface_l2_tables +autoreply define classify_set_interface_l2_tables { u32 client_index; u32 context; @@ -743,16 +583,6 @@ define classify_set_interface_l2_tables u8 is_input; }; -/** \brief Set/unset l2 classification tables for an interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define classify_set_interface_l2_tables_reply -{ - u32 context; - i32 retval; -}; - /** \brief Get node index using name request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -809,7 +639,7 @@ define add_node_next_reply @param sw_if_index - interface to enable/disable filtering on @param enable_disable - if non-zero enable filtering, else disable */ -define l2_interface_efp_filter +autoreply define l2_interface_efp_filter { u32 client_index; u32 context; @@ -817,16 +647,6 @@ define l2_interface_efp_filter u32 enable_disable; }; -/** \brief L2 interface ethernet flow point filtering response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define l2_interface_efp_filter_reply -{ - u32 context; - i32 retval; -}; - define create_subif { u32 client_index; @@ -882,7 +702,7 @@ define show_version_reply }; /* Gross kludge, DGMS */ -define interface_name_renumber +autoreply define interface_name_renumber { u32 client_index; u32 context; @@ -890,12 +710,6 @@ define interface_name_renumber u32 new_show_dev_instance; }; -define interface_name_renumber_reply -{ - u32 context; - i32 retval; -}; - /** \brief Register for ip4 arp resolution events @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -903,7 +717,7 @@ define interface_name_renumber_reply @param pid - sender's pid @param address - the exact ip4 address of interest */ -define want_ip4_arp_events +autoreply define want_ip4_arp_events { u32 client_index; u32 context; @@ -912,16 +726,6 @@ define want_ip4_arp_events u32 address; }; -/** \brief Reply for interface events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip4_arp_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip4 arp resolution event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -949,7 +753,7 @@ define ip4_arp_event @param pid - sender's pid @param address - the exact ip6 address of interest */ -define want_ip6_nd_events +autoreply define want_ip6_nd_events { u32 client_index; u32 context; @@ -958,16 +762,6 @@ define want_ip6_nd_events u8 address[16]; }; -/** \brief Reply for 
ip6 nd resolution events registration - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define want_ip6_nd_events_reply -{ - u32 context; - i32 retval; -}; - /** \brief Tell client about an ip6 nd resolution or mac/ip event @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -999,7 +793,7 @@ define ip6_nd_event Note: User is recommeneded to use just one valid table_index per call. (ip4_table_index, ip6_table_index, or l2_table_index) */ -define input_acl_set_interface +autoreply define input_acl_set_interface { u32 client_index; u32 context; @@ -1010,16 +804,6 @@ define input_acl_set_interface u8 is_add; }; -/** \brief Set/unset input ACL interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define input_acl_set_interface_reply -{ - u32 context; - i32 retval; -}; - define get_node_graph { u32 client_index; @@ -1048,7 +832,7 @@ define get_node_graph_reply @param pow_enable - Proof of Work enabled or not flag @param trace_enable - iOAM Trace enabled or not flag */ -define ioam_enable +autoreply define ioam_enable { u32 client_index; u32 context; @@ -1060,38 +844,18 @@ define ioam_enable u32 node_id; }; -/** \brief iOAM Trace profile add / del response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_enable_reply -{ - u32 context; - i32 retval; -}; - /** \brief iOAM disable @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param index - MAP Domain index */ -define ioam_disable +autoreply define ioam_disable { u32 client_index; u32 context; u16 id; }; -/** \brief iOAM disable response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ioam_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Query relative index via node names @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1149,7 +913,7 @@ define pg_create_interface_reply @param count - number of packets to be captured @param pcap_file - pacp file name to store captured packets */ -define pg_capture +autoreply define pg_capture { u32 client_index; u32 context; @@ -1160,23 +924,13 @@ define pg_capture u8 pcap_file_name[pcap_name_length]; }; -/** \brief PacketGenerator capture packets response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define pg_capture_reply -{ - u32 context; - i32 retval; -}; - /** \brief Enable / disable packet generator request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param is_enabled - 1 if enabling streams, 0 if disabling @param stream - stream name to be enable/disabled, if not specified handle all streams */ -define pg_enable_disable +autoreply define pg_enable_disable { u32 client_index; u32 context; @@ -1185,16 +939,6 @@ define pg_enable_disable u8 stream_name[stream_name_length]; }; -/** \brief Reply for enable / disable packet generator - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define pg_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /** \brief Configure IP source and L4 port-range check @param client_index - opaque 
cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1208,7 +952,7 @@ define pg_enable_disable_reply @param vrf_id - fib table/vrf id to associate the source and port-range check with @note To specify a single port set low_port and high_port entry the same */ -define ip_source_and_port_range_check_add_del +autoreply define ip_source_and_port_range_check_add_del { u32 client_index; u32 context; @@ -1222,16 +966,6 @@ define ip_source_and_port_range_check_add_del u32 vrf_id; }; -/** \brief Configure IP source and L4 port-range check reply - @param context - returned sender context, to match reply w/ request - @param retval - return code -*/ -define ip_source_and_port_range_check_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Set interface source and L4 port-range request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1239,7 +973,7 @@ define ip_source_and_port_range_check_add_del_reply @param tcp_vrf_id - VRF associated with source and TCP port-range check @param udp_vrf_id - VRF associated with source and TCP port-range check */ -define ip_source_and_port_range_check_interface_add_del +autoreply define ip_source_and_port_range_check_interface_add_del { u32 client_index; u32 context; @@ -1251,36 +985,17 @@ define ip_source_and_port_range_check_interface_add_del u32 udp_out_vrf_id; }; -/** \brief Set interface source and L4 port-range response - @param context - sender context, to match reply w/ request - @param retval - return value for request -*/ -define ip_source_and_port_range_check_interface_add_del_reply -{ - u32 context; - i32 retval; -}; - /** \brief Delete sub interface request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - sw index of the interface that was created by create_subif */ -define delete_subif { +autoreply define delete_subif { u32 client_index; u32 context; u32 sw_if_index; }; -/** \brief Delete sub interface response - @param context - sender context, to match reply w/ request - @param retval - return code for the request -*/ -define delete_subif_reply { - u32 context; - i32 retval; -}; - /** \brief Punt traffic to the host @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -1289,7 +1004,7 @@ define delete_subif_reply { @param l4_protocol - L4 protocol to be punted, only UDP (0x11) is supported @param l4_port - TCP/UDP port to be punted */ -define punt { +autoreply define punt { u32 client_index; u32 context; u8 is_add; @@ -1298,23 +1013,13 @@ define punt { u16 l4_port; }; -/** \brief Reply to the punt request - @param context - sender context which was passed in the request - @param retval - return code of punt request -*/ -define punt_reply -{ - u32 context; - i32 retval; -}; - /** \brief Feature path enable/disable request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param sw_if_index - the interface @param enable - 1 = on, 0 = off */ -define feature_enable_disable { +autoreply define feature_enable_disable { u32 client_index; u32 context; u32 sw_if_index; @@ -1323,16 +1028,6 @@ define feature_enable_disable { u8 feature_name[64]; }; -/** \brief Reply to the eature path enable/disable request - @param context - sender context which was passed in the request - @param retval - return code for the request -*/ 
-define feature_enable_disable_reply -{ - u32 context; - i32 retval; -}; - /* * Local Variables: * eval: (c-set-style "gnu") -- cgit 1.2.3-korg From e72be39cd0f498178fd62dfc0a0b0daa2b633f62 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 26 Apr 2017 13:59:20 -0700 Subject: A sprinkling of const in vlibmemory/api.h and friends Change-Id: I953ebb37eeec7de0c4a6b00258c3c67a83cbc020 Signed-off-by: Neale Ranns --- src/svm/svm.c | 6 +++--- src/svm/svm.h | 10 +++++----- src/svm/svmdb.h | 2 +- src/vlibapi/api.h | 11 ++++++----- src/vlibapi/api_shared.c | 4 ++-- src/vlibmemory/api.h | 20 ++++++++++---------- src/vlibmemory/memory_client.c | 16 +++++++++------- src/vlibmemory/memory_shared.c | 4 ++-- src/vlibmemory/memory_vlib.c | 4 ++-- src/vpp/api/api_main.c | 2 +- 10 files changed, 41 insertions(+), 38 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/svm/svm.c b/src/svm/svm.c index e4ca98e1..97add5a7 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -796,7 +796,7 @@ svm_region_init (void) } void -svm_region_init_chroot (char *root_path) +svm_region_init_chroot (const char *root_path) { svm_map_region_args_t _a, *a = &_a; @@ -813,7 +813,7 @@ svm_region_init_chroot (char *root_path) } void -svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid) +svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid) { svm_map_region_args_t _a, *a = &_a; @@ -1151,7 +1151,7 @@ svm_client_scan_this_region_nolock (svm_region_t * rp) * Scan svm regions for dead clients */ void -svm_client_scan (char *root_path) +svm_client_scan (const char *root_path) { int i, j; svm_main_region_t *mp; diff --git a/src/svm/svm.h b/src/svm/svm.h index 0b87dbcb..06797fa1 100644 --- a/src/svm/svm.h +++ b/src/svm/svm.h @@ -69,8 +69,8 @@ typedef struct svm_region_ typedef struct svm_map_region_args_ { - char *root_path; /* NULL means use the truly global arena */ - char *name; + const char *root_path; /* NULL means use the truly global arena */ + const char *name; u64 baseva; u64 size; u64 pvt_heap_size; @@ -115,12 +115,12 @@ typedef struct void *svm_region_find_or_create (svm_map_region_args_t * a); void svm_region_init (void); -void svm_region_init_chroot (char *root_path); -void svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid); +void svm_region_init_chroot (const char *root_path); +void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); void svm_region_init_args (svm_map_region_args_t * a); void svm_region_exit (void); void svm_region_unmap (void *rp_arg); -void svm_client_scan (char *root_path); +void svm_client_scan (const char *root_path); void svm_client_scan_this_region_nolock (svm_region_t * rp); u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); diff --git a/src/svm/svmdb.h b/src/svm/svmdb.h index e02628a0..e35be8aa 100644 --- a/src/svm/svmdb.h +++ b/src/svm/svmdb.h @@ -83,7 +83,7 @@ typedef struct typedef struct { - char *root_path; + const char *root_path; uword size; u32 uid; u32 gid; diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index a62fa644..7538050e 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -127,7 +127,7 @@ typedef struct void (**msg_cleanup_handlers) (void *); void (**msg_endian_handlers) (void *); void (**msg_print_handlers) (void *, void *); - char **msg_names; + const char **msg_names; u8 *message_bounce; u8 *is_mp_safe; struct ring_alloc_ *arings; @@ -195,8 +195,8 @@ typedef struct /* client side message index hash table */ uword *msg_index_by_name_and_crc; - char *region_name; - char *root_path; 
+ const char *region_name; + const char *root_path; /* Replay in progress? */ int replay_in_progress; @@ -276,8 +276,9 @@ void vl_msg_api_register_pd_handler (void *handler, int vl_msg_api_pd_handler (void *mp, int rv); void vl_msg_api_set_first_available_msg_id (u16 first_avail); -u16 vl_msg_api_get_msg_ids (char *name, int n); -void vl_msg_api_add_msg_name_crc (api_main_t * am, char *string, u32 id); +u16 vl_msg_api_get_msg_ids (const char *name, int n); +void vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, + u32 id); u32 vl_api_get_msg_index (u8 * name_and_crc); /* node_serialize.c prototypes */ diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 6774e3dd..0817f38e 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -828,7 +828,7 @@ vl_msg_api_set_first_available_msg_id (u16 first_avail) } u16 -vl_msg_api_get_msg_ids (char *name, int n) +vl_msg_api_get_msg_ids (const char *name, int n) { api_main_t *am = &api_main; u8 *name_copy; @@ -872,7 +872,7 @@ vl_msg_api_get_msg_ids (char *name, int n) } void -vl_msg_api_add_msg_name_crc (api_main_t * am, char *string, u32 id) +vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, u32 id) { uword *p; diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index 8e44c20d..c195e181 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -123,20 +123,20 @@ void *vl_msg_api_alloc_or_null (int nbytes); void *vl_msg_api_alloc_as_if_client (int nbytes); void *vl_msg_api_alloc_as_if_client_or_null (int nbytes); void vl_msg_api_free (void *a); -int vl_map_shmem (char *region_name, int is_vlib); +int vl_map_shmem (const char *region_name, int is_vlib); void vl_register_mapped_shmem_region (svm_region_t * rp); void vl_unmap_shmem (void); void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem); void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem); void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem); -int vl_client_connect (char *name, int ctx_quota, int input_queue_size); +int vl_client_connect (const char *name, int ctx_quota, int input_queue_size); void vl_client_disconnect (void); unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index); vl_api_registration_t *vl_api_client_index_to_registration (u32 index); -int vl_client_api_map (char *region_name); +int vl_client_api_map (const char *region_name); void vl_client_api_unmap (void); -void vl_set_memory_region_name (char *name); -void vl_set_memory_root_path (char *root_path); +void vl_set_memory_region_name (const char *name); +void vl_set_memory_root_path (const char *root_path); void vl_set_memory_uid (int uid); void vl_set_memory_gid (int gid); void vl_set_global_memory_baseva (u64 baseva); @@ -146,12 +146,12 @@ void vl_set_global_pvt_heap_size (u64 size); void vl_set_api_pvt_heap_size (u64 size); void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); void vl_client_disconnect_from_vlib (void); -int vl_client_connect_to_vlib (char *svm_name, char *client_name, - int rx_queue_size); -int vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, - char *client_name, +int vl_client_connect_to_vlib (const char *svm_name, + const char *client_name, int rx_queue_size); +int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, + const char *client_name, int rx_queue_size); -u16 vl_client_get_first_plugin_msg_id (char *plugin_name); +u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); void vl_api_rpc_call_main_thread (void *fp, u8 * 
data, u32 data_length); diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index 25b06f65..d48a4fa1 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -155,7 +155,7 @@ noop_handler (void *notused) } int -vl_client_connect (char *name, int ctx_quota, int input_queue_size) +vl_client_connect (const char *name, int ctx_quota, int input_queue_size) { svm_region_t *svm; vl_api_memclnt_create_t *mp; @@ -326,7 +326,7 @@ _(MEMCLNT_DELETE_REPLY, memclnt_delete_reply) int -vl_client_api_map (char *region_name) +vl_client_api_map (const char *region_name) { int rv; @@ -354,7 +354,8 @@ vl_client_api_unmap (void) } static int -connect_to_vlib_internal (char *svm_name, char *client_name, +connect_to_vlib_internal (const char *svm_name, + const char *client_name, int rx_queue_size, int want_pthread) { int rv = 0; @@ -388,15 +389,16 @@ connect_to_vlib_internal (char *svm_name, char *client_name, } int -vl_client_connect_to_vlib (char *svm_name, char *client_name, - int rx_queue_size) +vl_client_connect_to_vlib (const char *svm_name, + const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, 1 /* want pthread */ ); } int -vl_client_connect_to_vlib_no_rx_pthread (char *svm_name, char *client_name, +vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, + const char *client_name, int rx_queue_size) { return connect_to_vlib_internal (svm_name, client_name, rx_queue_size, @@ -437,7 +439,7 @@ static void vl_api_get_first_msg_id_reply_t_handler } u16 -vl_client_get_first_plugin_msg_id (char *plugin_name) +vl_client_get_first_plugin_msg_id (const char *plugin_name) { vl_api_get_first_msg_id_t *mp; api_main_t *am = &api_main; diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 6cea5df9..aea90330 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -257,7 +257,7 @@ vl_msg_api_free_nolock (void *a) } void -vl_set_memory_root_path (char *name) +vl_set_memory_root_path (const char *name) { api_main_t *am = &api_main; @@ -321,7 +321,7 @@ vl_set_api_pvt_heap_size (u64 size) } int -vl_map_shmem (char *region_name, int is_vlib) +vl_map_shmem (const char *region_name, int is_vlib) { svm_map_region_args_t _a, *a = &_a; svm_region_t *vlib_rp, *root_rp; diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 43574dea..29a5c2c2 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -361,7 +361,7 @@ _(GET_FIRST_MSG_ID, get_first_msg_id) * vl_api_init */ static int -memory_api_init (char *region_name) +memory_api_init (const char *region_name) { int rv; vl_msg_api_msg_config_t cfg; @@ -1202,7 +1202,7 @@ vlibmemory_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (vlibmemory_init); void -vl_set_memory_region_name (char *name) +vl_set_memory_region_name (const char *name) { api_main_t *am = &api_main; diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c index d48e4eff..ac09cd15 100644 --- a/src/vpp/api/api_main.c +++ b/src/vpp/api/api_main.c @@ -211,7 +211,7 @@ api_cli_output (void *notused, const char *fmt, ...) 
} u16 -vl_client_get_first_plugin_msg_id (char *plugin_name) +vl_client_get_first_plugin_msg_id (const char *plugin_name) { api_main_t *am = &api_main; vl_api_msg_range_t *rp; -- cgit 1.2.3-korg From 7bee80c823ca77de3aca803fdede77e4c7385a52 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 26 Apr 2017 15:32:12 +0200 Subject: Fix remaining 32-bit compile issues Change-Id: I9664214652229b663c3e3ba7406b4ede96bfb123 Signed-off-by: Damjan Marion --- Makefile | 8 ++++---- build-data/platforms/vpp.mk | 5 +++++ src/plugins/dpdk/buffer.c | 6 +++--- src/plugins/ixge/ixge.c | 5 +++-- src/svm/svm.c | 11 ++++++----- src/svm/svmtool.c | 4 ++-- src/tools/vppapigen/gram.y | 6 +++--- src/tools/vppapigen/node.c | 4 ++-- src/uri/uri_tcp_test.c | 21 ++++++++++++--------- src/uri/uri_udp_test.c | 15 +++++++++------ src/vat/api_format.c | 10 +++++----- src/vlib/threads.c | 2 +- src/vlibmemory/memory_client.c | 2 +- src/vlibmemory/memory_vlib.c | 3 ++- src/vnet/devices/virtio/vhost-user.c | 2 +- src/vnet/session/application_interface.c | 2 +- src/vnet/session/session_api.c | 14 +++++++------- src/vnet/tcp/builtin_client.c | 19 +++++++++++-------- src/vnet/tcp/builtin_server.c | 2 +- src/vppinfra/mheap.c | 2 +- 20 files changed, 80 insertions(+), 63 deletions(-) (limited to 'src/vlibmemory') diff --git a/Makefile b/Makefile index 8240e789..b344f377 100644 --- a/Makefile +++ b/Makefile @@ -263,9 +263,9 @@ define test TEST_DIR=$(WS_ROOT)/test \ VPP_TEST_BUILD_DIR=$(BR)/build-$(2)-native \ VPP_TEST_BIN=$(BR)/install-$(2)-native/vpp/bin/vpp \ - VPP_TEST_PLUGIN_PATH=$(BR)/install-$(2)-native/vpp/lib64/vpp_plugins \ + VPP_TEST_PLUGIN_PATH=$(wildcard $(BR)/install-$(2)-native/vpp/lib*/vpp_plugins) \ VPP_TEST_INSTALL_PATH=$(BR)/install-$(2)-native/ \ - LD_LIBRARY_PATH=$(BR)/install-$(2)-native/vpp/lib64/ \ + LD_LIBRARY_PATH=$(subst $(subst ,, ),:,$(wildcard $(BR)/install-$(2)-native/vpp/lib*/)) \ EXTENDED_TESTS=$(EXTENDED_TESTS) \ PYTHON=$(PYTHON) \ $(3) @@ -325,12 +325,12 @@ define run @echo "WARNING: STARTUP_CONF not defined or file doesn't exist." 
@echo " Running with minimal startup config: $(MINIMAL_STARTUP_CONF)\n" @cd $(STARTUP_DIR) && \ - sudo $(2) $(1)/vpp/bin/vpp $(MINIMAL_STARTUP_CONF) plugin_path $(1)/vpp/lib64/vpp_plugins + sudo $(2) $(1)/vpp/bin/vpp $(MINIMAL_STARTUP_CONF) plugin_path $(wildcard $(1)/vpp/lib*/vpp_plugins) endef else define run @cd $(STARTUP_DIR) && \ - sudo $(2) $(1)/vpp/bin/vpp $(shell cat $(STARTUP_CONF) | sed -e 's/#.*//') plugin_path $(1)/vpp/lib64/vpp_plugins + sudo $(2) $(1)/vpp/bin/vpp $(shell cat $(STARTUP_CONF) | sed -e 's/#.*//') plugin_path $(wildcard $(1)/vpp/lib*/vpp_plugins) endef endif diff --git a/build-data/platforms/vpp.mk b/build-data/platforms/vpp.mk index 5aafdd76..4577fa2e 100644 --- a/build-data/platforms/vpp.mk +++ b/build-data/platforms/vpp.mk @@ -46,6 +46,11 @@ vpp_root_packages = vpp gmod # vpp_dpdk_lib_dir = /usr/lib # vpp_dpdk_shared_lib = yes +# load balancer plugin is not portable on 32 bit platform +ifeq ($(MACHINE),i686) +vpp_configure_args_vpp = --disable-lb-plugin +endif + vpp_debug_TAG_CFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ -fstack-protector-all -fPIC -Werror vpp_debug_TAG_LDFLAGS = -g -O0 -DCLIB_DEBUG -DFORTIFY_SOURCE=2 -march=$(MARCH) \ diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index c80b3fa8..2d4762ab 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -455,8 +455,8 @@ vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, uword save_vpm_start, save_vpm_end, save_vpm_size; struct rte_mempool_memhdr *memhdr; - this_pool_start = ~0ULL; - this_pool_end = 0LL; + this_pool_start = ~0; + this_pool_end = 0; STAILQ_FOREACH (memhdr, &rmp->mem_list, next) { @@ -465,7 +465,7 @@ vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, if (((uword) memhdr->addr) < this_pool_start) this_pool_start = (uword) (memhdr->addr); } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + ASSERT (this_pool_start < ~0 && this_pool_end > 0); this_pool_size = this_pool_end - this_pool_start; if (CLIB_DEBUG > 1) diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index 08f5b692..0d287250 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -20,7 +20,7 @@ * Please use supported DPDK driver instead. */ -#if __x86_64__ +#if __x86_64__ || __i386__ #include #ifndef CLIB_HAVE_VEC128 @@ -2929,7 +2929,6 @@ ixge_set_next_node (ixge_rx_next_t next, char *name) break; } } -#endif /* *INDENT-OFF* */ VLIB_PLUGIN_REGISTER () = { @@ -2937,8 +2936,10 @@ VLIB_PLUGIN_REGISTER () = { .default_disabled = 1, .description = "Intel 82599 Family Native Driver (experimental)", }; +#endif /* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/svm.c b/src/svm/svm.c index 97add5a7..c96135cf 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -491,7 +491,7 @@ svm_map_region (svm_map_region_args_t * a) return (0); } - rp = mmap ((void *) a->baseva, a->size, + rp = mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if (rp == (svm_region_t *) MAP_FAILED) @@ -533,9 +533,10 @@ svm_map_region (svm_map_region_args_t * a) rp->virtual_size = a->size; rp->region_heap = - mheap_alloc_with_flags ((void *) (a->baseva + MMAP_PAGESIZE), - (a->pvt_heap_size != 0) ? - a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, + mheap_alloc_with_flags (uword_to_pointer + (a->baseva + MMAP_PAGESIZE, void *), + (a->pvt_heap_size != + 0) ? 
a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, MHEAP_FLAG_DISABLE_VM); oldheap = svm_push_pvt_heap (rp); @@ -661,7 +662,7 @@ svm_map_region (svm_map_region_args_t * a) a->size = rp->virtual_size; munmap (rp, MMAP_PAGESIZE); - rp = (void *) mmap ((void *) a->baseva, a->size, + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) rp == (uword) MAP_FAILED) diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c index b3195514..01ae4221 100644 --- a/src/svm/svmtool.c +++ b/src/svm/svmtool.c @@ -172,7 +172,7 @@ svm_map_region_nolock (svm_map_region_args_t * a) a->size = rp->virtual_size; munmap (rp, MMAP_PAGESIZE); - rp = (void *) mmap ((void *) a->baseva, a->size, + rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) rp == (uword) MAP_FAILED) @@ -401,7 +401,7 @@ repair (char *chroot_path, int crash_root_region) a->size = root_rp->virtual_size; munmap (root_rp, MMAP_PAGESIZE); - root_rp = (void *) mmap ((void *) a->baseva, a->size, + root_rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0); if ((uword) root_rp == (uword) MAP_FAILED) diff --git a/src/tools/vppapigen/gram.y b/src/tools/vppapigen/gram.y index 9cea6023..52bb65c5 100644 --- a/src/tools/vppapigen/gram.y +++ b/src/tools/vppapigen/gram.y @@ -53,9 +53,9 @@ stmt: flist defn {$$ = set_flags($1, $2);} | defn {$$ = $1;} ; -flist: flist flag {$$ = (YYSTYPE)(unsigned long long) - ((unsigned long long) $1 - | (unsigned long long) $2);} +flist: flist flag {$$ = (YYSTYPE)(unsigned long) + ((unsigned long) $1 + | (unsigned long) $2);} | flag {$$ = $1;} ; diff --git a/src/tools/vppapigen/node.c b/src/tools/vppapigen/node.c index 9f234037..15868ee5 100644 --- a/src/tools/vppapigen/node.c +++ b/src/tools/vppapigen/node.c @@ -397,7 +397,7 @@ void node_define_generate (node_t *this, enum passid which, FILE *fp) fprintf(fp, ",\n"); } indent_me(fp); - fprintf (fp, "{\"crc\" : \"0x%08x\"}\n", (u32)(u64)CDATA3); + fprintf (fp, "{\"crc\" : \"0x%08x\"}\n", (u32)(uword)CDATA3); indent -= 4; indent_me(fp); fprintf(fp, "]"); @@ -1219,7 +1219,7 @@ void generate_msg_name_crc_list (YYSTYPE a1, FILE *fp) if (!(np->flags & NODE_FLAG_TYPEONLY)) { fprintf (fp, "\\\n_(VL_API_%s, %s, %08x) ", uppercase (np->data[0]), (i8 *) np->data[0], - (u32)(u64)np->data[3]); + (u32)(uword)np->data[3]); } } np = np->peer; diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c index b15fd6ce..22f246e5 100755 --- a/src/uri/uri_tcp_test.c +++ b/src/uri/uri_tcp_test.c @@ -262,7 +262,8 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * } utm->our_event_queue = - (unix_shared_memory_queue_t *) mp->app_event_queue_address; + uword_to_pointer (mp->app_event_queue_address, + unix_shared_memory_queue_t *); utm->state = STATE_ATTACHED; } @@ -524,8 +525,9 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) return; } - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* * Setup session @@ -534,9 +536,9 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) pool_get (utm->sessions, session); session_index = session - utm->sessions; - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); 
rx_fifo->client_session_index = session_index; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; session->server_rx_fifo = rx_fifo; @@ -858,16 +860,17 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) ip_str = format (0, "%U", format_ip46_address, &mp->ip, mp->is_ip4); clib_warning ("Accepted session from: %s:%d", ip_str, clib_net_to_host_u16 (mp->port)); - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* Allocate local session and set it up */ pool_get (utm->sessions, session); session_index = session - utm->sessions; - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); rx_fifo->client_session_index = session_index; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session_index; session->server_rx_fifo = rx_fifo; diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c index 266215c8..8fb12ed2 100644 --- a/src/uri/uri_udp_test.c +++ b/src/uri/uri_udp_test.c @@ -232,7 +232,8 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * } utm->our_event_queue = - (unix_shared_memory_queue_t *) mp->app_event_queue_address; + uword_to_pointer (mp->app_event_queue_address, + unix_shared_memory_queue_t *); } static void @@ -581,7 +582,8 @@ send_reply: vec_free (a->segment_name); - client_q = (unix_shared_memory_queue_t *) mp->client_queue_address; + client_q = + uword_to_pointer (mp->client_queue_address, unix_shared_memory_queue_t *); vl_msg_api_send_shmem (client_q, (u8 *) & rmp); } @@ -608,14 +610,15 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) if (start_time == 0.0) start_time = clib_time_now (&utm->clib_time); - utm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + utm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); pool_get (utm->sessions, session); - rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); rx_fifo->client_session_index = session - utm->sessions; - tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); tx_fifo->client_session_index = session - utm->sessions; session->server_rx_fifo = rx_fifo; diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 28b227b4..495b660e 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -1037,7 +1037,7 @@ vl_api_cli_reply_t_handler (vl_api_cli_reply_t * mp) i32 retval = ntohl (mp->retval); vam->retval = retval; - vam->shmem_result = (u8 *) mp->reply_in_shmem; + vam->shmem_result = uword_to_pointer (mp->reply_in_shmem, u8 *); vam->result_ready = 1; } @@ -1058,7 +1058,7 @@ vl_api_cli_reply_t_handler_json (vl_api_cli_reply_t * mp) pthread_mutex_lock (&am->vlib_rp->mutex); oldheap = svm_push_data_heap (am->vlib_rp); - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); vec_free (reply); svm_pop_heap (oldheap); @@ -2405,7 +2405,7 @@ static void vl_api_get_node_graph_reply_t_handler if (retval != 0) return; - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); pvt_copy = vec_dup (reply); /* Toss the 
shared-memory original... */ @@ -2456,7 +2456,7 @@ static void vl_api_get_node_graph_reply_t_handler_json vat_json_object_add_int (&node, "retval", ntohl (mp->retval)); vat_json_object_add_uint (&node, "reply_in_shmem", mp->reply_in_shmem); - reply = (u8 *) (mp->reply_in_shmem); + reply = uword_to_pointer (mp->reply_in_shmem, u8 *); /* Toss the shared-memory original... */ pthread_mutex_lock (&am->vlib_rp->mutex); @@ -4959,7 +4959,7 @@ exec (vat_main_t * vam) svm_pop_heap (oldheap); pthread_mutex_unlock (&am->vlib_rp->mutex); - mp->cmd_in_shmem = (u64) cmd; + mp->cmd_in_shmem = pointer_to_uword (cmd); S (mp); timeout = vat_time_now (vam) + 10.0; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 4a111f8d..9ccfd3a2 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1125,7 +1125,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu"); -#if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) +#if !defined (__x86_64__) && !defined (__i386__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__) void __sync_fetch_and_add_8 (void) { diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index d48a4fa1..a162d6bb 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -137,7 +137,7 @@ vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) am->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword)); /* Recreate the vnet-side API message handler table */ - tblv = (u8 *) mp->message_table; + tblv = uword_to_pointer (mp->message_table, u8 *); serialize_open_vector (sm, tblv); unserialize_integer (sm, &nmsgs, sizeof (u32)); diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 29a5c2c2..acba8b3f 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -216,7 +216,8 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) am->shmem_hdr->application_restarts); rp->context = mp->context; rp->response = ntohl (rv); - rp->message_table = (u64) am->serialized_message_table_in_shmem; + rp->message_table = + pointer_to_uword (am->serialized_message_table_in_shmem); vl_msg_api_send_shmem (q, (u8 *) & rp); } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index acc7bf82..6ccc0d87 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -719,7 +719,7 @@ vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui, } if (is_host_address) { - addr = (u64) map_user_mem (vui, (uword) addr); + addr = pointer_to_uword (map_user_mem (vui, (uword) addr)); } if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size)) { diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index ad44baa1..f74b0cfe 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -247,7 +247,7 @@ vnet_application_attach (vnet_app_attach_args_t * a) a->session_cb_vft))) return rv; - a->app_event_queue_address = (u64) app->event_queue; + a->app_event_queue_address = pointer_to_uword (app->event_queue); sm = segment_manager_get (app->first_segment_manager); segment_manager_get_segment_info (sm->segment_indices[0], &seg_name, &a->segment_size); diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 5a02a08e..8266922c 100755 --- a/src/vnet/session/session_api.c +++ 
b/src/vnet/session/session_api.c @@ -102,9 +102,9 @@ send_session_accept_callback (stream_session_t * s) tc = tp_vft->get_connection (s->connection_index, s->thread_index); mp->listener_handle = listen_session_get_handle (listener); mp->handle = stream_session_handle (s); - mp->server_rx_fifo = (u64) s->server_rx_fifo; - mp->server_tx_fifo = (u64) s->server_tx_fifo; - mp->vpp_event_queue_address = (u64) vpp_queue; + mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->port = tc->rmt_port; mp->is_ip4 = tc->is_ip4; clib_memcpy (&mp->ip, &tc->rmt_ip, sizeof (tc->rmt_ip)); @@ -172,10 +172,10 @@ send_session_connected_callback (u32 app_index, u32 api_context, if (!is_fail) { vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); - mp->server_rx_fifo = (u64) s->server_rx_fifo; - mp->server_tx_fifo = (u64) s->server_tx_fifo; + mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); mp->handle = stream_session_handle (s); - mp->vpp_event_queue_address = (u64) vpp_queue; + mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->retval = 0; } else @@ -225,7 +225,7 @@ redirect_connect_callback (u32 server_api_client_index, void *mp_arg) } /* Tell the server the client's API queue address, so it can reply */ - mp->client_queue_address = (u64) client_q; + mp->client_queue_address = pointer_to_uword (client_q); app = application_lookup (mp->client_index); if (!app) { diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 32d69a96..6f890874 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -274,11 +274,12 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) return; } - tm->our_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; - - tm->vpp_event_queue = (unix_shared_memory_queue_t *) - mp->vpp_event_queue_address; + tm->our_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); + tm->vpp_event_queue = + uword_to_pointer (mp->vpp_event_queue_address, + unix_shared_memory_queue_t *); /* * Setup session @@ -288,9 +289,11 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) session_index = session - tm->sessions; session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send; - session->server_rx_fifo = (svm_fifo_t *) mp->server_rx_fifo; + session->server_rx_fifo = + uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); session->server_rx_fifo->client_session_index = session_index; - session->server_tx_fifo = (svm_fifo_t *) mp->server_tx_fifo; + session->server_tx_fifo = + uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); session->server_tx_fifo->client_session_index = session_index; session->vpp_session_handle = mp->handle; @@ -321,7 +324,7 @@ create_api_loopback (tclient_main_t * tm) memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; mp->context = 0xFEEDFACE; - mp->input_queue = (u64) tm->vl_input_queue; + mp->input_queue = pointer_to_uword (tm->vl_input_queue); strncpy ((char *) mp->name, "tcp_tester", sizeof (mp->name) - 1); vl_api_memclnt_create_t_handler (mp); diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index 34682699..621ce02a 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -244,7 +244,7 @@ create_api_loopback (vlib_main_t * vm) memset 
(mp, 0, sizeof (*mp)); mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; mp->context = 0xFEEDFACE; - mp->input_queue = (u64) bsm->vl_input_queue; + mp->input_queue = pointer_to_uword (bsm->vl_input_queue); strncpy ((char *) mp->name, "tcp_test_server", sizeof (mp->name) - 1); vl_api_memclnt_create_t_handler (mp); diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c index b8828f9e..192732db 100644 --- a/src/vppinfra/mheap.c +++ b/src/vppinfra/mheap.c @@ -304,7 +304,7 @@ mheap_small_object_cache_mask (mheap_small_object_cache_t * c, uword bin) uword mask; /* $$$$ ELIOT FIXME: add Altivec version of this routine */ -#if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) +#if !defined (CLIB_HAVE_VEC128) || defined (__ALTIVEC__) || defined (__i386__) mask = 0; #else u8x16 b = u8x16_splat (bin); -- cgit 1.2.3-korg From f55f9b851f59264d737d92c6277a87588c565d24 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 10 May 2017 21:06:28 +0200 Subject: completelly deprecate os_get_cpu_number, replace new occurences Change-Id: I82c663bc0866c6c68ba354104b0bb059387f4b9d Signed-off-by: Damjan Marion --- src/plugins/flowperpkt/l2_node.c | 20 ++++++++++---------- src/plugins/flowperpkt/node.c | 20 ++++++++++---------- src/plugins/snat/in2out.c | 2 +- src/plugins/snat/out2in.c | 2 +- src/vlib/main.h | 2 +- src/vlib/threads.c | 12 ++---------- src/vlib/threads.h | 3 +-- src/vlib/unix/main.c | 2 +- src/vlibmemory/memory_vlib.c | 2 +- src/vnet/dpo/interface_dpo.c | 8 ++++---- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 +- src/vppinfra/bihash_template.c | 16 ++++++++-------- src/vppinfra/lock.h | 6 +++--- src/vppinfra/mem.h | 6 +++--- src/vppinfra/mhash.c | 2 +- src/vppinfra/mhash.h | 2 +- src/vppinfra/mheap.c | 4 ++-- src/vppinfra/os.h | 20 ++++++++++++++++++-- src/vppinfra/smp.c | 2 +- src/vppinfra/unix-misc.c | 19 +++++++------------ 20 files changed, 77 insertions(+), 75 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c index fdaf81d1..db80e990 100644 --- a/src/plugins/flowperpkt/l2_node.c +++ b/src/plugins/flowperpkt/l2_node.c @@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, u8 * src_mac, u8 * dst_mac, u16 ethertype, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->thread_index; + u32 my_thread_index = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; @@ -116,7 +116,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, vlib_buffer_free_list_t *fl; /* Find or allocate a buffer */ - b0 = fm->l2_buffers_per_worker[my_cpu_number]; + b0 = fm->l2_buffers_per_worker[my_thread_index]; /* Need to allocate a buffer? 
*/ if (PREDICT_FALSE (b0 == 0)) @@ -130,7 +130,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, return; /* Initialize the buffer */ - b0 = fm->l2_buffers_per_worker[my_cpu_number] = + b0 = fm->l2_buffers_per_worker[my_thread_index] = vlib_get_buffer (vm, bi0); fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); @@ -142,16 +142,16 @@ add_to_flow_record_l2 (vlib_main_t * vm, { /* use the current buffer */ bi0 = vlib_get_buffer_index (vm, b0); - offset = fm->l2_next_record_offset_per_worker[my_cpu_number]; + offset = fm->l2_next_record_offset_per_worker[my_thread_index]; } /* Find or allocate a frame */ - f = fm->l2_frames_per_worker[my_cpu_number]; + f = fm->l2_frames_per_worker[my_thread_index]; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - fm->l2_frames_per_worker[my_cpu_number] = f; + fm->l2_frames_per_worker[my_thread_index] = f; /* Enqueue the buffer */ to_next = vlib_frame_vector_args (f); @@ -299,13 +299,13 @@ add_to_flow_record_l2 (vlib_main_t * vm, } vlib_put_frame_to_node (vm, ip4_lookup_node.index, - fm->l2_frames_per_worker[my_cpu_number]); - fm->l2_frames_per_worker[my_cpu_number] = 0; - fm->l2_buffers_per_worker[my_cpu_number] = 0; + fm->l2_frames_per_worker[my_thread_index]); + fm->l2_frames_per_worker[my_thread_index] = 0; + fm->l2_buffers_per_worker[my_thread_index] = 0; offset = 0; } - fm->l2_next_record_offset_per_worker[my_cpu_number] = offset; + fm->l2_next_record_offset_per_worker[my_thread_index] = offset; } void diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c index 0277682d..9bac4166 100644 --- a/src/plugins/flowperpkt/node.c +++ b/src/plugins/flowperpkt/node.c @@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, u32 src_address, u32 dst_address, u8 tos, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->thread_index; + u32 my_thread_index = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; @@ -115,7 +115,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, vlib_buffer_free_list_t *fl; /* Find or allocate a buffer */ - b0 = fm->ipv4_buffers_per_worker[my_cpu_number]; + b0 = fm->ipv4_buffers_per_worker[my_thread_index]; /* Need to allocate a buffer? 
*/ if (PREDICT_FALSE (b0 == 0)) @@ -129,7 +129,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, return; /* Initialize the buffer */ - b0 = fm->ipv4_buffers_per_worker[my_cpu_number] = + b0 = fm->ipv4_buffers_per_worker[my_thread_index] = vlib_get_buffer (vm, bi0); fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); @@ -141,16 +141,16 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, { /* use the current buffer */ bi0 = vlib_get_buffer_index (vm, b0); - offset = fm->ipv4_next_record_offset_per_worker[my_cpu_number]; + offset = fm->ipv4_next_record_offset_per_worker[my_thread_index]; } /* Find or allocate a frame */ - f = fm->ipv4_frames_per_worker[my_cpu_number]; + f = fm->ipv4_frames_per_worker[my_thread_index]; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - fm->ipv4_frames_per_worker[my_cpu_number] = f; + fm->ipv4_frames_per_worker[my_thread_index] = f; /* Enqueue the buffer */ to_next = vlib_frame_vector_args (f); @@ -300,13 +300,13 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, } vlib_put_frame_to_node (vm, ip4_lookup_node.index, - fm->ipv4_frames_per_worker[my_cpu_number]); - fm->ipv4_frames_per_worker[my_cpu_number] = 0; - fm->ipv4_buffers_per_worker[my_cpu_number] = 0; + fm->ipv4_frames_per_worker[my_thread_index]); + fm->ipv4_frames_per_worker[my_thread_index] = 0; + fm->ipv4_buffers_per_worker[my_thread_index] = 0; offset = 0; } - fm->ipv4_next_record_offset_per_worker[my_cpu_number] = offset; + fm->ipv4_next_record_offset_per_worker[my_thread_index] = offset; } void diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index f7d29c69..bc86a7a4 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -1514,7 +1514,7 @@ snat_det_in2out_node_fn (vlib_main_t * vm, u32 pkts_processed = 0; snat_main_t * sm = &snat_main; u32 now = (u32) vlib_time_now (vm); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 3d7b106a..824406ab 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -1168,7 +1168,7 @@ snat_det_out2in_node_fn (vlib_main_t * vm, snat_out2in_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vlib/main.h b/src/vlib/main.h index 329bf073..0e8026d1 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -320,7 +320,7 @@ always_inline void vlib_set_queue_signal_callback /* Main routine. 
*/ int vlib_main (vlib_main_t * vm, unformat_input_t * input); -/* Thread stacks, for os_get_cpu_number */ +/* Thread stacks, for os_get_thread_index */ extern u8 **vlib_thread_stacks; /* Number of thread stacks that the application needs */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 9ccfd3a2..b7bc9e26 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -35,16 +35,8 @@ vl (void *p) vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; -__thread uword vlib_thread_index = 0; - -uword -os_get_cpu_number (void) -{ - return vlib_thread_index; -} - uword -os_get_ncpus (void) +os_get_nthreads (void) { u32 len; @@ -467,7 +459,7 @@ vlib_worker_thread_bootstrap_fn (void *arg) w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); - vlib_thread_index = w - vlib_worker_threads; + __os_thread_index = w - vlib_worker_threads; rv = (void *) clib_calljmp ((uword (*)(uword)) w->thread_function, diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 101d3d4a..17d35a24 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -181,11 +181,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); void vlib_worker_thread_barrier_sync (vlib_main_t * vm); void vlib_worker_thread_barrier_release (vlib_main_t * vm); -extern __thread uword vlib_thread_index; static_always_inline uword vlib_get_thread_index (void) { - return vlib_thread_index; + return __os_thread_index; } always_inline void diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index db5ddd64..103576db 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -565,7 +565,7 @@ vlib_unix_main (int argc, char *argv[]) vlib_thread_stack_init (0); - vlib_thread_index = 0; + __os_thread_index = 0; i = clib_calljmp (thread0, (uword) vm, (void *) (vlib_thread_stacks[0] + diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index acba8b3f..e5d88732 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1333,7 +1333,7 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) unix_shared_memory_queue_t *q; /* Main thread: call the function directly */ - if (os_get_cpu_number () == 0) + if (vlib_get_thread_index () == 0) { vlib_main_t *vm = vlib_get_main (); void (*call_fp) (void *); diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c index 50ca756f..8d700c23 100644 --- a/src/vnet/dpo/interface_dpo.c +++ b/src/vnet/dpo/interface_dpo.c @@ -231,7 +231,7 @@ interface_dpo_inline (vlib_main_t * vm, vlib_frame_t * from_frame) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index (); vnet_interface_main_t *im; im = &vnet_get_main ()->interface_main; @@ -274,13 +274,13 @@ interface_dpo_inline (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido0->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido1->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b1)); @@ -331,7 +331,7 @@ interface_dpo_inline (vlib_main_t * vm, /* Bump the interface's RX coutners */ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido0->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c 
b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index d5f3a28a..7db1c9bb 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -302,7 +302,7 @@ lisp_gpe_increment_stats_counters (lisp_cp_main_t * lcm, ip_adjacency_t * adj, /* compute payload length starting after GPE */ u32 bytes = b->current_length - (lisp_data - b->data - b->current_data); - vlib_increment_combined_counter (&lgm->counters, os_get_cpu_number (), + vlib_increment_combined_counter (&lgm->counters, vlib_get_thread_index (), p[0], 1, bytes); } diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index d8b97b5f..51fadeb8 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -96,12 +96,12 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) clib_bihash_bucket_t working_bucket __attribute__ ((aligned (8))); void *oldheap; BVT (clib_bihash_value) * working_copy; - u32 cpu_number = os_get_cpu_number (); + u32 thread_index = os_get_thread_index (); - if (cpu_number >= vec_len (h->working_copies)) + if (thread_index >= vec_len (h->working_copies)) { oldheap = clib_mem_set_heap (h->mheap); - vec_validate (h->working_copies, cpu_number); + vec_validate (h->working_copies, thread_index); clib_mem_set_heap (oldheap); } @@ -110,7 +110,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) * updates from multiple threads will not result in sporadic, spurious * lookup failures. */ - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; h->saved_bucket.as_u64 = b->as_u64; oldheap = clib_mem_set_heap (h->mheap); @@ -119,7 +119,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) { vec_validate_aligned (working_copy, (1 << b->log2_pages) - 1, sizeof (u64)); - h->working_copies[cpu_number] = working_copy; + h->working_copies[thread_index] = working_copy; } _vec_len (working_copy) = 1 << b->log2_pages; @@ -132,7 +132,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy); CLIB_MEMORY_BARRIER (); b->as_u64 = working_bucket.as_u64; - h->working_copies[cpu_number] = working_copy; + h->working_copies[thread_index] = working_copy; } static @@ -233,7 +233,7 @@ int BV (clib_bihash_add_del) int i, limit; u64 hash, new_hash; u32 new_log2_pages; - u32 cpu_number = os_get_cpu_number (); + u32 thread_index = os_get_thread_index (); int mark_bucket_linear; int resplit_once; @@ -323,7 +323,7 @@ int BV (clib_bihash_add_del) new_log2_pages = h->saved_bucket.log2_pages + 1; mark_bucket_linear = 0; - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; resplit_once = 0; new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages); diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h index c60ff414..0cd2b4fe 100644 --- a/src/vppinfra/lock.h +++ b/src/vppinfra/lock.h @@ -24,7 +24,7 @@ typedef struct u32 lock; #if CLIB_DEBUG > 0 pid_t pid; - uword cpu_index; + uword thread_index; void *frame_address; #endif } *clib_spinlock_t; @@ -57,7 +57,7 @@ clib_spinlock_lock (clib_spinlock_t * p) #if CLIB_DEBUG > 0 (*p)->frame_address = __builtin_frame_address (0); (*p)->pid = getpid (); - (*p)->cpu_index = os_get_cpu_number (); + (*p)->thread_index = os_get_thread_index (); #endif } @@ -75,7 +75,7 @@ clib_spinlock_unlock (clib_spinlock_t * p) #if CLIB_DEBUG > 0 (*p)->frame_address = 0; (*p)->pid = 0; - (*p)->cpu_index = 0; + 
(*p)->thread_index = 0; #endif } diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 1260eab2..63c5ac16 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -54,14 +54,14 @@ extern void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS]; always_inline void * clib_mem_get_per_cpu_heap (void) { - int cpu = os_get_cpu_number (); + int cpu = os_get_thread_index (); return clib_per_cpu_mheaps[cpu]; } always_inline void * clib_mem_set_per_cpu_heap (u8 * new_heap) { - int cpu = os_get_cpu_number (); + int cpu = os_get_thread_index (); void *old = clib_per_cpu_mheaps[cpu]; clib_per_cpu_mheaps[cpu] = new_heap; return old; @@ -83,7 +83,7 @@ clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset, align_offset = align; } - cpu = os_get_cpu_number (); + cpu = os_get_thread_index (); heap = clib_per_cpu_mheaps[cpu]; heap = mheap_get_aligned (heap, size, align, align_offset, &offset); clib_per_cpu_mheaps[cpu] = heap; diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c index c917e164..00b67c49 100644 --- a/src/vppinfra/mhash.c +++ b/src/vppinfra/mhash.c @@ -226,7 +226,7 @@ static uword mhash_set_tmp_key (mhash_t * h, const void *key) { u8 *key_tmp; - int my_cpu = os_get_cpu_number (); + int my_cpu = os_get_thread_index (); vec_validate (h->key_tmps, my_cpu); key_tmp = h->key_tmps[my_cpu]; diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h index 102adf4e..7eb19183 100644 --- a/src/vppinfra/mhash.h +++ b/src/vppinfra/mhash.h @@ -93,7 +93,7 @@ mhash_key_to_mem (mhash_t * h, uword key) { u8 *key_tmp; - int my_cpu = os_get_cpu_number (); + int my_cpu = os_get_thread_index (); vec_validate (h->key_tmps, my_cpu); key_tmp = h->key_tmps[my_cpu]; return key_tmp; diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c index 192732db..d4010ceb 100644 --- a/src/vppinfra/mheap.c +++ b/src/vppinfra/mheap.c @@ -56,7 +56,7 @@ mheap_maybe_lock (void *v) mheap_t *h = mheap_header (v); if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE)) { - u32 my_cpu = os_get_cpu_number (); + u32 my_cpu = os_get_thread_index (); if (h->owner_cpu == my_cpu) { h->recursion_count++; @@ -77,7 +77,7 @@ mheap_maybe_unlock (void *v) mheap_t *h = mheap_header (v); if (v && h->flags & MHEAP_FLAG_THREAD_SAFE) { - ASSERT (os_get_cpu_number () == h->owner_cpu); + ASSERT (os_get_thread_index () == h->owner_cpu); if (--h->recursion_count == 0) { h->owner_cpu = ~0; diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h index a5c74f8c..33300716 100644 --- a/src/vppinfra/os.h +++ b/src/vppinfra/os.h @@ -56,8 +56,24 @@ void os_out_of_memory (void); /* Estimate, measure or divine CPU timestamp clock frequency. 
*/ f64 os_cpu_clock_frequency (void); -uword os_get_cpu_number (void); -uword os_get_ncpus (void); +extern __thread uword __os_thread_index; + +static_always_inline uword +os_get_thread_index (void) +{ + return __os_thread_index; +} + +static_always_inline uword +os_get_cpu_number (void) __attribute__ ((deprecated)); + +static_always_inline uword +os_get_cpu_number (void) +{ + return __os_thread_index; +} + +uword os_get_nthreads (void); #include diff --git a/src/vppinfra/smp.c b/src/vppinfra/smp.c index 8ac19960..f603283e 100644 --- a/src/vppinfra/smp.c +++ b/src/vppinfra/smp.c @@ -53,7 +53,7 @@ allocate_per_cpu_mheap (uword cpu) void *heap; uword vm_size, stack_size, mheap_flags; - ASSERT (os_get_cpu_number () == cpu); + ASSERT (os_get_thread_index () == cpu); vm_size = (uword) 1 << m->log2_n_per_cpu_vm_bytes; stack_size = (uword) 1 << m->log2_n_per_cpu_stack_bytes; diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 2928369d..361015b4 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -45,6 +45,8 @@ #include #include /* for sprintf */ +__thread uword __os_thread_index = 0; + clib_error_t * unix_file_n_bytes (char *file, uword * result) { @@ -188,14 +190,14 @@ void os_puts (u8 * string, uword string_length, uword is_error) void os_puts (u8 * string, uword string_length, uword is_error) { - int cpu = os_get_cpu_number (); - int ncpus = os_get_ncpus (); + int cpu = os_get_thread_index (); + int nthreads = os_get_nthreads (); char buf[64]; int fd = is_error ? 2 : 1; struct iovec iovs[2]; int n_iovs = 0; - if (ncpus > 1) + if (nthreads > 1) { snprintf (buf, sizeof (buf), "%d: ", cpu); @@ -219,16 +221,9 @@ os_out_of_memory (void) os_panic (); } -uword os_get_cpu_number (void) __attribute__ ((weak)); -uword -os_get_cpu_number (void) -{ - return 0; -} - -uword os_get_ncpus (void) __attribute__ ((weak)); +uword os_get_nthreads (void) __attribute__ ((weak)); uword -os_get_ncpus (void) +os_get_nthreads (void) { return 1; } -- cgit 1.2.3-korg From 10d8cc6bf92851fcaec4a6b4c6d3554dc1eb2386 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 30 May 2017 09:30:07 -0400 Subject: Improve fifo allocator performance - add option to preallocate fifos in a segment - track active fifos with doubly linked list instead of vector - update udp redirect test code to read fifo pointers from API call instead of digging them up from fifo segment header - input-node based active-open session generator Change-Id: I804b81e99d95f8690d17e12660c6645995e28a9a Signed-off-by: Dave Barach Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/svm/svm_fifo.h | 5 +- src/svm/svm_fifo_segment.c | 146 ++++++++++++-- src/svm/svm_fifo_segment.h | 30 ++- src/svm/test_svm_fifo1.c | 23 ++- src/uri/uri_tcp_test.c | 1 + src/uri/uri_udp_test.c | 39 ++-- src/vlibapi/api.h | 3 + src/vlibmemory/memory_shared.c | 22 ++- src/vlibmemory/memory_vlib.c | 26 +++ src/vnet/session/application.c | 18 +- src/vnet/session/application_interface.h | 2 + src/vnet/session/segment_manager.c | 79 ++++---- src/vnet/session/segment_manager.h | 3 + src/vnet/session/session.c | 38 +++- src/vnet/session/session.h | 8 +- src/vnet/session/session_api.c | 2 +- src/vnet/tcp/builtin_client.c | 330 +++++++++++++++++++------------ src/vnet/tcp/builtin_client.h | 13 +- src/vnet/tcp/builtin_http_server.c | 1 + src/vnet/tcp/builtin_server.c | 9 +- src/vnet/udp/builtin_server.c | 1 + 21 files changed, 534 insertions(+), 265 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 
69369163..9cb93ff4 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -38,7 +38,7 @@ format_function_t format_ooo_list; #define OOO_SEGMENT_INVALID_INDEX ((u32)~0) -typedef struct +typedef struct _svm_fifo { volatile u32 cursize; /**< current fifo size */ u32 nitems; @@ -62,7 +62,8 @@ typedef struct ooo_segment_t *ooo_segments; /**< Pool of ooo segments */ u32 ooos_list_head; /**< Head of out-of-order linked-list */ u32 ooos_newest; /**< Last segment to have been updated */ - + struct _svm_fifo *next; /**< next in freelist/active chain */ + struct _svm_fifo *prev; /**< prev in active chain */ CLIB_CACHE_LINE_ALIGN_MARK (data); } svm_fifo_t; diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c index 281fae27..eef2168c 100644 --- a/src/svm/svm_fifo_segment.c +++ b/src/svm/svm_fifo_segment.c @@ -17,6 +17,71 @@ svm_fifo_segment_main_t svm_fifo_segment_main; +static void +preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh, + svm_fifo_segment_create_args_t * a) +{ + u32 rx_fifo_size, tx_fifo_size; + svm_fifo_t *f; + u8 *rx_fifo_space, *tx_fifo_space; + int i; + + /* Parameter check */ + if (a->rx_fifo_size == 0 || a->tx_fifo_size == 0 + || a->preallocated_fifo_pairs == 0) + return; + + /* Calculate space requirements */ + rx_fifo_size = (sizeof (*f) + a->rx_fifo_size) * a->preallocated_fifo_pairs; + tx_fifo_size = (sizeof (*f) + a->tx_fifo_size) * a->preallocated_fifo_pairs; + + /* Allocate rx fifo space. May fail. */ + rx_fifo_space = clib_mem_alloc_aligned_at_offset + (rx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ , + 0 /* os_out_of_memory */ ); + + /* Same for TX */ + tx_fifo_space = clib_mem_alloc_aligned_at_offset + (tx_fifo_size, CLIB_CACHE_LINE_BYTES, 0 /* align_offset */ , + 0 /* os_out_of_memory */ ); + + /* Make sure it worked. Clean up if it didn't... */ + if (rx_fifo_space == 0 || tx_fifo_space == 0) + { + if (rx_fifo_space) + clib_mem_free (rx_fifo_space); + else + clib_warning ("rx fifo preallocation failure: size %d npairs %d", + a->rx_fifo_size, a->preallocated_fifo_pairs); + + if (tx_fifo_space) + clib_mem_free (tx_fifo_space); + else + clib_warning ("tx fifo preallocation failure: size %d nfifos %d", + a->tx_fifo_size, a->preallocated_fifo_pairs); + return; + } + + /* Carve rx fifo space */ + f = (svm_fifo_t *) rx_fifo_space; + for (i = 0; i < a->preallocated_fifo_pairs; i++) + { + f->next = fsh->free_fifos[FIFO_SEGMENT_RX_FREELIST]; + fsh->free_fifos[FIFO_SEGMENT_RX_FREELIST] = f; + rx_fifo_space += sizeof (*f) + a->rx_fifo_size; + f = (svm_fifo_t *) rx_fifo_space; + } + /* Carve tx fifo space */ + f = (svm_fifo_t *) tx_fifo_space; + for (i = 0; i < a->preallocated_fifo_pairs; i++) + { + f->next = fsh->free_fifos[FIFO_SEGMENT_TX_FREELIST]; + fsh->free_fifos[FIFO_SEGMENT_TX_FREELIST] = f; + tx_fifo_space += sizeof (*f) + a->tx_fifo_size; + f = (svm_fifo_t *) tx_fifo_space; + } +} + /** (master) create an svm fifo segment */ int svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) @@ -59,9 +124,7 @@ svm_fifo_segment_create (svm_fifo_segment_create_args_t * a) s->h = fsh; fsh->segment_name = format (0, "%s%c", a->segment_name, 0); - /* Avoid vec_add1(...) failure when adding a fifo, etc. 
*/ - vec_validate (fsh->fifos, 64); - _vec_len (fsh->fifos) = 0; + preallocate_fifo_pairs (fsh, a); ssvm_pop_heap (oldheap); @@ -103,6 +166,8 @@ svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a) s->h = fsh; fsh->segment_name = format (0, "%s%c", a->segment_name, 0); + preallocate_fifo_pairs (fsh, a); + sh->ready = 1; a->new_segment_index = s - sm->segments; return (0); @@ -154,7 +219,8 @@ svm_fifo_segment_delete (svm_fifo_segment_private_t * s) svm_fifo_t * svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, - u32 data_size_in_bytes) + u32 data_size_in_bytes, + svm_fifo_segment_freelist_t list_index) { ssvm_shared_header_t *sh; svm_fifo_segment_header_t *fsh; @@ -167,6 +233,29 @@ svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, ssvm_lock (sh, 1, 0); oldheap = ssvm_push_heap (sh); + switch (list_index) + { + case FIFO_SEGMENT_RX_FREELIST: + case FIFO_SEGMENT_TX_FREELIST: + f = fsh->free_fifos[list_index]; + if (f) + { + fsh->free_fifos[list_index] = f->next; + /* (re)initialize the fifo, as in svm_fifo_create */ + memset (f, 0, sizeof (*f)); + f->nitems = data_size_in_bytes; + f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + goto found; + } + /* FALLTHROUGH */ + case FIFO_SEGMENT_FREELIST_NONE: + break; + + default: + clib_warning ("ignore bogus freelist %d", list_index); + break; + } + /* Note: this can fail, in which case: create another segment */ f = svm_fifo_create (data_size_in_bytes); if (PREDICT_FALSE (f == 0)) @@ -176,37 +265,62 @@ svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, return (0); } - vec_add1 (fsh->fifos, f); +found: + /* If rx_freelist add to active fifos list. When cleaning up segment, + * we need a list of active sessions that should be disconnected. Since + * both rx and tx fifos keep pointers to the session, it's enough to track + * only one. 
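The allocation path above replaces the old vector of fifo pointers with per-direction free lists: preallocate_fifo_pairs() carves one large block per direction into fixed-size fifos chained through f->next, and the allocator pops from the matching list before falling back to svm_fifo_create(). A minimal, self-contained sketch of the same carve-and-pop idea, using hypothetical toy types and plain malloc instead of the segment heap:

/* Toy illustration only; the real code allocates from the segment heap. */
#include <stdlib.h>
#include <string.h>

typedef struct toy_fifo_
{
  struct toy_fifo_ *next;       /* freelist / active-list link */
  unsigned nitems;
  unsigned char data[0];
} toy_fifo_t;

static toy_fifo_t *free_list;

static void
carve_freelist (unsigned fifo_size, unsigned n_fifos)
{
  unsigned char *space;
  toy_fifo_t *f;
  unsigned i;

  space = malloc ((sizeof (toy_fifo_t) + fifo_size) * n_fifos);
  if (space == 0)
    return;                     /* nothing preallocated; allocator falls back */

  f = (toy_fifo_t *) space;
  for (i = 0; i < n_fifos; i++)
    {
      f->next = free_list;      /* push onto the free list */
      free_list = f;
      space += sizeof (toy_fifo_t) + fifo_size;
      f = (toy_fifo_t *) space;
    }
}

static toy_fifo_t *
alloc_fifo (unsigned fifo_size)
{
  toy_fifo_t *f = free_list;
  if (f)                        /* fast path: reuse a preallocated fifo */
    {
      free_list = f->next;
      memset (f, 0, sizeof (*f));
      f->nitems = fifo_size;
      return f;
    }
  return 0;                     /* slow path: create a fresh fifo instead */
}

Freeing is the mirror image: push the fifo back onto the list it came from, which is what svm_fifo_segment_free_fifo() now does instead of searching a vector.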
*/ + if (list_index == FIFO_SEGMENT_RX_FREELIST) + { + if (fsh->fifos) + { + fsh->fifos->prev = f; + f->next = fsh->fifos; + } + fsh->fifos = f; + } + ssvm_pop_heap (oldheap); ssvm_unlock (sh); return (f); } void -svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f) +svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f, + svm_fifo_segment_freelist_t list_index) { ssvm_shared_header_t *sh; svm_fifo_segment_header_t *fsh; void *oldheap; - int i; sh = s->ssvm.sh; fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; ssvm_lock (sh, 1, 0); oldheap = ssvm_push_heap (sh); - for (i = 0; i < vec_len (fsh->fifos); i++) + + switch (list_index) { - if (fsh->fifos[i] == f) - { - vec_delete (fsh->fifos, 1, i); - goto found; - } + case FIFO_SEGMENT_RX_FREELIST: + /* Remove from active list */ + if (f->prev) + f->prev->next = f->next; + if (f->next) + f->next->prev = f->prev; + /* FALLTHROUGH */ + case FIFO_SEGMENT_TX_FREELIST: + /* Add to free list */ + f->next = fsh->free_fifos[list_index]; + fsh->free_fifos[list_index] = f; + /* FALLTHROUGH */ + case FIFO_SEGMENT_FREELIST_NONE: + break; + + default: + clib_warning ("ignore bogus freelist %d", list_index); + break; } - clib_warning ("fifo 0x%llx not found in fifo table...", f); -found: - clib_mem_free (f); ssvm_pop_heap (oldheap); ssvm_unlock (sh); } diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h index 4218013a..31e14db5 100644 --- a/src/svm/svm_fifo_segment.h +++ b/src/svm/svm_fifo_segment.h @@ -19,10 +19,19 @@ #include #include +typedef enum +{ + FIFO_SEGMENT_FREELIST_NONE = -1, + FIFO_SEGMENT_RX_FREELIST = 0, + FIFO_SEGMENT_TX_FREELIST, + FIFO_SEGMENT_N_FREELISTS +} svm_fifo_segment_freelist_t; + typedef struct { - volatile svm_fifo_t **fifos; - u8 *segment_name; + svm_fifo_t *fifos; /**< Linked list of active RX fifos */ + u8 *segment_name; /**< Segment name */ + svm_fifo_t *free_fifos[FIFO_SEGMENT_N_FREELISTS]; /**< Free lists */ } svm_fifo_segment_header_t; typedef struct @@ -49,6 +58,9 @@ typedef struct char *segment_name; u32 segment_size; u32 new_segment_index; + u32 rx_fifo_size; + u32 tx_fifo_size; + u32 preallocated_fifo_pairs; } svm_fifo_segment_create_args_t; static inline svm_fifo_segment_private_t * @@ -61,13 +73,13 @@ svm_fifo_get_segment (u32 segment_index) static inline u8 svm_fifo_segment_has_fifos (svm_fifo_segment_private_t * fifo_segment) { - return vec_len ((svm_fifo_t **) fifo_segment->h->fifos) != 0; + return fifo_segment->h->fifos != 0; } -static inline svm_fifo_t ** -svm_fifo_segment_get_fifos (svm_fifo_segment_private_t * fifo_segment) +static inline svm_fifo_t * +svm_fifo_segment_get_fifo_list (svm_fifo_segment_private_t * fifo_segment) { - return (svm_fifo_t **) fifo_segment->h->fifos; + return fifo_segment->h->fifos; } #define foreach_ssvm_fifo_segment_api_error \ @@ -87,9 +99,11 @@ int svm_fifo_segment_attach (svm_fifo_segment_create_args_t * a); void svm_fifo_segment_delete (svm_fifo_segment_private_t * s); svm_fifo_t *svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, - u32 data_size_in_bytes); + u32 data_size_in_bytes, + svm_fifo_segment_freelist_t index); void svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, - svm_fifo_t * f); + svm_fifo_t * f, + svm_fifo_segment_freelist_t index); void svm_fifo_segment_init (u64 baseva, u32 timeout_in_seconds); u32 svm_fifo_segment_index (svm_fifo_segment_private_t * s); diff --git a/src/svm/test_svm_fifo1.c b/src/svm/test_svm_fifo1.c index 398dd6d7..63b4a9b7 100644 --- a/src/svm/test_svm_fifo1.c 
+++ b/src/svm/test_svm_fifo1.c @@ -30,6 +30,9 @@ hello_world (int verbose) a->segment_name = "fifo-test1"; a->segment_size = 256 << 10; + a->rx_fifo_size = 4096; + a->tx_fifo_size = 4096; + a->preallocated_fifo_pairs = 4; rv = svm_fifo_segment_create (a); @@ -38,7 +41,7 @@ hello_world (int verbose) sp = svm_fifo_get_segment (a->new_segment_index); - f = svm_fifo_segment_alloc_fifo (sp, 4096); + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); if (f == 0) return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); @@ -63,7 +66,7 @@ hello_world (int verbose) else error = clib_error_return (0, "data test FAIL!"); - svm_fifo_segment_free_fifo (sp, f); + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); return error; } @@ -91,7 +94,7 @@ master (int verbose) sp = svm_fifo_get_segment (a->new_segment_index); - f = svm_fifo_segment_alloc_fifo (sp, 4096); + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); if (f == 0) return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); @@ -129,7 +132,7 @@ mempig (int verbose) for (i = 0; i < 1000; i++) { - f = svm_fifo_segment_alloc_fifo (sp, 4096); + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); if (f == 0) break; vec_add1 (flist, f); @@ -139,14 +142,14 @@ mempig (int verbose) for (i = 0; i < vec_len (flist); i++) { f = flist[i]; - svm_fifo_segment_free_fifo (sp, f); + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); } _vec_len (flist) = 0; for (i = 0; i < 1000; i++) { - f = svm_fifo_segment_alloc_fifo (sp, 4096); + f = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST); if (f == 0) break; vec_add1 (flist, f); @@ -156,7 +159,7 @@ mempig (int verbose) for (i = 0; i < vec_len (flist); i++) { f = flist[i]; - svm_fifo_segment_free_fifo (sp, f); + svm_fifo_segment_free_fifo (sp, f, FIFO_SEGMENT_RX_FREELIST); } return 0; @@ -185,7 +188,7 @@ offset (int verbose) sp = svm_fifo_get_segment (a->new_segment_index); - f = svm_fifo_segment_alloc_fifo (sp, 200 << 10); + f = svm_fifo_segment_alloc_fifo (sp, 200 << 10, FIFO_SEGMENT_RX_FREELIST); if (f == 0) return clib_error_return (0, "svm_fifo_segment_alloc_fifo failed"); @@ -226,9 +229,9 @@ slave (int verbose) { svm_fifo_segment_create_args_t _a, *a = &_a; svm_fifo_segment_private_t *sp; - svm_fifo_segment_header_t *fsh; svm_fifo_t *f; ssvm_shared_header_t *sh; + svm_fifo_segment_header_t *fsh; int rv; u8 *test_data; u8 *retrieved_data = 0; @@ -248,7 +251,7 @@ slave (int verbose) fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; /* might wanna wait.. 
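With the new signatures exercised by the test above, callers name the free list explicitly both when allocating and when freeing. A condensed usage sketch (segment creation and error handling omitted):

svm_fifo_segment_private_t *sp = svm_fifo_get_segment (a->new_segment_index);

/* rx and tx fifos come from separate free lists */
svm_fifo_t *rx = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_RX_FREELIST);
svm_fifo_t *tx = svm_fifo_segment_alloc_fifo (sp, 4096, FIFO_SEGMENT_TX_FREELIST);

/* ... exchange data ... */

/* return each fifo to the list it was taken from */
svm_fifo_segment_free_fifo (sp, rx, FIFO_SEGMENT_RX_FREELIST);
svm_fifo_segment_free_fifo (sp, tx, FIFO_SEGMENT_TX_FREELIST);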
*/ - f = (svm_fifo_t *) fsh->fifos[0]; + f = fsh->fifos; /* Lazy bastards united */ test_data = format (0, "Hello world%c", 0); diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c index 22f246e5..e201a359 100755 --- a/src/uri/uri_tcp_test.c +++ b/src/uri/uri_tcp_test.c @@ -193,6 +193,7 @@ application_send_attach (uri_tcp_test_main_t * utm) bmp->context = ntohl (0xfeedface); bmp->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16; bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; diff --git a/src/uri/uri_udp_test.c b/src/uri/uri_udp_test.c index 8fb12ed2..45ad35a4 100644 --- a/src/uri/uri_udp_test.c +++ b/src/uri/uri_udp_test.c @@ -176,6 +176,7 @@ application_send_attach (uri_udp_test_main_t * utm) bmp->context = ntohl (0xfeedface); bmp->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT; + bmp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16; bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size; bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size; bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20; @@ -522,7 +523,7 @@ vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) svm_fifo_segment_private_t *seg; unix_shared_memory_queue_t *client_q; vl_api_connect_uri_reply_t *rmp; - session_t *session; + session_t *session = 0; int rv = 0; /* Create the segment */ @@ -545,17 +546,12 @@ vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) pool_get (utm->sessions, session); - /* - * By construction the master's idea of the rx fifo ends up in - * fsh->fifos[0], and the master's idea of the tx fifo ends up in - * fsh->fifos[1]. 
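Both URI test clients above now ask for preallocated fifo pairs when they attach. Collected in one place, the relevant options are filled in roughly as follows (bmp is the attach request being built, fifo_size a test variable, and 16 simply the tests' choice):

bmp->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_USE_FIFO | APP_OPTIONS_FLAGS_ADD_SEGMENT;
bmp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16;     /* carved up front */
bmp->options[SESSION_OPTIONS_RX_FIFO_SIZE] = fifo_size;
bmp->options[SESSION_OPTIONS_TX_FIFO_SIZE] = fifo_size;
bmp->options[SESSION_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20;

Since the preallocated pairs are carved from the segment heap, the requested segment size needs to be large enough to hold them alongside normal allocations.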
- */ - session->server_rx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, - 128 * 1024); + session->server_rx_fifo = svm_fifo_segment_alloc_fifo + (utm->seg, 128 * 1024, FIFO_SEGMENT_RX_FREELIST); ASSERT (session->server_rx_fifo); - session->server_tx_fifo = svm_fifo_segment_alloc_fifo (utm->seg, - 128 * 1024); + session->server_tx_fifo = svm_fifo_segment_alloc_fifo + (utm->seg, 128 * 1024, FIFO_SEGMENT_TX_FREELIST); ASSERT (session->server_tx_fifo); session->server_rx_fifo->master_session_index = session - utm->sessions; @@ -578,6 +574,12 @@ send_reply: rmp->context = mp->context; rmp->retval = ntohl (rv); rmp->segment_name_length = vec_len (a->segment_name); + if (session) + { + rmp->server_rx_fifo = pointer_to_uword (session->server_rx_fifo); + rmp->server_tx_fifo = pointer_to_uword (session->server_tx_fifo); + } + memcpy (rmp->segment_name, a->segment_name, vec_len (a->segment_name)); vec_free (a->segment_name); @@ -689,9 +691,7 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) svm_fifo_segment_create_args_t _a, *a = &_a; u32 segment_index; session_t *session; - ssvm_shared_header_t *sh; svm_fifo_segment_private_t *seg; - svm_fifo_segment_header_t *fsh; int rv; memset (a, 0, sizeof (*a)); @@ -707,22 +707,19 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) return; } - segment_index = vec_len (sm->segments) - 1; + segment_index = a->new_segment_index; vec_add2 (utm->seg, seg, 1); - memcpy (seg, sm->segments + segment_index, sizeof (*seg)); - sh = seg->ssvm.sh; - fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; - - while (vec_len (fsh->fifos) < 2) - sleep (1); + sleep (1); pool_get (utm->sessions, session); utm->cut_through_session_index = session - utm->sessions; - session->server_rx_fifo = (svm_fifo_t *) fsh->fifos[0]; + session->server_rx_fifo = uword_to_pointer (mp->server_rx_fifo, + svm_fifo_t *); ASSERT (session->server_rx_fifo); - session->server_tx_fifo = (svm_fifo_t *) fsh->fifos[1]; + session->server_tx_fifo = uword_to_pointer (mp->server_tx_fifo, + svm_fifo_t *); ASSERT (session->server_tx_fifo); } diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 3403e1c6..0e2c2101 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -193,6 +193,9 @@ typedef struct i32 vlib_signal; + /* vlib input queue length */ + u32 vlib_input_queue_length; + /* client side message index hash table */ uword *msg_index_by_name_and_crc; diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index aea90330..41aa1231 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -104,8 +104,17 @@ vl_msg_api_alloc_internal (int nbytes, int pool, int may_return_null) if (now - rv->gc_mark_timestamp > 10) { if (CLIB_DEBUG > 0) - clib_warning ("garbage collect pool %d ring %d index %d", - pool, i, q->head); + { + u16 *msg_idp, msg_id; + clib_warning + ("garbage collect pool %d ring %d index %d", pool, i, + q->head); + msg_idp = (u16 *) (rv->data); + msg_id = clib_net_to_host_u16 (*msg_idp); + if (msg_id < vec_len (api_main.msg_names)) + clib_warning ("msg id %d name %s", (u32) msg_id, + api_main.msg_names[msg_id]); + } shmem_hdr->garbage_collects++; goto collected; } @@ -330,6 +339,7 @@ vl_map_shmem (const char *region_name, int is_vlib) api_main_t *am = &api_main; int i; struct timespec ts, tsrem; + u32 vlib_input_queue_length; if (is_vlib == 0) svm_region_init_chroot (am->root_path); @@ -449,9 +459,13 @@ vl_map_shmem (const char *region_name, int is_vlib) shmem_hdr->version = VL_SHM_VERSION; /* vlib main input 
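As the commit message notes, the UDP test now reads the cut-through fifo pointers straight from the connect reply rather than digging them out of the fifo segment header. The exchange is the usual pointer/uword round trip over the shared-memory API, in essence:

/* server side: stash the shared-memory fifo addresses in the reply */
rmp->server_rx_fifo = pointer_to_uword (session->server_rx_fifo);
rmp->server_tx_fifo = pointer_to_uword (session->server_tx_fifo);

/* client side: recover typed pointers from the reply */
session->server_rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
session->server_tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);

This relies on both processes mapping the fifo segment at the same base address, so the raw pointers are meaningful on either side.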
queue */ + vlib_input_queue_length = 1024; + if (am->vlib_input_queue_length) + vlib_input_queue_length = am->vlib_input_queue_length; + shmem_hdr->vl_input_queue = - unix_shared_memory_queue_init (1024, sizeof (uword), getpid (), - am->vlib_signal); + unix_shared_memory_queue_init (vlib_input_queue_length, sizeof (uword), + getpid (), am->vlib_signal); /* Set up the msg ring allocator */ #define _(sz,n) \ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index e5d88732..004a9974 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1917,6 +1917,32 @@ api_config_fn (vlib_main_t * vm, unformat_input_t * input) VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); +static clib_error_t * +api_queue_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + api_main_t *am = &api_main; + u32 nitems; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "length %d", &nitems) || + (unformat (input, "len %d", &nitems))) + { + if (nitems >= 1024) + am->vlib_input_queue_length = nitems; + else + clib_warning ("vlib input queue length %d too small, ignored", + nitems); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_queue_config_fn, "api-queue"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index ccf9837f..c679b1f5 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -160,6 +160,7 @@ application_init (application_t * app, u32 api_client_index, u64 * options, props->rx_fifo_size = options[SESSION_OPTIONS_RX_FIFO_SIZE]; props->tx_fifo_size = options[SESSION_OPTIONS_TX_FIFO_SIZE]; props->add_segment = props->add_segment_size != 0; + props->preallocated_fifo_pairs = options[APP_OPTIONS_PREALLOC_FIFO_PAIRS]; props->use_private_segment = options[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_BUILTIN_APP; @@ -395,7 +396,7 @@ application_format_connects (application_t * app, int verbose) vlib_main_t *vm = vlib_get_main (); segment_manager_t *sm; u8 *app_name, *s = 0; - int i, j; + int j; /* Header */ if (app == 0) @@ -419,22 +420,16 @@ application_format_connects (application_t * app, int verbose) for (j = 0; j < vec_len (sm->segment_indices); j++) { svm_fifo_segment_private_t *fifo_segment; - svm_fifo_t **fifos; + svm_fifo_t *fifo; u8 *str; fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); - fifos = svm_fifo_segment_get_fifos (fifo_segment); - for (i = 0; i < vec_len (fifos); i++) + fifo = svm_fifo_segment_get_fifo_list (fifo_segment); + while (fifo) { - svm_fifo_t *fifo; u32 session_index, thread_index; stream_session_t *session; - /* There are 2 fifos/session. Avoid printing twice. 
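api_queue_config_fn above hangs off the "api-queue" startup-config section, so the depth of the binary-API input queue can be raised without recompiling. Based on the unformat strings, a startup.conf stanza would look like this (either "length" or "len" is accepted; values below 1024 are ignored with a warning):

api-queue {
  length 2048
}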
*/ - if (i % 2) - continue; - - fifo = fifos[i]; session_index = fifo->master_session_index; thread_index = fifo->master_thread_index; @@ -448,9 +443,10 @@ application_format_connects (application_t * app, int verbose) s = format (s, "%-40s%-20s", str, app_name); vlib_cli_output (vm, "%v", s); - vec_reset_length (s); vec_free (str); + + fifo = fifo->next; } vec_free (s); } diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 7d924c14..4d6f9def 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -119,10 +119,12 @@ typedef enum { APP_EVT_QUEUE_SIZE, APP_OPTIONS_FLAGS, + APP_OPTIONS_PREALLOC_FIFO_PAIRS, SESSION_OPTIONS_SEGMENT_SIZE, SESSION_OPTIONS_ADD_SEGMENT_SIZE, SESSION_OPTIONS_RX_FIFO_SIZE, SESSION_OPTIONS_TX_FIFO_SIZE, + SESSION_OPTIONS_PREALLOCATED_FIFO_PAIRS, SESSION_OPTIONS_ACCEPT_COOKIE, SESSION_OPTIONS_N_OPTIONS } app_attach_options_index_t; diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index b13df21c..caf8eaa3 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -58,6 +58,9 @@ session_manager_add_segment_i (segment_manager_t * sm, u32 segment_size, ca->segment_name = (char *) segment_name; ca->segment_size = segment_size; + ca->rx_fifo_size = sm->properties->rx_fifo_size; + ca->tx_fifo_size = sm->properties->tx_fifo_size; + ca->preallocated_fifo_pairs = sm->properties->preallocated_fifo_pairs; rv = svm_fifo_segment_create (ca); if (rv) @@ -104,7 +107,8 @@ session_manager_add_first_segment (segment_manager_t * sm, u32 segment_size) } static void -segment_manager_alloc_process_private_segment () + segment_manager_alloc_process_private_segment + (segment_manager_properties_t * props) { svm_fifo_segment_create_args_t _a, *a = &_a; @@ -115,6 +119,9 @@ segment_manager_alloc_process_private_segment () a->segment_name = "process-private-segment"; a->segment_size = ~0; a->new_segment_index = ~0; + a->rx_fifo_size = props->rx_fifo_size; + a->tx_fifo_size = props->tx_fifo_size; + a->preallocated_fifo_pairs = props->preallocated_fifo_pairs; if (svm_fifo_segment_create_process_private (a)) clib_warning ("Failed to create process private segment"); @@ -151,7 +158,7 @@ segment_manager_init (segment_manager_t * sm, else { if (private_segment_index == ~0) - segment_manager_alloc_process_private_segment (); + segment_manager_alloc_process_private_segment (properties); ASSERT (private_segment_index != ~0); vec_add1 (sm->segment_indices, private_segment_index); } @@ -170,74 +177,46 @@ segment_manager_init (segment_manager_t * sm, void segment_manager_del (segment_manager_t * sm) { - u32 *deleted_sessions = 0; - u32 *deleted_thread_indices = 0; - int i, j; + int j; /* Across all fifo segments used by the server */ for (j = 0; j < vec_len (sm->segment_indices); j++) { svm_fifo_segment_private_t *fifo_segment; - svm_fifo_t **fifos; + svm_fifo_t *fifo; + /* Vector of fifos allocated in the segment */ fifo_segment = svm_fifo_get_segment (sm->segment_indices[j]); - fifos = svm_fifo_segment_get_fifos (fifo_segment); + fifo = svm_fifo_segment_get_fifo_list (fifo_segment); /* * Remove any residual sessions from the session lookup table * Don't bother deleting the individual fifos, we're going to * throw away the fifo segment in a minute. 
*/ - for (i = 0; i < vec_len (fifos); i++) + while (fifo) { - svm_fifo_t *fifo; u32 session_index, thread_index; stream_session_t *session; - fifo = fifos[i]; session_index = fifo->master_session_index; thread_index = fifo->master_thread_index; session = stream_session_get (session_index, thread_index); - /* Add to the deleted_sessions vector (once!) */ - if (!session->is_deleted) - { - session->is_deleted = 1; - vec_add1 (deleted_sessions, session_index); - vec_add1 (deleted_thread_indices, thread_index); - } - } - - for (i = 0; i < vec_len (deleted_sessions); i++) - { - stream_session_t *session; - session = stream_session_get (deleted_sessions[i], - deleted_thread_indices[i]); - /* Instead of directly removing the session call disconnect */ session_send_session_evt_to_thread (stream_session_handle (session), FIFO_EVENT_DISCONNECT, - deleted_thread_indices[i]); - - /* - stream_session_table_del (smm, session); - pool_put(smm->sessions[deleted_thread_indices[i]], session); - */ + thread_index); + fifo = fifo->next; } - vec_reset_length (deleted_sessions); - vec_reset_length (deleted_thread_indices); - - /* Instead of removing the segment, test when removing the session if - * the segment can be removed + /* Instead of removing the segment, test when cleaning up disconnected + * sessions if the segment can be removed. */ - /* svm_fifo_segment_delete (fifo_segment); */ } clib_spinlock_free (&sm->lockp); - vec_free (deleted_sessions); - vec_free (deleted_thread_indices); pool_put (segment_managers, sm); } @@ -281,20 +260,27 @@ again: *fifo_segment_index = sm->segment_indices[i]; fifo_segment = svm_fifo_get_segment (*fifo_segment_index); + /* FC: cleanup, make sure sm->properties->xxx_fifo_size always set */ fifo_size = sm->properties->rx_fifo_size; fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; - *server_rx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + *server_rx_fifo = + svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size, + FIFO_SEGMENT_RX_FREELIST); + /* FC: cleanup, make sure sm->properties->xxx_fifo_size always set */ fifo_size = sm->properties->tx_fifo_size; fifo_size = (fifo_size == 0) ? default_fifo_size : fifo_size; - *server_tx_fifo = svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size); + *server_tx_fifo = + svm_fifo_segment_alloc_fifo (fifo_segment, fifo_size, + FIFO_SEGMENT_TX_FREELIST); if (*server_rx_fifo == 0) { /* This would be very odd, but handle it... 
*/ if (*server_tx_fifo != 0) { - svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, *server_tx_fifo, + FIFO_SEGMENT_TX_FREELIST); *server_tx_fifo = 0; } continue; @@ -303,7 +289,8 @@ again: { if (*server_rx_fifo != 0) { - svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, *server_rx_fifo, + FIFO_SEGMENT_RX_FREELIST); *server_rx_fifo = 0; } continue; @@ -365,8 +352,10 @@ segment_manager_dealloc_fifos (u32 svm_segment_index, svm_fifo_t * rx_fifo, return; fifo_segment = svm_fifo_get_segment (svm_segment_index); - svm_fifo_segment_free_fifo (fifo_segment, rx_fifo); - svm_fifo_segment_free_fifo (fifo_segment, tx_fifo); + svm_fifo_segment_free_fifo (fifo_segment, rx_fifo, + FIFO_SEGMENT_RX_FREELIST); + svm_fifo_segment_free_fifo (fifo_segment, tx_fifo, + FIFO_SEGMENT_TX_FREELIST); /* Remove segment only if it holds no fifos and not the first */ if (sm->segment_indices[0] != svm_segment_index diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 2710bb54..d4b73208 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -28,6 +28,9 @@ typedef struct _segment_manager_properties u32 rx_fifo_size; u32 tx_fifo_size; + /** Preallocated pool sizes */ + u32 preallocated_fifo_pairs; + /** Configured additional segment size */ u32 add_segment_size; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index c5aaf2e2..02b0cced 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -1048,19 +1048,21 @@ session_vpp_event_queue_allocate (session_manager_main_t * smm, { api_main_t *am = &api_main; void *oldheap; + u32 event_queue_length = 2048; if (smm->vpp_event_queues[thread_index] == 0) { /* Allocate event fifo in the /vpe-api shared-memory segment */ oldheap = svm_push_data_heap (am->vlib_rp); + if (smm->configured_event_queue_length) + event_queue_length = smm->configured_event_queue_length; + smm->vpp_event_queues[thread_index] = - unix_shared_memory_queue_init (2048 /* nels $$$$ config */ , - sizeof (session_fifo_event_t), - 0 /* consumer pid */ , - 0 - /* (do not) send signal when queue non-empty */ - ); + unix_shared_memory_queue_init + (event_queue_length, + sizeof (session_fifo_event_t), 0 /* consumer pid */ , + 0 /* (do not) send signal when queue non-empty */ ); svm_pop_heap (oldheap); } @@ -1187,6 +1189,30 @@ session_manager_main_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (session_manager_main_init) + static clib_error_t *session_config_fn (vlib_main_t * vm, + unformat_input_t * input) +{ + session_manager_main_t *smm = &session_manager_main; + u32 nitems; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "event-queue-length %d", &nitems)) + { + if (nitems >= 2048) + smm->configured_event_queue_length = nitems; + else + clib_warning ("event queue length %d too small, ignored", nitems); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (session_config_fn, "session"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index d60cca29..a8728649 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -125,14 +125,11 @@ typedef struct _stream_session_t u8 thread_index; - /** used during unbind processing */ - u8 is_deleted; - /** To avoid n**2 "one event per frame" check 
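session_vpp_event_queue_allocate() above now honors a configurable depth and, as before, builds the per-thread event queue on the API shared-memory heap (the /vpe-api region) so that external applications can enqueue events into it. Condensed, the allocation pattern is:

void *oldheap = svm_push_data_heap (am->vlib_rp);       /* switch to the API heap */
smm->vpp_event_queues[thread_index] =
  unix_shared_memory_queue_init (event_queue_length,
                                 sizeof (session_fifo_event_t),
                                 0 /* consumer pid */ ,
                                 0 /* no signal when non-empty */ );
svm_pop_heap (oldheap);                                 /* restore the previous heap */

The matching knob registered by session_config_fn would be a startup.conf stanza along the lines of "session { event-queue-length 16384 }" (16384 is just an example value; anything below 2048 is ignored).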
*/ u8 enqueue_epoch; /** Pad to a multiple of 8 octets */ - u8 align_pad[2]; + u8 align_pad[4]; /** svm segment index where fifos were allocated */ u32 svm_segment_index; @@ -205,6 +202,9 @@ struct _session_manager_main /** vpp fifo event queue */ unix_shared_memory_queue_t **vpp_event_queues; + /** vpp fifo event queue configured length */ + u32 configured_event_queue_length; + /** Unique segment name counter */ u32 unique_segment_name_counter; diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 8c073a08..98d6946a 100755 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -419,7 +419,7 @@ done: REPLY_MACRO (VL_API_UNBIND_URI_REPLY); } -static void +void vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp) { vl_api_connect_uri_reply_t *rmp; diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index aaefa7eb..768f0c3c 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -44,8 +44,6 @@ #undef vl_printfun #define TCP_BUILTIN_CLIENT_DBG (1) -#define TCP_BUILTIN_CLIENT_VPP_THREAD (0) -#define TCP_BUILTIN_CLIENT_PTHREAD (!TCP_BUILTIN_CLIENT_VPP_THREAD) static void send_test_chunk (tclient_main_t * tm, session_t * s) @@ -156,131 +154,76 @@ receive_test_chunk (tclient_main_t * tm, session_t * s) } } -#if TCP_BUILTIN_CLIENT_VPP_THREAD -#define THREAD_PROTOTYPE static void -#else -#define THREAD_PROTOTYPE static void * -#endif - -THREAD_PROTOTYPE -tclient_thread_fn (void *arg) +static uword +builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) { tclient_main_t *tm = &tclient_main; + int my_thread_index = vlib_get_thread_index (); vl_api_disconnect_session_t *dmp; session_t *sp; - struct timespec ts, tsrem; int i; - int try_tx, try_rx; - u32 *session_indices = 0; - clib_time_t ttime; - f64 before, after; - u64 rx_total; + int delete_session; + u32 *connection_indices; - clib_time_init (&ttime); + connection_indices = tm->connection_index_by_thread[my_thread_index]; - /* stats thread wants no signals. 
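From here on, the test client stops running its own pthread: tclient_thread_fn becomes builtin_client_node_fn, an ordinary VLIB input node that is registered disabled and flipped to polling only while a test runs. Reduced to its essentials (the full registration and the enable loop appear in the hunks below):

/* register the node, left disabled until a test starts */
VLIB_REGISTER_NODE (builtin_client_node) =
{
  .function = builtin_client_node_fn,
  .name = "builtin-tcp-client",
  .type = VLIB_NODE_TYPE_INPUT,
  .state = VLIB_NODE_STATE_DISABLED,
};

/* when the CLI command starts a test, enable it on every thread */
for (i = 0; i < thread_main->n_vlib_mains; i++)
  vlib_node_set_state (vlib_mains[i], builtin_client_node.index,
                       VLIB_NODE_STATE_POLLING);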
*/ - { - sigset_t s; - sigfillset (&s); - pthread_sigmask (SIG_SETMASK, &s, 0); - } + if (tm->run_test == 0 || vec_len (connection_indices) == 0) + return 0; - clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0]; + for (i = 0; i < vec_len (connection_indices); i++) + { + delete_session = 1; - vec_validate (session_indices, 0); - vec_reset_length (session_indices); + sp = pool_elt_at_index (tm->sessions, connection_indices[i]); - while (1) - { - /* Wait until we're told to get busy */ - while (tm->run_test == 0 - || (tm->ready_connections != tm->expected_connections)) + if (sp->bytes_to_send > 0) { - ts.tv_sec = 0; - ts.tv_nsec = 100000000; - while (nanosleep (&ts, &tsrem) < 0) - ts = tsrem; + send_test_chunk (tm, sp); + delete_session = 0; } - tm->run_test = 0; - rx_total = 0; - - clib_warning ("Start test..."); - - before = clib_time_now (&ttime); - - do + if (sp->bytes_to_receive > 0) { - do - { - try_tx = try_rx = 0; - - /* *INDENT-OFF* */ - pool_foreach (sp, tm->sessions, - ({ - if (sp->bytes_to_send > 0) - { - send_test_chunk (tm, sp); - try_tx = 1; - } - })); - pool_foreach (sp, tm->sessions, - ({ - if (sp->bytes_to_receive > 0) - { - receive_test_chunk (tm, sp); - try_rx = 1; - } - else - { - /* Session is complete */ - vec_add1 (session_indices, sp - tm->sessions); - } - })); - /* Terminate any completed sessions */ - if (PREDICT_FALSE (_vec_len(session_indices) != 0)) - { - for (i = 0; i < _vec_len (session_indices); i++) - { - sp = pool_elt_at_index (tm->sessions, session_indices[i]); - rx_total += sp->bytes_received; - dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp)); - memset (dmp, 0, sizeof (*dmp)); - dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); - dmp->client_index = tm->my_client_index; - dmp->handle = sp->vpp_session_handle; - vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp); - pool_put (tm->sessions, sp); - } - _vec_len(session_indices) = 0; - } - /* *INDENT-ON* */ - } - while (try_tx || try_rx); + receive_test_chunk (tm, sp); + delete_session = 0; } - while (0); - after = clib_time_now (&ttime); - - clib_warning ("Test complete %lld bytes in %.2f secs", - rx_total, (after - before)); - if ((after - before) != 0.0) + if (PREDICT_FALSE (delete_session == 1)) { - clib_warning ("%.2f bytes/second full-duplex", - ((f64) rx_total) / (after - before)); - clib_warning ("%.4f gbit/second full-duplex", - (((f64) rx_total * 8.0) / (after - before)) / 1e9); + __sync_fetch_and_add (&tm->rx_total, sp->bytes_received); + dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp)); + memset (dmp, 0, sizeof (*dmp)); + dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); + dmp->client_index = tm->my_client_index; + dmp->handle = sp->vpp_session_handle; + vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp); + vec_delete (connection_indices, 1, i); + tm->connection_index_by_thread[my_thread_index] = + connection_indices; + __sync_fetch_and_add (&tm->ready_connections, -1); + + /* Kick the debug CLI process */ + if (tm->ready_connections == 0) + { + tm->test_end_time = vlib_time_now (vm); + vlib_process_signal_event (vm, tm->cli_node_index, + 2, 0 /* data */ ); + } } - - if (pool_elts (tm->sessions)) - clib_warning ("BUG: %d active sessions remain...", - pool_elts (tm->sessions)); } - while (0); - /* NOTREACHED */ -#if TCP_BUILTIN_CLIENT_PTHREAD return 0; -#endif } +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (builtin_client_node) = +{ + .function = builtin_client_node_fn, + .name = "builtin-tcp-client", + .type = VLIB_NODE_TYPE_INPUT, + .state = 
VLIB_NODE_STATE_DISABLED, +}; +/* *INDENT-ON* */ + + /* So we don't get "no handler for... " msgs */ static void vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) @@ -299,6 +242,7 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) session_t *session; u32 session_index; i32 retval = /* clib_net_to_host_u32 ( */ mp->retval /*) */ ; + int i; if (retval < 0) { @@ -332,7 +276,29 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp) /* Add it to the session lookup table */ hash_set (tm->session_index_by_vpp_handles, mp->handle, session_index); - tm->ready_connections++; + if (tm->ready_connections == tm->expected_connections - 1) + { + vlib_thread_main_t *thread_main = vlib_get_thread_main (); + int thread_index; + + thread_index = 0; + for (i = 0; i < pool_elts (tm->sessions); i++) + { + vec_add1 (tm->connection_index_by_thread[thread_index], i); + thread_index++; + if (thread_index == thread_main->n_vlib_mains) + thread_index = 0; + } + } + __sync_fetch_and_add (&tm->ready_connections, 1); + if (tm->ready_connections == tm->expected_connections) + { + tm->run_test = 1; + tm->test_start_time = vlib_time_now (tm->vlib_main); + /* Signal the CLI process that the action is starting... */ + vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, + 1, 0 /* data */ ); + } } static int @@ -414,6 +380,7 @@ static int tcp_test_clients_init (vlib_main_t * vm) { tclient_main_t *tm = &tclient_main; + vlib_thread_main_t *thread_main = vlib_get_thread_main (); int i; tclient_api_hookup (vm); @@ -429,6 +396,46 @@ tcp_test_clients_init (vlib_main_t * vm) vec_validate (tm->rx_buf, vec_len (tm->connect_test_data) - 1); tm->is_init = 1; + tm->vlib_main = vm; + + vec_validate (tm->connection_index_by_thread, thread_main->n_vlib_mains); + return 0; +} + +static int +builtin_session_connected_callback (u32 app_index, u32 api_context, + stream_session_t * s, u8 is_fail) +{ + vl_api_connect_uri_reply_t _m, *mp = &_m; + unix_shared_memory_queue_t *q; + application_t *app; + unix_shared_memory_queue_t *vpp_queue; + + app = application_get (app_index); + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return -1; + + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY); + mp->context = api_context; + if (!is_fail) + { + vpp_queue = session_manager_get_vpp_event_queue (s->thread_index); + mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo); + mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo); + mp->handle = stream_session_handle (s); + mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); + mp->retval = 0; + s->session_state = SESSION_STATE_READY; + } + else + { + mp->retval = clib_host_to_net_u32 (VNET_API_ERROR_SESSION_CONNECT_FAIL); + } + + vl_api_connect_uri_reply_t_handler (mp); return 0; } @@ -461,7 +468,7 @@ builtin_server_rx_callback (stream_session_t * s) static session_cb_vft_t builtin_clients = { .session_reset_callback = builtin_session_reset_callback, - .session_connected_callback = send_session_connected_callback, + .session_connected_callback = builtin_session_connected_callback, .session_accept_callback = builtin_session_create_callback, .session_disconnect_callback = builtin_session_disconnect_callback, .builtin_server_rx_callback = builtin_server_rx_callback @@ -502,11 +509,16 @@ test_tcp_clients_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { tclient_main_t *tm = &tclient_main; + vlib_thread_main_t *thread_main = vlib_get_thread_main 
(); + uword *event_data = 0; + uword event_type; u8 *connect_uri = (u8 *) "tcp://6.0.1.1/1234"; u8 *uri; u32 n_clients = 1; int i; u64 tmp; + f64 cli_timeout = 20.0; + f64 delta; tm->bytes_to_send = 8192; vec_free (tm->connect_uri); @@ -523,6 +535,8 @@ test_tcp_clients_command_fn (vlib_main_t * vm, ; else if (unformat (input, "uri %s", &tm->connect_uri)) ; + else if (unformat (input, "cli-timeout %f", &cli_timeout)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -536,6 +550,7 @@ test_tcp_clients_command_fn (vlib_main_t * vm, tm->ready_connections = 0; tm->expected_connections = n_clients; + tm->rx_total = 0; uri = connect_uri; if (tm->connect_uri) @@ -556,40 +571,99 @@ test_tcp_clients_command_fn (vlib_main_t * vm, } #endif vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ ); - attach_builtin_test_clients (); + if (tm->test_client_attached == 0) + attach_builtin_test_clients (); + tm->test_client_attached = 1; + + /* Turn on the builtin client input nodes */ + for (i = 0; i < thread_main->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], builtin_client_node.index, + VLIB_NODE_STATE_POLLING); - /* Fire off connect requests, in something approaching a normal manner */ + tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index; + + /* Fire off connect requests */ for (i = 0; i < n_clients; i++) { - vl_api_connect_uri_t *cmp; - cmp = vl_msg_api_alloc_as_if_client (sizeof (*cmp)); + vl_api_connect_uri_t _cmp, *cmp = &_cmp; + void vl_api_connect_uri_t_handler (vl_api_connect_uri_t * cmp); + memset (cmp, 0, sizeof (*cmp)); cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI); cmp->client_index = tm->my_client_index; cmp->context = ntohl (0xfeedface); memcpy (cmp->uri, uri, strlen ((char *) uri) + 1); - vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & cmp); + + vl_api_connect_uri_t_handler (cmp); + /* Crude pacing for call setups, 100k/sec */ + vlib_process_suspend (vm, 10e-6); + } + + /* Park until the sessions come up, or ten seconds elapse... */ + vlib_process_wait_for_event_or_clock (vm, 10.0 /* timeout, seconds */ ); + event_type = vlib_process_get_events (vm, &event_data); + + switch (event_type) + { + case ~0: + vlib_cli_output (vm, "Timeout with only %d sessions active...", + tm->ready_connections); + goto cleanup; + + case 1: + vlib_cli_output (vm, "Test started at %.6f", tm->test_start_time); + break; + + default: + vlib_cli_output (vm, "unexpected event(1): %d", event_type); + goto cleanup; } - tm->run_test = 1; + /* Now wait for the sessions to finish... 
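The CLI command above parks in the VLIB process framework twice: first until the connect callbacks signal that every session is up (event 1), and later until the data-path node signals that the last session has closed (event 2), each bounded by a timeout. Boiled down, the signal/wait pairing looks like this:

/* the CLI process records its own node index so callbacks can wake it */
tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;

/* elsewhere, when the interesting condition is reached */
vlib_process_signal_event (vm, tm->cli_node_index, 1 /* event type */ , 0 /* data */ );

/* back in the CLI process */
vlib_process_wait_for_event_or_clock (vm, 10.0 /* timeout, seconds */ );
event_type = vlib_process_get_events (vm, &event_data);
if (event_type == ~0)
  vlib_cli_output (vm, "timed out waiting for the event");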
*/ + vlib_process_wait_for_event_or_clock (vm, cli_timeout); + event_type = vlib_process_get_events (vm, &event_data); + + switch (event_type) + { + case ~0: + vlib_cli_output (vm, "Timeout with %d sessions still active...", + tm->ready_connections); + goto cleanup; + + case 2: + vlib_cli_output (vm, "Test finished at %.6f", tm->test_end_time); + break; + + default: + vlib_cli_output (vm, "unexpected event(2): %d", event_type); + goto cleanup; + } + + delta = tm->test_end_time - tm->test_start_time; + + if (delta != 0.0) + { + vlib_cli_output (vm, + "%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds", + tm->rx_total, tm->rx_total / (1ULL << 20), + tm->rx_total / (1ULL << 30), delta); + vlib_cli_output (vm, "%.2f bytes/second full-duplex", + ((f64) tm->rx_total) / (delta)); + vlib_cli_output (vm, "%.4f gbit/second full-duplex", + (((f64) tm->rx_total * 8.0) / delta / 1e9)); + } + else + vlib_cli_output (vm, "zero delta-t?"); + +cleanup: + pool_free (tm->sessions); + for (i = 0; i < vec_len (tm->connection_index_by_thread); i++) + vec_reset_length (tm->connection_index_by_thread[i]); return 0; } -/* *INDENT-OFF* */ -#if TCP_BUILTIN_CLIENT_VPP_THREAD -VLIB_REGISTER_THREAD (builtin_client_reg, static) = -{ - .name = "tcp-builtin-client", - .function = tclient_thread_fn, - .fixed_count = 1, - .count = 1, - .no_data_structure_clone = 1, -}; -#endif -/* *INDENT-ON* */ - /* *INDENT-OFF* */ VLIB_CLI_COMMAND (test_clients_command, static) = { diff --git a/src/vnet/tcp/builtin_client.h b/src/vnet/tcp/builtin_client.h index 57d112e6..d5d79e53 100644 --- a/src/vnet/tcp/builtin_client.h +++ b/src/vnet/tcp/builtin_client.h @@ -83,14 +83,18 @@ typedef struct pid_t my_pid; - /* For deadman timers */ - clib_time_t clib_time; + f64 test_start_time; + f64 test_end_time; - /* Connection counts */ u32 expected_connections; + u32 **connection_index_by_thread; volatile u32 ready_connections; + volatile u32 finished_connections; - /* Signal variables */ + volatile u64 rx_total; + u32 cli_node_index; + + /* Signal variable */ volatile int run_test; /* Bytes to send */ @@ -107,6 +111,7 @@ typedef struct u8 test_return_packets; u8 is_init; + u8 test_client_attached; u32 node_index; diff --git a/src/vnet/tcp/builtin_http_server.c b/src/vnet/tcp/builtin_http_server.c index 763a46e9..8b4801cd 100644 --- a/src/vnet/tcp/builtin_http_server.c +++ b/src/vnet/tcp/builtin_http_server.c @@ -513,6 +513,7 @@ server_attach () a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 8 << 10; a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 16; a->segment_name = segment_name; a->segment_name_length = ARRAY_LEN (segment_name); diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index 64fc4a71..4f0e211c 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -62,7 +62,6 @@ int builtin_session_accept_callback (stream_session_t * s) { builtin_server_main_t *bsm = &builtin_server_main; - clib_warning ("called..."); bsm->vpp_queue[s->thread_index] = session_manager_get_vpp_event_queue (s->thread_index); @@ -76,7 +75,6 @@ builtin_session_disconnect_callback (stream_session_t * s) { builtin_server_main_t *bsm = &builtin_server_main; vnet_disconnect_args_t _a, *a = &_a; - clib_warning ("called..."); a->handle = stream_session_handle (s); a->app_index = bsm->app_index; @@ -280,10 +278,11 @@ server_attach () a->api_client_index = bsm->my_client_index; a->session_cb_vft = 
&builtin_session_cb_vft; a->options = options; - a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 128 << 20; - a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 1 << 16; - a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 1 << 16; + a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20; + a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 64 << 10; + a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 64 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 8192; a->segment_name = segment_name; a->segment_name_length = ARRAY_LEN (segment_name); diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c index 18684d54..7dd03670 100644 --- a/src/vnet/udp/builtin_server.c +++ b/src/vnet/udp/builtin_server.c @@ -111,6 +111,7 @@ attach_builtin_uri_server () options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */ options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP; + options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 1024; a->options = options; -- cgit 1.2.3-korg From 58eb866b15a45514dc356170f28640d6c9db8034 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Fri, 9 Jun 2017 06:06:49 +0200 Subject: Refactor API message handling code This is preparation for new C API. Moving common stuff to separate headers reduces dependency issues. Change-Id: Ie7adb23398de72448e5eba6c1c1da4e1bc678725 Signed-off-by: Klement Sekera --- src/svm.am | 2 +- src/svm/svm.h | 102 +--------------- src/svm/svm_common.h | 133 +++++++++++++++++++++ src/vlib-api.am | 6 +- src/vlibapi/api.h | 280 ++++---------------------------------------- src/vlibapi/api_common.h | 268 ++++++++++++++++++++++++++++++++++++++++++ src/vlibmemory/api.h | 107 +---------------- src/vlibmemory/api_common.h | 138 ++++++++++++++++++++++ src/vppinfra.am | 1 + src/vppinfra/clib_error.h | 35 ++++++ src/vppinfra/error.h | 14 +-- 11 files changed, 609 insertions(+), 477 deletions(-) create mode 100644 src/svm/svm_common.h create mode 100644 src/vlibapi/api_common.h create mode 100644 src/vlibmemory/api_common.h create mode 100644 src/vppinfra/clib_error.h (limited to 'src/vlibmemory') diff --git a/src/svm.am b/src/svm.am index 442eba8e..7116eb37 100644 --- a/src/svm.am +++ b/src/svm.am @@ -13,7 +13,7 @@ bin_PROGRAMS += svmtool svmdbtool -nobase_include_HEADERS += svm/svm.h svm/ssvm.h svm/svmdb.h \ +nobase_include_HEADERS += svm/svm.h svm/svm_common.h svm/ssvm.h svm/svmdb.h \ svm/svm_fifo.h svm/svm_fifo_segment.h lib_LTLIBRARIES += libsvm.la libsvmdb.la diff --git a/src/svm/svm.h b/src/svm/svm.h index 06797fa1..894c3d95 100644 --- a/src/svm/svm.h +++ b/src/svm/svm.h @@ -24,106 +24,10 @@ #include #include #include +#include #define MMAP_PAGESIZE (clib_mem_get_page_size()) -#define SVM_VERSION ((1<<16) | 1) /* set to declare region ready. 
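The refactor starting here moves type and prototype declarations out of svm/svm.h into a new svm/svm_common.h (and likewise introduces vlibapi/api_common.h and vlibmemory/api_common.h), leaving the original headers with the inline helpers. A consumer that only needs the region types can now include the lighter header; a sketch, with a hypothetical region name:

#include <svm/svm_common.h>     /* svm_region_t, svm_map_region_args_t, ... */

static svm_region_t *
map_my_region (void)
{
  svm_map_region_args_t args = {
    .root_path = 0,             /* use the global arena */
    .name = "/my_region",       /* hypothetical name */
    .size = 64 << 20,
    .flags = SVM_FLAGS_MHEAP,
  };
  return svm_region_find_or_create (&args);
}

The new headers are installed alongside the old ones (see the nobase_include_HEADERS additions), so out-of-tree code can pick whichever granularity it needs.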
*/ - -#define SVM_FLAGS_MHEAP (1<<0) /* region contains an mheap */ -#define SVM_FLAGS_FILE (1<<1) /* region backed by one or more files */ -#define SVM_FLAGS_NODATA (1<<2) /* region will be further subdivided */ -#define SVM_FLAGS_NEED_DATA_INIT (1<<3) - -#define SVM_PVT_MHEAP_SIZE (128<<10) /* region's private mheap (128k) */ - -typedef struct svm_region_ -{ - volatile uword version; - pthread_mutex_t mutex; - pthread_cond_t condvar; - int mutex_owner_pid; /* in case of trouble */ - int mutex_owner_tag; - uword flags; - uword virtual_base; /* base of the region object */ - uword virtual_size; - void *region_heap; - void *data_base; /* data portion base address */ - void *data_heap; /* data heap, if any */ - volatile void *user_ctx; /* user context pointer */ - /* stuff allocated in the region's heap */ - uword bitmap_size; /* nbits in virtual alloc bitmap */ - uword *bitmap; /* the bitmap */ - char *region_name; - char *backing_file; - char **filenames; - uword *client_pids; - /* pad */ - - /* next page: - * (64K) clib heap for the region itself - * - * data_base -> whatever is in this region - */ - -} svm_region_t; - -typedef struct svm_map_region_args_ -{ - const char *root_path; /* NULL means use the truly global arena */ - const char *name; - u64 baseva; - u64 size; - u64 pvt_heap_size; - uword flags; - char *backing_file; - uword backing_mmap_size; - /* uid, gid to own the svm region(s) */ - int uid; - int gid; -} svm_map_region_args_t; - - -/* - * Memory shared across all router instances. Packet buffers, etc - * Base should be "out of the way," and size should be big enough to - * cover everything we plan to put here. - */ -#define SVM_GLOBAL_REGION_BASEVA 0x30000000 -#define SVM_GLOBAL_REGION_SIZE (64<<20) -#define SVM_GLOBAL_REGION_NAME "/global_vm" - -/* - * Memory shared across individual router instances. - */ -#define SVM_OVERLAY_REGION_BASEVA \ - (SVM_GLOBAL_REGION_BASEVA + SVM_GLOBAL_REGION_SIZE) -#define SVM_OVERLAY_REGION_SIZE (1<<20) -#define SVM_OVERLAY_REGION_BASENAME "/overlay_vm" - -typedef struct -{ - u8 *subregion_name; -} svm_subregion_t; - -typedef struct -{ - svm_subregion_t *subregions; /* subregion pool */ - uword *name_hash; - u8 *root_path; -} svm_main_region_t; - - -void *svm_region_find_or_create (svm_map_region_args_t * a); -void svm_region_init (void); -void svm_region_init_chroot (const char *root_path); -void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); -void svm_region_init_args (svm_map_region_args_t * a); -void svm_region_exit (void); -void svm_region_unmap (void *rp_arg); -void svm_client_scan (const char *root_path); -void svm_client_scan_this_region_nolock (svm_region_t * rp); -u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); - static inline void * svm_mem_alloc (svm_region_t * rp, uword size) { @@ -192,10 +96,6 @@ svm_pop_heap (void *oldheap) clib_mem_set_heap (oldheap); } -u8 *format_svm_region (u8 * s, va_list * args); - -svm_region_t *svm_get_root_rp (void); - #endif /* __included_svm_h__ */ /* diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h new file mode 100644 index 00000000..1f184432 --- /dev/null +++ b/src/svm/svm_common.h @@ -0,0 +1,133 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __included_svm_common_h__ +#define __included_svm_common_h__ + +#include +#include +#include + +#define SVM_VERSION ((1<<16) | 1) /* set to declare region ready. */ + +#define SVM_FLAGS_MHEAP (1<<0) /* region contains an mheap */ +#define SVM_FLAGS_FILE (1<<1) /* region backed by one or more files */ +#define SVM_FLAGS_NODATA (1<<2) /* region will be further subdivided */ +#define SVM_FLAGS_NEED_DATA_INIT (1<<3) + +#define SVM_PVT_MHEAP_SIZE (128<<10) /* region's private mheap (128k) */ + +typedef struct svm_region_ +{ + volatile uword version; + pthread_mutex_t mutex; + pthread_cond_t condvar; + int mutex_owner_pid; /* in case of trouble */ + int mutex_owner_tag; + uword flags; + uword virtual_base; /* base of the region object */ + uword virtual_size; + void *region_heap; + void *data_base; /* data portion base address */ + void *data_heap; /* data heap, if any */ + volatile void *user_ctx; /* user context pointer */ + /* stuff allocated in the region's heap */ + uword bitmap_size; /* nbits in virtual alloc bitmap */ + uword *bitmap; /* the bitmap */ + char *region_name; + char *backing_file; + char **filenames; + uword *client_pids; + /* pad */ + + /* next page: + * (64K) clib heap for the region itself + * + * data_base -> whatever is in this region + */ + +} svm_region_t; + +typedef struct svm_map_region_args_ +{ + const char *root_path; /* NULL means use the truly global arena */ + const char *name; + u64 baseva; + u64 size; + u64 pvt_heap_size; + uword flags; + char *backing_file; + uword backing_mmap_size; + /* uid, gid to own the svm region(s) */ + int uid; + int gid; +} svm_map_region_args_t; + + +/* + * Memory shared across all router instances. Packet buffers, etc + * Base should be "out of the way," and size should be big enough to + * cover everything we plan to put here. + */ +#define SVM_GLOBAL_REGION_BASEVA 0x30000000 +#define SVM_GLOBAL_REGION_SIZE (64<<20) +#define SVM_GLOBAL_REGION_NAME "/global_vm" + +/* + * Memory shared across individual router instances. 
+ */ +#define SVM_OVERLAY_REGION_BASEVA \ + (SVM_GLOBAL_REGION_BASEVA + SVM_GLOBAL_REGION_SIZE) +#define SVM_OVERLAY_REGION_SIZE (1<<20) +#define SVM_OVERLAY_REGION_BASENAME "/overlay_vm" + +typedef struct +{ + u8 *subregion_name; +} svm_subregion_t; + +typedef struct +{ + svm_subregion_t *subregions; /* subregion pool */ + uword *name_hash; + u8 *root_path; +} svm_main_region_t; + + +void *svm_region_find_or_create (svm_map_region_args_t * a); +void svm_region_init (void); +void svm_region_init_chroot (const char *root_path); +void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); +void svm_region_init_args (svm_map_region_args_t * a); +void svm_region_exit (void); +void svm_region_unmap (void *rp_arg); +void svm_client_scan (const char *root_path); +void svm_client_scan_this_region_nolock (svm_region_t * rp); +u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t * a); +u8 *format_svm_region (u8 * s, va_list * args); + +svm_region_t *svm_get_root_rp (void); + +#endif /* __included_svm_common_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib-api.am b/src/vlib-api.am index eea52420..677811bc 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -30,7 +30,10 @@ libvlibmemory_la_SOURCES = \ vlibmemory/vl_memory_api_h.h \ vlibmemory/vl_memory_msg_enum.h -nobase_include_HEADERS += vlibapi/api.h vlibapi/api_helper_macros.h vlibapi/vat_helper_macros.h +nobase_include_HEADERS += vlibapi/api.h \ + vlibapi/api_common.h \ + vlibapi/api_helper_macros.h \ + vlibapi/vat_helper_macros.h libvlibmemoryclient_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlibmemoryclient_la_LIBADD = $(libvlibmemoryclient_la_DEPENDENCIES) -lpthread @@ -49,6 +52,7 @@ libvlibmemoryclient_la_SOURCES = \ nobase_include_HEADERS += \ vlibmemory/api.h \ + vlibmemory/api_common.h \ vlibmemory/vl_memory_api_h.h \ vlibmemory/vl_memory_msg_enum.h \ vlibmemory/unix_shared_memory_queue.h \ diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 0e2c2101..49f5d5fb 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -20,73 +20,13 @@ #ifndef included_api_h #define included_api_h +#include #include #include #include #include #include -#include - -typedef enum -{ - REGISTRATION_TYPE_FREE = 0, - REGISTRATION_TYPE_SHMEM, - REGISTRATION_TYPE_SOCKET_LISTEN, - REGISTRATION_TYPE_SOCKET_SERVER, - REGISTRATION_TYPE_SOCKET_CLIENT, -} vl_registration_type_t; - -typedef struct vl_api_registration_ -{ - vl_registration_type_t registration_type; - - /* Index in VLIB's brain (not shared memory). */ - u32 vl_api_registration_pool_index; - - u8 *name; - - /* - * The following groups of data could be unioned, but my fingers are - * going to be sore enough. 
- */ - - /* shared memory only */ - unix_shared_memory_queue_t *vl_input_queue; - - /* socket server and client */ - u32 unix_file_index; - i8 *unprocessed_input; - u32 unprocessed_msg_length; - u8 *output_vector; - - /* socket client only */ - u32 server_handle; - u32 server_index; - -} vl_api_registration_t; - - -/* Trace configuration for a single message */ -typedef struct -{ - int size; - int trace_enable; - int replay_enable; -} trace_cfg_t; - -/* - * API recording - */ -typedef struct -{ - u8 endian; - u8 enabled; - u8 wrapped; - u8 pad; - u32 nitems; - u32 curindex; - u8 **traces; -} vl_api_trace_t; +#include /* *INDENT-OFF* */ typedef CLIB_PACKED @@ -97,200 +37,8 @@ typedef CLIB_PACKED }) vl_api_trace_file_header_t; /* *INDENT-ON* */ -typedef enum -{ - VL_API_TRACE_TX, - VL_API_TRACE_RX, -} vl_api_trace_which_t; - -#define VL_API_LITTLE_ENDIAN 0x00 -#define VL_API_BIG_ENDIAN 0x01 - -typedef struct -{ - u8 *name; - u16 first_msg_id; - u16 last_msg_id; -} vl_api_msg_range_t; - -typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index); - -typedef struct _vl_msg_api_init_function_list_elt -{ - struct _vl_msg_api_init_function_list_elt *next_init_function; - vl_msg_api_init_function_t *f; -} _vl_msg_api_function_list_elt_t; - -typedef struct -{ - void (**msg_handlers) (void *); - int (**pd_msg_handlers) (void *, int); - void (**msg_cleanup_handlers) (void *); - void (**msg_endian_handlers) (void *); - void (**msg_print_handlers) (void *, void *); - const char **msg_names; - u8 *message_bounce; - u8 *is_mp_safe; - struct ring_alloc_ *arings; - u32 ring_misses; - u32 garbage_collects; - u32 missing_clients; - vl_api_trace_t *rx_trace; - vl_api_trace_t *tx_trace; - int msg_print_flag; - trace_cfg_t *api_trace_cfg; - int our_pid; - svm_region_t *vlib_rp; - svm_region_t **mapped_shmem_regions; - struct vl_shmem_hdr_ *shmem_hdr; - vl_api_registration_t **vl_clients; - - u8 *serialized_message_table_in_shmem; - - /* For plugin msg allocator */ - u16 first_available_msg_id; - - /* message range by name hash */ - uword *msg_range_by_name; - - /* vector of message ranges */ - vl_api_msg_range_t *msg_ranges; - - /* uid for the api shared memory region */ - int api_uid; - /* gid for the api shared memory region */ - int api_gid; - - /* base virtual address for global VM region */ - u64 global_baseva; - - /* size of the global VM region */ - u64 global_size; - - /* size of the API region */ - u64 api_size; - - /* size of the global VM private mheap */ - u64 global_pvt_heap_size; - - /* size of the api private mheap */ - u64 api_pvt_heap_size; - - /* Client-only data structures */ - unix_shared_memory_queue_t *vl_input_queue; - - /* - * All VLIB-side message handlers use my_client_index to identify - * the queue / client. This works in sim replay. - */ - int my_client_index; - /* - * This is the (shared VM) address of the registration, - * don't use it to id the connection since it can't possibly - * work in simulator replay. - */ - vl_api_registration_t *my_registration; - - i32 vlib_signal; - - /* vlib input queue length */ - u32 vlib_input_queue_length; - - /* client side message index hash table */ - uword *msg_index_by_name_and_crc; - - const char *region_name; - const char *root_path; - - /* Replay in progress? 
*/ - int replay_in_progress; - - /* List of API client reaper functions */ - _vl_msg_api_function_list_elt_t *reaper_function_registrations; - -} api_main_t; - -extern api_main_t api_main; - -typedef struct -{ - int id; - char *name; - u32 crc; - void *handler; - void *cleanup; - void *endian; - void *print; - int size; - int traced; - int replay; - int message_bounce; - int is_mp_safe; -} vl_msg_api_msg_config_t; - -typedef struct msgbuf_ -{ - unix_shared_memory_queue_t *q; - u32 data_len; - u32 gc_mark_timestamp; - u8 data[0]; -} msgbuf_t; - -/* api_shared.c prototypes */ -int vl_msg_api_rx_trace_enabled (api_main_t * am); -int vl_msg_api_tx_trace_enabled (api_main_t * am); -void vl_msg_api_trace (api_main_t * am, vl_api_trace_t * tp, void *msg); -int vl_msg_api_trace_onoff (api_main_t * am, vl_api_trace_which_t which, - int onoff); -int vl_msg_api_trace_free (api_main_t * am, vl_api_trace_which_t which); int vl_msg_api_trace_save (api_main_t * am, vl_api_trace_which_t which, FILE * fp); -int vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, - u32 nitems); -void vl_msg_api_handler_with_vm_node (api_main_t * am, - void *the_msg, vlib_main_t * vm, - vlib_node_runtime_t * node); -void vl_msg_api_handler (void *the_msg); -void vl_msg_api_handler_no_free (void *the_msg); -void vl_msg_api_handler_no_trace_no_free (void *the_msg); -void vl_msg_api_trace_only (void *the_msg); -void vl_msg_api_cleanup_handler (void *the_msg); -void vl_msg_api_replay_handler (void *the_msg); -void vl_msg_api_socket_handler (void *the_msg); -void vl_msg_api_set_handlers (int msg_id, char *msg_name, - void *handler, - void *cleanup, - void *endian, - void *print, int msg_size, int traced); -void vl_msg_api_config (vl_msg_api_msg_config_t *); -void vl_msg_api_set_cleanup_handler (int msg_id, void *fp); -void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); -vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, - vl_api_trace_which_t which); - -void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); -void vl_msg_api_barrier_release (void) __attribute__ ((weak)); -void vl_msg_api_free (void *); -void vl_noop_handler (void *mp); -void vl_msg_api_increment_missing_client_counter (void); -void vl_msg_api_post_mortem_dump (void); -void vl_msg_api_post_mortem_dump_enable_disable (int enable); -void vl_msg_api_register_pd_handler (void *handler, - u16 msg_id_host_byte_order); -int vl_msg_api_pd_handler (void *mp, int rv); - -void vl_msg_api_set_first_available_msg_id (u16 first_avail); -u16 vl_msg_api_get_msg_ids (const char *name, int n); -void vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, - u32 id); -u32 vl_api_get_msg_index (u8 * name_and_crc); -u32 vl_msg_api_get_msg_length (void *msg_arg); - -/* node_serialize.c prototypes */ -u8 *vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, - u32 max_threads, int include_nexts, - int include_stats); -vlib_node_t **vlib_node_unserialize (u8 * vector); #define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init) @@ -350,8 +98,30 @@ vl_msg_api_get_msg_length_inline (void *msg_arg) return clib_net_to_host_u32 (header->data_len); } -#endif /* included_api_h */ +int vl_msg_api_rx_trace_enabled (api_main_t * am); +int vl_msg_api_tx_trace_enabled (api_main_t * am); +void vl_msg_api_trace (api_main_t * am, vl_api_trace_t * tp, void *msg); +int vl_msg_api_trace_onoff (api_main_t * am, vl_api_trace_which_t which, + int onoff); +int vl_msg_api_trace_free (api_main_t * am, vl_api_trace_which_t which); +int 
vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, + u32 nitems); +void vl_msg_api_handler_with_vm_node (api_main_t * am, + void *the_msg, vlib_main_t * vm, + vlib_node_runtime_t * node); +vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, + vl_api_trace_which_t which); +void vl_msg_api_add_msg_name_crc (api_main_t * am, const char *string, + u32 id); +/* node_serialize.c prototypes */ +u8 *vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, + u32 max_threads, int include_nexts, + int include_stats); +vlib_node_t **vlib_node_unserialize (u8 * vector); +u32 vl_msg_api_get_msg_length (void *msg_arg); + +#endif /* included_api_h */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h new file mode 100644 index 00000000..b84d269e --- /dev/null +++ b/src/vlibapi/api_common.h @@ -0,0 +1,268 @@ +/* + *------------------------------------------------------------------ + * api_common.h + * + * Copyright (c) 2009-2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef included_api_common_h +#define included_api_common_h + +#include +#include +#include + +typedef enum +{ + REGISTRATION_TYPE_FREE = 0, + REGISTRATION_TYPE_SHMEM, + REGISTRATION_TYPE_SOCKET_LISTEN, + REGISTRATION_TYPE_SOCKET_SERVER, + REGISTRATION_TYPE_SOCKET_CLIENT, +} vl_registration_type_t; + +typedef struct vl_api_registration_ +{ + vl_registration_type_t registration_type; + + /* Index in VLIB's brain (not shared memory). */ + u32 vl_api_registration_pool_index; + + u8 *name; + + /* + * The following groups of data could be unioned, but my fingers are + * going to be sore enough. 
+ */ + + /* shared memory only */ + unix_shared_memory_queue_t *vl_input_queue; + + /* socket server and client */ + u32 unix_file_index; + i8 *unprocessed_input; + u32 unprocessed_msg_length; + u8 *output_vector; + + /* socket client only */ + u32 server_handle; + u32 server_index; + +} vl_api_registration_t; + + +/* Trace configuration for a single message */ +typedef struct +{ + int size; + int trace_enable; + int replay_enable; +} trace_cfg_t; + +/* + * API recording + */ +typedef struct +{ + u8 endian; + u8 enabled; + u8 wrapped; + u8 pad; + u32 nitems; + u32 curindex; + u8 **traces; +} vl_api_trace_t; + +typedef enum +{ + VL_API_TRACE_TX, + VL_API_TRACE_RX, +} vl_api_trace_which_t; + +#define VL_API_LITTLE_ENDIAN 0x00 +#define VL_API_BIG_ENDIAN 0x01 + +typedef struct +{ + u8 *name; + u16 first_msg_id; + u16 last_msg_id; +} vl_api_msg_range_t; + +typedef struct +{ + int id; + char *name; + u32 crc; + void *handler; + void *cleanup; + void *endian; + void *print; + int size; + int traced; + int replay; + int message_bounce; + int is_mp_safe; +} vl_msg_api_msg_config_t; + +typedef struct msgbuf_ +{ + unix_shared_memory_queue_t *q; + u32 data_len; + u32 gc_mark_timestamp; + u8 data[0]; +} msgbuf_t; + +/* api_shared.c prototypes */ +void vl_msg_api_handler (void *the_msg); +void vl_msg_api_handler_no_free (void *the_msg); +void vl_msg_api_handler_no_trace_no_free (void *the_msg); +void vl_msg_api_trace_only (void *the_msg); +void vl_msg_api_cleanup_handler (void *the_msg); +void vl_msg_api_replay_handler (void *the_msg); +void vl_msg_api_socket_handler (void *the_msg); +void vl_msg_api_set_handlers (int msg_id, char *msg_name, + void *handler, + void *cleanup, + void *endian, + void *print, int msg_size, int traced); +void vl_msg_api_config (vl_msg_api_msg_config_t *); +void vl_msg_api_set_cleanup_handler (int msg_id, void *fp); +void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); + +void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); +void vl_msg_api_barrier_release (void) __attribute__ ((weak)); +void vl_msg_api_free (void *); +void vl_noop_handler (void *mp); +void vl_msg_api_increment_missing_client_counter (void); +void vl_msg_api_post_mortem_dump (void); +void vl_msg_api_post_mortem_dump_enable_disable (int enable); +void vl_msg_api_register_pd_handler (void *handler, + u16 msg_id_host_byte_order); +int vl_msg_api_pd_handler (void *mp, int rv); + +void vl_msg_api_set_first_available_msg_id (u16 first_avail); +u16 vl_msg_api_get_msg_ids (const char *name, int n); +u32 vl_api_get_msg_index (u8 * name_and_crc); + +typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index); + +typedef struct _vl_msg_api_init_function_list_elt +{ + struct _vl_msg_api_init_function_list_elt *next_init_function; + vl_msg_api_init_function_t *f; +} _vl_msg_api_function_list_elt_t; + +typedef struct +{ + void (**msg_handlers) (void *); + int (**pd_msg_handlers) (void *, int); + void (**msg_cleanup_handlers) (void *); + void (**msg_endian_handlers) (void *); + void (**msg_print_handlers) (void *, void *); + const char **msg_names; + u8 *message_bounce; + u8 *is_mp_safe; + struct ring_alloc_ *arings; + u32 ring_misses; + u32 garbage_collects; + u32 missing_clients; + vl_api_trace_t *rx_trace; + vl_api_trace_t *tx_trace; + int msg_print_flag; + trace_cfg_t *api_trace_cfg; + int our_pid; + svm_region_t *vlib_rp; + svm_region_t **mapped_shmem_regions; + struct vl_shmem_hdr_ *shmem_hdr; + vl_api_registration_t **vl_clients; + + u8 *serialized_message_table_in_shmem; + + /* For plugin 
msg allocator */ + u16 first_available_msg_id; + + /* message range by name hash */ + uword *msg_range_by_name; + + /* vector of message ranges */ + vl_api_msg_range_t *msg_ranges; + + /* uid for the api shared memory region */ + int api_uid; + /* gid for the api shared memory region */ + int api_gid; + + /* base virtual address for global VM region */ + u64 global_baseva; + + /* size of the global VM region */ + u64 global_size; + + /* size of the API region */ + u64 api_size; + + /* size of the global VM private mheap */ + u64 global_pvt_heap_size; + + /* size of the api private mheap */ + u64 api_pvt_heap_size; + + /* Client-only data structures */ + unix_shared_memory_queue_t *vl_input_queue; + + /* + * All VLIB-side message handlers use my_client_index to identify + * the queue / client. This works in sim replay. + */ + int my_client_index; + /* + * This is the (shared VM) address of the registration, + * don't use it to id the connection since it can't possibly + * work in simulator replay. + */ + vl_api_registration_t *my_registration; + + i32 vlib_signal; + + /* vlib input queue length */ + u32 vlib_input_queue_length; + + /* client side message index hash table */ + uword *msg_index_by_name_and_crc; + + const char *region_name; + const char *root_path; + + /* Replay in progress? */ + int replay_in_progress; + + /* List of API client reaper functions */ + _vl_msg_api_function_list_elt_t *reaper_function_registrations; + +} api_main_t; + +extern api_main_t api_main; + + +#endif /* included_api_common_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h index c195e181..2a1438fd 100644 --- a/src/vlibmemory/api.h +++ b/src/vlibmemory/api.h @@ -20,81 +20,12 @@ #ifndef included_vlibmemory_api_h #define included_vlibmemory_api_h -#include #include #include #include #include #include - -/* Allocated in shared memory */ - -/* - * Ring-allocation scheme for client API messages - * - * Only one proc/thread has control of a given message buffer. - * To free a buffer allocated from one of these rings, we clear - * a field in the buffer (header), and leave. - * - * No locks, no hits, no errors... - */ -typedef struct ring_alloc_ -{ - unix_shared_memory_queue_t *rp; - u16 size; - u16 nitems; - u32 hits; - u32 misses; -} ring_alloc_t; - -/* - * Initializers for the (shared-memory) rings - * _(size, n). Note: each msg has an 8 byte header. - * Might want to change that to an index sometime. - */ -#define foreach_vl_aring_size \ -_(64+8, 1024) \ -_(256+8, 128) \ -_(1024+8, 64) - -#define foreach_clnt_aring_size \ -_(1024+8, 1024) \ -_(2048+8, 128) \ -_(4096+8, 8) - -typedef struct vl_shmem_hdr_ -{ - int version; - - /* getpid () for the VLIB client process */ - volatile int vl_pid; - - /* Client sends VLIB msgs here. */ - unix_shared_memory_queue_t *vl_input_queue; - - /* Vector of rings; one for each size. */ - - /* VLIB allocates buffers to send msgs to clients here. */ - ring_alloc_t *vl_rings; - - /* Clients allocate buffer to send msgs to VLIB here. 
*/ - ring_alloc_t *client_rings; - - /* Number of detected application restarts */ - u32 application_restarts; - - /* Number of messages reclaimed during application restart */ - u32 restart_reclaims; - - /* Number of garbage-collected messages */ - u32 garbage_collects; - -} vl_shmem_hdr_t; - -#define VL_SHM_VERSION 2 - -#define VL_API_EPOCH_MASK 0xFF -#define VL_API_EPOCH_SHIFT 8 +#include static inline u32 vl_msg_api_handle_get_epoch (u32 index) @@ -118,43 +49,7 @@ vl_msg_api_handle_from_index_and_epoch (u32 index, u32 epoch) return handle; } -void *vl_msg_api_alloc (int nbytes); -void *vl_msg_api_alloc_or_null (int nbytes); -void *vl_msg_api_alloc_as_if_client (int nbytes); -void *vl_msg_api_alloc_as_if_client_or_null (int nbytes); -void vl_msg_api_free (void *a); -int vl_map_shmem (const char *region_name, int is_vlib); -void vl_register_mapped_shmem_region (svm_region_t * rp); -void vl_unmap_shmem (void); -void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem); -void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem); -void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem); -int vl_client_connect (const char *name, int ctx_quota, int input_queue_size); -void vl_client_disconnect (void); -unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index); -vl_api_registration_t *vl_api_client_index_to_registration (u32 index); -int vl_client_api_map (const char *region_name); -void vl_client_api_unmap (void); -void vl_set_memory_region_name (const char *name); -void vl_set_memory_root_path (const char *root_path); -void vl_set_memory_uid (int uid); -void vl_set_memory_gid (int gid); -void vl_set_global_memory_baseva (u64 baseva); -void vl_set_global_memory_size (u64 size); -void vl_set_api_memory_size (u64 size); -void vl_set_global_pvt_heap_size (u64 size); -void vl_set_api_pvt_heap_size (u64 size); void vl_enable_disable_memory_api (vlib_main_t * vm, int yesno); -void vl_client_disconnect_from_vlib (void); -int vl_client_connect_to_vlib (const char *svm_name, - const char *client_name, int rx_queue_size); -int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, - const char *client_name, - int rx_queue_size); -u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); - -void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); - #endif /* included_vlibmemory_api_h */ /* diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h new file mode 100644 index 00000000..53909cc4 --- /dev/null +++ b/src/vlibmemory/api_common.h @@ -0,0 +1,138 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2009 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef included_vlibmemory_api_common_h +#define included_vlibmemory_api_common_h + +#include +#include +#include + +/* Allocated in shared memory */ + +/* + * Ring-allocation scheme for client API messages + * + * Only one proc/thread has control of a given message buffer. + * To free a buffer allocated from one of these rings, we clear + * a field in the buffer (header), and leave. + * + * No locks, no hits, no errors... + */ +typedef struct ring_alloc_ +{ + unix_shared_memory_queue_t *rp; + u16 size; + u16 nitems; + u32 hits; + u32 misses; +} ring_alloc_t; + +/* + * Initializers for the (shared-memory) rings + * _(size, n). Note: each msg has an 8 byte header. + * Might want to change that to an index sometime. + */ +#define foreach_vl_aring_size \ +_(64+8, 1024) \ +_(256+8, 128) \ +_(1024+8, 64) + +#define foreach_clnt_aring_size \ +_(1024+8, 1024) \ +_(2048+8, 128) \ +_(4096+8, 8) + +typedef struct vl_shmem_hdr_ +{ + int version; + + /* getpid () for the VLIB client process */ + volatile int vl_pid; + + /* Client sends VLIB msgs here. */ + unix_shared_memory_queue_t *vl_input_queue; + + /* Vector of rings; one for each size. */ + + /* VLIB allocates buffers to send msgs to clients here. */ + ring_alloc_t *vl_rings; + + /* Clients allocate buffer to send msgs to VLIB here. */ + ring_alloc_t *client_rings; + + /* Number of detected application restarts */ + u32 application_restarts; + + /* Number of messages reclaimed during application restart */ + u32 restart_reclaims; + + /* Number of garbage-collected messages */ + u32 garbage_collects; + +} vl_shmem_hdr_t; + +#define VL_SHM_VERSION 2 + +#define VL_API_EPOCH_MASK 0xFF +#define VL_API_EPOCH_SHIFT 8 + +void *vl_msg_api_alloc (int nbytes); +void *vl_msg_api_alloc_or_null (int nbytes); +void *vl_msg_api_alloc_as_if_client (int nbytes); +void *vl_msg_api_alloc_as_if_client_or_null (int nbytes); +void vl_msg_api_free (void *a); +int vl_map_shmem (const char *region_name, int is_vlib); +void vl_register_mapped_shmem_region (svm_region_t * rp); +void vl_unmap_shmem (void); +void vl_msg_api_send_shmem (unix_shared_memory_queue_t * q, u8 * elem); +void vl_msg_api_send_shmem_nolock (unix_shared_memory_queue_t * q, u8 * elem); +void vl_msg_api_send (vl_api_registration_t * rp, u8 * elem); +int vl_client_connect (const char *name, int ctx_quota, int input_queue_size); +void vl_client_disconnect (void); +unix_shared_memory_queue_t *vl_api_client_index_to_input_queue (u32 index); +vl_api_registration_t *vl_api_client_index_to_registration (u32 index); +int vl_client_api_map (const char *region_name); +void vl_client_api_unmap (void); +void vl_set_memory_region_name (const char *name); +void vl_set_memory_root_path (const char *root_path); +void vl_set_memory_uid (int uid); +void vl_set_memory_gid (int gid); +void vl_set_global_memory_baseva (u64 baseva); +void vl_set_global_memory_size (u64 size); +void vl_set_api_memory_size (u64 size); +void vl_set_global_pvt_heap_size (u64 size); +void vl_set_api_pvt_heap_size (u64 size); +void vl_client_disconnect_from_vlib (void); +int vl_client_connect_to_vlib (const char *svm_name, const char *client_name, + int rx_queue_size); +int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, + const char *client_name, + int rx_queue_size); +u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); + +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +#endif /* included_vlibmemory_api_common_h */ 
+ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra.am b/src/vppinfra.am index ff2b8ea4..785445a6 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -167,6 +167,7 @@ nobase_include_HEADERS = \ vppinfra/byte_order.h \ vppinfra/cache.h \ vppinfra/clib.h \ + vppinfra/clib_error.h \ vppinfra/cpu.h \ vppinfra/crc32.h \ vppinfra/dlist.h \ diff --git a/src/vppinfra/clib_error.h b/src/vppinfra/clib_error.h new file mode 100644 index 00000000..45f18eb1 --- /dev/null +++ b/src/vppinfra/clib_error.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_clib_error_h +#define included_clib_error_h + +#include + +typedef struct +{ + /* Error message. */ + u8 *what; + + /* Where error occurred (e.g. __FUNCTION__ __LINE__) */ + const u8 *where; + + uword flags; + + /* Error code (e.g. errno for Unix errors). */ + any code; +} clib_error_t; + +#endif diff --git a/src/vppinfra/error.h b/src/vppinfra/error.h index e51f938e..e0e2d472 100644 --- a/src/vppinfra/error.h +++ b/src/vppinfra/error.h @@ -72,19 +72,7 @@ void clib_error_register_handler (clib_error_handler_func_t func, void *arg); #define clib_panic(format,args...) \ _clib_error (CLIB_ERROR_ABORT, (char *) clib_error_function, __LINE__, format, ## args) -typedef struct -{ - /* Error message. */ - u8 *what; - - /* Where error occurred (e.g. __FUNCTION__ __LINE__) */ - const u8 *where; - - uword flags; - - /* Error code (e.g. errno for Unix errors). */ - any code; -} clib_error_t; +#include #define clib_error_get_code(err) ((err) ? 
(err)->code : 0) #define clib_error_set_code(err, c) \ -- cgit 1.2.3-korg From 52851e6aa9304054fd1059c8dd284abf8e532bf2 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 7 Aug 2017 09:35:25 -0400 Subject: TCP proxy prototype - Clean up internal API client registration - Add proxy server - Add a reference count to the svm fifo Change-Id: I5ace1c85497062ed412d26ae76a9e6741af1e984 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/svm/svm_fifo.c | 10 +- src/svm/svm_fifo.h | 1 + src/svm/svm_fifo_segment.c | 5 + src/vlibmemory/api_common.h | 1 + src/vlibmemory/memory_vlib.c | 38 ++ src/vnet.am | 1 + src/vnet/session/application.c | 6 + src/vnet/session/application.h | 1 + src/vnet/session/application_interface.h | 4 + src/vnet/session/session.c | 43 ++- src/vnet/session/session.h | 8 +- src/vnet/session/stream_session.h | 5 +- src/vnet/tcp/builtin_client.c | 95 +---- src/vnet/tcp/builtin_proxy.c | 599 +++++++++++++++++++++++++++++++ src/vnet/tcp/builtin_proxy.h | 100 ++++++ src/vnet/tcp/builtin_server.c | 96 +---- 16 files changed, 801 insertions(+), 212 deletions(-) create mode 100644 src/vnet/tcp/builtin_proxy.c create mode 100644 src/vnet/tcp/builtin_proxy.h (limited to 'src/vlibmemory') diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index fc2189c5..e478c06e 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -201,14 +201,20 @@ svm_fifo_create (u32 data_size_in_bytes) memset (f, 0, sizeof (*f)); f->nitems = data_size_in_bytes; f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + f->refcnt = 1; return (f); } void svm_fifo_free (svm_fifo_t * f) { - pool_free (f->ooo_segments); - clib_mem_free (f); + ASSERT (f->refcnt > 0); + + if (--f->refcnt == 0) + { + pool_free (f->ooo_segments); + clib_mem_free (f); + } } always_inline ooo_segment_t * diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index a83cd858..f10b4d91 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -75,6 +75,7 @@ typedef struct _svm_fifo #if SVM_FIFO_TRACE svm_fifo_trace_elem_t *trace; #endif + i8 refcnt; CLIB_CACHE_LINE_ALIGN_MARK (data); } svm_fifo_t; diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c index 86661e51..c04b9d8c 100644 --- a/src/svm/svm_fifo_segment.c +++ b/src/svm/svm_fifo_segment.c @@ -296,6 +296,7 @@ svm_fifo_segment_alloc_fifo (svm_fifo_segment_private_t * s, memset (f, 0, sizeof (*f)); f->nitems = data_size_in_bytes; f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX; + f->refcnt = 1; goto found; } /* FALLTHROUGH */ @@ -344,6 +345,10 @@ svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f, svm_fifo_segment_header_t *fsh; void *oldheap; + ASSERT (f->refcnt > 0); + + if (--f->refcnt > 0) + return; sh = s->ssvm.sh; fsh = (svm_fifo_segment_header_t *) sh->opaque[0]; diff --git a/src/vlibmemory/api_common.h b/src/vlibmemory/api_common.h index 53909cc4..19daecdf 100644 --- a/src/vlibmemory/api_common.h +++ b/src/vlibmemory/api_common.h @@ -126,6 +126,7 @@ int vl_client_connect_to_vlib_no_rx_pthread (const char *svm_name, u16 vl_client_get_first_plugin_msg_id (const char *plugin_name); void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); +u32 vl_api_memclnt_create_internal (char *, unix_shared_memory_queue_t *); #endif /* included_vlibmemory_api_common_h */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 004a9974..688ce604 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -136,6 +136,44 @@ vl_api_serialize_message_table (api_main_t * am, u8 * vector) return 
serialize_close_vector (sm); } +/* + * vl_api_memclnt_create_internal + */ + +u32 +vl_api_memclnt_create_internal (char *name, unix_shared_memory_queue_t * q) +{ + vl_api_registration_t **regpp; + vl_api_registration_t *regp; + svm_region_t *svm; + void *oldheap; + api_main_t *am = &api_main; + + ASSERT (vlib_get_thread_index () == 0); + pool_get (am->vl_clients, regpp); + + svm = am->vlib_rp; + + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); + + regp = *regpp; + memset (regp, 0, sizeof (*regp)); + regp->registration_type = REGISTRATION_TYPE_SHMEM; + regp->vl_api_registration_pool_index = regpp - am->vl_clients; + + regp->vl_input_queue = q; + regp->name = format (0, "%s%c", name, 0); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + return vl_msg_api_handle_from_index_and_epoch + (regp->vl_api_registration_pool_index, + am->shmem_hdr->application_restarts); +} + + /* * vl_api_memclnt_create_t_handler */ diff --git a/src/vnet.am b/src/vnet.am index ad84c028..ede0376d 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -475,6 +475,7 @@ libvnet_la_SOURCES += \ vnet/tcp/builtin_client.c \ vnet/tcp/builtin_server.c \ vnet/tcp/builtin_http_server.c \ + vnet/tcp/builtin_proxy.c \ vnet/tcp/tcp_test.c \ vnet/tcp/tcp.c diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 076c66f1..78c41b93 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -367,6 +367,12 @@ app_get_name_from_reg_index (application_t * app) return app_name; } +int +application_is_proxy (application_t * app) +{ + return !(app->flags & APP_OPTIONS_FLAGS_IS_PROXY); +} + u8 * format_application_listener (u8 * s, va_list * args) { diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 35caae85..29d37a06 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -116,6 +116,7 @@ segment_manager_t *application_get_listen_segment_manager (application_t * s); segment_manager_t *application_get_connect_segment_manager (application_t * app); +int application_is_proxy (application_t * app); #endif /* SRC_VNET_SESSION_APPLICATION_H_ */ diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index ed9f89b3..1d63f6cc 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -100,6 +100,9 @@ typedef struct _vnet_connect_args /* Used for redirects */ void *mp; + + /* used for proxy connections */ + u64 server_handle; } vnet_connect_args_t; typedef struct _vnet_disconnect_args_t @@ -129,6 +132,7 @@ typedef enum _(USE_FIFO, "Use FIFO with redirects") \ _(ADD_SEGMENT, "Add segment and signal app if needed") \ _(BUILTIN_APP, "Application is builtin") \ + _(IS_PROXY, "Application is proxying") typedef enum _app_options { diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 4ba15291..991bcd5a 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -30,7 +30,7 @@ extern transport_proto_vft_t *tp_vfts; int stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc, - stream_session_t ** ret_s) + u8 alloc_fifos, stream_session_t ** ret_s) { session_manager_main_t *smm = &session_manager_main; svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0; @@ -43,31 +43,37 @@ stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc, ASSERT (thread_index == vlib_get_thread_index ()); - if ((rv = 
segment_manager_alloc_session_fifos (sm, &server_rx_fifo, - &server_tx_fifo, - &fifo_segment_index))) - return rv; - /* Create the session */ pool_get_aligned (smm->sessions[thread_index], s, CLIB_CACHE_LINE_BYTES); memset (s, 0, sizeof (*s)); - - /* Initialize backpointers */ pool_index = s - smm->sessions[thread_index]; - server_rx_fifo->master_session_index = pool_index; - server_rx_fifo->master_thread_index = thread_index; - server_tx_fifo->master_session_index = pool_index; - server_tx_fifo->master_thread_index = thread_index; + /* Allocate fifos */ + if (alloc_fifos) + { + if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo, + &server_tx_fifo, + &fifo_segment_index))) + { + pool_put (smm->sessions[thread_index], s); + return rv; + } + /* Initialize backpointers */ + server_rx_fifo->master_session_index = pool_index; + server_rx_fifo->master_thread_index = thread_index; + + server_tx_fifo->master_session_index = pool_index; + server_tx_fifo->master_thread_index = thread_index; - s->server_rx_fifo = server_rx_fifo; - s->server_tx_fifo = server_tx_fifo; + s->server_rx_fifo = server_rx_fifo; + s->server_tx_fifo = server_tx_fifo; + s->svm_segment_index = fifo_segment_index; + } /* Initialize state machine, such as it is... */ s->session_type = session_type_from_proto_and_ip (tc->transport_proto, tc->is_ip4); s->session_state = SESSION_STATE_CONNECTING; - s->svm_segment_index = fifo_segment_index; s->thread_index = thread_index; s->session_index = pool_index; @@ -379,10 +385,11 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail) if (!is_fail) { segment_manager_t *sm; + u8 alloc_fifos; sm = application_get_connect_segment_manager (app); - + alloc_fifos = application_is_proxy (app); /* Create new session (svm segments are allocated if needed) */ - if (stream_session_create_i (sm, tc, &new_s)) + if (stream_session_create_i (sm, tc, alloc_fifos, &new_s)) { is_fail = 1; error = -1; @@ -515,7 +522,7 @@ stream_session_accept (transport_connection_t * tc, u32 listener_index, server = application_get (listener->app_index); sm = application_get_listen_segment_manager (server, listener); - if ((rv = stream_session_create_i (sm, tc, &s))) + if ((rv = stream_session_create_i (sm, tc, 1, &s))) return rv; s->app_index = server->index; diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 538433da..74d82a40 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -183,10 +183,10 @@ stream_session_is_valid (u32 si, u8 thread_index) stream_session_t *s; s = pool_elt_at_index (session_manager_main.sessions[thread_index], si); if (s->thread_index != thread_index || s->session_index != si - || s->server_rx_fifo->master_session_index != si - || s->server_tx_fifo->master_session_index != si - || s->server_rx_fifo->master_thread_index != thread_index - || s->server_tx_fifo->master_thread_index != thread_index) + /* || s->server_rx_fifo->master_session_index != si + || s->server_tx_fifo->master_session_index != si + || s->server_rx_fifo->master_thread_index != thread_index + || s->server_tx_fifo->master_thread_index != thread_index */ ) return 0; return 1; } diff --git a/src/vnet/session/stream_session.h b/src/vnet/session/stream_session.h index 82bbf521..4c263211 100644 --- a/src/vnet/session/stream_session.h +++ b/src/vnet/session/stream_session.h @@ -83,8 +83,11 @@ typedef struct _stream_session_t u32 opaque2; + /** connected (server) session handle */ + u64 server_session_handle; + /** Opaque, pad to a 64-octet boundary */ - u64 
opaque[2]; + u64 opaque[1]; } stream_session_t; #endif /* SRC_VNET_SESSION_STREAM_SESSION_H_ */ diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 0cb9faa8..938e07ba 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -24,25 +24,6 @@ #include #include -/* define message IDs */ -#include - -/* define message structures */ -#define vl_typedefs -#include -#undef vl_typedefs - -/* define generated endian-swappers */ -#define vl_endianfun -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - #define TCP_BUILTIN_CLIENT_DBG (0) static void @@ -308,87 +289,16 @@ VLIB_REGISTER_NODE (builtin_client_node) = }; /* *INDENT-ON* */ -/* So we don't get "no handler for... " msgs */ -static void -vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - tclient_main_t *tm = &tclient_main; - tm->my_client_index = mp->index; - vlib_process_signal_event (vm, tm->cli_node_index, 1 /* evt */ , - 0 /* data */ ); -} - static int create_api_loopback (tclient_main_t * tm) { - vlib_main_t *vm = vlib_get_main (); - vl_api_memclnt_create_t _m, *mp = &_m; - extern void vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t *); api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr; - uword *event_data = 0, event_type; - int resolved = 0; - - /* - * Create a "loopback" API client connection - * Don't do things like this unless you know what you're doing... - */ shmem_hdr = am->shmem_hdr; tm->vl_input_queue = shmem_hdr->vl_input_queue; - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; - mp->context = 0xFEEDFACE; - mp->input_queue = pointer_to_uword (tm->vl_input_queue); - strncpy ((char *) mp->name, "tcp_clients_tester", sizeof (mp->name) - 1); - - vl_api_memclnt_create_t_handler (mp); - - /* Wait for reply */ - vlib_process_wait_for_event_or_clock (vm, 1.0); - event_type = vlib_process_get_events (vm, &event_data); - switch (event_type) - { - case 1: - resolved = 1; - break; - case ~0: - /* timed out */ - break; - default: - clib_warning ("unknown event_type %d", event_type); - } - if (!resolved) - return -1; - return 0; -} - -#define foreach_tclient_static_api_msg \ -_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ - -static clib_error_t * -tclient_api_hookup (vlib_main_t * vm) -{ - vl_msg_api_msg_config_t _c, *c = &_c; - - /* Hook up client-side static APIs to our handlers */ -#define _(N,n) do { \ - c->id = VL_API_##N; \ - c->name = #n; \ - c->handler = vl_api_##n##_t_handler; \ - c->cleanup = vl_noop_handler; \ - c->endian = vl_api_##n##_t_endian; \ - c->print = vl_api_##n##_t_print; \ - c->size = sizeof(vl_api_##n##_t); \ - c->traced = 1; /* trace, so these msgs print */ \ - c->replay = 0; /* don't replay client create/delete msgs */ \ - c->message_bounce = 0; /* don't bounce this message */ \ - vl_msg_api_config(c);} while (0); - - foreach_tclient_static_api_msg; -#undef _ - + tm->my_client_index = + vl_api_memclnt_create_internal ("tcp_test_client", tm->vl_input_queue); return 0; } @@ -400,7 +310,6 @@ tcp_test_clients_init (vlib_main_t * vm) u32 num_threads; int i; - tclient_api_hookup (vm); if (create_api_loopback (tm)) return -1; diff --git a/src/vnet/tcp/builtin_proxy.c b/src/vnet/tcp/builtin_proxy.c new file mode 100644 index 00000000..d8cfb11d --- /dev/null +++ b/src/vnet/tcp/builtin_proxy.c @@ -0,0 +1,599 @@ +/* 
+* Copyright (c) 2015-2017 Cisco and/or its affiliates. +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at: +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include +#include +#include +#include +#include + +builtin_proxy_main_t builtin_proxy_main; + +static void +delete_proxy_session (stream_session_t * s, int is_active_open) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + proxy_session_t *ps = 0; + vnet_disconnect_args_t _a, *a = &_a; + stream_session_t *active_open_session = 0; + stream_session_t *server_session = 0; + uword *p; + u64 handle; + + handle = stream_session_handle (s); + + clib_spinlock_lock_if_init (&bpm->sessions_lock); + if (is_active_open) + { + active_open_session = s; + + p = hash_get (bpm->proxy_session_by_active_open_handle, handle); + if (p == 0) + { + clib_warning ("proxy session for %s handle %lld (%llx) AWOL", + is_active_open ? "active open" : "server", + handle, handle); + } + else + { + ps = pool_elt_at_index (bpm->sessions, p[0]); + if (ps->vpp_server_handle != ~0) + server_session = stream_session_get_from_handle + (ps->vpp_server_handle); + else + server_session = 0; + } + } + else + { + server_session = s; + + p = hash_get (bpm->proxy_session_by_server_handle, handle); + if (p == 0) + { + clib_warning ("proxy session for %s handle %lld (%llx) AWOL", + is_active_open ? 
"active open" : "server", + handle, handle); + } + else + { + ps = pool_elt_at_index (bpm->sessions, p[0]); + if (ps->vpp_server_handle != ~0) + active_open_session = stream_session_get_from_handle + (ps->vpp_server_handle); + else + active_open_session = 0; + } + } + + if (ps) + { + if (CLIB_DEBUG > 0) + memset (ps, 0xFE, sizeof (*ps)); + pool_put (bpm->sessions, ps); + } + + clib_spinlock_unlock_if_init (&bpm->sessions_lock); + + if (active_open_session) + { + a->handle = stream_session_handle (active_open_session); + a->app_index = bpm->active_open_app_index; + hash_unset (bpm->proxy_session_by_active_open_handle, + stream_session_handle (active_open_session)); + vnet_disconnect_session (a); + } + + if (server_session) + { + a->handle = stream_session_handle (server_session); + a->app_index = bpm->server_app_index; + hash_unset (bpm->proxy_session_by_server_handle, + stream_session_handle (server_session)); + vnet_disconnect_session (a); + } +} + +static int +server_accept_callback (stream_session_t * s) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + + s->session_state = SESSION_STATE_READY; + + clib_spinlock_lock_if_init (&bpm->sessions_lock); + + return 0; +} + +static void +server_disconnect_callback (stream_session_t * s) +{ + delete_proxy_session (s, 0 /* is_active_open */ ); +} + +static void +server_reset_callback (stream_session_t * s) +{ + clib_warning ("Reset session %U", format_stream_session, s, 2); + delete_proxy_session (s, 0 /* is_active_open */ ); +} + +static int +server_connected_callback (u32 app_index, u32 api_context, + stream_session_t * s, u8 is_fail) +{ + clib_warning ("called..."); + return -1; +} + +static int +server_add_segment_callback (u32 client_index, + const u8 * seg_name, u32 seg_size) +{ + clib_warning ("called..."); + return -1; +} + +static int +server_redirect_connect_callback (u32 client_index, void *mp) +{ + clib_warning ("called..."); + return -1; +} + +static int +server_rx_callback (stream_session_t * s) +{ + u32 max_dequeue; + int actual_transfer __attribute__ ((unused)); + svm_fifo_t *tx_fifo, *rx_fifo; + builtin_proxy_main_t *bpm = &builtin_proxy_main; + u32 thread_index = vlib_get_thread_index (); + vnet_connect_args_t _a, *a = &_a; + proxy_session_t *ps; + int proxy_index; + uword *p; + svm_fifo_t *active_open_tx_fifo; + session_fifo_event_t evt; + + ASSERT (s->thread_index == thread_index); + + clib_spinlock_lock_if_init (&bpm->sessions_lock); + p = + hash_get (bpm->proxy_session_by_server_handle, stream_session_handle (s)); + + if (PREDICT_TRUE (p != 0)) + { + clib_spinlock_unlock_if_init (&bpm->sessions_lock); + active_open_tx_fifo = s->server_rx_fifo; + + /* + * Send event for active open tx fifo + */ + if (svm_fifo_set_event (active_open_tx_fifo)) + { + evt.fifo = active_open_tx_fifo; + evt.event_type = FIFO_EVENT_APP_TX; + if (unix_shared_memory_queue_add + (bpm->active_open_event_queue[thread_index], (u8 *) & evt, + 0 /* do wait for mutex */ )) + clib_warning ("failed to enqueue tx evt"); + } + } + else + { + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + ASSERT (rx_fifo->master_thread_index == thread_index); + ASSERT (tx_fifo->master_thread_index == thread_index); + + max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo); + + if (PREDICT_FALSE (max_dequeue == 0)) + return 0; + + actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ , + max_dequeue, + bpm->rx_buf[thread_index]); + + /* $$$ your message in this space: parse url, etc. 
*/ + + memset (a, 0, sizeof (*a)); + + clib_spinlock_lock_if_init (&bpm->sessions_lock); + pool_get (bpm->sessions, ps); + memset (ps, 0, sizeof (*ps)); + ps->server_rx_fifo = rx_fifo; + ps->server_tx_fifo = tx_fifo; + ps->vpp_server_handle = stream_session_handle (s); + + proxy_index = ps - bpm->sessions; + + hash_set (bpm->proxy_session_by_server_handle, ps->vpp_server_handle, + proxy_index); + + clib_spinlock_unlock_if_init (&bpm->sessions_lock); + + a->uri = "tcp://6.0.2.2/23"; + a->api_context = proxy_index; + a->app_index = bpm->active_open_app_index; + a->mp = 0; + vnet_connect_uri (a); + } + + return 0; +} + +static session_cb_vft_t builtin_session_cb_vft = { + .session_accept_callback = server_accept_callback, + .session_disconnect_callback = server_disconnect_callback, + .session_connected_callback = server_connected_callback, + .add_segment_callback = server_add_segment_callback, + .redirect_connect_callback = server_redirect_connect_callback, + .builtin_server_rx_callback = server_rx_callback, + .session_reset_callback = server_reset_callback +}; + +static int +active_open_connected_callback (u32 app_index, u32 opaque, + stream_session_t * s, u8 is_fail) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + proxy_session_t *ps; + u8 thread_index = vlib_get_thread_index (); + session_fifo_event_t evt; + + if (is_fail) + { + clib_warning ("connection %d failed!", opaque); + return 0; + } + + /* + * Setup proxy session handle. + */ + clib_spinlock_lock_if_init (&bpm->sessions_lock); + + ps = pool_elt_at_index (bpm->sessions, opaque); + ps->vpp_active_open_handle = stream_session_handle (s); + + s->server_tx_fifo = ps->server_rx_fifo; + s->server_rx_fifo = ps->server_tx_fifo; + + /* + * Reset the active-open tx-fifo master indices so the active-open session + * will receive data, etc. 
+ */ + s->server_tx_fifo->master_session_index = s->session_index; + s->server_tx_fifo->master_thread_index = s->thread_index; + + /* + * Account for the active-open session's use of the fifos + * so they won't disappear until the last session which uses + * them disappears + */ + s->server_tx_fifo->refcnt++; + s->server_rx_fifo->refcnt++; + + hash_set (bpm->proxy_session_by_active_open_handle, + ps->vpp_active_open_handle, opaque); + + clib_spinlock_unlock_if_init (&bpm->sessions_lock); + + /* + * Send event for active open tx fifo + */ + if (svm_fifo_set_event (s->server_tx_fifo)) + { + evt.fifo = s->server_tx_fifo; + evt.event_type = FIFO_EVENT_APP_TX; + if (unix_shared_memory_queue_add + (bpm->active_open_event_queue[thread_index], (u8 *) & evt, + 0 /* do wait for mutex */ )) + clib_warning ("failed to enqueue tx evt"); + } + + return 0; +} + +static void +active_open_reset_callback (stream_session_t * s) +{ + delete_proxy_session (s, 1 /* is_active_open */ ); +} + +static int +active_open_create_callback (stream_session_t * s) +{ + return 0; +} + +static void +active_open_disconnect_callback (stream_session_t * s) +{ + delete_proxy_session (s, 1 /* is_active_open */ ); +} + +static int +active_open_rx_callback (stream_session_t * s) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + session_fifo_event_t evt; + svm_fifo_t *server_rx_fifo; + u32 thread_index = vlib_get_thread_index (); + + server_rx_fifo = s->server_rx_fifo; + + /* + * Send event for server tx fifo + */ + if (svm_fifo_set_event (server_rx_fifo)) + { + evt.fifo = server_rx_fifo; + evt.event_type = FIFO_EVENT_APP_TX; + if (unix_shared_memory_queue_add + (bpm->server_event_queue[thread_index], (u8 *) & evt, + 0 /* do wait for mutex */ )) + clib_warning ("failed to enqueue server rx evt"); + } + + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_clients = { + .session_reset_callback = active_open_reset_callback, + .session_connected_callback = active_open_connected_callback, + .session_accept_callback = active_open_create_callback, + .session_disconnect_callback = active_open_disconnect_callback, + .builtin_server_rx_callback = active_open_rx_callback +}; +/* *INDENT-ON* */ + + +static void +create_api_loopbacks (vlib_main_t * vm) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr; + + shmem_hdr = am->shmem_hdr; + bpm->vl_input_queue = shmem_hdr->vl_input_queue; + bpm->server_client_index = + vl_api_memclnt_create_internal ("proxy_server", bpm->vl_input_queue); + bpm->active_open_client_index = + vl_api_memclnt_create_internal ("proxy_active_open", bpm->vl_input_queue); +} + +static int +server_attach () +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + u8 segment_name[128]; + u64 options[SESSION_OPTIONS_N_OPTIONS]; + vnet_app_attach_args_t _a, *a = &_a; + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->api_client_index = bpm->server_client_index; + a->session_cb_vft = &builtin_session_cb_vft; + a->options = options; + a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20; + a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = bpm->fifo_size; + a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = bpm->fifo_size; + a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bpm->private_segment_count; + a->options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bpm->private_segment_size; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = + bpm->prealloc_fifos ? 
bpm->prealloc_fifos : 1; + + a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP; + + a->segment_name = segment_name; + a->segment_name_length = ARRAY_LEN (segment_name); + + if (vnet_application_attach (a)) + { + clib_warning ("failed to attach server"); + return -1; + } + bpm->server_app_index = a->app_index; + + return 0; +} + +static int +active_open_attach (void) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + vnet_app_attach_args_t _a, *a = &_a; + u8 segment_name[128]; + u32 segment_name_length; + u64 options[16]; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->api_client_index = bpm->active_open_client_index; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_clients; + + options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20; + options[SESSION_OPTIONS_RX_FIFO_SIZE] = bpm->fifo_size; + options[SESSION_OPTIONS_TX_FIFO_SIZE] = bpm->fifo_size; + options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bpm->private_segment_count; + options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bpm->private_segment_size; + options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = + bpm->prealloc_fifos ? bpm->prealloc_fifos : 1; + + options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP + | APP_OPTIONS_FLAGS_IS_PROXY; + + a->options = options; + + if (vnet_application_attach (a)) + return -1; + + bpm->active_open_app_index = a->app_index; + + return 0; +} + +static int +server_listen () +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + vnet_bind_args_t _a, *a = &_a; + memset (a, 0, sizeof (*a)); + a->app_index = bpm->server_app_index; + a->uri = "tcp://0.0.0.0/23"; + return vnet_bind_uri (a); +} + +static int +server_create (vlib_main_t * vm) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + int i; + + if (bpm->server_client_index == (u32) ~ 0) + create_api_loopbacks (vm); + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (builtin_proxy_main.server_event_queue, num_threads - 1); + vec_validate (builtin_proxy_main.active_open_event_queue, num_threads - 1); + vec_validate (bpm->rx_buf, num_threads - 1); + + for (i = 0; i < num_threads; i++) + vec_validate (bpm->rx_buf[i], bpm->rcv_buffer_size); + + if (server_attach ()) + { + clib_warning ("failed to attach server app"); + return -1; + } + if (server_listen ()) + { + clib_warning ("failed to start listening"); + return -1; + } + if (active_open_attach ()) + { + clib_warning ("failed to attach active open app"); + return -1; + } + + for (i = 0; i < num_threads; i++) + { + bpm->active_open_event_queue[i] = + session_manager_get_vpp_event_queue (i); + + ASSERT (bpm->active_open_event_queue[i]); + + bpm->server_event_queue[i] = session_manager_get_vpp_event_queue (i); + } + + return 0; +} + +static clib_error_t * +proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + int rv; + u32 tmp; + + bpm->fifo_size = 64 << 10; + bpm->rcv_buffer_size = 1024; + bpm->prealloc_fifos = 0; + bpm->private_segment_count = 0; + bpm->private_segment_size = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fifo-size %d", &bpm->fifo_size)) + bpm->fifo_size <<= 10; + else if (unformat (input, "rcv-buf-size %d", &bpm->rcv_buffer_size)) + ; + else 
if (unformat (input, "prealloc-fifos %d", &bpm->prealloc_fifos)) + ; + else if (unformat (input, "private-segment-count %d", + &bpm->private_segment_count)) + ; + else if (unformat (input, "private-segment-size %dm", &tmp)) + bpm->private_segment_size = tmp << 20; + else if (unformat (input, "private-segment-size %dg", &tmp)) + bpm->private_segment_size = tmp << 30; + else if (unformat (input, "private-segment-size %d", &tmp)) + bpm->private_segment_size = tmp; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ ); + + rv = server_create (vm); + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "server_create returned %d", rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (server_create_command, static) = +{ + .path = "test proxy server", + .short_help = "test proxy server", + .function = proxy_server_create_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +builtin_tcp_proxy_main_init (vlib_main_t * vm) +{ + builtin_proxy_main_t *bpm = &builtin_proxy_main; + bpm->server_client_index = ~0; + bpm->active_open_client_index = ~0; + bpm->proxy_session_by_active_open_handle = hash_create (0, sizeof (uword)); + bpm->proxy_session_by_server_handle = hash_create (0, sizeof (uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (builtin_tcp_proxy_main_init); + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/tcp/builtin_proxy.h b/src/vnet/tcp/builtin_proxy.h new file mode 100644 index 00000000..cf707a15 --- /dev/null +++ b/src/vnet/tcp/builtin_proxy.h @@ -0,0 +1,100 @@ + +/* + * builtin_proxy.h - skeleton vpp engine plug-in header file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_builtin_proxy_h__ +#define __included_builtin_proxy_h__ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +typedef struct +{ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + + u64 vpp_server_handle; + u64 vpp_active_open_handle; +} proxy_session_t; + +typedef struct +{ + unix_shared_memory_queue_t *vl_input_queue; /**< vpe input queue */ + /** per-thread vectors */ + unix_shared_memory_queue_t **server_event_queue; + unix_shared_memory_queue_t **active_open_event_queue; + u8 **rx_buf; /**< intermediate rx buffers */ + + u32 cli_node_index; /**< cli process node index */ + u32 server_client_index; /**< server API client handle */ + u32 server_app_index; /**< server app index */ + u32 active_open_client_index; /**< active open API client handle */ + u32 active_open_app_index; /**< active open index after attach */ + + uword *proxy_session_by_server_handle; + uword *proxy_session_by_active_open_handle; + + /* + * Configuration params + */ + u8 *connect_uri; /**< URI for slave's connect */ + u32 configured_segment_size; + u32 fifo_size; + u32 private_segment_count; /**< Number of private fifo segs */ + u32 private_segment_size; /**< size of private fifo segs */ + int rcv_buffer_size; + + /* + * Test state variables + */ + proxy_session_t *sessions; /**< Session pool, shared */ + clib_spinlock_t sessions_lock; + u32 **connection_index_by_thread; + pthread_t client_thread_handle; + + /* + * Flags + */ + u8 is_init; + u8 prealloc_fifos; /**< Request fifo preallocation */ + + /* + * Convenience + */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; +} builtin_proxy_main_t; + +builtin_proxy_main_t builtin_proxy_main; + +#endif /* __included_builtin_proxy_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index 3416678e..2454a831 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -18,25 +18,6 @@ #include #include -/* define message IDs */ -#include - -/* define message structures */ -#define vl_typedefs -#include -#undef vl_typedefs - -/* define generated endian-swappers */ -#define vl_endianfun -#include -#undef vl_endianfun - -/* instantiate all the print functions we know about */ -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) -#define vl_printfun -#include -#undef vl_printfun - typedef struct { /* @@ -279,46 +260,13 @@ static int create_api_loopback (vlib_main_t * vm) { builtin_server_main_t *bsm = &builtin_server_main; - vl_api_memclnt_create_t _m, *mp = &_m; - extern void vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t *); api_main_t *am = &api_main; vl_shmem_hdr_t *shmem_hdr; - uword *event_data = 0, event_type; - int resolved = 0; - - /* - * Create a "loopback" API client connection - * Don't do things like this unless you know what you're doing... 
- */ shmem_hdr = am->shmem_hdr; bsm->vl_input_queue = shmem_hdr->vl_input_queue; - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = VL_API_MEMCLNT_CREATE; - mp->context = 0xFEEDFACE; - mp->input_queue = pointer_to_uword (bsm->vl_input_queue); - strncpy ((char *) mp->name, "tcp_test_server", sizeof (mp->name) - 1); - - vl_api_memclnt_create_t_handler (mp); - - /* Wait for reply */ - bsm->node_index = vlib_get_current_process (vm)->node_runtime.node_index; - vlib_process_wait_for_event_or_clock (vm, 2.0); - event_type = vlib_process_get_events (vm, &event_data); - switch (event_type) - { - case 1: - resolved = 1; - break; - case ~0: - /* timed out */ - break; - default: - clib_warning ("unknown event_type %d", event_type); - } - if (!resolved) - return -1; - + bsm->my_client_index = + vl_api_memclnt_create_internal ("tcp_test_server", bsm->vl_input_queue); return 0; } @@ -413,45 +361,6 @@ server_create (vlib_main_t * vm) return 0; } -/* Get our api client index */ -static void -vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) -{ - vlib_main_t *vm = vlib_get_main (); - builtin_server_main_t *bsm = &builtin_server_main; - bsm->my_client_index = mp->index; - vlib_process_signal_event (vm, bsm->node_index, 1 /* evt */ , - 0 /* data */ ); -} - -#define foreach_tcp_builtin_server_api_msg \ -_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \ - -static clib_error_t * -tcp_builtin_server_api_hookup (vlib_main_t * vm) -{ - vl_msg_api_msg_config_t _c, *c = &_c; - - /* Hook up client-side static APIs to our handlers */ -#define _(N,n) do { \ - c->id = VL_API_##N; \ - c->name = #n; \ - c->handler = vl_api_##n##_t_handler; \ - c->cleanup = vl_noop_handler; \ - c->endian = vl_api_##n##_t_endian; \ - c->print = vl_api_##n##_t_print; \ - c->size = sizeof(vl_api_##n##_t); \ - c->traced = 1; /* trace, so these msgs print */ \ - c->replay = 0; /* don't replay client create/delete msgs */ \ - c->message_bounce = 0; /* don't bounce this message */ \ - vl_msg_api_config(c);} while (0); - - foreach_tcp_builtin_server_api_msg; -#undef _ - - return 0; -} - static clib_error_t * server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -491,7 +400,6 @@ server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, format_unformat_error, input); } - tcp_builtin_server_api_hookup (vm); vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. 
*/ ); rv = server_create (vm); -- cgit 1.2.3-korg From 5beec81360146536086f1996869b4ee32ca37ddc Mon Sep 17 00:00:00 2001 From: Jan Srnicek Date: Fri, 24 Mar 2017 10:18:11 +0100 Subject: jvpp: make shm_prefix configurable (VPP-591) svm.c - set default map region root path only if root path is not already present memory_shared.c - added option for tests to send memory region name and root path in one variable, if so name and root path are separated here and set to map region structure so find function can find it properly jvpp-registry.c - added parameters shmPrefix to be able pass + removed sudo restriction specific shared memory prefix that is used while starting python tests(see framework.py) JVppRegistyImpl - added option to specify shmPrefix VppJNIConnection - added option to specify shmPrefix Change-Id: I3f89f867fb9b20eef00fbd497cb0e41b25d6eab7 Signed-off-by: Jan Srnicek Signed-off-by: Matej Perina --- src/svm/svm.c | 3 +- src/vlibmemory/memory_shared.c | 19 ++++++++++-- .../io/fd/vpp/jvpp/JVppRegistryImpl.java | 7 +++++ .../io/fd/vpp/jvpp/VppJNIConnection.java | 23 +++++++++++---- src/vpp-api/java/jvpp-registry/jvpp_registry.c | 34 ++++++++++++---------- 5 files changed, 62 insertions(+), 24 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/svm/svm.c b/src/svm/svm.c index 600fa744..0442ecb2 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -862,7 +862,8 @@ svm_region_find_or_create (svm_map_region_args_t * a) ASSERT (mp); /* Map the named region from the correct chroot environment */ - a->root_path = (char *) mp->root_path; + if (a->root_path == NULL) + a->root_path = (char *) mp->root_path; /* * See if this region is already known. If it is, we're diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 41aa1231..9bab6573 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -341,12 +341,25 @@ vl_map_shmem (const char *region_name, int is_vlib) struct timespec ts, tsrem; u32 vlib_input_queue_length; + memset (a, 0, sizeof (*a)); + + if (strstr (region_name, "-vpe-api")) + { + char root_path[strlen (region_name)]; + strncpy (root_path, region_name, strlen (region_name) - 8); + a->root_path = root_path; + am->root_path = root_path; + } + if (is_vlib == 0) svm_region_init_chroot (am->root_path); - memset (a, 0, sizeof (*a)); - - a->name = region_name; + if (a->root_path != NULL) + { + a->name = "/vpe-api"; + } + else + a->name = region_name; a->size = am->api_size ? 
am->api_size : (16 << 20); a->flags = SVM_FLAGS_MHEAP; a->uid = am->api_uid; diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java index 98ef1c15..6e938ae3 100644 --- a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java +++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/JVppRegistryImpl.java @@ -48,6 +48,13 @@ public final class JVppRegistryImpl implements JVppRegistry, ControlPingCallback pingCalls = new HashMap<>(); } + public JVppRegistryImpl(final String clientName, final String shmPrefix) throws IOException { + connection = new VppJNIConnection(clientName, shmPrefix); + connection.connect(); + pluginRegistry = new ConcurrentHashMap<>(); + pingCalls = new HashMap<>(); + } + @Override public VppConnection getConnection() { return connection; diff --git a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java index 320c1283..53eaa790 100644 --- a/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java +++ b/src/vpp-api/java/jvpp-registry/io/fd/vpp/jvpp/VppJNIConnection.java @@ -17,8 +17,11 @@ package io.fd.vpp.jvpp; import static io.fd.vpp.jvpp.NativeLibraryLoader.loadLibrary; +import static java.lang.String.format; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.HashMap; import java.util.Map; import java.util.Objects; @@ -30,13 +33,14 @@ import java.util.logging.Logger; */ public final class VppJNIConnection implements VppConnection { private static final Logger LOG = Logger.getLogger(VppJNIConnection.class.getName()); + private static final String DEFAULT_SHM_PREFIX = "/vpe-api"; static { final String libName = "libjvpp_registry.so"; try { loadLibrary(libName, VppJNIConnection.class); } catch (IOException e) { - LOG.log(Level.SEVERE, String.format("Can't find vpp jni library: %s", libName), e); + LOG.log(Level.SEVERE, format("Can't find vpp jni library: %s", libName), e); throw new ExceptionInInitializerError(e); } } @@ -44,6 +48,7 @@ public final class VppJNIConnection implements VppConnection { private ConnectionInfo connectionInfo; private final String clientName; + private final String shmPrefix; private volatile boolean disconnected = false; /** @@ -54,6 +59,12 @@ public final class VppJNIConnection implements VppConnection { */ public VppJNIConnection(final String clientName) { this.clientName = Objects.requireNonNull(clientName, "Null clientName"); + this.shmPrefix = DEFAULT_SHM_PREFIX; + } + + public VppJNIConnection(final String clientName, final String shmPrefix) { + this.clientName = Objects.requireNonNull(clientName, "Null clientName"); + this.shmPrefix = Objects.requireNonNull(shmPrefix, "Null shmPrefix"); } /** @@ -73,16 +84,18 @@ public final class VppJNIConnection implements VppConnection { @Override public void connect() throws IOException { - _connect(); + _connect(shmPrefix); } - private void _connect() throws IOException { + private void _connect(final String shmPrefix) throws IOException { + Objects.requireNonNull(shmPrefix, "Shared memory prefix must be defined"); + synchronized (VppJNIConnection.class) { if (connections.containsKey(clientName)) { throw new IOException("Client " + clientName + " already connected"); } - connectionInfo = clientConnect(clientName); + connectionInfo = clientConnect(shmPrefix, clientName); if (connectionInfo.status != 0) { throw new IOException("Connection returned error " + 
connectionInfo.status); } @@ -130,7 +143,7 @@ public final class VppJNIConnection implements VppConnection { } } - private static native ConnectionInfo clientConnect(String clientName); + private static native ConnectionInfo clientConnect(String shmPrefix, String clientName); private static native void clientDisconnect(); diff --git a/src/vpp-api/java/jvpp-registry/jvpp_registry.c b/src/vpp-api/java/jvpp-registry/jvpp_registry.c index 66adfea0..1e2c0176 100644 --- a/src/vpp-api/java/jvpp-registry/jvpp_registry.c +++ b/src/vpp-api/java/jvpp-registry/jvpp_registry.c @@ -243,14 +243,13 @@ static int send_initial_control_ping() { return rv; } -static int connect_to_vpe(char *name) { +static int connect_to_vpe(char *shm_prefix, char *name) { jvpp_main_t * jm = &jvpp_main; api_main_t * am = &api_main; jvpp_registry_main_t * rm = &jvpp_registry_main; - if (vl_client_connect_to_vlib("/vpe-api", name, 32) < 0) + if (vl_client_connect_to_vlib(shm_prefix, name, 32) < 0) return -1; - jm->my_client_index = am->my_client_index; jm->vl_input_queue = am->shmem_hdr->vl_input_queue; @@ -268,9 +267,15 @@ static int connect_to_vpe(char *name) { } JNIEXPORT jobject JNICALL Java_io_fd_vpp_jvpp_VppJNIConnection_clientConnect( - JNIEnv *env, jclass obj, jstring clientName) { + JNIEnv *env, jclass obj, jstring shmPrefix, jstring clientName) { + /* + * TODO introducing memory prefix as variable can be used in hc2vpp + * to be able to run without root privileges + * https://jira.fd.io/browse/HC2VPP-176 + */ int rv; const char *client_name; + const char *shm_prefix; void vl_msg_reply_handler_hookup(void); jvpp_main_t * jm = &jvpp_main; jvpp_registry_main_t * rm = &jvpp_registry_main; @@ -280,15 +285,6 @@ JNIEXPORT jobject JNICALL Java_io_fd_vpp_jvpp_VppJNIConnection_clientConnect( jmethodID connectionInfoConstructor = (*env)->GetMethodID(env, connectionInfoClass, "", "(JII)V"); - /* - * Bail out now if we're not running as root - */ - if (geteuid() != 0) { - return (*env)->NewObject(env, connectionInfoClass, - connectionInfoConstructor, 0, 0, - VNET_API_ERROR_NOT_RUNNING_AS_ROOT); - } - if (rm->is_connected) { return (*env)->NewObject(env, connectionInfoClass, connectionInfoConstructor, 0, 0, @@ -296,17 +292,25 @@ JNIEXPORT jobject JNICALL Java_io_fd_vpp_jvpp_VppJNIConnection_clientConnect( } client_name = (*env)->GetStringUTFChars(env, clientName, 0); + shm_prefix = (*env)->GetStringUTFChars(env, shmPrefix, 0); + if (!client_name) { return (*env)->NewObject(env, connectionInfoClass, - connectionInfoConstructor, 0, 0, VNET_API_ERROR_INVALID_VALUE); + connectionInfoConstructor, 0, 0, VNET_API_ERROR_INVALID_VALUE, shmPrefix); + } + + if (!shm_prefix) { + return (*env)->NewObject(env, connectionInfoClass, + connectionInfoConstructor, 0, 0, VNET_API_ERROR_INVALID_VALUE, shmPrefix); } - rv = connect_to_vpe((char *) client_name); + rv = connect_to_vpe((char *) shm_prefix, (char *) client_name); if (rv < 0) clib_warning("connection failed, rv %d", rv); (*env)->ReleaseStringUTFChars(env, clientName, client_name); + (*env)->ReleaseStringUTFChars(env, shmPrefix, shm_prefix); return (*env)->NewObject(env, connectionInfoClass, connectionInfoConstructor, (jlong) pointer_to_uword (jm->vl_input_queue), -- cgit 1.2.3-korg From 3cdc25ffbaa572639f99e197172c568e4324bc03 Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Thu, 17 Aug 2017 11:07:33 +0200 Subject: API: More gracefully fail when opening shared memory segment fails. API clients would fail with an ASSERT (and core dump) whenever the API shared memory segment could not be opened. 
This returns an error value to the client's connect instead. Change-Id: Id122a3a090b24b139c382ae09f341bde61fd2540 Signed-off-by: Ole Troan --- src/svm/svm.c | 13 ++++++++----- src/svm/svm_common.h | 2 +- src/vlibmemory/memory_shared.c | 8 ++++++-- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/svm/svm.c b/src/svm/svm.c index 0442ecb2..663324e0 100644 --- a/src/svm/svm.c +++ b/src/svm/svm.c @@ -733,7 +733,7 @@ svm_mutex_cleanup (void) } } -static void +static int svm_region_init_internal (svm_map_region_args_t * a) { svm_region_t *rp; @@ -742,7 +742,7 @@ svm_region_init_internal (svm_map_region_args_t * a) /* guard against klutz calls */ if (root_rp) - return; + return -1; root_rp_refcount++; @@ -757,7 +757,8 @@ svm_region_init_internal (svm_map_region_args_t * a) a->baseva += randomize_baseva; rp = svm_map_region (a); - ASSERT (rp); + if (!rp) + return -1; region_lock (rp, 3); @@ -778,6 +779,8 @@ svm_region_init_internal (svm_map_region_args_t * a) } region_unlock (rp); root_rp = rp; + + return 0; } void @@ -797,7 +800,7 @@ svm_region_init (void) svm_region_init_internal (a); } -void +int svm_region_init_chroot (const char *root_path) { svm_map_region_args_t _a, *a = &_a; @@ -811,7 +814,7 @@ svm_region_init_chroot (const char *root_path) a->uid = 0; a->gid = 0; - svm_region_init_internal (a); + return svm_region_init_internal (a); } void diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h index 1f184432..1f6d83c0 100644 --- a/src/svm/svm_common.h +++ b/src/svm/svm_common.h @@ -110,7 +110,7 @@ typedef struct void *svm_region_find_or_create (svm_map_region_args_t * a); void svm_region_init (void); -void svm_region_init_chroot (const char *root_path); +int svm_region_init_chroot (const char *root_path); void svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid); void svm_region_init_args (svm_map_region_args_t * a); void svm_region_exit (void); diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index 9bab6573..fbdabd06 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -337,7 +337,7 @@ vl_map_shmem (const char *region_name, int is_vlib) void *oldheap; vl_shmem_hdr_t *shmem_hdr = 0; api_main_t *am = &api_main; - int i; + int i, rv; struct timespec ts, tsrem; u32 vlib_input_queue_length; @@ -352,7 +352,11 @@ vl_map_shmem (const char *region_name, int is_vlib) } if (is_vlib == 0) - svm_region_init_chroot (am->root_path); + { + rv = svm_region_init_chroot (am->root_path); + if (rv) + return rv; + } if (a->root_path != NULL) { -- cgit 1.2.3-korg From cfc997ef3da9f406afe5caad99fc98a53aab7a77 Mon Sep 17 00:00:00 2001 From: Dave Wallace Date: Tue, 22 Aug 2017 18:32:34 -0400 Subject: Fix vl_map_shmem() root_path dangling reference. 
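The defect: vl_map_shmem() copied the region prefix into a stack-local char array and stored that pointer in both the map-region args and api_main_t, so am->root_path dangled as soon as the function returned. The fix below keeps the prefix in heap-backed vector memory instead. A minimal sketch of the corrected pattern, mirroring the vppinfra vector calls used in the diff:

    /* derive "/prefix" from "/prefix-vpe-api"; storage outlives this function */
    u8 *root_path = format (0, "%s", region_name);
    _vec_len (root_path) = vec_len (root_path) - strlen ("-vpe-api");
    vec_terminate_c_string (root_path);
    a->root_path = (const char *) root_path;
    am->root_path = (const char *) root_path;  /* heap-backed, no dangling pointer */
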
Change-Id: I90c9d8e151cacf50a99ce76b7a589079303196e8 Signed-off-by: Dave Wallace --- src/vlibmemory/memory_shared.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibmemory/memory_shared.c b/src/vlibmemory/memory_shared.c index fbdabd06..8c646908 100644 --- a/src/vlibmemory/memory_shared.c +++ b/src/vlibmemory/memory_shared.c @@ -340,15 +340,18 @@ vl_map_shmem (const char *region_name, int is_vlib) int i, rv; struct timespec ts, tsrem; u32 vlib_input_queue_length; + char *vpe_api_region_suffix = "-vpe-api"; memset (a, 0, sizeof (*a)); - if (strstr (region_name, "-vpe-api")) + if (strstr (region_name, vpe_api_region_suffix)) { - char root_path[strlen (region_name)]; - strncpy (root_path, region_name, strlen (region_name) - 8); - a->root_path = root_path; - am->root_path = root_path; + u8 *root_path = format (0, "%s", region_name); + _vec_len (root_path) = (vec_len (root_path) - + strlen (vpe_api_region_suffix)); + vec_terminate_c_string (root_path); + a->root_path = (const char *) root_path; + am->root_path = (const char *) root_path; } if (is_vlib == 0) -- cgit 1.2.3-korg From eb1ac1732f15f9a99edbeffeb94c525b9ff25c1d Mon Sep 17 00:00:00 2001 From: Colin Tregenza Dancer Date: Wed, 6 Sep 2017 20:23:24 +0100 Subject: Recombine diags and minimum barrier open time changes (VPP-968) Support logging to both syslog and elog Also include DaveB is_mp_safe fix, which had been lost Change-Id: If82f7969e2f43c63c3fed5b1a0c7434c90c1f380 Signed-off-by: Colin Tregenza Dancer --- src/vlib/main.h | 9 ++ src/vlib/threads.c | 313 +++++++++++++++++++++++++++++++++++++++++-- src/vlib/threads.h | 29 +++- src/vlibapi/api_common.h | 6 + src/vlibapi/api_shared.c | 10 +- src/vlibmemory/memory_vlib.c | 5 + src/vpp/vnet/main.c | 8 ++ 7 files changed, 367 insertions(+), 13 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlib/main.h b/src/vlib/main.h index 4c0cde3f..fb67334e 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -199,6 +199,15 @@ typedef struct vlib_main_t */ int need_vlib_worker_thread_node_runtime_update; + /* + * Barrier epoch - Set to current time, each time barrier_sync or + * barrier_release is called with zero recursion. + */ + f64 barrier_epoch; + + /* Earliest barrier can be closed again */ + f64 barrier_no_close_before; + } vlib_main_t; /* Global main structure. */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 6cd325b3..2d9ce84a 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -35,6 +35,222 @@ vl (void *p) vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; +/* + * Barrier tracing can be enabled on a normal build to collect information + * on barrier use, including timings and call stacks. Deliberately not + * keyed off CLIB_DEBUG, because that can add significant overhead which + * imapacts observed timings. 
+ */ + +#ifdef BARRIER_TRACING + /* + * Output of barrier tracing can be to syslog or elog as suits + */ +#ifdef BARRIER_TRACING_ELOG +static u32 +elog_id_for_msg_name (const char *msg_name) +{ + uword *p, r; + static uword *h; + u8 *name_copy; + + if (!h) + h = hash_create_string (0, sizeof (uword)); + + p = hash_get_mem (h, msg_name); + if (p) + return p[0]; + r = elog_string (&vlib_global_main.elog_main, "%s", msg_name); + + name_copy = format (0, "%s%c", msg_name, 0); + + hash_set_mem (h, name_copy, r); + + return r; +} + + /* + * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't + * defined + */ + +static inline void +barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier <%d#%s(O:%dus:%dus)(%dus)", + .format_args = "i4T4i4i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 count, caller, t_entry, t_open, t_closed; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->count = (int) vlib_worker_threads[0].barrier_sync_count; + ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); + ed->t_entry = (int) (1000000.0 * t_entry); + ed->t_open = (int) (1000000.0 * t_open); + ed->t_closed = (int) (1000000.0 * t_closed); +} + +static inline void +barrier_trace_sync_rec (f64 t_entry) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier <%d(%dus)%s", + .format_args = "i4i4T4", + }; + /* *INDENT-ON* */ + struct + { + u32 depth, t_entry, caller; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->depth = (int) vlib_worker_threads[0].recursion_level - 1; + ed->t_entry = (int) (1000000.0 * t_entry); + ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); +} + +static inline void +barrier_trace_release_rec (f64 t_entry) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier (%dus)%d>", + .format_args = "i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 t_entry, depth; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->t_entry = (int) (1000000.0 * t_entry); + ed->depth = (int) vlib_worker_threads[0].recursion_level; +} + +static inline void +barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier (%dus){%d}(C:%dus)#%d>", + .format_args = "i4i4i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 t_entry, t_update_main, t_closed_total, count; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->t_entry = (int) (1000000.0 * t_entry); + ed->t_update_main = (int) (1000000.0 * t_update_main); + ed->t_closed_total = (int) (1000000.0 * t_closed_total); + ed->count = (int) vlib_worker_threads[0].barrier_sync_count; + + /* Reset context for next trace */ + vlib_worker_threads[0].barrier_context = NULL; +} +#else +char barrier_trace[65536]; +char *btp = barrier_trace; + + /* + * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING + * isn't defined + */ + + +static inline void +barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) +{ + btp += sprintf (btp, "<%u#%s", + (unsigned int) vlib_worker_threads[0].barrier_sync_count, + vlib_worker_threads[0].barrier_caller); + + if (vlib_worker_threads[0].barrier_context) + { + btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context); + + } + + btp += sprintf (btp, "(O:%dus:%dus)(%dus):", + (int) (1000000.0 * t_entry), + (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed)); + +} 
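/*
 * Reading the sync record emitted above (as given by the format string,
 * the struct layout, and the timing code in
 * vlib_worker_thread_barrier_sync_int() later in this patch):
 *   count    - cumulative barrier_sync_count on the main thread
 *   caller   - function that requested the sync, captured by the
 *              vlib_worker_thread_barrier_sync() wrapper macro
 *   t_entry  - us from the previous barrier event to this sync request
 *   t_open   - us the barrier had been open before it was closed again
 *   t_closed - us needed to gather all workers at the barrier
 * All times are scaled to integer microseconds (1000000.0 * t).
 */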
+ +static inline void +barrier_trace_sync_rec (f64 t_entry) +{ + btp += sprintf (btp, "<%u(%dus)%s:", + (int) vlib_worker_threads[0].recursion_level - 1, + (int) (1000000.0 * t_entry), + vlib_worker_threads[0].barrier_caller); +} + +static inline void +barrier_trace_release_rec (f64 t_entry) +{ + btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry), + (int) vlib_worker_threads[0].recursion_level); +} + +static inline void +barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) +{ + + btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry)); + if (t_update_main > 0) + { + btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main)); + } + + btp += sprintf (btp, "(C:%dus)#%u>", + (int) (1000000.0 * t_closed_total), + (int) vlib_worker_threads[0].barrier_sync_count); + + /* Dump buffer to syslog, and reset for next trace */ + fformat (stderr, "BTRC %s\n", barrier_trace); + btp = barrier_trace; + vlib_worker_threads[0].barrier_context = NULL; +} +#endif +#else + + /* Null functions for default case where barrier tracing isn't used */ +static inline void +barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) +{ +} + +static inline void +barrier_trace_sync_rec (f64 t_entry) +{ +} + +static inline void +barrier_trace_release_rec (f64 t_entry) +{ +} + +static inline void +barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) +{ +} +#endif + uword os_get_nthreads (void) { @@ -558,6 +774,10 @@ start_workers (vlib_main_t * vm) *vlib_worker_threads->node_reforks_required = 0; vm->need_vlib_worker_thread_node_runtime_update = 0; + /* init timing */ + vm->barrier_epoch = 0; + vm->barrier_no_close_before = 0; + worker_thread_index = 1; for (i = 0; i < vec_len (tm->registrations); i++) @@ -790,6 +1010,7 @@ start_workers (vlib_main_t * vm) VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers); + static inline void worker_thread_node_runtime_update_internal (void) { @@ -993,7 +1214,6 @@ vlib_worker_thread_node_refork (void) nm_clone->processes = vec_dup (nm->processes); } - void vlib_worker_thread_node_runtime_update (void) { @@ -1192,10 +1412,29 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which) vlib_worker_thread_barrier_release (vm); } + /* + * Enforce minimum open time to minimize packet loss due to Rx overflow, + * based on a test based heuristic that barrier should be open for at least + * 3 time as long as it is closed (with an upper bound of 1ms because by that + * point it is probably too late to make a difference) + */ + +#ifndef BARRIER_MINIMUM_OPEN_LIMIT +#define BARRIER_MINIMUM_OPEN_LIMIT 0.001 +#endif + +#ifndef BARRIER_MINIMUM_OPEN_FACTOR +#define BARRIER_MINIMUM_OPEN_FACTOR 3 +#endif + void -vlib_worker_thread_barrier_sync (vlib_main_t * vm) +vlib_worker_thread_barrier_sync_int (vlib_main_t * vm) { f64 deadline; + f64 now; + f64 t_entry; + f64 t_open; + f64 t_closed; u32 count; if (vec_len (vlib_mains) < 2) @@ -1205,29 +1444,55 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) count = vec_len (vlib_mains) - 1; + /* Record entry relative to last close */ + now = vlib_time_now (vm); + t_entry = now - vm->barrier_epoch; + /* Tolerate recursive calls */ if (++vlib_worker_threads[0].recursion_level > 1) - return; + { + barrier_trace_sync_rec (t_entry); + return; + } vlib_worker_threads[0].barrier_sync_count++; - deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; + /* Enforce minimum barrier open time to minimize packet loss */ + ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT)); + while ((now = 
vlib_time_now (vm)) < vm->barrier_no_close_before) + ; + + /* Record time of closure */ + t_open = now - vm->barrier_epoch; + vm->barrier_epoch = now; + + deadline = now + BARRIER_SYNC_TIMEOUT; *vlib_worker_threads->wait_at_barrier = 1; while (*vlib_worker_threads->workers_at_barrier != count) { - if (vlib_time_now (vm) > deadline) + if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); os_panic (); } } + + t_closed = now - vm->barrier_epoch; + + barrier_trace_sync (t_entry, t_open, t_closed); + } void vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; + f64 now; + f64 minimum_open; + f64 t_entry; + f64 t_closed_total; + f64 t_update_main = 0.0; int refork_needed = 0; if (vec_len (vlib_mains) < 2) @@ -1235,8 +1500,15 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) ASSERT (vlib_get_thread_index () == 0); + + now = vlib_time_now (vm); + t_entry = now - vm->barrier_epoch; + if (--vlib_worker_threads[0].recursion_level > 0) - return; + { + barrier_trace_release_rec (t_entry); + return; + } /* Update (all) node runtimes before releasing the barrier, if needed */ if (vm->need_vlib_worker_thread_node_runtime_update) @@ -1249,15 +1521,17 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) refork_needed = 1; clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, (vec_len (vlib_mains) - 1)); + now = vlib_time_now (vm); + t_update_main = now - vm->barrier_epoch; } - deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; + deadline = now + BARRIER_SYNC_TIMEOUT; *vlib_worker_threads->wait_at_barrier = 0; while (*vlib_worker_threads->workers_at_barrier > 0) { - if (vlib_time_now (vm) > deadline) + if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); os_panic (); @@ -1267,11 +1541,13 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) /* Wait for reforks before continuing */ if (refork_needed) { - deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; + now = vlib_time_now (vm); + + deadline = now + BARRIER_SYNC_TIMEOUT; while (*vlib_worker_threads->node_reforks_required > 0) { - if (vlib_time_now (vm) > deadline) + if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread refork deadlock\n", __FUNCTION__); @@ -1279,6 +1555,23 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) } } } + + t_closed_total = now - vm->barrier_epoch; + + minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR; + + if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT) + { + minimum_open = BARRIER_MINIMUM_OPEN_LIMIT; + } + + vm->barrier_no_close_before = now + minimum_open; + + /* Record barrier epoch (used to enforce minimum open time) */ + vm->barrier_epoch = now; + + barrier_trace_release (t_entry, t_closed_total, t_update_main); + } /* diff --git a/src/vlib/threads.h b/src/vlib/threads.h index c3f1cade..72340ee1 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -18,6 +18,22 @@ #include #include +/* + * To enable detailed tracing of barrier usage, including call stacks and + * timings, define BARRIER_TRACING here or in relevant TAGS. If also used + * with CLIB_DEBUG, timing will _not_ be representative of normal code + * execution. + * + */ + +// #define BARRIER_TRACING 1 + +/* + * Two options for barrier tracing output: syslog & elog. 
+ */ + +// #define BARRIER_TRACING_ELOG 1 + extern vlib_main_t **vlib_mains; void vlib_set_thread_name (char *name); @@ -102,6 +118,10 @@ typedef struct vlib_thread_registration_t *registration; u8 *name; u64 barrier_sync_count; +#ifdef BARRIER_TRACING + const char *barrier_caller; + const char *barrier_context; +#endif volatile u32 *node_reforks_required; long lwp; @@ -179,7 +199,14 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); #define BARRIER_SYNC_TIMEOUT (1.0) #endif -void vlib_worker_thread_barrier_sync (vlib_main_t * vm); +#ifdef BARRIER_TRACING +#define vlib_worker_thread_barrier_sync(X) {vlib_worker_threads[0].barrier_caller=__FUNCTION__;vlib_worker_thread_barrier_sync_int(X);} +#else +#define vlib_worker_thread_barrier_sync(X) vlib_worker_thread_barrier_sync_int(X) +#endif + + +void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm); void vlib_worker_thread_barrier_release (vlib_main_t * vm); void vlib_worker_thread_node_refork (void); diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index 651566ae..bbeccfc2 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -144,6 +144,12 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); void vl_msg_api_barrier_release (void) __attribute__ ((weak)); +#ifdef BARRIER_TRACING +void vl_msg_api_barrier_trace_context (const char *context) + __attribute__ ((weak)); +#else +#define vl_msg_api_barrier_trace_context(X) +#endif void vl_msg_api_free (void *); void vl_noop_handler (void *mp); void vl_msg_api_increment_missing_client_counter (void); diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 5c1a9940..59dc2375 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -418,7 +418,10 @@ msg_handler_internal (api_main_t * am, if (do_it) { if (!am->is_mp_safe[id]) - vl_msg_api_barrier_sync (); + { + vl_msg_api_barrier_trace_context (am->msg_names[id]); + vl_msg_api_barrier_sync (); + } (*am->msg_handlers[id]) (the_msg); if (!am->is_mp_safe[id]) vl_msg_api_barrier_release (); @@ -498,7 +501,10 @@ vl_msg_api_handler_with_vm_node (api_main_t * am, vl_msg_api_trace (am, am->rx_trace, the_msg); if (!am->is_mp_safe[id]) - vl_msg_api_barrier_sync (); + { + vl_msg_api_barrier_trace_context (am->msg_names[id]); + vl_msg_api_barrier_sync (); + } (*handler) (the_msg, vm, node); if (!am->is_mp_safe[id]) vl_msg_api_barrier_release (); diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 688ce604..55a90d64 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1462,6 +1462,7 @@ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) static clib_error_t * rpc_api_hookup (vlib_main_t * vm) { + api_main_t *am = &api_main; #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ vl_api_##n##_t_handler, \ @@ -1481,6 +1482,10 @@ rpc_api_hookup (vlib_main_t * vm) sizeof(vl_api_##n##_t), 1 /* do trace */); foreach_plugin_trace_msg; #undef _ + + /* No reason to halt the parade to create a trace record... 
*/ + am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1; + return 0; } diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index 76371dbe..b330f60f 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -294,6 +294,14 @@ os_exit (int code) exit (code); } +#ifdef BARRIER_TRACING +void +vl_msg_api_barrier_trace_context (const char *context) +{ + vlib_worker_threads[0].barrier_context = context; +} +#endif + void vl_msg_api_barrier_sync (void) { -- cgit 1.2.3-korg From 49fe046e431c4d76b0c45c609e05e1b0a3063360 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 12 Sep 2017 17:06:56 -0400 Subject: API message table inspection utilities Add doxygen tags for show/clear commands Change-Id: Ic939c561b15b0b720a8db1ecacc17e3d74419e1d Signed-off-by: Dave Barach --- src/vlibapi/api_common.h | 3 + src/vlibmemory/memory_vlib.c | 493 +++++++++++++++++++++++++++++++++++++------ src/vpp/conf/startup.conf | 13 ++ 3 files changed, 449 insertions(+), 60 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index bbeccfc2..dc6761bc 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -255,6 +255,9 @@ typedef struct /* Replay in progress? */ int replay_in_progress; + /* Dump (msg-name, crc) snapshot here at startup */ + u8 *save_msg_table_filename; + /* List of API client reaper functions */ _vl_msg_api_function_list_elt_t *reaper_function_registrations; diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 55a90d64..401f388a 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -38,6 +38,14 @@ #include #include +/** + * @file + * @brief Binary API messaging via shared memory + * Low-level, primary provisioning interface + */ +/*? %%clicmd:group_label Binary API CLI %% ?*/ +/*? %%syscfg:group_label Binary API configuration %% ?*/ + #define TRACE_VLIB_MEMORY_QUEUE 0 #include /* enumerate all vlib messages */ @@ -188,7 +196,6 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) int rv = 0; void *oldheap; api_main_t *am = &api_main; - u8 *serialized_message_table = 0; /* * This is tortured. Maintain a vlib-address-space private @@ -220,9 +227,6 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) svm = am->vlib_rp; - if (am->serialized_message_table_in_shmem == 0) - serialized_message_table = vl_api_serialize_message_table (am, 0); - pthread_mutex_lock (&svm->mutex); oldheap = svm_push_data_heap (svm); *regpp = clib_mem_alloc (sizeof (vl_api_registration_t)); @@ -237,14 +241,11 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp) regp->name = format (0, "%s", mp->name); vec_add1 (regp->name, 0); - if (serialized_message_table) - am->serialized_message_table_in_shmem = - vec_dup (serialized_message_table); pthread_mutex_unlock (&svm->mutex); svm_pop_heap (oldheap); - vec_free (serialized_message_table); + ASSERT (am->serialized_message_table_in_shmem); rp = vl_msg_api_alloc (sizeof (*rp)); rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_REPLY); @@ -487,6 +488,9 @@ memclnt_process (vlib_main_t * vm, f64 sleep_time, start_time; f64 vector_rate; int i; + u8 *serialized_message_table = 0; + svm_region_t *svm; + void *oldheap; vlib_set_queue_signal_callback (vm, memclnt_queue_callback); @@ -519,6 +523,60 @@ memclnt_process (vlib_main_t * vm, rp->last_msg_id); } + /* + * Snapshoot the api message table. 
+ */ + serialized_message_table = vl_api_serialize_message_table (am, 0); + + svm = am->vlib_rp; + pthread_mutex_lock (&svm->mutex); + oldheap = svm_push_data_heap (svm); + + am->serialized_message_table_in_shmem = vec_dup (serialized_message_table); + + pthread_mutex_unlock (&svm->mutex); + svm_pop_heap (oldheap); + + /* + * Save the api message table snapshot, if configured + */ + if (am->save_msg_table_filename) + { + int fd, rv; + u8 *chroot_file; + if (strstr ((char *) am->save_msg_table_filename, "..") + || index ((char *) am->save_msg_table_filename, '/')) + { + clib_warning ("illegal save-message-table filename '%s'", + am->save_msg_table_filename); + goto skip_save; + } + + chroot_file = format (0, "/tmp/%s%c", am->save_msg_table_filename, 0); + + fd = creat ((char *) chroot_file, 0644); + + if (fd < 0) + { + clib_unix_warning ("creat"); + goto skip_save; + } + rv = write (fd, serialized_message_table, + vec_len (serialized_message_table)); + + if (rv != vec_len (serialized_message_table)) + clib_unix_warning ("write"); + + rv = close (fd); + if (rv < 0) + clib_unix_warning ("close"); + + vec_free (chroot_file); + } + +skip_save: + vec_free (serialized_message_table); + /* $$$ pay attention to frame size, control CPU usage */ while (1) { @@ -726,6 +784,15 @@ memclnt_process (vlib_main_t * vm, return 0; } +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memclnt_node,static) = { + .function = memclnt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "api-rx-from-ring", + .state = VLIB_NODE_STATE_DISABLED, +}; +/* *INDENT-ON* */ + static clib_error_t * vl_api_show_histogram_command (vlib_main_t * vm, @@ -762,11 +829,15 @@ vl_api_show_histogram_command (vlib_main_t * vm, return 0; } +/*? + * Display the binary api sleep-time histogram +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) = { - .path = "show api histogram", - .short_help = "show api histogram", - .function = vl_api_show_histogram_command, +VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) = +{ + .path = "show api histogram", + .short_help = "show api histogram", + .function = vl_api_show_histogram_command, }; /* *INDENT-ON* */ @@ -782,21 +853,15 @@ vl_api_clear_histogram_command (vlib_main_t * vm, return 0; } +/*? + * Clear the binary api sleep-time histogram +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = { - .path = "clear api histogram", - .short_help = "clear api histogram", - .function = vl_api_clear_histogram_command, -}; -/* *INDENT-ON* */ - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (memclnt_node,static) = { - .function = memclnt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "api-rx-from-ring", - .state = VLIB_NODE_STATE_DISABLED, +VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) = +{ + .path = "clear api histogram", + .short_help = "clear api histogram", + .function = vl_api_clear_histogram_command, }; /* *INDENT-ON* */ @@ -1064,33 +1129,46 @@ vl_api_status_command (vlib_main_t * vm, } /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_command, static) = { - .path = "show api", - .short_help = "Show API information", +VLIB_CLI_COMMAND (cli_show_api_command, static) = +{ + .path = "show api", + .short_help = "Show API information", }; /* *INDENT-ON* */ +/*? 
+ * Display binary api message allocation ring statistics +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_ring_command, static) = { - .path = "show api ring-stats", - .short_help = "Message ring statistics", - .function = vl_api_ring_command, +VLIB_CLI_COMMAND (cli_show_api_ring_command, static) = +{ + .path = "show api ring-stats", + .short_help = "Message ring statistics", + .function = vl_api_ring_command, }; /* *INDENT-ON* */ +/*? + * Display current api client connections +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_clients_command, static) = { - .path = "show api clients", - .short_help = "Client information", - .function = vl_api_client_command, +VLIB_CLI_COMMAND (cli_show_api_clients_command, static) = +{ + .path = "show api clients", + .short_help = "Client information", + .function = vl_api_client_command, }; /* *INDENT-ON* */ +/*? + * Display the current api message tracing status +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_status_command, static) = { - .path = "show api status", - .short_help = "Show API trace status", - .function = vl_api_status_command, +VLIB_CLI_COMMAND (cli_show_api_status_command, static) = +{ + .path = "show api trace-status", + .short_help = "Display API trace status", + .function = vl_api_status_command, }; /* *INDENT-ON* */ @@ -1133,11 +1211,15 @@ vl_api_message_table_command (vlib_main_t * vm, return 0; } +/*? + * Display the current api message decode tables +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) = { - .path = "show api message-table", - .short_help = "Message Table", - .function = vl_api_message_table_command, +VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) = +{ + .path = "show api message-table", + .short_help = "Message Table", + .function = vl_api_message_table_command, }; /* *INDENT-ON* */ @@ -1207,11 +1289,15 @@ configure: return 0; } +/*? + * Control the binary API trace mechanism +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (trace, static) = { - .path = "set api-trace", - .short_help = "API trace", - .function = vl_api_trace_command, +VLIB_CLI_COMMAND (trace, static) = +{ + .path = "set api-trace [on][on tx][on rx][off][free][debug on][debug off]", + .short_help = "API trace", + .function = vl_api_trace_command, }; /* *INDENT-ON* */ @@ -1265,9 +1351,9 @@ format_api_msg_range (u8 * s, va_list * args) vl_api_msg_range_t *rp = va_arg (*args, vl_api_msg_range_t *); if (rp == 0) - s = format (s, "%-20s%9s%9s", "Name", "First-ID", "Last-ID"); + s = format (s, "%-50s%9s%9s", "Name", "First-ID", "Last-ID"); else - s = format (s, "%-20s%9d%9d", rp->name, rp->first_msg_id, + s = format (s, "%-50s%9d%9d", rp->name, rp->first_msg_id, rp->last_msg_id); return s; @@ -1303,11 +1389,15 @@ vl_api_show_plugin_command (vlib_main_t * vm, return 0; } +/*? + * Display the plugin binary API message range table +?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = { - .path = "show api plugin", - .short_help = "show api plugin", - .function = vl_api_show_plugin_command, +VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) = +{ + .path = "show api plugin", + .short_help = "show api plugin", + .function = vl_api_show_plugin_command, }; /* *INDENT-ON* */ @@ -1925,12 +2015,17 @@ api_trace_command_fn (vlib_main_t * vm, return 0; } +/*? 
+ * Display, replay, or save a binary API trace +?*/ + /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (api_trace_command, static) = { - .path = "api trace", - .short_help = - "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", - .function = api_trace_command_fn, +VLIB_CLI_COMMAND (api_trace_command, static) = +{ + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, }; /* *INDENT-ON* */ @@ -1951,6 +2046,9 @@ api_config_fn (vlib_main_t * vm, unformat_input_t * input) vl_msg_api_trace_onoff (am, which, 1 /* on */ ); vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); } + else if (unformat (input, "save-api-table %s", + &am->save_msg_table_filename)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -1958,6 +2056,12 @@ api_config_fn (vlib_main_t * vm, unformat_input_t * input) return 0; } +/*? + * This module has three configuration parameters: + * "on" or "enable" - enables binary api tracing + * "nitems " - sets the size of the circular buffer to + * "save-api-table " - dumps the API message table to /tmp/ +?*/ VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); static clib_error_t * @@ -1986,6 +2090,275 @@ api_queue_config_fn (vlib_main_t * vm, unformat_input_t * input) VLIB_CONFIG_FUNCTION (api_queue_config_fn, "api-queue"); +static u8 * +extract_name (u8 * s) +{ + u8 *rv; + + rv = vec_dup (s); + + while (vec_len (rv) && rv[vec_len (rv)] != '_') + _vec_len (rv)--; + + rv[vec_len (rv)] = 0; + + return rv; +} + +static u8 * +extract_crc (u8 * s) +{ + int i; + u8 *rv; + + rv = vec_dup (s); + + for (i = vec_len (rv) - 1; i >= 0; i--) + { + if (rv[i] == '_') + { + vec_delete (rv, i + 1, 0); + break; + } + } + return rv; +} + +typedef struct +{ + u8 *name_and_crc; + u8 *name; + u8 *crc; + u32 msg_index; + int which; +} msg_table_unserialize_t; + +static int +table_id_cmp (void *a1, void *a2) +{ + msg_table_unserialize_t *n1 = a1; + msg_table_unserialize_t *n2 = a2; + + return (n1->msg_index - n2->msg_index); +} + +static int +table_name_and_crc_cmp (void *a1, void *a2) +{ + msg_table_unserialize_t *n1 = a1; + msg_table_unserialize_t *n2 = a2; + + return strcmp ((char *) n1->name_and_crc, (char *) n2->name_and_crc); +} + +static clib_error_t * +dump_api_table_file_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 *filename = 0; + api_main_t *am = &api_main; + serialize_main_t _sm, *sm = &_sm; + clib_error_t *error; + u32 nmsgs; + u32 msg_index; + u8 *name_and_crc; + int compare_current = 0; + int numeric_sort = 0; + msg_table_unserialize_t *table = 0, *item; + u32 i; + u32 ndifferences = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "file %s", &filename)) + ; + else if (unformat (input, "compare-current") + || unformat (input, "compare")) + compare_current = 1; + else if (unformat (input, "numeric")) + numeric_sort = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (numeric_sort && compare_current) + return clib_error_return + (0, "Comparison and numeric sorting are incompatible"); + + if (filename == 0) + return clib_error_return (0, "File not specified"); + + /* Load the serialized message table from the table dump */ + + error = unserialize_open_unix_file (sm, (char *) filename); + + if (error) + return error; + + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; 
i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + vec_add2 (table, item, 1); + item->msg_index = msg_index; + item->name_and_crc = name_and_crc; + item->name = extract_name (name_and_crc); + item->crc = extract_crc (name_and_crc); + item->which = 0; /* file */ + } + serialize_close (sm); + + /* Compare with the current image? */ + if (compare_current) + { + /* Append the current message table */ + u8 *tblv = vec_dup (am->serialized_message_table_in_shmem); + + serialize_open_vector (sm, tblv); + unserialize_integer (sm, &nmsgs, sizeof (u32)); + + for (i = 0; i < nmsgs; i++) + { + msg_index = unserialize_likely_small_unsigned_integer (sm); + unserialize_cstring (sm, (char **) &name_and_crc); + + vec_add2 (table, item, 1); + item->msg_index = msg_index; + item->name_and_crc = name_and_crc; + item->name = extract_name (name_and_crc); + item->crc = extract_crc (name_and_crc); + item->which = 1; /* current_image */ + } + } + + /* Sort the table. */ + if (numeric_sort) + vec_sort_with_function (table, table_id_cmp); + else + vec_sort_with_function (table, table_name_and_crc_cmp); + + if (compare_current) + { + ndifferences = 0; + + /* + * In this case, the recovered table will have two entries per + * API message. So, if entries i and i+1 match, the message definitions + * are identical. Otherwise, the crc is different, or a message is + * present in only one of the tables. + */ + vlib_cli_output (vm, "%=60s %s", "Message Name", "Result"); + + for (i = 0; i < vec_len (table);) + { + /* Last message lonely? */ + if (i == vec_len (table) - 1) + { + ndifferences++; + goto last_unique; + } + + /* Identical pair? */ + if (!strncmp + ((char *) table[i].name_and_crc, + (char *) table[i + 1].name_and_crc, + vec_len (table[i].name_and_crc))) + { + i += 2; + continue; + } + + ndifferences++; + + /* Only in one of two tables? */ + if (strncmp ((char *) table[i].name, (char *) table[i + 1].name, + vec_len (table[i].name))) + { + last_unique: + vlib_cli_output (vm, "%-60s only in %s", + table[i].name, table[i].which ? + "image" : "file"); + i++; + continue; + } + /* In both tables, but with different signatures */ + vlib_cli_output (vm, "%-60s definition changed", table[i].name); + i += 2; + } + if (ndifferences == 0) + vlib_cli_output (vm, "No api message signature differences found."); + else + vlib_cli_output (vm, "Found %u api message signature differences", + ndifferences); + goto cleanup; + } + + /* Dump the table, sorted as shown above */ + vlib_cli_output (vm, "%=60s %=8s %=10s", "Message name", "MsgID", "CRC"); + + for (i = 0; i < vec_len (table); i++) + { + item = table + i; + vlib_cli_output (vm, "%-60s %8u %10s", item->name, + item->msg_index, item->crc); + } + +cleanup: + for (i = 0; i < vec_len (table); i++) + { + vec_free (table[i].name_and_crc); + vec_free (table[i].name); + vec_free (table[i].crc); + } + + vec_free (table); + + return 0; +} + +/*? + * Displays a serialized API message decode table, sorted by message name + * + * @cliexpar + * @cliexstart{show api dump file } + * Message name MsgID CRC + * accept_session 407 8e2a127e + * accept_session_reply 408 67d8c22a + * add_node_next 549 e4202993 + * add_node_next_reply 550 e89d6eed + * etc. + * @cliexend +?*/ + +/*? 
+ * Compares a serialized API message decode table with the current image + * + * @cliexpar + * @cliexstart{show api dump file compare} + * ip_add_del_route definition changed + * ip_table_add_del definition changed + * l2_macs_event only in image + * vnet_ip4_fib_counters only in file + * vnet_ip4_nbr_counters only in file + * @cliexend +?*/ + +/*? + * Display a serialized API message decode table +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (dump_api_table_file, static) = +{ + .path = "show api dump", + .short_help = "show api dump file [numeric | compare-current]", + .function = dump_api_table_file_command_fn, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index 6ebe1efe..c3b9872e 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -8,7 +8,20 @@ unix { } api-trace { +## This stanza controls binary API tracing. Unless there is a very strong reason, +## please leave this feature enabled. on +## Additional parameters: +## +## To set the number of binary API trace records in the circular buffer, configure nitems +## +## nitems +## +## To save the api message table decode tables, configure a filename. Results in /tmp/ +## Very handy for understanding api message changes between versions, identifying missing +## plugins, and so forth. +## +## save-api-table } api-segment { -- cgit 1.2.3-korg From 8f2a4eafeaa439432107563033728e09665c16d9 Mon Sep 17 00:00:00 2001 From: Klement Sekera Date: Thu, 4 May 2017 06:15:18 +0200 Subject: Add new C API Change-Id: I717ce3cd7c867c155de149ec56623269d26d0ff7 Signed-off-by: Klement Sekera --- .gitignore | 1 + Makefile | 11 +- src/Makefile.am | 2 + src/configure.ac | 2 +- src/vlibmemory/unix_shared_memory_queue.c | 65 ++ src/vlibmemory/unix_shared_memory_queue.h | 11 +- src/vpp-api/vapi/Makefile.am | 63 ++ src/vpp-api/vapi/libvapiclient.map | 41 + src/vpp-api/vapi/vapi.c | 895 ++++++++++++++++++++++ src/vpp-api/vapi/vapi.h | 285 +++++++ src/vpp-api/vapi/vapi_c_gen.py | 809 ++++++++++++++++++++ src/vpp-api/vapi/vapi_dbg.h | 76 ++ src/vpp-api/vapi/vapi_internal.h | 126 ++++ src/vpp-api/vapi/vapi_json_parser.py | 303 ++++++++ test/Makefile | 6 +- test/ext/Makefile | 17 + test/ext/vapi_test.c | 1152 +++++++++++++++++++++++++++++ test/scripts/test-loop.sh | 14 +- test/test_vapi.py | 78 ++ 19 files changed, 3943 insertions(+), 14 deletions(-) create mode 100644 src/vpp-api/vapi/Makefile.am create mode 100644 src/vpp-api/vapi/libvapiclient.map create mode 100644 src/vpp-api/vapi/vapi.c create mode 100644 src/vpp-api/vapi/vapi.h create mode 100755 src/vpp-api/vapi/vapi_c_gen.py create mode 100644 src/vpp-api/vapi/vapi_dbg.h create mode 100644 src/vpp-api/vapi/vapi_internal.h create mode 100644 src/vpp-api/vapi/vapi_json_parser.py create mode 100644 test/ext/Makefile create mode 100644 test/ext/vapi_test.c create mode 100644 test/test_vapi.py (limited to 'src/vlibmemory') diff --git a/.gitignore b/.gitignore index ba4e104a..5a6266d7 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ /build-root/test-doc/ /build-root/test-cov/ /build-root/python/ +/build-root/vapi_test/ /build-config.mk /dpdk/*.tar.gz /dpdk/*.tar.xz diff --git a/Makefile b/Makefile index c08115d4..c46fa6bb 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ DEB_DEPENDS = curl build-essential autoconf automake bison libssl-dev ccache DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-systemd DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config 
DEB_DEPENDS += lcov chrpath autoconf nasm indent libnuma-dev -DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 +DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 check ifeq ($(OS_VERSION_ID),14.04) DEB_DEPENDS += openjdk-8-jdk-headless else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-8) @@ -76,6 +76,7 @@ RPM_DEPENDS = redhat-lsb glibc-static java-1.8.0-openjdk-devel yum-utils RPM_DEPENDS += apr-devel RPM_DEPENDS += openssl-devel RPM_DEPENDS += numactl-devel +RPM_DEPENDS += check ifeq ($(OS_ID)-$(OS_VERSION_ID),fedora-25) RPM_DEPENDS += python-devel RPM_DEPENDS += python2-virtualenv @@ -101,7 +102,13 @@ endif RPM_SUSE_DEPENDS = autoconf automake bison ccache chrpath distribution-release gcc6 glibc-devel-static RPM_SUSE_DEPENDS += java-1_8_0-openjdk-devel libopenssl-devel libtool lsb-release make openssl-devel -RPM_SUSE_DEPENDS += python-devel python-pip python-rpm-macros shadow nasm libnuma-devel +RPM_SUSE_DEPENDS += python-devel python-pip python-rpm-macros shadow nasm libnuma-devel python3 + +ifeq ($(filter rhel centos,$(OS_ID)),$(OS_ID)) + RPM_DEPENDS += python34 +else + RPM_DEPENDS += python3 +endif ifneq ($(wildcard $(STARTUP_DIR)/startup.conf),) STARTUP_CONF ?= $(STARTUP_DIR)/startup.conf diff --git a/src/Makefile.am b/src/Makefile.am index 41076e0e..7b35e50c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -80,6 +80,8 @@ if ENABLE_JAPI SUBDIRS += vpp-api/java endif +SUBDIRS += vpp-api/vapi + ############################################################################### # API ############################################################################### diff --git a/src/configure.ac b/src/configure.ac index 6b6d9636..2efb23ad 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -3,7 +3,7 @@ LT_INIT AC_CONFIG_AUX_DIR([.]) AM_INIT_AUTOMAKE([subdir-objects]) AM_SILENT_RULES([yes]) -AC_CONFIG_FILES([Makefile plugins/Makefile vpp-api/python/Makefile vpp-api/java/Makefile]) +AC_CONFIG_FILES([Makefile plugins/Makefile vpp-api/python/Makefile vpp-api/java/Makefile vpp-api/vapi/Makefile]) AC_CONFIG_MACRO_DIR([m4]) AC_PROG_CC diff --git a/src/vlibmemory/unix_shared_memory_queue.c b/src/vlibmemory/unix_shared_memory_queue.c index e86edec3..4db4851c 100644 --- a/src/vlibmemory/unix_shared_memory_queue.c +++ b/src/vlibmemory/unix_shared_memory_queue.c @@ -234,6 +234,71 @@ unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, return 0; } +/* + * unix_shared_memory_queue_add2 + */ +int +unix_shared_memory_queue_add2 (unix_shared_memory_queue_t * q, u8 * elem, + u8 * elem2, int nowait) +{ + i8 *tailp; + int need_broadcast = 0; + + if (nowait) + { + /* zero on success */ + if (pthread_mutex_trylock (&q->mutex)) + { + return (-1); + } + } + else + pthread_mutex_lock (&q->mutex); + + if (PREDICT_FALSE (q->cursize + 1 == q->maxsize)) + { + if (nowait) + { + pthread_mutex_unlock (&q->mutex); + return (-2); + } + while (q->cursize + 1 == q->maxsize) + { + (void) pthread_cond_wait (&q->condvar, &q->mutex); + } + } + + tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); + clib_memcpy (tailp, elem, q->elsize); + + q->tail++; + q->cursize++; + + if (q->tail == q->maxsize) + q->tail = 0; + + need_broadcast = (q->cursize == 1); + + tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); + clib_memcpy (tailp, elem2, q->elsize); + + q->tail++; + q->cursize++; + + if (q->tail == q->maxsize) + q->tail = 0; + + if (need_broadcast) + { + (void) pthread_cond_broadcast (&q->condvar); + if (q->signal_when_queue_non_empty) + kill (q->consumer_pid, 
q->signal_when_queue_non_empty); + } + pthread_mutex_unlock (&q->mutex); + + return 0; +} + /* * unix_shared_memory_queue_sub */ diff --git a/src/vlibmemory/unix_shared_memory_queue.h b/src/vlibmemory/unix_shared_memory_queue.h index 13800065..27de3218 100644 --- a/src/vlibmemory/unix_shared_memory_queue.h +++ b/src/vlibmemory/unix_shared_memory_queue.h @@ -21,7 +21,6 @@ #define included_unix_shared_memory_queue_h #include -#include typedef struct _unix_shared_memory_queue { @@ -43,10 +42,12 @@ unix_shared_memory_queue_t *unix_shared_memory_queue_init (int nels, int signal_when_queue_non_empty); void unix_shared_memory_queue_free (unix_shared_memory_queue_t * q); -int unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, - u8 * elem, int nowait); -int unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, - u8 * elem, int nowait); +int unix_shared_memory_queue_add (unix_shared_memory_queue_t * q, u8 * elem, + int nowait); +int unix_shared_memory_queue_add2 (unix_shared_memory_queue_t * q, u8 * elem, + u8 * elem2, int nowait); +int unix_shared_memory_queue_sub (unix_shared_memory_queue_t * q, u8 * elem, + int nowait); void unix_shared_memory_queue_lock (unix_shared_memory_queue_t * q); void unix_shared_memory_queue_unlock (unix_shared_memory_queue_t * q); int unix_shared_memory_queue_is_full (unix_shared_memory_queue_t * q); diff --git a/src/vpp-api/vapi/Makefile.am b/src/vpp-api/vapi/Makefile.am new file mode 100644 index 00000000..ce681c38 --- /dev/null +++ b/src/vpp-api/vapi/Makefile.am @@ -0,0 +1,63 @@ +# Copyright (c) 2017 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +AUTOMAKE_OPTIONS = foreign +ACLOCAL_AMFLAGS = -I m4 +AM_LIBTOOLFLAGS = --quiet + +AM_CFLAGS = -Wall -I${top_srcdir} -I${top_builddir} -I. -I$(top_srcdir)/vpp-api/vapi + +AM_LDFLAGS = -shared -avoid-version -rpath /none -no-undefined + +bin_PROGRAMS = +noinst_LTLIBRARIES = +CLEANDIRS = + +%.api.vapi.h: %.api.json vapi_c_gen.py + @echo " VAPI C GEN $< " $@ ; \ + mkdir -p `dirname $@` ; \ + $(top_srcdir)/vpp-api/vapi/vapi_c_gen.py $< + +%.api.json: + find $(top_builddir) -name '$@' | xargs ln -s + +BUILT_SOURCES = $(shell find $(top_builddir) -name '*.api.json' | xargs -n1 basename) \ + $(patsubst %.api.json,%.api.vapi.h,$(JSON_FILES)) + +vapi.c: $(BUILT_SOURCES) + +JSON_FILES = $(wildcard *.api.json) + + +lib_LTLIBRARIES = libvapiclient.la + +libvapiclient_la_SOURCES = vapi.c + +libvapiclient_la_LIBADD = -lpthread -lm -lrt \ + $(top_builddir)/libvppinfra.la \ + $(top_builddir)/libvlibmemoryclient.la \ + $(top_builddir)/libsvm.la + +libvapiclient_la_LDFLAGS = \ + -Wl,-L$(top_builddir)/.libs,--whole-archive,--no-whole-archive \ + -Wl,--version-script=$(srcdir)/libvapiclient.map,-lrt + +libvapiclient_la_CPPFLAGS = -I. 
-I$(top_builddir)/vpp-api/vapi + +nobase_include_HEADERS = ${top_srcdir}/vpp-api/client/vppapiclient.h \ + vapi.h \ + vapi_dbg.h \ + vapi_internal.h \ + $(patsubst %.api.json,%.api.vapi.h,$(JSON_FILES)) + +# vi:syntax=automake diff --git a/src/vpp-api/vapi/libvapiclient.map b/src/vpp-api/vapi/libvapiclient.map new file mode 100644 index 00000000..53733002 --- /dev/null +++ b/src/vpp-api/vapi/libvapiclient.map @@ -0,0 +1,41 @@ + +VAPICLIENT_17.07 { + global: + vapi_msg_alloc; + vapi_msg_free; + vapi_ctx_alloc; + vapi_ctx_free; + vapi_is_msg_available; + vapi_connect; + vapi_disconnect; + vapi_get_fd; + vapi_send; + vapi_send2; + vapi_recv; + vapi_wait; + vapi_dispatch_one; + vapi_dispatch; + vapi_set_event_cb; + vapi_clear_event_cb; + vapi_set_generic_event_cb; + vapi_clear_generic_event_cb; + vapi_get_client_index; + vapi_register_msg; + vapi_get_client_index; + vapi_is_nonblocking; + vapi_requests_full; + vapi_gen_req_context; + vapi_producer_lock; + vapi_send_with_control_ping; + vapi_store_request; + vapi_is_nonblocking; + vapi_producer_unlock; + vapi_lookup_vl_msg_id; + vapi_lookup_vapi_msg_id_t; + vapi_msg_is_with_context; + vapi_get_context_offset; + vapi_msg_id_control_ping; + vapi_msg_id_control_ping_reply; + + local: *; +}; diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c new file mode 100644 index 00000000..b9c81a13 --- /dev/null +++ b/src/vpp-api/vapi/vapi.c @@ -0,0 +1,895 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* we need to use control pings for some stuff and because we're forced to put + * the code in headers, we need a way to be able to grab the ids of these + * messages - so declare them here as extern */ +vapi_msg_id_t vapi_msg_id_control_ping = 0; +vapi_msg_id_t vapi_msg_id_control_ping_reply = 0; + +struct +{ + size_t count; + vapi_message_desc_t **msgs; + size_t max_len_name_with_crc; +} __vapi_metadata; + +typedef struct +{ + u32 context; + vapi_cb_t callback; + void *callback_ctx; + bool is_dump; +} vapi_req_t; + +static const u32 context_counter_mask = (1 << 31); + +typedef struct +{ + vapi_error_e (*cb) (vapi_ctx_t ctx, void *callback_ctx, vapi_msg_id_t id, + void *payload); + void *ctx; +} vapi_generic_cb_with_ctx; + +typedef struct +{ + vapi_error_e (*cb) (vapi_ctx_t ctx, void *callback_ctx, void *payload); + void *ctx; +} vapi_event_cb_with_ctx; + +struct vapi_ctx_s +{ + vapi_mode_e mode; + int requests_size; /* size of the requests array (circular queue) */ + int requests_start; /* index of first request */ + int requests_count; /* number of used slots */ + vapi_req_t *requests; + u32 context_counter; + vapi_generic_cb_with_ctx generic_cb; + vapi_event_cb_with_ctx *event_cbs; + u16 *vapi_msg_id_t_to_vl_msg_id; + u16 vl_msg_id_max; + vapi_msg_id_t *vl_msg_id_to_vapi_msg_t; + bool connected; + pthread_mutex_t requests_mutex; +}; + +u32 +vapi_gen_req_context (vapi_ctx_t ctx) +{ + ++ctx->context_counter; + ctx->context_counter %= context_counter_mask; + return ctx->context_counter | context_counter_mask; +} + +size_t +vapi_get_request_count (vapi_ctx_t ctx) +{ + return ctx->requests_count; +} + +bool +vapi_requests_full (vapi_ctx_t ctx) +{ + return (ctx->requests_count == ctx->requests_size); +} + +static bool +vapi_requests_empty (vapi_ctx_t ctx) +{ + return (0 == ctx->requests_count); +} + +static int +vapi_requests_end (vapi_ctx_t ctx) +{ + return (ctx->requests_start + ctx->requests_count) % ctx->requests_size; +} + +void +vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump, + vapi_cb_t callback, void *callback_ctx) +{ + assert (!vapi_requests_full (ctx)); + /* if the mutex is not held, bad things will happen */ + assert (0 != pthread_mutex_trylock (&ctx->requests_mutex)); + const int requests_end = vapi_requests_end (ctx); + vapi_req_t *slot = &ctx->requests[requests_end]; + slot->is_dump = is_dump; + slot->context = context; + slot->callback = callback; + slot->callback_ctx = callback_ctx; + VAPI_DBG ("stored@%d: context:%x (start is @%d)", requests_end, context, + ctx->requests_start); + ++ctx->requests_count; + assert (!vapi_requests_empty (ctx)); +} + +#if VAPI_DEBUG_ALLOC +struct to_be_freed_s; +struct to_be_freed_s +{ + void *v; + struct to_be_freed_s *next; +}; + +static struct to_be_freed_s *to_be_freed = NULL; + +void +vapi_add_to_be_freed (void *v) +{ + struct to_be_freed_s *prev = NULL; + struct to_be_freed_s *tmp; + tmp = to_be_freed; + while (tmp && tmp->v) + { + prev = tmp; + tmp = tmp->next; + } + if (!tmp) + { + if (!prev) + { + tmp = to_be_freed = calloc (1, sizeof (*to_be_freed)); + } + else + { + tmp = prev->next = calloc (1, sizeof (*to_be_freed)); + } + } + VAPI_DBG ("To be freed %p", v); + tmp->v = v; +} + +void +vapi_trace_free (void *v) +{ + struct to_be_freed_s *tmp = to_be_freed; + while (tmp && tmp->v != v) + { + tmp = tmp->next; + } + if (tmp 
&& tmp->v == v) + { + VAPI_DBG ("Freed %p", v); + tmp->v = NULL; + } + else + { + VAPI_ERR ("Trying to free untracked pointer %p", v); + abort (); + } +} + +void +vapi_to_be_freed_validate () +{ + struct to_be_freed_s *tmp = to_be_freed; + while (tmp) + { + if (tmp->v) + { + VAPI_ERR ("Unfreed msg %p!", tmp->v); + } + tmp = tmp->next; + } +} + +#endif + +void * +vapi_msg_alloc (vapi_ctx_t ctx, size_t size) +{ + if (!ctx->connected) + { + return NULL; + } + void *rv = vl_msg_api_alloc_or_null (size); + return rv; +} + +void +vapi_msg_free (vapi_ctx_t ctx, void *msg) +{ + if (!ctx->connected) + { + return; + } +#if VAPI_DEBUG_ALLOC + vapi_trace_free (msg); +#endif + vl_msg_api_free (msg); +} + +vapi_error_e +vapi_ctx_alloc (vapi_ctx_t * result) +{ + vapi_ctx_t ctx = calloc (1, sizeof (struct vapi_ctx_s)); + if (!ctx) + { + return VAPI_ENOMEM; + } + ctx->context_counter = 0; + ctx->vapi_msg_id_t_to_vl_msg_id = + malloc (__vapi_metadata.count * + sizeof (*ctx->vapi_msg_id_t_to_vl_msg_id)); + if (!ctx->vapi_msg_id_t_to_vl_msg_id) + { + goto fail; + } + ctx->event_cbs = calloc (__vapi_metadata.count, sizeof (*ctx->event_cbs)); + if (!ctx->event_cbs) + { + goto fail; + } + pthread_mutex_init (&ctx->requests_mutex, NULL); + *result = ctx; + return VAPI_OK; +fail: + vapi_ctx_free (ctx); + return VAPI_ENOMEM; +} + +void +vapi_ctx_free (vapi_ctx_t ctx) +{ + assert (!ctx->connected); + free (ctx->requests); + free (ctx->vapi_msg_id_t_to_vl_msg_id); + free (ctx->event_cbs); + free (ctx->vl_msg_id_to_vapi_msg_t); + pthread_mutex_destroy (&ctx->requests_mutex); + free (ctx); +} + +bool +vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t id) +{ + return vapi_lookup_vl_msg_id (ctx, id) != UINT16_MAX; +} + +vapi_error_e +vapi_connect (vapi_ctx_t ctx, const char *name, + const char *chroot_prefix, + int max_outstanding_requests, + int response_queue_size, vapi_mode_e mode) +{ + if (response_queue_size <= 0 || max_outstanding_requests <= 0) + { + return VAPI_EINVAL; + } + ctx->requests_size = max_outstanding_requests; + const size_t size = ctx->requests_size * sizeof (*ctx->requests); + void *tmp = realloc (ctx->requests, size); + if (!tmp) + { + return VAPI_ENOMEM; + } + ctx->requests = tmp; + memset (ctx->requests, 0, size); + ctx->requests_start = ctx->requests_count = 0; + if (chroot_prefix) + { + VAPI_DBG ("set memory root path `%s'", chroot_prefix); + vl_set_memory_root_path ((char *) chroot_prefix); + } + static char api_map[] = "/vpe-api"; + VAPI_DBG ("client api map `%s'", api_map); + if ((vl_client_api_map (api_map)) < 0) + { + return VAPI_EMAP_FAIL; + } + VAPI_DBG ("connect client `%s'", name); + if (vl_client_connect ((char *) name, 0, response_queue_size) < 0) + { + vl_client_api_unmap (); + return VAPI_ECON_FAIL; + } +#if VAPI_DEBUG_CONNECT + VAPI_DBG ("start probing messages"); +#endif + int rv; + int i; + for (i = 0; i < __vapi_metadata.count; ++i) + { + vapi_message_desc_t *m = __vapi_metadata.msgs[i]; + u8 scratch[m->name_with_crc_len + 1]; + memcpy (scratch, m->name_with_crc, m->name_with_crc_len + 1); + u32 id = vl_api_get_msg_index (scratch); + if (~0 != id) + { + if (id > UINT16_MAX) + { + VAPI_ERR ("Returned vl_msg_id `%u' > UINT16MAX `%u'!", id, + UINT16_MAX); + rv = VAPI_EINVAL; + goto fail; + } + if (id > ctx->vl_msg_id_max) + { + vapi_msg_id_t *tmp = realloc (ctx->vl_msg_id_to_vapi_msg_t, + sizeof + (*ctx->vl_msg_id_to_vapi_msg_t) * + (id + 1)); + if (!tmp) + { + rv = VAPI_ENOMEM; + goto fail; + } + ctx->vl_msg_id_to_vapi_msg_t = tmp; + ctx->vl_msg_id_max = id; + } + 
ctx->vl_msg_id_to_vapi_msg_t[id] = m->id; + ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = id; +#if VAPI_DEBUG_CONNECT + VAPI_DBG ("Message `%s' has vl_msg_id `%u'", m->name_with_crc, + (unsigned) id); +#endif + } + else + { + ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = UINT16_MAX; + VAPI_DBG ("Message `%s' not available", m->name_with_crc); + } + } +#if VAPI_DEBUG_CONNECT + VAPI_DBG ("finished probing messages"); +#endif + if (!vapi_is_msg_available (ctx, vapi_msg_id_control_ping) || + !vapi_is_msg_available (ctx, vapi_msg_id_control_ping_reply)) + { + VAPI_ERR + ("control ping or control ping reply not available, cannot connect"); + rv = VAPI_EINCOMPATIBLE; + goto fail; + } + ctx->mode = mode; + ctx->connected = true; + return VAPI_OK; +fail: + vl_client_disconnect (); + vl_client_api_unmap (); + return rv; +} + +vapi_error_e +vapi_disconnect (vapi_ctx_t ctx) +{ + if (!ctx->connected) + { + return VAPI_EINVAL; + } + vl_client_disconnect (); + vl_client_api_unmap (); +#if VAPI_DEBUG_ALLOC + vapi_to_be_freed_validate (); +#endif + ctx->connected = false; + return VAPI_OK; +} + +vapi_error_e +vapi_get_fd (vapi_ctx_t ctx, int *fd) +{ + return VAPI_ENOTSUP; +} + +vapi_error_e +vapi_send (vapi_ctx_t ctx, void *msg) +{ + vapi_error_e rv = VAPI_OK; + if (!ctx || !msg || !ctx->connected) + { + rv = VAPI_EINVAL; + goto out; + } + int tmp; + unix_shared_memory_queue_t *q = api_main.shmem_hdr->vl_input_queue; +#if VAPI_DEBUG + unsigned msgid = be16toh (*(u16 *) msg); + if (msgid <= ctx->vl_msg_id_max) + { + vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid]; + if (id < __vapi_metadata.count) + { + VAPI_DBG ("send msg %u[%s]", msgid, __vapi_metadata.msgs[id]->name); + } + else + { + VAPI_DBG ("send msg %u[UNKNOWN]", msgid); + } + } + else + { + VAPI_DBG ("send msg %u[UNKNOWN]", msgid); + } +#endif + tmp = unix_shared_memory_queue_add (q, (u8 *) & msg, + VAPI_MODE_BLOCKING == + ctx->mode ? 0 : 1); + if (tmp < 0) + { + rv = VAPI_EAGAIN; + } +out: + VAPI_DBG ("vapi_send() rv = %d", rv); + return rv; +} + +vapi_error_e +vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2) +{ + vapi_error_e rv = VAPI_OK; + if (!ctx || !msg1 || !msg2 || !ctx->connected) + { + rv = VAPI_EINVAL; + goto out; + } + unix_shared_memory_queue_t *q = api_main.shmem_hdr->vl_input_queue; +#if VAPI_DEBUG + unsigned msgid1 = be16toh (*(u16 *) msg1); + unsigned msgid2 = be16toh (*(u16 *) msg2); + const char *name1 = "UNKNOWN"; + const char *name2 = "UNKNOWN"; + if (msgid1 <= ctx->vl_msg_id_max) + { + vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid1]; + if (id < __vapi_metadata.count) + { + name1 = __vapi_metadata.msgs[id]->name; + } + } + if (msgid2 <= ctx->vl_msg_id_max) + { + vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid2]; + if (id < __vapi_metadata.count) + { + name2 = __vapi_metadata.msgs[id]->name; + } + } + VAPI_DBG ("send two: %u[%s], %u[%s]", msgid1, name1, msgid2, name2); +#endif + int tmp = unix_shared_memory_queue_add2 (q, (u8 *) & msg1, (u8 *) & msg2, + VAPI_MODE_BLOCKING == + ctx->mode ? 
0 : 1); + if (tmp < 0) + { + rv = VAPI_EAGAIN; + } +out: + VAPI_DBG ("vapi_send() rv = %d", rv); + return rv; +} + +vapi_error_e +vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size) +{ + if (!ctx || !ctx->connected || !msg || !msg_size) + { + return VAPI_EINVAL; + } + vapi_error_e rv = VAPI_OK; + api_main_t *am = &api_main; + uword data; + + if (am->our_pid == 0) + { + return VAPI_EINVAL; + } + + unix_shared_memory_queue_t *q = am->vl_input_queue; + VAPI_DBG ("doing shm queue sub"); + int tmp = unix_shared_memory_queue_sub (q, (u8 *) & data, 0); + if (tmp == 0) + { +#if VAPI_DEBUG_ALLOC + vapi_add_to_be_freed ((void *) data); +#endif + msgbuf_t *msgbuf = + (msgbuf_t *) ((u8 *) data - offsetof (msgbuf_t, data)); + if (!msgbuf->data_len) + { + vapi_msg_free (ctx, (u8 *) data); + return VAPI_EAGAIN; + } + *msg = (u8 *) data; + *msg_size = ntohl (msgbuf->data_len); + VAPI_DBG ("recv msg %p", *msg); + } + else + { + rv = VAPI_EAGAIN; + } + return rv; +} + +vapi_error_e +vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode) +{ + /* FIXME */ + return VAPI_ENOTSUP; +} + +static vapi_error_e +vapi_dispatch_response (vapi_ctx_t ctx, vapi_msg_id_t id, + u32 context, void *msg) +{ + int mrv; + if (0 != (mrv = pthread_mutex_lock (&ctx->requests_mutex))) + { + VAPI_DBG ("pthread_mutex_lock() failed, rv=%d:%s", mrv, strerror (mrv)); + return VAPI_MUTEX_FAILURE; + } + int tmp = ctx->requests_start; + const int requests_end = vapi_requests_end (ctx); + while (ctx->requests[tmp].context != context && tmp != requests_end) + { + ++tmp; + if (tmp == ctx->requests_size) + { + tmp = 0; + } + } + VAPI_DBG ("dispatch, search from %d, %s at %d", ctx->requests_start, + ctx->requests[tmp].context == context ? "matched" : "stopped", + tmp); + vapi_error_e rv = VAPI_OK; + if (ctx->requests[tmp].context == context) + { + while (ctx->requests_start != tmp) + { + VAPI_ERR ("No response to req with context=%u", + (unsigned) ctx->requests[tmp].context); + ctx->requests[ctx->requests_start].callback (ctx, + ctx->requests + [ctx-> + requests_start].callback_ctx, + VAPI_ENORESP, true, + NULL); + memset (&ctx->requests[ctx->requests_start], 0, + sizeof (ctx->requests[ctx->requests_start])); + ++ctx->requests_start; + --ctx->requests_count; + if (ctx->requests_start == ctx->requests_size) + { + ctx->requests_start = 0; + } + } + // now ctx->requests_start == tmp + int payload_offset = vapi_get_payload_offset (id); + void *payload = ((u8 *) msg) + payload_offset; + bool is_last = true; + if (ctx->requests[tmp].is_dump) + { + if (vapi_msg_id_control_ping_reply == id) + { + payload = NULL; + } + else + { + is_last = false; + } + } + if (payload_offset != -1) + { + rv = + ctx->requests[tmp].callback (ctx, ctx->requests[tmp].callback_ctx, + VAPI_OK, is_last, payload); + } + else + { + /* this is a message without payload, so bend the callback a little + */ + rv = + ((vapi_error_e (*)(vapi_ctx_t, void *, vapi_error_e, bool)) + ctx->requests[tmp].callback) (ctx, + ctx->requests[tmp].callback_ctx, + VAPI_OK, is_last); + } + if (is_last) + { + memset (&ctx->requests[ctx->requests_start], 0, + sizeof (ctx->requests[ctx->requests_start])); + ++ctx->requests_start; + --ctx->requests_count; + if (ctx->requests_start == ctx->requests_size) + { + ctx->requests_start = 0; + } + } + VAPI_DBG ("after dispatch, req start = %d, end = %d, count = %d", + ctx->requests_start, requests_end, ctx->requests_count); + } + if (0 != (mrv = pthread_mutex_unlock (&ctx->requests_mutex))) + { + VAPI_DBG ("pthread_mutex_unlock() failed, rv=%d:%s", mrv, + 
strerror (mrv)); + abort (); /* this really shouldn't happen */ + } + return rv; +} + +static vapi_error_e +vapi_dispatch_event (vapi_ctx_t ctx, vapi_msg_id_t id, void *msg) +{ + if (ctx->event_cbs[id].cb) + { + return ctx->event_cbs[id].cb (ctx, ctx->event_cbs[id].ctx, msg); + } + else if (ctx->generic_cb.cb) + { + return ctx->generic_cb.cb (ctx, ctx->generic_cb.ctx, id, msg); + } + else + { + VAPI_DBG + ("No handler/generic handler for msg id %u[%s], message ignored", + (unsigned) id, __vapi_metadata.msgs[id]->name); + } + return VAPI_OK; +} + +static bool +vapi_msg_is_with_context (vapi_msg_id_t id) +{ + assert (id <= __vapi_metadata.count); + return __vapi_metadata.msgs[id]->has_context; +} + +vapi_error_e +vapi_dispatch_one (vapi_ctx_t ctx) +{ + VAPI_DBG ("vapi_dispatch_one()"); + void *msg; + size_t size; + vapi_error_e rv = vapi_recv (ctx, &msg, &size); + if (VAPI_OK != rv) + { + VAPI_DBG ("vapi_recv failed with rv=%d", rv); + return rv; + } + u16 vpp_id = be16toh (*(u16 *) msg); + if (vpp_id > ctx->vl_msg_id_max) + { + VAPI_ERR ("Unknown msg ID received, id `%u', out of range <0,%u>", + (unsigned) vpp_id, (unsigned) ctx->vl_msg_id_max); + vapi_msg_free (ctx, msg); + return VAPI_EINVAL; + } + if (~0 == (unsigned) ctx->vl_msg_id_to_vapi_msg_t[vpp_id]) + { + VAPI_ERR ("Unknown msg ID received, id `%u' marked as not supported", + (unsigned) vpp_id); + vapi_msg_free (ctx, msg); + return VAPI_EINVAL; + } + const vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[vpp_id]; + const size_t expect_size = vapi_get_message_size (id); + if (size < expect_size) + { + VAPI_ERR + ("Invalid msg received, unexpected size `%zu' < expected min `%zu'", + size, expect_size); + vapi_msg_free (ctx, msg); + return VAPI_EINVAL; + } + u32 context; + vapi_get_swap_to_host_func (id) (msg); + if (vapi_msg_is_with_context (id)) + { + context = *(u32 *) (((u8 *) msg) + vapi_get_context_offset (id)); + /* is this a message originating from VAPI? 
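+	 (request contexts handed out by vapi_gen_req_context always carry
+	  the top bit, context_counter_mask, so the test below separates
+	  replies to our own requests from unsolicited events)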
*/ + VAPI_DBG ("dispatch, context is %x", context); + if (context & context_counter_mask) + { + rv = vapi_dispatch_response (ctx, id, context, msg); + goto done; + } + } + rv = vapi_dispatch_event (ctx, id, msg); + +done: + vapi_msg_free (ctx, msg); + return rv; +} + +vapi_error_e +vapi_dispatch (vapi_ctx_t ctx) +{ + vapi_error_e rv = VAPI_OK; + while (!vapi_requests_empty (ctx)) + { + rv = vapi_dispatch_one (ctx); + if (VAPI_OK != rv) + { + return rv; + } + } + return rv; +} + +void +vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id, + vapi_event_cb callback, void *callback_ctx) +{ + vapi_event_cb_with_ctx *c = &ctx->event_cbs[id]; + c->cb = callback; + c->ctx = callback_ctx; +} + +void +vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id) +{ + vapi_set_event_cb (ctx, id, NULL, NULL); +} + +void +vapi_set_generic_event_cb (vapi_ctx_t ctx, vapi_generic_event_cb callback, + void *callback_ctx) +{ + ctx->generic_cb.cb = callback; + ctx->generic_cb.ctx = callback_ctx; +} + +void +vapi_clear_generic_event_cb (vapi_ctx_t ctx) +{ + ctx->generic_cb.cb = NULL; + ctx->generic_cb.ctx = NULL; +} + +u16 +vapi_lookup_vl_msg_id (vapi_ctx_t ctx, vapi_msg_id_t id) +{ + assert (id < __vapi_metadata.count); + return ctx->vapi_msg_id_t_to_vl_msg_id[id]; +} + +int +vapi_get_client_index (vapi_ctx_t ctx) +{ + return api_main.my_client_index; +} + +bool +vapi_is_nonblocking (vapi_ctx_t ctx) +{ + return (VAPI_MODE_NONBLOCKING == ctx->mode); +} + +bool vapi_requests_full (vapi_ctx_t ctx); + +size_t vapi_get_request_count (vapi_ctx_t ctx); + +size_t +vapi_get_max_request_count (vapi_ctx_t ctx) +{ + return ctx->requests_size - 1; +} + +int +vapi_get_payload_offset (vapi_msg_id_t id) +{ + assert (id < __vapi_metadata.count); + return __vapi_metadata.msgs[id]->payload_offset; +} + +void (*vapi_get_swap_to_host_func (vapi_msg_id_t id)) (void *msg) +{ + assert (id < __vapi_metadata.count); + return __vapi_metadata.msgs[id]->swap_to_host; +} + +void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *msg) +{ + assert (id < __vapi_metadata.count); + return __vapi_metadata.msgs[id]->swap_to_be; +} + +size_t +vapi_get_message_size (vapi_msg_id_t id) +{ + assert (id < __vapi_metadata.count); + return __vapi_metadata.msgs[id]->size; +} + +size_t +vapi_get_context_offset (vapi_msg_id_t id) +{ + assert (id < __vapi_metadata.count); + return __vapi_metadata.msgs[id]->context_offset; +} + +vapi_msg_id_t +vapi_register_msg (vapi_message_desc_t * msg) +{ + int i = 0; + for (i = 0; i < __vapi_metadata.count; ++i) + { + if (!strcmp + (msg->name_with_crc, __vapi_metadata.msgs[i]->name_with_crc)) + { + /* this happens if somebody is linking together several objects while + * using the static inline headers, just fill in the already + * assigned id here so that all the objects are in sync */ + msg->id = __vapi_metadata.msgs[i]->id; + return msg->id; + } + } + vapi_msg_id_t id = __vapi_metadata.count; + ++__vapi_metadata.count; + __vapi_metadata.msgs = + realloc (__vapi_metadata.msgs, + sizeof (*__vapi_metadata.msgs) * __vapi_metadata.count); + __vapi_metadata.msgs[id] = msg; + size_t s = strlen (msg->name_with_crc); + if (s > __vapi_metadata.max_len_name_with_crc) + { + __vapi_metadata.max_len_name_with_crc = s; + } + msg->id = id; + return id; +} + +vapi_error_e +vapi_producer_lock (vapi_ctx_t ctx) +{ + int mrv; + if (0 != (mrv = pthread_mutex_lock (&ctx->requests_mutex))) + { + VAPI_DBG ("pthread_mutex_lock() failed, rv=%d:%s", mrv, strerror (mrv)); + (void) mrv; /* avoid warning if the above debug is not enabled */ + return 
VAPI_MUTEX_FAILURE; + } + return VAPI_OK; +} + +vapi_error_e +vapi_producer_unlock (vapi_ctx_t ctx) +{ + int mrv; + if (0 != (mrv = pthread_mutex_unlock (&ctx->requests_mutex))) + { + VAPI_DBG ("pthread_mutex_unlock() failed, rv=%d:%s", mrv, + strerror (mrv)); + (void) mrv; /* avoid warning if the above debug is not enabled */ + return VAPI_MUTEX_FAILURE; + } + return VAPI_OK; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp-api/vapi/vapi.h b/src/vpp-api/vapi/vapi.h new file mode 100644 index 00000000..1e1d567a --- /dev/null +++ b/src/vpp-api/vapi/vapi.h @@ -0,0 +1,285 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef vpp_api_h_included +#define vpp_api_h_included + +#include +#include +#include + +/** + * @file vapi.h + * + * common vpp api C declarations + * + * This file declares the common C API functions. These include connect, + * disconnect and utility functions as well as the low-level vapi_send and + * vapi_recv API. This is only the transport layer. + * + * Message formats and higher-level APIs are generated by running the + * vapi_c_gen.py script (which is run for in-tree APIs as part of the build + * process). It's not recommended to mix the higher and lower level APIs. Due + * to version issues, the higher-level APIs are not part of the shared library. 
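+ *
+ * A minimal blocking-mode sketch of the low-level flow declared here; the
+ * client name, queue sizes and request_size are illustrative only, and a
+ * real request would normally be built and byte-swapped by the generated
+ * vapi_alloc_* / *_hton helpers. A message not consumed by a successful
+ * vapi_send must be released with vapi_msg_free:
+ *
+ *   vapi_ctx_t ctx;
+ *   if (VAPI_OK != vapi_ctx_alloc (&ctx))
+ *     return;
+ *   if (VAPI_OK != vapi_connect (ctx, "example-client", NULL, 32, 32,
+ *                                VAPI_MODE_BLOCKING))
+ *     return;
+ *   void *msg = vapi_msg_alloc (ctx, request_size);
+ *   ... fill in the request and set its _vl_msg_id ...
+ *   if (VAPI_OK != vapi_send (ctx, msg))
+ *     vapi_msg_free (ctx, msg);
+ *   else
+ *     {
+ *       void *reply;
+ *       size_t reply_size;
+ *       if (VAPI_OK == vapi_recv (ctx, &reply, &reply_size))
+ *         vapi_msg_free (ctx, reply);
+ *     }
+ *   vapi_disconnect (ctx);
+ *   vapi_ctx_free (ctx);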
+ */ + +typedef enum +{ + VAPI_OK = 0, /**< success */ + VAPI_EINVAL, /**< invalid value encountered */ + VAPI_EAGAIN, /**< operation would block */ + VAPI_ENOTSUP, /**< operation not supported */ + VAPI_ENOMEM, /**< out of memory */ + VAPI_ENORESP, /**< no response to request */ + VAPI_EMAP_FAIL, /**< failure while mapping api */ + VAPI_ECON_FAIL, /**< failure while connecting to vpp */ + VAPI_EINCOMPATIBLE, /**< fundamental incompatibility while connecting to vpp + (control ping/control ping reply mismatch) */ + VAPI_MUTEX_FAILURE, /**< failure manipulating internal mutex(es) */ + VAPI_EUSER, /**< user error used for breaking dispatch, + never used by VAPI */ +} vapi_error_e; + +typedef enum +{ + VAPI_MODE_BLOCKING = 1, /**< operations block until response received */ + VAPI_MODE_NONBLOCKING = 2, /**< operations never block */ +} vapi_mode_e; + +typedef enum +{ + VAPI_WAIT_FOR_READ, /**< wait until a message can be read */ + VAPI_WAIT_FOR_WRITE, /**< wait until a message can be written */ + VAPI_WAIT_FOR_READ_WRITE, /**< wait until a read or write can be done */ +} vapi_wait_mode_e; + +typedef int vapi_msg_id_t; +typedef struct vapi_ctx_s *vapi_ctx_t; + +/** + * @brief allocate vapi message of given size + * + * @note message must be freed by vapi_msg_free if not consumed by vapi_send + * call + * + * @param ctx opaque vapi context + * + * @return pointer to message or NULL if out of memory + */ +void *vapi_msg_alloc (vapi_ctx_t ctx, size_t size); + +/** + * @brief free a vapi message + * + * @note messages received by vapi_recv must be freed when no longer needed + * + * @param ctx opaque vapi context + * @param msg message to be freed + */ +void vapi_msg_free (vapi_ctx_t ctx, void *msg); + +/** + * @brief allocate vapi context + * + * @param[out] pointer to result variable + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_ctx_alloc (vapi_ctx_t * result); + +/** + * @brief free vapi context + */ +void vapi_ctx_free (vapi_ctx_t ctx); + +/** + * @brief check if message identified by it's message id is known by the vpp to + * which the connection is open + */ +bool vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t type); + +/** + * @brief connect to vpp + * + * @param ctx opaque vapi context, must be allocated using vapi_ctx_alloc first + * @param name application name + * @param chroot_prefix shared memory prefix + * @param max_outstanding_requests max number of outstanding requests queued + * @param response_queue_size size of the response queue + * @param mode mode of operation - blocking or nonblocking + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_connect (vapi_ctx_t ctx, const char *name, + const char *chroot_prefix, + int max_outstanding_requests, + int response_queue_size, vapi_mode_e mode); + +/** + * @brief disconnect from vpp + * + * @param ctx opaque vapi context + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_disconnect (vapi_ctx_t ctx); + +/** + * @brief get event file descriptor + * + * @note this file descriptor becomes readable when messages (from vpp) + * are waiting in queue + * + * @param ctx opaque vapi context + * @param[out] fd pointer to result variable + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_get_fd (vapi_ctx_t ctx, int *fd); + +/** + * @brief low-level api for sending messages to vpp + * + * @note it is not recommended to use this api directly, use generated api + * instead + * + * @param ctx opaque vapi 
context + * @param msg message to send + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_send (vapi_ctx_t ctx, void *msg); + +/** + * @brief low-level api for atomically sending two messages to vpp - either + * both messages are sent or neither one is + * + * @note it is not recommended to use this api directly, use generated api + * instead + * + * @param ctx opaque vapi context + * @param msg1 first message to send + * @param msg2 second message to send + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2); + +/** + * @brief low-level api for reading messages from vpp + * + * @note it is not recommended to use this api directly, use generated api + * instead + * + * @param ctx opaque vapi context + * @param[out] msg pointer to result variable containing message + * @param[out] msg_size pointer to result variable containing message size + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size); + +/** + * @brief wait for connection to become readable or writable + * + * @param ctx opaque vapi context + * @param mode type of property to wait for - readability, writability or both + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode); + +/** + * @brief pick next message sent by vpp and call the appropriate callback + * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_dispatch_one (vapi_ctx_t ctx); + +/** + * @brief loop vapi_dispatch_one until responses to all currently outstanding + * requests have been received and their callbacks called + * + * @note the dispatch loop is interrupted if any error is encountered or + * returned from the callback, in which case this error is returned as the + * result of vapi_dispatch. In this case it might be necessary to call dispatch + * again to process the remaining messages. Returning VAPI_EUSER from + * a callback allows the user to break the dispatch loop (and distinguish + * this case in the calling code from other failures). VAPI never returns + * VAPI_EUSER on its own. 
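+ *
+ * For example, a callback of the shape used by the generated wrappers can
+ * stop the loop like this (sketch only; stop_requested is an assumed flag
+ * owned by the caller):
+ *
+ *   static vapi_error_e
+ *   my_cb (vapi_ctx_t ctx, void *callback_ctx, vapi_error_e rv,
+ *          bool is_last, void *payload)
+ *   {
+ *     ... consume payload ...
+ *     return stop_requested ? VAPI_EUSER : VAPI_OK;
+ *   }
+ *
+ * A VAPI_EUSER result from vapi_dispatch then means "stopped by the
+ * callback" rather than a transport failure.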
+ * + * @return VAPI_OK on success, other error code on error + */ +vapi_error_e vapi_dispatch (vapi_ctx_t ctx); + +/** generic vapi event callback */ +typedef vapi_error_e (*vapi_event_cb) (vapi_ctx_t ctx, void *callback_ctx, + void *payload); + +/** + * @brief set event callback to call when message with given id is dispatched + * + * @param ctx opaque vapi context + * @param id message id + * @param callback callback + * @param callback_ctx context pointer stored and passed to callback + */ +void vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id, + vapi_event_cb callback, void *callback_ctx); + +/** + * @brief clear event callback for given message id + * + * @param ctx opaque vapi context + * @param id message id + */ +void vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id); + +/** generic vapi event callback */ +typedef vapi_error_e (*vapi_generic_event_cb) (vapi_ctx_t ctx, + void *callback_ctx, + vapi_msg_id_t id, void *msg); +/** + * @brief set generic event callback + * + * @note this callback is called by dispatch if no message-type specific + * callback is set (so it's a fallback callback) + * + * @param ctx opaque vapi context + * @param callback callback + * @param callback_ctx context pointer stored and passed to callback + */ +void vapi_set_generic_event_cb (vapi_ctx_t ctx, + vapi_generic_event_cb callback, + void *callback_ctx); + +/** + * @brief clear generic event callback + * + * @param ctx opaque vapi context + */ +void vapi_clear_generic_event_cb (vapi_ctx_t ctx); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp-api/vapi/vapi_c_gen.py b/src/vpp-api/vapi/vapi_c_gen.py new file mode 100755 index 00000000..2bc1eef8 --- /dev/null +++ b/src/vpp-api/vapi/vapi_c_gen.py @@ -0,0 +1,809 @@ +#!/usr/bin/env python3 + +import argparse +import os +import sys +import logging +from vapi_json_parser import Field, Struct, Message, JsonParser,\ + SimpleType, StructType + + +class CField(Field): + def __init__( + self, + field_name, + field_type, + array_len=None, + nelem_field=None): + super().__init__(field_name, field_type, array_len, nelem_field) + + def get_c_def(self): + if self.len is not None: + return "%s %s[%d]" % (self.type.get_c_name(), self.name, self.len) + else: + return "%s %s" % (self.type.get_c_name(), self.name) + + def get_swap_to_be_code(self, struct, var): + if self.len is not None: + if self.len > 0: + return "do { int i; for (i = 0; i < %d; ++i) { %s } }"\ + " while(0);" % ( + self.len, + self.type.get_swap_to_be_code(struct, "%s[i]" % var)) + else: + if self.nelem_field.needs_byte_swap(): + nelem_field = "%s(%s%s)" % ( + self.nelem_field.type.get_swap_to_host_func_name(), + struct, self.nelem_field.name) + else: + nelem_field = "%s%s" % (struct, self.nelem_field.name) + return ( + "do { int i; for (i = 0; i < %s; ++i) { %s } }" + " while(0);" % + (nelem_field, self.type.get_swap_to_be_code( + struct, "%s[i]" % var))) + return self.type.get_swap_to_be_code(struct, "%s" % var) + + def get_swap_to_host_code(self, struct, var): + if self.len is not None: + if self.len > 0: + return "do { int i; for (i = 0; i < %d; ++i) { %s } }"\ + " while(0);" % ( + self.len, + self.type.get_swap_to_host_code(struct, "%s[i]" % var)) + else: + # nelem_field already swapped to host here... 
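+                # (the length member is defined before the array in the
+                # message, so the generated _ntoh code has converted it by
+                # the time this loop runs and it can be read directly)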
+ return ( + "do { int i; for (i = 0; i < %s%s; ++i) { %s } }" + " while(0);" % + (struct, self.nelem_field.name, + self.type.get_swap_to_host_code( + struct, "%s[i]" % var))) + return self.type.get_swap_to_host_code(struct, "%s" % var) + + def needs_byte_swap(self): + return self.type.needs_byte_swap() + + +class CStruct(Struct): + def __init__(self, name, fields): + super().__init__(name, fields) + + def get_c_def(self): + return "\n".join([ + "typedef struct __attribute__((__packed__)) {", + "%s;" % ";\n".join([" %s" % x.get_c_def() + for x in self.fields]), + "} %s;" % self.get_c_name()]) + + +class CSimpleType (SimpleType): + + swap_to_be_dict = { + 'i16': 'htobe16', 'u16': 'htobe16', + 'i32': 'htobe32', 'u32': 'htobe32', + 'i64': 'htobe64', 'u64': 'htobe64', + } + + swap_to_host_dict = { + 'i16': 'be16toh', 'u16': 'be16toh', + 'i32': 'be32toh', 'u32': 'be32toh', + 'i64': 'be64toh', 'u64': 'be64toh', + } + + def __init__(self, name): + super().__init__(name) + + def get_c_name(self): + return self.name + + def get_swap_to_be_func_name(self): + return self.swap_to_be_dict[self.name] + + def get_swap_to_host_func_name(self): + return self.swap_to_host_dict[self.name] + + def get_swap_to_be_code(self, struct, var): + x = "%s%s" % (struct, var) + return "%s = %s(%s);" % (x, self.get_swap_to_be_func_name(), x) + + def get_swap_to_host_code(self, struct, var): + x = "%s%s" % (struct, var) + return "%s = %s(%s);" % (x, self.get_swap_to_host_func_name(), x) + + def needs_byte_swap(self): + try: + self.get_swap_to_host_func_name() + return True + except: + pass + return False + + +class CStructType (StructType, CStruct): + def __init__(self, definition, typedict, field_class): + super().__init__(definition, typedict, field_class) + + def get_c_name(self): + return "vapi_type_%s" % self.name + + def get_swap_to_be_func_name(self): + return "%s_hton" % self.get_c_name() + + def get_swap_to_host_func_name(self): + return "%s_ntoh" % self.get_c_name() + + def get_swap_to_be_func_decl(self): + return "void %s(%s *msg)" % ( + self.get_swap_to_be_func_name(), self.get_c_name()) + + def get_swap_to_be_func_def(self): + return "%s\n{\n%s\n}" % ( + self.get_swap_to_be_func_decl(), + "\n".join([ + " %s" % p.get_swap_to_be_code("msg->", "%s" % p.name) + for p in self.fields if p.needs_byte_swap()]), + ) + + def get_swap_to_host_func_decl(self): + return "void %s(%s *msg)" % ( + self.get_swap_to_host_func_name(), self.get_c_name()) + + def get_swap_to_host_func_def(self): + return "%s\n{\n%s\n}" % ( + self.get_swap_to_host_func_decl(), + "\n".join([ + " %s" % p.get_swap_to_host_code("msg->", "%s" % p.name) + for p in self.fields if p.needs_byte_swap()]), + ) + + def get_swap_to_be_code(self, struct, var): + return "%s(&%s%s);" % (self.get_swap_to_be_func_name(), struct, var) + + def get_swap_to_host_code(self, struct, var): + return "%s(&%s%s);" % (self.get_swap_to_host_func_name(), struct, var) + + def needs_byte_swap(self): + for f in self.fields: + if f.needs_byte_swap(): + return True + return False + + +class CMessage (Message): + def __init__(self, logger, definition, typedict, + struct_type_class, simple_type_class, field_class): + super().__init__(logger, definition, typedict, struct_type_class, + simple_type_class, field_class) + self.payload_members = [ + " %s" % p.get_c_def() + for p in self.fields + if p.type != self.header + ] + + def has_payload(self): + return len(self.payload_members) > 0 + + def get_msg_id_name(self): + return "vapi_msg_id_%s" % self.name + + def get_c_name(self): + return 
"vapi_msg_%s" % self.name + + def get_payload_struct_name(self): + return "vapi_payload_%s" % self.name + + def get_alloc_func_vla_field_length_name(self, field): + return "%s_array_size" % field.name + + def get_alloc_func_name(self): + return "vapi_alloc_%s" % self.name + + def get_alloc_func_decl(self): + return "%s* %s(struct vapi_ctx_s *ctx%s)" % ( + self.get_c_name(), + self.get_alloc_func_name(), + "".join([", size_t %s" % + self.get_alloc_func_vla_field_length_name(f) + for f in self.fields + if f.nelem_field is not None])) + + def get_alloc_func_def(self): + extra = [] + if self.header.has_field('client_index'): + extra.append( + " msg->header.client_index = vapi_get_client_index(ctx);") + if self.header.has_field('context'): + extra.append(" msg->header.context = 0;") + return "\n".join([ + "%s" % self.get_alloc_func_decl(), + "{", + " %s *msg = NULL;" % self.get_c_name(), + " const size_t size = sizeof(%s)%s;" % ( + self.get_c_name(), + "".join([ + " + sizeof(msg->payload.%s[0]) * %s" % ( + f.name, + self.get_alloc_func_vla_field_length_name(f)) + for f in self.fields + if f.nelem_field is not None + ])), + " msg = vapi_msg_alloc(ctx, size);", + " if (!msg) {", + " return NULL;", + " }", + ] + extra + [ + " msg->header._vl_msg_id = vapi_lookup_vl_msg_id(ctx, %s);" % + self.get_msg_id_name(), + "\n".join([" msg->payload.%s = %s;" % ( + f.nelem_field.name, + self.get_alloc_func_vla_field_length_name(f)) + for f in self.fields + if f.nelem_field is not None]), + " return msg;", + "}"]) + + def get_calc_msg_size_func_name(self): + return "vapi_calc_%s_msg_size" % self.name + + def get_calc_msg_size_func_decl(self): + return "uword %s(%s *msg)" % ( + self.get_calc_msg_size_func_name(), + self.get_c_name()) + + def get_calc_msg_size_func_def(self): + return "\n".join([ + "%s" % self.get_calc_msg_size_func_decl(), + "{", + " return sizeof(*msg)%s;" % + "".join(["+ msg->payload.%s * sizeof(msg->payload.%s[0])" % ( + f.nelem_field.name, + f.name) + for f in self.fields + if f.nelem_field is not None + ]), + "}", + ]) + + def get_c_def(self): + if self.has_payload(): + return "\n".join([ + "typedef struct __attribute__ ((__packed__)) {", + "%s; " % + ";\n".join(self.payload_members), + "} %s;" % self.get_payload_struct_name(), + "", + "typedef struct __attribute__ ((__packed__)) {", + (" %s %s;" % (self.header.get_c_name(), + self.fields[0].name) + if self.header is not None else ""), + " %s payload;" % self.get_payload_struct_name(), + "} %s;" % self.get_c_name(), ]) + else: + return "\n".join([ + "typedef struct __attribute__ ((__packed__)) {", + (" %s %s;" % (self.header.get_c_name(), + self.fields[0].name) + if self.header is not None else ""), + "} %s;" % self.get_c_name(), ]) + + def get_swap_payload_to_host_func_name(self): + return "%s_payload_ntoh" % self.get_c_name() + + def get_swap_payload_to_be_func_name(self): + return "%s_payload_hton" % self.get_c_name() + + def get_swap_payload_to_host_func_decl(self): + return "void %s(%s *payload)" % ( + self.get_swap_payload_to_host_func_name(), + self.get_payload_struct_name()) + + def get_swap_payload_to_be_func_decl(self): + return "void %s(%s *payload)" % ( + self.get_swap_payload_to_be_func_name(), + self.get_payload_struct_name()) + + def get_swap_payload_to_be_func_def(self): + return "%s\n{\n%s\n}" % ( + self.get_swap_payload_to_be_func_decl(), + "\n".join([ + " %s" % p.get_swap_to_be_code("payload->", "%s" % p.name) + for p in self.fields + if p.needs_byte_swap() and p.type != self.header]), + ) + + def 
get_swap_payload_to_host_func_def(self): + return "%s\n{\n%s\n}" % ( + self.get_swap_payload_to_host_func_decl(), + "\n".join([ + " %s" % p.get_swap_to_host_code("payload->", "%s" % p.name) + for p in self.fields + if p.needs_byte_swap() and p.type != self.header]), + ) + + def get_swap_to_host_func_name(self): + return "%s_ntoh" % self.get_c_name() + + def get_swap_to_be_func_name(self): + return "%s_hton" % self.get_c_name() + + def get_swap_to_host_func_decl(self): + return "void %s(%s *msg)" % ( + self.get_swap_to_host_func_name(), self.get_c_name()) + + def get_swap_to_be_func_decl(self): + return "void %s(%s *msg)" % ( + self.get_swap_to_be_func_name(), self.get_c_name()) + + def get_swap_to_be_func_def(self): + return "\n".join([ + "%s" % self.get_swap_to_be_func_decl(), + "{", + (" VAPI_DBG(\"Swapping `%s'@%%p to big endian\", msg);" % + self.get_c_name()), + " %s(&msg->header);" % self.header.get_swap_to_be_func_name() + if self.header is not None else "", + " %s(&msg->payload);" % self.get_swap_payload_to_be_func_name() + if self.has_payload() else "", + "}", + ]) + + def get_swap_to_host_func_def(self): + return "\n".join([ + "%s" % self.get_swap_to_host_func_decl(), + "{", + (" VAPI_DBG(\"Swapping `%s'@%%p to host byte order\", msg);" % + self.get_c_name()), + " %s(&msg->header);" % self.header.get_swap_to_host_func_name() + if self.header is not None else "", + " %s(&msg->payload);" % self.get_swap_payload_to_host_func_name() + if self.has_payload() else "", + "}", + ]) + + def get_op_func_name(self): + return "vapi_%s" % self.name + + def get_op_func_decl(self): + if self.reply.has_payload(): + return "vapi_error_e %s(%s)" % ( + self.get_op_func_name(), + ",\n ".join([ + 'struct vapi_ctx_s *ctx', + '%s *msg' % self.get_c_name(), + 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx', + ' void *callback_ctx', + ' vapi_error_e rv', + ' bool is_last', + ' %s *reply)' % + self.reply.get_payload_struct_name(), + 'void *callback_ctx', + ]) + ) + else: + return "vapi_error_e %s(%s)" % ( + self.get_op_func_name(), + ",\n ".join([ + 'struct vapi_ctx_s *ctx', + '%s *msg' % self.get_c_name(), + 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx', + ' void *callback_ctx', + ' vapi_error_e rv', + ' bool is_last)', + 'void *callback_ctx', + ]) + ) + + def get_op_func_def(self): + return "\n".join([ + "%s" % self.get_op_func_decl(), + "{", + " if (!msg || !callback) {", + " return VAPI_EINVAL;", + " }", + " if (vapi_is_nonblocking(ctx) && vapi_requests_full(ctx)) {", + " return VAPI_EAGAIN;", + " }", + " vapi_error_e rv;", + " if (VAPI_OK != (rv = vapi_producer_lock (ctx))) {", + " return rv;", + " }", + " u32 req_context = vapi_gen_req_context(ctx);", + " msg->header.context = req_context;", + " %s(msg);" % self.get_swap_to_be_func_name(), + (" if (VAPI_OK == (rv = vapi_send_with_control_ping " + "(ctx, msg, req_context))) {" + if self.is_dump() else + " if (VAPI_OK == (rv = vapi_send (ctx, msg))) {" + ), + (" vapi_store_request(ctx, req_context, %s, " + "(vapi_cb_t)callback, callback_ctx);" % + ("true" if self.is_dump() else "false")), + " if (VAPI_OK != vapi_producer_unlock (ctx)) {", + " abort (); /* this really shouldn't happen */", + " }", + " if (vapi_is_nonblocking(ctx)) {", + " rv = VAPI_OK;", + " } else {", + " rv = vapi_dispatch(ctx);", + " }", + " } else {", + " %s(msg);" % self.get_swap_to_host_func_name(), + " if (VAPI_OK != vapi_producer_unlock (ctx)) {", + " abort (); /* this really shouldn't happen */", + " }", + " }", + " return rv;", + "}", + "", + ]) + + def 
get_event_cb_func_decl(self): + if not self.is_reply(): + raise Exception( + "Cannot register event callback for non-reply function") + if self.has_payload(): + return "\n".join([ + "void vapi_set_%s_event_cb (" % + self.get_c_name(), + " struct vapi_ctx_s *ctx, ", + (" vapi_error_e (*callback)(struct vapi_ctx_s *ctx, " + "void *callback_ctx, %s *payload)," % + self.get_payload_struct_name()), + " void *callback_ctx)", + ]) + else: + return "\n".join([ + "void vapi_set_%s_event_cb (" % + self.get_c_name(), + " struct vapi_ctx_s *ctx, ", + " vapi_error_e (*callback)(struct vapi_ctx_s *ctx, " + "void *callback_ctx),", + " void *callback_ctx)", + ]) + + def get_event_cb_func_def(self): + if not self.is_reply(): + raise Exception( + "Cannot register event callback for non-reply function") + return "\n".join([ + "%s" % self.get_event_cb_func_decl(), + "{", + (" vapi_set_event_cb(ctx, %s, (vapi_event_cb)callback, " + "callback_ctx);" % + self.get_msg_id_name()), + "}"]) + + def get_c_metadata_struct_name(self): + return "__vapi_metadata_%s" % self.name + + def get_c_constructor(self): + has_context = False + if self.header is not None: + has_context = self.header.has_field('context') + return '\n'.join([ + 'static void __attribute__((constructor)) __vapi_constructor_%s()' + % self.name, + '{', + ' static const char name[] = "%s";' % self.name, + ' static const char name_with_crc[] = "%s_%s";' + % (self.name, self.crc[2:]), + ' static vapi_message_desc_t %s = {' % + self.get_c_metadata_struct_name(), + ' name,', + ' sizeof(name) - 1,', + ' name_with_crc,', + ' sizeof(name_with_crc) - 1,', + ' true,' if has_context else ' false,', + ' offsetof(%s, context),' % self.header.get_c_name() + if has_context else ' 0,', + (' offsetof(%s, payload),' % self.get_c_name()) + if self.has_payload() else '-1,', + ' sizeof(%s),' % self.get_c_name(), + ' (generic_swap_fn_t)%s,' % self.get_swap_to_be_func_name(), + ' (generic_swap_fn_t)%s,' % self.get_swap_to_host_func_name(), + ' ~0,', + ' };', + '', + ' %s = vapi_register_msg(&%s);' % + (self.get_msg_id_name(), self.get_c_metadata_struct_name()), + ' VAPI_DBG("Assigned msg id %%d to %s", %s);' % + (self.name, self.get_msg_id_name()), + '}', + ]) + + +vapi_send_with_control_ping = """ +static inline vapi_error_e +vapi_send_with_control_ping (vapi_ctx_t ctx, void *msg, u32 context) +{ + vapi_msg_control_ping *ping = vapi_alloc_control_ping (ctx); + if (!ping) + { + return VAPI_ENOMEM; + } + ping->header.context = context; + vapi_msg_control_ping_hton (ping); + return vapi_send2 (ctx, msg, ping); +} +""" + + +def gen_json_header(parser, logger, j, io): + logger.info("Generating header `%s'" % io.name) + orig_stdout = sys.stdout + sys.stdout = io + include_guard = "__included_%s" % ( + j.replace(".", "_").replace("/", "_").replace("-", "_")) + print("#ifndef %s" % include_guard) + print("#define %s" % include_guard) + print("") + print("#include ") + print("") + if io.name == "vpe.api.vapi.h": + print("static inline vapi_error_e vapi_send_with_control_ping " + "(vapi_ctx_t ctx, void * msg, u32 context);") + print("") + for m in parser.messages_by_json[j].values(): + print("extern vapi_msg_id_t %s;" % m.get_msg_id_name()) + print("") + for t in parser.types_by_json[j].values(): + try: + print("%s" % t.get_c_def()) + print("") + except: + pass + for t in parser.types_by_json[j].values(): + print("%s;" % t.get_swap_to_be_func_decl()) + print("") + print("%s;" % t.get_swap_to_host_func_decl()) + print("") + for m in parser.messages_by_json[j].values(): + print("%s" % 
m.get_c_def()) + print("") + for m in parser.messages_by_json[j].values(): + if not m.is_reply(): + print("%s;" % m.get_alloc_func_decl()) + print("") + print("%s;" % m.get_op_func_decl()) + if m.has_payload(): + print("%s;" % m.get_swap_payload_to_be_func_decl()) + print("") + print("%s;" % m.get_swap_payload_to_host_func_decl()) + print("") + print("%s;" % m.get_calc_msg_size_func_decl()) + print("") + print("%s;" % m.get_swap_to_host_func_decl()) + print("") + print("%s;" % m.get_swap_to_be_func_decl()) + print("") + for m in parser.messages_by_json[j].values(): + if not m.is_reply(): + continue + print("%s;" % m.get_event_cb_func_decl()) + print("") + + if io.name == "vpe.api.vapi.h": + print("%s" % vapi_send_with_control_ping) + print("") + + print("#endif") + sys.stdout = orig_stdout + + +def gen_json_code(parser, logger, j, io): + logger.info("Generating code `%s'" % io.name) + orig_stdout = sys.stdout + sys.stdout = io + print("#include <%s>" % json_to_header_name(j)) + print("#include ") + print("#include ") + print("#include ") + print("#include ") + print("#include ") + print("") + for t in parser.types_by_json[j].values(): + print("%s" % t.get_swap_to_be_func_def()) + print("") + print("%s" % t.get_swap_to_host_func_def()) + print("") + for m in parser.messages_by_json[j].values(): + if m.has_payload(): + print("%s" % m.get_swap_payload_to_be_func_def()) + print("") + print("%s" % m.get_swap_payload_to_host_func_def()) + print("") + print("%s" % m.get_calc_msg_size_func_def()) + print("") + print("%s" % m.get_swap_to_be_func_def()) + print("") + print("%s" % m.get_swap_to_host_func_def()) + print("") + for m in parser.messages_by_json[j].values(): + if m.is_reply(): + continue + print("%s" % m.get_alloc_func_def()) + print("") + print("%s" % m.get_op_func_def()) + print("") + print("") + for m in parser.messages_by_json[j].values(): + print("%s" % m.get_c_constructor()) + print("") + print("") + for m in parser.messages_by_json[j].values(): + if not m.is_reply(): + continue + print("%s;" % m.get_event_cb_func_def()) + print("") + print("") + for m in parser.messages_by_json[j].values(): + print("vapi_msg_id_t %s;" % m.get_msg_id_name()) + sys.stdout = orig_stdout + + +def gen_json_unified_header(parser, logger, j, io): + logger.info("Generating header `%s'" % io.name) + orig_stdout = sys.stdout + sys.stdout = io + include_guard = "__included_%s" % ( + j.replace(".", "_").replace("/", "_").replace("-", "_")) + print("#ifndef %s" % include_guard) + print("#define %s" % include_guard) + print("") + print("#include ") + print("#include ") + print("#include ") + print("#include ") + print("#include ") + print("#include ") + if io.name == "vpe.api.vapi.h": + print("") + print("static inline vapi_error_e vapi_send_with_control_ping " + "(vapi_ctx_t ctx, void * msg, u32 context);") + else: + print("#include ") + print("") + for m in parser.messages_by_json[j].values(): + print("extern vapi_msg_id_t %s;" % m.get_msg_id_name()) + print("") + print("#define DEFINE_VAPI_MSG_IDS_%s\\" % + j.replace(".", "_").replace("/", "_").replace("-", "_").upper()) + print("\\\n".join([ + " vapi_msg_id_t %s;" % m.get_msg_id_name() + for m in parser.messages_by_json[j].values() + ])) + print("") + print("") + for t in parser.types_by_json[j].values(): + try: + print("%s" % t.get_c_def()) + print("") + except: + pass + for m in parser.messages_by_json[j].values(): + print("%s" % m.get_c_def()) + print("") + + print("") + function_attrs = "static inline " + for t in parser.types_by_json[j].values(): + 
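+        # everything in the unified header is emitted as static inline so
+        # the shared library does not need to export per-message symbols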
print("%s%s" % (function_attrs, t.get_swap_to_be_func_def())) + print("") + print("%s%s" % (function_attrs, t.get_swap_to_host_func_def())) + print("") + for m in parser.messages_by_json[j].values(): + if m.has_payload(): + print("%s%s" % (function_attrs, + m.get_swap_payload_to_be_func_def())) + print("") + print("%s%s" % (function_attrs, + m.get_swap_payload_to_host_func_def())) + print("") + print("%s%s" % (function_attrs, m.get_calc_msg_size_func_def())) + print("") + print("%s%s" % (function_attrs, m.get_swap_to_be_func_def())) + print("") + print("%s%s" % (function_attrs, m.get_swap_to_host_func_def())) + print("") + for m in parser.messages_by_json[j].values(): + if m.is_reply(): + continue + print("%s%s" % (function_attrs, m.get_alloc_func_def())) + print("") + print("%s%s" % (function_attrs, m.get_op_func_def())) + print("") + print("") + for m in parser.messages_by_json[j].values(): + print("%s" % m.get_c_constructor()) + print("") + print("") + for m in parser.messages_by_json[j].values(): + if not m.is_reply(): + continue + print("%s%s;" % (function_attrs, m.get_event_cb_func_def())) + print("") + print("") + + if io.name == "vpe.api.vapi.h": + print("%s" % vapi_send_with_control_ping) + print("") + + print("#endif") + sys.stdout = orig_stdout + + +def json_to_header_name(json_name): + if json_name.endswith(".json"): + return "%s.vapi.h" % os.path.splitext(json_name)[0] + raise Exception("Unexpected json name `%s'!" % json_name) + + +def json_to_code_name(json_name): + if json_name.endswith(".json"): + return "%s.vapi.c" % os.path.splitext(json_name)[0] + raise Exception("Unexpected json name `%s'!" % json_name) + + +def gen_c_headers_and_code(parser, logger, prefix): + if prefix == "" or prefix is None: + prefix = "" + else: + prefix = "%s/" % prefix + for j in parser.json_files: + with open('%s%s' % (prefix, json_to_header_name(j)), "w") as io: + gen_json_header(parser, logger, j, io) + with open('%s%s' % (prefix, json_to_code_name(j)), "w") as io: + gen_json_code(parser, logger, j, io) + + +def gen_c_unified_headers(parser, logger, prefix): + if prefix == "" or prefix is None: + prefix = "" + else: + prefix = "%s/" % prefix + for j in parser.json_files: + with open('%s%s' % (prefix, json_to_header_name(j)), "w") as io: + gen_json_unified_header(parser, logger, j, io) + + +if __name__ == '__main__': + try: + verbose = int(os.getenv("V", 0)) + except: + verbose = 0 + + if verbose >= 2: + log_level = 10 + elif verbose == 1: + log_level = 20 + else: + log_level = 40 + + logging.basicConfig(stream=sys.stdout, level=log_level) + logger = logging.getLogger("VAPI C GEN") + logger.setLevel(log_level) + + argparser = argparse.ArgumentParser(description="VPP JSON API parser") + argparser.add_argument('files', metavar='api-file', action='append', + type=str, help='json api file' + '(may be specified multiple times)') + argparser.add_argument('--prefix', action='store', default=None, + help='path prefix') + args = argparser.parse_args() + + jsonparser = JsonParser(logger, args.files, + simple_type_class=CSimpleType, + struct_type_class=CStructType, + field_class=CField, + message_class=CMessage) + + # not using the model of having separate generated header and code files + # with generated symbols present in shared library (per discussion with + # Damjan), to avoid symbol version issues in .so + # gen_c_headers_and_code(jsonparser, logger, args.prefix) + + gen_c_unified_headers(jsonparser, logger, args.prefix) + + for e in jsonparser.exceptions: + logger.error(e) diff --git 
a/src/vpp-api/vapi/vapi_dbg.h b/src/vpp-api/vapi/vapi_dbg.h new file mode 100644 index 00000000..95a80089 --- /dev/null +++ b/src/vpp-api/vapi/vapi_dbg.h @@ -0,0 +1,76 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __included_vapi_debug_h__ +#define __included_vapi_debug_h__ + +/* controls debug prints */ +#define VAPI_DEBUG (0) +#define VAPI_DEBUG_CONNECT (0) +#define VAPI_DEBUG_ALLOC (0) + +#if VAPI_DEBUG +#include +#define VAPI_DEBUG_FILE_DEF \ + static const char *__file = NULL; \ + { \ + __file = strrchr (__FILE__, '/'); \ + if (__file) \ + { \ + ++__file; \ + } \ + else \ + { \ + __file = __FILE__; \ + } \ + } + +#define VAPI_DBG(fmt, ...) \ + do \ + { \ + VAPI_DEBUG_FILE_DEF \ + printf ("DBG:%s:%d:%s():" fmt, __file, __LINE__, __func__, \ + ##__VA_ARGS__); \ + printf ("\n"); \ + fflush (stdout); \ + } \ + while (0); + +#define VAPI_ERR(fmt, ...) \ + do \ + { \ + VAPI_DEBUG_FILE_DEF \ + printf ("ERR:%s:%d:%s():" fmt, __file, __LINE__, __func__, \ + ##__VA_ARGS__); \ + printf ("\n"); \ + fflush (stdout); \ + } \ + while (0); +#else +#define VAPI_DBG(...) +#define VAPI_ERR(...) +#endif + +#endif /* __included_vapi_debug_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vpp-api/vapi/vapi_internal.h b/src/vpp-api/vapi/vapi_internal.h new file mode 100644 index 00000000..5b85788d --- /dev/null +++ b/src/vpp-api/vapi/vapi_internal.h @@ -0,0 +1,126 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef VAPI_INTERNAL_H +#define VAPI_INTERNAL_H + +#include +#include + +/** + * @file vapi_internal.h + * + * internal vpp api C declarations + * + * This file contains internal vpp api C declarations. It's not intended to be + * used by the client programmer and the API defined here might change at any + * time.. 
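+ *
+ * For orientation, a generated header registers each message with the
+ * metadata declared below roughly like this (sketch; "example" stands in
+ * for a real message name and the initializer is abbreviated):
+ *
+ *   static void __attribute__ ((constructor)) __vapi_constructor_example ()
+ *   {
+ *     static vapi_message_desc_t md = { ... name, sizes, offsets, swap
+ *                                           functions ... };
+ *     vapi_msg_id_example = vapi_register_msg (&md);
+ *   }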
+ */ + +struct vapi_ctx_s; + +typedef struct __attribute__ ((__packed__)) +{ + u16 _vl_msg_id; + u32 context; +} vapi_type_msg_header1_t; + +typedef struct __attribute__ ((__packed__)) +{ + u16 _vl_msg_id; + u32 client_index; + u32 context; +} vapi_type_msg_header2_t; + +static inline void +vapi_type_msg_header1_t_hton (vapi_type_msg_header1_t * h) +{ + h->_vl_msg_id = htobe16 (h->_vl_msg_id); +} + +static inline void +vapi_type_msg_header1_t_ntoh (vapi_type_msg_header1_t * h) +{ + h->_vl_msg_id = be16toh (h->_vl_msg_id); +} + +static inline void +vapi_type_msg_header2_t_hton (vapi_type_msg_header2_t * h) +{ + h->_vl_msg_id = htobe16 (h->_vl_msg_id); +} + +static inline void +vapi_type_msg_header2_t_ntoh (vapi_type_msg_header2_t * h) +{ + h->_vl_msg_id = be16toh (h->_vl_msg_id); +} + + +#include + +typedef vapi_error_e (*vapi_cb_t) (struct vapi_ctx_s *, void *, vapi_error_e, + bool, void *); + +typedef void (*generic_swap_fn_t) (void *payload); + +typedef struct +{ + const char *name; + size_t name_len; + const char *name_with_crc; + size_t name_with_crc_len; + bool has_context; + size_t context_offset; + size_t payload_offset; + size_t size; + generic_swap_fn_t swap_to_be; + generic_swap_fn_t swap_to_host; + vapi_msg_id_t id; /* assigned at run-time */ +} vapi_message_desc_t; + +typedef struct +{ + const char *name; + int payload_offset; + size_t size; + void (*swap_to_be) (void *payload); + void (*swap_to_host) (void *payload); +} vapi_event_desc_t; + +extern bool *__vapi_msg_is_with_context; + +vapi_msg_id_t vapi_register_msg (vapi_message_desc_t * msg); +u16 vapi_lookup_vl_msg_id (vapi_ctx_t ctx, vapi_msg_id_t id); +int vapi_get_client_index (vapi_ctx_t ctx); +bool vapi_is_nonblocking (vapi_ctx_t ctx); +bool vapi_requests_full (vapi_ctx_t ctx); +size_t vapi_get_request_count (vapi_ctx_t ctx); +size_t vapi_get_max_request_count (vapi_ctx_t ctx); +u32 vapi_gen_req_context (vapi_ctx_t ctx); +void vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump, + vapi_cb_t callback, void *callback_ctx); +int vapi_get_payload_offset (vapi_msg_id_t id); +void (*vapi_get_swap_to_host_func (vapi_msg_id_t id)) (void *payload); +void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *payload); +size_t vapi_get_message_size (vapi_msg_id_t id); +size_t vapi_get_context_offset (vapi_msg_id_t id); + +vapi_error_e vapi_producer_lock (vapi_ctx_t ctx); +vapi_error_e vapi_producer_unlock (vapi_ctx_t ctx); + +#endif diff --git a/src/vpp-api/vapi/vapi_json_parser.py b/src/vpp-api/vapi/vapi_json_parser.py new file mode 100644 index 00000000..57a22383 --- /dev/null +++ b/src/vpp-api/vapi/vapi_json_parser.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 + +import json + + +def msg_is_reply(name): + return name.endswith('_reply') or name.endswith('_details') \ + or name.endswith('_event') or name.endswith('_counters') + + +class ParseError (Exception): + pass + + +magic_prefix = "vl_api_" +magic_suffix = "_t" + + +def remove_magic(what): + if what.startswith(magic_prefix) and what.endswith(magic_suffix): + return what[len(magic_prefix): - len(magic_suffix)] + return what + + +class Field: + + def __init__( + self, + field_name, + field_type, + array_len=None, + nelem_field=None): + self.name = field_name + self.type = field_type + self.len = array_len + self.nelem_field = nelem_field + + def __str__(self): + if self.len is None: + return "name: %s, type: %s" % (self.name, self.type) + elif self.len > 0: + return "name: %s, type: %s, length: %s" % (self.name, self.type, + self.len) + else: + return ("name: %s, 
type: %s, variable length stored in: %s" % + (self.name, self.type, self.nelem_field)) + + +class Type: + def __init__(self, name): + self.name = name + + +class SimpleType (Type): + + def __init__(self, name): + super().__init__(name) + + def __str__(self): + return self.name + + +def get_msg_header_defs(struct_type_class, field_class, typedict): + return [ + struct_type_class(['msg_header1_t', + ['u16', '_vl_msg_id'], + ['u32', 'context'], + ], + typedict, field_class + ), + struct_type_class(['msg_header2_t', + ['u16', '_vl_msg_id'], + ['u32', 'client_index'], + ['u32', 'context'], + ], + typedict, field_class + ), + ] + + +class Struct: + + def __init__(self, name, fields): + self.name = name + self.fields = fields + self.field_names = [n.name for n in self.fields] + + +class Message: + + def __init__(self, logger, definition, typedict, + struct_type_class, simple_type_class, field_class): + self.logger = logger + m = definition + logger.debug("Parsing message definition `%s'" % m) + name = m[0] + self.name = name + logger.debug("Message name is `%s'" % name) + ignore = True + self.header = None + fields = [] + for header in get_msg_header_defs(struct_type_class, field_class, + typedict): + logger.debug("Probing header `%s'" % header.name) + if header.is_part_of_def(m[1:]): + self.header = header + logger.debug("Found header `%s'" % header.name) + fields.append(field_class(field_name='header', + field_type=self.header)) + ignore = False + break + if ignore and not msg_is_reply(name): + raise ParseError("While parsing message `%s': could not find all " + "common header fields" % name) + for field in m[1:]: + if len(field) == 1 and 'crc' in field: + self.crc = field['crc'] + logger.debug("Found CRC `%s'" % self.crc) + continue + else: + field_type = field[0] + if field_type in typedict: + field_type = typedict[field_type] + else: + field_type = typedict[remove_magic(field_type)] + if len(field) == 2: + if self.header is not None and\ + self.header.has_field(field[1]): + continue + p = field_class(field_name=field[1], + field_type=field_type) + elif len(field) == 3: + if field[2] == 0: + raise ParseError( + "While parsing message `%s': variable length " + "array `%s' doesn't have reference to member " + "containing the actual length" % ( + name, field[1])) + p = field_class( + field_name=field[1], + field_type=field_type, + array_len=field[2]) + elif len(field) == 4: + nelem_field = None + for f in fields: + if f.name == field[3]: + nelem_field = f + if nelem_field is None: + raise ParseError( + "While parsing message `%s': couldn't find " + "variable length array `%s' member containing " + "the actual length `%s'" % ( + name, field[1], field[3])) + p = field_class( + field_name=field[1], + field_type=field_type, + array_len=field[2], + nelem_field=nelem_field) + else: + raise Exception("Don't know how to parse message " + "definition for message `%s': `%s'" % + (m, m[1:])) + logger.debug("Parsed field `%s'" % p) + fields.append(p) + self.fields = fields + + def is_dump(self): + return self.name.endswith('_dump') + + def is_reply(self): + return msg_is_reply(self.name) + + +class StructType (Type, Struct): + + def __init__(self, definition, typedict, field_class): + t = definition + name = t[0] + fields = [] + for field in t[1:]: + if len(field) == 1 and 'crc' in field: + self.crc = field['crc'] + continue + elif len(field) == 2: + p = field_class(field_name=field[1], + field_type=typedict[field[0]]) + elif len(field) == 3: + if field[2] == 0: + raise ParseError("While parsing type `%s': 
array `%s' has " + "variable length" % (name, field[1])) + p = field_class(field_name=field[1], + field_type=typedict[field[0]], + array_len=field[2]) + else: + raise ParseError( + "Don't know how to parse type definition for " + "type `%s': `%s'" % (t, t[1:])) + fields.append(p) + Type.__init__(self, name) + Struct.__init__(self, name, fields) + + def has_field(self, name): + return name in self.field_names + + def is_part_of_def(self, definition): + for idx in range(len(self.fields)): + field = definition[idx] + p = self.fields[idx] + if field[1] != p.name: + return False + if field[0] != p.type.name: + raise ParseError( + "Unexpected field type `%s' (should be `%s'), " + "while parsing msg/def/field `%s/%s/%s'" % + (field[0], p.type, p.name, definition, field)) + return True + + +class JsonParser: + def __init__(self, logger, files, simple_type_class=SimpleType, + struct_type_class=StructType, field_class=Field, + message_class=Message): + self.messages = {} + self.types = { + x: simple_type_class(x) for x in [ + 'i8', 'i16', 'i32', 'i64', + 'u8', 'u16', 'u32', 'u64', + 'f64' + ] + } + + self.simple_type_class = simple_type_class + self.struct_type_class = struct_type_class + self.field_class = field_class + self.message_class = message_class + + self.exceptions = [] + self.json_files = [] + self.types_by_json = {} + self.messages_by_json = {} + self.logger = logger + for f in files: + self.parse_json_file(f) + self.finalize_parsing() + + def parse_json_file(self, path): + self.logger.info("Parsing json api file: `%s'" % path) + self.json_files.append(path) + self.types_by_json[path] = {} + self.messages_by_json[path] = {} + with open(path) as f: + j = json.load(f) + for t in j['types']: + try: + type_ = self.struct_type_class(t, self.types, + self.field_class) + if type_.name in self.types: + raise ParseError("Duplicate type `%s'" % type_.name) + except ParseError as e: + self.exceptions.append(e) + continue + self.types[type_.name] = type_ + self.types_by_json[path][type_.name] = type_ + for m in j['messages']: + try: + msg = self.message_class(self.logger, m, self.types, + self.struct_type_class, + self.simple_type_class, + self.field_class) + if msg.name in self.messages: + raise ParseError("Duplicate message `%s'" % msg.name) + except ParseError as e: + self.exceptions.append(e) + continue + self.messages[msg.name] = msg + self.messages_by_json[path][msg.name] = msg + + def get_reply(self, message): + if self.messages[message].is_dump(): + return self.messages["%s_details" % message[:-len("_dump")]] + return self.messages["%s_reply" % message] + + def finalize_parsing(self): + if len(self.messages) == 0: + for e in self.exceptions: + self.logger.error(e) + raise Exception("No messages parsed.") + for jn, j in self.messages_by_json.items(): + remove = [] + for n, m in j.items(): + try: + if not m.is_reply(): + try: + m.reply = self.get_reply(n) + except: + raise ParseError( + "Cannot find reply to message `%s'" % n) + except ParseError as e: + self.exceptions.append(e) + remove.append(n) + + self.messages_by_json[jn] = { + k: v for k, v in j.items() if k not in remove} diff --git a/test/Makefile b/test/Makefile index 72b4dac7..132ebee6 100644 --- a/test/Makefile +++ b/test/Makefile @@ -107,7 +107,11 @@ sanity: verify-no-running-vpp echo \"*******************************************************************\" &&\ false)" -test: verify-python-path $(PAPI_INSTALL_DONE) sanity reset +.PHONY: ext +ext: + make -C ext + +test: verify-python-path $(PAPI_INSTALL_DONE) ext sanity reset $(call 
retest-func) retest: verify-python-path sanity reset diff --git a/test/ext/Makefile b/test/ext/Makefile new file mode 100644 index 00000000..4a45fef6 --- /dev/null +++ b/test/ext/Makefile @@ -0,0 +1,17 @@ +BINDIR = $(BR)/vapi_test/ +BIN = $(addprefix $(BINDIR), vapi_test) +LIBS = -L$(VPP_TEST_BUILD_DIR)/vpp/.libs/ -L$(VPP_TEST_BUILD_DIR)/vpp/vpp-api/vapi/.libs/ -lvppinfra -lvlibmemoryclient -lsvm -lpthread -lcheck -lsubunit -lrt -lm -lvapiclient +CFLAGS = -ggdb -O0 -Wall -pthread -I$(WS_ROOT)/src -I$(VPP_TEST_BUILD_DIR)/vpp/vpp-api/vapi -I$(WS_ROOT)/src/vpp-api/vapi/ + +all: $(BIN) + +$(BINDIR): + mkdir -p $(BINDIR) + +SRC = vapi_test.c + +$(BIN): $(SRC) $(BINDIR) $(VPP_TEST_BUILD_DIR)/vpp/vpp-api/vapi/.libs/libvapiclient.so $(VPP_TEST_BUILD_DIR)/vpp/.libs/libvppinfra.so $(VPP_TEST_BUILD_DIR)/vpp/.libs/libvlibmemoryclient.so $(VPP_TEST_BUILD_DIR)/vpp/.libs/libsvm.so + gcc -ggdb -o $@ $(SRC) $(CFLAGS) $(LIBS) + +clean: + rm -rf $(BINDIR) diff --git a/test/ext/vapi_test.c b/test/ext/vapi_test.c new file mode 100644 index 00000000..eca6be7d --- /dev/null +++ b/test/ext/vapi_test.c @@ -0,0 +1,1152 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_VAPI_MSG_IDS_VPE_API_JSON; +DEFINE_VAPI_MSG_IDS_INTERFACE_API_JSON; +DEFINE_VAPI_MSG_IDS_L2_API_JSON; +DEFINE_VAPI_MSG_IDS_STATS_API_JSON; + +static char *app_name = NULL; +static char *api_prefix = NULL; +static const int max_outstanding_requests = 64; +static const int response_queue_size = 32; + +START_TEST (test_invalid_values) +{ + vapi_ctx_t ctx; + vapi_error_e rv = vapi_ctx_alloc (&ctx); + ck_assert_int_eq (VAPI_OK, rv); + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_eq (NULL, sv); + rv = vapi_send (ctx, sv); + ck_assert_int_eq (VAPI_EINVAL, rv); + rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests, + response_queue_size, VAPI_MODE_BLOCKING); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_send (ctx, NULL); + ck_assert_int_eq (VAPI_EINVAL, rv); + rv = vapi_send (NULL, NULL); + ck_assert_int_eq (VAPI_EINVAL, rv); + rv = vapi_recv (NULL, NULL, NULL); + ck_assert_int_eq (VAPI_EINVAL, rv); + rv = vapi_recv (ctx, NULL, NULL); + ck_assert_int_eq (VAPI_EINVAL, rv); + vapi_msg_show_version_reply *reply; + rv = vapi_recv (ctx, (void **) &reply, NULL); + ck_assert_int_eq (VAPI_EINVAL, rv); + rv = vapi_disconnect (ctx); + ck_assert_int_eq (VAPI_OK, rv); + vapi_ctx_free (ctx); +} + +END_TEST; + +START_TEST (test_hton_1) +{ + const u16 _vl_msg_id = 1; + vapi_type_msg_header1_t h; + h._vl_msg_id = _vl_msg_id; + vapi_type_msg_header1_t_hton (&h); + ck_assert_int_eq (be16toh (h._vl_msg_id), _vl_msg_id); +} + +END_TEST; + +START_TEST (test_hton_2) +{ + const u16 _vl_msg_id = 1; + const u32 client_index = 3; + vapi_type_msg_header2_t h; + h._vl_msg_id = _vl_msg_id; + h.client_index = client_index; + vapi_type_msg_header2_t_hton (&h); + ck_assert_int_eq (be16toh (h._vl_msg_id), _vl_msg_id); + ck_assert_int_eq (h.client_index, client_index); +} + +END_TEST; + +START_TEST (test_hton_3) +{ + const size_t data_size = 10; + vapi_msg_vnet_interface_combined_counters *m = + malloc (sizeof (vapi_msg_vnet_interface_combined_counters) + + data_size * sizeof (vapi_type_vlib_counter)); + ck_assert_ptr_ne (NULL, m); + vapi_payload_vnet_interface_combined_counters *p = &m->payload; + const u16 _vl_msg_id = 1; + p->_vl_msg_id = _vl_msg_id; + const u32 first_sw_if_index = 2; + p->first_sw_if_index = first_sw_if_index; + p->count = data_size; + const u64 packets = 1234; + const u64 bytes = 2345; + int i; + for (i = 0; i < data_size; ++i) + { + p->data[i].packets = packets; + p->data[i].bytes = bytes; + } + vapi_msg_vnet_interface_combined_counters_hton (m); + ck_assert_int_eq (_vl_msg_id, be16toh (p->_vl_msg_id)); + ck_assert_int_eq (first_sw_if_index, be32toh (p->first_sw_if_index)); + ck_assert_int_eq (data_size, be32toh (p->count)); + for (i = 0; i < data_size; ++i) + { + ck_assert_int_eq (packets, be64toh (p->data[i].packets)); + ck_assert_int_eq (bytes, be64toh (p->data[i].bytes)); + } + free (p); +} + +END_TEST; + +#define verify_hton_swap(expr, value) \ + if (4 == sizeof (expr)) \ + { \ + ck_assert_int_eq (expr, htobe32 (value)); \ + } \ + else if (2 == sizeof (expr)) \ + { \ + ck_assert_int_eq (expr, htobe16 (value)); \ + } \ + else \ + { \ + ck_assert_int_eq (expr, value); \ + } + +START_TEST (test_hton_4) +{ + const int vla_count = 3; + char x[sizeof (vapi_msg_bridge_domain_details) + + vla_count * sizeof (vapi_type_bridge_domain_sw_if)]; + 
vapi_msg_bridge_domain_details *d = (void *) x; + int cnt = 1; + d->header._vl_msg_id = cnt++; + d->header.context = cnt++; + d->payload.bd_id = cnt++; + d->payload.flood = cnt++; + d->payload.uu_flood = cnt++; + d->payload.forward = cnt++; + d->payload.learn = cnt++; + d->payload.arp_term = cnt++; + d->payload.mac_age = cnt++; + d->payload.bvi_sw_if_index = cnt++; + d->payload.n_sw_ifs = vla_count; + int i; + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det = &d->payload.sw_if_details[i]; + det->context = cnt++; + det->sw_if_index = cnt++; + det->shg = cnt++; + } + ck_assert_int_eq (sizeof (x), vapi_calc_bridge_domain_details_msg_size (d)); + vapi_msg_bridge_domain_details_hton (d); + int tmp = 1; + verify_hton_swap (d->header._vl_msg_id, tmp); + ++tmp; + ck_assert_int_eq (d->header.context, tmp); + ++tmp; + verify_hton_swap (d->payload.bd_id, tmp); + ++tmp; + verify_hton_swap (d->payload.flood, tmp); + ++tmp; + verify_hton_swap (d->payload.uu_flood, tmp); + ++tmp; + verify_hton_swap (d->payload.forward, tmp); + ++tmp; + verify_hton_swap (d->payload.learn, tmp); + ++tmp; + verify_hton_swap (d->payload.arp_term, tmp); + ++tmp; + verify_hton_swap (d->payload.mac_age, tmp); + ++tmp; + verify_hton_swap (d->payload.bvi_sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (d->payload.n_sw_ifs, htobe32 (vla_count)); + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det = &d->payload.sw_if_details[i]; + verify_hton_swap (det->context, tmp); + ++tmp; + verify_hton_swap (det->sw_if_index, tmp); + ++tmp; + verify_hton_swap (det->shg, tmp); + ++tmp; + } + vapi_msg_bridge_domain_details_ntoh (d); + tmp = 1; + ck_assert_int_eq (d->header._vl_msg_id, tmp); + ++tmp; + ck_assert_int_eq (d->header.context, tmp); + ++tmp; + ck_assert_int_eq (d->payload.bd_id, tmp); + ++tmp; + ck_assert_int_eq (d->payload.flood, tmp); + ++tmp; + ck_assert_int_eq (d->payload.uu_flood, tmp); + ++tmp; + ck_assert_int_eq (d->payload.forward, tmp); + ++tmp; + ck_assert_int_eq (d->payload.learn, tmp); + ++tmp; + ck_assert_int_eq (d->payload.arp_term, tmp); + ++tmp; + ck_assert_int_eq (d->payload.mac_age, tmp); + ++tmp; + ck_assert_int_eq (d->payload.bvi_sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (d->payload.n_sw_ifs, vla_count); + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det = &d->payload.sw_if_details[i]; + ck_assert_int_eq (det->context, tmp); + ++tmp; + ck_assert_int_eq (det->sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (det->shg, tmp); + ++tmp; + } + ck_assert_int_eq (sizeof (x), vapi_calc_bridge_domain_details_msg_size (d)); +} + +END_TEST; + +START_TEST (test_ntoh_1) +{ + const u16 _vl_msg_id = 1; + vapi_type_msg_header1_t h; + h._vl_msg_id = _vl_msg_id; + vapi_type_msg_header1_t_ntoh (&h); + ck_assert_int_eq (htobe16 (h._vl_msg_id), _vl_msg_id); +} + +END_TEST; + +START_TEST (test_ntoh_2) +{ + const u16 _vl_msg_id = 1; + const u32 client_index = 3; + vapi_type_msg_header2_t h; + h._vl_msg_id = _vl_msg_id; + h.client_index = client_index; + vapi_type_msg_header2_t_ntoh (&h); + ck_assert_int_eq (htobe16 (h._vl_msg_id), _vl_msg_id); + ck_assert_int_eq (h.client_index, client_index); +} + +END_TEST; + +START_TEST (test_ntoh_3) +{ + const size_t data_size = 10; + vapi_msg_vnet_interface_combined_counters *m = + malloc (sizeof (vapi_msg_vnet_interface_combined_counters) + + data_size * sizeof (vapi_type_vlib_counter)); + ck_assert_ptr_ne (NULL, m); + vapi_payload_vnet_interface_combined_counters *p = &m->payload; + const u16 _vl_msg_id = 1; + p->_vl_msg_id = 
_vl_msg_id; + const u32 first_sw_if_index = 2; + p->first_sw_if_index = first_sw_if_index; + const size_t be_data_size = htobe32 (data_size); + p->count = be_data_size; + const u64 packets = 1234; + const u64 bytes = 2345; + int i; + for (i = 0; i < data_size; ++i) + { + p->data[i].packets = packets; + p->data[i].bytes = bytes; + } + vapi_msg_vnet_interface_combined_counters_ntoh (m); + ck_assert_int_eq (_vl_msg_id, be16toh (p->_vl_msg_id)); + ck_assert_int_eq (first_sw_if_index, be32toh (p->first_sw_if_index)); + ck_assert_int_eq (be_data_size, be32toh (p->count)); + for (i = 0; i < data_size; ++i) + { + ck_assert_int_eq (packets, htobe64 (p->data[i].packets)); + ck_assert_int_eq (bytes, htobe64 (p->data[i].bytes)); + } + free (p); +} + +END_TEST; + +#define verify_ntoh_swap(expr, value) \ + if (4 == sizeof (expr)) \ + { \ + ck_assert_int_eq (expr, be32toh (value)); \ + } \ + else if (2 == sizeof (expr)) \ + { \ + ck_assert_int_eq (expr, be16toh (value)); \ + } \ + else \ + { \ + ck_assert_int_eq (expr, value); \ + } + +START_TEST (test_ntoh_4) +{ + const int vla_count = 3; + char x[sizeof (vapi_msg_bridge_domain_details) + + vla_count * sizeof (vapi_type_bridge_domain_sw_if)]; + vapi_msg_bridge_domain_details *d = (void *) x; + int cnt = 1; + d->header._vl_msg_id = cnt++; + d->header.context = cnt++; + d->payload.bd_id = cnt++; + d->payload.flood = cnt++; + d->payload.uu_flood = cnt++; + d->payload.forward = cnt++; + d->payload.learn = cnt++; + d->payload.arp_term = cnt++; + d->payload.mac_age = cnt++; + d->payload.bvi_sw_if_index = cnt++; + d->payload.n_sw_ifs = htobe32 (vla_count); + int i; + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det = &d->payload.sw_if_details[i]; + det->context = cnt++; + det->sw_if_index = cnt++; + det->shg = cnt++; + } + vapi_msg_bridge_domain_details_ntoh (d); + ck_assert_int_eq (sizeof (x), vapi_calc_bridge_domain_details_msg_size (d)); + int tmp = 1; + verify_ntoh_swap (d->header._vl_msg_id, tmp); + ++tmp; + ck_assert_int_eq (d->header.context, tmp); + ++tmp; + verify_ntoh_swap (d->payload.bd_id, tmp); + ++tmp; + verify_ntoh_swap (d->payload.flood, tmp); + ++tmp; + verify_ntoh_swap (d->payload.uu_flood, tmp); + ++tmp; + verify_ntoh_swap (d->payload.forward, tmp); + ++tmp; + verify_ntoh_swap (d->payload.learn, tmp); + ++tmp; + verify_ntoh_swap (d->payload.arp_term, tmp); + ++tmp; + verify_ntoh_swap (d->payload.mac_age, tmp); + ++tmp; + verify_ntoh_swap (d->payload.bvi_sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (d->payload.n_sw_ifs, vla_count); + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det = &d->payload.sw_if_details[i]; + verify_ntoh_swap (det->context, tmp); + ++tmp; + verify_ntoh_swap (det->sw_if_index, tmp); + ++tmp; + verify_ntoh_swap (det->shg, tmp); + ++tmp; + } + vapi_msg_bridge_domain_details_hton (d); + tmp = 1; + ck_assert_int_eq (d->header._vl_msg_id, tmp); + ++tmp; + ck_assert_int_eq (d->header.context, tmp); + ++tmp; + ck_assert_int_eq (d->payload.bd_id, tmp); + ++tmp; + ck_assert_int_eq (d->payload.flood, tmp); + ++tmp; + ck_assert_int_eq (d->payload.uu_flood, tmp); + ++tmp; + ck_assert_int_eq (d->payload.forward, tmp); + ++tmp; + ck_assert_int_eq (d->payload.learn, tmp); + ++tmp; + ck_assert_int_eq (d->payload.arp_term, tmp); + ++tmp; + ck_assert_int_eq (d->payload.mac_age, tmp); + ++tmp; + ck_assert_int_eq (d->payload.bvi_sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (d->payload.n_sw_ifs, htobe32 (vla_count)); + for (i = 0; i < vla_count; ++i) + { + vapi_type_bridge_domain_sw_if *det 
= &d->payload.sw_if_details[i]; + ck_assert_int_eq (det->context, tmp); + ++tmp; + ck_assert_int_eq (det->sw_if_index, tmp); + ++tmp; + ck_assert_int_eq (det->shg, tmp); + ++tmp; + } +} + +END_TEST; + +vapi_error_e +show_version_cb (vapi_ctx_t ctx, void *caller_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_show_version_reply * p) +{ + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (true, is_last); + ck_assert_str_eq ("vpe", (char *) p->program); + printf + ("show_version_reply: program: `%s', version: `%s', build directory: " + "`%s', build date: `%s'\n", p->program, p->version, p->build_directory, + p->build_date); + ++*(int *) caller_ctx; + return VAPI_OK; +} + +typedef struct +{ + int called; + int expected_retval; + u32 *sw_if_index_storage; +} test_create_loopback_ctx_t; + +vapi_error_e +loopback_create_cb (vapi_ctx_t ctx, void *caller_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_create_loopback_reply * p) +{ + test_create_loopback_ctx_t *clc = caller_ctx; + ck_assert_int_eq (clc->expected_retval, p->retval); + *clc->sw_if_index_storage = p->sw_if_index; + ++clc->called; + return VAPI_OK; +} + +typedef struct +{ + int called; + int expected_retval; + u32 *sw_if_index_storage; +} test_delete_loopback_ctx_t; + +vapi_error_e +loopback_delete_cb (vapi_ctx_t ctx, void *caller_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_delete_loopback_reply * p) +{ + test_delete_loopback_ctx_t *dlc = caller_ctx; + ck_assert_int_eq (dlc->expected_retval, p->retval); + ++dlc->called; + return VAPI_OK; +} + +START_TEST (test_connect) +{ + vapi_ctx_t ctx; + vapi_error_e rv = vapi_ctx_alloc (&ctx); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests, + response_queue_size, VAPI_MODE_BLOCKING); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_disconnect (ctx); + ck_assert_int_eq (VAPI_OK, rv); + vapi_ctx_free (ctx); +} + +END_TEST; + +vapi_ctx_t ctx; + +void +setup_blocking (void) +{ + vapi_error_e rv = vapi_ctx_alloc (&ctx); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests, + response_queue_size, VAPI_MODE_BLOCKING); + ck_assert_int_eq (VAPI_OK, rv); +} + +void +setup_nonblocking (void) +{ + vapi_error_e rv = vapi_ctx_alloc (&ctx); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests, + response_queue_size, VAPI_MODE_NONBLOCKING); + ck_assert_int_eq (VAPI_OK, rv); +} + +void +teardown (void) +{ + vapi_disconnect (ctx); + vapi_ctx_free (ctx); +} + +START_TEST (test_show_version_1) +{ + printf ("--- Basic show version message - reply test ---\n"); + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + vapi_msg_show_version_hton (sv); + vapi_error_e rv = vapi_send (ctx, sv); + ck_assert_int_eq (VAPI_OK, rv); + vapi_msg_show_version_reply *resp; + size_t size; + rv = vapi_recv (ctx, (void *) &resp, &size); + ck_assert_int_eq (VAPI_OK, rv); + vapi_payload_show_version_reply *payload = &resp->payload; + int dummy; + show_version_cb (NULL, &dummy, VAPI_OK, true, payload); + vapi_msg_free (ctx, resp); +} + +END_TEST; + +START_TEST (test_show_version_2) +{ + int called = 0; + printf ("--- Show version via blocking callback API ---\n"); + const int attempts = response_queue_size * 4; + int i = 0; + for (i = 0; i < attempts; ++i) + { + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + vapi_error_e rv = vapi_show_version (ctx, sv, show_version_cb, 
&called); + ck_assert_int_eq (VAPI_OK, rv); + } + ck_assert_int_eq (attempts, called); +} + +END_TEST; + +typedef struct +{ + bool last_called; + size_t num_ifs; + u32 *sw_if_indexes; + bool *seen; + int called; +} sw_interface_dump_ctx; + +vapi_error_e +sw_interface_dump_cb (struct vapi_ctx_s *ctx, void *callback_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_sw_interface_details * reply) +{ + sw_interface_dump_ctx *dctx = callback_ctx; + ck_assert_int_eq (false, dctx->last_called); + if (is_last) + { + ck_assert (NULL == reply); + dctx->last_called = true; + } + else + { + ck_assert (reply); + printf ("Interface dump entry: [%u]: %s\n", reply->sw_if_index, + reply->interface_name); + size_t i = 0; + for (i = 0; i < dctx->num_ifs; ++i) + { + if (dctx->sw_if_indexes[i] == reply->sw_if_index) + { + ck_assert_int_eq (false, dctx->seen[i]); + dctx->seen[i] = true; + } + } + } + ++dctx->called; + return VAPI_OK; +} + +START_TEST (test_loopbacks_1) +{ + printf ("--- Create/delete loopbacks using blocking API ---\n"); + const size_t num_ifs = 5; + u8 mac_addresses[num_ifs][6]; + memset (&mac_addresses, 0, sizeof (mac_addresses)); + u32 sw_if_indexes[num_ifs]; + memset (&sw_if_indexes, 0xff, sizeof (sw_if_indexes)); + test_create_loopback_ctx_t clcs[num_ifs]; + memset (&clcs, 0, sizeof (clcs)); + test_delete_loopback_ctx_t dlcs[num_ifs]; + memset (&dlcs, 0, sizeof (dlcs)); + int i; + for (i = 0; i < num_ifs; ++i) + { + memcpy (&mac_addresses[i], "\1\2\3\4\5\6", 6); + mac_addresses[i][5] = i; + clcs[i].sw_if_index_storage = &sw_if_indexes[i]; + } + for (i = 0; i < num_ifs; ++i) + { + vapi_msg_create_loopback *cl = vapi_alloc_create_loopback (ctx); + memcpy (cl->payload.mac_address, mac_addresses[i], + sizeof (cl->payload.mac_address)); + vapi_error_e rv = + vapi_create_loopback (ctx, cl, loopback_create_cb, &clcs[i]); + ck_assert_int_eq (VAPI_OK, rv); + } + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (1, clcs[i].called); + printf ("Created loopback with MAC %02x:%02x:%02x:%02x:%02x:%02x --> " + "sw_if_index %u\n", + mac_addresses[i][0], mac_addresses[i][1], mac_addresses[i][2], + mac_addresses[i][3], mac_addresses[i][4], mac_addresses[i][5], + sw_if_indexes[i]); + } + bool seen[num_ifs]; + sw_interface_dump_ctx dctx = { false, num_ifs, sw_if_indexes, seen, 0 }; + vapi_msg_sw_interface_dump *dump; + vapi_error_e rv; + const int attempts = response_queue_size * 4; + for (i = 0; i < attempts; ++i) + { + dctx.last_called = false; + memset (&seen, 0, sizeof (seen)); + dump = vapi_alloc_sw_interface_dump (ctx); + dump->payload.name_filter_valid = 0; + memset (dump->payload.name_filter, 0, + sizeof (dump->payload.name_filter)); + while (VAPI_EAGAIN == + (rv = + vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, + &dctx))) + ; + ck_assert_int_eq (true, dctx.last_called); + int j = 0; + for (j = 0; j < num_ifs; ++j) + { + ck_assert_int_eq (true, seen[j]); + } + } + memset (&seen, 0, sizeof (seen)); + for (i = 0; i < num_ifs; ++i) + { + vapi_msg_delete_loopback *dl = vapi_alloc_delete_loopback (ctx); + dl->payload.sw_if_index = sw_if_indexes[i]; + vapi_error_e rv = + vapi_delete_loopback (ctx, dl, loopback_delete_cb, &dlcs[i]); + ck_assert_int_eq (VAPI_OK, rv); + } + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (1, dlcs[i].called); + printf ("Deleted loopback with sw_if_index %u\n", sw_if_indexes[i]); + } + dctx.last_called = false; + memset (&seen, 0, sizeof (seen)); + dump = vapi_alloc_sw_interface_dump (ctx); + dump->payload.name_filter_valid = 0; + memset 
(dump->payload.name_filter, 0, sizeof (dump->payload.name_filter)); + while (VAPI_EAGAIN == + (rv = + vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx))) + ; + ck_assert_int_eq (true, dctx.last_called); + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (false, seen[i]); + } +} + +END_TEST; + +START_TEST (test_show_version_3) +{ + printf ("--- Show version via async callback ---\n"); + int called = 0; + vapi_error_e rv; + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + while (VAPI_EAGAIN == + (rv = vapi_show_version (ctx, sv, show_version_cb, &called))) + ; + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (0, called); + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, called); + called = 0; + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (0, called); +} + +END_TEST; + +START_TEST (test_show_version_4) +{ + printf ("--- Show version via async callback - multiple messages ---\n"); + vapi_error_e rv; + const size_t num_req = 5; + int contexts[num_req]; + memset (contexts, 0, sizeof (contexts)); + int i; + for (i = 0; i < num_req; ++i) + { + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + while (VAPI_EAGAIN == + (rv = + vapi_show_version (ctx, sv, show_version_cb, &contexts[i]))) + ; + ck_assert_int_eq (VAPI_OK, rv); + int j; + for (j = 0; j < num_req; ++j) + { + ck_assert_int_eq (0, contexts[j]); + } + } + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_req; ++i) + { + ck_assert_int_eq (1, contexts[i]); + } + memset (contexts, 0, sizeof (contexts)); + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_req; ++i) + { + ck_assert_int_eq (0, contexts[i]); + } +} + +END_TEST; + +START_TEST (test_loopbacks_2) +{ + printf ("--- Create/delete loopbacks using non-blocking API ---\n"); + vapi_error_e rv; + const size_t num_ifs = 5; + u8 mac_addresses[num_ifs][6]; + memset (&mac_addresses, 0, sizeof (mac_addresses)); + u32 sw_if_indexes[num_ifs]; + memset (&sw_if_indexes, 0xff, sizeof (sw_if_indexes)); + test_create_loopback_ctx_t clcs[num_ifs]; + memset (&clcs, 0, sizeof (clcs)); + test_delete_loopback_ctx_t dlcs[num_ifs]; + memset (&dlcs, 0, sizeof (dlcs)); + int i; + for (i = 0; i < num_ifs; ++i) + { + memcpy (&mac_addresses[i], "\1\2\3\4\5\6", 6); + mac_addresses[i][5] = i; + clcs[i].sw_if_index_storage = &sw_if_indexes[i]; + } + for (i = 0; i < num_ifs; ++i) + { + vapi_msg_create_loopback *cl = vapi_alloc_create_loopback (ctx); + memcpy (cl->payload.mac_address, mac_addresses[i], + sizeof (cl->payload.mac_address)); + while (VAPI_EAGAIN == + (rv = + vapi_create_loopback (ctx, cl, loopback_create_cb, &clcs[i]))) + ; + ck_assert_int_eq (VAPI_OK, rv); + } + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (1, clcs[i].called); + printf ("Loopback with MAC %02x:%02x:%02x:%02x:%02x:%02x --> " + "sw_if_index %u\n", + mac_addresses[i][0], mac_addresses[i][1], mac_addresses[i][2], + mac_addresses[i][3], mac_addresses[i][4], mac_addresses[i][5], + sw_if_indexes[i]); + } + bool seen[num_ifs]; + memset (&seen, 0, sizeof (seen)); + sw_interface_dump_ctx dctx = { false, num_ifs, sw_if_indexes, seen, 0 }; + vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx); + dump->payload.name_filter_valid = 0; + memset (dump->payload.name_filter, 0, sizeof (dump->payload.name_filter)); + while (VAPI_EAGAIN 
== + (rv = + vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx))) + ; + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (false, seen[i]); + } + memset (&seen, 0, sizeof (seen)); + ck_assert_int_eq (false, dctx.last_called); + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (true, seen[i]); + } + memset (&seen, 0, sizeof (seen)); + ck_assert_int_eq (true, dctx.last_called); + for (i = 0; i < num_ifs; ++i) + { + vapi_msg_delete_loopback *dl = vapi_alloc_delete_loopback (ctx); + dl->payload.sw_if_index = sw_if_indexes[i]; + while (VAPI_EAGAIN == + (rv = + vapi_delete_loopback (ctx, dl, loopback_delete_cb, &dlcs[i]))) + ; + ck_assert_int_eq (VAPI_OK, rv); + } + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (1, dlcs[i].called); + printf ("Deleted loopback with sw_if_index %u\n", sw_if_indexes[i]); + } + memset (&seen, 0, sizeof (seen)); + dctx.last_called = false; + dump = vapi_alloc_sw_interface_dump (ctx); + dump->payload.name_filter_valid = 0; + memset (dump->payload.name_filter, 0, sizeof (dump->payload.name_filter)); + while (VAPI_EAGAIN == + (rv = + vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx))) + ; + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + for (i = 0; i < num_ifs; ++i) + { + ck_assert_int_eq (false, seen[i]); + } + memset (&seen, 0, sizeof (seen)); + ck_assert_int_eq (true, dctx.last_called); +} + +END_TEST; + +vapi_error_e +interface_simple_stats_cb (vapi_ctx_t ctx, void *callback_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_want_interface_simple_stats_reply * + payload) +{ + return VAPI_OK; +} + +vapi_error_e +simple_counters_cb (vapi_ctx_t ctx, void *callback_ctx, + vapi_payload_vnet_interface_simple_counters * payload) +{ + int *called = callback_ctx; + ++*called; + printf ("simple counters: first_sw_if_index=%u\n", + payload->first_sw_if_index); + return VAPI_OK; +} + +START_TEST (test_stats_1) +{ + printf ("--- Receive stats using generic blocking API ---\n"); + vapi_msg_want_interface_simple_stats *ws = + vapi_alloc_want_interface_simple_stats (ctx); + ws->payload.enable_disable = 1; + ws->payload.pid = getpid (); + vapi_error_e rv; + rv = vapi_want_interface_simple_stats (ctx, ws, interface_simple_stats_cb, + NULL); + ck_assert_int_eq (VAPI_OK, rv); + int called = 0; + vapi_set_event_cb (ctx, vapi_msg_id_vnet_interface_simple_counters, + (vapi_event_cb) simple_counters_cb, &called); + rv = vapi_dispatch_one (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, called); +} + +END_TEST; + +START_TEST (test_stats_2) +{ + printf ("--- Receive stats using stat-specific blocking API ---\n"); + vapi_msg_want_interface_simple_stats *ws = + vapi_alloc_want_interface_simple_stats (ctx); + ws->payload.enable_disable = 1; + ws->payload.pid = getpid (); + vapi_error_e rv; + rv = vapi_want_interface_simple_stats (ctx, ws, interface_simple_stats_cb, + NULL); + ck_assert_int_eq (VAPI_OK, rv); + int called = 0; + vapi_set_vapi_msg_vnet_interface_simple_counters_event_cb (ctx, + simple_counters_cb, + &called); + rv = vapi_dispatch_one (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, called); +} + +END_TEST; + +vapi_error_e +generic_cb (vapi_ctx_t ctx, void *callback_ctx, vapi_msg_id_t id, void *msg) +{ + int *called = callback_ctx; + ck_assert_int_eq (0, *called); + ++*called; + ck_assert_int_eq (id, vapi_msg_id_show_version_reply); + ck_assert_ptr_ne (NULL, msg); + 
vapi_msg_show_version_reply *reply = msg; + ck_assert_str_eq ("vpe", (char *) reply->payload.program); + return VAPI_OK; +} + +START_TEST (test_show_version_5) +{ + printf ("--- Receive show version using generic callback - nonblocking " + "API ---\n"); + vapi_error_e rv; + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + vapi_msg_show_version_hton (sv); + while (VAPI_EAGAIN == (rv = vapi_send (ctx, sv))) + ; + ck_assert_int_eq (VAPI_OK, rv); + int called = 0; + vapi_set_generic_event_cb (ctx, generic_cb, &called); + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_dispatch_one (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, called); + sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + vapi_msg_show_version_hton (sv); + while (VAPI_EAGAIN == (rv = vapi_send (ctx, sv))) + ; + ck_assert_int_eq (VAPI_OK, rv); + vapi_clear_generic_event_cb (ctx); + rv = vapi_dispatch_one (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, called); /* needs to remain unchanged */ +} + +END_TEST; + +vapi_error_e +combined_counters_cb (struct vapi_ctx_s *ctx, void *callback_ctx, + vapi_payload_vnet_interface_combined_counters * payload) +{ + int *called = callback_ctx; + ++*called; + printf ("combined counters: first_sw_if_index=%u\n", + payload->first_sw_if_index); + return VAPI_OK; +} + +vapi_error_e +stats_cb (vapi_ctx_t ctx, void *callback_ctx, vapi_error_e rv, + bool is_last, vapi_payload_want_stats_reply * payload) +{ + return VAPI_OK; +} + +START_TEST (test_stats_3) +{ + printf ("--- Receive multiple stats using stat-specific non-blocking API " + "---\n"); + vapi_msg_want_stats *ws = vapi_alloc_want_stats (ctx); + ws->payload.enable_disable = 1; + ws->payload.pid = getpid (); + vapi_error_e rv; + rv = vapi_want_stats (ctx, ws, stats_cb, NULL); + ck_assert_int_eq (VAPI_OK, rv); + int called = 0; + int called2 = 0; + vapi_set_vapi_msg_vnet_interface_simple_counters_event_cb (ctx, + simple_counters_cb, + &called); + vapi_set_vapi_msg_vnet_interface_combined_counters_event_cb (ctx, + combined_counters_cb, + &called2); + while (!called || !called2) + { + if (VAPI_EAGAIN != (rv = vapi_dispatch_one (ctx))) + { + ck_assert_int_eq (VAPI_OK, rv); + } + } +} + +END_TEST; + +vapi_error_e +show_version_no_cb (vapi_ctx_t ctx, void *caller_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_show_version_reply * p) +{ + ck_assert_int_eq (VAPI_ENORESP, rv); + ck_assert_int_eq (true, is_last); + ck_assert_ptr_eq (NULL, p); + ++*(int *) caller_ctx; + return VAPI_OK; +} + +START_TEST (test_no_response_1) +{ + printf ("--- Simulate no response to regular message ---\n"); + vapi_error_e rv; + vapi_msg_show_version *sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + sv->header._vl_msg_id = ~0; /* malformed ID causes vpp to drop the msg */ + int called = 0; + while (VAPI_EAGAIN == + (rv = vapi_show_version (ctx, sv, show_version_no_cb, &called))) + ; + ck_assert_int_eq (VAPI_OK, rv); + sv = vapi_alloc_show_version (ctx); + ck_assert_ptr_ne (NULL, sv); + while (VAPI_EAGAIN == + (rv = vapi_show_version (ctx, sv, show_version_cb, &called))) + ; + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (2, called); +} + +END_TEST; + +vapi_error_e +no_msg_cb (struct vapi_ctx_s *ctx, void *callback_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_sw_interface_details * reply) +{ + int *called = callback_ctx; + ++*called; + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq 
(true, is_last); + ck_assert_ptr_eq (NULL, reply); + return VAPI_OK; +} + +START_TEST (test_no_response_2) +{ + printf ("--- Simulate no response to dump message ---\n"); + vapi_error_e rv; + vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx); + dump->header._vl_msg_id = ~0; /* malformed ID causes vpp to drop the msg */ + int no_called = 0; + while (VAPI_EAGAIN == + (rv = vapi_sw_interface_dump (ctx, dump, no_msg_cb, &no_called))) + ; + ck_assert_int_eq (VAPI_OK, rv); + rv = vapi_dispatch (ctx); + ck_assert_int_eq (VAPI_OK, rv); + ck_assert_int_eq (1, no_called); +} + +END_TEST; +Suite * +test_suite (void) +{ + Suite *s = suite_create ("VAPI test"); + + TCase *tc_negative = tcase_create ("Negative tests"); + tcase_add_test (tc_negative, test_invalid_values); + suite_add_tcase (s, tc_negative); + + TCase *tc_swap = tcase_create ("Byteswap tests"); + tcase_add_test (tc_swap, test_hton_1); + tcase_add_test (tc_swap, test_hton_2); + tcase_add_test (tc_swap, test_hton_3); + tcase_add_test (tc_swap, test_hton_4); + tcase_add_test (tc_swap, test_ntoh_1); + tcase_add_test (tc_swap, test_ntoh_2); + tcase_add_test (tc_swap, test_ntoh_3); + tcase_add_test (tc_swap, test_ntoh_4); + suite_add_tcase (s, tc_swap); + + TCase *tc_connect = tcase_create ("Connect"); + tcase_add_test (tc_connect, test_connect); + suite_add_tcase (s, tc_connect); + + TCase *tc_block = tcase_create ("Blocking API"); + tcase_set_timeout (tc_block, 25); + tcase_add_checked_fixture (tc_block, setup_blocking, teardown); + tcase_add_test (tc_block, test_show_version_1); + tcase_add_test (tc_block, test_show_version_2); + tcase_add_test (tc_block, test_loopbacks_1); + tcase_add_test (tc_block, test_stats_1); + tcase_add_test (tc_block, test_stats_2); + suite_add_tcase (s, tc_block); + + TCase *tc_nonblock = tcase_create ("Nonblocking API"); + tcase_set_timeout (tc_nonblock, 25); + tcase_add_checked_fixture (tc_nonblock, setup_nonblocking, teardown); + tcase_add_test (tc_nonblock, test_show_version_3); + tcase_add_test (tc_nonblock, test_show_version_4); + tcase_add_test (tc_nonblock, test_show_version_5); + tcase_add_test (tc_nonblock, test_loopbacks_2); + tcase_add_test (tc_nonblock, test_stats_3); + tcase_add_test (tc_nonblock, test_no_response_1); + tcase_add_test (tc_nonblock, test_no_response_2); + suite_add_tcase (s, tc_nonblock); + + return s; +} + +int +main (int argc, char *argv[]) +{ + if (3 != argc) + { + printf ("Invalid argc==`%d'\n", argc); + return EXIT_FAILURE; + } + app_name = argv[1]; + api_prefix = argv[2]; + printf ("App name: `%s', API prefix: `%s'\n", app_name, api_prefix); + + int number_failed; + Suite *s; + SRunner *sr; + + s = test_suite (); + sr = srunner_create (s); + + srunner_run_all (sr, CK_NORMAL); + number_failed = srunner_ntests_failed (sr); + srunner_free (sr); + return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/test/scripts/test-loop.sh b/test/scripts/test-loop.sh index 17dc7c39..51f5d5ce 100755 --- a/test/scripts/test-loop.sh +++ b/test/scripts/test-loop.sh @@ -3,14 +3,15 @@ function usage() { echo "$0" 1>&2 echo "" 1>&2 - echo "Usage: $0 [-p ] [-m ] -- " 1>&2 + echo "Usage: $0 [-p ] [-m ] -- " 1>&2 echo "" 1>&2 echo "Parameters:" 1>&2 echo " -p - run a command before each test loop (e.g. 
'git pull')" 1>&2 echo " -m - if set, email is sent to this address on failure" 1>&2 echo "" 1>&2 - echo "Example:" 1>&2 - echo " $0 -m -- test-debug TEST=l2bd" + echo "Examples:" 1>&2 + echo " $0 -m -- test-debug TEST=l2bd" 1>&2 + echo " $0 -m -- verify" 1>&2 exit 1; } @@ -44,8 +45,11 @@ shift $((OPTIND-1)) if ! echo $* | grep test >/dev/null then - echo "Error: command line doesn't look right - should contain \`test' token..." >&2 - usage + if ! echo $* | grep verify >/dev/null + then + echo "Error: command line doesn't look right - should contain \`test' or \`verify' token..." >&2 + usage + fi fi function finish { diff --git a/test/test_vapi.py b/test/test_vapi.py new file mode 100644 index 00000000..86c1ee06 --- /dev/null +++ b/test/test_vapi.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +""" VAPI test """ + +from __future__ import division +import unittest +import os +import signal +import subprocess +from threading import Thread +from log import single_line_delim +from framework import VppTestCase, running_extended_tests, VppTestRunner + + +class Worker(Thread): + def __init__(self, args, logger): + self.logger = logger + self.args = args + self.result = None + super(Worker, self).__init__() + + def run(self): + executable = self.args[0] + self.logger.debug("Running executable w/args `%s'" % self.args) + env = os.environ.copy() + env["CK_LOG_FILE_NAME"] = "-" + self.process = subprocess.Popen( + self.args, shell=False, env=env, preexec_fn=os.setpgrp, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = self.process.communicate() + self.logger.debug("Finished running `%s'" % executable) + self.logger.info("Return code is `%s'" % self.process.returncode) + self.logger.info(single_line_delim) + self.logger.info("Executable `%s' wrote to stdout:" % executable) + self.logger.info(single_line_delim) + self.logger.info(out) + self.logger.info(single_line_delim) + self.logger.info("Executable `%s' wrote to stderr:" % executable) + self.logger.info(single_line_delim) + self.logger.error(err) + self.logger.info(single_line_delim) + self.result = self.process.returncode + + +@unittest.skipUnless(running_extended_tests(), "part of extended tests") +class VAPITestCase(VppTestCase): + """ VAPI test """ + + def test_vapi(self): + """ run VAPI tests """ + var = "BR" + built_root = os.getenv(var, None) + self.assertIsNotNone(built_root, + "Environment variable `%s' not set" % var) + executable = "%s/vapi_test/vapi_test" % built_root + worker = Worker( + [executable, "vapi client", self.shm_prefix], self.logger) + worker.start() + timeout = 45 + worker.join(timeout) + self.logger.info("Worker result is `%s'" % worker.result) + error = False + if worker.result is None: + try: + error = True + self.logger.error( + "Timeout! Worker did not finish in %ss" % timeout) + os.killpg(os.getpgid(worker.process.pid), signal.SIGTERM) + worker.join() + except: + raise Exception("Couldn't kill worker-spawned process") + if error: + raise Exception( + "Timeout! 
Worker did not finish in %ss" % timeout) + self.assert_equal(worker.result, 0, "Binary test return code") + + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) -- cgit 1.2.3-korg From 905c14af2b1464840cea201daed005cb30513683 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Mon, 25 Sep 2017 08:47:59 -0400 Subject: Add binary API documentation Change-Id: Id1a5da12b13d87bacfa81094f471b95db40c39be Signed-off-by: Dave Barach --- doxygen/user_doc.md | 1 + src/vlibapi/api_common.h | 192 +++++++++++++++--------- src/vlibapi/api_doc.md | 349 +++++++++++++++++++++++++++++++++++++++++++ src/vlibmemory/memory_vlib.c | 4 +- 4 files changed, 475 insertions(+), 71 deletions(-) create mode 100644 src/vlibapi/api_doc.md (limited to 'src/vlibmemory') diff --git a/doxygen/user_doc.md b/doxygen/user_doc.md index 952a72fc..65b09f98 100644 --- a/doxygen/user_doc.md +++ b/doxygen/user_doc.md @@ -18,3 +18,4 @@ Several modules provide operational, dataplane-user focused documentation. - @subpage srmpls_doc - @subpage sample_plugin_doc - @subpage nat64_doc +- @subpage api_doc diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index dc6761bc..770cf47d 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -20,71 +20,74 @@ #ifndef included_api_common_h #define included_api_common_h +/** \file API common definitions + * See api_doc.md for more info + */ + #include #include #include +/** API registration types + */ typedef enum { REGISTRATION_TYPE_FREE = 0, - REGISTRATION_TYPE_SHMEM, - REGISTRATION_TYPE_SOCKET_LISTEN, - REGISTRATION_TYPE_SOCKET_SERVER, - REGISTRATION_TYPE_SOCKET_CLIENT, + REGISTRATION_TYPE_SHMEM, /**< Shared memory connection */ + REGISTRATION_TYPE_SOCKET_LISTEN, /**< Socket listener */ + REGISTRATION_TYPE_SOCKET_SERVER, /**< Socket server */ + REGISTRATION_TYPE_SOCKET_CLIENT, /**< Socket client */ } vl_registration_type_t; +/** An API client registration, only in vpp/vlib */ + typedef struct vl_api_registration_ { - vl_registration_type_t registration_type; + vl_registration_type_t registration_type; /**< type */ - /* Index in VLIB's brain (not shared memory). */ + /** Index in VLIB's brain (not shared memory). */ u32 vl_api_registration_pool_index; - u8 *name; + u8 *name; /**< Client name */ - /* - * The following groups of data could be unioned, but my fingers are - * going to be sore enough. 
- */ - - /* shared memory only */ + /** shared memory only: pointer to client input queue */ unix_shared_memory_queue_t *vl_input_queue; /* socket server and client */ - u32 clib_file_index; - i8 *unprocessed_input; - u32 unprocessed_msg_length; - u8 *output_vector; + u32 clib_file_index; /**< Socket only: file index */ + i8 *unprocessed_input; /**< Socket only: pending input */ + u32 unprocessed_msg_length; /**< Socket only: unprocssed length */ + u8 *output_vector; /**< Socket only: output vecto */ /* socket client only */ - u32 server_handle; - u32 server_index; - + u32 server_handle; /**< Socket client only: server handle */ + u32 server_index; /**< Socket client only: server index */ } vl_api_registration_t; -/* Trace configuration for a single message */ +/** Trace configuration for a single message */ typedef struct { - int size; - int trace_enable; - int replay_enable; + int size; /**< for sanity checking */ + int trace_enable; /**< trace this message */ + int replay_enable; /**< This message can be replayed */ } trace_cfg_t; -/* - * API recording +/** + * API trace state */ typedef struct { - u8 endian; - u8 enabled; - u8 wrapped; + u8 endian; /**< trace endianness */ + u8 enabled; /**< trace is enabled */ + u8 wrapped; /**< trace has wrapped */ u8 pad; - u32 nitems; - u32 curindex; - u8 **traces; + u32 nitems; /**< Number of trace records */ + u32 curindex; /**< Current index in circular buffer */ + u8 **traces; /**< Trace ring */ } vl_api_trace_t; +/** Trace RX / TX enum */ typedef enum { VL_API_TRACE_TX, @@ -94,35 +97,38 @@ typedef enum #define VL_API_LITTLE_ENDIAN 0x00 #define VL_API_BIG_ENDIAN 0x01 +/** Message range (belonging to a plugin) */ typedef struct { - u8 *name; - u16 first_msg_id; - u16 last_msg_id; + u8 *name; /**< name of the plugin */ + u16 first_msg_id; /**< first assigned message ID */ + u16 last_msg_id; /**< last assigned message ID */ } vl_api_msg_range_t; +/** Message configuration definition */ typedef struct { - int id; - char *name; - u32 crc; - void *handler; - void *cleanup; - void *endian; - void *print; - int size; - int traced; - int replay; - int message_bounce; - int is_mp_safe; + int id; /**< the message ID */ + char *name; /**< the message name */ + u32 crc; /**< message definition CRC */ + void *handler; /**< the message handler */ + void *cleanup; /**< non-default message cleanup handler */ + void *endian; /**< message endian function */ + void *print; /**< message print function */ + int size; /**< message size */ + int traced; /**< is this message to be traced? */ + int replay; /**< is this message to be replayed? */ + int message_bounce; /**< do not free message after processing */ + int is_mp_safe; /**< worker thread barrier required? 
*/ } vl_msg_api_msg_config_t; +/** Message header structure */ typedef struct msgbuf_ { - unix_shared_memory_queue_t *q; - u32 data_len; - u32 gc_mark_timestamp; - u8 data[0]; + unix_shared_memory_queue_t *q; /**< message allocated in this shmem ring */ + u32 data_len; /**< message length not including header */ + u32 gc_mark_timestamp; /**< message garbage collector mark TS */ + u8 data[0]; /**< actual message begins here */ } msgbuf_t; /* api_shared.c prototypes */ @@ -171,101 +177,147 @@ typedef struct _vl_msg_api_init_function_list_elt vl_msg_api_init_function_t *f; } _vl_msg_api_function_list_elt_t; +/** API main structure, used by both vpp and binary API clients */ typedef struct { + /** Message handler vector */ void (**msg_handlers) (void *); + /** Plaform-dependent (aka hardware) message handler vector */ int (**pd_msg_handlers) (void *, int); + + /** non-default message cleanup handler vector */ void (**msg_cleanup_handlers) (void *); + + /** Message endian handler vector */ void (**msg_endian_handlers) (void *); + + /** Message print function vector */ void (**msg_print_handlers) (void *, void *); + + /** Message name vector */ const char **msg_names; + + /** Don't automatically free message buffer vetor */ u8 *message_bounce; + + /** Message is mp safe vector */ u8 *is_mp_safe; + + /** Allocator ring vectors (in shared memory) */ struct ring_alloc_ *arings; + + /** Number of times that the ring allocator failed */ u32 ring_misses; + + /** Number of garbage-collected message buffers */ u32 garbage_collects; + + /** Number of missing clients / failed message sends */ u32 missing_clients; + + /** Received message trace configuration */ vl_api_trace_t *rx_trace; + + /** Sent message trace configuration */ vl_api_trace_t *tx_trace; + + /** Print every received message */ int msg_print_flag; + + /** Current trace configuration */ trace_cfg_t *api_trace_cfg; + + /** Current process PID */ int our_pid; + + /** Binary api segment descriptor */ svm_region_t *vlib_rp; + + /** Vector of all mapped shared-VM segments */ svm_region_t **mapped_shmem_regions; + + /** Binary API shared-memory segment header pointer */ struct vl_shmem_hdr_ *shmem_hdr; + + /** vlib/vpp only: vector of client registrations */ vl_api_registration_t **vl_clients; + /** vlib/vpp only: serialized (message, name, crc) table */ u8 *serialized_message_table_in_shmem; - /* For plugin msg allocator */ + /** First available message ID, for theplugin msg allocator */ u16 first_available_msg_id; - /* message range by name hash */ + /** Message range by name hash */ uword *msg_range_by_name; - /* vector of message ranges */ + /** vector of message ranges */ vl_api_msg_range_t *msg_ranges; - /* uid for the api shared memory region */ + /** uid for the api shared memory region */ int api_uid; - /* gid for the api shared memory region */ + + /** gid for the api shared memory region */ int api_gid; - /* base virtual address for global VM region */ + /** base virtual address for global VM region */ u64 global_baseva; - /* size of the global VM region */ + /** size of the global VM region */ u64 global_size; - /* size of the API region */ + /** size of the API region */ u64 api_size; - /* size of the global VM private mheap */ + /** size of the global VM private mheap */ u64 global_pvt_heap_size; - /* size of the api private mheap */ + /** size of the api private mheap */ u64 api_pvt_heap_size; - /* Client-only data structures */ + /** Peer input queue pointer */ unix_shared_memory_queue_t *vl_input_queue; - /* + /** * All VLIB-side 
message handlers use my_client_index to identify * the queue / client. This works in sim replay. */ int my_client_index; - /* + /** * This is the (shared VM) address of the registration, * don't use it to id the connection since it can't possibly * work in simulator replay. */ vl_api_registration_t *my_registration; + /** (Historical) signal-based queue non-empty signal, to be removed */ i32 vlib_signal; - /* vlib input queue length */ + /** vpp/vlib input queue length */ u32 vlib_input_queue_length; - /* client side message index hash table */ + /** client message index hash table */ uword *msg_index_by_name_and_crc; + /** Shared VM binary API region name */ const char *region_name; + + /** Chroot path to the shared memory API files */ const char *root_path; - /* Replay in progress? */ + /** Replay in progress? */ int replay_in_progress; - /* Dump (msg-name, crc) snapshot here at startup */ + /** Dump (msg-name, crc) snapshot here at startup */ u8 *save_msg_table_filename; - /* List of API client reaper functions */ + /** List of API client reaper functions */ _vl_msg_api_function_list_elt_t *reaper_function_registrations; } api_main_t; extern api_main_t api_main; - #endif /* included_api_common_h */ /* diff --git a/src/vlibapi/api_doc.md b/src/vlibapi/api_doc.md new file mode 100644 index 00000000..e620ee12 --- /dev/null +++ b/src/vlibapi/api_doc.md @@ -0,0 +1,349 @@ +# Binary API support {#api_doc} + +VPP provides a binary API scheme to allow a wide variety of client codes to +program data-plane tables. As of this writing, there are hundreds of binary +APIs. + +Messages are defined in `*.api` files. Today, there are about 50 api files, +with more arriving as folks add programmable features. The API file compiler +sources reside in @ref src/tools/vppapigen . + +Here's a typical request/response message definition, from +@ref src/vnet/interface.api : + +``` + autoreply define sw_interface_set_flags + { + u32 client_index; + u32 context; + u32 sw_if_index; + /* 1 = up, 0 = down */ + u8 admin_up_down; + }; +``` + +To a first approximation, the API compiler renders this definition as +follows: + +``` + /****** Message ID / handler enum ******/ + #ifdef vl_msg_id + vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler) + vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler) + #endif + + /****** Message names ******/ + #ifdef vl_msg_name + vl_msg_name(vl_api_sw_interface_set_flags_t, 1) + vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1) + #endif + + /****** Message name, crc list ******/ + #ifdef vl_msg_name_crc_list + #define foreach_vl_msg_name_crc_interface \ + _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \ + _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply, dfbf3afa) \ + #endif + + /****** Typedefs *****/ + #ifdef vl_typedefs + typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags { + u16 _vl_msg_id; + u32 client_index; + u32 context; + u32 sw_if_index; + u8 admin_up_down; + }) vl_api_sw_interface_set_flags_t; + + typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply { + u16 _vl_msg_id; + u32 context; + i32 retval; + }) vl_api_sw_interface_set_flags_reply_t; +``` + +To change the admin state of an interface, a binary api client sends a +@ref vl_api_sw_interface_set_flags_t to vpp, which will respond with a +@ref vl_api_sw_interface_set_flags_reply_t message. 
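+
+The sketch below is illustrative only: it is not emitted by the API compiler,
+and the warning text is invented for this example. A client-side handler for
+the reply, named per the `vl_api_<msg>_t_handler` convention shown above,
+only needs to byte-swap and check the retval field:
+
+```
+    static void vl_api_sw_interface_set_flags_reply_t_handler
+      (vl_api_sw_interface_set_flags_reply_t * mp)
+    {
+      /* retval travels in network byte order, like the rest of the message */
+      i32 retval = clib_net_to_host_u32 (mp->retval);
+
+      if (retval != 0)
+        clib_warning ("sw_interface_set_flags failed: %d", retval);
+    }
+```
+
+Registering such a handler so the dispatcher can find it is covered in
+"Receiving binary API messages from vpp" below.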
+ +Multiple layers of software, transport types, and shared libraries +implement a variety of features: + +* API message allocation, tracing, pretty-printing, and replay. +* Message transport via global shared memory, pairwise/private shared + memory, and sockets. +* Barrier synchronization of worker threads across thread-unsafe + message handlers. + +Correctly-coded message handlers know nothing about the transport used to +deliver messages to/from vpp. It's reasonably straighforward to use multiple +API message transport types simultaneously. + +For historical reasons, binary api messages are (putatively) sent in network +byte order. As of this writing, we're seriously considering whether that +choice makes sense. + + +## Message Allocation + +Since binary API messages are always processed in order, we allocate messages +using a ring allocator whenever possible. This scheme is extremely fast when +compared with a traditional memory allocator, and doesn't cause heap +fragmentation. See +@ref src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal() . + +Regardless of transport, binary api messages always follow a @ref msgbuf_t +header: + +``` + typedef struct msgbuf_ + { + unix_shared_memory_queue_t *q; + u32 data_len; + u32 gc_mark_timestamp; + u8 data[0]; + } msgbuf_t; +``` + +This structure makes it easy to trace messages without having to +decode them - simply save data_len bytes - and allows +@ref vl_msg_api_free() to rapidly dispose of message buffers: + +``` + void + vl_msg_api_free (void *a) + { + msgbuf_t *rv; + api_main_t *am = &api_main; + + rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data)); + + /* + * Here's the beauty of the scheme. Only one proc/thread has + * control of a given message buffer. To free a buffer, we just + * clear the queue field, and leave. No locks, no hits, no errors... + */ + if (rv->q) + { + rv->q = 0; + rv->gc_mark_timestamp = 0; + return; + } + + } +``` + +## Message Tracing and Replay + +It's extremely important that vpp can capture and replay sizeable binary API +traces. System-level issues involving hundreds of thousands of API +transactions can be re-run in a second or less. Partial replay allows one to +binary-search for the point where the wheels fall off. One can add scaffolding +to the data plane, to trigger when complex conditions obtain. + +With binary API trace, print, and replay, system-level bug reports of the form +"after 300,000 API transactions, the vpp data-plane stopped forwarding +traffic, FIX IT!" can be solved offline. + +More often than not, one discovers that a control-plane client +misprograms the data plane after a long time or under complex +circumstances. Without direct evidence, "it's a data-plane problem!" + +See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file() , +and @ref src/vlibapi/api_shared.c . See also the debug CLI command "api trace" + +## Client connection details + +Establishing a binary API connection to vpp from a C-language client +is easy: + +``` + int + connect_to_vpe (char *client_name, int client_message_queue_length) + { + vat_main_t *vam = &vat_main; + api_main_t *am = &api_main; + + if (vl_client_connect_to_vlib ("/vpe-api", client_name, + client_message_queue_length) < 0) + return -1; + + /* Memorize vpp's binary API message input queue address */ + vam->vl_input_queue = am->shmem_hdr->vl_input_queue; + /* And our client index */ + vam->my_client_index = am->my_client_index; + return 0; + } +``` + +32 is a typical value for client_message_queue_length. 
Vpp cannot +block when it needs to send an API message to a binary API client, and +the vpp-side binary API message handlers are very fast. When sending +asynchronous messages, make sure to scrape the binary API rx ring with +some enthusiasm. + +### binary API message RX pthread + +Calling @ref vl_client_connect_to_vlib spins up a binary API message RX +pthread: + +``` + static void * + rx_thread_fn (void *arg) + { + unix_shared_memory_queue_t *q; + memory_client_main_t *mm = &memory_client_main; + api_main_t *am = &api_main; + + q = am->vl_input_queue; + + /* So we can make the rx thread terminate cleanly */ + if (setjmp (mm->rx_thread_jmpbuf) == 0) + { + mm->rx_thread_jmpbuf_valid = 1; + while (1) + { + vl_msg_api_queue_handler (q); + } + } + pthread_exit (0); + } +``` + +To handle the binary API message queue yourself, use +@ref vl_client_connect_to_vlib_no_rx_pthread. + +In turn, vl_msg_api_queue_handler(...) uses mutex/condvar signalling +to wake up, process vpp -> client traffic, then sleep. Vpp supplies a +condvar broadcast when the vpp -> client API message queue transitions +from empty to nonempty. + +Vpp checks its own binary API input queue at a very high rate. Vpp +invokes message handlers in "process" context [aka cooperative +multitasking thread context] at a variable rate, depending on +data-plane packet processing requirements. + +## Client disconnection details + +To disconnect from vpp, call @ref vl_client_disconnect_from_vlib +. Please arrange to call this function if the client application +terminates abnormally. Vpp makes every effort to hold a decent funeral +for dead clients, but vpp can't guarantee to free leaked memory in the +shared binary API segment. + +## Sending binary API messages to vpp + +The point of the exercise is to send binary API messages to vpp, and +to receive replies from vpp. Many vpp binary APIs comprise a client +request message, and a simple status reply. For example, to +set the admin status of an interface, one codes: + +``` + vl_api_sw_interface_set_flags_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS); + mp->client_index = api_main.my_client_index; + mp->sw_if_index = clib_host_to_net_u32 (); + vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp); +``` + +Key points: + +* Use @ref vl_msg_api_alloc to allocate message buffers + +* Allocated message buffers are not initialized, and must be presumed + to contain trash. + +* Don't forget to set the _vl_msg_id field! + +* As of this writing, binary API message IDs and data are sent in + network byte order + +* The client-library global data structure @ref api_main keeps track + of sufficient pointers and handles used to communicate with vpp + +## Receiving binary API messages from vpp + +Unless you've made other arrangements (see @ref +vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a +separate rx pthread*. Synchronization with the client application main +thread is the responsibility of the application! + +Set up message handlers about as follows: + +``` + #define vl_typedefs /* define message structures */ + #include + #undef vl_typedefs + + /* declare message handlers for each api */ + + #define vl_endianfun /* define message structures */ + #include + #undef vl_endianfun + + /* instantiate all the print functions we know about */ + #define vl_print(handle, ...) 
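+    /* with vl_print defined to nothing, the generated print functions
+       produce no output; map it to a real print routine to get decoded
+       message dumps */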
+ #define vl_printfun + #include + #undef vl_printfun + + /* Define a list of all message that the client handles */ + #define foreach_vpe_api_reply_msg \ + _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) + + static clib_error_t * + my_api_hookup (vlib_main_t * vm) + { + api_main_t *am = &api_main; + + #define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; + #undef _ + + return 0; + } +``` + +The key API used to establish message handlers is @ref +vl_msg_api_set_handlers , which sets values in multiple parallel +vectors in the @ref api_main_t structure. As of this writing: not all +vector element values can be set through the API. You'll see sporadic +API message registrations followed by minor adjustments of this form: + +``` + /* + * Thread-safe API messages + */ + am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1; + am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1; +``` + + + + + + + + + + + + + + + + + + + + + diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 401f388a..b6b87529 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -2348,7 +2348,9 @@ cleanup: ?*/ /*? - * Display a serialized API message decode table + * Display a serialized API message decode table, compare a saved + * decode table with the current image, to establish API differences. + * ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (dump_api_table_file, static) = -- cgit 1.2.3-korg From 7e9743aef924093c9c25bdf445637434c190d31a Mon Sep 17 00:00:00 2001 From: John Lo Date: Sat, 23 Sep 2017 08:59:58 -0400 Subject: Fix sending GARP/NA on Bonded Interface Active/Backup Link Up/Down For bonded interface in Active/Backup mode (mode 1), we need to send a GARP/NA packet, if IP address is present, on slave link state change to up or down to help with route convergence. The callback from DPDK happens in a separate thread so we need to make sure RPC call is used to signal the send_garp_na process in the main thread. Also need to fix DPDK polling so the slave links are not polled. 
Change-Id: If5fd8ea2d28c54dd28726ac403ad366386ce9651 Signed-off-by: John Lo --- src/plugins/dpdk/device/common.c | 94 ++++++++++++++++++++++++++++++---------- src/plugins/dpdk/device/node.c | 2 + src/vlibmemory/memory_vlib.c | 30 +++++++++++-- src/vnet/ethernet/arp.c | 38 +--------------- src/vnet/ethernet/arp_packet.h | 9 +--- 5 files changed, 101 insertions(+), 72 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index 2707b4d8..aedc3f52 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -181,12 +181,69 @@ dpdk_device_stop (dpdk_device_t * xd) } } +/* Even type for send_garp_na_process */ +enum +{ + SEND_GARP_NA = 1, +} dpdk_send_garp_na_process_event_t; + +static vlib_node_registration_t send_garp_na_proc_node; + +static uword +send_garp_na_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vnet_main_t *vnm = vnet_get_main (); + uword event_type, *event_data = 0; + + while (1) + { + u32 i; + uword dpdk_port; + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + ASSERT (event_type == SEND_GARP_NA); + for (i = 0; i < vec_len (event_data); i++) + { + dpdk_port = event_data[i]; + if (i < 5) /* wait 0.2 sec for link to settle, max total 1 sec */ + vlib_process_suspend (vm, 0.2); + dpdk_device_t *xd = &dpdk_main.devices[dpdk_port]; + u32 hw_if_index = xd->hw_if_index; + vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + dpdk_update_link_state (xd, vlib_time_now (vm)); + send_ip4_garp (vm, hi); + send_ip6_na (vm, hi); + } + vec_reset_length (event_data); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = { + .function = send_garp_na_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "send-garp-na-process", +}; +/* *INDENT-ON* */ + +void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +static void +garp_na_proc_callback (uword * dpdk_port) +{ + vlib_main_t *vm = vlib_get_main (); + ASSERT (vlib_get_thread_index () == 0); + vlib_process_signal_event + (vm, send_garp_na_proc_node.index, SEND_GARP_NA, *dpdk_port); +} + always_inline int dpdk_port_state_callback_inline (uint8_t port_id, enum rte_eth_event_type type, void *param) { struct rte_eth_link link; - vlib_main_t *vm = vlib_get_main (); dpdk_device_t *xd = &dpdk_main.devices[port_id]; RTE_SET_USED (param); @@ -201,32 +258,21 @@ dpdk_port_state_callback_inline (uint8_t port_id, if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE) { - u8 bd_port = xd->bond_port; + uword bd_port = xd->bond_port; int bd_mode = rte_eth_bond_mode_get (bd_port); - - if ((link_up && !(xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP)) || - (!link_up && (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE_UP))) +#if 0 + clib_warning ("Port %d state to %s, " + "slave of port %d BondEthernet%d in mode %d", + port_id, (link_up) ? "UP" : "DOWN", + bd_port, xd->port_id, bd_mode); +#endif + if (bd_mode == BONDING_MODE_ACTIVE_BACKUP) { - clib_warning ("Port %d state to %s, " - "slave of port %d BondEthernet%d in mode %d", - port_id, (link_up) ? 
"UP" : "DOWN", - bd_port, xd->port_id, bd_mode); - if (bd_mode == BONDING_MODE_ACTIVE_BACKUP) - { - rte_eth_link_get_nowait (bd_port, &link); - if (link.link_status) /* bonded interface up */ - { - u32 hw_if_index = dpdk_main.devices[bd_port].hw_if_index; - vlib_process_signal_event - (vm, send_garp_na_process_node_index, SEND_GARP_NA, - hw_if_index); - } - } + vl_api_force_rpc_call_main_thread + (garp_na_proc_callback, (u8 *) & bd_port, sizeof (uword)); } - if (link_up) /* Update slave link status */ - xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP; - else - xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP; + xd->flags |= link_up ? + DPDK_DEVICE_FLAG_BOND_SLAVE_UP : ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP; } else /* Should not happen as callback not setup for "normal" links */ { diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 74fb8da1..cf8b9699 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -661,6 +661,8 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) foreach_device_and_queue (dq, rt->devices_and_queues) { xd = vec_elt_at_index(dm->devices, dq->dev_instance); + if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE)) + continue; /* Do not poll slave to a bonded interface */ if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1); else diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index b6b87529..77959e6d 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1452,8 +1452,9 @@ vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp) clib_warning ("unimplemented"); } -void -vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +always_inline void +vl_api_rpc_call_main_thread_inline (void *fp, u8 * data, u32 data_length, + u8 force_rpc) { vl_api_rpc_call_t *mp; api_main_t *am = &api_main; @@ -1461,7 +1462,7 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) unix_shared_memory_queue_t *q; /* Main thread: call the function directly */ - if (vlib_get_thread_index () == 0) + if ((force_rpc == 0) && (vlib_get_thread_index () == 0)) { vlib_main_t *vm = vlib_get_main (); void (*call_fp) (void *); @@ -1507,6 +1508,29 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) pthread_mutex_unlock (&q->mutex); } +/* + * Check if called from worker threads. + * If so, make rpc call of fp through shmem. 
+ * Otherwise, call fp directly + */ +void +vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */ + 0); +} + +/* + * Always make rpc call of fp through shmem, useful for calling from threads + * not setup as worker threads, such as DPDK callback thread + */ +void +vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) +{ + vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */ + 1); +} + static void vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp) { diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index e974d255..120a276c 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -2482,7 +2482,7 @@ ethernet_arp_change_mac (u32 sw_if_index) /* *INDENT-ON* */ } -void static +void send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi) { ip4_main_t *i4m = &ip4_main; @@ -2526,42 +2526,6 @@ send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi) } } -static vlib_node_registration_t send_garp_na_proc_node; - -static uword -send_garp_na_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vnet_main_t *vnm = vnet_get_main (); - uword event_type, *event_data = 0; - - send_garp_na_process_node_index = send_garp_na_proc_node.index; - - while (1) - { - vlib_process_wait_for_event (vm); - event_type = vlib_process_get_events (vm, &event_data); - if ((event_type == SEND_GARP_NA) && (vec_len (event_data) >= 1)) - { - u32 hw_if_index = event_data[0]; - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); - send_ip4_garp (vm, hi); - send_ip6_na (vm, hi); - } - vec_reset_length (event_data); - } - return 0; -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = { - .function = send_garp_na_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "send-garp-na-process", -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h index d740b844..661f33f9 100644 --- a/src/vnet/ethernet/arp_packet.h +++ b/src/vnet/ethernet/arp_packet.h @@ -167,14 +167,7 @@ typedef struct ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index); u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va); -/* Node index for send_garp_na_process */ -extern u32 send_garp_na_process_node_index; - -/* Even type for send_garp_na_process */ -enum -{ - SEND_GARP_NA = 1, -} dpdk_send_garp_na_process_event_t; +void send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi); #endif /* included_ethernet_arp_packet_h */ -- cgit 1.2.3-korg From 69128d0209ba6108430dca9cc78ab36a9b1c793e Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 26 Sep 2017 10:54:34 -0400 Subject: Add thread-safe event signaller, use RPC where required Update ping code to use the new function Change-Id: Ieb753b23f8402cbe5667c22747896784c8ece937 Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/vlib/node_funcs.h | 23 +++++++++++++++++++++++ src/vlib/threads.c | 24 +++++++++++++++++++++++- src/vlib/threads.h | 14 +++++++++++++- src/vlibmemory/memory_vlib.c | 13 ++++++++++++- src/vnet/ip/ping.c | 4 ++-- 5 files changed, 73 insertions(+), 5 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 3ae4e541..0734476c 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -965,6 +965,29 @@ vlib_process_signal_event_pointer (vlib_main_t * vm, d[0] = data; } +/** 
+ * Signal event to process from any thread. + * + * When in doubt, use this. + */ +always_inline void +vlib_process_signal_event_mt (vlib_main_t * vm, + uword node_index, uword type_opaque, uword data) +{ + if (vlib_get_thread_index () != 0) + { + vlib_process_signal_event_mt_args_t args = { + .node_index = node_index, + .type_opaque = type_opaque, + .data = data, + }; + vlib_rpc_call_main_thread (vlib_process_signal_event_mt_helper, + (u8 *) & args, sizeof (args)); + } + else + vlib_process_signal_event (vm, node_index, type_opaque, data); +} + always_inline void vlib_process_signal_one_time_event (vlib_main_t * vm, uword node_index, diff --git a/src/vlib/threads.c b/src/vlib/threads.c index f9c7043c..be8daa64 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1767,7 +1767,6 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) return (fqm - tm->frame_queue_mains); } - int vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) { @@ -1781,6 +1780,29 @@ vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) return 0; } +void +vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * + args) +{ + ASSERT (vlib_get_thread_index () == 0); + vlib_process_signal_event (vlib_get_main (), args->node_index, + args->type_opaque, args->data); +} + +void *rpc_call_main_thread_cb_fn; + +void +vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size) +{ + if (rpc_call_main_thread_cb_fn) + { + void (*fp) (void *, u8 *, u32) = rpc_call_main_thread_cb_fn; + (*fp) (callback, args, arg_size); + } + else + clib_warning ("BUG: rpc_call_main_thread_cb_fn NULL!"); +} + clib_error_t * threads_init (vlib_main_t * vm) { diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 72340ee1..8931584b 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -171,6 +171,13 @@ typedef struct frame_queue_nelt_counter_t *frame_queue_histogram; } vlib_frame_queue_main_t; +typedef struct +{ + uword node_index; + uword type_opaque; + uword data; +} vlib_process_signal_event_mt_args_t; + /* Called early, in thread 0's context */ clib_error_t *vlib_thread_init (vlib_main_t * vm); @@ -510,9 +517,14 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, } u8 *vlib_thread_stack_init (uword thread_index); - int vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb); +extern void *rpc_call_main_thread_cb_fn; + +void +vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * + args); +void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size); #endif /* included_vlib_threads_h */ diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 77959e6d..d305ea61 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1573,6 +1573,17 @@ _(RPC_CALL_REPLY,rpc_call_reply) #define foreach_plugin_trace_msg \ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids) +/* + * Set the rpc callback at our earliest possible convenience. + * This avoids ordering issues between thread_init() -> start_workers and + * an init function which we could define here. If we ever intend to use + * vlib all by itself, we can't create a link-time dependency on + * an init function here and a typical "call foo_init first" + * guitar lick. + */ + +extern void *rpc_call_main_thread_cb_fn; + static clib_error_t * rpc_api_hookup (vlib_main_t * vm) { @@ -1599,7 +1610,7 @@ rpc_api_hookup (vlib_main_t * vm) /* No reason to halt the parade to create a trace record... 
*/ am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1; - + rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread; return 0; } diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c index c847e696..0fa537f6 100755 --- a/src/vnet/ip/ping.c +++ b/src/vnet/ip/ping.c @@ -97,7 +97,7 @@ signal_ip46_icmp_reply_event (u8 event_type, vlib_buffer_t * b0) clib_memcpy (vnet_buffer (vlib_get_buffer (vm, bi0_copy))->unused, &nowts, sizeof (nowts)); - vlib_process_signal_event (vm, pr->cli_process_id, event_type, bi0_copy); + vlib_process_signal_event_mt (vm, pr->cli_process_id, event_type, bi0_copy); return 1; } @@ -646,7 +646,7 @@ run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4, i = 1 + ping_repeat; break; } - vec_free(event_data); + vec_free (event_data); } } vlib_cli_output (vm, "\n"); -- cgit 1.2.3-korg From fb353ce54efc3abf3c2ba7795d0c9bf7aed6df96 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 18 Oct 2017 08:03:14 -0400 Subject: CSIT-844: fix binary api rx pthread heap push/pop We need to push/pop the rx pthread's heap without affecting other thread(s). Search clib_per_cpu_mheaps, locate an unused slot. Duplicate the main thread heap pointer in that slot, and set __os_thread_index appropriately. Don't bail out of vpp_api_test with results pending, e.g. at the end of a vpp_api_test script. Even though vpp will eventuallly garbage-collect them, We don't want to leave allocated reply messages lurking in the api message allocation rings... This patch is a manual cherry-pick from gerrit 8862 Change-Id: If3c84abe61496905432bfa36767068bad4bd243b Signed-off-by: Dave Barach --- src/vat/main.c | 13 +++++++++++++ src/vlibmemory/memory_client.c | 25 ++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) (limited to 'src/vlibmemory') diff --git a/src/vat/main.c b/src/vat/main.c index 1bad2ebb..b8856cc9 100644 --- a/src/vat/main.c +++ b/src/vat/main.c @@ -297,6 +297,7 @@ main (int argc, char **argv) u8 *heap; mheap_t *h; int i; + f64 timeout; clib_mem_init (0, 128 << 20); @@ -408,6 +409,18 @@ main (int argc, char **argv) fclose (vam->ifp); } + /* + * Particularly when running a script, don't be in a hurry to leave. + * A reply message queued to this process will end up constipating + * the allocation rings. + */ + timeout = vat_time_now (vam) + 2.0; + while (vam->result_ready == 0 && vat_time_now (vam) < timeout) + ; + + if (vat_time_now (vam) > timeout) + clib_warning ("BUG: message reply spin-wait timeout"); + vl_client_disconnect_from_vlib (); exit (0); } diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c index a162d6bb..8a60a322 100644 --- a/src/vlibmemory/memory_client.c +++ b/src/vlibmemory/memory_client.c @@ -80,6 +80,7 @@ rx_thread_fn (void *arg) unix_shared_memory_queue_t *q; memory_client_main_t *mm = &memory_client_main; api_main_t *am = &api_main; + int i; q = am->vl_input_queue; @@ -87,10 +88,27 @@ rx_thread_fn (void *arg) if (setjmp (mm->rx_thread_jmpbuf) == 0) { mm->rx_thread_jmpbuf_valid = 1; - while (1) + /* + * Find an unused slot in the per-cpu-mheaps array, + * and grab it for this thread. We need to be able to + * push/pop the thread heap without affecting other thread(s). 
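+       * Each slot of clib_per_cpu_mheaps selects the heap used by clib
+       * memory calls made from the thread whose __os_thread_index
+       * matches that slot, so parking a copy of the main heap pointer
+       * in a free slot gives this pthread its own heap pointer to
+       * push/pop.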
+ */ + if (__os_thread_index == 0) { - vl_msg_api_queue_handler (q); + for (i = 0; i < ARRAY_LEN (clib_per_cpu_mheaps); i++) + { + if (clib_per_cpu_mheaps[i] == 0) + { + /* Copy the main thread mheap pointer */ + clib_per_cpu_mheaps[i] = clib_per_cpu_mheaps[0]; + __os_thread_index = i; + break; + } + } + ASSERT (__os_thread_index > 0); } + while (1) + vl_msg_api_queue_handler (q); } pthread_exit (0); } @@ -138,7 +156,7 @@ vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp) /* Recreate the vnet-side API message handler table */ tblv = uword_to_pointer (mp->message_table, u8 *); - serialize_open_vector (sm, tblv); + unserialize_open_data (sm, tblv, vec_len (tblv)); unserialize_integer (sm, &nmsgs, sizeof (u32)); for (i = 0; i < nmsgs; i++) @@ -311,6 +329,7 @@ vl_client_disconnect (void) /* drain the queue */ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY) { + clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id)); vl_msg_api_handler ((void *) rp); continue; } -- cgit 1.2.3-korg
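Taken together, the patches above give off-main-thread code two safe ways to
reach a vlib process node: @ref vlib_process_signal_event_mt from vlib worker
threads, and vl_api_force_rpc_call_main_thread from threads that are not vlib
workers (for example the DPDK callback pthread). A minimal sketch of the
worker-thread path, using a hypothetical process node my_proc_node and a
hypothetical event code MY_EVENT; only vlib_process_signal_event_mt comes from
the patches above:

```
    #include <vlib/vlib.h>

    /* Hypothetical: MY_EVENT and my_proc_node are assumed to be
     * defined and registered elsewhere in the application. */
    #define MY_EVENT 1
    extern vlib_node_registration_t my_proc_node;

    static void
    notify_my_process (uword data)
    {
      vlib_main_t *vm = vlib_get_main ();

      /* Signals directly when running on the main thread; from a vlib
       * worker thread the signal is relayed through
       * vlib_rpc_call_main_thread, so the process node is always
       * woken from thread 0. */
      vlib_process_signal_event_mt (vm, my_proc_node.index, MY_EVENT, data);
    }
```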