From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vlib/threads.h | 470 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 470 insertions(+) create mode 100644 src/vlib/threads.h (limited to 'src/vlib/threads.h') diff --git a/src/vlib/threads.h b/src/vlib/threads.h new file mode 100644 index 00000000..34ab5be8 --- /dev/null +++ b/src/vlib/threads.h @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vlib_threads_h +#define included_vlib_threads_h + +#include +#include + +extern vlib_main_t **vlib_mains; + +void vlib_set_thread_name (char *name); + +/* arg is actually a vlib__thread_t * */ +typedef void (vlib_thread_function_t) (void *arg); + +typedef struct vlib_thread_registration_ +{ + /* constructor generated list of thread registrations */ + struct vlib_thread_registration_ *next; + + /* config parameters */ + char *name; + char *short_name; + vlib_thread_function_t *function; + uword mheap_size; + int fixed_count; + u32 count; + int no_data_structure_clone; + u32 frame_queue_nelts; + + /* All threads of this type run on pthreads */ + int use_pthreads; + u32 first_index; + uword *coremask; +} vlib_thread_registration_t; + +/* + * Frames have their cpu / vlib_main_t index in the low-order N bits + * Make VLIB_MAX_CPUS a power-of-two, please... + */ + +#ifndef VLIB_MAX_CPUS +#define VLIB_MAX_CPUS 256 +#endif + +#if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS +#error Please increase number of per-cpu mheaps +#endif + +#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */ +#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK) + +#define VLIB_LOG2_THREAD_STACK_SIZE (20) +#define VLIB_THREAD_STACK_SIZE (1< 0 +/* long barrier timeout, for gdb... 
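*/

A note on the mechanism this timeout guards: the main thread raises
wait_at_barrier, then waits until every worker has parked itself in
vlib_worker_thread_barrier_check () before touching shared state. The
following stand-alone model of that handshake uses C11 atomics; the
names mirror the wait_at_barrier / workers_at_barrier fields, but this
is an illustrative sketch, not the vlib code itself.

#include <stdatomic.h>

static atomic_uint wait_at_barrier;    /* raised by the main thread */
static atomic_uint workers_at_barrier; /* how many workers are parked */

/* called from each worker's dispatch loop */
static void
worker_barrier_check (void)
{
  if (atomic_load (&wait_at_barrier))
    {
      atomic_fetch_add (&workers_at_barrier, 1);
      while (atomic_load (&wait_at_barrier))
        ;                              /* spin until released */
      atomic_fetch_sub (&workers_at_barrier, 1);
    }
}

/* main thread: stop the world... */
static void
barrier_sync (unsigned n_workers)
{
  atomic_store (&wait_at_barrier, 1);
  while (atomic_load (&workers_at_barrier) < n_workers)
    ;                                  /* wait for all workers to park */
}

/* ...and let it go again */
static void
barrier_release (void)
{
  atomic_store (&wait_at_barrier, 0);
}

/* with gdb attached, a stopped worker must not trip the timeout, hence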
*/ +#define BARRIER_SYNC_TIMEOUT (600.1) +#else +#define BARRIER_SYNC_TIMEOUT (1.0) +#endif + +void vlib_worker_thread_barrier_sync (vlib_main_t * vm); +void vlib_worker_thread_barrier_release (vlib_main_t * vm); + +always_inline void +vlib_smp_unsafe_warning (void) +{ + if (CLIB_DEBUG > 0) + { + if (os_get_cpu_number ()) + fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__); + } +} + +typedef enum +{ + VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0, + VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX, +} vlib_fork_fixup_t; + +void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which); + +static inline void +vlib_worker_thread_barrier_check (void) +{ + if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier)) + { + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); + while (*vlib_worker_threads->wait_at_barrier) + ; + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); + } +} + +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + if (vec_len (vlib_mains) == 0) \ + vec_add1 (__vlib_mains, &vlib_global_main); \ + else \ + { \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... */ \ + (body); \ + } \ + vec_free (__vlib_mains); \ +} while (0); + +#define foreach_sched_policy \ + _(SCHED_OTHER, OTHER, "other") \ + _(SCHED_BATCH, BATCH, "batch") \ + _(SCHED_IDLE, IDLE, "idle") \ + _(SCHED_FIFO, FIFO, "fifo") \ + _(SCHED_RR, RR, "rr") + +typedef enum +{ +#define _(v,f,s) SCHED_POLICY_##f = v, + foreach_sched_policy +#undef _ + SCHED_POLICY_N, +} sched_policy_t; + +typedef struct +{ + /* Link list of registrations, built by constructors */ + vlib_thread_registration_t *next; + + /* Vector of registrations, w/ non-data-structure clones at the top */ + vlib_thread_registration_t **registrations; + + uword *thread_registrations_by_name; + + vlib_worker_thread_t *worker_threads; + + /* + * Launch all threads as pthreads, + * not eal_rte_launch (strict affinity) threads + */ + int use_pthreads; + + /* Number of vlib_main / vnet_main clones */ + u32 n_vlib_mains; + + /* Number of thread stacks to create */ + u32 n_thread_stacks; + + /* Number of pthreads */ + u32 n_pthreads; + + /* Number of DPDK eal threads */ + u32 n_eal_threads; + + /* Number of cores to skip, must match the core mask */ + u32 skip_cores; + + /* Thread prefix name */ + u8 *thread_prefix; + + /* main thread lcore */ + u8 main_lcore; + + /* Bitmap of available CPU cores */ + uword *cpu_core_bitmap; + + /* Bitmap of available CPU sockets (NUMA nodes) */ + uword *cpu_socket_bitmap; + + /* Worker handoff queues */ + vlib_frame_queue_main_t *frame_queue_mains; + + /* worker thread initialization barrier */ + volatile u32 worker_thread_release; + + /* scheduling policy */ + u32 sched_policy; + + /* scheduling policy priority */ + u32 sched_priority; + +} vlib_thread_main_t; + +extern vlib_thread_main_t vlib_thread_main; + +#define VLIB_REGISTER_THREAD(x,...) 
\ + __VA_ARGS__ vlib_thread_registration_t x; \ +static void __vlib_add_thread_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_thread_registration_##x (void) \ +{ \ + vlib_thread_main_t * tm = &vlib_thread_main; \ + x.next = tm->next; \ + tm->next = &x; \ +} \ +__VA_ARGS__ vlib_thread_registration_t x + +always_inline u32 +vlib_num_workers () +{ + return vlib_thread_main.n_vlib_mains - 1; +} + +always_inline u32 +vlib_get_worker_cpu_index (u32 worker_index) +{ + return worker_index + 1; +} + +always_inline u32 +vlib_get_worker_index (u32 cpu_index) +{ + return cpu_index - 1; +} + +always_inline u32 +vlib_get_current_worker_index () +{ + return os_get_cpu_number () - 1; +} + +always_inline vlib_main_t * +vlib_get_worker_vlib_main (u32 worker_index) +{ + vlib_main_t *vm; + vlib_thread_main_t *tm = &vlib_thread_main; + ASSERT (worker_index < tm->n_vlib_mains - 1); + vm = vlib_mains[worker_index + 1]; + ASSERT (vm); + return vm; +} + +static inline void +vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf) +{ + CLIB_MEMORY_BARRIER (); + hf->valid = 1; +} + +static inline vlib_frame_queue_elt_t * +vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index) +{ + vlib_frame_queue_t *fq; + vlib_frame_queue_elt_t *elt; + vlib_thread_main_t *tm = &vlib_thread_main; + vlib_frame_queue_main_t *fqm = + vec_elt_at_index (tm->frame_queue_mains, frame_queue_index); + u64 new_tail; + + fq = fqm->vlib_frame_queues[index]; + ASSERT (fq); + + new_tail = __sync_add_and_fetch (&fq->tail, 1); + + /* Wait until a ring slot is available */ + while (new_tail >= fq->head_hint + fq->nelts) + vlib_worker_thread_barrier_check (); + + elt = fq->elts + (new_tail & (fq->nelts - 1)); + + /* this would be very bad... */ + while (elt->valid) + ; + + elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME; + elt->last_n_vectors = elt->n_vectors = 0; + + return elt; +} + +static inline vlib_frame_queue_t * +is_vlib_frame_queue_congested (u32 frame_queue_index, + u32 index, + u32 queue_hi_thresh, + vlib_frame_queue_t ** + handoff_queue_by_worker_index) +{ + vlib_frame_queue_t *fq; + vlib_thread_main_t *tm = &vlib_thread_main; + vlib_frame_queue_main_t *fqm = + vec_elt_at_index (tm->frame_queue_mains, frame_queue_index); + + fq = handoff_queue_by_worker_index[index]; + if (fq != (vlib_frame_queue_t *) (~0)) + return fq; + + fq = fqm->vlib_frame_queues[index]; + ASSERT (fq); + + if (PREDICT_FALSE (fq->tail >= (fq->head_hint + queue_hi_thresh))) + { + /* a valid entry in the array will indicate the queue has reached + * the specified threshold and is congested + */ + handoff_queue_by_worker_index[index] = fq; + fq->enqueue_full_events++; + return fq; + } + + return NULL; +} + +static inline vlib_frame_queue_elt_t * +vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, + u32 vlib_worker_index, + vlib_frame_queue_elt_t ** + handoff_queue_elt_by_worker_index) +{ + vlib_frame_queue_elt_t *elt; + + if (handoff_queue_elt_by_worker_index[vlib_worker_index]) + return handoff_queue_elt_by_worker_index[vlib_worker_index]; + + elt = vlib_get_frame_queue_elt (frame_queue_index, vlib_worker_index); + + handoff_queue_elt_by_worker_index[vlib_worker_index] = elt; + + return elt; +} + +#endif /* included_vlib_threads_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 878c609889dcdc58538d40d8b3f662320f88573d Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 4 Jan 2017 13:19:27 +0100 Subject: vlib: add 
buffer and thread callbacks Change-Id: I8e2e8f94a884ab2f9909d0c83ba00edd38cdab77 Signed-off-by: Damjan Marion --- src/plugins/flowperpkt/flowperpkt.c | 2 +- src/vlib.am | 1 + src/vlib/buffer.c | 736 +++--------------------------------- src/vlib/buffer.h | 45 ++- src/vlib/buffer_funcs.h | 82 ++-- src/vlib/buffer_serialize.c | 248 ++++++++++++ src/vlib/main.c | 7 +- src/vlib/threads.c | 112 +++--- src/vlib/threads.h | 17 +- src/vlib/threads_cli.c | 25 -- src/vlib/unix/physmem.c | 15 +- src/vnet.am | 2 + src/vnet/devices/dpdk/buffer.c | 729 +++++++++++++++++++++++++++++++++++ src/vnet/devices/dpdk/cli.c | 4 +- src/vnet/devices/dpdk/device.c | 7 +- src/vnet/devices/dpdk/dpdk.h | 3 + src/vnet/devices/dpdk/dpdk_priv.h | 3 + src/vnet/devices/dpdk/init.c | 6 +- src/vnet/devices/dpdk/thread.c | 85 +++++ src/vnet/sr/sr_replicate.c | 7 +- 20 files changed, 1304 insertions(+), 832 deletions(-) create mode 100644 src/vlib/buffer_serialize.c create mode 100644 src/vnet/devices/dpdk/buffer.c create mode 100644 src/vnet/devices/dpdk/thread.c (limited to 'src/vlib/threads.h') diff --git a/src/plugins/flowperpkt/flowperpkt.c b/src/plugins/flowperpkt/flowperpkt.c index fb71d5b0..cc351599 100644 --- a/src/plugins/flowperpkt/flowperpkt.c +++ b/src/plugins/flowperpkt/flowperpkt.c @@ -643,7 +643,7 @@ flowperpkt_init (vlib_main_t * vm) vec_free (name); /* Decide how many worker threads we have */ - num_threads = 1 /* main thread */ + tm->n_eal_threads; + num_threads = 1 /* main thread */ + tm->n_threads; /* Allocate per worker thread vectors */ vec_validate (fm->ipv4_buffers_per_worker, num_threads - 1); diff --git a/src/vlib.am b/src/vlib.am index 0154d841..c21f88c4 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -23,6 +23,7 @@ vlib/config.h: libvlib_la_SOURCES = \ vlib/buffer.c \ + vlib/buffer_serialize.c \ vlib/cli.c \ vlib/cli.h \ vlib/config.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 4bf6d125..0b0e6054 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -38,50 +38,13 @@ */ /** - * @cond (!DPDK) * @file * * Allocate/free network buffers. 
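 */

The heart of this commit: every "#if DPDK" fork in the allocator is
replaced by a table of function pointers that an external buffer
manager may install at run time. Here is a stand-alone model of the
pattern, with simplified names and types; the real table is
vlib_buffer_callbacks_t, added to buffer.h further down.

typedef unsigned int u32;

typedef struct
{
  u32 (*alloc_cb) (void *vm, u32 * buffers, u32 n_buffers);
  void (*free_cb) (void *vm, u32 * buffers, u32 n_buffers);
} buffer_callbacks_t;

typedef struct
{
  buffer_callbacks_t cb;
  int extern_buffer_mgmt;      /* set once a driver has registered */
} buffer_main_t;

/* public entry points dispatch through the table, never through #if */
static inline u32
buffer_alloc (buffer_main_t * bm, void *vm, u32 * buffers, u32 n_buffers)
{
  return bm->cb.alloc_cb (vm, buffers, n_buffers);
}

static int
buffer_cb_register (buffer_main_t * bm, buffer_callbacks_t * cb)
{
  if (bm->extern_buffer_mgmt)  /* only one external manager allowed */
    return -1;
  bm->cb = *cb;
  bm->extern_buffer_mgmt = 1;
  return 0;
}

/* first to go: the DPDK includes and weak symbols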
*/ -#if DPDK > 0 -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#endif - #include -#if DPDK > 0 -#pragma weak rte_mem_virt2phy -#pragma weak rte_eal_has_hugepages -#pragma weak rte_socket_id -#pragma weak rte_pktmbuf_pool_create -#endif - uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) @@ -103,7 +66,6 @@ u8 * format_vlib_buffer (u8 * s, va_list * args) { vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *); -#if DPDK > 0 uword indent = format_get_indent (s); s = format (s, "current data %d, length %d, free-list %d", @@ -126,18 +88,6 @@ format_vlib_buffer (u8 * s, va_list * args) format_white_space, indent, next_buffer, b->current_length); } -#else - - s = format (s, "current data %d, length %d, free-list %d", - b->current_data, b->current_length, b->free_list_index); - - if (b->flags & VLIB_BUFFER_IS_TRACED) - s = format (s, ", trace 0x%x", b->trace_index); - - if (b->flags & VLIB_BUFFER_NEXT_PRESENT) - s = format (s, ", next-buffer 0x%x", b->next_buffer); -#endif - return s; } @@ -153,7 +103,6 @@ format_vlib_buffer_and_data (u8 * s, va_list * args) return s; } -#if DPDK == 0 static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args) { @@ -181,7 +130,6 @@ format_vlib_buffer_known_state (u8 * s, va_list * args) return format (s, "%s", t); } -#endif u8 * format_vlib_buffer_contents (u8 * s, va_list * va) @@ -200,7 +148,6 @@ format_vlib_buffer_contents (u8 * s, va_list * va) return s; } -#if DPDK == 0 static u8 * vlib_validate_buffer_helper (vlib_main_t * vm, u32 bi, @@ -217,11 +164,10 @@ vlib_validate_buffer_helper (vlib_main_t * vm, if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE) return format (0, "current data %d before pre-data", b->current_data); -#if DPDK == 0 + if (b->current_data + b->current_length > fl->n_data_bytes) return format (0, "%d-%d beyond end of buffer %d", b->current_data, b->current_length, fl->n_data_bytes); -#endif if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) { @@ -311,14 +257,12 @@ done: hash_free (hash); return result; } -#endif vlib_main_t **vlib_mains; -#if DPDK == 0 /* When dubugging validate that given buffers are either known allocated or known free. */ -static void +static void __attribute__ ((unused)) vlib_buffer_validate_alloc_free (vlib_main_t * vm, u32 * buffers, uword n_buffers, @@ -359,7 +303,6 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, is_free ? 
VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); } } -#endif #define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) @@ -463,7 +406,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); @@ -519,47 +461,6 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm, wf->unaligned_buffers = 0; wf->n_alloc = 0; } -#else - - if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0) - { - u32 default_free_free_list_index; - - default_free_free_list_index = vlib_buffer_create_free_list_helper (vm, - /* default buffer size */ - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - /* is_public */ - 1, - /* is_default */ - 1, - (u8 - *) - "default"); - ASSERT (default_free_free_list_index == - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) - return default_free_free_list_index; - } - - pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); - - memset (f, 0, sizeof (f[0])); - f->index = f - bm->buffer_free_list_pool; - f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); - f->min_n_buffers_each_physmem_alloc = 256; - f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); - - /* Setup free buffer template. */ - f->buffer_init_template.free_list_index = f->index; - - if (is_public) - { - uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes); - if (!p) - hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); - } -#endif return f->index; } @@ -609,50 +510,30 @@ static void del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) { u32 i; -#if DPDK > 0 - struct rte_mbuf *mb; - vlib_buffer_t *b; - - for (i = 0; i < vec_len (f->unaligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->unaligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - for (i = 0; i < vec_len (f->aligned_buffers); i++) - { - b = vlib_get_buffer (vm, f->aligned_buffers[i]); - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - vec_free (f->name); -#else for (i = 0; i < vec_len (f->buffer_memory_allocated); i++) vm->os_physmem_free (f->buffer_memory_allocated[i]); vec_free (f->name); vec_free (f->buffer_memory_allocated); -#endif vec_free (f->unaligned_buffers); vec_free (f->aligned_buffers); } /* Add buffer free list. 
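*/

Free lists are created, used and destroyed through the public API; a
short usage sketch follows. It assumes the vlib declarations from this
patch (the delete call now dispatches through
bm->cb.vlib_buffer_delete_free_list_cb); error handling is elided.

#include <vlib/vlib.h>

static void
free_list_demo (vlib_main_t * vm)
{
  u32 fl_index, bi;

  fl_index = vlib_buffer_create_free_list (vm, 9216 /* n_data_bytes */,
                                           "jumbo-demo");

  if (vlib_buffer_alloc_from_free_list (vm, &bi, 1, fl_index) == 1)
    vlib_buffer_free (vm, &bi, 1);

  vlib_buffer_delete_free_list (vm, fl_index);
}

/* deletion path: remaining buffers merge into the same-size public list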
*/ void -vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; u32 merge_index; -#if DPDK > 0 int i; ASSERT (os_get_cpu_number () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); + ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == + f->n_alloc); merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); if (merge_index != ~0 && merge_index != free_list_index) { @@ -674,26 +555,6 @@ vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) memset (f, 0xab, sizeof (f[0])); pool_put (bm->buffer_free_list_pool, f); } -#else - - f = vlib_buffer_get_free_list (vm, free_list_index); - - ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == - f->n_alloc); - merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); - if (merge_index != ~0 && merge_index != free_list_index) - { - merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, - merge_index), f); - } - - del_free_list (vm, f); - - /* Poison it. */ - memset (f, 0xab, sizeof (f[0])); - - pool_put (bm->buffer_free_list_pool, f); -#endif } /* Make sure free list has at least given number of free buffers. */ @@ -701,63 +562,6 @@ static uword fill_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * fl, uword min_free_buffers) { -#if DPDK > 0 - vlib_buffer_t *b; - int n, i; - u32 bi; - u32 n_remaining = 0, n_alloc = 0; - unsigned socket_id = rte_socket_id ? rte_socket_id () : 0; - struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id]; - struct rte_mbuf *mb; - - /* Too early? */ - if (PREDICT_FALSE (rmp == 0)) - return 0; - - trim_aligned (fl); - - /* Already have enough free buffers on free list? */ - n = min_free_buffers - vec_len (fl->aligned_buffers); - if (n <= 0) - return min_free_buffers; - - /* Always allocate round number of buffers. */ - n = round_pow2 (n, BUFFERS_PER_COPY); - - /* Always allocate new buffers in reasonably large sized chunks. 
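*/

fill_free_list () grows a list in batches: round the shortfall up to a
multiple of BUFFERS_PER_COPY, then clamp to a minimum chunk so the
physmem allocator is called rarely. A stand-alone model of that policy
follows (constants are illustrative).

#define BUFFERS_PER_COPY 2     /* 128-bit copy unit / 32-bit index */

static unsigned
round_pow2 (unsigned x, unsigned pow2)
{
  return (x + pow2 - 1) & ~(pow2 - 1);
}

static unsigned
n_buffers_to_allocate (unsigned n_free_now, unsigned min_free_wanted,
                       unsigned min_chunk)
{
  if (n_free_now >= min_free_wanted)
    return 0;                  /* list is already long enough */
  unsigned n = round_pow2 (min_free_wanted - n_free_now,
                           BUFFERS_PER_COPY);
  return n > min_chunk ? n : min_chunk;  /* i.e. clib_max (n, min_chunk) */
}

/* the same rounding, in the DPDK path being deleted: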
*/ - n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); - - vec_validate (vm->mbuf_alloc_list, n - 1); - - if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) - return 0; - - _vec_len (vm->mbuf_alloc_list) = n; - - for (i = 0; i < n; i++) - { - mb = vm->mbuf_alloc_list[i]; - - ASSERT (rte_mbuf_refcnt_read (mb) == 0); - rte_mbuf_refcnt_set (mb, 1); - - b = vlib_buffer_from_rte_mbuf (mb); - bi = vlib_get_buffer_index (vm, b); - - vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); - n_alloc++; - n_remaining--; - - vlib_buffer_init_for_free_list (b, fl); - - if (fl->buffer_init_function) - fl->buffer_init_function (vm, fl, &bi, 1); - } - - fl->n_alloc += n; - - return n; -#else vlib_buffer_t *buffers, *b; int n, n_bytes, i; u32 *bi; @@ -824,7 +628,6 @@ fill_free_list (vlib_main_t * vm, fl->buffer_init_function (vm, fl, bi, n_this_chunk); } return n_alloc; -#endif } always_inline uword @@ -833,6 +636,7 @@ copy_alignment (u32 * x) return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; } + static u32 alloc_from_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * free_list, @@ -842,10 +646,6 @@ alloc_from_free_list (vlib_main_t * vm, uword u_len, n_left; uword n_unaligned_start, n_unaligned_end, n_filled; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); - -#endif n_left = n_alloc_buffers; dst = alloc_buffers; n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) @@ -945,25 +745,21 @@ alloc_from_free_list (vlib_main_t * vm, else _vec_len (free_list->unaligned_buffers) = u_len; -#if DPDK == 0 /* Verify that buffers are known free. */ vlib_buffer_validate_alloc_free (vm, alloc_buffers, n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE); -#endif return n_alloc_buffers; } + /* Allocate a given number of buffers into given array. Returns number actually allocated which will be either zero or number requested. 
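*/

Note how alloc_from_free_list () above copies buffer indices in
vlib_copy_unit_t-sized groups: any destination head that is not
group-aligned is served one index at a time from the "unaligned" spill
vector. A stand-alone model of that bookkeeping (illustrative sizes;
the n_unaligned_start expression above is truncated in this extract
and is completed here on that assumption):

#include <stdint.h>

#define BUFFERS_PER_COPY 2  /* sizeof (vlib_copy_unit_t) / sizeof (u32) */

static uintptr_t
copy_alignment (uint32_t * x)
{
  return ((uintptr_t) x / sizeof (x[0])) % BUFFERS_PER_COPY;
}

/* indices to copy singly before aligned group copies can start */
static unsigned
n_unaligned_start (uint32_t * dst)
{
  return (BUFFERS_PER_COPY - copy_alignment (dst)) % BUFFERS_PER_COPY;
}

/* the allocator itself becomes static; callers go through the callback table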
*/ -u32 -vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static u32 +vlib_buffer_alloc_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_main_t *bm = vm->buffer_main; -#if DPDK == 0 - ASSERT (os_get_cpu_number () == 0); -#endif return alloc_from_free_list (vm, @@ -972,10 +768,10 @@ vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) buffers, n_buffers); } -u32 -vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index) +static u32 +vlib_buffer_alloc_from_free_list_internal (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *f; @@ -1016,81 +812,10 @@ vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp) return rv; } -#if DPDK == 0 -void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak)); -void -vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) -{ -} - -#endif static_always_inline void vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, u32 follow_buffer_next) { -#if DPDK > 0 - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *fl; - u32 fi; - int i; - u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, - u32 follow_buffer_next); - - cb = bm->buffer_free_callback; - - if (PREDICT_FALSE (cb != 0)) - n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); - - if (!n_buffers) - return; - - for (i = 0; i < n_buffers; i++) - { - vlib_buffer_t *b; - struct rte_mbuf *mb; - - b = vlib_get_buffer (vm, buffers[i]); - - fl = buffer_get_free_list (vm, b, &fi); - - /* The only current use of this callback: multicast recycle */ - if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) - { - int j; - - add_buffer_to_free_list - (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); - - for (j = 0; j < vec_len (bm->announce_list); j++) - { - if (fl == bm->announce_list[j]) - goto already_announced; - } - vec_add1 (bm->announce_list, fl); - already_announced: - ; - } - else - { - if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) - { - mb = rte_mbuf_from_vlib_buffer (b); - ASSERT (rte_mbuf_refcnt_read (mb) == 1); - rte_pktmbuf_free (mb); - } - } - } - if (vec_len (bm->announce_list)) - { - vlib_buffer_free_list_t *fl; - for (i = 0; i < vec_len (bm->announce_list); i++) - { - fl = bm->announce_list[i]; - fl->buffers_added_to_freelist_function (vm, fl); - } - _vec_len (bm->announce_list) = 0; - } -#else vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_free_list_t *fl; static u32 *next_to_free[2]; /* smp bad */ @@ -1315,26 +1040,25 @@ again: } _vec_len (announce_list) = 0; } -#endif } -void -vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_internal (vlib_main_t * vm, u32 * buffers, u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1); } -void -vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +static void +vlib_buffer_free_no_next_internal (vlib_main_t * vm, u32 * buffers, + u32 n_buffers) { vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0); } -#if DPDK == 0 /* Copy template packet data into buffers as they are allocated. 
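*/

The template mechanism pre-fills buffers as the free list grows, so
vlib_packet_template_get_packet () can hand out ready-made packets. A
simplified stand-alone model of such an init hook (toy fixed-size
buffer type, no free-list plumbing):

#include <string.h>

typedef struct
{
  unsigned char data[2048];
  unsigned current_length;
} toy_buffer_t;

static void
template_buffer_init (toy_buffer_t ** buffers, unsigned n_buffers,
                      const void *tmpl, unsigned tmpl_len)
{
  unsigned i;

  for (i = 0; i < n_buffers; i++)
    {
      toy_buffer_t *b = buffers[i];
      memcpy (b->data, tmpl, tmpl_len);   /* copy the template payload */
      b->current_length = tmpl_len;
    }
}

/* kept for the native path, now marked unused where the callback does the work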
*/ -static void +static void __attribute__ ((unused)) vlib_packet_template_buffer_init (vlib_main_t * vm, vlib_buffer_free_list_t * fl, u32 * buffers, u32 n_buffers) @@ -1352,7 +1076,6 @@ vlib_packet_template_buffer_init (vlib_main_t * vm, b->current_length); } } -#endif void vlib_packet_template_init (vlib_main_t * vm, @@ -1362,28 +1085,22 @@ vlib_packet_template_init (vlib_main_t * vm, uword min_n_buffers_each_physmem_alloc, char *fmt, ...) { -#if DPDK > 0 + vlib_buffer_main_t *bm = vm->buffer_main; va_list va; __attribute__ ((unused)) u8 *name; + vlib_buffer_free_list_t *fl; va_start (va, fmt); name = va_format (0, fmt, &va); va_end (va); - vlib_worker_thread_barrier_sync (vm); - memset (t, 0, sizeof (t[0])); - - vec_add (t->packet_data, packet_data, n_packet_data_bytes); + if (bm->cb.vlib_packet_template_init_cb) + bm->cb.vlib_packet_template_init_cb (vm, (void *) t, packet_data, + n_packet_data_bytes, + min_n_buffers_each_physmem_alloc, + name); - vlib_worker_thread_barrier_release (vm); -#else - vlib_buffer_free_list_t *fl; - va_list va; - u8 *name; - - va_start (va, fmt); - name = va_format (0, fmt, &va); - va_end (va); + vlib_worker_thread_barrier_sync (vm); memset (t, 0, sizeof (t[0])); @@ -1406,7 +1123,7 @@ vlib_packet_template_init (vlib_main_t * vm, fl->buffer_init_template.current_data = 0; fl->buffer_init_template.current_length = n_packet_data_bytes; fl->buffer_init_template.flags = 0; -#endif + vlib_worker_thread_barrier_release (vm); } void * @@ -1429,7 +1146,6 @@ vlib_packet_template_get_packet (vlib_main_t * vm, return b->data; } -#if DPDK == 0 void vlib_packet_template_get_packet_helper (vlib_main_t * vm, vlib_packet_template_t * t) @@ -1447,7 +1163,6 @@ vlib_packet_template_get_packet_helper (vlib_main_t * vm, _vec_len (t->free_buffers) = n_alloc; } -#endif /* Append given data to end of buffer, possibly allocating new buffers. 
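*/

A model of the chain-growing logic in vlib_buffer_add_data (): fill the
tail buffer and, when it is full, link in a fresh one via next_buffer /
VLIB_BUFFER_NEXT_PRESENT. Types are simplified stand-ins; vlib takes
new buffers from a free list rather than calloc, and the
allocation-failure unwind is elided here.

#include <stdlib.h>
#include <string.h>

#define BUF_DATA_SIZE 2048
#define NEXT_PRESENT  (1 << 0)

typedef struct buf
{
  unsigned char data[BUF_DATA_SIZE];
  unsigned current_length;
  unsigned flags;
  struct buf *next;
} buf_t;

static buf_t *
chain_append (buf_t * tail, const unsigned char *src, unsigned len)
{
  while (len > 0)
    {
      unsigned space = BUF_DATA_SIZE - tail->current_length;

      if (space == 0)
        {
          buf_t *nb = calloc (1, sizeof (*nb));  /* vlib: free-list alloc;
                                                    assume success */
          tail->next = nb;
          tail->flags |= NEXT_PRESENT;
          tail = nb;
          space = BUF_DATA_SIZE;
        }

      unsigned n = len < space ? len : space;
      memcpy (tail->data + tail->current_length, src, n);
      tail->current_length += n;
      src += n;
      len -= n;
    }
  return tail;                 /* new tail of the chain */
}

/* the real implementation follows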
*/ u32 vlib_buffer_add_data (vlib_main_t * vm, @@ -1541,328 +1256,11 @@ vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm, return copied; } -#if DPDK > 0 -clib_error_t * -vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, - unsigned socket_id) -{ - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_physmem_main_t *vpm = &vm->physmem_main; - struct rte_mempool *rmp; - int i; - - if (!rte_pktmbuf_pool_create) - return clib_error_return (0, "not linked with DPDK"); - - vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); - - /* pool already exists, nothing to do */ - if (bm->pktmbuf_pools[socket_id]) - return 0; - - u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); - - rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ - num_mbufs, /* number of mbufs */ - 512, /* cache size */ - VLIB_BUFFER_HDR_SIZE, /* priv size */ - VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ - socket_id); /* cpu socket */ - - if (rmp) - { - { - uword this_pool_end; - uword this_pool_start; - uword this_pool_size; - uword save_vpm_start, save_vpm_end, save_vpm_size; - struct rte_mempool_memhdr *memhdr; - - this_pool_start = ~0ULL; - this_pool_end = 0LL; - - STAILQ_FOREACH (memhdr, &rmp->mem_list, next) - { - if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) - this_pool_end = (uword) (memhdr->addr + memhdr->len); - if (((uword) memhdr->addr) < this_pool_start) - this_pool_start = (uword) (memhdr->addr); - } - ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); - this_pool_size = this_pool_end - this_pool_start; - - if (CLIB_DEBUG > 1) - { - clib_warning ("%s: pool start %llx pool end %llx pool size %lld", - pool_name, this_pool_start, this_pool_end, - this_pool_size); - clib_warning - ("before: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - save_vpm_start = vpm->virtual.start; - save_vpm_end = vpm->virtual.end; - save_vpm_size = vpm->virtual.size; - - if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) - vpm->virtual.start = this_pool_start; - if (this_pool_end > vpm->virtual.end) - vpm->virtual.end = this_pool_end; - - vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; - - if (CLIB_DEBUG > 1) - { - clib_warning - ("after: virtual.start %llx virtual.end %llx virtual.size %lld", - vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); - } - - /* check if fits into buffer index range */ - if ((u64) vpm->virtual.size > - ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) - { - clib_warning ("physmem: virtual size out of range!"); - vpm->virtual.start = save_vpm_start; - vpm->virtual.end = save_vpm_end; - vpm->virtual.size = save_vpm_size; - rmp = 0; - } - } - if (rmp) - { - bm->pktmbuf_pools[socket_id] = rmp; - vec_free (pool_name); - return 0; - } - } - - vec_free (pool_name); - - /* no usable pool for this socket, try to use pool from another one */ - for (i = 0; i < vec_len (bm->pktmbuf_pools); i++) - { - if (bm->pktmbuf_pools[i]) - { - clib_warning - ("WARNING: Failed to allocate mempool for CPU socket %u. 
" - "Threads running on socket %u will use socket %u mempool.", - socket_id, socket_id, i); - bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i]; - return 0; - } - } - - return clib_error_return (0, "failed to allocate mempool on socket %u", - socket_id); -} -#endif - -static void -vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - uword n, n_bytes_to_write; - vlib_buffer_t *last; - - n_bytes_to_write = s->current_buffer_index; - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); - if (serialize_stream_is_end_of_stream (s) - || sm->tx.n_total_data_bytes + n_bytes_to_write > - sm->tx.max_n_data_bytes_per_chain) - { - vlib_process_t *p = vlib_get_current_process (vm); - - last = vlib_get_buffer (vm, sm->last_buffer); - last->current_length = n_bytes_to_write; - - vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, - sm->first_buffer); - - sm->first_buffer = sm->last_buffer = ~0; - sm->tx.n_total_data_bytes = 0; - } - - else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) - { - ASSERT (sm->first_buffer == ~0); - ASSERT (sm->last_buffer == ~0); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->last_buffer = sm->first_buffer; - s->n_buffer_bytes = - vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); - } - - if (n_bytes_to_write > 0) - { - vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); - n = - vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, - sm->tx.free_list_index); - if (n != 1) - serialize_error (m, - clib_error_create - ("vlib_buffer_alloc_from_free_list fails")); - sm->tx.n_total_data_bytes += n_bytes_to_write; - prev->current_length = n_bytes_to_write; - prev->next_buffer = sm->last_buffer; - prev->flags |= VLIB_BUFFER_NEXT_PRESENT; - } - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - s->buffer = vlib_buffer_get_current (last); - s->current_buffer_index = 0; - ASSERT (last->current_data == s->current_buffer_index); - } -} - -static void -vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) -{ - vlib_main_t *vm; - vlib_serialize_buffer_main_t *sm; - vlib_buffer_t *last; - - sm = - uword_to_pointer (s->data_function_opaque, - vlib_serialize_buffer_main_t *); - vm = sm->vlib_main; - - if (serialize_stream_is_end_of_stream (s)) - return; - - if (sm->last_buffer != ~0) - { - last = vlib_get_buffer (vm, sm->last_buffer); - - if (last->flags & VLIB_BUFFER_NEXT_PRESENT) - sm->last_buffer = last->next_buffer; - else - { - vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); - sm->first_buffer = sm->last_buffer = ~0; - } - } - - if (sm->last_buffer == ~0) - { - while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) - { - sm->rx.ready_one_time_event = - vlib_process_create_one_time_event (vm, vlib_current_process (vm), - ~0); - vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, - sm->rx.ready_one_time_event); - } - - clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); - sm->last_buffer = sm->first_buffer; - } - - ASSERT (sm->last_buffer != ~0); - - last = vlib_get_buffer (vm, sm->last_buffer); - s->current_buffer_index = 0; - s->buffer = vlib_buffer_get_current (last); - s->n_buffer_bytes = last->current_length; -} - -static void 
-serialize_open_vlib_helper (serialize_main_t * m, - vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm, uword is_read) -{ - /* Initialize serialize main but save overflow buffer for re-use between calls. */ - { - u8 *save = m->stream.overflow_buffer; - memset (m, 0, sizeof (m[0])); - m->stream.overflow_buffer = save; - if (save) - _vec_len (save) = 0; - } - - sm->first_buffer = sm->last_buffer = ~0; - if (is_read) - clib_fifo_reset (sm->rx.buffer_fifo); - else - sm->tx.n_total_data_bytes = 0; - sm->vlib_main = vm; - m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; - m->stream.data_function_opaque = pointer_to_uword (sm); -} - -void -serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); -} - -void -unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, - vlib_serialize_buffer_main_t * sm) -{ - serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); -} - -u32 -serialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - vlib_buffer_t *last; - serialize_stream_t *s = &m->stream; - - last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); - last->current_length = s->current_buffer_index; - - if (vec_len (s->overflow_buffer) > 0) - { - sm->last_buffer - = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, - sm->last_buffer == ~0 ? 0 : sm->last_buffer, - s->overflow_buffer, - vec_len (s->overflow_buffer)); - _vec_len (s->overflow_buffer) = 0; - } - - return sm->first_buffer; -} - -void -unserialize_close_vlib_buffer (serialize_main_t * m) -{ - vlib_serialize_buffer_main_t *sm - = uword_to_pointer (m->stream.data_function_opaque, - vlib_serialize_buffer_main_t *); - if (sm->first_buffer != ~0) - vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); - clib_fifo_reset (sm->rx.buffer_fifo); - if (m->stream.overflow_buffer) - _vec_len (m->stream.overflow_buffer) = 0; -} static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) { vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *); -#if DPDK > 0 u32 threadnum = va_arg (*va, u32); uword bytes_alloc, bytes_free, n_free, size; @@ -1877,21 +1275,6 @@ format_vlib_buffer_free_list (u8 * s, va_list * va) bytes_free = size * n_free; s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", threadnum, -#else - uword bytes_alloc, bytes_free, n_free, size; - - if (!f) - return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s", - "Name", "Index", "Size", "Alloc", "Free", "#Alloc", - "#Free"); - - size = sizeof (vlib_buffer_t) + f->n_data_bytes; - n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); - bytes_alloc = size * f->n_alloc; - bytes_free = size * n_free; - - s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d", -#endif f->name, f->index, f->n_data_bytes, format_memory_size, bytes_alloc, format_memory_size, bytes_free, f->n_alloc, n_free); @@ -1903,7 +1286,6 @@ static clib_error_t * show_buffers (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 vlib_buffer_main_t *bm; vlib_buffer_free_list_t *f; vlib_main_t *curr_vm; @@ -1926,18 +1308,6 @@ show_buffers (vlib_main_t * vm, } while (vm_index < vec_len (vlib_mains)); -#else - vlib_buffer_main_t *bm = vm->buffer_main; - vlib_buffer_free_list_t *f; - - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0); - /* *INDENT-OFF* */ - pool_foreach (f, 
bm->buffer_free_list_pool, ({ - vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f); - })); -/* *INDENT-ON* */ - -#endif return 0; } @@ -1949,34 +1319,38 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = { }; /* *INDENT-ON* */ -#if DPDK > 0 -#if CLIB_DEBUG > 0 - -u32 *vlib_buffer_state_validation_lock; -uword *vlib_buffer_state_validation_hash; -void *vlib_buffer_state_heap; - -static clib_error_t * -buffer_state_validation_init (vlib_main_t * vm) +void +vlib_buffer_cb_init (struct vlib_main_t *vm) { - void *oldheap; - - vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); - - oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + vlib_buffer_main_t *bm = vm->buffer_main; + bm->cb.vlib_buffer_alloc_cb = &vlib_buffer_alloc_internal; + bm->cb.vlib_buffer_alloc_from_free_list_cb = + &vlib_buffer_alloc_from_free_list_internal; + bm->cb.vlib_buffer_free_cb = &vlib_buffer_free_internal; + bm->cb.vlib_buffer_free_no_next_cb = &vlib_buffer_free_no_next_internal; + bm->cb.vlib_buffer_delete_free_list_cb = + &vlib_buffer_delete_free_list_internal; + bm->extern_buffer_mgmt = 0; +} - vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); - vec_validate_aligned (vlib_buffer_state_validation_lock, 0, - CLIB_CACHE_LINE_BYTES); - clib_mem_set_heap (oldheap); +int +vlib_buffer_cb_register (struct vlib_main_t *vm, vlib_buffer_callbacks_t * cb) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + if (bm->extern_buffer_mgmt) + return -1; + +#define _(x) bm->cb.x = cb->x + _(vlib_buffer_alloc_cb); + _(vlib_buffer_alloc_from_free_list_cb); + _(vlib_buffer_free_cb); + _(vlib_buffer_free_no_next_cb); + _(vlib_buffer_delete_free_list_cb); +#undef _ + bm->extern_buffer_mgmt = 1; return 0; } -VLIB_INIT_FUNCTION (buffer_state_validation_init); -#endif -#endif - - /** @endcond */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h index 5f1e62f0..d270c08a 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -46,15 +46,9 @@ #include #include /* for vlib_error_t */ -#if DPDK > 0 -#include -#define VLIB_BUFFER_DATA_SIZE (2048) -#define VLIB_BUFFER_PRE_DATA_SIZE RTE_PKTMBUF_HEADROOM -#else #include /* for __PRE_DATA_SIZE */ -#define VLIB_BUFFER_DATA_SIZE (512) +#define VLIB_BUFFER_DATA_SIZE (2048) #define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE -#endif #if defined (CLIB_HAVE_VEC128) || defined (__aarch64__) typedef u8x16 vlib_copy_unit_t; @@ -296,6 +290,27 @@ typedef struct vlib_buffer_free_list_t uword buffer_init_function_opaque; } __attribute__ ((aligned (16))) vlib_buffer_free_list_t; +typedef struct +{ + u32 (*vlib_buffer_alloc_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + u32 (*vlib_buffer_alloc_from_free_list_cb) (struct vlib_main_t * vm, + u32 * buffers, u32 n_buffers, + u32 free_list_index); + void (*vlib_buffer_free_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_buffer_free_no_next_cb) (struct vlib_main_t * vm, u32 * buffers, + u32 n_buffers); + void (*vlib_packet_template_init_cb) (struct vlib_main_t * vm, void *t, + void *packet_data, + uword n_packet_data_bytes, + uword + min_n_buffers_each_physmem_alloc, + u8 * name); + void (*vlib_buffer_delete_free_list_cb) (struct vlib_main_t * vm, + u32 free_list_index); +} vlib_buffer_callbacks_t; + typedef struct { /* Buffer free callback, for subversive activities */ @@ -323,12 +338,15 @@ typedef struct /* List of free-lists needing Blue Light Special announcements */ vlib_buffer_free_list_t **announce_list; - /* Vector of rte_mempools per socket 
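*/

With vlib_buffer_callbacks_t in place, an external buffer manager (the
DPDK driver, in this series) installs its handlers once at init time; a
second registration is refused. Driver-side sketch, assuming the API
declared in this hunk; the my_* handlers are hypothetical stubs, and a
real driver would fill in every member of the table.

#include <vlib/vlib.h>

static u32
my_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
{
  return 0;                    /* real driver logic goes here */
}

static void
my_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
{
}

static clib_error_t *
my_driver_init (vlib_main_t * vm)
{
  vlib_buffer_callbacks_t cb = {
    .vlib_buffer_alloc_cb = my_buffer_alloc,
    .vlib_buffer_free_cb = my_buffer_free,
    /* ...remaining members set the same way... */
  };

  if (vlib_buffer_cb_register (vm, &cb))
    return clib_error_return (0, "external buffer manager already set");
  return 0;
}

/* removed: the per-socket mempool vector migrates to the DPDK driver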
*/ -#if DPDK == 1 - struct rte_mempool **pktmbuf_pools; -#endif + /* Callbacks */ + vlib_buffer_callbacks_t cb; + int extern_buffer_mgmt; } vlib_buffer_main_t; +void vlib_buffer_cb_init (struct vlib_main_t *vm); +int vlib_buffer_cb_register (struct vlib_main_t *vm, + vlib_buffer_callbacks_t * cb); + typedef struct { struct vlib_main_t *vlib_main; @@ -385,11 +403,6 @@ serialize_vlib_buffer_n_bytes (serialize_main_t * m) vec_len (s->overflow_buffer); } -#if DPDK > 0 -#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) -#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) -#endif - /* */ diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 75716eca..15d93c16 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -195,8 +195,6 @@ do { \ } while (0) #endif -#if DPDK == 0 - typedef enum { /* Index is unknown. */ @@ -232,8 +230,6 @@ vlib_buffer_set_known_state (vlib_main_t * vm, u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); -#endif /* DPDK == 0 */ - clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, unsigned socket_id); @@ -245,7 +241,15 @@ clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers); +always_inline u32 +vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_cb); + + return bm->cb.vlib_buffer_alloc_cb (vm, buffers, n_buffers); +} always_inline u32 vlib_buffer_round_size (u32 size) @@ -261,9 +265,18 @@ vlib_buffer_round_size (u32 size) @return - (u32) number of buffers actually allocated, may be less than the number requested or zero */ -u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, - u32 * buffers, - u32 n_buffers, u32 free_list_index); +always_inline u32 +vlib_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_alloc_from_free_list_cb); + + return bm->cb.vlib_buffer_alloc_from_free_list_cb (vm, buffers, n_buffers, + free_list_index); +} /** \brief Free buffers Frees the entire buffer chain for each buffer @@ -273,11 +286,19 @@ u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_free_cb); + + return bm->cb.vlib_buffer_free_cb (vm, buffers, n_buffers); +} /** \brief Free buffers, does not free the buffer chain for each buffer @@ -286,11 +307,19 @@ void vlib_buffer_free (vlib_main_t * vm, @param n_buffers - (u32) number of buffers to free */ -void vlib_buffer_free_no_next (vlib_main_t * vm, - /* pointer to first buffer */ - u32 * buffers, - /* number of buffers to free */ - u32 n_buffers); +always_inline void +vlib_buffer_free_no_next (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT 
(bm->cb.vlib_buffer_free_no_next_cb); + + return bm->cb.vlib_buffer_free_no_next_cb (vm, buffers, n_buffers); +} /** \brief Free one buffer Shorthand to free a single buffer chain. @@ -307,7 +336,15 @@ vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) /* Add/delete buffer free lists. */ u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char *fmt, ...); -void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index); +always_inline void +vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + ASSERT (bm->cb.vlib_buffer_delete_free_list_cb); + + bm->cb.vlib_buffer_delete_free_list_cb (vm, free_list_index); +} /* Find already existing public free list with given size or create one. */ u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, @@ -453,11 +490,6 @@ vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b) return fd; } -/* - * vlib_buffer_chain_* functions provide a way to create long buffers. - * When DPDK is enabled, the 'hidden' DPDK header is taken care of transparently. - */ - /* Initializes the buffer as an empty packet with no chained buffers. */ always_inline void vlib_buffer_chain_init (vlib_buffer_t * first) @@ -537,8 +569,6 @@ typedef struct /* Vector of packet data. */ u8 *packet_data; - /* Note: the next three fields are unused if DPDK == 1 */ - /* Number of buffers to allocate in each call to physmem allocator. */ u32 min_n_buffers_each_physmem_alloc; diff --git a/src/vlib/buffer_serialize.c b/src/vlib/buffer_serialize.c new file mode 100644 index 00000000..96a5f0a0 --- /dev/null +++ b/src/vlib/buffer_serialize.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
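+ */

This new file collects the buffer-chain serializer verbatim from
vlib/buffer.c. Typical use, sketched on the assumption that the
standard vppinfra serialize entry points (serialize_function_t and
serialize ()) look as they do elsewhere in the tree; error handling
elided:

#include <vlib/vlib.h>

static u32
serialize_demo (vlib_main_t * vm, vlib_serialize_buffer_main_t * sbm,
                serialize_function_t * fn, void *obj)
{
  serialize_main_t m;

  serialize_open_vlib_buffer (&m, vm, sbm);
  serialize (&m, fn, obj);                  /* walk the object graph */
  return serialize_close_vlib_buffer (&m);  /* head buffer index */
}

/* buffer_serialize.c: moved, not rewritten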
+ */ + +#include + +static void +vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + uword n, n_bytes_to_write; + vlib_buffer_t *last; + + n_bytes_to_write = s->current_buffer_index; + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); + if (serialize_stream_is_end_of_stream (s) + || sm->tx.n_total_data_bytes + n_bytes_to_write > + sm->tx.max_n_data_bytes_per_chain) + { + vlib_process_t *p = vlib_get_current_process (vm); + + last = vlib_get_buffer (vm, sm->last_buffer); + last->current_length = n_bytes_to_write; + + vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, + sm->first_buffer); + + sm->first_buffer = sm->last_buffer = ~0; + sm->tx.n_total_data_bytes = 0; + } + + else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) + { + ASSERT (sm->first_buffer == ~0); + ASSERT (sm->last_buffer == ~0); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->last_buffer = sm->first_buffer; + s->n_buffer_bytes = + vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); + } + + if (n_bytes_to_write > 0) + { + vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer); + n = + vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, + sm->tx.free_list_index); + if (n != 1) + serialize_error (m, + clib_error_create + ("vlib_buffer_alloc_from_free_list fails")); + sm->tx.n_total_data_bytes += n_bytes_to_write; + prev->current_length = n_bytes_to_write; + prev->next_buffer = sm->last_buffer; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + s->buffer = vlib_buffer_get_current (last); + s->current_buffer_index = 0; + ASSERT (last->current_data == s->current_buffer_index); + } +} + +static void +vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t *vm; + vlib_serialize_buffer_main_t *sm; + vlib_buffer_t *last; + + sm = + uword_to_pointer (s->data_function_opaque, + vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + if (serialize_stream_is_end_of_stream (s)) + return; + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + + if (last->flags & VLIB_BUFFER_NEXT_PRESENT) + sm->last_buffer = last->next_buffer; + else + { + vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); + sm->first_buffer = sm->last_buffer = ~0; + } + } + + if (sm->last_buffer == ~0) + { + while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) + { + sm->rx.ready_one_time_event = + vlib_process_create_one_time_event (vm, vlib_current_process (vm), + ~0); + vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, + sm->rx.ready_one_time_event); + } + + clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); + sm->last_buffer = sm->first_buffer; + } + + ASSERT (sm->last_buffer != ~0); + + last = vlib_get_buffer (vm, sm->last_buffer); + s->current_buffer_index = 0; + s->buffer = vlib_buffer_get_current (last); + s->n_buffer_bytes = last->current_length; +} + +static void +serialize_open_vlib_helper (serialize_main_t * m, + vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm, uword is_read) +{ + /* Initialize serialize main but save overflow buffer for re-use between calls. 
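+ * The idiom below is worth noting: the serialize_main_t is wiped with
+ * memset, but the already-grown overflow vector is saved across the
+ * wipe and re-attached with its length reset to zero, so repeated
+ * open/close cycles on the same stream never re-allocate it.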
*/ + { + u8 *save = m->stream.overflow_buffer; + memset (m, 0, sizeof (m[0])); + m->stream.overflow_buffer = save; + if (save) + _vec_len (save) = 0; + } + + sm->first_buffer = sm->last_buffer = ~0; + if (is_read) + clib_fifo_reset (sm->rx.buffer_fifo); + else + sm->tx.n_total_data_bytes = 0; + sm->vlib_main = vm; + m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; + m->stream.data_function_opaque = pointer_to_uword (sm); +} + +void +serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); +} + +void +unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm) +{ + serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); +} + +u32 +serialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + vlib_buffer_t *last; + serialize_stream_t *s = &m->stream; + + last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); + last->current_length = s->current_buffer_index; + + if (vec_len (s->overflow_buffer) > 0) + { + sm->last_buffer + = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, + sm->last_buffer == ~0 ? 0 : sm->last_buffer, + s->overflow_buffer, + vec_len (s->overflow_buffer)); + _vec_len (s->overflow_buffer) = 0; + } + + return sm->first_buffer; +} + +void +unserialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t *sm + = uword_to_pointer (m->stream.data_function_opaque, + vlib_serialize_buffer_main_t *); + if (sm->first_buffer != ~0) + vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); + clib_fifo_reset (sm->rx.buffer_fifo); + if (m->stream.overflow_buffer) + _vec_len (m->stream.overflow_buffer) = 0; +} + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/main.c b/src/vlib/main.c index 6c6cad98..09f34bbd 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -465,7 +465,7 @@ vlib_put_next_frame (vlib_main_t * vm, vlib_frame_t *f; u32 n_vectors_in_frame; - if (DPDK == 0 && CLIB_DEBUG > 0) + if (vm->buffer_main->extern_buffer_mgmt == 0 && CLIB_DEBUG > 0) vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); nf = vlib_node_runtime_get_next_frame (vm, r, next_index); @@ -1012,8 +1012,8 @@ dispatch_node (vlib_main_t * vm, /* When in interrupt mode and vector rate crosses threshold switch to polling mode. */ - if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT) - || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING + if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) + || (dispatch_state == VLIB_NODE_STATE_POLLING && (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))) { @@ -1615,6 +1615,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vm->name = "VLIB"; vec_validate (vm->buffer_main, 0); + vlib_buffer_cb_init (vm); if ((error = vlib_thread_init (vm))) { diff --git a/src/vlib/threads.c b/src/vlib/threads.c index c5e58bc0..b3bbd30e 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -22,29 +22,10 @@ #include #include - -#if DPDK==1 -#include -#include -#include -#include -#include -#endif DECLARE_CJ_GLOBAL_LOG; #define FRAME_QUEUE_NELTS 32 - -#if DPDK==1 -/* - * Weak definitions of DPDK symbols used in this file. - * Needed for linking test programs without DPDK libs. 
- */ -unsigned __thread __attribute__ ((weak)) RTE_PER_LCORE (_lcore_id); -struct lcore_config __attribute__ ((weak)) lcore_config[]; -unsigned __attribute__ ((weak)) rte_socket_id (); -int __attribute__ ((weak)) rte_eal_remote_launch (); -#endif u32 vl (void *p) { @@ -194,14 +175,17 @@ vlib_thread_init (vlib_main_t * vm) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ -#if DPDK==0 - { - cpu_set_t cpuset; - CPU_ZERO (&cpuset); - CPU_SET (tm->main_lcore, &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); - } -#endif + if (tm->cb.vlib_thread_set_lcore_cb) + { + tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore); + } + else + { + cpu_set_t cpuset; + CPU_ZERO (&cpuset); + CPU_SET (tm->main_lcore, &cpuset); + pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); + } /* as many threads as stacks... */ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1, @@ -520,32 +504,29 @@ vlib_worker_thread_bootstrap_fn (void *arg) return rv; } -static int -vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +static clib_error_t * +vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) { + vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; w->lcore_id = lcore_id; -#if DPDK==1 - if (!w->registration->use_pthreads) - if (rte_eal_remote_launch) /* do we have dpdk linked */ - return rte_eal_remote_launch (fp, (void *) w, lcore_id); - else - return -1; + if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) + return tm->cb.vlib_launch_thread_cb (fp, (void *) w, lcore_id); else -#endif { - int ret; pthread_t worker; cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (lcore_id, &cpuset); - ret = pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w); - if (ret == 0) - return pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset); - else - return ret; + if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w)) + return clib_error_return_unix (0, "pthread_create"); + + if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset)) + return clib_error_return_unix (0, "pthread_setaffinity_np"); + + return 0; } } @@ -769,6 +750,7 @@ start_workers (vlib_main_t * vm) for (i = 0; i < vec_len (tm->registrations); i++) { + clib_error_t *err; int j; tr = tm->registrations[i]; @@ -778,22 +760,24 @@ start_workers (vlib_main_t * vm) for (j = 0; j < tr->count; j++) { w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) < - 0) - clib_warning ("Couldn't start '%s' pthread ", tr->name); + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, 0); + if (err) + clib_error_report (err); } } else { uword c; - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tr->coremask, ({ - w = vlib_worker_threads + worker_thread_index++; - if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0) - clib_warning ("Couldn't start DPDK lcore %d", c); - - })); -/* *INDENT-ON* */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tr->coremask, ({ + w = vlib_worker_threads + worker_thread_index++; + err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, + w, c); + if (err) + clib_error_report (err); + })); + /* *INDENT-ON* */ } } vlib_worker_thread_barrier_sync (vm); @@ -1105,7 +1089,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) { tm->n_thread_stacks += tr->count; tm->n_pthreads += tr->count * tr->use_pthreads; - tm->n_eal_threads += tr->count 
* (tr->use_pthreads == 0); + tm->n_threads += tr->count * (tr->use_pthreads == 0); tr = tr->next; } @@ -1423,6 +1407,7 @@ void vlib_worker_thread_fn (void *arg) { vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_main_t *vm = vlib_get_main (); ASSERT (vm->cpu_index == os_get_cpu_number ()); @@ -1431,12 +1416,9 @@ vlib_worker_thread_fn (void *arg) clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); -#if DPDK > 0 /* Wait until the dpdk init sequence is complete */ - vlib_thread_main_t *tm = vlib_get_thread_main (); - while (tm->worker_thread_release == 0) + while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); -#endif vlib_worker_thread_internal (vm); } @@ -1475,6 +1457,20 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) return (fqm - tm->frame_queue_mains); } + +int +vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->extern_thread_mgmt) + return -1; + + tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb; + tm->extern_thread_mgmt = 1; + return 0; +} + clib_error_t * threads_init (vlib_main_t * vm) { diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 34ab5be8..75a5a281 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -263,6 +263,13 @@ typedef enum SCHED_POLICY_N, } sched_policy_t; +typedef struct +{ + clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w, + unsigned lcore_id); + clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 lcore); +} vlib_thread_callbacks_t; + typedef struct { /* Link list of registrations, built by constructors */ @@ -290,8 +297,8 @@ typedef struct /* Number of pthreads */ u32 n_pthreads; - /* Number of DPDK eal threads */ - u32 n_eal_threads; + /* Number of threads */ + u32 n_threads; /* Number of cores to skip, must match the core mask */ u32 skip_cores; @@ -320,6 +327,9 @@ typedef struct /* scheduling policy priority */ u32 sched_priority; + /* callbacks */ + vlib_thread_callbacks_t cb; + int extern_thread_mgmt; } vlib_thread_main_t; extern vlib_thread_main_t vlib_thread_main; @@ -459,6 +469,9 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index, return elt; } +int vlib_thread_cb_register (struct vlib_main_t *vm, + vlib_thread_callbacks_t * cb); + #endif /* included_vlib_threads_h */ /* diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index ee632279..b64028c4 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -20,14 +20,6 @@ #include #include -#if DPDK==1 -#include -#include -#include -#include -#include -#endif - static u8 * format_sched_policy_and_priority (u8 * s, va_list * args) { @@ -116,23 +108,6 @@ show_threads_fn (vlib_main_t * vm, vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); -#if DPDK==1 - ASSERT (lcore <= RTE_MAX_LCORE); - switch (lcore_config[lcore].state) - { - case WAIT: - line = format (line, "wait"); - break; - case RUNNING: - line = format (line, "running"); - break; - case FINISHED: - line = format (line, "finished"); - break; - default: - line = format (line, "unknown"); - } -#endif } else { diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c index 80ab7b9d..8d10ad2e 100644 --- a/src/vlib/unix/physmem.c +++ b/src/vlib/unix/physmem.c @@ -45,13 +45,13 @@ static void * unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment) { + vlib_main_t *vm = 
vlib_get_main (); physmem_main_t *pm = &physmem_main; uword lo_offset, hi_offset; uword *to_free = 0; -#if DPDK > 0 - clib_warning ("unsafe alloc!"); -#endif + if (vm->buffer_main->extern_buffer_mgmt) + clib_warning ("unsafe alloc!"); /* IO memory is always at least cache aligned. */ alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); @@ -269,16 +269,17 @@ static clib_error_t * show_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { -#if DPDK > 0 - vlib_cli_output (vm, "Not supported with DPDK drivers."); -#else physmem_main_t *pm = &physmem_main; + if (vm->buffer_main->extern_buffer_mgmt) + { + vlib_cli_output (vm, "Not supported with external buffer management."); + return 0; + } if (pm->heap) vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1); else vlib_cli_output (vm, "No physmem allocated."); -#endif return 0; } diff --git a/src/vnet.am b/src/vnet.am index 665a16ea..47c5eda7 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -761,11 +761,13 @@ nobase_include_HEADERS += \ ######################################## if WITH_DPDK libvnet_la_SOURCES += \ + vnet/devices/dpdk/buffer.c \ vnet/devices/dpdk/dpdk_priv.h \ vnet/devices/dpdk/device.c \ vnet/devices/dpdk/format.c \ vnet/devices/dpdk/init.c \ vnet/devices/dpdk/node.c \ + vnet/devices/dpdk/thread.c \ vnet/devices/dpdk/hqos.c \ vnet/devices/dpdk/cli.c \ vnet/devices/dpdk/dpdk_api.c diff --git a/src/vnet/devices/dpdk/buffer.c b/src/vnet/devices/dpdk/buffer.c new file mode 100644 index 00000000..214a9162 --- /dev/null +++ b/src/vnet/devices/dpdk/buffer.c @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * + * Allocate/free network buffers. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM, + "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM"); + +#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) + +/* Make sure we have at least given number of unaligned buffers. */ +static void +fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) +{ + word la = vec_len (free_list->aligned_buffers); + word lu = vec_len (free_list->unaligned_buffers); + + /* Aligned come in aligned copy-sized chunks. */ + ASSERT (la % BUFFERS_PER_COPY == 0); + + ASSERT (la >= n_unaligned_buffers); + + while (lu < n_unaligned_buffers) + { + /* Copy 4 buffers from end of aligned vector to unaligned vector. */ + vec_add (free_list->unaligned_buffers, + free_list->aligned_buffers + la - BUFFERS_PER_COPY, + BUFFERS_PER_COPY); + la -= BUFFERS_PER_COPY; + lu += BUFFERS_PER_COPY; + } + _vec_len (free_list->aligned_buffers) = la; +} + +/* After free aligned buffers may not contain even sized chunks. */ +static void +trim_aligned (vlib_buffer_free_list_t * f) +{ + uword l, n_trim; + + /* Add unaligned to aligned before trim. */ + l = vec_len (f->unaligned_buffers); + if (l > 0) + { + vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, + /* align */ sizeof (vlib_copy_unit_t)); + + _vec_len (f->unaligned_buffers) = 0; + } + + /* Remove unaligned buffers from end of aligned vector and save for next trim. */ + l = vec_len (f->aligned_buffers); + n_trim = l % BUFFERS_PER_COPY; + if (n_trim) + { + /* Trim aligned -> unaligned. */ + vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); + + /* Remove from aligned. */ + _vec_len (f->aligned_buffers) = l - n_trim; + } +} + +static void +merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src) +{ + uword l; + u32 *d; + + trim_aligned (src); + trim_aligned (dst); + + l = vec_len (src->aligned_buffers); + if (l > 0) + { + vec_add2_aligned (dst->aligned_buffers, d, l, + /* align */ sizeof (vlib_copy_unit_t)); + clib_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); + vec_free (src->aligned_buffers); + } + + l = vec_len (src->unaligned_buffers); + if (l > 0) + { + vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); + vec_free (src->unaligned_buffers); + } +} + +always_inline u32 +dpdk_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + size = vlib_buffer_round_size (size); + uword *p = hash_get (bm->free_list_by_size, size); + return p ? 
p[0] : ~0; +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + struct rte_mbuf *mb; + vlib_buffer_t *b; + + for (i = 0; i < vec_len (f->unaligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->unaligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + for (i = 0; i < vec_len (f->aligned_buffers); i++) + { + b = vlib_get_buffer (vm, f->aligned_buffers[i]); + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + vec_free (f->name); + vec_free (f->unaligned_buffers); + vec_free (f->aligned_buffers); +} + +/* Add buffer free list. */ +static void +dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + u32 merge_index; + int i; + + ASSERT (os_get_cpu_number () == 0); + + f = vlib_buffer_get_free_list (vm, free_list_index); + + merge_index = dpdk_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, + merge_index), f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); + + for (i = 1; i < vec_len (vlib_mains); i++) + { + bm = vlib_mains[i]->buffer_main; + f = vlib_buffer_get_free_list (vlib_mains[i], free_list_index);; + memset (f, 0xab, sizeof (f[0])); + pool_put (bm->buffer_free_list_pool, f); + } +} + +/* Make sure free list has at least given number of free buffers. */ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, uword min_free_buffers) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_buffer_t *b; + int n, i; + u32 bi; + u32 n_remaining = 0, n_alloc = 0; + unsigned socket_id = rte_socket_id (); + struct rte_mempool *rmp = dm->pktmbuf_pools[socket_id]; + struct rte_mbuf *mb; + + /* Too early? */ + if (PREDICT_FALSE (rmp == 0)) + return 0; + + trim_aligned (fl); + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->aligned_buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, BUFFERS_PER_COPY); + + /* Always allocate new buffers in reasonably large sized chunks. 
*/ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n - 1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + for (i = 0; i < n; i++) + { + mb = vm->mbuf_alloc_list[i]; + + ASSERT (rte_mbuf_refcnt_read (mb) == 0); + rte_mbuf_refcnt_set (mb, 1); + + b = vlib_buffer_from_rte_mbuf (mb); + bi = vlib_get_buffer_index (vm, b); + + vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); + n_alloc++; + n_remaining--; + + vlib_buffer_init_for_free_list (b, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi, 1); + } + + fl->n_alloc += n; + + return n; +} + +always_inline uword +copy_alignment (u32 * x) +{ + return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; +} + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, u32 n_alloc_buffers) +{ + u32 *dst, *u_src; + uword u_len, n_left; + uword n_unaligned_start, n_unaligned_end, n_filled; + + n_left = n_alloc_buffers; + dst = alloc_buffers; + n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) + & (BUFFERS_PER_COPY - 1)); + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + n_left = n_filled < n_left ? n_filled : n_left; + n_alloc_buffers = n_left; + + if (n_unaligned_start >= n_left) + { + n_unaligned_start = n_left; + n_unaligned_end = 0; + } + else + n_unaligned_end = copy_alignment (dst + n_alloc_buffers); + + fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + + u_len = vec_len (free_list->unaligned_buffers); + u_src = free_list->unaligned_buffers + u_len - 1; + + if (n_unaligned_start) + { + uword n_copy = n_unaligned_start; + if (n_copy > n_left) + n_copy = n_left; + n_left -= n_copy; + + while (n_copy > 0) + { + *dst++ = *u_src--; + n_copy--; + u_len--; + } + + /* Now dst should be aligned. */ + if (n_left > 0) + ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); + } + + /* Aligned copy. */ + { + vlib_copy_unit_t *d, *s; + uword n_copy; + + if (vec_len (free_list->aligned_buffers) < + ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY)) + abort (); + + n_copy = n_left / BUFFERS_PER_COPY; + n_left = n_left % BUFFERS_PER_COPY; + + /* Remove buffers from aligned free list. */ + _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; + + s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); + d = (vlib_copy_unit_t *) dst; + + /* Fast path loop. */ + while (n_copy >= 4) + { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + n_copy -= 4; + s += 4; + d += 4; + } + + while (n_copy >= 1) + { + d[0] = s[0]; + n_copy -= 1; + s += 1; + d += 1; + } + + dst = (void *) d; + } + + /* Unaligned copy. */ + ASSERT (n_unaligned_end == n_left); + while (n_left > 0) + { + *dst++ = *u_src--; + n_left--; + u_len--; + } + + if (!free_list->unaligned_buffers) + ASSERT (u_len == 0); + else + _vec_len (free_list->unaligned_buffers) = u_len; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
*/ +u32 +dpdk_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + + +u32 +dpdk_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, u32 free_list_index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +always_inline void +add_buffer_to_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * f, + u32 buffer_index, u8 do_init) +{ + vlib_buffer_t *b; + b = vlib_get_buffer (vm, buffer_index); + if (PREDICT_TRUE (do_init)) + vlib_buffer_init_for_free_list (b, f); + vec_add1_aligned (f->aligned_buffers, buffer_index, + sizeof (vlib_copy_unit_t)); +} + +always_inline vlib_buffer_free_list_t * +buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + u32 i; + + *index = i = b->free_list_index; + return pool_elt_at_index (bm->buffer_free_list_pool, i); +} + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, u32 n_buffers, u32 follow_buffer_next) +{ + vlib_buffer_main_t *bm = vm->buffer_main; + vlib_buffer_free_list_t *fl; + u32 fi; + int i; + u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next); + + if (!n_buffers) + return; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t *b; + struct rte_mbuf *mb; + + b = vlib_get_buffer (vm, buffers[i]); + + fl = buffer_get_free_list (vm, b, &fi); + + /* The only current use of this callback: multicast recycle */ + if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0)) + { + int j; + + add_buffer_to_free_list + (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0); + + for (j = 0; j < vec_len (bm->announce_list); j++) + { + if (fl == bm->announce_list[j]) + goto already_announced; + } + vec_add1 (bm->announce_list, fl); + already_announced: + ; + } + else + { + if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0)) + { + mb = rte_mbuf_from_vlib_buffer (b); + ASSERT (rte_mbuf_refcnt_read (mb) == 1); + rte_pktmbuf_free (mb); + } + } + } + if (vec_len (bm->announce_list)) + { + vlib_buffer_free_list_t *fl; + for (i = 0; i < vec_len (bm->announce_list); i++) + { + fl = bm->announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + _vec_len (bm->announce_list) = 0; + } +} + +static void +dpdk_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 1); +} + +static void +dpdk_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ + 0); +} + +static void +dpdk_packet_template_init (vlib_main_t * vm, + void *vt, + void *packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, u8 * name) +{ + vlib_packet_template_t *t = (vlib_packet_template_t *) vt; + + vlib_worker_thread_barrier_sync (vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release (vm); +} + +clib_error_t * 
+vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs, + unsigned socket_id) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_physmem_main_t *vpm = &vm->physmem_main; + struct rte_mempool *rmp; + int i; + + vec_validate_aligned (dm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES); + + /* pool already exists, nothing to do */ + if (dm->pktmbuf_pools[socket_id]) + return 0; + + u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0); + + rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */ + num_mbufs, /* number of mbufs */ + 512, /* cache size */ + VLIB_BUFFER_HDR_SIZE, /* priv size */ + VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */ + socket_id); /* cpu socket */ + + if (rmp) + { + { + uword this_pool_end; + uword this_pool_start; + uword this_pool_size; + uword save_vpm_start, save_vpm_end, save_vpm_size; + struct rte_mempool_memhdr *memhdr; + + this_pool_start = ~0ULL; + this_pool_end = 0LL; + + STAILQ_FOREACH (memhdr, &rmp->mem_list, next) + { + if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end) + this_pool_end = (uword) (memhdr->addr + memhdr->len); + if (((uword) memhdr->addr) < this_pool_start) + this_pool_start = (uword) (memhdr->addr); + } + ASSERT (this_pool_start < ~0ULL && this_pool_end > 0); + this_pool_size = this_pool_end - this_pool_start; + + if (CLIB_DEBUG > 1) + { + clib_warning ("%s: pool start %llx pool end %llx pool size %lld", + pool_name, this_pool_start, this_pool_end, + this_pool_size); + clib_warning + ("before: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + save_vpm_start = vpm->virtual.start; + save_vpm_end = vpm->virtual.end; + save_vpm_size = vpm->virtual.size; + + if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0) + vpm->virtual.start = this_pool_start; + if (this_pool_end > vpm->virtual.end) + vpm->virtual.end = this_pool_end; + + vpm->virtual.size = vpm->virtual.end - vpm->virtual.start; + + if (CLIB_DEBUG > 1) + { + clib_warning + ("after: virtual.start %llx virtual.end %llx virtual.size %lld", + vpm->virtual.start, vpm->virtual.end, vpm->virtual.size); + } + + /* check if fits into buffer index range */ + if ((u64) vpm->virtual.size > + ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES))) + { + clib_warning ("physmem: virtual size out of range!"); + vpm->virtual.start = save_vpm_start; + vpm->virtual.end = save_vpm_end; + vpm->virtual.size = save_vpm_size; + rmp = 0; + } + } + if (rmp) + { + dm->pktmbuf_pools[socket_id] = rmp; + vec_free (pool_name); + return 0; + } + } + + vec_free (pool_name); + + /* no usable pool for this socket, try to use pool from another one */ + for (i = 0; i < vec_len (dm->pktmbuf_pools); i++) + { + if (dm->pktmbuf_pools[i]) + { + clib_warning + ("WARNING: Failed to allocate mempool for CPU socket %u. 
" + "Threads running on socket %u will use socket %u mempool.", + socket_id, socket_id, i); + dm->pktmbuf_pools[socket_id] = dm->pktmbuf_pools[i]; + return 0; + } + } + + return clib_error_return (0, "failed to allocate mempool on socket %u", + socket_id); +} + +#if CLIB_DEBUG > 0 + +u32 *vlib_buffer_state_validation_lock; +uword *vlib_buffer_state_validation_hash; +void *vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void *oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10 << 20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif + +static vlib_buffer_callbacks_t callbacks = { + .vlib_buffer_alloc_cb = &dpdk_buffer_alloc, + .vlib_buffer_alloc_from_free_list_cb = &dpdk_buffer_alloc_from_free_list, + .vlib_buffer_free_cb = &dpdk_buffer_free, + .vlib_buffer_free_no_next_cb = &dpdk_buffer_free_no_next, + .vlib_packet_template_init_cb = &dpdk_packet_template_init, + .vlib_buffer_delete_free_list_cb = &dpdk_buffer_delete_free_list, +}; + +static clib_error_t * +dpdk_buffer_init (vlib_main_t * vm) +{ + vlib_buffer_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_buffer_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/dpdk/cli.c b/src/vnet/devices/dpdk/cli.c index 538a00fd..22bd4b4f 100644 --- a/src/vnet/devices/dpdk/cli.c +++ b/src/vnet/devices/dpdk/cli.c @@ -164,9 +164,9 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input, struct rte_mempool *rmp; int i; - for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++) + for (i = 0; i < vec_len (dpdk_main.pktmbuf_pools); i++) { - rmp = vm->buffer_main->pktmbuf_pools[i]; + rmp = dpdk_main.pktmbuf_pools[i]; if (rmp) { unsigned count = rte_mempool_avail_count (rmp); diff --git a/src/vnet/devices/dpdk/device.c b/src/vnet/devices/dpdk/device.c index b22fbf2e..0deab6aa 100644 --- a/src/vnet/devices/dpdk/device.c +++ b/src/vnet/devices/dpdk/device.c @@ -87,19 +87,18 @@ dpdk_set_mc_filter (vnet_hw_interface_t * hi, struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; + dpdk_main_t *dm = &dpdk_main; struct rte_mbuf **mbufs = 0, *s, *d; u8 nb_segs; unsigned socket_id = rte_socket_id (); int i; - ASSERT (bm->pktmbuf_pools[socket_id]); + ASSERT (dm->pktmbuf_pools[socket_id]); s = rte_mbuf_from_vlib_buffer (b); nb_segs = s->nb_segs; vec_validate (mbufs, nb_segs - 1); - if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs)) + if (rte_pktmbuf_alloc_bulk (dm->pktmbuf_pools[socket_id], mbufs, nb_segs)) { vec_free (mbufs); return 0; diff --git a/src/vnet/devices/dpdk/dpdk.h b/src/vnet/devices/dpdk/dpdk.h index e0436031..066ec6fa 100644 --- a/src/vnet/devices/dpdk/dpdk.h +++ b/src/vnet/devices/dpdk/dpdk.h @@ -425,6 +425,9 @@ typedef struct vlib_main_t *vlib_main; vnet_main_t *vnet_main; dpdk_config_main_t *conf; + + /* mempool */ + struct rte_mempool **pktmbuf_pools; } dpdk_main_t; dpdk_main_t dpdk_main; diff --git a/src/vnet/devices/dpdk/dpdk_priv.h b/src/vnet/devices/dpdk/dpdk_priv.h index 0c81dbc3..dd40ff48 100644 --- a/src/vnet/devices/dpdk/dpdk_priv.h +++ 
b/src/vnet/devices/dpdk/dpdk_priv.h @@ -13,6 +13,9 @@ * limitations under the License. */ +#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *)x) - 1) +#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *)(x+1)) + #define DPDK_NB_RX_DESC_DEFAULT 1024 #define DPDK_NB_TX_DESC_DEFAULT 1024 #define DPDK_NB_RX_DESC_VIRTIO 256 diff --git a/src/vnet/devices/dpdk/init.c b/src/vnet/devices/dpdk/init.c index 60689463..4c040d20 100755 --- a/src/vnet/devices/dpdk/init.c +++ b/src/vnet/devices/dpdk/init.c @@ -64,8 +64,6 @@ static struct rte_eth_conf port_conf_template = { clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) { - vlib_main_t *vm = vlib_get_main (); - vlib_buffer_main_t *bm = vm->buffer_main; int rv; int j; @@ -107,7 +105,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, xd->cpu_socket, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); @@ -115,7 +113,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) if (rv < 0) rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, SOCKET_ID_ANY, 0, - bm-> + dm-> pktmbuf_pools[xd->cpu_socket_id_by_queue [j]]); if (rv < 0) diff --git a/src/vnet/devices/dpdk/thread.c b/src/vnet/devices/dpdk/thread.c new file mode 100644 index 00000000..475dd142 --- /dev/null +++ b/src/vnet/devices/dpdk/thread.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static clib_error_t * +dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id) +{ + int r; + r = rte_eal_remote_launch (fp, (void *) w, lcore_id); + if (r) + return clib_error_return (0, "Failed to launch thread %u", lcore_id); + return 0; +} + +static clib_error_t * +dpdk_thread_set_lcore (u32 thread, u16 lcore) +{ + return 0; +} + +static vlib_thread_callbacks_t callbacks = { + .vlib_launch_thread_cb = &dpdk_launch_thread, + .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore, +}; + +static clib_error_t * +dpdk_thread_init (vlib_main_t * vm) +{ + vlib_thread_cb_register (vm, &callbacks); + return 0; +} + +VLIB_INIT_FUNCTION (dpdk_thread_init); + +/** @endcond */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c index 5f9de504..fa5a68c3 100644 --- a/src/vnet/sr/sr_replicate.c +++ b/src/vnet/sr/sr_replicate.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,7 @@ static uword sr_replicate_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { + dpdk_main_t *dm = &dpdk_main; u32 n_left_from, *from, *to_next; sr_replicate_next_t next_index; int pkts_replicated = 0; @@ -149,7 +151,6 @@ sr_replicate_node_fn (vlib_main_t * vm, int no_buffer_drops = 0; vlib_buffer_free_list_t *fl; unsigned socket_id = rte_socket_id (); - vlib_buffer_main_t *bm = vm->buffer_main; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -246,13 +247,13 @@ sr_replicate_node_fn (vlib_main_t * vm, vlib_buffer_t *clone0_c, *clone_b0; t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); + hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]); if (i < (num_replicas - 1)) { /* Not the last tunnel to process */ clone0 = rte_pktmbuf_clone - (orig_mb0, bm->pktmbuf_pools[socket_id]); + (orig_mb0, dm->pktmbuf_pools[socket_id]); if (clone0 == 0) goto clone_fail; nb_seg = 0; -- cgit 1.2.3-korg From 80f54e20270ed0628ee725e3e3c515731a0188f2 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 8 Mar 2017 19:08:56 -0500 Subject: vlib_mains == 0 special cases be gone Clean up spurious binary API client link dependency on libvlib.so, which managed to hide behind vlib_mains == 0 checks reached by VLIB_xxx_FUNCTION macros. 
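The cleanup relies on the vec_* memory layout: a vector pointer points at the element array, and the length lives in a vec_header_t placed immediately before it, so a static length-1 vlib_mains can be forged at compile time and is valid from the beginning of time. Below is a minimal standalone sketch of that layout trick, using a hypothetical one-field header in place of the real vec_header_t (which carries more allocator metadata):

    #include <stdio.h>

    /* hypothetical one-field stand-in for the real vec_header_t */
    typedef struct
    {
      unsigned len;
    } fake_vec_header_t;

    /* hand-craft a static length-1 vector: header immediately
       followed by its single element, as vec_len () expects */
    static struct
    {
      fake_vec_header_t h;
      int elt;
    } __attribute__ ((packed)) bootstrap = {
      .h.len = 1,
      .elt = 42,
    };

    /* a vector pointer points at the elements, not the header */
    static int *my_vec = &bootstrap.elt;

    static unsigned
    fake_vec_len (int *v)
    {
      return ((fake_vec_header_t *) v - 1)->len;
    }

    int
    main (void)
    {
      /* prints "len=1 first=42" with no runtime initialization */
      printf ("len=%u first=%d\n", fake_vec_len (my_vec), my_vec[0]);
      return 0;
    }

With the real bootstrap vector in place, vec_len (vlib_mains) is 1 and vlib_mains[0] points at vlib_global_main before any initializer runs, which is what lets the vlib_mains == 0 special cases below disappear; start_workers () in threads.c later swaps in a real heap-allocated vector when worker threads are configured, and is the only code that should ever expand vlib_mains.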
Change-Id: I5df1f8ab07dca1944250e643ccf06e60a8462325 Signed-off-by: Dave Barach --- src/plugins/dpdk/ipsec/ipsec.c | 8 +- src/vlib-api.am | 4 +- src/vlib/buffer.c | 27 +- src/vlib/global_funcs.h | 2 +- src/vlib/node_cli.c | 28 +- src/vlib/node_funcs.h | 4 +- src/vlib/threads.c | 16 +- src/vlib/threads.h | 43 ++- src/vlibapi/api.h | 4 +- src/vlibapi/api_shared.c | 530 ++--------------------------------- src/vlibapi/node_serialize.c | 15 +- src/vlibmemory/memory_vlib.c | 471 +++++++++++++++++++++++++++++++ src/vnet/devices/virtio/vhost-user.c | 9 +- src/vpp-api-test.am | 2 - src/vpp/api/api.c | 1 - src/vpp/api/gmon.c | 9 +- 16 files changed, 575 insertions(+), 598 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 16bec20a..b0aaaaec 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -380,13 +380,9 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, im->cb.check_support_cb = dpdk_ipsec_check_support; im->cb.add_del_sa_sess_cb = add_del_sa_sess; - if (vec_len (vlib_mains) == 0) - vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index, + for (i = 1; i < tm->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, VLIB_NODE_STATE_POLLING); - else - for (i = 1; i < tm->n_vlib_mains; i++) - vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index, - VLIB_NODE_STATE_POLLING); /* TODO cryptodev counters */ diff --git a/src/vlib-api.am b/src/vlib-api.am index c05929b1..4e1dae99 100644 --- a/src/vlib-api.am +++ b/src/vlib-api.am @@ -14,7 +14,7 @@ lib_LTLIBRARIES += libvlibmemory.la libvlibapi.la libvlibmemoryclient.la \ libvlibsocket.la -libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlib.la +libvlibmemory_la_DEPENDENCIES = libvppinfra.la libsvm.la libvlibmemory_la_LIBADD = $(libvlibmemory_la_DEPENDENCIES) -lpthread libvlibmemory_la_SOURCES = \ vlibmemory/api.h \ @@ -26,7 +26,7 @@ libvlibmemory_la_SOURCES = \ vlibmemory/unix_shared_memory_queue.c \ vlibmemory/unix_shared_memory_queue.h -libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlib.la libvlibmemory.la +libvlibapi_la_DEPENDENCIES = libvppinfra.la libvlibapi_la_LIBADD = $(libvlibapi_la_DEPENDENCIES) libvlibapi_la_SOURCES = \ vlibapi/api.h \ diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 9f26bec7..6ba82584 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -261,7 +261,28 @@ done: return result; } -vlib_main_t **vlib_mains; +/* + * Hand-craft a static vector w/ length 1, so vec_len(vlib_mains) =1 + * and vlib_mains[0] = &vlib_global_main from the beginning of time. + * + * The only place which should ever expand vlib_mains is start_workers() + * in threads.c. It knows about the bootstrap vector. + */ +/* *INDENT-OFF* */ +static struct +{ + vec_header_t h; + vlib_main_t *vm; +} __attribute__ ((packed)) __bootstrap_vlib_main_vector + __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))) = +{ + .h.len = 1, + .vm = &vlib_global_main, +}; +/* *INDENT-ON* */ + +vlib_main_t **vlib_mains = &__bootstrap_vlib_main_vector.vm; + /* When dubugging validate that given buffers are either known allocated or known free. 
*/ @@ -280,7 +301,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm, ASSERT (os_get_cpu_number () == 0); /* smp disaster check */ - if (vlib_mains) + if (vec_len (vlib_mains) > 1) ASSERT (vm == vlib_mains[0]); is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; @@ -956,7 +977,7 @@ show_buffers (vlib_main_t * vm, do { - curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm; + curr_vm = vlib_mains[vm_index]; bm = curr_vm->buffer_main; /* *INDENT-OFF* */ diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h index bbdbdef5..f51ec381 100644 --- a/src/vlib/global_funcs.h +++ b/src/vlib/global_funcs.h @@ -23,7 +23,7 @@ always_inline vlib_main_t * vlib_get_main (void) { vlib_main_t *vm; - vm = vlib_mains ? vlib_mains[os_get_cpu_number ()] : &vlib_global_main; + vm = vlib_mains[os_get_cpu_number ()]; ASSERT (vm); return vm; } diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 05d0f0b5..62ab2e64 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -248,16 +248,11 @@ show_node_runtime (vlib_main_t * vm, if (unformat (input, "max") || unformat (input, "m")) max = 1; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } /* @@ -331,7 +326,7 @@ show_node_runtime (vlib_main_t * vm, } } - if (vec_len (vlib_mains)) + if (vec_len (vlib_mains) > 1) { vlib_worker_thread_t *w = vlib_worker_threads + j; if (j > 0) @@ -404,16 +399,11 @@ clear_node_runtime (vlib_main_t * vm, vlib_main_t **stat_vms = 0, *stat_vm; vlib_node_runtime_t *r; - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } vlib_worker_thread_barrier_sync (vm); diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index f49a8d6f..8ccfc438 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -201,7 +201,7 @@ vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index) vlib_frame_t *f; u32 cpu_index = frame_index & VLIB_CPU_MASK; u32 offset = frame_index & VLIB_OFFSET_MASK; - vm = vlib_mains ? vlib_mains[cpu_index] : vm; + vm = vlib_mains[cpu_index]; f = vm->heap_base + offset; return f; } @@ -213,7 +213,7 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f) ASSERT (((uword) f & VLIB_CPU_MASK) == 0); - vm = vlib_mains ? 
vlib_mains[f->cpu_index] : vm; + vm = vlib_mains[f->cpu_index]; i = ((u8 *) f - (u8 *) vm->heap_base); return i | f->cpu_index; diff --git a/src/vlib/threads.c b/src/vlib/threads.c index e3ea3c9c..4676be97 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -570,9 +570,13 @@ start_workers (vlib_main_t * vm) if (n_vlib_mains > 1) { - vec_validate (vlib_mains, tm->n_vlib_mains - 1); + /* Replace hand-crafted length-1 vector with a real vector */ + vlib_mains = 0; + + vec_validate_aligned (vlib_mains, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); _vec_len (vlib_mains) = 0; - vec_add1 (vlib_mains, vm); + vec_add1_aligned (vlib_mains, vm, CLIB_CACHE_LINE_BYTES); vlib_worker_threads->wait_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); @@ -685,7 +689,7 @@ start_workers (vlib_main_t * vm) /* Packet trace buffers are guaranteed to be empty, nothing to do here */ clib_mem_set_heap (oldheap); - vec_add1 (vlib_mains, vm_clone); + vec_add1_aligned (vlib_mains, vm_clone, CLIB_CACHE_LINE_BYTES); vm_clone->error_main.counters = vec_dup (vlib_mains[0]->error_main.counters); @@ -805,7 +809,7 @@ vlib_worker_thread_node_runtime_update (void) ASSERT (os_get_cpu_number () == 0); - if (vec_len (vlib_mains) == 0) + if (vec_len (vlib_mains) == 1) return; vm = vlib_mains[0]; @@ -1148,7 +1152,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) f64 deadline; u32 count; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; count = vec_len (vlib_mains) - 1; @@ -1179,7 +1183,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; - if (!vlib_mains) + if (vec_len (vlib_mains) < 2) return; if (--vlib_worker_threads[0].recursion_level > 0) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 75a5a281..a032311c 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -222,30 +222,25 @@ vlib_worker_thread_barrier_check (void) } } -#define foreach_vlib_main(body) \ -do { \ - vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ - int ii; \ - \ - if (vec_len (vlib_mains) == 0) \ - vec_add1 (__vlib_mains, &vlib_global_main); \ - else \ - { \ - for (ii = 0; ii < vec_len (vlib_mains); ii++) \ - { \ - this_vlib_main = vlib_mains[ii]; \ - if (this_vlib_main) \ - vec_add1 (__vlib_mains, this_vlib_main); \ - } \ - } \ - \ - for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ - { \ - this_vlib_main = __vlib_mains[ii]; \ - /* body uses this_vlib_main... */ \ - (body); \ - } \ - vec_free (__vlib_mains); \ +#define foreach_vlib_main(body) \ +do { \ + vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ + int ii; \ + \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... 
*/ \ + (body); \ + } \ + vec_free (__vlib_mains); \ } while (0); #define foreach_sched_policy \ diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h index 2cbeb63c..87a56121 100644 --- a/src/vlibapi/api.h +++ b/src/vlibapi/api.h @@ -252,11 +252,13 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q); vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which); +void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); +void vl_msg_api_barrier_release (void) __attribute__ ((weak)); void vl_msg_api_free (void *); void vl_noop_handler (void *mp); -clib_error_t *vl_api_init (vlib_main_t * vm); void vl_msg_api_increment_missing_client_counter (void); void vl_msg_api_post_mortem_dump (void); +void vl_msg_api_post_mortem_dump_enable_disable (int enable); void vl_msg_api_register_pd_handler (void *handler, u16 msg_id_host_byte_order); int vl_msg_api_pd_handler (void *mp, int rv); diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 69ba10c1..6774e3dd 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -23,11 +23,6 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include @@ -36,19 +31,14 @@ #include #include -api_main_t api_main; - -void vl_msg_api_barrier_sync (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_sync (void) -{ -} - -void vl_msg_api_barrier_release (void) __attribute__ ((weak)); -void -vl_msg_api_barrier_release (void) -{ -} +/* *INDENT-OFF* */ +api_main_t api_main = + { + .region_name = "/unset", + .api_uid = -1, + .api_gid = -1, + }; +/* *INDENT-ON* */ void vl_msg_api_increment_missing_client_counter (void) @@ -57,14 +47,6 @@ vl_msg_api_increment_missing_client_counter (void) am->missing_clients++; } -typedef enum -{ - DUMP, - CUSTOM_DUMP, - REPLAY, - INITIALIZERS, -} vl_api_replay_t; - int vl_msg_api_rx_trace_enabled (api_main_t * am) { @@ -397,6 +379,16 @@ vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which, return 0; } +void +vl_msg_api_barrier_sync (void) +{ +} + +void +vl_msg_api_barrier_release (void) +{ +} + always_inline void msg_handler_internal (api_main_t * am, void *the_msg, int trace_it, int do_it, int free_it) @@ -748,495 +740,15 @@ vl_noop_handler (void *mp) { } -clib_error_t * -vl_api_init (vlib_main_t * vm) -{ - static u8 once; - api_main_t *am = &api_main; - - if (once) - return 0; - - once = 1; - - am->region_name = "/unset"; - /* - * Eventually passed to fchown, -1 => "current user" - * instead of 0 => "root". A very fine disctinction at best. 
- */ - if (am->api_uid == 0) - am->api_uid = -1; - if (am->api_gid == 0) - am->api_gid = -1; - - return (0); -} - -void vl_msg_api_custom_dump_configure (api_main_t * am) - __attribute__ ((weak)); -void -vl_msg_api_custom_dump_configure (api_main_t * am) -{ -} - -VLIB_INIT_FUNCTION (vl_api_init); - -static void -vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, - u32 first_index, u32 last_index, - vl_api_replay_t which) -{ - vl_api_trace_file_header_t *hp; - int i, fd; - struct stat statb; - size_t file_size; - u8 *msg; - u8 endian_swap_needed = 0; - api_main_t *am = &api_main; - u8 *tmpbuf = 0; - u32 nitems; - void **saved_print_handlers = 0; - - fd = open ((char *) filename, O_RDONLY); - - if (fd < 0) - { - vlib_cli_output (vm, "Couldn't open %s\n", filename); - return; - } - - if (fstat (fd, &statb) < 0) - { - vlib_cli_output (vm, "Couldn't stat %s\n", filename); - close (fd); - return; - } - - if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) - { - vlib_cli_output (vm, "File not plausible: %s\n", filename); - close (fd); - return; - } - - file_size = statb.st_size; - file_size = (file_size + 4095) & ~(4096); - - hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) - { - vlib_cli_output (vm, "mmap failed: %s\n", filename); - close (fd); - return; - } - close (fd); - - if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) - || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) - endian_swap_needed = 1; - - if (endian_swap_needed) - nitems = ntohl (hp->nitems); - else - nitems = hp->nitems; - - if (last_index == (u32) ~ 0) - { - last_index = nitems - 1; - } - - if (first_index >= nitems || last_index >= nitems) - { - vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", - first_index, last_index, nitems - 1); - munmap (hp, file_size); - return; - } - if (hp->wrapped) - vlib_cli_output (vm, - "Note: wrapped/incomplete trace, results may vary\n"); - - if (which == CUSTOM_DUMP) - { - saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); - vl_msg_api_custom_dump_configure (am); - } - - - msg = (u8 *) (hp + 1); - - for (i = 0; i < first_index; i++) - { - trace_cfg_t *cfgp; - int size; - u16 msg_id; - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - return; - } - msg += size; - } - - if (which == REPLAY) - am->replay_in_progress = 1; - - for (; i <= last_index; i++) - { - trace_cfg_t *cfgp; - u16 *msg_idp; - u16 msg_id; - int size; - - if (which == DUMP) - vlib_cli_output (vm, "---------- trace %d -----------\n", i); - - size = clib_host_to_net_u32 (*(u32 *) msg); - msg += sizeof (u32); - - if (clib_arch_is_little_endian) - msg_id = ntohs (*((u16 *) msg)); - else - msg_id = *((u16 *) msg); - - cfgp = am->api_trace_cfg + msg_id; - if (!cfgp) - { - vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - - /* Copy the buffer (from the read-only mmap'ed file) */ - vec_validate (tmpbuf, size - 1 + sizeof (uword)); - clib_memcpy (tmpbuf + sizeof (uword), msg, size); - memset (tmpbuf, 0xf, sizeof (uword)); - - /* - * Endian swap if needed. 
All msg data is supposed to be - * in network byte order. All msg handlers are supposed to - * know that. The generic message dumpers don't know that. - * One could fix apigen, I suppose. - */ - if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) - { - void (*endian_fp) (void *); - if (msg_id >= vec_len (am->msg_endian_handlers) - || (am->msg_endian_handlers[msg_id] == 0)) - { - vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; - return; - } - endian_fp = am->msg_endian_handlers[msg_id]; - (*endian_fp) (tmpbuf + sizeof (uword)); - } - - /* msg_id always in network byte order */ - if (clib_arch_is_little_endian) - { - msg_idp = (u16 *) (tmpbuf + sizeof (uword)); - *msg_idp = msg_id; - } - - switch (which) - { - case CUSTOM_DUMP: - case DUMP: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - (*print_fp) (tmpbuf + sizeof (uword), vm); - } - else - { - vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", - msg_id); - break; - } - break; - - case INITIALIZERS: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id]) - { - u8 *s; - int j; - u8 *(*print_fp) (void *, void *); - - print_fp = (void *) am->msg_print_handlers[msg_id]; - - vlib_cli_output (vm, "/*"); - - (*print_fp) (tmpbuf + sizeof (uword), vm); - vlib_cli_output (vm, "*/\n"); - - s = format (0, "static u8 * vl_api_%s_%d[%d] = {", - am->msg_names[msg_id], i, - am->api_trace_cfg[msg_id].size); - - for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) - { - if ((j & 7) == 0) - s = format (s, "\n "); - s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); - } - s = format (s, "\n};\n%c", 0); - vlib_cli_output (vm, (char *) s); - vec_free (s); - } - break; - - case REPLAY: - if (msg_id < vec_len (am->msg_print_handlers) && - am->msg_print_handlers[msg_id] && cfgp->replay_enable) - { - void (*handler) (void *); - - handler = (void *) am->msg_handlers[msg_id]; - - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_sync (); - (*handler) (tmpbuf + sizeof (uword)); - if (!am->is_mp_safe[msg_id]) - vl_msg_api_barrier_release (); - } - else - { - if (cfgp->replay_enable) - vlib_cli_output (vm, "Skipping msg id %d: no handler\n", - msg_id); - break; - } - break; - } - - _vec_len (tmpbuf) = 0; - msg += size; - } - - if (saved_print_handlers) - { - clib_memcpy (am->msg_print_handlers, saved_print_handlers, - vec_len (am->msg_print_handlers) * sizeof (void *)); - vec_free (saved_print_handlers); - } - - munmap (hp, file_size); - vec_free (tmpbuf); - am->replay_in_progress = 0; -} - -u8 * -format_vl_msg_api_trace_status (u8 * s, va_list * args) -{ - api_main_t *am = va_arg (*args, api_main_t *); - vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); - vl_api_trace_t *tp; - char *trace_name; - - switch (which) - { - case VL_API_TRACE_TX: - tp = am->tx_trace; - trace_name = "TX trace"; - break; - - case VL_API_TRACE_RX: - tp = am->rx_trace; - trace_name = "RX trace"; - break; - - default: - abort (); - } - - if (tp == 0) - { - s = format (s, "%s: not yet configured.\n", trace_name); - return s; - } - - s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", - trace_name, vec_len (tp->traces), tp->nitems, - tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); - return s; -} static u8 post_mortem_dump_enabled; -static clib_error_t * -api_trace_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - u32 nitems = 256 << 10; - api_main_t *am = &api_main; - vl_api_trace_which_t which = VL_API_TRACE_RX; - u8 *filename; - u32 first = 0; - u32 last = (u32) ~ 0; - FILE *fp; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - } - else if (unformat (input, "off")) - { - vl_msg_api_trace_onoff (am, which, 0); - } - else if (unformat (input, "save %s", &filename)) - { - u8 *chroot_filename; - if (strstr ((char *) filename, "..") - || index ((char *) filename, '/')) - { - vlib_cli_output (vm, "illegal characters in filename '%s'", - filename); - return 0; - } - - chroot_filename = format (0, "/tmp/%s%c", filename, 0); - - vec_free (filename); - - fp = fopen ((char *) chroot_filename, "w"); - if (fp == NULL) - { - vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); - return 0; - } - rv = vl_msg_api_trace_save (am, which, fp); - fclose (fp); - if (rv == -1) - vlib_cli_output (vm, "API Trace data not present\n"); - else if (rv == -2) - vlib_cli_output (vm, "File for writing is closed\n"); - else if (rv == -10) - vlib_cli_output (vm, "Error while writing header to file\n"); - else if (rv == -11) - vlib_cli_output (vm, "Error while writing trace to file\n"); - else if (rv == -12) - vlib_cli_output (vm, - "Error while writing end of buffer trace to file\n"); - else if (rv == -13) - vlib_cli_output (vm, - "Error while writing start of buffer trace to file\n"); - else if (rv < 0) - vlib_cli_output (vm, "Unkown error while saving: %d", rv); - else - vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); - vec_free (chroot_filename); - } - else if (unformat (input, "dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, DUMP); - } - else if (unformat (input, "custom-dump %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); - } - else if (unformat (input, "replay %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, REPLAY); - } - else if (unformat (input, "initializers %s", &filename)) - { - vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); - } - else if (unformat (input, "tx")) - { - which = VL_API_TRACE_TX; - } - else if (unformat (input, "first %d", &first)) - { - ; - } - else if (unformat (input, "last %d", &last)) - { - ; - } - else if (unformat (input, "status")) - { - vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, - am, which); - } - else if (unformat (input, "free")) - { - vl_msg_api_trace_onoff (am, which, 0); - vl_msg_api_trace_free (am, which); - } - else if (unformat (input, "post-mortem-on")) - post_mortem_dump_enabled = 1; - else if (unformat (input, "post-mortem-off")) - post_mortem_dump_enabled = 0; - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (api_trace_command, static) = { - .path = "api trace", - .short_help = - "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", - .function = api_trace_command_fn, -}; -/* *INDENT-ON* */ - -static clib_error_t * -api_config_fn (vlib_main_t * vm, unformat_input_t * 
input) +void +vl_msg_api_post_mortem_dump_enable_disable (int enable) { - u32 nitems = 256 << 10; - vl_api_trace_which_t which = VL_API_TRACE_RX; - api_main_t *am = &api_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "on") || unformat (input, "enable")) - { - if (unformat (input, "nitems %d", &nitems)) - ; - vl_msg_api_trace_configure (am, which, nitems); - vl_msg_api_trace_onoff (am, which, 1 /* on */ ); - post_mortem_dump_enabled = 1; - } - else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - } - return 0; + post_mortem_dump_enabled = enable; } -VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); - void vl_msg_api_post_mortem_dump (void) { diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c index 4dc1a7d2..50e5c41c 100644 --- a/src/vlibapi/node_serialize.c +++ b/src/vlibapi/node_serialize.c @@ -73,16 +73,11 @@ vlib_node_serialize (vlib_node_main_t * nm, u8 * vector, if (vec_len (stat_vms) == 0) { - if (vec_len (vlib_mains) == 0) - vec_add1 (stat_vms, vm); - else + for (i = 0; i < vec_len (vlib_mains); i++) { - for (i = 0; i < vec_len (vlib_mains); i++) - { - stat_vm = vlib_mains[i]; - if (stat_vm) - vec_add1 (stat_vms, stat_vm); - } + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); } } @@ -286,7 +281,7 @@ vlib_node_unserialize (u8 * vector) return nodes_by_thread; } -#if CLIB_DEBUG > 0 +#if TEST_CODE static clib_error_t * test_node_serialize_command_fn (vlib_main_t * vm, diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index 3a7415c0..d2e05968 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -1437,6 +1439,475 @@ rpc_api_hookup (vlib_main_t * vm) VLIB_API_INIT_FUNCTION (rpc_api_hookup); +typedef enum +{ + DUMP, + CUSTOM_DUMP, + REPLAY, + INITIALIZERS, +} vl_api_replay_t; + +u8 * +format_vl_msg_api_trace_status (u8 * s, va_list * args) +{ + api_main_t *am = va_arg (*args, api_main_t *); + vl_api_trace_which_t which = va_arg (*args, vl_api_trace_which_t); + vl_api_trace_t *tp; + char *trace_name; + + switch (which) + { + case VL_API_TRACE_TX: + tp = am->tx_trace; + trace_name = "TX trace"; + break; + + case VL_API_TRACE_RX: + tp = am->rx_trace; + trace_name = "RX trace"; + break; + + default: + abort (); + } + + if (tp == 0) + { + s = format (s, "%s: not yet configured.\n", trace_name); + return s; + } + + s = format (s, "%s: used %d of %d items, %s enabled, %s wrapped\n", + trace_name, vec_len (tp->traces), tp->nitems, + tp->enabled ? "is" : "is not", tp->wrapped ? 
"has" : "has not"); + return s; +} + +void vl_msg_api_custom_dump_configure (api_main_t * am) + __attribute__ ((weak)); +void +vl_msg_api_custom_dump_configure (api_main_t * am) +{ +} + +static void +vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, + u32 first_index, u32 last_index, + vl_api_replay_t which) +{ + vl_api_trace_file_header_t *hp; + int i, fd; + struct stat statb; + size_t file_size; + u8 *msg; + u8 endian_swap_needed = 0; + api_main_t *am = &api_main; + u8 *tmpbuf = 0; + u32 nitems; + void **saved_print_handlers = 0; + + fd = open ((char *) filename, O_RDONLY); + + if (fd < 0) + { + vlib_cli_output (vm, "Couldn't open %s\n", filename); + return; + } + + if (fstat (fd, &statb) < 0) + { + vlib_cli_output (vm, "Couldn't stat %s\n", filename); + close (fd); + return; + } + + if (!(statb.st_mode & S_IFREG) || (statb.st_size < sizeof (*hp))) + { + vlib_cli_output (vm, "File not plausible: %s\n", filename); + close (fd); + return; + } + + file_size = statb.st_size; + file_size = (file_size + 4095) & ~(4096); + + hp = mmap (0, file_size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (hp == (vl_api_trace_file_header_t *) MAP_FAILED) + { + vlib_cli_output (vm, "mmap failed: %s\n", filename); + close (fd); + return; + } + close (fd); + + if ((clib_arch_is_little_endian && hp->endian == VL_API_BIG_ENDIAN) + || (clib_arch_is_big_endian && hp->endian == VL_API_LITTLE_ENDIAN)) + endian_swap_needed = 1; + + if (endian_swap_needed) + nitems = ntohl (hp->nitems); + else + nitems = hp->nitems; + + if (last_index == (u32) ~ 0) + { + last_index = nitems - 1; + } + + if (first_index >= nitems || last_index >= nitems) + { + vlib_cli_output (vm, "Range (%d, %d) outside file range (0, %d)\n", + first_index, last_index, nitems - 1); + munmap (hp, file_size); + return; + } + if (hp->wrapped) + vlib_cli_output (vm, + "Note: wrapped/incomplete trace, results may vary\n"); + + if (which == CUSTOM_DUMP) + { + saved_print_handlers = (void **) vec_dup (am->msg_print_handlers); + vl_msg_api_custom_dump_configure (am); + } + + + msg = (u8 *) (hp + 1); + + for (i = 0; i < first_index; i++) + { + trace_cfg_t *cfgp; + int size; + u16 msg_id; + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + return; + } + msg += size; + } + + if (which == REPLAY) + am->replay_in_progress = 1; + + for (; i <= last_index; i++) + { + trace_cfg_t *cfgp; + u16 *msg_idp; + u16 msg_id; + int size; + + if (which == DUMP) + vlib_cli_output (vm, "---------- trace %d -----------\n", i); + + size = clib_host_to_net_u32 (*(u32 *) msg); + msg += sizeof (u32); + + if (clib_arch_is_little_endian) + msg_id = ntohs (*((u16 *) msg)); + else + msg_id = *((u16 *) msg); + + cfgp = am->api_trace_cfg + msg_id; + if (!cfgp) + { + vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + + /* Copy the buffer (from the read-only mmap'ed file) */ + vec_validate (tmpbuf, size - 1 + sizeof (uword)); + clib_memcpy (tmpbuf + sizeof (uword), msg, size); + memset (tmpbuf, 0xf, sizeof (uword)); + + /* + * Endian swap if needed. All msg data is supposed to be + * in network byte order. All msg handlers are supposed to + * know that. The generic message dumpers don't know that. 
+ * One could fix apigen, I suppose. + */ + if ((which == DUMP && clib_arch_is_little_endian) || endian_swap_needed) + { + void (*endian_fp) (void *); + if (msg_id >= vec_len (am->msg_endian_handlers) + || (am->msg_endian_handlers[msg_id] == 0)) + { + vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id); + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; + return; + } + endian_fp = am->msg_endian_handlers[msg_id]; + (*endian_fp) (tmpbuf + sizeof (uword)); + } + + /* msg_id always in network byte order */ + if (clib_arch_is_little_endian) + { + msg_idp = (u16 *) (tmpbuf + sizeof (uword)); + *msg_idp = msg_id; + } + + switch (which) + { + case CUSTOM_DUMP: + case DUMP: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + (*print_fp) (tmpbuf + sizeof (uword), vm); + } + else + { + vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n", + msg_id); + break; + } + break; + + case INITIALIZERS: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id]) + { + u8 *s; + int j; + u8 *(*print_fp) (void *, void *); + + print_fp = (void *) am->msg_print_handlers[msg_id]; + + vlib_cli_output (vm, "/*"); + + (*print_fp) (tmpbuf + sizeof (uword), vm); + vlib_cli_output (vm, "*/\n"); + + s = format (0, "static u8 * vl_api_%s_%d[%d] = {", + am->msg_names[msg_id], i, + am->api_trace_cfg[msg_id].size); + + for (j = 0; j < am->api_trace_cfg[msg_id].size; j++) + { + if ((j & 7) == 0) + s = format (s, "\n "); + s = format (s, "0x%02x,", tmpbuf[sizeof (uword) + j]); + } + s = format (s, "\n};\n%c", 0); + vlib_cli_output (vm, (char *) s); + vec_free (s); + } + break; + + case REPLAY: + if (msg_id < vec_len (am->msg_print_handlers) && + am->msg_print_handlers[msg_id] && cfgp->replay_enable) + { + void (*handler) (void *); + + handler = (void *) am->msg_handlers[msg_id]; + + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_sync (); + (*handler) (tmpbuf + sizeof (uword)); + if (!am->is_mp_safe[msg_id]) + vl_msg_api_barrier_release (); + } + else + { + if (cfgp->replay_enable) + vlib_cli_output (vm, "Skipping msg id %d: no handler\n", + msg_id); + break; + } + break; + } + + _vec_len (tmpbuf) = 0; + msg += size; + } + + if (saved_print_handlers) + { + clib_memcpy (am->msg_print_handlers, saved_print_handlers, + vec_len (am->msg_print_handlers) * sizeof (void *)); + vec_free (saved_print_handlers); + } + + munmap (hp, file_size); + vec_free (tmpbuf); + am->replay_in_progress = 0; +} + +static clib_error_t * +api_trace_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 nitems = 256 << 10; + api_main_t *am = &api_main; + vl_api_trace_which_t which = VL_API_TRACE_RX; + u8 *filename; + u32 first = 0; + u32 last = (u32) ~ 0; + FILE *fp; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + } + else if (unformat (input, "off")) + { + vl_msg_api_trace_onoff (am, which, 0); + } + else if (unformat (input, "save %s", &filename)) + { + u8 *chroot_filename; + if (strstr ((char *) filename, "..") + || index ((char *) filename, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", + filename); + return 0; + } + + chroot_filename = format (0, 
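/*
 * The loops above walk the trace file as a sequence of length-prefixed
 * records: a u32 byte count in network order, then the message itself,
 * whose first two bytes are the message id, also in network order.  A
 * stripped-down walker under exactly those assumptions (names here are
 * illustrative).  Incidentally, the page-rounding expression earlier,
 * (file_size + 4095) & ~(4096), clears only bit 12; rounding up to a
 * 4 KB boundary presumably wants & ~(4095).
 */
static void
walk_trace_records (u8 * base, u32 nitems)
{
  u8 *msg = base;
  u32 i;

  for (i = 0; i < nitems; i++)
    {
      u32 size = clib_net_to_host_u32 (*(u32 *) msg);	/* record length */
      u16 msg_id;

      msg += sizeof (u32);
      msg_id = clib_net_to_host_u16 (*(u16 *) msg);	/* leading msg id */
      fformat (stdout, "record %u: msg id %u, %u bytes\n", i, msg_id, size);
      msg += size;	/* the length prefix is not counted in size */
    }
}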
"/tmp/%s%c", filename, 0); + + vec_free (filename); + + fp = fopen ((char *) chroot_filename, "w"); + if (fp == NULL) + { + vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename); + return 0; + } + rv = vl_msg_api_trace_save (am, which, fp); + fclose (fp); + if (rv == -1) + vlib_cli_output (vm, "API Trace data not present\n"); + else if (rv == -2) + vlib_cli_output (vm, "File for writing is closed\n"); + else if (rv == -10) + vlib_cli_output (vm, "Error while writing header to file\n"); + else if (rv == -11) + vlib_cli_output (vm, "Error while writing trace to file\n"); + else if (rv == -12) + vlib_cli_output (vm, + "Error while writing end of buffer trace to file\n"); + else if (rv == -13) + vlib_cli_output (vm, + "Error while writing start of buffer trace to file\n"); + else if (rv < 0) + vlib_cli_output (vm, "Unkown error while saving: %d", rv); + else + vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename); + vec_free (chroot_filename); + } + else if (unformat (input, "dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, DUMP); + } + else if (unformat (input, "custom-dump %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, CUSTOM_DUMP); + } + else if (unformat (input, "replay %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, REPLAY); + } + else if (unformat (input, "initializers %s", &filename)) + { + vl_msg_api_process_file (vm, filename, first, last, INITIALIZERS); + } + else if (unformat (input, "tx")) + { + which = VL_API_TRACE_TX; + } + else if (unformat (input, "first %d", &first)) + { + ; + } + else if (unformat (input, "last %d", &last)) + { + ; + } + else if (unformat (input, "status")) + { + vlib_cli_output (vm, "%U", format_vl_msg_api_trace_status, + am, which); + } + else if (unformat (input, "free")) + { + vl_msg_api_trace_onoff (am, which, 0); + vl_msg_api_trace_free (am, which); + } + else if (unformat (input, "post-mortem-on")) + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + else if (unformat (input, "post-mortem-off")) + vl_msg_api_post_mortem_dump_enable_disable (0 /* enable */ ); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (api_trace_command, static) = { + .path = "api trace", + .short_help = + "api trace [on|off][dump|save|replay ][status][free][post-mortem-on]", + .function = api_trace_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +api_config_fn (vlib_main_t * vm, unformat_input_t * input) +{ + u32 nitems = 256 << 10; + vl_api_trace_which_t which = VL_API_TRACE_RX; + api_main_t *am = &api_main; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "on") || unformat (input, "enable")) + { + if (unformat (input, "nitems %d", &nitems)) + ; + vl_msg_api_trace_configure (am, which, nitems); + vl_msg_api_trace_onoff (am, which, 1 /* on */ ); + vl_msg_api_post_mortem_dump_enable_disable (1 /* enable */ ); + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + return 0; +} + +VLIB_CONFIG_FUNCTION (api_config_fn, "api-trace"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index b6b4c04a..100ec613 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -374,8 +374,7 @@ vhost_user_rx_thread_placement () for (i = 
vum->input_cpu_first_index; i < vum->input_cpu_first_index + vum->input_cpu_count; i++) { - vlib_node_set_state (vlib_mains ? vlib_mains[i] : &vlib_global_main, - vhost_user_input_node.index, + vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, VLIB_NODE_STATE_DISABLED); vec_add1 (workers, i); } @@ -406,9 +405,9 @@ vhost_user_rx_thread_placement () iaq.qid = qid; iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; vec_add1 (vhc->rx_queues, iaq); - vlib_node_set_state (vlib_mains ? vlib_mains[cpu_index] : - &vlib_global_main, vhost_user_input_node.index, - VLIB_NODE_STATE_POLLING); + vlib_node_set_state (vlib_mains[cpu_index], + vhost_user_input_node.index, + VLIB_NODE_STATE_POLLING); } }); /* *INDENT-ON* */ diff --git a/src/vpp-api-test.am b/src/vpp-api-test.am index f0d5df62..ceab687c 100644 --- a/src/vpp-api-test.am +++ b/src/vpp-api-test.am @@ -34,14 +34,12 @@ vpp_json_test_SOURCES = \ vat/json_test.c vpp_api_test_LDADD = \ - libvlib.la \ libvlibmemoryclient.la \ libsvm.la \ libvatplugin.la \ libvppinfra.la \ libvlibapi.la \ libvlibmemory.la \ - libvnet.la \ -lpthread -lm -lrt -ldl -lcrypto vpp_api_test_LDFLAGS = -Wl,--export-dynamic diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 828394ed..c85dc680 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -2143,7 +2143,6 @@ vpe_api_init (vlib_main_t * vm) am->oam_events_registration_hash = hash_create (0, sizeof (uword)); am->bfd_events_registration_hash = hash_create (0, sizeof (uword)); - vl_api_init (vm); vl_set_memory_region_name ("/vpe-api"); vl_enable_disable_memory_api (vm, 1 /* enable it */ ); diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c index 610f40ed..277be8c0 100644 --- a/src/vpp/api/gmon.c +++ b/src/vpp/api/gmon.c @@ -122,13 +122,8 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* Initial wait for the world to settle down */ vlib_process_suspend (vm, 5.0); - if (vec_len (vlib_mains) == 0) - vec_add1 (gm->my_vlib_mains, &vlib_global_main); - else - { - for (i = 0; i < vec_len (vlib_mains); i++) - vec_add1 (gm->my_vlib_mains, vlib_mains[i]); - } + for (i = 0; i < vec_len (vlib_mains); i++) + vec_add1 (gm->my_vlib_mains, vlib_mains[i]); while (1) { -- cgit 1.2.3-korg From e9d52d54361296af520e1ece0c25307a2d86c018 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 9 Mar 2017 15:42:26 +0100 Subject: vlib: deduplicate code in main and worker main loop Change-Id: Id18d59c9442602633a6310b2001a95bce8b6b232 Signed-off-by: Damjan Marion --- src/vlib/main.c | 176 +++++++++++++++++++++++++++++++++-------------------- src/vlib/main.h | 2 + src/vlib/threads.c | 76 +---------------------- src/vlib/threads.h | 4 +- 4 files changed, 116 insertions(+), 142 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/vlib/main.c b/src/vlib/main.c index 09f34bbd..91760706 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -1398,51 +1398,75 @@ dispatch_suspended_process (vlib_main_t * vm, return t; } -static void -vlib_main_loop (vlib_main_t * vm) +static_always_inline void +vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { vlib_node_main_t *nm = &vm->node_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); uword i; u64 cpu_time_now; + vlib_frame_queue_main_t *fqm; /* Initialize pending node vector. */ - vec_resize (nm->pending_frames, 32); - _vec_len (nm->pending_frames) = 0; + if (is_main) + { + vec_resize (nm->pending_frames, 32); + _vec_len (nm->pending_frames) = 0; + } /* Mark time of main loop start.
*/ - cpu_time_now = vm->clib_time.last_cpu_time; - vm->cpu_time_main_loop_start = cpu_time_now; + if (is_main) + { + cpu_time_now = vm->clib_time.last_cpu_time; + vm->cpu_time_main_loop_start = cpu_time_now; + } + else + cpu_time_now = clib_cpu_time_now (); /* Arrange for first level of timing wheel to cover times we care most about. */ - nm->timing_wheel.min_sched_time = 10e-6; - nm->timing_wheel.max_sched_time = 10e-3; - timing_wheel_init (&nm->timing_wheel, - cpu_time_now, vm->clib_time.clocks_per_second); + if (is_main) + { + nm->timing_wheel.min_sched_time = 10e-6; + nm->timing_wheel.max_sched_time = 10e-3; + timing_wheel_init (&nm->timing_wheel, + cpu_time_now, vm->clib_time.clocks_per_second); + vec_alloc (nm->data_from_advancing_timing_wheel, 32); + } /* Pre-allocate expired nodes. */ - vec_alloc (nm->data_from_advancing_timing_wheel, 32); vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); - if (!nm->polling_threshold_vector_length) - nm->polling_threshold_vector_length = 10; - if (!nm->interrupt_threshold_vector_length) - nm->interrupt_threshold_vector_length = 5; + if (is_main) + { + if (!nm->polling_threshold_vector_length) + nm->polling_threshold_vector_length = 10; + if (!nm->interrupt_threshold_vector_length) + nm->interrupt_threshold_vector_length = 5; - nm->current_process_index = ~0; + nm->current_process_index = ~0; + } /* Start all processes. */ - { - uword i; - for (i = 0; i < vec_len (nm->processes); i++) - cpu_time_now = - dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now); - } + if (is_main) + { + uword i; + for (i = 0; i < vec_len (nm->processes); i++) + cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, + cpu_time_now); + } while (1) { vlib_node_runtime_t *n; + if (!is_main) + { + vlib_worker_thread_barrier_check (); + vec_foreach (fqm, tm->frame_queue_mains) + vlib_frame_queue_dequeue (vm, fqm); + } + /* Process pre-input nodes. */ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) cpu_time_now = dispatch_node (vm, n, @@ -1459,7 +1483,7 @@ vlib_main_loop (vlib_main_t * vm) /* frame */ 0, cpu_time_now); - if (PREDICT_TRUE (vm->queue_signal_pending == 0)) + if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) vm->queue_signal_callback (vm); /* Next handle interrupts. */ @@ -1484,58 +1508,64 @@ vlib_main_loop (vlib_main_t * vm) } } - /* Check if process nodes have expired from timing wheel. */ - nm->data_from_advancing_timing_wheel - = timing_wheel_advance (&nm->timing_wheel, cpu_time_now, - nm->data_from_advancing_timing_wheel, - &nm->cpu_time_next_process_ready); - - ASSERT (nm->data_from_advancing_timing_wheel != 0); - if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) + if (is_main) { - uword i; - - processes_timing_wheel_data: - for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel); - i++) + /* Check if process nodes have expired from timing wheel. 
*/ + nm->data_from_advancing_timing_wheel + = timing_wheel_advance (&nm->timing_wheel, cpu_time_now, + nm->data_from_advancing_timing_wheel, + &nm->cpu_time_next_process_ready); + + ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE + (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) { - u32 d = nm->data_from_advancing_timing_wheel[i]; - u32 di = vlib_timing_wheel_data_get_index (d); + uword i; - if (vlib_timing_wheel_data_is_timed_event (d)) + processes_timing_wheel_data: + for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel); + i++) { - vlib_signal_timed_event_data_t *te = - pool_elt_at_index (nm->signal_timed_event_data_pool, di); - vlib_node_t *n = vlib_get_node (vm, te->process_node_index); - vlib_process_t *p = - vec_elt (nm->processes, n->runtime_index); - void *data; - data = - vlib_process_signal_event_helper (nm, n, p, - te->event_type_index, - te->n_data_elts, - te->n_data_elt_bytes); - if (te->n_data_bytes < sizeof (te->inline_event_data)) - clib_memcpy (data, te->inline_event_data, - te->n_data_bytes); + u32 d = nm->data_from_advancing_timing_wheel[i]; + u32 di = vlib_timing_wheel_data_get_index (d); + + if (vlib_timing_wheel_data_is_timed_event (d)) + { + vlib_signal_timed_event_data_t *te = + pool_elt_at_index (nm->signal_timed_event_data_pool, + di); + vlib_node_t *n = + vlib_get_node (vm, te->process_node_index); + vlib_process_t *p = + vec_elt (nm->processes, n->runtime_index); + void *data; + data = + vlib_process_signal_event_helper (nm, n, p, + te->event_type_index, + te->n_data_elts, + te->n_data_elt_bytes); + if (te->n_data_bytes < sizeof (te->inline_event_data)) + clib_memcpy (data, te->inline_event_data, + te->n_data_bytes); + else + { + clib_memcpy (data, te->event_data_as_vector, + te->n_data_bytes); + vec_free (te->event_data_as_vector); + } + pool_put (nm->signal_timed_event_data_pool, te); + } else { - clib_memcpy (data, te->event_data_as_vector, - te->n_data_bytes); - vec_free (te->event_data_as_vector); + cpu_time_now = clib_cpu_time_now (); + cpu_time_now = + dispatch_suspended_process (vm, di, cpu_time_now); } - pool_put (nm->signal_timed_event_data_pool, te); } - else - { - cpu_time_now = clib_cpu_time_now (); - cpu_time_now = - dispatch_suspended_process (vm, di, cpu_time_now); - } - } - /* Reset vector. */ - _vec_len (nm->data_from_advancing_timing_wheel) = 0; + /* Reset vector. */ + _vec_len (nm->data_from_advancing_timing_wheel) = 0; + } } /* Input nodes may have added work to the pending vector. @@ -1548,7 +1578,7 @@ vlib_main_loop (vlib_main_t * vm) _vec_len (nm->pending_frames) = 0; /* Pending internal nodes may resume processes. */ - if (_vec_len (nm->data_from_advancing_timing_wheel) > 0) + if (is_main && _vec_len (nm->data_from_advancing_timing_wheel) > 0) goto processes_timing_wheel_data; vlib_increment_main_loop_counter (vm); @@ -1559,6 +1589,18 @@ vlib_main_loop (vlib_main_t * vm) } } +static void +vlib_main_loop (vlib_main_t * vm) +{ + vlib_main_or_worker_loop (vm, /* is_main */ 1); +} + +void +vlib_worker_loop (vlib_main_t * vm) +{ + vlib_main_or_worker_loop (vm, /* is_main */ 0); +} + vlib_main_t vlib_global_main; static clib_error_t * diff --git a/src/vlib/main.h b/src/vlib/main.h index d9ac1445..a6d50b39 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -178,6 +178,8 @@ typedef struct vlib_main_t /* Global main structure. 
*/ extern vlib_main_t vlib_global_main; +void vlib_worker_loop (vlib_main_t * vm); + always_inline f64 vlib_time_now (vlib_main_t * vm) { diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 4676be97..07dbff33 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1208,9 +1208,8 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) * If so, pull the packets off the frames and put them to * the handoff node. */ -static inline int -vlib_frame_queue_dequeue_internal (vlib_main_t * vm, - vlib_frame_queue_main_t * fqm) +int +vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm) { u32 thread_id = vm->cpu_index; vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id]; @@ -1337,75 +1336,6 @@ vlib_frame_queue_dequeue_internal (vlib_main_t * vm, return processed; } -static_always_inline void -vlib_worker_thread_internal (vlib_main_t * vm) -{ - vlib_node_main_t *nm = &vm->node_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - u64 cpu_time_now = clib_cpu_time_now (); - vlib_frame_queue_main_t *fqm; - - vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); - - while (1) - { - vlib_worker_thread_barrier_check (); - - vec_foreach (fqm, tm->frame_queue_mains) - vlib_frame_queue_dequeue_internal (vm, fqm); - - vlib_node_runtime_t *n; - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_POLLING, /* frame */ 0, - cpu_time_now); - } - - /* Next handle interrupts. */ - { - uword l = _vec_len (nm->pending_interrupt_node_runtime_indices); - uword i; - if (l > 0) - { - _vec_len (nm->pending_interrupt_node_runtime_indices) = 0; - for (i = 0; i < l; i++) - { - n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - nm-> - pending_interrupt_node_runtime_indices - [i]); - cpu_time_now = - dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_INTERRUPT, - /* frame */ 0, - cpu_time_now); - } - } - } - - if (_vec_len (nm->pending_frames)) - { - int i; - cpu_time_now = clib_cpu_time_now (); - for (i = 0; i < _vec_len (nm->pending_frames); i++) - { - vlib_pending_frame_t *p; - - p = nm->pending_frames + i; - - cpu_time_now = dispatch_pending_node (vm, p, cpu_time_now); - } - _vec_len (nm->pending_frames) = 0; - } - vlib_increment_main_loop_counter (vm); - - /* Record time stamp in case there are no enabled nodes and above - calls do not update time stamp. */ - cpu_time_now = clib_cpu_time_now (); - } -} - void vlib_worker_thread_fn (void *arg) { @@ -1423,7 +1353,7 @@ vlib_worker_thread_fn (void *arg) while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); - vlib_worker_thread_internal (vm); + vlib_worker_loop (vm); } /* *INDENT-OFF* */ diff --git a/src/vlib/threads.h b/src/vlib/threads.h index a032311c..fc1633f6 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -159,8 +159,8 @@ int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index, u32 frame_queue_index, vlib_frame_t * frame, vlib_frame_queue_msg_type_t type); -int vlib_frame_queue_dequeue (int thread_id, - vlib_main_t * vm, vlib_node_main_t * nm); +int +vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm); u64 dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node, -- cgit 1.2.3-korg From 9a332e1639fbfb4eb4ddf47b1681e05493ae6da3 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Mar 2017 15:11:20 +0200 Subject: vlib: inline dispatch_node(...) 
(again) Worker main loop is now shared code with main thread main loop so no need to export functions anymore. Change-Id: I99ee2eee981c1b88ca31d20eabeb6c21d030a34d Signed-off-by: Damjan Marion --- src/vlib/main.c | 4 ++-- src/vlib/threads.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/vlib/main.c b/src/vlib/main.c index 50f0b162..55a731f1 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -917,7 +917,7 @@ vlib_dump_context_trace (vlib_main_t * vm, u32 bi) } -/* static_always_inline */ u64 +static_always_inline u64 dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_node_type_t type, @@ -1093,7 +1093,7 @@ dispatch_node (vlib_main_t * vm, return t; } -/* static */ u64 +static u64 dispatch_pending_node (vlib_main_t * vm, vlib_pending_frame_t * p, u64 last_time_stamp) { diff --git a/src/vlib/threads.h b/src/vlib/threads.h index fc1633f6..39f64e1d 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -162,15 +162,6 @@ int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index, int vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm); -u64 dispatch_node (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_node_type_t type, - vlib_node_state_t dispatch_state, - vlib_frame_t * frame, u64 last_time_stamp); - -u64 dispatch_pending_node (vlib_main_t * vm, - vlib_pending_frame_t * p, u64 last_time_stamp); - void vlib_worker_thread_node_runtime_update (void); void vlib_create_worker_threads (vlib_main_t * vm, int n, -- cgit 1.2.3-korg From ce359db3b68528ce576862129b2a7709681ad2c6 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 16 Mar 2017 16:15:38 +0100 Subject: vlib: extend foreach_vlib_main macro to assert if workers are not parked Change-Id: I6ff7b65a400734a47bc0a7d03faf86ef1cf4f8c8 Signed-off-by: Damjan Marion --- src/vlib/main.h | 3 +++ src/vlib/threads.h | 36 ++++++++++++++++++++++++------------ 2 files changed, 27 insertions(+), 12 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/vlib/main.h b/src/vlib/main.h index 98bc823d..0197b4f3 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -174,6 +174,9 @@ typedef struct vlib_main_t volatile u32 api_queue_nonempty; void (*queue_signal_callback) (struct vlib_main_t *); u8 **argv; + + /* debugging */ + volatile int parked_at_barrier; } vlib_main_t; /* Global main structure. */ diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 39f64e1d..eca4fc26 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -201,18 +201,6 @@ typedef enum void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which); -static inline void -vlib_worker_thread_barrier_check (void) -{ - if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier)) - { - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); - while (*vlib_worker_threads->wait_at_barrier) - ; - clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); - } -} - #define foreach_vlib_main(body) \ do { \ vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \ @@ -221,6 +209,8 @@ do { \ for (ii = 0; ii < vec_len (vlib_mains); ii++) \ { \ this_vlib_main = vlib_mains[ii]; \ + ASSERT (ii == 0 || \ + this_vlib_main->parked_at_barrier == 1); \ if (this_vlib_main) \ vec_add1 (__vlib_mains, this_vlib_main); \ } \ @@ -320,6 +310,8 @@ typedef struct extern vlib_thread_main_t vlib_thread_main; +#include + #define VLIB_REGISTER_THREAD(x,...) 
\ __VA_ARGS__ vlib_thread_registration_t x; \ static void __vlib_add_thread_registration_##x (void) \ @@ -356,6 +348,26 @@ vlib_get_current_worker_index () return os_get_cpu_number () - 1; } +static inline void +vlib_worker_thread_barrier_check (void) +{ + if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier)) + { + vlib_main_t *vm; + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); + if (CLIB_DEBUG > 0) + { + vm = vlib_get_main (); + vm->parked_at_barrier = 1; + } + while (*vlib_worker_threads->wait_at_barrier) + ; + if (CLIB_DEBUG > 0) + vm->parked_at_barrier = 0; + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); + } +} + always_inline vlib_main_t * vlib_get_worker_vlib_main (u32 worker_index) { -- cgit 1.2.3-korg From 586afd762bfa149f5ca167bd5fd5a0cd59ce94fe Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 5 Apr 2017 19:18:20 +0200 Subject: Use thread local storage for thread index This patch deprecates stack-based thread identification, Also removes requirement that thread stacks are adjacent. Finally, possibly annoying for some folks, it renames all occurences of cpu_index and cpu_number with thread index. Using word "cpu" is misleading here as thread can be migrated ti different CPU, and also it is not related to linux cpu index. Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1 Signed-off-by: Damjan Marion --- src/examples/srv6-sample-localsid/node.c | 4 +- src/plugins/dpdk/buffer.c | 2 +- src/plugins/dpdk/device/device.c | 8 +- src/plugins/dpdk/device/dpdk_priv.h | 8 +- src/plugins/dpdk/device/init.c | 2 +- src/plugins/dpdk/device/node.c | 32 +++--- src/plugins/dpdk/hqos/hqos.c | 16 +-- src/plugins/dpdk/ipsec/cli.c | 8 +- src/plugins/dpdk/ipsec/crypto_node.c | 4 +- src/plugins/dpdk/ipsec/esp.h | 4 +- src/plugins/dpdk/ipsec/esp_decrypt.c | 4 +- src/plugins/dpdk/ipsec/esp_encrypt.c | 5 +- src/plugins/dpdk/ipsec/ipsec.c | 2 +- src/plugins/dpdk/ipsec/ipsec.h | 4 +- src/plugins/dpdk/main.c | 2 +- src/plugins/flowperpkt/l2_node.c | 2 +- src/plugins/flowperpkt/node.c | 2 +- src/plugins/ioam/export-common/ioam_export.h | 6 +- .../ioam/ip6/ioam_cache_tunnel_select_node.c | 16 +-- src/plugins/ixge/ixge.c | 2 +- src/plugins/lb/lb.c | 8 +- src/plugins/lb/node.c | 22 ++-- src/plugins/lb/refcount.c | 8 +- src/plugins/lb/refcount.h | 4 +- src/plugins/memif/node.c | 35 +++--- src/plugins/snat/in2out.c | 110 +++++++++--------- src/plugins/snat/out2in.c | 102 ++++++++--------- src/plugins/snat/snat.h | 10 +- src/vlib/buffer.c | 6 +- src/vlib/buffer_funcs.h | 4 +- src/vlib/cli.c | 6 +- src/vlib/counter.h | 16 +-- src/vlib/error.c | 2 +- src/vlib/global_funcs.h | 2 +- src/vlib/main.c | 14 +-- src/vlib/main.h | 2 +- src/vlib/node.c | 2 +- src/vlib/node.h | 6 +- src/vlib/node_funcs.h | 8 +- src/vlib/threads.c | 69 ++++------- src/vlib/threads.h | 21 ++-- src/vlib/unix/cj.c | 7 +- src/vlib/unix/cj.h | 2 +- src/vlib/unix/main.c | 43 +++---- src/vnet/adj/adj_l2.c | 4 +- src/vnet/adj/adj_midchain.c | 8 +- src/vnet/adj/adj_nsh.c | 4 +- src/vnet/classify/vnet_classify.c | 16 +-- src/vnet/cop/ip4_whitelist.c | 8 +- src/vnet/cop/ip6_whitelist.c | 8 +- src/vnet/devices/af_packet/node.c | 20 ++-- src/vnet/devices/devices.c | 61 +++++----- src/vnet/devices/devices.h | 18 +-- src/vnet/devices/netmap/node.c | 24 ++-- src/vnet/devices/ssvm/node.c | 6 +- src/vnet/devices/virtio/vhost-user.c | 127 +++++++++++---------- src/vnet/dpo/lookup_dpo.c | 20 ++-- src/vnet/dpo/replicate_dpo.c | 12 +- src/vnet/ethernet/arp.c | 2 +- src/vnet/ethernet/interface.c | 7 +- 
src/vnet/ethernet/node.c | 14 +-- src/vnet/gre/node.c | 8 +- src/vnet/interface.h | 2 +- src/vnet/interface_output.c | 53 ++++----- src/vnet/ip/ip4_forward.c | 34 +++--- src/vnet/ip/ip4_input.c | 8 +- src/vnet/ip/ip6_forward.c | 24 ++-- src/vnet/ip/ip6_input.c | 8 +- src/vnet/ip/ip6_neighbor.c | 4 +- src/vnet/ipsec/esp.h | 8 +- src/vnet/ipsec/esp_decrypt.c | 13 ++- src/vnet/ipsec/esp_encrypt.c | 13 ++- src/vnet/ipsec/ikev2.c | 64 ++++++----- src/vnet/ipsec/ipsec.h | 12 +- src/vnet/ipsec/ipsec_if.c | 2 +- src/vnet/l2/l2_bvi.h | 2 +- src/vnet/l2/l2_input.c | 14 +-- src/vnet/l2/l2_output.c | 6 +- src/vnet/l2tp/decap.c | 2 +- src/vnet/l2tp/encap.c | 2 +- src/vnet/l2tp/l2tp.c | 6 +- src/vnet/lisp-gpe/decap.c | 16 +-- src/vnet/lldp/lldp_input.c | 2 +- src/vnet/map/ip4_map.c | 14 +-- src/vnet/map/ip4_map_t.c | 12 +- src/vnet/map/ip6_map.c | 19 +-- src/vnet/map/ip6_map_t.c | 12 +- src/vnet/mpls/mpls_input.c | 8 +- src/vnet/mpls/mpls_lookup.c | 20 ++-- src/vnet/mpls/mpls_output.c | 10 +- src/vnet/pg/input.c | 4 +- src/vnet/replication.c | 20 ++-- src/vnet/replication.h | 2 +- src/vnet/session/node.c | 2 +- src/vnet/sr/sr_localsid.c | 44 +++---- src/vnet/tcp/builtin_client.c | 2 +- src/vnet/tcp/tcp.c | 8 +- src/vnet/tcp/tcp_debug.h | 2 +- src/vnet/tcp/tcp_input.c | 10 +- src/vnet/tcp/tcp_output.c | 20 ++-- src/vnet/udp/udp_input.c | 2 +- src/vnet/unix/tapcli.c | 2 +- src/vnet/unix/tuntap.c | 4 +- src/vnet/vxlan-gpe/decap.c | 10 +- src/vnet/vxlan-gpe/encap.c | 12 +- src/vnet/vxlan/decap.c | 10 +- src/vnet/vxlan/encap.c | 12 +- src/vpp/stats/stats.c | 14 +-- src/vpp/stats/stats.h | 2 +- 109 files changed, 790 insertions(+), 791 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c index 7bae9cd7..e83e2352 100644 --- a/src/examples/srv6-sample-localsid/node.c +++ b/src/examples/srv6-sample-localsid/node.c @@ -114,7 +114,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -168,7 +168,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram /* This increments the SRv6 per LocalSID counters.*/ vlib_increment_combined_counter (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? 
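/*
 * The commit message above describes the key mechanism of this rename:
 * a thread learns its own index from thread-local storage instead of
 * deriving it from its stack address, which is what frees the thread
 * stacks from having to be adjacent.  A sketch of the TLS idea, with
 * hypothetical names (the real accessor is vlib_get_thread_index (),
 * whose implementation may differ in detail):
 */
static __thread u32 my_thread_index;

static inline void
my_set_thread_index (u32 index)	/* called once at thread startup */
{
  my_thread_index = index;
}

static inline u32
my_get_thread_index (void)	/* cheap: one TLS load, no stack math */
{
  return my_thread_index;
}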
&(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)), - cpu_index, + thread_index, ls0 - sm->localsids, 1, vlib_buffer_length_in_chain (vm, b0)); diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c index 2765c292..c80b3fa8 100644 --- a/src/plugins/dpdk/buffer.c +++ b/src/plugins/dpdk/buffer.c @@ -132,7 +132,7 @@ dpdk_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) u32 merge_index; int i; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); f = vlib_buffer_get_free_list (vm, free_list_index); diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index 50b26689..91661246 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -243,7 +243,7 @@ static_always_inline ASSERT (ring->tx_tail == 0); n_retry = 16; - queue_id = vm->cpu_index; + queue_id = vm->thread_index; do { @@ -266,7 +266,7 @@ static_always_inline { /* no wrap, transmit in one burst */ dpdk_device_hqos_per_worker_thread_t *hqos = - &xd->hqos_wt[vm->cpu_index]; + &xd->hqos_wt[vm->thread_index]; ASSERT (hqos->swq != NULL); @@ -332,7 +332,7 @@ dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp) { dpdk_main_t *dm = &dpdk_main; - u32 my_cpu = vm->cpu_index; + u32 my_cpu = vm->thread_index; struct rte_mbuf *mb_new; if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0) @@ -376,7 +376,7 @@ dpdk_interface_tx (vlib_main_t * vm, tx_ring_hdr_t *ring; u32 n_on_ring; - my_cpu = vm->cpu_index; + my_cpu = vm->thread_index; queue_id = my_cpu; diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h index dd40ff48..52b4ca4b 100644 --- a/src/plugins/dpdk/device/dpdk_priv.h +++ b/src/plugins/dpdk/device/dpdk_priv.h @@ -79,7 +79,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) { vlib_simple_counter_main_t *cm; vnet_main_t *vnm = vnet_get_main (); - u32 my_cpu = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u64 rxerrors, last_rxerrors; /* only update counters for PMD interfaces */ @@ -96,7 +96,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_NO_BUF); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index, xd->stats.rx_nombuf - xd->last_stats.rx_nombuf); } @@ -107,7 +107,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_MISS); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index, xd->stats.imissed - xd->last_stats.imissed); } @@ -119,7 +119,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now) cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_RX_ERROR); - vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index, rxerrors - last_rxerrors); } diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 538db6cb..7eaf8da7 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -324,7 +324,7 @@ dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) int rv; int j; - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) { diff --git 
a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index e740fd18..b10e0fad 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -283,7 +283,7 @@ dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3, */ static_always_inline u32 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id, + vlib_node_runtime_t * node, u32 thread_index, u16 queue_id, int maybe_multiseg) { u32 n_buffers; @@ -294,7 +294,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, uword n_rx_bytes = 0; u32 n_trace, trace_cnt __attribute__ ((unused)); vlib_buffer_free_list_t *fl; - vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index); + vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index); if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) return 0; @@ -306,7 +306,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, return 0; } - vec_reset_length (xd->d_trace_buffers[cpu_index]); + vec_reset_length (xd->d_trace_buffers[thread_index]); trace_cnt = n_trace = vlib_get_trace_count (vm, node); if (n_trace > 0) @@ -318,7 +318,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, { struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++]; vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - vec_add1 (xd->d_trace_buffers[cpu_index], + vec_add1 (xd->d_trace_buffers[thread_index], vlib_get_buffer_index (vm, b)); } } @@ -546,20 +546,22 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, vlib_put_next_frame (vm, node, next_index, n_left_to_next); } - if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0)) + if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0)) { - dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index], - vec_len (xd->d_trace_buffers[cpu_index])); - vlib_set_trace_count (vm, node, n_trace - - vec_len (xd->d_trace_buffers[cpu_index])); + dpdk_rx_trace (dm, node, xd, queue_id, + xd->d_trace_buffers[thread_index], + vec_len (xd->d_trace_buffers[thread_index])); + vlib_set_trace_count (vm, node, + n_trace - + vec_len (xd->d_trace_buffers[thread_index])); } vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); + thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, mb_index); + vnet_device_increment_rx_packets (thread_index, mb_index); return mb_index; } @@ -630,19 +632,19 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) dpdk_device_t *xd; uword n_rx_packets = 0; dpdk_device_and_queue_t *dq; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* * Poll all devices on this cpu for input/interrupts. 
*/ /* *INDENT-OFF* */ - vec_foreach (dq, dm->devices_by_cpu[cpu_index]) + vec_foreach (dq, dm->devices_by_cpu[thread_index]) { xd = vec_elt_at_index(dm->devices, dq->device); if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1); else - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0); + n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0); } /* *INDENT-ON* */ diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c index a288fca7..8b251beb 100644 --- a/src/plugins/dpdk/hqos/hqos.c +++ b/src/plugins/dpdk/hqos/hqos.c @@ -397,7 +397,7 @@ static_always_inline void dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; u32 dev_pos; dev_pos = 0; @@ -405,12 +405,12 @@ dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm) { vlib_worker_thread_barrier_check (); - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]); if (dev_pos >= n_devs) dev_pos = 0; dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos); dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; @@ -479,7 +479,7 @@ static_always_inline void dpdk_hqos_thread_internal (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; u32 dev_pos; dev_pos = 0; @@ -487,7 +487,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm) { vlib_worker_thread_barrier_check (); - u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]); + u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]); if (PREDICT_FALSE (n_devs == 0)) { dev_pos = 0; @@ -497,7 +497,7 @@ dpdk_hqos_thread_internal (vlib_main_t * vm) dev_pos = 0; dpdk_device_and_queue_t *dq = - vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos); + vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos); dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device); dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht; @@ -586,7 +586,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w) vm = vlib_get_main (); - ASSERT (vm->cpu_index == os_get_cpu_number ()); + ASSERT (vm->thread_index == vlib_get_thread_index ()); clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); @@ -595,7 +595,7 @@ dpdk_hqos_thread (vlib_worker_thread_t * w) while (tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); - if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0) + if (vec_len (dm->devices_by_hqos_cpu[vm->thread_index]) == 0) return clib_error ("current I/O TX thread does not have any devices assigned to it"); diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c index cd0a6037..3ae8c9b8 100644 --- a/src/plugins/dpdk/ipsec/cli.c +++ b/src/plugins/dpdk/ipsec/cli.c @@ -42,8 +42,8 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) for (i = 0; i < tm->n_vlib_mains; i++) { uword key, data; - u32 cpu_index = vlib_mains[i]->cpu_index; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u32 thread_index = 
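/*
 * Worth pausing on the vlib_main_or_worker_loop refactor earlier in
 * this series: is_main is a compile-time constant at each call site,
 * so with static_always_inline the compiler emits two specialized
 * copies of the loop and discards the dead branches in each.  The
 * shape of that pattern, reduced to a sketch with stub helpers:
 */
static void do_main_only_housekeeping (void) { /* e.g. timing wheel */ }
static void poll_worker_queues (void)	      { /* e.g. frame queues */ }
static void dispatch_shared_nodes (void)      { /* common to both */ }

static_always_inline void
work_loop_inline (int is_main)
{
  while (1)
    {
      if (is_main)
	do_main_only_housekeeping ();	/* folded away in the worker copy */
      else
	poll_worker_queues ();		/* folded away in the main copy */
      dispatch_shared_nodes ();
    }
}

static void main_loop (void)   { work_loop_inline (1 /* is_main */ ); }
static void worker_loop (void) { work_loop_inline (0 /* is_main */ ); }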
vlib_mains[i]->thread_index; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; u8 *s = 0; if (skip_master) @@ -57,7 +57,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) i32 last_cdev = -1; crypto_qp_data_t *qpd; - s = format (s, "%u\t", cpu_index); + s = format (s, "%u\t", thread_index); /* *INDENT-OFF* */ vec_foreach (qpd, cwm->qp_data) @@ -95,7 +95,7 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) cap.sym.auth.algo = p_key->auth_algo; check_algo_is_supported (&cap, auth_str); vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", - vlib_mains[i]->cpu_index, cipher_str, auth_str, + vlib_mains[i]->thread_index, cipher_str, auth_str, p_key->is_outbound ? "out" : "in", cwm->qp_data[data].dev_id, cwm->qp_data[data].qp_id); diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c index dc3452b2..a3c45902 100644 --- a/src/plugins/dpdk/ipsec/crypto_node.c +++ b/src/plugins/dpdk/ipsec/crypto_node.c @@ -171,9 +171,9 @@ static uword dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; crypto_qp_data_t *qpd; u32 n_deq = 0; diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h index 320295b1..56f0c756 100644 --- a/src/plugins/dpdk/ipsec/esp.h +++ b/src/plugins/dpdk/ipsec/esp.h @@ -170,9 +170,9 @@ static_always_inline int create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, u8 is_outbound) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; struct rte_crypto_sym_xform cipher_xform = { 0 }; struct rte_crypto_sym_xform auth_xform = { 0 }; struct rte_crypto_sym_xform *xfs; diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c index 286e03f8..bab76e3b 100644 --- a/src/plugins/dpdk/ipsec/esp_decrypt.c +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -88,7 +88,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, { u32 n_left_from, *from, *to_next, next_index; ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); dpdk_crypto_main_t * dcm = &dpdk_crypto_main; dpdk_esp_main_t * em = &dpdk_esp_main; u32 i; @@ -104,7 +104,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, return n_left_from; } - crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index); + crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, thread_index); u32 n_qps = vec_len(cwm->qp_data); struct rte_crypto_op ** cops_to_enq[n_qps]; u32 n_cop_qp[n_qps], * bi_to_enq[n_qps]; diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c index 5b03de73..f996d7df 100644 --- a/src/plugins/dpdk/ipsec/esp_encrypt.c +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -93,7 +93,7 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, { u32 n_left_from, *from, *to_next, next_index; ipsec_main_t *im = &ipsec_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); dpdk_crypto_main_t *dcm = &dpdk_crypto_main; dpdk_esp_main_t *em = &dpdk_esp_main; u32 i; @@ -111,7 +111,8 @@ 
dpdk_esp_encrypt_node_fn (vlib_main_t * vm, return n_left_from; } - crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index); + crypto_worker_main_t *cwm = + vec_elt_at_index (dcm->workers_main, thread_index); u32 n_qps = vec_len (cwm->qp_data); struct rte_crypto_op **cops_to_enq[n_qps]; u32 n_cop_qp[n_qps], *bi_to_enq[n_qps]; diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index b0aaaaec..5d8f4fba 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -289,7 +289,7 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (!map) { clib_warning ("unable to create hash table for worker %u", - vlib_mains[i]->cpu_index); + vlib_mains[i]->thread_index); goto error; } cwm->algo_qp_map = map; diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h index 28bffc80..f0f793c0 100644 --- a/src/plugins/dpdk/ipsec/ipsec.h +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -95,8 +95,8 @@ static_always_inline void crypto_alloc_cops () { dpdk_crypto_main_t *dcm = &dpdk_crypto_main; - u32 cpu_index = os_get_cpu_number (); - crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index]; + u32 thread_index = vlib_get_thread_index (); + crypto_worker_main_t *cwm = &dcm->workers_main[thread_index]; unsigned socket_id = rte_socket_id (); crypto_qp_data_t *qpd; diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index 7ee2a785..942b8b2d 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -39,7 +39,7 @@ rte_delay_us_override (unsigned us) * thread then do not intercept. (Must not be called from an * independent pthread). */ - if (os_get_cpu_number () == 0) + if (vlib_get_thread_index () == 0) { /* * We're in the vlib main thread or a vlib process. Make sure diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c index 1c2f681e..fdaf81d1 100644 --- a/src/plugins/flowperpkt/l2_node.c +++ b/src/plugins/flowperpkt/l2_node.c @@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, u8 * src_mac, u8 * dst_mac, u16 ethertype, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->cpu_index; + u32 my_cpu_number = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c index f77f087d..0277682d 100644 --- a/src/plugins/flowperpkt/node.c +++ b/src/plugins/flowperpkt/node.c @@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, u32 src_address, u32 dst_address, u8 tos, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->cpu_index; + u32 my_cpu_number = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h index 2bf3fd54..9de0d13b 100644 --- a/src/plugins/ioam/export-common/ioam_export.h +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -477,8 +477,8 @@ do { \ from = vlib_frame_vector_args (F); \ n_left_from = (F)->n_vectors; \ next_index = (N)->cached_next_index; \ - while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1)); \ - my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index); \ + while (__sync_lock_test_and_set ((EM)->lockp[(VM)->thread_index], 1)); \ + my_buf = ioam_export_get_my_buffer (EM, (VM)->thread_index); \ my_buf->touched_at = vlib_time_now (VM); \ while (n_left_from > 0) \ { \ @@ -620,7 +620,7 @@ do { \ } \ vlib_node_increment_counter (VM, 
export_node.index, \ EXPORT_ERROR_RECORDED, pkts_recorded); \ - *(EM)->lockp[(VM)->cpu_index] = 0; \ + *(EM)->lockp[(VM)->thread_index] = 0; \ } while(0) #endif /* __included_ioam_export_h__ */ diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c index a56dc040..0cf742c9 100644 --- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c +++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c @@ -396,7 +396,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp0->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index0)) + vm->thread_index, &pool_index0)) { cache_ts_added++; } @@ -419,7 +419,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index0; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -455,7 +455,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp1->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index1)) + vm->thread_index, &pool_index1)) { cache_ts_added++; } @@ -479,7 +479,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh1 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index1; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -562,7 +562,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, clib_net_to_host_u32 (tcp0->seq_number) + 1, no_of_responses, now, - vm->cpu_index, &pool_index0)) + vm->thread_index, &pool_index0)) { cache_ts_added++; } @@ -585,7 +585,7 @@ ip6_reset_ts_hbh_node_fn (vlib_main_t * vm, e2e = (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset); - e2e->pool_id = (u8) vm->cpu_index; + e2e->pool_id = (u8) vm->thread_index; e2e->pool_index = pool_index0; ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *) ((u8 *) e2e + @@ -701,7 +701,7 @@ expired_cache_ts_timer_callback (u32 * expired_timers) ioam_cache_main_t *cm = &ioam_cache_main; int i; u32 pool_index; - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 count = 0; for (i = 0; i < vec_len (expired_timers); i++) @@ -724,7 +724,7 @@ ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm, vlib_frame_t * f) { ioam_cache_main_t *cm = &ioam_cache_main; - u32 my_thread_index = os_get_cpu_number (); + u32 my_thread_index = vlib_get_thread_index (); struct timespec ts, tsrem; tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index], diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c index f3c5cc09..08f5b692 100644 --- a/src/plugins/ixge/ixge.c +++ b/src/plugins/ixge/ixge.c @@ -1887,7 +1887,7 @@ done: vlib_increment_combined_counter (vnet_main. 
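/*
 * Every counter bump in these hunks carries a thread_index because vlib
 * counters are sharded per thread: each thread increments its own slab,
 * so the hot path needs no atomics, and readers sum the shards.  A toy
 * version of that layout (fixed sizes and names are for the sketch only,
 * not the real vlib counter types):
 */
#define TOY_MAX_THREADS 64	/* arbitrary for the sketch */

typedef struct { u64 packets; u64 bytes; } toy_counter_t;

/* one row per thread, one entry per interface */
toy_counter_t *toy_counters[TOY_MAX_THREADS];

static inline void
toy_increment_combined (u32 thread_index, u32 sw_if_index,
			u64 n_packets, u64 n_bytes)
{
  toy_counter_t *c = &toy_counters[thread_index][sw_if_index];
  c->packets += n_packets;	/* no lock, no atomic: this row is ours */
  c->bytes += n_bytes;
}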
interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - 0 /* cpu_index */ , + 0 /* thread_index */ , xd->vlib_sw_if_index, n_packets, dq->rx.n_bytes); diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c index add81236..addc2a42 100644 --- a/src/plugins/lb/lb.c +++ b/src/plugins/lb/lb.c @@ -63,11 +63,11 @@ u8 *format_lb_main (u8 * s, va_list * args) s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); - u32 cpu_index; - for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) { - lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht; + u32 thread_index; + for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) { + lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht; if (h) { - s = format(s, "core %d\n", cpu_index); + s = format(s, "core %d\n", thread_index); s = format(s, " timeout: %ds\n", h->timeout); s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h)); } diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c index 8b763c53..3171148b 100644 --- a/src/plugins/lb/node.c +++ b/src/plugins/lb/node.c @@ -60,10 +60,10 @@ format_lb_trace (u8 * s, va_list * args) return s; } -lb_hash_t *lb_get_sticky_table(u32 cpu_index) +lb_hash_t *lb_get_sticky_table(u32 thread_index) { lb_main_t *lbm = &lb_main; - lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht; //Check if size changed if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) { @@ -71,8 +71,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index) lb_hash_bucket_t *b; u32 i; lb_hash_foreach_entry(sticky_ht, b, i) { - vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1); + vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1); + vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1); } lb_hash_free(sticky_ht); @@ -81,8 +81,8 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index) //Create if necessary if (PREDICT_FALSE(sticky_ht == NULL)) { - lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); - sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); + sticky_ht = lbm->per_cpu[thread_index].sticky_ht; clib_warning("Regenerated sticky table %p", sticky_ht); } @@ -153,10 +153,10 @@ lb_node_fn (vlib_main_t * vm, { lb_main_t *lbm = &lb_main; u32 n_left_from, *from, next_index, *to_next, n_left_to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 lb_time = lb_hash_time_now(vm); - lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); + lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -240,9 +240,9 @@ lb_node_fn (vlib_main_t * vm, //Configuration may be changed, vectors resized, etc... 
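/*
 * The refcount.c and refcount.h hunks below apply the same sharding
 * trick to reference counts: each thread adds to its own signed
 * counter (an increment and the matching decrement may land on
 * different threads), and the true count only materializes when the
 * shards are summed.  A sketch of the read side under that scheme:
 */
typedef struct
{
  i64 *counters;	/* this thread's shard, grown on demand */
  u32 length;
} toy_refcount_per_thread_t;

static i64
toy_refcount_get (toy_refcount_per_thread_t * shards, u32 n_threads,
		  u32 index)
{
  i64 count = 0;
  u32 t;

  for (t = 0; t < n_threads; t++)
    if (shards[t].length > index)
      count += shards[t].counters[index];	/* sum all shards */
  return count;
}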
//Dereference previously used - vlib_refcount_add(&lbm->as_refcount, cpu_index, + vlib_refcount_add(&lbm->as_refcount, thread_index, lb_hash_available_value(sticky_ht, hash0, available_index0), -1); - vlib_refcount_add(&lbm->as_refcount, cpu_index, + vlib_refcount_add(&lbm->as_refcount, thread_index, asindex0, 1); //Add sticky entry @@ -260,7 +260,7 @@ lb_node_fn (vlib_main_t * vm, } vlib_increment_simple_counter(&lbm->vip_counters[counter], - cpu_index, + thread_index, vnet_buffer (p0)->ip.adj_index[VLIB_TX], 1); diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c index 22415c88..6f01ab5a 100644 --- a/src/plugins/lb/refcount.c +++ b/src/plugins/lb/refcount.c @@ -31,10 +31,10 @@ u64 vlib_refcount_get(vlib_refcount_t *r, u32 index) { u64 count = 0; vlib_thread_main_t *tm = vlib_get_thread_main (); - u32 cpu_index; - for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) { - if (r->per_cpu[cpu_index].length > index) - count += r->per_cpu[cpu_index].counters[index]; + u32 thread_index; + for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) { + if (r->per_cpu[thread_index].length > index) + count += r->per_cpu[thread_index].counters[index]; } return count; } diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h index 8c26e7be..dcfcb3fe 100644 --- a/src/plugins/lb/refcount.h +++ b/src/plugins/lb/refcount.h @@ -45,9 +45,9 @@ typedef struct { void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size); static_always_inline -void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v) +void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v) { - vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index]; + vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index]; if (PREDICT_FALSE(counter_index >= per_cpu->length)) __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2)); diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 659d5dfb..cee1f3d1 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -94,7 +94,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_rx_bytes = 0; u32 *to_next = 0; u32 n_free_bufs; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 bi0, bi1; vlib_buffer_t *b0, *b1; u16 ring_size = 1 << mif->log2_ring_size; @@ -105,14 +105,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (mif->per_interface_next_index != ~0) next_index = mif->per_interface_next_index; - n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < ring_size)) { - vec_validate (nm->rx_buffers[cpu_index], ring_size + n_free_bufs - 1); + vec_validate (nm->rx_buffers[thread_index], + ring_size + n_free_bufs - 1); n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], ring_size); - _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; } head = ring->head; @@ -158,15 +159,15 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_CACHE_LINE_BYTES, LOAD); } /* get empty buffer */ - u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; - bi0 = nm->rx_buffers[cpu_index][last_buf]; - bi1 = nm->rx_buffers[cpu_index][last_buf - 1]; - _vec_len (nm->rx_buffers[cpu_index]) -= 2; + u32 
last_buf = vec_len (nm->rx_buffers[thread_index]) - 1; + bi0 = nm->rx_buffers[thread_index][last_buf]; + bi1 = nm->rx_buffers[thread_index][last_buf - 1]; + _vec_len (nm->rx_buffers[thread_index]) -= 2; if (last_buf > 4) { - memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 2]); - memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 3]); + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]); + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]); } /* enqueue buffer */ @@ -256,9 +257,9 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (num_slots && n_left_to_next) { /* get empty buffer */ - u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; - bi0 = nm->rx_buffers[cpu_index][last_buf]; - _vec_len (nm->rx_buffers[cpu_index]) = last_buf; + u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1; + bi0 = nm->rx_buffers[thread_index][last_buf]; + _vec_len (nm->rx_buffers[thread_index]) = last_buf; /* enqueue buffer */ to_next[0] = bi0; @@ -315,7 +316,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ring->tail = head; vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, cpu_index, + + VNET_INTERFACE_COUNTER_RX, thread_index, mif->hw_if_index, n_rx_packets, n_rx_bytes); @@ -327,7 +328,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); memif_main_t *nm = &memif_main; memif_if_t *mif; @@ -337,7 +338,7 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && mif->flags & MEMIF_IF_FLAG_CONNECTED && (mif->if_index % nm->input_cpu_count) == - (cpu_index - nm->input_cpu_first_index)) + (thread_index - nm->input_cpu_first_index)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) n_rx_packets += diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index b4961365..e5ee965f 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -212,7 +212,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, snat_session_t ** sessionp, vlib_node_runtime_t * node, u32 next0, - u32 cpu_index) + u32 thread_index) { snat_user_t *u; snat_user_key_t user_key; @@ -246,27 +246,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) { /* no, make a new one */ - pool_get (sm->per_thread_data[cpu_index].users, u); + pool_get (sm->per_thread_data[thread_index].users, u); memset (u, 0, sizeof (*u)); u->addr = ip0->src_address; u->fib_index = rx_fib_index0; - pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt); u->sessions_per_user_list_head_index = per_user_list_head_elt - - sm->per_thread_data[cpu_index].list_pool; + sm->per_thread_data[thread_index].list_pool; - clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + clib_dlist_init (sm->per_thread_data[thread_index].list_pool, u->sessions_per_user_list_head_index); - kv0.value = u - sm->per_thread_data[cpu_index].users; + kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); } else { - u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + u = pool_elt_at_index (sm->per_thread_data[thread_index].users, value0.value); } @@ -276,25 
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
index b4961365..e5ee965f 100644
--- a/src/plugins/snat/in2out.c
+++ b/src/plugins/snat/in2out.c
@@ -212,7 +212,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
 		      snat_session_t ** sessionp,
 		      vlib_node_runtime_t * node,
 		      u32 next0,
-		      u32 cpu_index)
+		      u32 thread_index)
 {
   snat_user_t *u;
   snat_user_key_t user_key;
@@ -246,27 +246,27 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
     {
       /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
       memset (u, 0, sizeof (*u));
       u->addr = ip0->src_address;
       u->fib_index = rx_fib_index0;

-      pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
+      pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);

       u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;

-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        u->sessions_per_user_list_head_index);

-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;

       /* add user */
       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
     }
   else
     {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                              value0.value);
     }

@@ -276,25 +276,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
           /* Remove the oldest dynamic translation */
           do {
               oldest_per_user_translation_list_index =
-                clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
+                clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
                                         u->sessions_per_user_list_head_index);

               ASSERT (oldest_per_user_translation_list_index != ~0);

               /* add it back to the end of the LRU list */
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   u->sessions_per_user_list_head_index,
                                   oldest_per_user_translation_list_index);

               /* Get the list element */
               oldest_per_user_translation_list_elt =
-                pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
+                pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                                    oldest_per_user_translation_list_index);

               /* Get the session index from the list element */
               session_index = oldest_per_user_translation_list_elt->value;

               /* Get the session */
-              s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+              s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                      session_index);
           } while (snat_is_session_static (s));

@@ -346,7 +346,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
     }

   /* Create a new session */
-  pool_get (sm->per_thread_data[cpu_index].sessions, s);
+  pool_get (sm->per_thread_data[thread_index].sessions, s);
   memset (s, 0, sizeof (*s));

   s->outside_address_index = address_index;
@@ -362,22 +362,22 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
     }

   /* Create list elts */
-  pool_get (sm->per_thread_data[cpu_index].list_pool,
+  pool_get (sm->per_thread_data[thread_index].list_pool,
             per_user_translation_list_elt);
-  clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                    per_user_translation_list_elt -
-                   sm->per_thread_data[cpu_index].list_pool);
+                   sm->per_thread_data[thread_index].list_pool);

   per_user_translation_list_elt->value =
-    s - sm->per_thread_data[cpu_index].sessions;
+    s - sm->per_thread_data[thread_index].sessions;
   s->per_user_index = per_user_translation_list_elt -
-                      sm->per_thread_data[cpu_index].list_pool;
+                      sm->per_thread_data[thread_index].list_pool;
   s->per_user_list_head_index = u->sessions_per_user_list_head_index;

-  clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                       s->per_user_list_head_index,
                       per_user_translation_list_elt -
-                      sm->per_thread_data[cpu_index].list_pool);
+                      sm->per_thread_data[thread_index].list_pool);

   s->in2out = *key0;
@@ -388,12 +388,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,

   /* Add to translation hashes */
   kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
       clib_warning ("in2out key add failed");

   kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;

   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
       clib_warning ("out2in key add failed");

@@ -403,7 +403,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
   worker_by_out_key.port = s->out2in.port;
   worker_by_out_key.fib_index = s->out2in.fib_index;
   kv0.key = worker_by_out_key.as_u64;
-  kv0.value = cpu_index;
+  kv0.value = thread_index;
   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);

   /* log NAT event */
@@ -465,7 +465,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0,
 *
 * @param[in,out] sm             SNAT main
 * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[out] p_key             address and port before NAT translation
 * @param[out] p_value           address and port after NAT translation
@@ -473,7 +473,7 @@ snat_in2out_error_t icmp_get_key(icmp46_header_t *icmp0,
 * @param d                      optional parameter
 */
u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -524,13 +524,13 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
         }

       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                         &s0, node, next0, cpu_index);
+                         &s0, node, next0, thread_index);

       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
         goto out;
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                             value0.value);

out:
@@ -548,7 +548,7 @@ out:
 *
 * @param[in] sm                 SNAT main
 * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[out] p_key             address and port before NAT translation
 * @param[out] p_value           address and port after NAT translation
@@ -556,7 +556,7 @@ out:
 * @param d                      optional parameter
 */
u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -624,7 +624,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,
                                u32 rx_fib_index0,
                                vlib_node_runtime_t * node,
                                u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                                void *d)
{
  snat_session_key_t key0, sm0;
@@ -641,7 +641,7 @@ static inline u32 icmp_in2out (snat_main_t *sm,

  echo0 = (icmp_echo_header_t *)(icmp0+1);

-  next0_tmp = sm->icmp_match_in2out_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
                                        &key0, &sm0, &dont_translate, d);
  if (next0_tmp != ~0)
    next0 = next0_tmp;
@@ -847,11 +847,11 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
                                          vlib_node_runtime_t * node,
                                          u32 next0,
                                          f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                          snat_session_t ** p_s0)
{
  next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                     next0, cpu_index, p_s0);
+                     next0, thread_index, p_s0);
  snat_session_t * s0 = *p_s0;
  if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
    {
@@ -862,9 +862,9 @@ static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
      /* Per-user LRU list maintenance for dynamic translations */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               s0->per_user_list_head_index,
                               s0->per_user_index);
        }
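sm->icmp_match_in2out_cb above is a function pointer of type snat_icmp_match_function_t (declared later in snat.h), so the per-packet ICMP path dispatches without re-checking the NAT mode, and thread_index rides along so the callback stays on the caller's per-thread data. A minimal sketch of that dispatch shape, with hypothetical names rather than the plugin's real configuration code:

#include <stdint.h>

typedef uint32_t (match_fn_t) (void *ctx, uint32_t thread_index, void *pkt);

static uint32_t
match_slow (void *ctx, uint32_t thread_index, void *pkt)
{ /* full lookup, may create per-thread session state */ return 0; }

static uint32_t
match_fast (void *ctx, uint32_t thread_index, void *pkt)
{ /* static-mapping-only lookup, no session state */ return 0; }

typedef struct
{
  match_fn_t *icmp_match_cb;	/* chosen once, at configuration time */
} nat_main_t;

static void
nat_configure (nat_main_t *nm, int static_mapping_only)
{
  nm->icmp_match_cb = static_mapping_only ? match_fast : match_slow;
}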
@@ -884,7 +884,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
  snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
  f64 now = vlib_time_now (vm);
  u32 stats_node_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();

  stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
    snat_in2out_node.index;
@@ -977,7 +977,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  next0 = icmp_in2out_slow_path
                    (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
-                     node, next0, now, cpu_index, &s0);
+                     node, next0, now, thread_index, &s0);
                  goto trace00;
                }
            }
@@ -1006,7 +1006,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                goto trace00;

              next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                &s0, node, next0, cpu_index);
+                                &s0, node, next0, thread_index);
              if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                goto trace00;
            }
@@ -1017,7 +1017,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
        }
      else
-        s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value0.value);

      old_addr0 = ip0->src_address.as_u32;
@@ -1063,9 +1063,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);
        }
@@ -1081,7 +1081,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          t->next_index = next0;
          t->session_index = ~0;
          if (s0)
-            t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -1117,7 +1117,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                {
                  next1 = icmp_in2out_slow_path
                    (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
-                     next1, now, cpu_index, &s1);
+                     next1, now, thread_index, &s1);
                  goto trace01;
                }
            }
@@ -1146,7 +1146,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
                goto trace01;

              next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
-                                &s1, node, next1, cpu_index);
+                                &s1, node, next1, thread_index);
              if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
                goto trace01;
            }
@@ -1157,7 +1157,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
        }
      else
-        s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value1.value);

      old_addr1 = ip1->src_address.as_u32;
@@ -1203,9 +1203,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s1))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s1->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s1->per_user_list_head_index,
                              s1->per_user_index);
        }
@@ -1220,7 +1220,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          t->next_index = next1;
          t->session_index = ~0;
          if (s1)
-            t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
@@ -1292,7 +1292,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            {
              next0 = icmp_in2out_slow_path
                (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
              goto trace0;
            }
        }
@@ -1321,7 +1321,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            goto trace0;

          next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                            &s0, node, next0, cpu_index);
+                            &s0, node, next0, thread_index);

          if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
            goto trace0;
@@ -1333,7 +1333,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
            }
        }
      else
-        s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value0.value);

      old_addr0 = ip0->src_address.as_u32;
@@ -1379,9 +1379,9 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);
        }
@@ -1397,7 +1397,7 @@ snat_in2out_node_fn_inline (vlib_main_t * vm,
          t->next_index = next0;
          t->session_index = ~0;
          if (s0)
-            t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -2010,7 +2010,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();

  ASSERT (vec_len (sm->workers));

@@ -2048,7 +2048,7 @@ snat_in2out_worker_handoff_fn (vlib_main_t * vm,

      next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);

-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          do_handoff = 1;
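The handoff node above computes the owning worker from the packet (via sm->worker_in2out_cb) and compares it with the current thread_index; only mismatches are queued to another thread, so the common case stays local. A simplified sketch of the decision, assuming a hash-based owner function (names are illustrative, not the plugin's):

#include <stdint.h>

/* Pick the owning worker for a flow, e.g. by hashing the source
   address; worker thread indices start at first_worker. */
static inline uint32_t
flow_owner_thread (uint32_t src_address, uint32_t n_workers,
                   uint32_t first_worker)
{
  return first_worker + (src_address % n_workers);
}

static inline int
needs_handoff (uint32_t owner_thread, uint32_t current_thread)
{
  /* Only packets whose owner differs from the current thread are
     enqueued onto the owner's frame queue. */
  return owner_thread != current_thread;
}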
diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c
index 656e42db..5d308d78 100644
--- a/src/plugins/snat/out2in.c
+++ b/src/plugins/snat/out2in.c
@@ -129,7 +129,7 @@ create_session_for_static_mapping (snat_main_t *sm,
                                   snat_session_key_t in2out,
                                   snat_session_key_t out2in,
                                   vlib_node_runtime_t * node,
-                                   u32 cpu_index)
+                                   u32 thread_index)
{
  snat_user_t *u;
  snat_user_key_t user_key;
@@ -146,36 +146,36 @@ create_session_for_static_mapping (snat_main_t *sm,
  if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
    {
      /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
      memset (u, 0, sizeof (*u));
      u->addr = in2out.addr;
      u->fib_index = in2out.fib_index;

-      pool_get (sm->per_thread_data[cpu_index].list_pool,
+      pool_get (sm->per_thread_data[thread_index].list_pool,
                per_user_list_head_elt);

      u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;

-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                       u->sessions_per_user_list_head_index);

-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;

      /* add user */
      clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);

      /* add non-traslated packets worker lookup */
-      kv0.value = cpu_index;
+      kv0.value = thread_index;
      clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
    }
  else
    {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                             value0.value);
    }

-  pool_get (sm->per_thread_data[cpu_index].sessions, s);
+  pool_get (sm->per_thread_data[thread_index].sessions, s);
  memset (s, 0, sizeof (*s));

  s->outside_address_index = ~0;
@@ -183,22 +183,22 @@ create_session_for_static_mapping (snat_main_t *sm,
  u->nstaticsessions++;

  /* Create list elts */
-  pool_get (sm->per_thread_data[cpu_index].list_pool,
+  pool_get (sm->per_thread_data[thread_index].list_pool,
            per_user_translation_list_elt);
-  clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                   per_user_translation_list_elt -
-                   sm->per_thread_data[cpu_index].list_pool);
+                   sm->per_thread_data[thread_index].list_pool);

  per_user_translation_list_elt->value =
-    s - sm->per_thread_data[cpu_index].sessions;
+    s - sm->per_thread_data[thread_index].sessions;
  s->per_user_index =
-    per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
+    per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
  s->per_user_list_head_index = u->sessions_per_user_list_head_index;

-  clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                      s->per_user_list_head_index,
                      per_user_translation_list_elt -
-                      sm->per_thread_data[cpu_index].list_pool);
+                      sm->per_thread_data[thread_index].list_pool);

  s->in2out = in2out;
  s->out2in = out2in;
@@ -206,12 +206,12 @@ create_session_for_static_mapping (snat_main_t *sm,

  /* Add to translation hashes */
  kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
      clib_warning ("in2out key add failed");

  kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
  if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
      clib_warning ("out2in key add failed");

@@ -298,7 +298,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
 *
 * @param[in,out] sm             SNAT main
 * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[out] p_key             address and port before NAT translation
 * @param[out] p_value           address and port after NAT translation
@@ -306,7 +306,7 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0,
 * @param d                      optional parameter
 */
u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d)
@@ -366,7 +366,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,

          /* Create session initiated by host from external network */
          s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
-                                                 node, cpu_index);
+                                                 node, thread_index);

          if (!s0)
            {
@@ -375,7 +375,7 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
        }
    }
  else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                            value0.value);

out:
@@ -393,7 +393,7 @@ out:
 *
 * @param[in] sm                 SNAT main
 * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
 * @param[in,out] b0             buffer containing packet to be translated
 * @param[out] p_key             address and port before NAT translation
 * @param[out] p_value           address and port after NAT translation
@@ -401,7 +401,7 @@ out:
 * @param d                      optional parameter
 */
u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d)
@@ -460,7 +460,7 @@ static inline u32 icmp_out2in (snat_main_t *sm,
                               u32 rx_fib_index0,
                               vlib_node_runtime_t * node,
                               u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                               void *d)
{
  snat_session_key_t key0, sm0;
@@ -477,7 +477,7 @@ static inline u32 icmp_out2in (snat_main_t *sm,

  echo0 = (icmp_echo_header_t *)(icmp0+1);

-  next0_tmp = sm->icmp_match_out2in_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0,
                                       &key0, &sm0, &dont_translate, d);
  if (next0_tmp != ~0)
    next0 = next0_tmp;
@@ -589,11 +589,11 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
                                         u32 rx_fib_index0,
                                         vlib_node_runtime_t * node,
                                         u32 next0, f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                         snat_session_t ** p_s0)
{
  next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                     next0, cpu_index, p_s0);
+                     next0, thread_index, p_s0);
  snat_session_t * s0 = *p_s0;
  if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
    {
@@ -604,9 +604,9 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);
        }
@@ -624,7 +624,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
  u32 pkts_processed = 0;
  snat_main_t * sm = &snat_main;
  f64 now = vlib_time_now (vm);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
@@ -712,7 +712,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            {
              next0 = icmp_out2in_slow_path
                (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
              goto trace0;
            }

@@ -743,7 +743,7 @@ snat_out2in_node_fn (vlib_main_t * vm,

              /* Create session initiated by host from external network */
              s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
              if (!s0)
                {
                  b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -752,7 +752,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            }
        }
      else
-        s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value0.value);

      old_addr0 = ip0->dst_address.as_u32;
@@ -796,9 +796,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);
        }
@@ -813,7 +813,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
          t->next_index = next0;
          t->session_index = ~0;
          if (s0)
-            t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -847,7 +847,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            {
              next1 = icmp_out2in_slow_path
                (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
-                 next1, now, cpu_index, &s1);
+                 next1, now, thread_index, &s1);
              goto trace1;
            }

@@ -878,7 +878,7 @@ snat_out2in_node_fn (vlib_main_t * vm,

              /* Create session initiated by host from external network */
              s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
-                                                     cpu_index);
+                                                     thread_index);
              if (!s1)
                {
                  b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -887,7 +887,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            }
        }
      else
-        s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value1.value);

      old_addr1 = ip1->dst_address.as_u32;
@@ -931,9 +931,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s1))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s1->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s1->per_user_list_head_index,
                              s1->per_user_index);
        }
@@ -948,7 +948,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
          t->next_index = next1;
          t->session_index = ~0;
          if (s1)
-            t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
@@ -1016,7 +1016,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            {
              next0 = icmp_out2in_slow_path
                (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
              goto trace00;
            }

@@ -1048,7 +1048,7 @@ snat_out2in_node_fn (vlib_main_t * vm,

              /* Create session initiated by host from external network */
              s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
              if (!s0)
                {
                  b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -1057,7 +1057,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
            }
        }
      else
-        s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+        s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                value0.value);

      old_addr0 = ip0->dst_address.as_u32;
@@ -1101,9 +1101,9 @@ snat_out2in_node_fn (vlib_main_t * vm,
      /* Per-user LRU list maintenance for dynamic translation */
      if (!snat_is_session_static (s0))
        {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                             s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_list_head_index,
                              s0->per_user_index);
        }
@@ -1118,7 +1118,7 @@ snat_out2in_node_fn (vlib_main_t * vm,
          t->next_index = next0;
          t->session_index = ~0;
          if (s0)
-            t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+            t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
        }

      pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -1599,7 +1599,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm,
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();

  ASSERT (vec_len (sm->workers));

@@ -1637,7 +1637,7 @@ snat_out2in_worker_handoff_fn (vlib_main_t * vm,

      next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);

-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
        {
          do_handoff = 1;

diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h
index 017825c0..f4e1c5c0 100644
--- a/src/plugins/snat/snat.h
+++ b/src/plugins/snat/snat.h
@@ -221,7 +221,7 @@ struct snat_main_s;

typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm,
                                        vlib_node_runtime_t *node,
-                                        u32 cpu_index,
+                                        u32 thread_index,
                                        vlib_buffer_t *b0,
                                        snat_session_key_t *p_key,
                                        snat_session_key_t *p_value,
@@ -402,22 +402,22 @@ typedef struct {
} tcp_udp_header_t;

u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d);
u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d);
u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d);
u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                           snat_session_key_t *p_key,
                           snat_session_key_t *p_value,
                           u8 *p_dont_translate, void *d);

diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index a517a597..be3b41ef 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -299,7 +299,7 @@ vlib_buffer_validate_alloc_free (vlib_main_t * vm,
  if (CLIB_DEBUG == 0)
    return;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  /* smp disaster check */
  if (vec_len (vlib_mains) > 1)
@@ -355,7 +355,7 @@ vlib_buffer_create_free_list_helper (vlib_main_t * vm,
  vlib_buffer_free_list_t *f;
  int i;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
    {
@@ -474,7 +474,7 @@ vlib_buffer_delete_free_list_internal (vlib_main_t * vm, u32 free_list_index)
  u32 merge_index;
  int i;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  f = vlib_buffer_get_free_list (vm, free_list_index);

diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 394c336a..328660a3 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -209,7 +209,7 @@ always_inline vlib_buffer_known_state_t
vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
{
  vlib_buffer_main_t *bm = vm->buffer_main;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  uword *p = hash_get (bm->buffer_known_hash, buffer_index);
  return p ? p[0] : VLIB_BUFFER_UNKNOWN;
@@ -221,7 +221,7 @@ vlib_buffer_set_known_state (vlib_main_t * vm,
                             vlib_buffer_known_state_t state)
{
  vlib_buffer_main_t *bm = vm->buffer_main;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  hash_set (bm->buffer_known_hash, buffer_index, state);
}

diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index f853f655..3cc95076 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -709,7 +709,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
    {
      /* *INDENT-OFF* */
      foreach_vlib_main({
-        heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+        heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
        mheap = mheap_header(heap);
        mheap->flags |= MHEAP_FLAG_VALIDATE;
        // Turn off small object cache because it delays detection of errors
@@ -722,7 +722,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
    {
      /* *INDENT-OFF* */
      foreach_vlib_main({
-        heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+        heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
        mheap = mheap_header(heap);
        mheap->flags &= ~MHEAP_FLAG_VALIDATE;
        mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
@@ -733,7 +733,7 @@ test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
    {
      /* *INDENT-OFF* */
      foreach_vlib_main({
-        heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+        heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
        mheap = mheap_header(heap);
        mheap_validate(heap);
      });

diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index 17a85217..60e2055d 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -70,17 +70,17 @@ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);

/** Increment a simple counter
    @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current cpu index
    @param index - (u32) index of the counter to increment
    @param increment - (u64) quantitiy to add to the counter
*/
always_inline void
vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
-                               u32 cpu_index, u32 index, u64 increment)
+                               u32 thread_index, u32 index, u64 increment)
{
  counter_t *my_counters;

-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];
  my_counters[index] += increment;
}

@@ -201,7 +201,7 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);

/** Increment a combined counter
    @param cm - (vlib_combined_counter_main_t *) comined counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current cpu index
    @param index - (u32) index of the counter to increment
    @param packet_increment - (u64) number of packets to add to the counter
    @param byte_increment - (u64) number of bytes to add to the counter
@@ -209,13 +209,13 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);

always_inline void
vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
-                                 u32 cpu_index,
+                                 u32 thread_index,
                                 u32 index, u64 n_packets, u64 n_bytes)
{
  vlib_counter_t *my_counters;

  /* Use this CPU's counter array */
-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];

  my_counters[index].packets += n_packets;
  my_counters[index].bytes += n_bytes;
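Because cm->counters is an array of per-thread counter vectors, the increment above is a plain unsynchronized add into the calling thread's own array. A short usage sketch under those assumptions (im, sw_if_index0, b0 are as in the surrounding node code; the counter main is presumed already sized for this thread and index):

  /* Inside a node function: count one packet of b0's length against
     the interface's RX counter, on this thread's counter array. */
  u32 thread_index = vlib_get_thread_index ();

  vlib_increment_combined_counter (im->combined_sw_if_counters
                                   + VNET_INTERFACE_COUNTER_RX,
                                   thread_index, sw_if_index0,
                                   1 /* packets */,
                                   vlib_buffer_length_in_chain (vm, b0));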
@@ -224,14 +224,14 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,

/** Pre-fetch a per-thread combined counter for the given object index */
always_inline void
vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
-                                u32 cpu_index, u32 index)
+                                u32 thread_index, u32 index)
{
  vlib_counter_t *cpu_counters;

  /*
   * This CPU's index is assumed to already be in cache
   */
-  cpu_counters = cm->counters[cpu_index];
+  cpu_counters = cm->counters[thread_index];
  CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE);
}

diff --git a/src/vlib/error.c b/src/vlib/error.c
index a2c23176..e4ed4ee3 100644
--- a/src/vlib/error.c
+++ b/src/vlib/error.c
@@ -149,7 +149,7 @@ vlib_register_errors (vlib_main_t * vm,
  vlib_node_t *n = vlib_get_node (vm, node_index);
  uword l;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  /* Free up any previous error strings. */
  if (n->n_errors > 0)

diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
index f51ec381..9dd01fbf 100644
--- a/src/vlib/global_funcs.h
+++ b/src/vlib/global_funcs.h
@@ -23,7 +23,7 @@ always_inline vlib_main_t *
vlib_get_main (void)
{
  vlib_main_t *vm;
-  vm = vlib_mains[os_get_cpu_number ()];
+  vm = vlib_mains[vlib_get_thread_index ()];
  ASSERT (vm);
  return vm;
}

diff --git a/src/vlib/main.c b/src/vlib/main.c
index b22203f0..422d3e26 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -136,18 +136,18 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
  else
    {
      f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
-      f->cpu_index = vm->cpu_index;
+      f->thread_index = vm->thread_index;
      fi = vlib_frame_index_no_check (vm, f);
    }

  /* Poison frame when debugging. */
  if (CLIB_DEBUG > 0)
    {
-      u32 save_cpu_index = f->cpu_index;
+      u32 save_thread_index = f->thread_index;

      memset (f, 0xfe, n);
-      f->cpu_index = save_cpu_index;
+      f->thread_index = save_thread_index;
    }

  /* Insert magic number. */
@@ -517,7 +517,7 @@ vlib_put_next_frame (vlib_main_t * vm,
   * a dangling frame reference. Each thread has its own copy of
   * the next_frames vector.
   */
-  if (0 && r->cpu_index != next_runtime->cpu_index)
+  if (0 && r->thread_index != next_runtime->thread_index)
    {
      nf->frame_index = ~0;
      nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
@@ -866,7 +866,7 @@ vlib_elog_main_loop_event (vlib_main_t * vm,
                   : evm->node_call_elog_event_types,
                   node_index),
                  /* track */
-                  (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+                  (vm->thread_index ? &vlib_worker_threads[vm->thread_index].
                   elog_track : &em->default_track),
                  /* data to log */ n_vectors);
}
@@ -963,7 +963,7 @@ dispatch_node (vlib_main_t * vm,

  vm->cpu_time_last_node_dispatch = last_time_stamp;

-  if (1 /* || vm->cpu_index == node->cpu_index */ )
+  if (1 /* || vm->thread_index == node->thread_index */ )
    {
      vlib_main_t *stat_vm;

@@ -1029,7 +1029,7 @@ dispatch_node (vlib_main_t * vm,
      {
        u32 node_name, vector_length, is_polling;
      } *ed;
-      vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index;
+      vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
#endif

      if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT

diff --git a/src/vlib/main.h b/src/vlib/main.h
index 0197b4f3..329bf073 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -156,7 +156,7 @@ typedef struct vlib_main_t
  uword *init_functions_called;

  /* to compare with node runtime */
-  u32 cpu_index;
+  u32 thread_index;

  void **mbuf_alloc_list;

diff --git a/src/vlib/node.c b/src/vlib/node.c
index dc0a4de5..bbd3a42e 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -99,7 +99,7 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
  vlib_pending_frame_t *pf;
  i32 i, j, n_insert;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  vlib_worker_thread_barrier_sync (vm);

diff --git a/src/vlib/node.h b/src/vlib/node.h
index fc7e7da2..1e2f4c38 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -344,8 +344,8 @@ typedef struct vlib_frame_t
  /* Number of vector elements currently in frame. */
  u16 n_vectors;

-  /* Owner cpuid / heap id */
-  u16 cpu_index;
+  /* Owner thread / heap id */
+  u16 thread_index;

  /* Scalar and vector arguments to next node. */
  u8 arguments[0];
@@ -459,7 +459,7 @@ typedef struct vlib_node_runtime_t
                                        zero before first run of this
                                        node. */

-  u16 cpu_index;                       /**< CPU this node runs on */
+  u16 thread_index;                    /**< thread this node runs on */

  u8 runtime_data[0];                  /**< Function dependent
                                        node-runtime data. This data is

diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 1f7d94e1..54e36874 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -201,9 +201,9 @@ always_inline vlib_frame_t *
vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
{
  vlib_frame_t *f;
-  u32 cpu_index = frame_index & VLIB_CPU_MASK;
+  u32 thread_index = frame_index & VLIB_CPU_MASK;
  u32 offset = frame_index & VLIB_OFFSET_MASK;
-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
  f = vm->heap_base + offset;
  return f;
}
@@ -215,10 +215,10 @@ vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)

  ASSERT (((uword) f & VLIB_CPU_MASK) == 0);

-  vm = vlib_mains[f->cpu_index];
+  vm = vlib_mains[f->thread_index];

  i = ((u8 *) f - (u8 *) vm->heap_base);
-  return i | f->cpu_index;
+  return i | f->thread_index;
}

always_inline vlib_frame_t *

diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index ef3a24d3..4a111f8d 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,27 +35,12 @@ vl (void *p)
vlib_worker_thread_t *vlib_worker_threads;
vlib_thread_main_t vlib_thread_main;

+__thread uword vlib_thread_index = 0;
+
uword
os_get_cpu_number (void)
{
-  void *sp;
-  uword n;
-  u32 len;
-
-  len = vec_len (vlib_thread_stacks);
-  if (len == 0)
-    return 0;
-
-  /* Get any old stack address. */
-  sp = &sp;
-
-  n = ((uword) sp - (uword) vlib_thread_stacks[0])
-    >> VLIB_LOG2_THREAD_STACK_SIZE;
-
-  /* "processes" have their own stacks, and they always run in thread 0 */
-  n = n >= len ? 0 : n;
-
-  return n;
+  return vlib_thread_index;
}

uword
@@ -275,21 +260,6 @@ vlib_thread_init (vlib_main_t * vm)
  return 0;
}

-vlib_worker_thread_t *
-vlib_alloc_thread (vlib_main_t * vm)
-{
-  vlib_worker_thread_t *w;
-
-  if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks))
-    {
-      clib_warning ("out of worker threads... Quitting...");
-      exit (1);
-    }
-  vec_add2 (vlib_worker_threads, w, 1);
-  w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
-  return w;
-}
-
vlib_frame_queue_t *
vlib_frame_queue_alloc (int nelts)
{
@@ -427,7 +397,7 @@ vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
      f64 b4 = vlib_time_now_ticks (vm, before);
      vlib_worker_thread_barrier_check (vm, b4);
      /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
-      // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+      // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
    }

  elt = fq->elts + (new_tail & (fq->nelts - 1));
@@ -497,6 +467,8 @@ vlib_worker_thread_bootstrap_fn (void *arg)
  w->lwp = syscall (SYS_gettid);
  w->thread_id = pthread_self ();

+  vlib_thread_index = w - vlib_worker_threads;
+
  rv = (void *) clib_calljmp
    ((uword (*)(uword)) w->thread_function,
     (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
@@ -610,7 +582,9 @@ start_workers (vlib_main_t * vm)
            mheap_alloc (0 /* use VM */ , tr->mheap_size);
          else
            w->thread_mheap = main_heap;
-          w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+
+          w->thread_stack =
+            vlib_thread_stack_init (w - vlib_worker_threads);
          w->thread_function = tr->function;
          w->thread_function_arg = w;
          w->instance_id = k;
@@ -630,7 +604,7 @@ start_workers (vlib_main_t * vm)
              vm_clone = clib_mem_alloc (sizeof (*vm_clone));
              clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));

-              vm_clone->cpu_index = worker_thread_index;
+              vm_clone->thread_index = worker_thread_index;
              vm_clone->heap_base = w->thread_mheap;
              vm_clone->mbuf_alloc_list = 0;
              vm_clone->init_functions_called =
@@ -679,7 +653,7 @@ start_workers (vlib_main_t * vm)
              vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
              {
                vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-                rt->cpu_index = vm_clone->cpu_index;
+                rt->thread_index = vm_clone->thread_index;
                /* copy initial runtime_data from node */
                if (n->runtime_data && n->runtime_data_bytes > 0)
                  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -692,7 +666,7 @@ start_workers (vlib_main_t * vm)
              vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
              {
                vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-                rt->cpu_index = vm_clone->cpu_index;
+                rt->thread_index = vm_clone->thread_index;
                /* copy initial runtime_data from node */
                if (n->runtime_data && n->runtime_data_bytes > 0)
                  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -756,7 +730,8 @@ start_workers (vlib_main_t * vm)
            mheap_alloc (0 /* use VM */ , tr->mheap_size);
          else
            w->thread_mheap = main_heap;
-          w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+          w->thread_stack =
+            vlib_thread_stack_init (w - vlib_worker_threads);
          w->thread_function = tr->function;
          w->thread_function_arg = w;
          w->instance_id = j;
@@ -827,7 +802,7 @@ vlib_worker_thread_node_runtime_update (void)
                                uword n_calls,
                                uword n_vectors, uword n_clocks);

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  if (vec_len (vlib_mains) == 1)
    return;
@@ -835,7 +810,7 @@ vlib_worker_thread_node_runtime_update (void)
  vm = vlib_mains[0];
  nm = &vm->node_main;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
  ASSERT (*vlib_worker_threads->wait_at_barrier == 1);

  /*
@@ -955,7 +930,7 @@ vlib_worker_thread_node_runtime_update (void)
      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
      {
        vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-        rt->cpu_index = vm_clone->cpu_index;
+        rt->thread_index = vm_clone->thread_index;
        /* copy runtime_data, will be overwritten later for existing rt */
        if (n->runtime_data && n->runtime_data_bytes > 0)
          clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -981,7 +956,7 @@ vlib_worker_thread_node_runtime_update (void)
      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
      {
        vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-        rt->cpu_index = vm_clone->cpu_index;
+        rt->thread_index = vm_clone->thread_index;
        /* copy runtime_data, will be overwritten later for existing rt */
        if (n->runtime_data && n->runtime_data_bytes > 0)
          clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -1180,7 +1155,7 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
  if (vlib_mains == 0)
    return;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
  vlib_worker_thread_barrier_sync (vm);

  switch (which)
@@ -1212,7 +1187,7 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm)

  vlib_worker_threads[0].barrier_sync_count++;

-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);

  deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;

@@ -1260,7 +1235,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
int
vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
{
-  u32 thread_id = vm->cpu_index;
+  u32 thread_id = vm->thread_index;
  vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
  vlib_frame_queue_elt_t *elt;
  u32 *from, *to;
@@ -1393,7 +1368,7 @@ vlib_worker_thread_fn (void *arg)
  vlib_main_t *vm = vlib_get_main ();
  clib_error_t *e;

-  ASSERT (vm->cpu_index == os_get_cpu_number ());
+  ASSERT (vm->thread_index == vlib_get_thread_index ());

  vlib_worker_thread_init (w);
  clib_time_init (&vm->clib_time);

diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index eca4fc26..101d3d4a 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -153,8 +153,6 @@ typedef struct
/* Called early, in thread 0's context */
clib_error_t *vlib_thread_init (vlib_main_t * vm);

-vlib_worker_thread_t *vlib_alloc_thread (vlib_main_t * vm);
-
int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
                             u32 frame_queue_index, vlib_frame_t * frame,
                             vlib_frame_queue_msg_type_t type);
@@ -183,12 +181,19 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
void vlib_worker_thread_barrier_release (vlib_main_t * vm);

+extern __thread uword vlib_thread_index;
+static_always_inline uword
+vlib_get_thread_index (void)
+{
+  return vlib_thread_index;
+}
+
always_inline void
vlib_smp_unsafe_warning (void)
{
  if (CLIB_DEBUG > 0)
    {
-      if (os_get_cpu_number ())
+      if (vlib_get_thread_index ())
        fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
    }
}
@@ -331,21 +336,21 @@ vlib_num_workers ()
}

always_inline u32
-vlib_get_worker_cpu_index (u32 worker_index)
+vlib_get_worker_thread_index (u32 worker_index)
{
  return worker_index + 1;
}

always_inline u32
-vlib_get_worker_index (u32 cpu_index)
+vlib_get_worker_index (u32 thread_index)
{
-  return cpu_index - 1;
+  return thread_index - 1;
}

always_inline u32
vlib_get_current_worker_index ()
{
-  return os_get_cpu_number () - 1;
+  return vlib_get_thread_index () - 1;
}

static inline void
@@ -467,6 +472,8 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
  return elt;
}

+u8 *vlib_thread_stack_init (uword thread_index);
+
int vlib_thread_cb_register (struct vlib_main_t *vm,
                            vlib_thread_callbacks_t * cb);
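This is the heart of the change: the old os_get_cpu_number() recovered the thread index by arithmetic on the current stack pointer relative to vlib_thread_stacks[0], which only works while all stacks sit in one contiguous, power-of-two-aligned block. The replacement stores the index in a __thread (thread-local) variable, written once at thread bootstrap and read by a trivial inline. A standalone sketch of the pattern with generic names and plain pthreads:

#include <pthread.h>
#include <stdio.h>

/* One copy of this variable exists per thread (TLS). */
static __thread unsigned long my_thread_index = 0;

static inline unsigned long
get_thread_index (void)
{
  return my_thread_index;	/* no stack arithmetic, no alignment rules */
}

static void *
worker_bootstrap (void *arg)
{
  /* Written exactly once, before the worker's main loop starts. */
  my_thread_index = (unsigned long) arg;
  printf ("worker %lu running\n", get_thread_index ());
  return 0;
}

int
main (void)
{
  pthread_t threads[4];
  for (unsigned long i = 1; i <= 4; i++)
    pthread_create (&threads[i - 1], 0, worker_bootstrap, (void *) i);
  for (int i = 0; i < 4; i++)
    pthread_join (threads[i], 0);
  return 0;	/* the main thread keeps index 0, as in the patch */
}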
diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c
index 33ba163a..7c1e9475 100644
--- a/src/vlib/unix/cj.c
+++ b/src/vlib/unix/cj.c
@@ -48,7 +48,7 @@ cj_log (u32 type, void *data0, void *data1)
  r = (cj_record_t *) & (cjm->records[new_tail & (cjm->num_records - 1)]);
  r->time = vlib_time_now (cjm->vlib_main);
-  r->cpu = os_get_cpu_number ();
+  r->thread_index = vlib_get_thread_index ();
  r->type = type;
  r->data[0] = pointer_to_uword (data0);
  r->data[1] = pointer_to_uword (data1);
@@ -133,7 +133,8 @@ static inline void
cj_dump_one_record (cj_record_t * r)
{
  fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
-          r->cpu, r->time, r->type, (long long unsigned int) r->data[0],
+          r->thread_index, r->time, r->type,
+          (long long unsigned int) r->data[0],
           (long long unsigned int) r->data[1]);
}

@@ -161,7 +162,7 @@ cj_dump_internal (u8 filter0_enable, u64 filter0,
  index = (cjm->tail + 1) & (cjm->num_records - 1);
  r = &(cjm->records[index]);

-  if (r->cpu != (u32) ~ 0)
+  if (r->thread_index != (u32) ~ 0)
    {
      /* Yes, dump from tail + 1 to the end */
      for (i = index; i < cjm->num_records; i++)

diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h
index 67626afe..d0a1d46e 100644
--- a/src/vlib/unix/cj.h
+++ b/src/vlib/unix/cj.h
@@ -23,7 +23,7 @@
typedef struct
{
  f64 time;
-  u32 cpu;
+  u32 thread_index;
  u32 type;
  u64 data[2];
} cj_record_t;

diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 6b96cc0d..db5ddd64 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -510,13 +510,28 @@ thread0 (uword arg)
  return i;
}

+u8 *
+vlib_thread_stack_init (uword thread_index)
+{
+  vec_validate (vlib_thread_stacks, thread_index);
+  vlib_thread_stacks[thread_index] = clib_mem_alloc_aligned
+    (VLIB_THREAD_STACK_SIZE, VLIB_THREAD_STACK_SIZE);
+
+  /*
+   * Disallow writes to the bottom page of the stack, to
+   * catch stack overflows.
+   */
+  if (mprotect (vlib_thread_stacks[thread_index],
+               clib_mem_get_page_size (), PROT_READ) < 0)
+    clib_unix_warning ("thread stack");
+  return vlib_thread_stacks[thread_index];
+}
+
int
vlib_unix_main (int argc, char *argv[])
{
  vlib_main_t *vm = &vlib_global_main;	/* one and only time for this! */
-  vlib_thread_main_t *tm = &vlib_thread_main;
  unformat_input_t input;
-  u8 *thread_stacks;
  clib_error_t *e;
  int i;

@@ -548,29 +563,9 @@ vlib_unix_main (int argc, char *argv[])
    }
  unformat_free (&input);

-  /*
-   * allocate n x VLIB_THREAD_STACK_SIZE stacks, aligned to a
-   * VLIB_THREAD_STACK_SIZE boundary
-   * See also: os_get_cpu_number() in vlib/vlib/threads.c
-   */
-  thread_stacks = clib_mem_alloc_aligned
-    ((uword) tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
-     VLIB_THREAD_STACK_SIZE);
-
-  vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
-  for (i = 0; i < vec_len (vlib_thread_stacks); i++)
-    {
-      vlib_thread_stacks[i] = thread_stacks;
-
-      /*
-       * Disallow writes to the bottom page of the stack, to
-       * catch stack overflows.
-       */
-      if (mprotect (thread_stacks, clib_mem_get_page_size (), PROT_READ) < 0)
-       clib_unix_warning ("thread stack");
+  vlib_thread_stack_init (0);

-      thread_stacks += VLIB_THREAD_STACK_SIZE;
-    }
+  vlib_thread_index = 0;

  i = clib_calljmp (thread0, (uword) vm,
                   (void *) (vlib_thread_stacks[0] +
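vlib_thread_stack_init() above allocates each stack on demand and revokes write permission on its bottom page, so running off the end of the stack faults immediately instead of silently corrupting a neighbouring allocation; it also lifts the old requirement that all stacks live in one contiguous block. A self-contained sketch of the same guard-page trick using plain mmap/mprotect rather than VPP's allocator (sizes are illustrative):

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define STACK_SIZE (1 << 20)	/* 1 MB, for illustration */

static void *
alloc_guarded_stack (void)
{
  /* Anonymous mapping for the stack itself. */
  void *stack = mmap (0, STACK_SIZE, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (stack == MAP_FAILED)
    return 0;

  /* Make the lowest page read-only: a stack overflow (stacks grow
     down) now raises SIGSEGV instead of trashing adjacent memory. */
  if (mprotect (stack, sysconf (_SC_PAGESIZE), PROT_READ) < 0)
    perror ("mprotect guard page");

  return stack;
}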
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index f68e54e0..20d70dd4 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -52,7 +52,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
{
    u32 * from = vlib_frame_vector_args (frame);
    u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
    ethernet_main_t * em = &ethernet_main;

    n_left_from = frame->n_vectors;
@@ -93,7 +93,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
            vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;

            vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                            adj_index0,
                                            /* packet increment */ 0,
                                            /* byte increment */ rw_len0);

diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index e8087f08..5756de43 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -49,7 +49,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
    u32 next_index;
    vnet_main_t *vnm = vnet_get_main ();
    vnet_interface_main_t *im = &vnm->interface_main;
-    u32 cpu_index = vm->cpu_index;
+    u32 thread_index = vm->thread_index;

    /* Vector of buffer / pkt indices we're supposed to process */
    from = vlib_frame_vector_args (frame);
@@ -124,13 +124,13 @@ adj_midchain_tx_inline (vlib_main_t * vm,
            {
                vlib_increment_combined_counter (im->combined_sw_if_counters
                                                 + VNET_INTERFACE_COUNTER_TX,
-                                                 cpu_index,
+                                                 thread_index,
                                                 adj0->rewrite_header.sw_if_index,
                                                 1,
                                                 vlib_buffer_length_in_chain (vm, b0));
                vlib_increment_combined_counter (im->combined_sw_if_counters
                                                 + VNET_INTERFACE_COUNTER_TX,
-                                                 cpu_index,
+                                                 thread_index,
                                                 adj1->rewrite_header.sw_if_index,
                                                 1,
                                                 vlib_buffer_length_in_chain (vm, b1));
@@ -181,7 +181,7 @@ adj_midchain_tx_inline (vlib_main_t * vm,
            {
                vlib_increment_combined_counter (im->combined_sw_if_counters
                                                 + VNET_INTERFACE_COUNTER_TX,
-                                                 cpu_index,
+                                                 thread_index,
                                                 adj0->rewrite_header.sw_if_index,
                                                 1,
                                                 vlib_buffer_length_in_chain (vm, b0));

diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 9a0f9d8b..128570b0 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -53,7 +53,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm,
{
    u32 * from = vlib_frame_vector_args (frame);
    u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();

    n_left_from = frame->n_vectors;
    next_index = node->cached_next_index;
@@ -94,7 +94,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm,
            vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;

            vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                            adj_index0,
                                            /* packet increment */ 0,
                                            /* byte increment */ rw_len0);

diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 98842a48..70a189b0 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -251,12 +251,12 @@ static inline void make_working_copy
  vnet_classify_entry_##size##_t * working_copy##size = 0;
  foreach_size_in_u32x4;
#undef _
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();

-  if (cpu_number >= vec_len (t->working_copies))
+  if (thread_index >= vec_len (t->working_copies))
    {
      oldheap = clib_mem_set_heap (t->mheap);
-      vec_validate (t->working_copies, cpu_number);
+      vec_validate (t->working_copies, thread_index);
      clib_mem_set_heap (oldheap);
    }

@@ -265,7 +265,7 @@ static inline void make_working_copy
   * updates from multiple threads will not result in sporadic, spurious
   * lookup failures.
   */
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
  t->saved_bucket.as_u64 = b->as_u64;
  oldheap = clib_mem_set_heap (t->mheap);

@@ -290,7 +290,7 @@ static inline void make_working_copy
    default:
      abort();
    }

-  t->working_copies[cpu_number] = working_copy;
+  t->working_copies[thread_index] = working_copy;
  }

  _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page;
@@ -318,7 +318,7 @@ static inline void make_working_copy
  working_bucket.offset = vnet_classify_get_offset (t, working_copy);
  CLIB_MEMORY_BARRIER();
  b->as_u64 = working_bucket.as_u64;
-  t->working_copies[cpu_number] = working_copy;
+  t->working_copies[thread_index] = working_copy;
}

static vnet_classify_entry_t *
@@ -387,7 +387,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t,
  int i;
  u64 hash, new_hash;
  u32 new_log2_pages;
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
  u8 * key_minus_skip;

  ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
@@ -498,7 +498,7 @@ int vnet_classify_add_del (vnet_classify_table_t * t,
  new_log2_pages = t->saved_bucket.log2_pages + 1;

 expand_again:
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
  new_v = split_and_rehash (t, working_copy, new_log2_pages);

  if (new_v == 0)

diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index 6ef3d7d7..1b5e336b 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -60,7 +60,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
  cop_feature_type_t next_index;
  cop_main_t *cm = &cop_main;
  vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
@@ -177,12 +177,12 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
	  dpo1 = load_balance_get_bucket_i(lb1, 0);

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index0, 1,
+	      (vcm, thread_index, lb_index0, 1,
	       vlib_buffer_length_in_chain (vm, b0)
	       + sizeof(ethernet_header_t));

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index1, 1,
+	      (vcm, thread_index, lb_index1, 1,
	       vlib_buffer_length_in_chain (vm, b1)
	       + sizeof(ethernet_header_t));

@@ -273,7 +273,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm,
	  dpo0 = load_balance_get_bucket_i(lb0, 0);

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index0, 1,
+	      (vcm, thread_index, lb_index0, 1,
	       vlib_buffer_length_in_chain (vm, b0)
	       + sizeof(ethernet_header_t));

diff --git a/src/vnet/cop/ip6_whitelist.c b/src/vnet/cop/ip6_whitelist.c
index c2e16ccf..f3fe62e3 100644
--- a/src/vnet/cop/ip6_whitelist.c
+++ b/src/vnet/cop/ip6_whitelist.c
@@ -61,7 +61,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
  cop_main_t *cm = &cop_main;
  ip6_main_t * im6 = &ip6_main;
  vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
@@ -153,12 +153,12 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
	  dpo1 = load_balance_get_bucket_i(lb1, 0);

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index0, 1,
+	      (vcm, thread_index, lb_index0, 1,
	       vlib_buffer_length_in_chain (vm, b0)
	       + sizeof(ethernet_header_t));

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index1, 1,
+	      (vcm, thread_index, lb_index1, 1,
	       vlib_buffer_length_in_chain (vm, b1)
	       + sizeof(ethernet_header_t));

@@ -233,7 +233,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm,
	  dpo0 = load_balance_get_bucket_i(lb0, 0);

	  vlib_increment_combined_counter
-	      (vcm, cpu_index, lb_index0, 1,
+	      (vcm, thread_index, lb_index0, 1,
	       vlib_buffer_length_in_chain (vm, b0)
	       + sizeof(ethernet_header_t));

diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
index ba337f3f..76980102 100644
--- a/src/vnet/devices/af_packet/node.c
+++ b/src/vnet/devices/af_packet/node.c
@@ -124,7 +124,7 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
  u32 frame_num = apif->rx_req->tp_frame_nr;
  u8 *block_start = apif->rx_ring + block * block_size;
  uword n_trace = vlib_get_trace_count (vm, node);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
  u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
  u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
@@ -132,15 +132,15 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
  if (apif->per_interface_next_index != ~0)
    next_index = apif->per_interface_next_index;

-  n_free_bufs = vec_len (apm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
  if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
    {
-      vec_validate (apm->rx_buffers[cpu_index],
+      vec_validate (apm->rx_buffers[thread_index],
		    VLIB_FRAME_SIZE + n_free_bufs - 1);
      n_free_bufs +=
-	vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
			   VLIB_FRAME_SIZE);
-      _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
    }

  rx_frame = apif->next_rx_frame;
@@ -163,11 +163,11 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
	{
	  /* grab free buffer */
	  u32 last_empty_buffer =
-	    vec_len (apm->rx_buffers[cpu_index]) - 1;
+	    vec_len (apm->rx_buffers[thread_index]) - 1;
	  prev_bi0 = bi0;
-	  bi0 = apm->rx_buffers[cpu_index][last_empty_buffer];
+	  bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
	  b0 = vlib_get_buffer (vm, bi0);
-	  _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer;
+	  _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
	  n_free_bufs--;

	  /* copy data */
@@ -236,9 +236,9 @@ af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
  vlib_increment_combined_counter
    (vnet_get_main ()->interface_main.combined_sw_if_counters
     + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);

-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);

  return n_rx_packets;
}

diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 41645220..5e5e812c 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -104,7 +104,7 @@ vnet_device_queue_sort (void *a1, void *a2)

void
vnet_device_input_assign_thread (u32 hw_if_index,
-                                u16 queue_id, uword cpu_index)
+                                u16 queue_id, uword thread_index)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_device_main_t *vdm = &vnet_device_main;
@@ -115,19 +115,19 @@ vnet_device_input_assign_thread (u32 hw_if_index,

  ASSERT (hw->input_node_index > 0);

-  if (vdm->first_worker_cpu_index == 0)
-    cpu_index = 0;
+  if (vdm->first_worker_thread_index == 0)
+    thread_index = 0;

-  if (cpu_index != 0 &&
-      (cpu_index < vdm->first_worker_cpu_index ||
-       cpu_index > vdm->last_worker_cpu_index))
+  if (thread_index != 0 &&
+      (thread_index < vdm->first_worker_thread_index ||
+       thread_index > vdm->last_worker_thread_index))
    {
-      cpu_index = vdm->next_worker_cpu_index++;
-      if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index)
-	vdm->next_worker_cpu_index = vdm->first_worker_cpu_index;
+      thread_index = vdm->next_worker_thread_index++;
+      if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
+	vdm->next_worker_thread_index = vdm->first_worker_thread_index;
    }

-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
  rt = vlib_node_get_runtime_data (vm, hw->input_node_index);

  vec_add2 (rt->devices_and_queues, dq, 1);
@@ -136,33 +136,33 @@ vnet_device_input_assign_thread (u32 hw_if_index,
  dq->queue_id = queue_id;

  vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
-  vec_validate (hw->input_node_cpu_index_by_queue, queue_id);
-  hw->input_node_cpu_index_by_queue[queue_id] = cpu_index;
+  vec_validate (hw->input_node_thread_index_by_queue, queue_id);
+  hw->input_node_thread_index_by_queue[queue_id] = thread_index;
}

static int
vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id,
-                                  uword cpu_index)
+                                  uword thread_index)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  vnet_device_input_runtime_t *rt;
  vnet_device_and_queue_t *dq;
-  uword old_cpu_index;
+  uword old_thread_index;

-  if (hw->input_node_cpu_index_by_queue == 0)
+  if (hw->input_node_thread_index_by_queue == 0)
    return VNET_API_ERROR_INVALID_INTERFACE;

-  if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1)
+  if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1)
    return VNET_API_ERROR_INVALID_INTERFACE;

-  old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
+  old_thread_index = hw->input_node_thread_index_by_queue[queue_id];

-  if (old_cpu_index == cpu_index)
+  if (old_thread_index == thread_index)
    return 0;

  rt =
-    vlib_node_get_runtime_data (vlib_mains[old_cpu_index],
+    vlib_node_get_runtime_data (vlib_mains[old_thread_index],
				hw->input_node_index);

  vec_foreach (dq, rt->devices_and_queues)
@@ -240,7 +240,7 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
  vnet_device_main_t *vdm = &vnet_device_main;
  u32 hw_if_index = (u32) ~ 0;
  u32 queue_id = (u32) 0;
-  u32 cpu_index = (u32) ~ 0;
+  u32 thread_index = (u32) ~ 0;
  int rv;

  if (!unformat_user (input, unformat_line_input, line_input))
@@ -253,10 +253,10 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
	;
      else if (unformat (line_input, "queue %d", &queue_id))
	;
-      else if (unformat (line_input, "main", &cpu_index))
-	cpu_index = 0;
-      else if (unformat (line_input, "worker %d", &cpu_index))
-	cpu_index += vdm->first_worker_cpu_index;
+      else if (unformat (line_input, "main", &thread_index))
+	thread_index = 0;
+      else if (unformat (line_input, "worker %d", &thread_index))
+	thread_index += vdm->first_worker_thread_index;
      else
	{
	  error = clib_error_return (0, "parse error: '%U'",
@@ -271,16 +271,17 @@ set_device_placement (vlib_main_t * vm, unformat_input_t * input,
  if (hw_if_index == (u32) ~ 0)
    return clib_error_return (0, "please specify valid interface name");

-  if (cpu_index > vdm->last_worker_cpu_index)
+  if (thread_index > vdm->last_worker_thread_index)
    return clib_error_return (0,
			      "please specify valid worker thread or main");

-  rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index);
+  rv =
+    vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index);
  if (rv)
    return clib_error_return (0, "not found");

-  vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index);
+  vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index);

  return 0;
}
@@ -326,9 +327,9 @@ vnet_device_init (vlib_main_t * vm)
  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
  if (tr && tr->count > 0)
    {
-      vdm->first_worker_cpu_index = tr->first_index;
-      vdm->next_worker_cpu_index = tr->first_index;
-      vdm->last_worker_cpu_index = tr->first_index + tr->count - 1;
+      vdm->first_worker_thread_index = tr->first_index;
+      vdm->next_worker_thread_index = tr->first_index;
+      vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
    }
  return 0;
}
vdm->last_worker_thread_index) return clib_error_return (0, "please specify valid worker thread or main"); - rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index); + rv = + vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index); if (rv) return clib_error_return (0, "not found"); - vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index); + vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index); return 0; } @@ -326,9 +327,9 @@ vnet_device_init (vlib_main_t * vm) tr = p ? (vlib_thread_registration_t *) p[0] : 0; if (tr && tr->count > 0) { - vdm->first_worker_cpu_index = tr->first_index; - vdm->next_worker_cpu_index = tr->first_index; - vdm->last_worker_cpu_index = tr->first_index + tr->count - 1; + vdm->first_worker_thread_index = tr->first_index; + vdm->next_worker_thread_index = tr->first_index; + vdm->last_worker_thread_index = tr->first_index + tr->count - 1; } return 0; } diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h index bbb29fe3..966f8302 100644 --- a/src/vnet/devices/devices.h +++ b/src/vnet/devices/devices.h @@ -50,9 +50,9 @@ typedef struct typedef struct { vnet_device_per_worker_data_t *workers; - uword first_worker_cpu_index; - uword last_worker_cpu_index; - uword next_worker_cpu_index; + uword first_worker_thread_index; + uword last_worker_thread_index; + uword next_worker_thread_index; } vnet_device_main_t; typedef struct @@ -80,7 +80,7 @@ vnet_set_device_input_node (u32 hw_if_index, u32 node_index) } void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id, - uword cpu_index); + uword thread_index); static inline u64 vnet_get_aggregate_rx_packets (void) @@ -95,12 +95,12 @@ vnet_get_aggregate_rx_packets (void) } static inline void -vnet_device_increment_rx_packets (u32 cpu_index, u64 count) +vnet_device_increment_rx_packets (u32 thread_index, u64 count) { vnet_device_main_t *vdm = &vnet_device_main; vnet_device_per_worker_data_t *pwd; - pwd = vec_elt_at_index (vdm->workers, cpu_index); + pwd = vec_elt_at_index (vdm->workers, thread_index); pwd->aggregate_rx_packets += count; } @@ -117,9 +117,9 @@ vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index, { vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue)); - u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id]; - vlib_node_set_interrupt_pending (vlib_mains[cpu_index], + ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue)); + u32 thread_index = hw->input_node_thread_index_by_queue[queue_id]; + vlib_node_set_interrupt_pending (vlib_mains[thread_index], hw->input_node_index); } diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c index 68ea7832..e120eeae 100644 --- a/src/vnet/devices/netmap/node.c +++ b/src/vnet/devices/netmap/node.c @@ -98,22 +98,22 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_free_bufs; struct netmap_ring *ring; int cur_ring; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); if (nif->per_interface_next_index != ~0) next_index = nif->per_interface_next_index; - n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) { - vec_validate (nm->rx_buffers[cpu_index], + vec_validate (nm->rx_buffers[thread_index], 
VLIB_FRAME_SIZE + n_free_bufs - 1); n_free_bufs += - vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE); - _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; } cur_ring = nif->first_rx_ring; @@ -163,11 +163,11 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; /* grab free buffer */ u32 last_empty_buffer = - vec_len (nm->rx_buffers[cpu_index]) - 1; + vec_len (nm->rx_buffers[thread_index]) - 1; prev_bi0 = bi0; - bi0 = nm->rx_buffers[cpu_index][last_empty_buffer]; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; b0 = vlib_get_buffer (vm, bi0); - _vec_len (nm->rx_buffers[cpu_index]) = last_empty_buffer; + _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer; n_free_bufs--; /* copy data */ @@ -247,9 +247,9 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + vnet_device_increment_rx_packets (thread_index, n_rx_packets); return n_rx_packets; } @@ -260,7 +260,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, { int i; u32 n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); netmap_main_t *nm = &netmap_main; netmap_if_t *nmi; @@ -269,7 +269,7 @@ netmap_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, nmi = vec_elt_at_index (nm->interfaces, i); if (nmi->is_admin_up && (i % nm->input_cpu_count) == - (cpu_index - nm->input_cpu_first_index)) + (thread_index - nm->input_cpu_first_index)) n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); } diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c index a6c9dfd7..539b4161 100644 --- a/src/vnet/devices/ssvm/node.c +++ b/src/vnet/devices/ssvm/node.c @@ -89,7 +89,7 @@ ssvm_eth_device_input (ssvm_eth_main_t * em, ethernet_header_t *eh0; u16 type0; u32 n_rx_bytes = 0, l3_offset0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node); volatile u32 *lock; u32 *elt_indices; @@ -284,10 +284,10 @@ out: vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, cpu_index, + + VNET_INTERFACE_COUNTER_RX, thread_index, intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, rx_queue_index); + vnet_device_increment_rx_packets (thread_index, rx_queue_index); return rx_queue_index; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 00807dc0..5e720f65 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -331,7 +331,7 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) { //Let's try to assign one queue to each thread u32 qid = 0; - u32 cpu_index = 0; + u32 thread_index = 0; vui->use_tx_spinlock = 0; while (1) { @@ -341,20 +341,21 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) if (!rxvq->started || !rxvq->enabled) continue; - vui->per_cpu_tx_qid[cpu_index] = qid; - cpu_index++; - if (cpu_index == 
vlib_get_thread_main ()->n_vlib_mains)
+      vui->per_cpu_tx_qid[thread_index] = qid;
+      thread_index++;
+      if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
 	return;
     }
   //We need to loop, meaning the spinlock has to be used
   vui->use_tx_spinlock = 1;
-  if (cpu_index == 0)
+  if (thread_index == 0)
     {
       //Could not find a single valid one
-      for (cpu_index = 0;
-	   cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
+      for (thread_index = 0;
+	   thread_index < vlib_get_thread_main ()->n_vlib_mains;
+	   thread_index++)
 	{
-	  vui->per_cpu_tx_qid[cpu_index] = 0;
+	  vui->per_cpu_tx_qid[thread_index] = 0;
 	}
       return;
     }
@@ -368,7 +369,7 @@ vhost_user_rx_thread_placement ()
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
   u32 *workers = 0;
-  u32 cpu_index;
+  u32 thread_index;
   vlib_main_t *vm;
 
   //Let's list all workers cpu indexes
@@ -400,9 +401,9 @@ vhost_user_rx_thread_placement ()
 	    continue;
 
 	  i %= vec_len (vui_workers);
-	  cpu_index = vui_workers[i];
+	  thread_index = vui_workers[i];
 	  i++;
-	  vhc = &vum->cpus[cpu_index];
+	  vhc = &vum->cpus[thread_index];
 
 	  iaq.qid = qid;
 	  iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
@@ -429,14 +430,14 @@ vhost_user_rx_thread_placement ()
       vhc->operation_mode = mode;
     }
 
-  for (cpu_index = vum->input_cpu_first_index;
-       cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
-       cpu_index++)
+  for (thread_index = vum->input_cpu_first_index;
+       thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
+       thread_index++)
     {
       vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
-      vhc = &vum->cpus[cpu_index];
-      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+      vhc = &vum->cpus[thread_index];
+      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
       switch (vhc->operation_mode)
 	{
 	case VHOST_USER_INTERRUPT_MODE:
@@ -532,7 +533,7 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
 {
   vhost_user_main_t *vum = &vhost_user_main;
   vhost_cpu_t *vhc;
-  u32 cpu_index;
+  u32 thread_index;
   vhost_iface_and_queue_t *vhiq;
   vlib_main_t *vm;
   u32 ifq2;
@@ -553,8 +554,8 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
       if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
 	  (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
 	{
-	  cpu_index = vhc - vum->cpus;
-	  vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+	  thread_index = vhc - vum->cpus;
+	  vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
 	  /*
 	   * Convert RX virtqueue number in the lower byte to vring
 	   * queue index for the input node process. Top bytes contain
@@ -1592,7 +1593,7 @@ vhost_user_if_input (vlib_main_t * vm,
   u32 n_trace = vlib_get_trace_count (vm, node);
   u16 qsz_mask;
   u32 map_hint = 0;
-  u16 cpu_index = os_get_cpu_number ();
+  u16 thread_index = vlib_get_thread_index ();
   u16 copy_len = 0;
 
   {
@@ -1651,32 +1652,32 @@ vhost_user_if_input (vlib_main_t * vm,
    * in the loop and come back later. This is not an issue as for big packet,
    * processing cost really comes from the memory copy.
    */
-  if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1))
+  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
    {
-      u32 curr_len = vum->cpus[cpu_index].rx_buffers_len;
-      vum->cpus[cpu_index].rx_buffers_len +=
+      u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+      vum->cpus[thread_index].rx_buffers_len +=
 	vlib_buffer_alloc_from_free_list (vm,
-					  vum->cpus[cpu_index].rx_buffers +
+					  vum->cpus[thread_index].rx_buffers +
 					  curr_len,
 					  VHOST_USER_RX_BUFFERS_N - curr_len,
 					  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
      if (PREDICT_FALSE
-	  (vum->cpus[cpu_index].rx_buffers_len <
+	  (vum->cpus[thread_index].rx_buffers_len <
 	   VHOST_USER_RX_BUFFER_STARVATION))
 	{
 	  /* In case of buffer starvation, discard some packets from the queue
 	   * and log the event.
 	   * We keep doing best effort for the remaining packets. */
-	  u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ?
-	    n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1;
+	  u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+	    n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
 	  flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
 	  n_left -= flush;
 	  vlib_increment_simple_counter (vnet_main.
 					 interface_main.sw_if_counters +
 					 VNET_INTERFACE_COUNTER_DROP,
-					 os_get_cpu_number (),
+					 vlib_get_thread_index (),
 					 vui->sw_if_index, flush);
 	  vlib_error_count (vm, vhost_user_input_node.index,
@@ -1696,7 +1697,7 @@ vhost_user_if_input (vlib_main_t * vm,
       u32 desc_data_offset;
       vring_desc_t *desc_table = txvq->desc;
 
-      if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1))
+      if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
 	{
 	  /* Not enough rx_buffers
 	   * Note: We yeld on 1 so we don't need to do an additional
@@ -1707,17 +1708,18 @@ vhost_user_if_input (vlib_main_t * vm,
 	}
 
       desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
-      vum->cpus[cpu_index].rx_buffers_len--;
-      bi_current = (vum->cpus[cpu_index].rx_buffers)
-	[vum->cpus[cpu_index].rx_buffers_len];
+      vum->cpus[thread_index].rx_buffers_len--;
+      bi_current = (vum->cpus[thread_index].rx_buffers)
+	[vum->cpus[thread_index].rx_buffers_len];
       b_head = b_current = vlib_get_buffer (vm, bi_current);
       to_next[0] = bi_current;	//We do that now so we can forget about bi_current
       to_next++;
       n_left_to_next--;
 
       vlib_prefetch_buffer_with_index (vm,
-				       (vum->cpus[cpu_index].rx_buffers)
-				       [vum->cpus[cpu_index].
+				       (vum->
+					cpus[thread_index].rx_buffers)
+				       [vum->cpus[thread_index].
 					rx_buffers_len - 1], LOAD);
 
       /* Just preset the used descriptor id and length for later */
@@ -1791,7 +1793,7 @@ vhost_user_if_input (vlib_main_t * vm,
 	      (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
 	    {
 	      if (PREDICT_FALSE
-		  (vum->cpus[cpu_index].rx_buffers_len == 0))
+		  (vum->cpus[thread_index].rx_buffers_len == 0))
 		{
 		  /* Cancel speculation */
 		  to_next--;
@@ -1805,17 +1807,18 @@ vhost_user_if_input (vlib_main_t * vm,
 	       * but valid.
*/ vhost_user_input_rewind_buffers (vm, - &vum->cpus[cpu_index], + &vum->cpus + [thread_index], b_head); n_left = 0; goto stop; } /* Get next output */ - vum->cpus[cpu_index].rx_buffers_len--; + vum->cpus[thread_index].rx_buffers_len--; u32 bi_next = - (vum->cpus[cpu_index].rx_buffers)[vum->cpus - [cpu_index].rx_buffers_len]; + (vum->cpus[thread_index].rx_buffers)[vum->cpus + [thread_index].rx_buffers_len]; b_current->next_buffer = bi_next; b_current->flags |= VLIB_BUFFER_NEXT_PRESENT; bi_current = bi_next; @@ -1823,7 +1826,7 @@ vhost_user_if_input (vlib_main_t * vm, } /* Prepare a copy order executed later for the data */ - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; u32 desc_data_l = desc_table[desc_current].len - desc_data_offset; @@ -1880,7 +1883,7 @@ vhost_user_if_input (vlib_main_t * vm, if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD)) { if (PREDICT_FALSE - (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning @@ -1905,7 +1908,7 @@ vhost_user_if_input (vlib_main_t * vm, /* Do the memory copies */ if (PREDICT_FALSE - (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_input_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning ("Memory mapping error on interface hw_if_index=%d " @@ -1933,9 +1936,9 @@ vhost_user_if_input (vlib_main_t * vm, vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes); + vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes); - vnet_device_increment_rx_packets (cpu_index, n_rx_packets); + vnet_device_increment_rx_packets (thread_index, n_rx_packets); return n_rx_packets; } @@ -1946,15 +1949,15 @@ vhost_user_input (vlib_main_t * vm, { vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); vhost_iface_and_queue_t *vhiq; vhost_user_intf_t *vui; vhost_cpu_t *vhc; - vhc = &vum->cpus[cpu_index]; + vhc = &vum->cpus[thread_index]; if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) { - vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) { vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); @@ -2096,7 +2099,7 @@ vhost_user_tx (vlib_main_t * vm, vhost_user_vring_t *rxvq; u16 qsz_mask; u8 error; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 map_hint = 0; u8 retry = 8; u16 copy_len; @@ -2116,7 +2119,7 @@ vhost_user_tx (vlib_main_t * vm, qid = VHOST_VRING_IDX_RX (*vec_elt_at_index - (vui->per_cpu_tx_qid, os_get_cpu_number ())); + (vui->per_cpu_tx_qid, vlib_get_thread_index ())); rxvq = &vui->vrings[qid]; if (PREDICT_FALSE (vui->use_tx_spinlock)) vhost_user_vring_lock (vui, qid); @@ -2143,10 +2146,10 @@ retry: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - vum->cpus[cpu_index].current_trace = + vum->cpus[thread_index].current_trace = vlib_add_trace (vm, node, b0, - sizeof (*vum->cpus[cpu_index].current_trace)); - vhost_user_tx_trace (vum->cpus[cpu_index].current_trace, + sizeof (*vum->cpus[thread_index].current_trace)); + vhost_user_tx_trace (vum->cpus[thread_index].current_trace, vui, qid / 2, b0, 
rxvq); } @@ -2188,14 +2191,14 @@ retry: { // Get a header from the header array virtio_net_hdr_mrg_rxbuf_t *hdr = - &vum->cpus[cpu_index].tx_headers[tx_headers_len]; + &vum->cpus[thread_index].tx_headers[tx_headers_len]; tx_headers_len++; hdr->hdr.flags = 0; hdr->hdr.gso_type = 0; hdr->num_buffers = 1; //This is local, no need to check // Prepare a copy order executed later for the header - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; cpy->len = vui->virtio_net_hdr_sz; cpy->dst = buffer_map_addr; @@ -2220,7 +2223,7 @@ retry: else if (vui->virtio_net_hdr_sz == 12) //MRG is available { virtio_net_hdr_mrg_rxbuf_t *hdr = - &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + &vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; //Move from available to used buffer rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = @@ -2282,7 +2285,7 @@ retry: } { - vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len]; + vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len]; copy_len++; cpy->len = bytes_left; cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len; @@ -2325,8 +2328,8 @@ retry: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { - vum->cpus[cpu_index].current_trace->hdr = - vum->cpus[cpu_index].tx_headers[tx_headers_len - 1]; + vum->cpus[thread_index].current_trace->hdr = + vum->cpus[thread_index].tx_headers[tx_headers_len - 1]; } n_left--; //At the end for error counting when 'goto done' is invoked @@ -2336,7 +2339,7 @@ retry: done: //Do the memory copies if (PREDICT_FALSE - (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy, + (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy, copy_len, &map_hint))) { clib_warning ("Memory mapping error on interface hw_if_index=%d " @@ -2386,7 +2389,7 @@ done3: vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP, - os_get_cpu_number (), vui->sw_if_index, n_left); + vlib_get_thread_index (), vui->sw_if_index, n_left); } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); @@ -2773,11 +2776,11 @@ vhost_user_send_interrupt_process (vlib_main_t * vm, case ~0: vec_foreach (vhc, vum->cpus) { - u32 cpu_index = vhc - vum->cpus; + u32 thread_index = vhc - vum->cpus; f64 next_timeout; next_timeout = timeout; - vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues) + vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) { vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; vhost_user_vring_t *rxvq = diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index e94e871c..97ad0a44 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -266,7 +266,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); @@ -407,10 +407,10 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -511,7 +511,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm, 
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -606,7 +606,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -749,10 +749,10 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -853,7 +853,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm, vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -930,7 +930,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm, int table_from_interface) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; from = vlib_frame_vector_args (from_frame); @@ -994,7 +994,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm, vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index a9f334be..e25ceae9 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -627,7 +627,7 @@ replicate_inline (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; replicate_main_t * rm = &replicate_main; u32 n_left_from, * from, * to_next, next_index; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -657,12 +657,12 @@ replicate_inline (vlib_main_t * vm, rep0 = replicate_get(repi0); vlib_increment_combined_counter( - cm, cpu_index, repi0, 1, + cm, thread_index, repi0, 1, vlib_buffer_length_in_chain(vm, b0)); - vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1); + vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1); - num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128); + num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], rep0->rep_n_buckets, 128); if (num_cloned != rep0->rep_n_buckets) { @@ -673,7 +673,7 @@ replicate_inline (vlib_main_t * vm, for (bucket = 0; bucket < num_cloned; bucket++) { - ci0 = rm->clones[cpu_index][bucket]; + ci0 = rm->clones[thread_index][bucket]; c0 = vlib_get_buffer(vm, ci0); to_next[0] = ci0; @@ -700,7 +700,7 @@ replicate_inline (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); } } - vec_reset_length (rm->clones[cpu_index]); + vec_reset_length (rm->clones[thread_index]); } 
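/*
 * A minimal sketch of the per-thread access pattern this patch applies
 * throughout (reusing `vm`, `rm`, `bi0`, `rep0` and `num_cloned` from the
 * hunk above purely for illustration, not as an addition to the tree):
 * per-thread scratch state is now keyed by the vlib thread index from
 * vlib_get_thread_index (), rather than by the OS CPU number that
 * os_get_cpu_number () used to return.
 */
u32 thread_index = vlib_get_thread_index ();
vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index],
                                rep0->rep_n_buckets, 128);
vec_reset_length (rm->clones[thread_index]);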
vlib_put_next_frame (vm, node, next_index, n_left_to_next); diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index ee757505..c74a097e 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -1771,7 +1771,7 @@ set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t * a) { vnet_main_t *vm = vnet_get_main (); - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); if (a->flags & ETHERNET_ARP_ARGS_REMOVE) vnet_arp_unset_ip4_over_ethernet_internal (vm, a); diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 9894e3c8..335e3f9f 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -362,7 +362,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; u32 i, next_node_index, bvi_flag, sw_if_index; u32 n_pkts = 0, n_bytes = 0; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; vlib_node_main_t *nm = &vm->node_main; @@ -420,8 +420,9 @@ simulated_ethernet_interface_tx (vlib_main_t * vm, /* increment TX interface stat */ vlib_increment_combined_counter (im->combined_sw_if_counters + - VNET_INTERFACE_COUNTER_TX, cpu_index, - sw_if_index, n_pkts, n_bytes); + VNET_INTERFACE_COUNTER_TX, + thread_index, sw_if_index, n_pkts, + n_bytes); } return n_left_from; diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index b699e381..f7787ed2 100755 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -291,7 +291,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_node_runtime_t *error_node; u32 n_left_from, next_index, *from, *to_next; u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 cached_sw_if_index = ~0; u32 cached_is_l2 = 0; /* shut up gcc */ vnet_hw_interface_t *hi = NULL; /* used for main interface only */ @@ -510,7 +510,7 @@ ethernet_input_inline (vlib_main_t * vm, interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, new_sw_if_index0, 1, len0); if (new_sw_if_index1 != old_sw_if_index1 @@ -519,7 +519,7 @@ ethernet_input_inline (vlib_main_t * vm, interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, new_sw_if_index1, 1, len1); @@ -530,7 +530,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = stats_n_bytes = 0; @@ -696,13 +696,13 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, new_sw_if_index0, 1, len0); + thread_index, new_sw_if_index0, 1, len0); if (stats_n_packets > 0) { vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = stats_n_bytes = 0; } @@ -734,7 +734,7 @@ ethernet_input_inline (vlib_main_t * vm, vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); 
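/*
 * Usage sketch for the per-thread interface counters touched above,
 * assuming `vnm`, `sw_if_index`, `n_packets` and `n_bytes` are in scope
 * (illustrative only): combined and simple counters both take the vlib
 * thread index as their per-thread dimension.
 */
u32 thread_index = vlib_get_thread_index ();
vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
                                 + VNET_INTERFACE_COUNTER_RX,
                                 thread_index, sw_if_index,
                                 n_packets, n_bytes);
vlib_increment_simple_counter (vnm->interface_main.sw_if_counters +
                               VNET_INTERFACE_COUNTER_DROP,
                               thread_index, sw_if_index, 1);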
node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c index 2683586e..acf15f24 100644 --- a/src/vnet/gre/node.c +++ b/src/vnet/gre/node.c @@ -75,7 +75,7 @@ gre_input (vlib_main_t * vm, u64 cached_tunnel_key6[4]; u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 len; vnet_interface_main_t *im = &gm->vnet_main->interface_main; @@ -257,7 +257,7 @@ gre_input (vlib_main_t * vm, len = vlib_buffer_length_in_chain (vm, b0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); @@ -324,7 +324,7 @@ drop0: len = vlib_buffer_length_in_chain (vm, b1); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); @@ -502,7 +502,7 @@ drop1: len = vlib_buffer_length_in_chain (vm, b0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, tunnel_sw_if_index, 1 /* packets */, len /* bytes */); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index a1ea2d61..08f08b10 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -468,7 +468,7 @@ typedef struct vnet_hw_interface_t u32 input_node_index; /* input node cpu index by queue */ - u32 *input_node_cpu_index_by_queue; + u32 *input_node_thread_index_by_queue; } vnet_hw_interface_t; diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index 03f2cdca..663dc309 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -196,7 +196,7 @@ slow_path (vlib_main_t * vm, */ static_always_inline void incr_output_stats (vnet_main_t * vnm, - u32 cpu_index, + u32 thread_index, u32 length, u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes) @@ -216,7 +216,7 @@ incr_output_stats (vnet_main_t * vnm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, *last_sw_if_index, *n_packets, *n_bytes); } @@ -240,7 +240,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, u32 n_left_to_tx, *from, *from_end, *to_tx; u32 n_bytes, n_buffers, n_packets; u32 last_sw_if_index; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; n_buffers = frame->n_vectors; @@ -266,7 +266,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, cpu_index, + vlib_increment_simple_counter (cm, thread_index, rt->sw_if_index, n_buffers); return vlib_error_drop_buffers (vm, node, from, /* buffer stride */ 1, @@ -341,18 +341,18 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, from += 1; to_tx += n_buffers; n_left_to_tx -= n_buffers; - incr_output_stats (vnm, cpu_index, n_slow_bytes, + incr_output_stats (vnm, thread_index, n_slow_bytes, vnet_buffer (b)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); } } else { - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b0)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b1)->sw_if_index[VLIB_TX], 
&last_sw_if_index, &n_packets, &n_bytes); @@ -396,7 +396,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, to_tx += n_buffers; n_left_to_tx -= n_buffers; } - incr_output_stats (vnm, cpu_index, + incr_output_stats (vnm, thread_index, vlib_buffer_length_in_chain (vm, b0), vnet_buffer (b0)->sw_if_index[VLIB_TX], &last_sw_if_index, &n_packets, &n_bytes); @@ -408,7 +408,7 @@ vnet_interface_output_node_flatten (vlib_main_t * vm, } /* Final update of interface stats. */ - incr_output_stats (vnm, cpu_index, 0, ~0, /* ~0 will flush stats */ + incr_output_stats (vnm, thread_index, 0, ~0, /* ~0 will flush stats */ &last_sw_if_index, &n_packets, &n_bytes); return n_buffers; @@ -428,7 +428,7 @@ vnet_interface_output_node (vlib_main_t * vm, u32 n_left_to_tx, *from, *from_end, *to_tx; u32 n_bytes, n_buffers, n_packets; u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; vnet_interface_main_t *im = &vnm->interface_main; u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX; u32 current_config_index = ~0; @@ -458,7 +458,7 @@ vnet_interface_output_node (vlib_main_t * vm, cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, cpu_index, + vlib_increment_simple_counter (cm, thread_index, rt->sw_if_index, n_buffers); return vlib_error_drop_buffers (vm, node, from, @@ -558,7 +558,7 @@ vnet_interface_output_node (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif0, 1, + thread_index, tx_swif0, 1, n_bytes_b0); } @@ -567,7 +567,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif1, 1, + thread_index, tx_swif1, 1, n_bytes_b1); } @@ -576,7 +576,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif2, 1, + thread_index, tx_swif2, 1, n_bytes_b2); } if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index)) @@ -584,7 +584,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif3, 1, + thread_index, tx_swif3, 1, n_bytes_b3); } } @@ -623,7 +623,7 @@ vnet_interface_output_node (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, tx_swif0, 1, + thread_index, tx_swif0, 1, n_bytes_b0); } } @@ -634,7 +634,7 @@ vnet_interface_output_node (vlib_main_t * vm, /* Update main interface stats. 
*/ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, + thread_index, rt->sw_if_index, n_packets, n_bytes); return n_buffers; } @@ -893,7 +893,7 @@ process_drop_punt (vlib_main_t * vm, u32 current_sw_if_index, n_errors_current_sw_if_index; u64 current_counter; vlib_simple_counter_main_t *cm; - u32 cpu_index = vm->cpu_index; + u32 thread_index = vm->thread_index; static vlib_error_t memory[VNET_ERROR_N_DISPOSITION]; static char memory_init[VNET_ERROR_N_DISPOSITION]; @@ -965,19 +965,19 @@ process_drop_punt (vlib_main_t * vm, current_counter -= 2; n_errors_current_sw_if_index -= 2; - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); /* Increment super-interface drop/punt counters for sub-interfaces. */ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); vlib_increment_simple_counter - (cm, cpu_index, sw_if0->sup_sw_if_index, + (cm, thread_index, sw_if0->sup_sw_if_index, sw_if0->sup_sw_if_index != sw_if_index0); sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1); vlib_increment_simple_counter - (cm, cpu_index, sw_if1->sup_sw_if_index, + (cm, thread_index, sw_if1->sup_sw_if_index, sw_if1->sup_sw_if_index != sw_if_index1); em->counters[current_counter_index] = current_counter; @@ -1013,11 +1013,12 @@ process_drop_punt (vlib_main_t * vm, sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; /* Increment drop/punt counters. */ - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); /* Increment super-interface drop/punt counters for sub-interfaces. 
*/ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0); - vlib_increment_simple_counter (cm, cpu_index, sw_if0->sup_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, + sw_if0->sup_sw_if_index, sw_if0->sup_sw_if_index != sw_if_index0); if (PREDICT_FALSE (e0 != current_error)) @@ -1041,12 +1042,12 @@ process_drop_punt (vlib_main_t * vm, { vnet_sw_interface_t *si; - vlib_increment_simple_counter (cm, cpu_index, current_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, current_sw_if_index, n_errors_current_sw_if_index); si = vnet_get_sw_interface (vnm, current_sw_if_index); if (si->sup_sw_if_index != current_sw_if_index) - vlib_increment_simple_counter (cm, cpu_index, si->sup_sw_if_index, + vlib_increment_simple_counter (cm, thread_index, si->sup_sw_if_index, n_errors_current_sw_if_index); } diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index ee1703e7..fdfe7f63 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -75,7 +75,7 @@ ip4_lookup_inline (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -292,19 +292,19 @@ ip4_lookup_inline (vlib_main_t * vm, vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lb_index0, 1, + (cm, thread_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index1, 1, + (cm, thread_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index2, 1, + (cm, thread_index, lb_index2, 1, vlib_buffer_length_in_chain (vm, p2) + sizeof (ethernet_header_t)); vlib_increment_combined_counter - (cm, cpu_index, lb_index3, 1, + (cm, thread_index, lb_index3, 1, vlib_buffer_length_in_chain (vm, p3) + sizeof (ethernet_header_t)); @@ -392,7 +392,7 @@ ip4_lookup_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -479,7 +479,7 @@ ip4_load_balance (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -584,9 +584,9 @@ ip4_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); vlib_validate_buffer_enqueue_x2 (vm, node, next, to_next, n_left_to_next, @@ -639,7 +639,7 @@ ip4_load_balance (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, 
thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, @@ -2330,7 +2330,7 @@ ip4_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -2379,9 +2379,9 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) { vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + thread_index, adj_index0); vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index1); + thread_index, adj_index1); } ip0 = vlib_buffer_get_current (p0); @@ -2527,13 +2527,13 @@ ip4_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); } @@ -2618,7 +2618,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) vlib_prefetch_combined_counter (&adjacency_counters, - cpu_index, adj_index0); + thread_index, adj_index0); /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2637,7 +2637,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (do_counters) vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index ba200a9f..3b08f4b0 100644 --- a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -85,7 +85,7 @@ ip4_input_inline (vlib_main_t * vm, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); vlib_simple_counter_main_t *cm; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -178,8 +178,8 @@ ip4_input_inline (vlib_main_t * vm, vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); /* Punt packets with options or wrong version. */ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) @@ -299,7 +299,7 @@ ip4_input_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); /* Punt packets with options or wrong version. 
*/ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index c120f12c..c2fc4f87 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -74,7 +74,7 @@ ip6_lookup_inline (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -185,9 +185,9 @@ ip6_lookup_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; to_next += 2; @@ -291,7 +291,7 @@ ip6_lookup_inline (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -703,7 +703,7 @@ ip6_load_balance (vlib_main_t * vm, vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ip6_main_t *im = &ip6_main; from = vlib_frame_vector_args (frame); @@ -824,9 +824,9 @@ ip6_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); vlib_validate_buffer_enqueue_x2 (vm, node, next, to_next, n_left_to_next, @@ -886,7 +886,7 @@ ip6_load_balance (vlib_main_t * vm, } vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, @@ -1897,7 +1897,7 @@ ip6_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -2019,11 +2019,11 @@ ip6_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index1, 1, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); } @@ -2156,7 +2156,7 @@ ip6_rewrite_inline (vlib_main_t * vm, { vlib_increment_combined_counter (&adjacency_counters, - cpu_index, adj_index0, 1, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); } diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index 20306088..ffdc4727 100644 --- a/src/vnet/ip/ip6_input.c +++ 
b/src/vnet/ip/ip6_input.c @@ -82,7 +82,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_input_node.index); vlib_simple_counter_main_t *cm; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -171,8 +171,8 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); error0 = error1 = IP6_ERROR_NONE; @@ -270,7 +270,7 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); error0 = IP6_ERROR_NONE; /* Version != 6? Drop it. */ diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 5d1fb6f8..2af546df 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -581,7 +581,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 next_index; pending_resolution_t *pr, *mc; - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, 1 /* set new neighbor */ , is_static, @@ -722,7 +722,7 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, uword *p; int rv = 0; - if (os_get_cpu_number ()) + if (vlib_get_thread_index ()) { set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, 0 /* unset */ , 0, 0); diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h index 50cac806..799003b9 100644 --- a/src/vnet/ipsec/esp.h +++ b/src/vnet/ipsec/esp.h @@ -282,8 +282,8 @@ hmac_calc (ipsec_integ_alg_t alg, u8 * data, int data_len, u8 * signature, u8 use_esn, u32 seq_hi) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - HMAC_CTX *ctx = &(em->per_thread_data[cpu_index].hmac_ctx); + u32 thread_index = vlib_get_thread_index (); + HMAC_CTX *ctx = &(em->per_thread_data[thread_index].hmac_ctx); const EVP_MD *md = NULL; unsigned int len; @@ -292,10 +292,10 @@ hmac_calc (ipsec_integ_alg_t alg, if (PREDICT_FALSE (em->esp_integ_algs[alg].md == 0)) return 0; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_integ_alg)) + if (PREDICT_FALSE (alg != em->per_thread_data[thread_index].last_integ_alg)) { md = em->esp_integ_algs[alg].md; - em->per_thread_data[cpu_index].last_integ_alg = alg; + em->per_thread_data[thread_index].last_integ_alg = alg; } HMAC_Init (ctx, key, key_len, md); diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 7289b260..925d2b45 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -85,8 +85,8 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg, u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].decrypt_ctx); + u32 thread_index = vlib_get_thread_index (); + EVP_CIPHER_CTX *ctx = 
&(em->per_thread_data[thread_index].decrypt_ctx); const EVP_CIPHER *cipher = NULL; int out_len; @@ -95,10 +95,11 @@ esp_decrypt_aes_cbc (ipsec_crypto_alg_t alg, if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == 0)) return; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_decrypt_alg)) + if (PREDICT_FALSE + (alg != em->per_thread_data[thread_index].last_decrypt_alg)) { cipher = em->esp_crypto_algs[alg].type; - em->per_thread_data[cpu_index].last_decrypt_alg = alg; + em->per_thread_data[thread_index].last_decrypt_alg = alg; } EVP_DecryptInit_ex (ctx, cipher, NULL, key, iv); @@ -117,11 +118,11 @@ esp_decrypt_node_fn (vlib_main_t * vm, u32 *recycle = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ipsec_alloc_empty_buffers (vm, im); - u32 *empty_buffers = im->empty_buffers[cpu_index]; + u32 *empty_buffers = im->empty_buffers[thread_index]; if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from)) { diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index 44ae2297..b2bc4e0b 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -88,8 +88,8 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg, u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv) { esp_main_t *em = &esp_main; - u32 cpu_index = os_get_cpu_number (); - EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].encrypt_ctx); + u32 thread_index = vlib_get_thread_index (); + EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].encrypt_ctx); const EVP_CIPHER *cipher = NULL; int out_len; @@ -98,10 +98,11 @@ esp_encrypt_aes_cbc (ipsec_crypto_alg_t alg, if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE)) return; - if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_encrypt_alg)) + if (PREDICT_FALSE + (alg != em->per_thread_data[thread_index].last_encrypt_alg)) { cipher = em->esp_crypto_algs[alg].type; - em->per_thread_data[cpu_index].last_encrypt_alg = alg; + em->per_thread_data[thread_index].last_encrypt_alg = alg; } EVP_EncryptInit_ex (ctx, cipher, NULL, key, iv); @@ -119,11 +120,11 @@ esp_encrypt_node_fn (vlib_main_t * vm, n_left_from = from_frame->n_vectors; ipsec_main_t *im = &ipsec_main; u32 *recycle = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); ipsec_alloc_empty_buffers (vm, im); - u32 *empty_buffers = im->empty_buffers[cpu_index]; + u32 *empty_buffers = im->empty_buffers[thread_index]; if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from)) { diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c index 2c1074d8..3f9978a7 100644 --- a/src/vnet/ipsec/ikev2.c +++ b/src/vnet/ipsec/ikev2.c @@ -303,16 +303,16 @@ static void ikev2_delete_sa (ikev2_sa_t * sa) { ikev2_main_t *km = &ikev2_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); uword *p; ikev2_sa_free_all_vec (sa); - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi); + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi); if (p) { - hash_unset (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi); - pool_put (km->per_thread_data[cpu_index].sas, sa); + hash_unset (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi); + pool_put (km->per_thread_data[thread_index].sas, sa); } } @@ -776,29 +776,31 @@ ikev2_initial_contact_cleanup (ikev2_sa_t * sa) ikev2_sa_t *tmp; u32 i, *delete = 0; ikev2_child_sa_t *c; - u32 cpu_index = os_get_cpu_number 
(); + u32 thread_index = vlib_get_thread_index (); if (!sa->initial_contact) return; /* find old IKE SAs with the same authenticated identity */ /* *INDENT-OFF* */ - pool_foreach (tmp, km->per_thread_data[cpu_index].sas, ({ + pool_foreach (tmp, km->per_thread_data[thread_index].sas, ({ if (tmp->i_id.type != sa->i_id.type || vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) || memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data))) continue; if (sa->rspi != tmp->rspi) - vec_add1(delete, tmp - km->per_thread_data[cpu_index].sas); + vec_add1(delete, tmp - km->per_thread_data[thread_index].sas); })); /* *INDENT-ON* */ for (i = 0; i < vec_len (delete); i++) { - tmp = pool_elt_at_index (km->per_thread_data[cpu_index].sas, delete[i]); - vec_foreach (c, tmp->childs) - ikev2_delete_tunnel_interface (km->vnet_main, tmp, c); + tmp = + pool_elt_at_index (km->per_thread_data[thread_index].sas, delete[i]); + vec_foreach (c, + tmp->childs) ikev2_delete_tunnel_interface (km->vnet_main, + tmp, c); ikev2_delete_sa (tmp); } @@ -1922,10 +1924,10 @@ ikev2_retransmit_sa_init (ike_header_t * ike, { ikev2_main_t *km = &ikev2_main; ikev2_sa_t *sa; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* *INDENT-OFF* */ - pool_foreach (sa, km->per_thread_data[cpu_index].sas, ({ + pool_foreach (sa, km->per_thread_data[thread_index].sas, ({ if (sa->ispi == clib_net_to_host_u64(ike->ispi) && sa->iaddr.as_u32 == iaddr.as_u32 && sa->raddr.as_u32 == raddr.as_u32) @@ -2036,7 +2038,7 @@ ikev2_node_fn (vlib_main_t * vm, u32 n_left_from, *from, *to_next; ikev2_next_t next_index; ikev2_main_t *km = &ikev2_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -2134,11 +2136,14 @@ ikev2_node_fn (vlib_main_t * vm, if (sa0->state == IKEV2_STATE_SA_INIT) { /* add SA to the pool */ - pool_get (km->per_thread_data[cpu_index].sas, sa0); + pool_get (km->per_thread_data[thread_index].sas, + sa0); clib_memcpy (sa0, &sa, sizeof (*sa0)); - hash_set (km->per_thread_data[cpu_index].sa_by_rspi, + hash_set (km-> + per_thread_data[thread_index].sa_by_rspi, sa0->rspi, - sa0 - km->per_thread_data[cpu_index].sas); + sa0 - + km->per_thread_data[thread_index].sas); } else { @@ -2169,11 +2174,11 @@ ikev2_node_fn (vlib_main_t * vm, if (sa0->state == IKEV2_STATE_SA_INIT) { /* add SA to the pool */ - pool_get (km->per_thread_data[cpu_index].sas, sa0); + pool_get (km->per_thread_data[thread_index].sas, sa0); clib_memcpy (sa0, &sa, sizeof (*sa0)); - hash_set (km->per_thread_data[cpu_index].sa_by_rspi, + hash_set (km->per_thread_data[thread_index].sa_by_rspi, sa0->rspi, - sa0 - km->per_thread_data[cpu_index].sas); + sa0 - km->per_thread_data[thread_index].sas); } else { @@ -2184,12 +2189,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) @@ -2240,12 +2246,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, 
clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) @@ -2305,12 +2312,13 @@ ikev2_node_fn (vlib_main_t * vm, else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA) { uword *p; - p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, + p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, clib_net_to_host_u64 (ike0->rspi)); if (p) { - sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas, - p[0]); + sa0 = + pool_elt_at_index (km->per_thread_data[thread_index].sas, + p[0]); r = ikev2_retransmit_resp (sa0, ike0); if (r == 1) diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 58f0f145..c884e360 100644 --- a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -324,21 +324,21 @@ int ipsec_set_interface_key (vnet_main_t * vnm, u32 hw_if_index, always_inline void ipsec_alloc_empty_buffers (vlib_main_t * vm, ipsec_main_t * im) { - u32 cpu_index = os_get_cpu_number (); - uword l = vec_len (im->empty_buffers[cpu_index]); + u32 thread_index = vlib_get_thread_index (); + uword l = vec_len (im->empty_buffers[thread_index]); uword n_alloc = 0; if (PREDICT_FALSE (l < VLIB_FRAME_SIZE)) { - if (!im->empty_buffers[cpu_index]) + if (!im->empty_buffers[thread_index]) { - vec_alloc (im->empty_buffers[cpu_index], 2 * VLIB_FRAME_SIZE); + vec_alloc (im->empty_buffers[thread_index], 2 * VLIB_FRAME_SIZE); } - n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[cpu_index] + l, + n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[thread_index] + l, 2 * VLIB_FRAME_SIZE - l); - _vec_len (im->empty_buffers[cpu_index]) = l + n_alloc; + _vec_len (im->empty_buffers[thread_index]) = l + n_alloc; } } diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c index dc882004..ed124894 100644 --- a/src/vnet/ipsec/ipsec_if.c +++ b/src/vnet/ipsec/ipsec_if.c @@ -99,7 +99,7 @@ static int ipsec_add_del_tunnel_if_rpc_callback (ipsec_add_del_tunnel_args_t * a) { vnet_main_t *vnm = vnet_get_main (); - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); return ipsec_add_del_tunnel_if_internal (vnm, a); } diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h index dd1130a6..e21a1616 100644 --- a/src/vnet/l2/l2_bvi.h +++ b/src/vnet/l2/l2_bvi.h @@ -97,7 +97,7 @@ l2_to_bvi (vlib_main_t * vlib_main, vlib_increment_combined_counter (vnet_main->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_main->cpu_index, + vlib_main->thread_index, vnet_buffer (b0)->sw_if_index[VLIB_RX], 1, vlib_buffer_length_in_chain (vlib_main, b0)); return TO_BVI_ERR_OK; diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c index 041ff38d..e5d6878a 100644 --- a/src/vnet/l2/l2_input.c +++ b/src/vnet/l2/l2_input.c @@ -117,7 +117,7 @@ typedef enum static_always_inline void classify_and_dispatch (vlib_main_t * vm, vlib_node_runtime_t * node, - u32 cpu_index, + u32 thread_index, l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0) { /* @@ -237,7 +237,7 @@ l2input_node_inline (vlib_main_t * vm, u32 n_left_from, *from, *to_next; l2input_next_t next_index; l2input_main_t *msm = &l2input_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; /* number of packets to process */ @@ -350,10 +350,10 @@ l2input_node_inline (vlib_main_t * vm, vlib_node_increment_counter (vm, 
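/* Editorial note: handlers such as ipsec_add_del_tunnel_if_rpc_callback
 * above must run on the main thread; workers are expected to reach them via
 * vl_api_rpc_call_main_thread (patched the same way further down), which
 * calls the function directly when vlib_get_thread_index () == 0 and queues
 * an RPC otherwise.  Minimal guard sketch, name hypothetical:
 */
static void
main_thread_only_rpc_sketch (void *arg)
{
  ASSERT (vlib_get_thread_index () == 0);
  /* ... safe to touch main-thread-owned state here ... */
}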
l2input_node.index, L2INPUT_ERROR_L2INPUT, 4); - classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0); - classify_and_dispatch (vm, node, cpu_index, msm, b1, &next1); - classify_and_dispatch (vm, node, cpu_index, msm, b2, &next2); - classify_and_dispatch (vm, node, cpu_index, msm, b3, &next3); + classify_and_dispatch (vm, node, thread_index, msm, b0, &next0); + classify_and_dispatch (vm, node, thread_index, msm, b1, &next1); + classify_and_dispatch (vm, node, thread_index, msm, b2, &next2); + classify_and_dispatch (vm, node, thread_index, msm, b3, &next3); /* verify speculative enqueues, maybe switch current next frame */ /* if next0==next1==next_index then nothing special needs to be done */ @@ -393,7 +393,7 @@ l2input_node_inline (vlib_main_t * vm, vlib_node_increment_counter (vm, l2input_node.index, L2INPUT_ERROR_L2INPUT, 1); - classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0); + classify_and_dispatch (vm, node, thread_index, msm, b0, &next0); /* verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c index 00f22571..e17b2a16 100644 --- a/src/vnet/l2/l2_output.c +++ b/src/vnet/l2/l2_output.c @@ -643,11 +643,11 @@ l2output_create_output_node_mapping (vlib_main_t * vlib_main, vnet_main_t * vnet hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); - uword cpu_number; + uword thread_index; - cpu_number = os_get_cpu_number (); + thread_index = vlib_get_thread_index (); - if (cpu_number) + if (thread_index) { u32 oldflags; diff --git a/src/vnet/l2tp/decap.c b/src/vnet/l2tp/decap.c index e8986935..46104129 100644 --- a/src/vnet/l2tp/decap.c +++ b/src/vnet/l2tp/decap.c @@ -149,7 +149,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) /* per-mapping byte stats include the ethernet header */ vlib_increment_combined_counter (&lm->counter_main, - os_get_cpu_number (), + vlib_get_thread_index (), counter_index, 1 /* packet_increment */ , vlib_buffer_length_in_chain (vm, b) + sizeof (ethernet_header_t)); diff --git a/src/vnet/l2tp/encap.c b/src/vnet/l2tp/encap.c index ed7a9580..dcdfde4b 100644 --- a/src/vnet/l2tp/encap.c +++ b/src/vnet/l2tp/encap.c @@ -124,7 +124,7 @@ last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 bi) /* per-mapping byte stats include the ethernet header */ vlib_increment_combined_counter (&lm->counter_main, - os_get_cpu_number (), + vlib_get_thread_index (), counter_index, 1 /* packet_increment */ , vlib_buffer_length_in_chain (vm, b)); diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c index cb94d7e7..3dedc447 100644 --- a/src/vnet/l2tp/l2tp.c +++ b/src/vnet/l2tp/l2tp.c @@ -157,7 +157,7 @@ test_counters_command_fn (vlib_main_t * vm, u32 session_index; u32 counter_index; u32 nincr = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); /* *INDENT-OFF* */ pool_foreach (session, lm->sessions, @@ -167,11 +167,11 @@ test_counters_command_fn (vlib_main_t * vm, session_index_to_counter_index (session_index, SESSION_COUNTER_USER_TO_NETWORK); vlib_increment_combined_counter (&lm->counter_main, - cpu_index, + thread_index, counter_index, 1/*pkt*/, 1111 /*bytes*/); vlib_increment_combined_counter (&lm->counter_main, - cpu_index, + thread_index, counter_index+1, 1/*pkt*/, 2222 /*bytes*/); nincr++; diff --git a/src/vnet/lisp-gpe/decap.c b/src/vnet/lisp-gpe/decap.c index d887a95f..68769710 100644 --- a/src/vnet/lisp-gpe/decap.c +++ b/src/vnet/lisp-gpe/decap.c @@ -103,7 
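/* Editorial note: every counter bump in these hunks has the same call
 * shape -- the second argument selects the per-thread counter row, so
 * workers increment without contention.  Hedged usage sketch (the counter
 * main and index are assumptions):
 */
static void
count_rx_packet_sketch (vlib_main_t * vm, vlib_combined_counter_main_t * cm,
			u32 counter_index, vlib_buffer_t * b)
{
  vlib_increment_combined_counter (cm, vlib_get_thread_index (),
				   counter_index, 1 /* packets */ ,
				   vlib_buffer_length_in_chain (vm, b));
}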
+103,7 @@ next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index) } static_always_inline void -incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length, +incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length, u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes) { @@ -122,7 +122,7 @@ incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, *last_sw_if_index, + thread_index, *last_sw_if_index, *n_packets, *n_bytes); } *last_sw_if_index = sw_if_index; @@ -150,11 +150,11 @@ static uword lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 is_v4) { - u32 n_left_from, next_index, *from, *to_next, cpu_index; + u32 n_left_from, next_index, *from, *to_next, thread_index; u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0; lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main (); - cpu_index = os_get_cpu_number (); + thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -267,7 +267,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si0) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b0), si0[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0]; @@ -282,7 +282,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si1) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b1), si1[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b1)->sw_if_index[VLIB_RX] = si1[0]; @@ -397,7 +397,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (si0) { - incr_decap_stats (lgm->vnet_main, cpu_index, + incr_decap_stats (lgm->vnet_main, thread_index, vlib_buffer_length_in_chain (vm, b0), si0[0], &last_sw_if_index, &n_packets, &n_bytes); vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0]; @@ -430,7 +430,7 @@ lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } /* flush iface stats */ - incr_decap_stats (lgm->vnet_main, cpu_index, 0, ~0, &last_sw_if_index, + incr_decap_stats (lgm->vnet_main, thread_index, 0, ~0, &last_sw_if_index, &n_packets, &n_bytes); vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index, LISP_GPE_ERROR_NO_TUNNEL, drops); diff --git a/src/vnet/lldp/lldp_input.c b/src/vnet/lldp/lldp_input.c index 762743d0..e88f6fdb 100644 --- a/src/vnet/lldp/lldp_input.c +++ b/src/vnet/lldp/lldp_input.c @@ -35,7 +35,7 @@ typedef struct static void lldp_rpc_update_peer_cb (const lldp_intf_update_t * a) { - ASSERT (os_get_cpu_number () == 0); + ASSERT (vlib_get_thread_index () == 0); lldp_intf_t *n = lldp_get_intf (&lldp_main, a->hw_if_index); if (!n) diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c index 1a20d704..e39b6f14 100644 --- a/src/vnet/map/ip4_map.c +++ b/src/vnet/map/ip4_map.c @@ -248,7 +248,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next_index = node->cached_next_index; map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -377,7 +377,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip40) ? 
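/* Editorial note: incr_decap_stats above batches packet/byte totals per
 * interface and only touches the (per-thread) counter row when the
 * interface changes; the diff flushes the tail by calling it once more
 * with length 0 and sw_if_index ~0.  Sketch of that batching discipline,
 * under the same assumptions:
 */
static void
batched_rx_stats_sketch (vnet_main_t * vnm, u32 thread_index, u32 length,
			 u32 sw_if_index, u32 * last_sw_if_index,
			 u32 * n_packets, u32 * n_bytes)
{
  vnet_interface_main_t *im = &vnm->interface_main;

  if (PREDICT_TRUE (sw_if_index == *last_sw_if_index))
    {
      *n_packets += 1;
      *n_bytes += length;
      return;
    }
  if (*last_sw_if_index != ~0)	/* flush the previous run */
    vlib_increment_combined_counter (im->combined_sw_if_counters +
				     VNET_INTERFACE_COUNTER_RX,
				     thread_index, *last_sw_if_index,
				     *n_packets, *n_bytes);
  *last_sw_if_index = sw_if_index;
  *n_packets = 1;
  *n_bytes = length;
}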
IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip6h0->payload_length) + @@ -409,7 +409,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index1, 1, clib_net_to_host_u16 (ip6h1->payload_length) + @@ -520,7 +520,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip6h0->payload_length) + @@ -564,7 +564,7 @@ ip4_map_reass (vlib_main_t * vm, next_index = node->cached_next_index; map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 *fragments_to_drop = NULL; u32 *fragments_to_loopback = NULL; @@ -694,8 +694,8 @@ ip4_map_reass (vlib_main_t * vm, { if (error0 == MAP_ERROR_NONE) vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, map_domain_index0, - 1, + thread_index, + map_domain_index0, 1, clib_net_to_host_u16 (ip60->payload_length) + 40); next0 = diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c index b63d76bf..5f2bcbf9 100644 --- a/src/vnet/map/ip4_map_t.c +++ b/src/vnet/map/ip4_map_t.c @@ -477,7 +477,7 @@ ip4_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -520,7 +520,7 @@ ip4_map_t_icmp (vlib_main_t * vm, if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. map_domain_index, 1, len0); } @@ -1051,7 +1051,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -1158,7 +1158,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. map_domain_index, 1, clib_net_to_host_u16 (ip40-> @@ -1169,7 +1169,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p1)->map_t. map_domain_index, 1, clib_net_to_host_u16 (ip41-> @@ -1252,7 +1252,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, - cpu_index, + thread_index, vnet_buffer (p0)->map_t. 
map_domain_index, 1, clib_net_to_host_u16 (ip40-> diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c index f7eb768f..63ada962 100644 --- a/src/vnet/map/ip6_map.c +++ b/src/vnet/map/ip6_map.c @@ -172,7 +172,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_node_get_runtime (vm, ip6_map_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -319,7 +319,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next0; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); @@ -352,7 +352,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next1; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index1, 1, clib_net_to_host_u16 (ip41->length)); @@ -505,7 +505,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) IP6_MAP_NEXT_IP4_REWRITE : next0; } vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); @@ -820,7 +820,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index); map_main_t *mm = &map_main; vlib_combined_counter_main_t *cm = mm->domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 *fragments_to_drop = NULL; u32 *fragments_to_loopback = NULL; @@ -958,8 +958,8 @@ ip6_map_ip4_reass (vlib_main_t * vm, { if (error0 == MAP_ERROR_NONE) vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, map_domain_index0, - 1, + thread_index, + map_domain_index0, 1, clib_net_to_host_u16 (ip40->length)); next0 = @@ -1015,7 +1015,7 @@ ip6_map_icmp_relay (vlib_main_t * vm, vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index); map_main_t *mm = &map_main; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u16 *fragment_ids, *fid; from = vlib_frame_vector_args (frame); @@ -1143,7 +1143,8 @@ ip6_map_icmp_relay (vlib_main_t * vm, ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20); new_icmp40->checksum = ~ip_csum_fold (sum); - vlib_increment_simple_counter (&mm->icmp_relayed, cpu_index, 0, 1); + vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0, + 1); error: if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c index eb3996c2..99151678 100644 --- a/src/vnet/map/ip6_map_t.c +++ b/src/vnet/map/ip6_map_t.c @@ -448,7 +448,7 @@ ip6_map_t_icmp (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -493,7 +493,7 @@ ip6_map_t_icmp (vlib_main_t * vm, if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, len0); @@ -1051,7 +1051,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, 
vlib_frame_t * frame) vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_t_node.index); vlib_combined_counter_main_t *cm = map_main.domain_counters; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -1218,7 +1218,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, clib_net_to_host_u16 @@ -1229,7 +1229,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p1)-> map_t.map_domain_index, 1, clib_net_to_host_u16 @@ -1403,7 +1403,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, - cpu_index, + thread_index, vnet_buffer (p0)-> map_t.map_domain_index, 1, clib_net_to_host_u16 diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 893c4511..1b9bdd05 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -76,7 +76,7 @@ mpls_input_inline (vlib_main_t * vm, u32 n_left_from, next_index, * from, * to_next; mpls_input_runtime_t * rt; mpls_main_t * mm; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); vlib_simple_counter_main_t * cm; vnet_main_t * vnm = vnet_get_main(); @@ -151,7 +151,7 @@ mpls_input_inline (vlib_main_t * vm, next0 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); } if (PREDICT_FALSE(h1[3] == 0)) @@ -164,7 +164,7 @@ mpls_input_inline (vlib_main_t * vm, next1 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); } if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) @@ -215,7 +215,7 @@ mpls_input_inline (vlib_main_t * vm, { next0 = MPLS_INPUT_NEXT_LOOKUP; vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); - vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); } if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index 475bb204..ace6a70f 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -67,7 +67,7 @@ mpls_lookup (vlib_main_t * vm, vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, next_index, * from, * to_next; mpls_main_t * mm = &mpls_main; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -220,16 +220,16 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, 
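/* Editorial note: the mpls_input hunks use the simple (packet-only)
 * counter variant, which takes the same per-thread row argument as the
 * combined counters.  Minimal usage sketch:
 */
static void
count_mpls_rx_sketch (vlib_simple_counter_main_t * cm, u32 sw_if_index)
{
  vlib_increment_simple_counter (cm, vlib_get_thread_index (),
				 sw_if_index, 1 /* packets */ );
}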
lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter - (cm, cpu_index, lbi2, 1, + (cm, thread_index, lbi2, 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter - (cm, cpu_index, lbi3, 1, + (cm, thread_index, lbi3, 1, vlib_buffer_length_in_chain (vm, b3)); /* @@ -351,7 +351,7 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b0)); /* @@ -440,7 +440,7 @@ mpls_load_balance (vlib_main_t * vm, { vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 next; from = vlib_frame_vector_args (frame); @@ -536,10 +536,10 @@ mpls_load_balance (vlib_main_t * vm, vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, lbi1, 1, + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) @@ -597,7 +597,7 @@ mpls_load_balance (vlib_main_t * vm, vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, lbi0, 1, + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_validate_buffer_enqueue_x1 (vm, node, next, diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c index 08018fd1..d90dec21 100644 --- a/src/vnet/mpls/mpls_output.c +++ b/src/vnet/mpls/mpls_output.c @@ -64,12 +64,12 @@ mpls_output_inline (vlib_main_t * vm, vlib_frame_t * from_frame, int is_midchain) { - u32 n_left_from, next_index, * from, * to_next, cpu_index; + u32 n_left_from, next_index, * from, * to_next, thread_index; vlib_node_runtime_t * error_node; u32 n_left_to_next; mpls_main_t *mm; - cpu_index = os_get_cpu_number(); + thread_index = vlib_get_thread_index(); error_node = vlib_node_get_runtime (vm, mpls_output_node.index); from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -137,13 +137,13 @@ mpls_output_inline (vlib_main_t * vm, /* Bump the adj counters for packet and bytes */ vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1); @@ -245,7 +245,7 @@ mpls_output_inline (vlib_main_t * vm, vlib_increment_combined_counter (&adjacency_counters, - cpu_index, + thread_index, adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0); diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index 2649798b..597ae060 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -893,7 +893,7 @@ pg_generate_set_lengths (pg_main_t * pg, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), + vlib_get_thread_index (), si->sw_if_index, n_buffers, length_sum); } @@ -1266,7 +1266,7 @@ pg_stream_fill_helper (pg_main_t * pg, l += vlib_buffer_index_length_in_chain (vm, buffers[i]); vlib_increment_combined_counter 
(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number (), + vlib_get_thread_index (), si->sw_if_index, n_alloc, l); s->current_replay_packet_index += n_alloc; s->current_replay_packet_index %= diff --git a/src/vnet/replication.c b/src/vnet/replication.c index 86d922b5..233a8c2f 100644 --- a/src/vnet/replication.c +++ b/src/vnet/replication.c @@ -31,16 +31,16 @@ replication_prep (vlib_main_t * vm, { replication_main_t *rm = &replication_main; replication_context_t *ctx; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; ip4_header_t *ip; u32 ctx_id; /* Allocate a context, reserve context 0 */ - if (PREDICT_FALSE (rm->contexts[cpu_number] == 0)) - pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); + if (PREDICT_FALSE (rm->contexts[thread_index] == 0)) + pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES); - pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES); - ctx_id = ctx - rm->contexts[cpu_number]; + pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES); + ctx_id = ctx - rm->contexts[thread_index]; /* Save state from vlib buffer */ ctx->saved_free_list_index = b0->free_list_index; @@ -94,11 +94,11 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) { replication_main_t *rm = &replication_main; replication_context_t *ctx; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; ip4_header_t *ip; /* Get access to the replication context */ - ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); + ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count); /* Restore vnet buffer state */ clib_memcpy (vnet_buffer (b0), ctx->vnet_buffer, @@ -133,7 +133,7 @@ replication_recycle (vlib_main_t * vm, vlib_buffer_t * b0, u32 is_last) b0->flags &= ~VLIB_BUFFER_RECYCLE; /* Free context back to its pool */ - pool_put (rm->contexts[cpu_number], ctx); + pool_put (rm->contexts[thread_index], ctx); } return ctx; @@ -160,7 +160,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) replication_main_t *rm = &replication_main; replication_context_t *ctx; u32 feature_node_index = 0; - uword cpu_number = vm->cpu_index; + uword thread_index = vm->thread_index; /* * All buffers in the list are destined to the same recycle node. @@ -172,7 +172,7 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl) { bi0 = fl->buffers[0]; b0 = vlib_get_buffer (vm, bi0); - ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count); + ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count); feature_node_index = ctx->recycle_node_index; } diff --git a/src/vnet/replication.h b/src/vnet/replication.h index 5dc554c9..ce4b3ff1 100644 --- a/src/vnet/replication.h +++ b/src/vnet/replication.h @@ -100,7 +100,7 @@ replication_get_ctx (vlib_buffer_t * b0) replication_main_t *rm = &replication_main; return replication_is_recycled (b0) ? 
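/* Editorial note: replication_prep/recycle above index a per-thread
 * context pool with vm->thread_index, the index of the vlib_main_t the
 * worker is running on, which on that worker equals
 * vlib_get_thread_index ().  Sketch of the reserve-slot-0 idiom the diff
 * uses:
 */
static u32
replication_ctx_alloc_sketch (replication_main_t * rm, vlib_main_t * vm)
{
  replication_context_t *ctx;
  uword thread_index = vm->thread_index;

  /* first allocation on this thread burns index 0, so a recycle_count
     of 0 can safely mean "no context" */
  if (PREDICT_FALSE (rm->contexts[thread_index] == 0))
    pool_get_aligned (rm->contexts[thread_index], ctx,
		      CLIB_CACHE_LINE_BYTES);

  pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
  return ctx - rm->contexts[thread_index];	/* ctx_id */
}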
- pool_elt_at_index (rm->contexts[os_get_cpu_number ()], + pool_elt_at_index (rm->contexts[vlib_get_thread_index ()], b0->recycle_count) : 0; } diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c index b86e87d9..dd211c51 100644 --- a/src/vnet/session/node.c +++ b/src/vnet/session/node.c @@ -311,7 +311,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, unix_shared_memory_queue_t *q; application_t *app; int n_tx_packets = 0; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; int i, rv; f64 now = vlib_time_now (vm); diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c index 2e3d56de..6d72a506 100755 --- a/src/vnet/sr/sr_localsid.c +++ b/src/vnet/sr/sr_localsid.c @@ -887,7 +887,7 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -974,26 +974,26 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (((next1 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b1)); + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter (((next2 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b2)); + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter (((next3 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b3)); + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, bi2, bi3, @@ -1062,8 +1062,8 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -1103,7 +1103,7 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; next_index = node->cached_next_index; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); while (n_left_from > 0) { @@ -1205,26 +1205,26 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? 
&(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (((next1 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b1)); + &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b1)); vlib_increment_combined_counter (((next2 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b2)); + &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b2)); vlib_increment_combined_counter (((next3 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b3)); + &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b3)); vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, bi2, bi3, @@ -1295,8 +1295,8 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_increment_combined_counter (((next0 == SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : - &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, - vlib_buffer_length_in_chain (vm, b0)); + &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids, + 1, vlib_buffer_length_in_chain (vm, b0)); vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index e3705060..c1567aa0 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -174,7 +174,7 @@ tclient_thread_fn (void *arg) pthread_sigmask (SIG_SETMASK, &s, 0); } - clib_per_cpu_mheaps[os_get_cpu_number ()] = clib_per_cpu_mheaps[0]; + clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0]; while (1) { diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index b2a371e2..b6c34828 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -646,10 +646,10 @@ const static transport_proto_vft_t tcp6_proto = { void tcp_timer_keep_handler (u32 conn_index) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; - tc = tcp_connection_get (conn_index, cpu_index); + tc = tcp_connection_get (conn_index, thread_index); tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID; tcp_connection_close (tc); @@ -675,10 +675,10 @@ tcp_timer_establish_handler (u32 conn_index) void tcp_timer_waitclose_handler (u32 conn_index) { - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; - tc = tcp_connection_get (conn_index, cpu_index); + tc = tcp_connection_get (conn_index, thread_index); tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID; /* Session didn't come back with a close(). 
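/* Editorial note: each TCP timer handler below re-resolves its connection
 * from (conn_index, current thread), because connection pools are sharded
 * per thread and the timer fires on the owning worker.  Common shape,
 * hedged (the null check is defensive and not in the original handlers):
 */
static void
tcp_timer_handler_sketch (u32 conn_index, u32 timer_id)
{
  u32 thread_index = vlib_get_thread_index ();
  tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);

  if (!tc)
    return;			/* connection may already be gone */
  tc->timers[timer_id] = TCP_TIMER_HANDLE_INVALID;
  /* ... timer-specific action, e.g. tcp_connection_close (tc) ... */
}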
Send FIN either way diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 0090e15e..eaca672c 100644 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -343,7 +343,7 @@ typedef enum _tcp_dbg_evt } \ else \ { \ - u32 _thread_index = os_get_cpu_number (); \ + u32 _thread_index = vlib_get_thread_index (); \ _tc = tcp_connection_get (_tc_index, _thread_index); \ } \ ELOG_TYPE_DECLARE (_e) = \ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index a8224dc2..7e9fa47b 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1142,7 +1142,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; tcp_main_t *tm = vnet_get_tcp_main (); from = vlib_frame_vector_args (from_frame); @@ -1332,7 +1332,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { tcp_main_t *tm = vnet_get_tcp_main (); u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; from = vlib_frame_vector_args (from_frame); @@ -1634,7 +1634,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, { tcp_main_t *tm = vnet_get_tcp_main (); u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index, errors = 0; + u32 my_thread_index = vm->thread_index, errors = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -1989,7 +1989,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; tcp_main_t *tm = vnet_get_tcp_main (); u8 sst = is_ip4 ? 
SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP; @@ -2243,7 +2243,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; tcp_main_t *tm = vnet_get_tcp_main (); from = vlib_frame_vector_args (from_frame); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index ea157bd7..e18bfad7 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -387,8 +387,8 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, #define tcp_get_free_buffer_index(tm, bidx) \ do { \ u32 *my_tx_buffers, n_free_buffers; \ - u32 cpu_index = os_get_cpu_number(); \ - my_tx_buffers = tm->tx_buffers[cpu_index]; \ + u32 thread_index = vlib_get_thread_index(); \ + my_tx_buffers = tm->tx_buffers[thread_index]; \ if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \ { \ n_free_buffers = 32; /* TODO config or macro */ \ @@ -396,7 +396,7 @@ do { \ _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \ tm->vlib_main, my_tx_buffers, n_free_buffers, \ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \ - tm->tx_buffers[cpu_index] = my_tx_buffers; \ + tm->tx_buffers[thread_index] = my_tx_buffers; \ } \ /* buffer shortage */ \ if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \ @@ -408,8 +408,8 @@ do { \ #define tcp_return_buffer(tm) \ do { \ u32 *my_tx_buffers; \ - u32 cpu_index = os_get_cpu_number(); \ - my_tx_buffers = tm->tx_buffers[cpu_index]; \ + u32 thread_index = vlib_get_thread_index(); \ + my_tx_buffers = tm->tx_buffers[thread_index]; \ _vec_len (my_tx_buffers) +=1; \ } while (0) @@ -942,7 +942,7 @@ tcp_send_ack (tcp_connection_t * tc) void tcp_timer_delack_handler (u32 index) { - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; tc = tcp_connection_get (index, thread_index); @@ -1022,7 +1022,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) { tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; u32 bi, snd_space, n_bytes; @@ -1152,7 +1152,7 @@ tcp_timer_persist_handler (u32 index) { tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; u32 bi, n_bytes; @@ -1313,7 +1313,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_frame_t * from_frame, int is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -1524,7 +1524,7 @@ tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, u8 is_ip4) { u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index 4b22109b..810278e6 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -70,7 +70,7 @@ udp4_uri_input_node_fn (vlib_main_t * vm, udp4_uri_input_next_t next_index; udp_uri_main_t *um = vnet_get_udp_main (); session_manager_main_t *smm = vnet_get_session_manager_main (); - 
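/* Editorial note: the tcp_get_free_buffer_index/tcp_return_buffer macros
 * above keep a small per-thread vector of preallocated buffer indices and
 * refill it in batches of 32.  Function form of the same idea; the batch
 * size and free list come from the diff, the function itself and the
 * vec_validate step are illustrative:
 */
static int
tx_buffer_get_sketch (tcp_main_t * tm, u32 * bidx)
{
  u32 thread_index = vlib_get_thread_index ();
  u32 *my_tx_buffers = tm->tx_buffers[thread_index];

  if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))
    {
      u32 n_free_buffers = 32;	/* TODO config or macro, per the diff */
      vec_validate (my_tx_buffers, n_free_buffers - 1);
      _vec_len (my_tx_buffers) =
	vlib_buffer_alloc_from_free_list (tm->vlib_main, my_tx_buffers,
					  n_free_buffers,
					  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
      tm->tx_buffers[thread_index] = my_tx_buffers;
      if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))
	return -1;		/* buffer shortage */
    }
  *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1];
  _vec_len (my_tx_buffers) -= 1;
  return 0;
}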
u32 my_thread_index = vm->cpu_index; + u32 my_thread_index = vm->thread_index; u8 my_enqueue_epoch; u32 *session_indices_to_enqueue; static u32 serial_number; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index fb1a8bac..0fc62f6c 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -366,7 +366,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, vlib_increment_combined_counter ( vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number(), ti->sw_if_index, + vlib_get_thread_index(), ti->sw_if_index, 1, n_bytes_in_packet); if (PREDICT_FALSE(n_trace > 0)) { diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 2cfcc92f..ac674653 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -189,7 +189,7 @@ tuntap_tx (vlib_main_t * vm, /* Update tuntap interface output stats. */ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - vm->cpu_index, + vm->thread_index, tm->sw_if_index, n_packets, n_bytes); @@ -297,7 +297,7 @@ tuntap_rx (vlib_main_t * vm, vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - os_get_cpu_number(), + vlib_get_thread_index(), tm->sw_if_index, 1, n_bytes_in_packet); diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c index 22ab4b62..d4fe4231 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/vnet/vxlan-gpe/decap.c @@ -115,7 +115,7 @@ vxlan_gpe_input (vlib_main_t * vm, vxlan4_gpe_tunnel_key_t last_key4; vxlan6_gpe_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -342,7 +342,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -427,7 +427,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len1; stats_sw_if_index = sw_if_index1; @@ -588,7 +588,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -615,7 +615,7 @@ vxlan_gpe_input (vlib_main_t * vm, if (stats_n_packets) { vlib_increment_combined_counter ( - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, cpu_index, + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c index 3a486e56..67ed94b4 100644 --- a/src/vnet/vxlan-gpe/encap.c +++ b/src/vnet/vxlan-gpe/encap.c @@ -151,7 +151,7 @@ vxlan_gpe_encap (vlib_main_t * vm, vnet_main_t * vnm = ngm->vnet_main; vnet_interface_main_t * im = &vnm->interface_main; 
u32 pkts_encapsulated = 0; - u32 cpu_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; from = vlib_frame_vector_args (from_frame); @@ -253,7 +253,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_sw_if_index = sw_if_index0; stats_n_packets = 2; stats_n_bytes = len0 + len1; @@ -262,10 +262,10 @@ vxlan_gpe_encap (vlib_main_t * vm, { vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index0, 1, len0); + thread_index, sw_if_index0, 1, len0); vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index1, 1, len1); + thread_index, sw_if_index1, 1, len1); } } @@ -335,7 +335,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter ( im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; stats_sw_if_index = sw_if_index0; @@ -359,7 +359,7 @@ vxlan_gpe_encap (vlib_main_t * vm, if (stats_n_packets) { vlib_increment_combined_counter ( - im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, cpu_index, + im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c index 514b2c99..2acb1f6f 100644 --- a/src/vnet/vxlan/decap.c +++ b/src/vnet/vxlan/decap.c @@ -81,7 +81,7 @@ vxlan_input (vlib_main_t * vm, vxlan4_tunnel_key_t last_key4; vxlan6_tunnel_key_t last_key6; u32 pkts_decapsulated = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; if (is_ip4) @@ -314,7 +314,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -468,7 +468,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len1; @@ -674,7 +674,7 @@ vxlan_input (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -711,7 +711,7 @@ vxlan_input (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c index 5b63064a..4cfbbc23 100644 --- a/src/vnet/vxlan/encap.c +++ b/src/vnet/vxlan/encap.c 
@@ -77,7 +77,7 @@ vxlan_encap_inline (vlib_main_t * vm, vnet_interface_main_t * im = &vnm->interface_main; u32 pkts_encapsulated = 0; u16 old_l0 = 0, old_l1 = 0; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index(); u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; u32 sw_if_index0 = 0, sw_if_index1 = 0; u32 next0 = 0, next1 = 0; @@ -301,7 +301,7 @@ vxlan_encap_inline (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_sw_if_index = sw_if_index0; stats_n_packets = 2; @@ -311,10 +311,10 @@ vxlan_encap_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index0, 1, len0); + thread_index, sw_if_index0, 1, len0); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, sw_if_index1, 1, len1); + thread_index, sw_if_index1, 1, len1); } } @@ -464,7 +464,7 @@ vxlan_encap_inline (vlib_main_t * vm, if (stats_n_packets) vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); stats_n_packets = 1; stats_n_bytes = len0; @@ -496,7 +496,7 @@ vxlan_encap_inline (vlib_main_t * vm, { vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, - cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); node->runtime_data[0] = stats_sw_if_index; } diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c index 042d02e2..4309cd51 100644 --- a/src/vpp/stats/stats.c +++ b/src/vpp/stats/stats.c @@ -66,14 +66,14 @@ _(VNET_IP6_NBR_COUNTERS, vnet_ip6_nbr_counters) void dslock (stats_main_t * sm, int release_hint, int tag) { - u32 thread_id; + u32 thread_index; data_structure_lock_t *l = sm->data_structure_lock; if (PREDICT_FALSE (l == 0)) return; - thread_id = os_get_cpu_number (); - if (l->lock && l->thread_id == thread_id) + thread_index = vlib_get_thread_index (); + if (l->lock && l->thread_index == thread_index) { l->count++; return; @@ -85,7 +85,7 @@ dslock (stats_main_t * sm, int release_hint, int tag) while (__sync_lock_test_and_set (&l->lock, 1)) /* zzzz */ ; l->tag = tag; - l->thread_id = thread_id; + l->thread_index = thread_index; l->count = 1; } @@ -99,14 +99,14 @@ stats_dslock_with_hint (int hint, int tag) void dsunlock (stats_main_t * sm) { - u32 thread_id; + u32 thread_index; data_structure_lock_t *l = sm->data_structure_lock; if (PREDICT_FALSE (l == 0)) return; - thread_id = os_get_cpu_number (); - ASSERT (l->lock && l->thread_id == thread_id); + thread_index = vlib_get_thread_index (); + ASSERT (l->lock && l->thread_index == thread_index); l->count--; if (l->count == 0) { diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h index 118115be..024dc78e 100644 --- a/src/vpp/stats/stats.h +++ b/src/vpp/stats/stats.h @@ -30,7 +30,7 @@ typedef struct { volatile u32 lock; volatile u32 release_hint; - u32 thread_id; + u32 thread_index; u32 count; int tag; } data_structure_lock_t; -- cgit 1.2.3-korg From f55f9b851f59264d737d92c6277a87588c565d24 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 10 May 2017 21:06:28 +0200 Subject: completelly deprecate os_get_cpu_number, replace new occurences Change-Id: 
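/* Editorial note: the stats dslock above records which thread holds the
 * lock so a re-entrant dslock () on the same thread just bumps a count
 * instead of deadlocking.  Recursive-spinlock sketch over the
 * data_structure_lock_t fields shown in the diff:
 */
static void
dslock_sketch (data_structure_lock_t * l, int tag)
{
  u32 thread_index = vlib_get_thread_index ();

  if (l->lock && l->thread_index == thread_index)
    {
      l->count++;		/* recursive acquire by the owner */
      return;
    }
  while (__sync_lock_test_and_set (&l->lock, 1))
    ;				/* spin */
  l->tag = tag;
  l->thread_index = thread_index;
  l->count = 1;
}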
I82c663bc0866c6c68ba354104b0bb059387f4b9d Signed-off-by: Damjan Marion --- src/plugins/flowperpkt/l2_node.c | 20 ++++++++++---------- src/plugins/flowperpkt/node.c | 20 ++++++++++---------- src/plugins/snat/in2out.c | 2 +- src/plugins/snat/out2in.c | 2 +- src/vlib/main.h | 2 +- src/vlib/threads.c | 12 ++---------- src/vlib/threads.h | 3 +-- src/vlib/unix/main.c | 2 +- src/vlibmemory/memory_vlib.c | 2 +- src/vnet/dpo/interface_dpo.c | 8 ++++---- src/vnet/lisp-gpe/lisp_gpe_adjacency.c | 2 +- src/vppinfra/bihash_template.c | 16 ++++++++-------- src/vppinfra/lock.h | 6 +++--- src/vppinfra/mem.h | 6 +++--- src/vppinfra/mhash.c | 2 +- src/vppinfra/mhash.h | 2 +- src/vppinfra/mheap.c | 4 ++-- src/vppinfra/os.h | 20 ++++++++++++++++++-- src/vppinfra/smp.c | 2 +- src/vppinfra/unix-misc.c | 19 +++++++------------ 20 files changed, 77 insertions(+), 75 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c index fdaf81d1..db80e990 100644 --- a/src/plugins/flowperpkt/l2_node.c +++ b/src/plugins/flowperpkt/l2_node.c @@ -102,7 +102,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, u8 * src_mac, u8 * dst_mac, u16 ethertype, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->thread_index; + u32 my_thread_index = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; @@ -116,7 +116,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, vlib_buffer_free_list_t *fl; /* Find or allocate a buffer */ - b0 = fm->l2_buffers_per_worker[my_cpu_number]; + b0 = fm->l2_buffers_per_worker[my_thread_index]; /* Need to allocate a buffer? */ if (PREDICT_FALSE (b0 == 0)) @@ -130,7 +130,7 @@ add_to_flow_record_l2 (vlib_main_t * vm, return; /* Initialize the buffer */ - b0 = fm->l2_buffers_per_worker[my_cpu_number] = + b0 = fm->l2_buffers_per_worker[my_thread_index] = vlib_get_buffer (vm, bi0); fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); @@ -142,16 +142,16 @@ add_to_flow_record_l2 (vlib_main_t * vm, { /* use the current buffer */ bi0 = vlib_get_buffer_index (vm, b0); - offset = fm->l2_next_record_offset_per_worker[my_cpu_number]; + offset = fm->l2_next_record_offset_per_worker[my_thread_index]; } /* Find or allocate a frame */ - f = fm->l2_frames_per_worker[my_cpu_number]; + f = fm->l2_frames_per_worker[my_thread_index]; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - fm->l2_frames_per_worker[my_cpu_number] = f; + fm->l2_frames_per_worker[my_thread_index] = f; /* Enqueue the buffer */ to_next = vlib_frame_vector_args (f); @@ -299,13 +299,13 @@ add_to_flow_record_l2 (vlib_main_t * vm, } vlib_put_frame_to_node (vm, ip4_lookup_node.index, - fm->l2_frames_per_worker[my_cpu_number]); - fm->l2_frames_per_worker[my_cpu_number] = 0; - fm->l2_buffers_per_worker[my_cpu_number] = 0; + fm->l2_frames_per_worker[my_thread_index]); + fm->l2_frames_per_worker[my_thread_index] = 0; + fm->l2_buffers_per_worker[my_thread_index] = 0; offset = 0; } - fm->l2_next_record_offset_per_worker[my_cpu_number] = offset; + fm->l2_next_record_offset_per_worker[my_thread_index] = offset; } void diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c index 0277682d..9bac4166 100644 --- a/src/plugins/flowperpkt/node.c +++ b/src/plugins/flowperpkt/node.c @@ -101,7 +101,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, u32 src_address, u32 dst_address, u8 tos, u64 timestamp, u16 length, int do_flush) { - u32 my_cpu_number = vm->thread_index; 
+ u32 my_thread_index = vm->thread_index; flow_report_main_t *frm = &flow_report_main; ip4_header_t *ip; udp_header_t *udp; @@ -115,7 +115,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, vlib_buffer_free_list_t *fl; /* Find or allocate a buffer */ - b0 = fm->ipv4_buffers_per_worker[my_cpu_number]; + b0 = fm->ipv4_buffers_per_worker[my_thread_index]; /* Need to allocate a buffer? */ if (PREDICT_FALSE (b0 == 0)) @@ -129,7 +129,7 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, return; /* Initialize the buffer */ - b0 = fm->ipv4_buffers_per_worker[my_cpu_number] = + b0 = fm->ipv4_buffers_per_worker[my_thread_index] = vlib_get_buffer (vm, bi0); fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); @@ -141,16 +141,16 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, { /* use the current buffer */ bi0 = vlib_get_buffer_index (vm, b0); - offset = fm->ipv4_next_record_offset_per_worker[my_cpu_number]; + offset = fm->ipv4_next_record_offset_per_worker[my_thread_index]; } /* Find or allocate a frame */ - f = fm->ipv4_frames_per_worker[my_cpu_number]; + f = fm->ipv4_frames_per_worker[my_thread_index]; if (PREDICT_FALSE (f == 0)) { u32 *to_next; f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); - fm->ipv4_frames_per_worker[my_cpu_number] = f; + fm->ipv4_frames_per_worker[my_thread_index] = f; /* Enqueue the buffer */ to_next = vlib_frame_vector_args (f); @@ -300,13 +300,13 @@ add_to_flow_record_ipv4 (vlib_main_t * vm, } vlib_put_frame_to_node (vm, ip4_lookup_node.index, - fm->ipv4_frames_per_worker[my_cpu_number]); - fm->ipv4_frames_per_worker[my_cpu_number] = 0; - fm->ipv4_buffers_per_worker[my_cpu_number] = 0; + fm->ipv4_frames_per_worker[my_thread_index]); + fm->ipv4_frames_per_worker[my_thread_index] = 0; + fm->ipv4_buffers_per_worker[my_thread_index] = 0; offset = 0; } - fm->ipv4_next_record_offset_per_worker[my_cpu_number] = offset; + fm->ipv4_next_record_offset_per_worker[my_thread_index] = offset; } void diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c index f7d29c69..bc86a7a4 100644 --- a/src/plugins/snat/in2out.c +++ b/src/plugins/snat/in2out.c @@ -1514,7 +1514,7 @@ snat_det_in2out_node_fn (vlib_main_t * vm, u32 pkts_processed = 0; snat_main_t * sm = &snat_main; u32 now = (u32) vlib_time_now (vm); - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c index 3d7b106a..824406ab 100644 --- a/src/plugins/snat/out2in.c +++ b/src/plugins/snat/out2in.c @@ -1168,7 +1168,7 @@ snat_det_out2in_node_fn (vlib_main_t * vm, snat_out2in_next_t next_index; u32 pkts_processed = 0; snat_main_t * sm = &snat_main; - u32 thread_index = os_get_cpu_number (); + u32 thread_index = vlib_get_thread_index (); from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; diff --git a/src/vlib/main.h b/src/vlib/main.h index 329bf073..0e8026d1 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -320,7 +320,7 @@ always_inline void vlib_set_queue_signal_callback /* Main routine. 
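/* Editorial note: the flowperpkt hunks rename my_cpu_number to
 * my_thread_index but keep the find-or-allocate discipline: each worker
 * owns one in-progress export buffer and one frame, looked up by its
 * thread index.  Condensed sketch; the main-struct type name is assumed
 * from the plugin, the fields are the diff's:
 */
static vlib_buffer_t *
worker_export_buffer_sketch (vlib_main_t * vm, flowperpkt_main_t * fm)
{
  u32 my_thread_index = vm->thread_index;
  vlib_buffer_t *b0 = fm->ipv4_buffers_per_worker[my_thread_index];

  if (PREDICT_FALSE (b0 == 0))
    {
      u32 bi0;
      if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
	return 0;		/* out of buffers; caller drops the record */
      b0 = fm->ipv4_buffers_per_worker[my_thread_index] =
	vlib_get_buffer (vm, bi0);
    }
  return b0;
}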
*/ int vlib_main (vlib_main_t * vm, unformat_input_t * input); -/* Thread stacks, for os_get_cpu_number */ +/* Thread stacks, for os_get_thread_index */ extern u8 **vlib_thread_stacks; /* Number of thread stacks that the application needs */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 9ccfd3a2..b7bc9e26 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -35,16 +35,8 @@ vl (void *p) vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; -__thread uword vlib_thread_index = 0; - -uword -os_get_cpu_number (void) -{ - return vlib_thread_index; -} - uword -os_get_ncpus (void) +os_get_nthreads (void) { u32 len; @@ -467,7 +459,7 @@ vlib_worker_thread_bootstrap_fn (void *arg) w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); - vlib_thread_index = w - vlib_worker_threads; + __os_thread_index = w - vlib_worker_threads; rv = (void *) clib_calljmp ((uword (*)(uword)) w->thread_function, diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 101d3d4a..17d35a24 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -181,11 +181,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); void vlib_worker_thread_barrier_sync (vlib_main_t * vm); void vlib_worker_thread_barrier_release (vlib_main_t * vm); -extern __thread uword vlib_thread_index; static_always_inline uword vlib_get_thread_index (void) { - return vlib_thread_index; + return __os_thread_index; } always_inline void diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index db5ddd64..103576db 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -565,7 +565,7 @@ vlib_unix_main (int argc, char *argv[]) vlib_thread_stack_init (0); - vlib_thread_index = 0; + __os_thread_index = 0; i = clib_calljmp (thread0, (uword) vm, (void *) (vlib_thread_stacks[0] + diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c index acba8b3f..e5d88732 100644 --- a/src/vlibmemory/memory_vlib.c +++ b/src/vlibmemory/memory_vlib.c @@ -1333,7 +1333,7 @@ vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length) unix_shared_memory_queue_t *q; /* Main thread: call the function directly */ - if (os_get_cpu_number () == 0) + if (vlib_get_thread_index () == 0) { vlib_main_t *vm = vlib_get_main (); void (*call_fp) (void *); diff --git a/src/vnet/dpo/interface_dpo.c b/src/vnet/dpo/interface_dpo.c index 50ca756f..8d700c23 100644 --- a/src/vnet/dpo/interface_dpo.c +++ b/src/vnet/dpo/interface_dpo.c @@ -231,7 +231,7 @@ interface_dpo_inline (vlib_main_t * vm, vlib_frame_t * from_frame) { u32 n_left_from, next_index, * from, * to_next; - u32 cpu_index = os_get_cpu_number(); + u32 thread_index = vlib_get_thread_index (); vnet_interface_main_t *im; im = &vnet_get_main ()->interface_main; @@ -274,13 +274,13 @@ interface_dpo_inline (vlib_main_t * vm, vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido0->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido1->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b1)); @@ -331,7 +331,7 @@ interface_dpo_inline (vlib_main_t * vm, /* Bump the interface's RX coutners */ vlib_increment_combined_counter (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - cpu_index, + thread_index, ido0->ido_sw_if_index, 1, vlib_buffer_length_in_chain (vm, b0)); diff --git a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c 
b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c index d5f3a28a..7db1c9bb 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_adjacency.c +++ b/src/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -302,7 +302,7 @@ lisp_gpe_increment_stats_counters (lisp_cp_main_t * lcm, ip_adjacency_t * adj, /* compute payload length starting after GPE */ u32 bytes = b->current_length - (lisp_data - b->data - b->current_data); - vlib_increment_combined_counter (&lgm->counters, os_get_cpu_number (), + vlib_increment_combined_counter (&lgm->counters, vlib_get_thread_index (), p[0], 1, bytes); } diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index d8b97b5f..51fadeb8 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -96,12 +96,12 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) clib_bihash_bucket_t working_bucket __attribute__ ((aligned (8))); void *oldheap; BVT (clib_bihash_value) * working_copy; - u32 cpu_number = os_get_cpu_number (); + u32 thread_index = os_get_thread_index (); - if (cpu_number >= vec_len (h->working_copies)) + if (thread_index >= vec_len (h->working_copies)) { oldheap = clib_mem_set_heap (h->mheap); - vec_validate (h->working_copies, cpu_number); + vec_validate (h->working_copies, thread_index); clib_mem_set_heap (oldheap); } @@ -110,7 +110,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) * updates from multiple threads will not result in sporadic, spurious * lookup failures. */ - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; h->saved_bucket.as_u64 = b->as_u64; oldheap = clib_mem_set_heap (h->mheap); @@ -119,7 +119,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) { vec_validate_aligned (working_copy, (1 << b->log2_pages) - 1, sizeof (u64)); - h->working_copies[cpu_number] = working_copy; + h->working_copies[thread_index] = working_copy; } _vec_len (working_copy) = 1 << b->log2_pages; @@ -132,7 +132,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, clib_bihash_bucket_t * b) working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy); CLIB_MEMORY_BARRIER (); b->as_u64 = working_bucket.as_u64; - h->working_copies[cpu_number] = working_copy; + h->working_copies[thread_index] = working_copy; } static @@ -233,7 +233,7 @@ int BV (clib_bihash_add_del) int i, limit; u64 hash, new_hash; u32 new_log2_pages; - u32 cpu_number = os_get_cpu_number (); + u32 thread_index = os_get_thread_index (); int mark_bucket_linear; int resplit_once; @@ -323,7 +323,7 @@ int BV (clib_bihash_add_del) new_log2_pages = h->saved_bucket.log2_pages + 1; mark_bucket_linear = 0; - working_copy = h->working_copies[cpu_number]; + working_copy = h->working_copies[thread_index]; resplit_once = 0; new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages); diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h index c60ff414..0cd2b4fe 100644 --- a/src/vppinfra/lock.h +++ b/src/vppinfra/lock.h @@ -24,7 +24,7 @@ typedef struct u32 lock; #if CLIB_DEBUG > 0 pid_t pid; - uword cpu_index; + uword thread_index; void *frame_address; #endif } *clib_spinlock_t; @@ -57,7 +57,7 @@ clib_spinlock_lock (clib_spinlock_t * p) #if CLIB_DEBUG > 0 (*p)->frame_address = __builtin_frame_address (0); (*p)->pid = getpid (); - (*p)->cpu_index = os_get_cpu_number (); + (*p)->thread_index = os_get_thread_index (); #endif } @@ -75,7 +75,7 @@ clib_spinlock_unlock (clib_spinlock_t * p) #if CLIB_DEBUG > 0 (*p)->frame_address = 0; (*p)->pid = 0; - (*p)->cpu_index = 0; + 
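The spinlock's debug bookkeeping now records a thread index rather than a CPU number. A short usage sketch, assuming a hypothetical example_lock; the clib_spinlock_* calls are the vppinfra API shown in the hunk:

#include <vppinfra/lock.h>

static clib_spinlock_t example_lock;

static void
example_init (void)
{
  clib_spinlock_init (&example_lock);	/* once, before first use */
}

static void
example_critical_section (void)
{
  clib_spinlock_lock (&example_lock);	/* debug images also record pid + thread_index */
  /* ... updates to shared state ... */
  clib_spinlock_unlock (&example_lock);	/* debug images clear the owner fields */
}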
(*p)->thread_index = 0; #endif } diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 1260eab2..63c5ac16 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -54,14 +54,14 @@ extern void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS]; always_inline void * clib_mem_get_per_cpu_heap (void) { - int cpu = os_get_cpu_number (); + int cpu = os_get_thread_index (); return clib_per_cpu_mheaps[cpu]; } always_inline void * clib_mem_set_per_cpu_heap (u8 * new_heap) { - int cpu = os_get_cpu_number (); + int cpu = os_get_thread_index (); void *old = clib_per_cpu_mheaps[cpu]; clib_per_cpu_mheaps[cpu] = new_heap; return old; @@ -83,7 +83,7 @@ clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset, align_offset = align; } - cpu = os_get_cpu_number (); + cpu = os_get_thread_index (); heap = clib_per_cpu_mheaps[cpu]; heap = mheap_get_aligned (heap, size, align, align_offset, &offset); clib_per_cpu_mheaps[cpu] = heap; diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c index c917e164..00b67c49 100644 --- a/src/vppinfra/mhash.c +++ b/src/vppinfra/mhash.c @@ -226,7 +226,7 @@ static uword mhash_set_tmp_key (mhash_t * h, const void *key) { u8 *key_tmp; - int my_cpu = os_get_cpu_number (); + int my_cpu = os_get_thread_index (); vec_validate (h->key_tmps, my_cpu); key_tmp = h->key_tmps[my_cpu]; diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h index 102adf4e..7eb19183 100644 --- a/src/vppinfra/mhash.h +++ b/src/vppinfra/mhash.h @@ -93,7 +93,7 @@ mhash_key_to_mem (mhash_t * h, uword key) { u8 *key_tmp; - int my_cpu = os_get_cpu_number (); + int my_cpu = os_get_thread_index (); vec_validate (h->key_tmps, my_cpu); key_tmp = h->key_tmps[my_cpu]; return key_tmp; diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c index 192732db..d4010ceb 100644 --- a/src/vppinfra/mheap.c +++ b/src/vppinfra/mheap.c @@ -56,7 +56,7 @@ mheap_maybe_lock (void *v) mheap_t *h = mheap_header (v); if (v && (h->flags & MHEAP_FLAG_THREAD_SAFE)) { - u32 my_cpu = os_get_cpu_number (); + u32 my_cpu = os_get_thread_index (); if (h->owner_cpu == my_cpu) { h->recursion_count++; @@ -77,7 +77,7 @@ mheap_maybe_unlock (void *v) mheap_t *h = mheap_header (v); if (v && h->flags & MHEAP_FLAG_THREAD_SAFE) { - ASSERT (os_get_cpu_number () == h->owner_cpu); + ASSERT (os_get_thread_index () == h->owner_cpu); if (--h->recursion_count == 0) { h->owner_cpu = ~0; diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h index a5c74f8c..33300716 100644 --- a/src/vppinfra/os.h +++ b/src/vppinfra/os.h @@ -56,8 +56,24 @@ void os_out_of_memory (void); /* Estimate, measure or divine CPU timestamp clock frequency. 
*/ f64 os_cpu_clock_frequency (void); -uword os_get_cpu_number (void); -uword os_get_ncpus (void); +extern __thread uword __os_thread_index; + +static_always_inline uword +os_get_thread_index (void) +{ + return __os_thread_index; +} + +static_always_inline uword +os_get_cpu_number (void) __attribute__ ((deprecated)); + +static_always_inline uword +os_get_cpu_number (void) +{ + return __os_thread_index; +} + +uword os_get_nthreads (void); #include diff --git a/src/vppinfra/smp.c b/src/vppinfra/smp.c index 8ac19960..f603283e 100644 --- a/src/vppinfra/smp.c +++ b/src/vppinfra/smp.c @@ -53,7 +53,7 @@ allocate_per_cpu_mheap (uword cpu) void *heap; uword vm_size, stack_size, mheap_flags; - ASSERT (os_get_cpu_number () == cpu); + ASSERT (os_get_thread_index () == cpu); vm_size = (uword) 1 << m->log2_n_per_cpu_vm_bytes; stack_size = (uword) 1 << m->log2_n_per_cpu_stack_bytes; diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 2928369d..361015b4 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -45,6 +45,8 @@ #include #include /* for sprintf */ +__thread uword __os_thread_index = 0; + clib_error_t * unix_file_n_bytes (char *file, uword * result) { @@ -188,14 +190,14 @@ void os_puts (u8 * string, uword string_length, uword is_error) void os_puts (u8 * string, uword string_length, uword is_error) { - int cpu = os_get_cpu_number (); - int ncpus = os_get_ncpus (); + int cpu = os_get_thread_index (); + int nthreads = os_get_nthreads (); char buf[64]; int fd = is_error ? 2 : 1; struct iovec iovs[2]; int n_iovs = 0; - if (ncpus > 1) + if (nthreads > 1) { snprintf (buf, sizeof (buf), "%d: ", cpu); @@ -219,16 +221,9 @@ os_out_of_memory (void) os_panic (); } -uword os_get_cpu_number (void) __attribute__ ((weak)); -uword -os_get_cpu_number (void) -{ - return 0; -} - -uword os_get_ncpus (void) __attribute__ ((weak)); +uword os_get_nthreads (void) __attribute__ ((weak)); uword -os_get_ncpus (void) +os_get_nthreads (void) { return 1; } -- cgit 1.2.3-korg From 93992a9048cb6e5dcd22de5091e72de778122627 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 24 May 2017 18:03:56 -0700 Subject: Implement sack based tcp loss recovery (RFC 6675) - refactor existing congestion control code (RFC 6582/5681). 
Handling of ack feedback now consists of: ack parsing, cc event detection, event handling, congestion control update - extend sack scoreboard to support sack based retransmissions - basic implementation of Eifel detection algorithm (RFC 3522) for detecting spurious retransmissions - actually initialize the per-thread frame freelist hash tables - increase worker stack size to 2mb - fix session queue node out-of-buffer handling - ensure that the local buffer cache vec_len matches reality - avoid 2x spurious event requeues when short of buffers - count out-of-buffer events - make the builtin server thread-safe - fix bihash template threading issue: need to paint -1 across uninitialized working_copy_length vector elements (via rebase from master) Change-Id: I646cb9f1add9a67d08f4a87badbcb117980ebfc4 Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/svm/svm_fifo.c | 5 +- src/vlib/node.c | 1 + src/vlib/threads.c | 2 +- src/vlib/threads.h | 2 +- src/vnet/session/node.c | 53 ++-- src/vnet/session/session.c | 11 +- src/vnet/session/session.h | 6 +- src/vnet/session/session_cli.c | 26 +- src/vnet/tcp/builtin_client.c | 40 ++- src/vnet/tcp/builtin_server.c | 20 +- src/vnet/tcp/tcp.c | 57 ++-- src/vnet/tcp/tcp.h | 112 +++++-- src/vnet/tcp/tcp_debug.h | 16 +- src/vnet/tcp/tcp_input.c | 671 +++++++++++++++++++++++++++++------------ src/vnet/tcp/tcp_newreno.c | 20 +- src/vnet/tcp/tcp_output.c | 287 ++++++++++++------ src/vnet/tcp/tcp_test.c | 53 ++-- 17 files changed, 973 insertions(+), 409 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index f13f6fea..5c8f244a 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -540,7 +540,7 @@ svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, /* read cursize, which can only increase while we're working */ cursize = svm_fifo_max_dequeue (f); - if (PREDICT_FALSE (cursize == 0)) + if (PREDICT_FALSE (cursize < relative_offset)) return -2; /* nothing in the fifo */ nitems = f->nitems; @@ -548,7 +548,8 @@ svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, real_head = real_head >= nitems ? real_head - nitems : real_head; /* Number of bytes we're going to copy */ - total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes; + total_copy_bytes = (cursize - relative_offset < max_bytes) ? 
+ cursize - relative_offset : max_bytes; if (PREDICT_TRUE (copy_here != 0)) { diff --git a/src/vlib/node.c b/src/vlib/node.c index bbd3a42e..eecad274 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -502,6 +502,7 @@ vlib_node_main_init (vlib_main_t * vm) vlib_node_t *n; uword ni; + nm->frame_size_hash = hash_create (0, sizeof (uword)); nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED; /* Generate sibling relationships */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index b7bc9e26..0c775e2d 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -670,7 +670,7 @@ start_workers (vlib_main_t * vm) /* zap the (per worker) frame freelists, etc */ nm_clone->frame_sizes = 0; - nm_clone->frame_size_hash = 0; + nm_clone->frame_size_hash = hash_create (0, sizeof (uword)); /* Packet trace buffers are guaranteed to be empty, nothing to do here */ diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 17d35a24..572ce77f 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -62,7 +62,7 @@ typedef struct vlib_thread_registration_ #define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */ #define VLIB_OFFSET_MASK (~VLIB_CPU_MASK) -#define VLIB_LOG2_THREAD_STACK_SIZE (20) +#define VLIB_LOG2_THREAD_STACK_SIZE (21) #define VLIB_THREAD_STACK_SIZE (1<session_type]; @@ -167,9 +169,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* Check how much we can pull. If buffering, subtract the offset */ max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - rx_offset; - /* Allow enqueuing of a new event */ - svm_fifo_unset_event (s0->server_tx_fifo); - /* Nothing to read return */ if (max_dequeue0 == 0) return 0; @@ -187,8 +186,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, max_len_to_snd0 = snd_space0; } - n_bytes_per_buf = vlib_buffer_free_list_buffer_size (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + n_bytes_per_buf = vlib_buffer_free_list_buffer_size + (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0; n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf); n_bufs_per_evt = (ceil ((double) max_len_to_snd0 / n_bytes_per_seg)) @@ -205,24 +204,33 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) { vec_validate (smm->tx_buffers[thread_index], - n_bufs + VLIB_FRAME_SIZE - 1); - n_bufs += vlib_buffer_alloc (vm, - &smm->tx_buffers[thread_index][n_bufs], - VLIB_FRAME_SIZE); - - /* buffer shortage - * XXX 0.9 because when debugging we might not get a full frame */ - if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE)) + n_bufs + 2 * VLIB_FRAME_SIZE - 1); + + buffers_allocated = 0; + do { - if (svm_fifo_set_event (s0->server_tx_fifo)) - { - vec_add1 (smm->pending_event_vector[thread_index], *e0); - } - return -1; + buffers_allocated_this_call = + vlib_buffer_alloc + (vm, + &smm->tx_buffers[thread_index][n_bufs + buffers_allocated], + 2 * VLIB_FRAME_SIZE - buffers_allocated); + buffers_allocated += buffers_allocated_this_call; } + while (buffers_allocated_this_call > 0 + && ((buffers_allocated + n_bufs < VLIB_FRAME_SIZE))); + + n_bufs += buffers_allocated; _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + + if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) + { + vec_add1 (smm->pending_event_vector[thread_index], *e0); + return -1; + } } + /* Allow enqueuing of a new event */ + svm_fifo_unset_event (s0->server_tx_fifo); vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (left_to_snd0 && 
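The allocation loop above copes with vlib_buffer_alloc returning fewer buffers than requested, which is part of the "out-of-buffer handling" fix named in the commit message. A standalone sketch of that retry pattern (function and parameter names are hypothetical):

#include <vlib/vlib.h>

static u32
example_alloc_buffers (vlib_main_t * vm, u32 * buffers, u32 want)
{
  u32 n_alloc = 0, got;
  do
    {
      /* may return anything from 0 to (want - n_alloc) */
      got = vlib_buffer_alloc (vm, buffers + n_alloc, want - n_alloc);
      n_alloc += got;
    }
  while (got > 0 && n_alloc < want);
  return n_alloc;		/* caller must cope with n_alloc < want */
}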
n_left_to_next >= n_bufs_per_seg) @@ -232,7 +240,9 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, */ /* Get free buffer */ + ASSERT (n_bufs >= 1); bi0 = smm->tx_buffers[thread_index][--n_bufs]; + ASSERT (bi0); _vec_len (smm->tx_buffers[thread_index]) = n_bufs; b0 = vlib_get_buffer (vm, bi0); @@ -545,9 +555,10 @@ skip_dequeue: my_thread_index, &n_tx_packets); /* Out of buffers */ - if (rv < 0) + if (PREDICT_FALSE (rv < 0)) { - vec_add1 (smm->pending_event_vector[my_thread_index], *e0); + vlib_node_increment_counter (vm, node->node_index, + SESSION_QUEUE_ERROR_NO_BUFFER, 1); continue; } break; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 02b0cced..534598d6 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -551,7 +551,7 @@ u8 stream_session_no_space (transport_connection_t * tc, u32 thread_index, u16 data_len) { - stream_session_t *s = stream_session_get (tc->c_index, thread_index); + stream_session_t *s = stream_session_get (tc->s_index, thread_index); if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY)) return 1; @@ -563,6 +563,15 @@ stream_session_no_space (transport_connection_t * tc, u32 thread_index, } u32 +stream_session_tx_fifo_max_dequeue (transport_connection_t * tc) +{ + stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); + if (s->session_state != SESSION_STATE_READY) + return 0; + return svm_fifo_max_dequeue (s->server_tx_fifo); +} + +int stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes) { diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index a8728649..d9c38bd1 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -352,16 +352,18 @@ stream_session_max_rx_enqueue (transport_connection_t * tc) } always_inline u32 -stream_session_fifo_size (transport_connection_t * tc) +stream_session_rx_fifo_size (transport_connection_t * tc) { stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index); return s->server_rx_fifo->nitems; } +u32 stream_session_tx_fifo_max_dequeue (transport_connection_t * tc); + int stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b, u32 offset, u8 queue_event, u8 is_in_order); -u32 +int stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes); u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index 509eedbb..6b8341aa 100755 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -15,6 +15,15 @@ #include #include +u8 * +format_stream_session_fifos (u8 * s, va_list * args) +{ + stream_session_t *ss = va_arg (*args, stream_session_t *); + s = format (s, " Rx fifo: %U", format_svm_fifo, ss->server_rx_fifo, 1); + s = format (s, " Tx fifo: %U", format_svm_fifo, ss->server_tx_fifo, 1); + return s; +} + /** * Format stream session as per the following format * @@ -44,6 +53,8 @@ format_stream_session (u8 * s, va_list * args) ss->thread_index, verbose); if (verbose == 1) s = format (s, "%v", str); + if (verbose > 1) + s = format (s, "%U", format_stream_session_fifos, ss); } else if (ss->session_state == SESSION_STATE_LISTENING) { @@ -57,8 +68,12 @@ format_stream_session (u8 * s, va_list * args) } else if (ss->session_state == SESSION_STATE_CLOSED) { - s = format (s, "[CL] %-40U%v", tp_vft->format_connection, - ss->connection_index, ss->thread_index, verbose, str); + s = 
format (s, "[CL] %-40U", tp_vft->format_connection, + ss->connection_index, ss->thread_index, verbose); + if (verbose == 1) + s = format (s, "%v", str); + if (verbose > 1) + s = format (s, "%U", format_stream_session_fifos, ss); } else { @@ -124,13 +139,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, ({ vec_reset_length (str); str = format (str, "%U", format_stream_session, s, verbose); - if (verbose > 1) - { - str = format (str, " Rx fifo: %U", format_svm_fifo, - s->server_rx_fifo, 1); - str = format (str, " Tx fifo: %U", format_svm_fifo, - s->server_tx_fifo, 1); - } vlib_cli_output (vm, "%v", str); })); /* *INDENT-ON* */ diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 768f0c3c..7238cda3 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -115,8 +115,17 @@ receive_test_chunk (tclient_main_t * tm, session_t * s) /* Allow enqueuing of new event */ // svm_fifo_unset_event (rx_fifo); - n_read = svm_fifo_dequeue_nowait (rx_fifo, vec_len (tm->rx_buf), - tm->rx_buf); + if (test_bytes) + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, vec_len (tm->rx_buf), + tm->rx_buf); + } + else + { + n_read = svm_fifo_max_dequeue (rx_fifo); + svm_fifo_dequeue_drop (rx_fifo, n_read); + } + if (n_read > 0) { if (TCP_BUILTIN_CLIENT_DBG) @@ -165,6 +174,8 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, int i; int delete_session; u32 *connection_indices; + u32 tx_quota = 0; + u32 delta, prev_bytes_received_this_session; connection_indices = tm->connection_index_by_thread[my_thread_index]; @@ -177,14 +188,19 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, sp = pool_elt_at_index (tm->sessions, connection_indices[i]); - if (sp->bytes_to_send > 0) + if (tx_quota < 60 && sp->bytes_to_send > 0) { send_test_chunk (tm, sp); delete_session = 0; + tx_quota++; } if (sp->bytes_to_receive > 0) { + prev_bytes_received_this_session = sp->bytes_received; receive_test_chunk (tm, sp); + delta = sp->bytes_received - prev_bytes_received_this_session; + if (delta > 0) + tx_quota--; delete_session = 0; } if (PREDICT_FALSE (delete_session == 1)) @@ -195,11 +211,19 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION); dmp->client_index = tm->my_client_index; dmp->handle = sp->vpp_session_handle; - vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp); - vec_delete (connection_indices, 1, i); - tm->connection_index_by_thread[my_thread_index] = - connection_indices; - __sync_fetch_and_add (&tm->ready_connections, -1); +// vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp); + if (!unix_shared_memory_queue_add (tm->vl_input_queue, (u8 *) & dmp, + 1)) + { + vec_delete (connection_indices, 1, i); + tm->connection_index_by_thread[my_thread_index] = + connection_indices; + __sync_fetch_and_add (&tm->ready_connections, -1); + } + else + { + vl_msg_api_free (dmp); + } /* Kick the debug CLI process */ if (tm->ready_connections == 0) diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index 4f0e211c..8bd2f360 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -39,7 +39,8 @@ typedef struct { - u8 *rx_buf; + /* Per-thread RX buffer */ + u8 **rx_buf; unix_shared_memory_queue_t **vpp_queue; u64 byte_index; @@ -117,13 +118,15 @@ void test_bytes (builtin_server_main_t * bsm, int actual_transfer) { int i; + u32 my_thread_id = vlib_get_thread_index (); for (i = 0; i < actual_transfer; i++) 
{ - if (bsm->rx_buf[i] != ((bsm->byte_index + i) & 0xff)) + if (bsm->rx_buf[my_thread_id][i] != ((bsm->byte_index + i) & 0xff)) { clib_warning ("at %lld expected %d got %d", bsm->byte_index + i, - (bsm->byte_index + i) & 0xff, bsm->rx_buf[i]); + (bsm->byte_index + i) & 0xff, + bsm->rx_buf[my_thread_id][i]); } } bsm->byte_index += actual_transfer; @@ -138,6 +141,7 @@ builtin_server_rx_callback (stream_session_t * s) builtin_server_main_t *bsm = &builtin_server_main; session_fifo_event_t evt; static int serial_number = 0; + u32 my_thread_id = vlib_get_thread_index (); tx_fifo = s->server_tx_fifo; rx_fifo = s->server_rx_fifo; @@ -171,11 +175,12 @@ builtin_server_rx_callback (stream_session_t * s) return 0; } - vec_validate (bsm->rx_buf, max_transfer - 1); - _vec_len (bsm->rx_buf) = max_transfer; + vec_validate (bsm->rx_buf, my_thread_id); + vec_validate (bsm->rx_buf[my_thread_id], max_transfer - 1); + _vec_len (bsm->rx_buf[my_thread_id]) = max_transfer; actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, - bsm->rx_buf); + bsm->rx_buf[my_thread_id]); ASSERT (actual_transfer == max_transfer); // test_bytes (bsm, actual_transfer); @@ -184,7 +189,8 @@ builtin_server_rx_callback (stream_session_t * s) * Echo back */ - n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer, bsm->rx_buf); + n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer, + bsm->rx_buf[my_thread_id]); if (n_written != max_transfer) clib_warning ("short trout!"); diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 9b7b2f65..e0b67a8e 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -195,8 +195,8 @@ tcp_connection_close (tcp_connection_t * tc) TCP_EVT_DBG (TCP_EVT_CLOSE, tc); /* Send FIN if needed */ - if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD - || tc->state == TCP_STATE_CLOSE_WAIT) + if (tc->state == TCP_STATE_ESTABLISHED + || tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_CLOSE_WAIT) tcp_send_fin (tc); /* Switch state */ @@ -480,7 +480,7 @@ u8 * format_tcp_timers (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); - int i, last = 0; + int i, last = -1; for (i = 0; i < TCP_N_TIMERS; i++) if (tc->timers[i] != TCP_TIMER_HANDLE_INVALID) @@ -493,7 +493,7 @@ format_tcp_timers (u8 * s, va_list * args) s = format (s, "%s,", tcp_conn_timers[i]); } - if (last > 0) + if (last >= 0) s = format (s, "%s]", tcp_conn_timers[i]); else s = format (s, "]"); @@ -526,19 +526,19 @@ format_tcp_vars (u8 * s, va_list * args) s = format (s, " snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n", tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs, tc->snd_wl2 - tc->iss); - s = format (s, " flight size %u send space %u rcv_wnd available %d\n", - tcp_flight_size (tc), tcp_snd_space (tc), - tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las)); + s = format (s, " flight size %u send space %u rcv_wnd_av %d\n", + tcp_flight_size (tc), tcp_available_snd_space (tc), + tcp_rcv_wnd_available (tc)); s = format (s, " cong %U ", format_tcp_congestion_status, tc); s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n", - tc->cwnd, tc->ssthresh, tc->rtx_bytes, tc->bytes_acked); - s = format (s, " prev_ssthresh %u snd_congestion %u\n", tc->prev_ssthresh, - tc->snd_congestion - tc->iss); + tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked); + s = format (s, " prev_ssthresh %u snd_congestion %u dupack %u\n", + tc->prev_ssthresh, tc->snd_congestion - tc->iss, + tc->rcv_dupacks); s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", 
tc->rto, tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts); s = format (s, "rtt_seq %u\n", tc->rtt_seq); - if (scoreboard_first_hole (&tc->sack_sb)) - s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb); + s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb); if (vec_len (tc->snd_sacks)) s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc); @@ -595,9 +595,10 @@ format_tcp_session (u8 * s, va_list * args) tc = tcp_connection_get (tci, thread_index); if (tc) - return format (s, "%U", format_tcp_connection, tc, verbose); + s = format (s, "%U", format_tcp_connection, tc, verbose); else - return format (s, "empty"); + s = format (s, "empty"); + return s; } u8 * @@ -643,13 +644,17 @@ format_tcp_scoreboard (u8 * s, va_list * args) { sack_scoreboard_t *sb = va_arg (*args, sack_scoreboard_t *); sack_scoreboard_hole_t *hole; - s = format (s, "head %u tail %u snd_una_adv %u\n", sb->head, sb->tail, - sb->snd_una_adv); - s = format (s, "sacked_bytes %u last_sacked_bytes %u", sb->sacked_bytes, - sb->last_sacked_bytes); - s = format (s, " max_byte_sacked %u\n", sb->max_byte_sacked); - s = format (s, "holes:\n"); + s = format (s, "sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n", + sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes); + s = format (s, " last_bytes_delivered %u high_sacked %u snd_una_adv %u\n", + sb->last_bytes_delivered, sb->high_sacked, sb->snd_una_adv); + s = format (s, " cur_rxt_hole %u high_rxt %u rescue_rxt %u", + sb->cur_rxt_hole, sb->high_rxt, sb->rescue_rxt); + hole = scoreboard_first_hole (sb); + if (hole) + s = format (s, "\n head %u tail %u holes:\n", sb->head, sb->tail); + while (hole) { s = format (s, "%U", format_tcp_sack_hole, hole); @@ -736,7 +741,7 @@ tcp_snd_space (tcp_connection_t * tc) if (tcp_in_recovery (tc)) { tc->snd_nxt = tc->snd_una_max; - snd_space = tcp_available_wnd (tc) - tc->rtx_bytes + snd_space = tcp_available_wnd (tc) - tc->snd_rxt_bytes - (tc->snd_una_max - tc->snd_congestion); if (snd_space <= 0 || (tc->snd_una_max - tc->snd_una) >= tc->snd_wnd) return 0; @@ -744,8 +749,8 @@ tcp_snd_space (tcp_connection_t * tc) } /* If in fast recovery, send 1 SMSS if wnd allows */ - if (tcp_in_fastrecovery (tc) && tcp_available_snd_space (tc) - && tcp_fastrecovery_sent_1_smss (tc)) + if (tcp_in_fastrecovery (tc) + && tcp_available_snd_space (tc) && !tcp_fastrecovery_sent_1_smss (tc)) { tcp_fastrecovery_1_smss_on (tc); return tc->snd_mss; @@ -761,6 +766,12 @@ tcp_session_send_space (transport_connection_t * trans_conn) return tcp_snd_space (tc); } +i32 +tcp_rcv_wnd_available (tcp_connection_t * tc) +{ + return (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las); +} + u32 tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) { diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index c3ebe22b..071f1ab1 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -34,6 +34,7 @@ #define TCP_MAX_RX_FIFO_SIZE 2 << 20 #define TCP_IW_N_SEGMENTS 10 #define TCP_ALWAYS_ACK 0 /**< If on, we always ack */ +#define TCP_USE_SACKS 1 /**< Disable only for testing */ /** TCP FSM state definitions as per RFC793. 
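A worked example for the signed return type tcp_rcv_wnd_available now has: the receive window can be over-committed, so the available amount must be able to go negative, which a u32 cannot express. All numbers are made up; like the original, the arithmetic relies on two's-complement wraparound:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t rcv_wnd = 1000;		/* advertised window */
  uint32_t rcv_nxt = 5200, rcv_las = 4000;	/* 1200 bytes since last update */
  int32_t avail = (int32_t) (rcv_wnd - (rcv_nxt - rcv_las));
  printf ("rcv_wnd available: %d\n", avail);	/* prints -200 */
  return 0;
}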
*/ #define foreach_tcp_fsm_state \ @@ -94,7 +95,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; #define TCP_DELACK_TIME 1 /* 0.1s */ #define TCP_ESTABLISH_TIME 750 /* 75s */ #define TCP_2MSL_TIME 300 /* 30s */ -#define TCP_CLOSEWAIT_TIME 1 /* 0.1s */ +#define TCP_CLOSEWAIT_TIME 20 /* 0.1s */ #define TCP_CLEANUP_TIME 5 /* 0.5s Time to wait before cleanup */ #define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */ @@ -157,6 +158,7 @@ typedef struct _sack_scoreboard_hole u32 prev; /**< Index for previous entry in linked list */ u32 start; /**< Start sequence number */ u32 end; /**< End sequence number */ + u8 is_lost; /**< Mark hole as lost */ } sack_scoreboard_hole_t; typedef struct _sack_scoreboard @@ -166,8 +168,13 @@ typedef struct _sack_scoreboard u32 tail; /**< Index of last entry */ u32 sacked_bytes; /**< Number of bytes sacked in sb */ u32 last_sacked_bytes; /**< Number of bytes last sacked */ + u32 last_bytes_delivered; /**< Number of sack bytes delivered */ u32 snd_una_adv; /**< Bytes to add to snd_una */ - u32 max_byte_sacked; /**< Highest byte acked */ + u32 high_sacked; /**< Highest byte sacked (fack) */ + u32 high_rxt; /**< Highest retransmitted sequence */ + u32 rescue_rxt; /**< Rescue sequence number */ + u32 lost_bytes; /**< Bytes lost as per RFC6675 */ + u32 cur_rxt_hole; /**< Retransmitting from this hole */ } sack_scoreboard_t; typedef enum _tcp_cc_algorithm_type @@ -211,7 +218,7 @@ typedef struct _tcp_connection u32 irs; /**< initial remote sequence */ /* Options */ - tcp_options_t opt; /**< TCP connection options parsed */ + tcp_options_t rcv_opts; /**< Rx options for connection */ tcp_options_t snd_opts; /**< Tx options for connection */ u8 snd_opts_len; /**< Tx options len */ u8 rcv_wscale; /**< Window scale to advertise to peer */ @@ -229,8 +236,10 @@ typedef struct _tcp_connection u32 cwnd; /**< Congestion window */ u32 ssthresh; /**< Slow-start threshold */ u32 prev_ssthresh; /**< ssthresh before congestion */ + u32 prev_cwnd; /**< ssthresh before congestion */ u32 bytes_acked; /**< Bytes acknowledged by current segment */ - u32 rtx_bytes; /**< Retransmitted bytes */ + u32 snd_rxt_bytes; /**< Retransmitted bytes */ + u32 snd_rxt_ts; /**< Timestamp when first packet is retransmitted */ u32 tsecr_last_ack; /**< Timestamp echoed to us in last healthy ACK */ u32 snd_congestion; /**< snd_una_max when congestion is detected */ tcp_cc_algorithm_t *cc_algo; /**< Congestion control algorithm */ @@ -411,6 +420,7 @@ void tcp_send_syn (tcp_connection_t * tc); void tcp_send_fin (tcp_connection_t * tc); void tcp_init_mss (tcp_connection_t * tc); void tcp_update_snd_mss (tcp_connection_t * tc); +void tcp_update_rto (tcp_connection_t * tc); always_inline u32 tcp_end_seq (tcp_header_t * th, u32 len) @@ -428,17 +438,39 @@ tcp_end_seq (tcp_header_t * th, u32 len) #define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) #define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) +/** + * Our estimate of the number of bytes that have left the network + */ +always_inline u32 +tcp_bytes_out (const tcp_connection_t * tc) +{ + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + return tc->sack_sb.sacked_bytes + tc->sack_sb.lost_bytes; + else + return tc->rcv_dupacks * tc->snd_mss; +} + +/** + * Our estimate of the number of bytes in flight (pipe size) + */ always_inline u32 tcp_flight_size (const tcp_connection_t * tc) { int flight_size; - flight_size = (int) ((tc->snd_una_max - tc->snd_una) + tc->rtx_bytes) - - (tc->rcv_dupacks * tc->snd_mss) /* - tc->sack_sb.sacked_bytes */ ; + 
flight_size = (int) (tc->snd_una_max - tc->snd_una) - tcp_bytes_out (tc) + + tc->snd_rxt_bytes; - /* Happens if we don't clear sacked bytes */ if (flight_size < 0) - return 0; + { + if (0) + clib_warning + ("Negative: %u %u %u dupacks %u sacked bytes %u flags %d", + tc->snd_una_max - tc->snd_una, tcp_bytes_out (tc), + tc->snd_rxt_bytes, tc->rcv_dupacks, tc->sack_sb.sacked_bytes, + tc->rcv_opts.flags); + return 0; + } return flight_size; } @@ -481,14 +513,17 @@ tcp_available_snd_space (const tcp_connection_t * tc) return available_wnd - flight_size; } -u32 tcp_rcv_wnd_available (tcp_connection_t * tc); +i32 tcp_rcv_wnd_available (tcp_connection_t * tc); u32 tcp_snd_space (tcp_connection_t * tc); void tcp_update_rcv_wnd (tcp_connection_t * tc); void tcp_retransmit_first_unacked (tcp_connection_t * tc); +void tcp_fast_retransmit_no_sack (tcp_connection_t * tc); +void tcp_fast_retransmit_sack (tcp_connection_t * tc); void tcp_fast_retransmit (tcp_connection_t * tc); -void tcp_cc_congestion (tcp_connection_t * tc); -void tcp_cc_recover (tcp_connection_t * tc); +void tcp_cc_init_congestion (tcp_connection_t * tc); +int tcp_cc_recover (tcp_connection_t * tc); +void tcp_cc_fastrecovery_exit (tcp_connection_t * tc); /* Made public for unit testing only */ void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end); @@ -563,16 +598,16 @@ tcp_retransmit_timer_set (tcp_connection_t * tc) } always_inline void -tcp_retransmit_timer_update (tcp_connection_t * tc) +tcp_retransmit_timer_reset (tcp_connection_t * tc) { - tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, - clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); + tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); } always_inline void -tcp_retransmit_timer_reset (tcp_connection_t * tc) +tcp_retransmit_timer_force_update (tcp_connection_t * tc) { - tcp_timer_reset (tc, TCP_TIMER_RETRANSMIT); + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); } always_inline void @@ -598,15 +633,43 @@ tcp_persist_timer_reset (tcp_connection_t * tc) tcp_timer_reset (tc, TCP_TIMER_PERSIST); } +always_inline void +tcp_retransmit_timer_update (tcp_connection_t * tc) +{ + if (tc->snd_una == tc->snd_una_max) + { + tcp_retransmit_timer_reset (tc); + if (tc->snd_wnd < tc->snd_mss) + tcp_persist_timer_set (tc); + } + else + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); +} + always_inline u8 tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer) { return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID; } +#define tcp_validate_txf_size(_tc, _a) \ + ASSERT(_tc->state != TCP_STATE_ESTABLISHED \ + || stream_session_tx_fifo_max_dequeue (&_tc->connection) >= _a) + void scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole); +void scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb); +sack_scoreboard_hole_t *scoreboard_insert_hole (sack_scoreboard_t * sb, + u32 prev_index, u32 start, + u32 end); +sack_scoreboard_hole_t *scoreboard_next_rxt_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * + start, u8 have_sent_1_smss, + u8 * can_rescue, + u8 * snd_limited); +void scoreboard_init_high_rxt (sack_scoreboard_t * sb); always_inline sack_scoreboard_hole_t * scoreboard_get_hole (sack_scoreboard_t * sb, u32 index) @@ -624,6 +687,14 @@ scoreboard_next_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) return 0; } +always_inline sack_scoreboard_hole_t * +scoreboard_prev_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) +{ + if (hole->prev != 
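A worked example of the RFC 6675 "pipe" estimate that tcp_bytes_out and tcp_flight_size above compute when SACK is permitted (all numbers made up):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t snd_una = 1000, snd_una_max = 11000;	/* 10000 bytes sent, unacked */
  uint32_t sacked_bytes = 3000, lost_bytes = 1000;	/* scoreboard state */
  uint32_t snd_rxt_bytes = 500;			/* retransmitted */
  /* pipe = sent-but-unacked - presumed-out-of-network + retransmitted */
  int32_t flight = (int32_t) (snd_una_max - snd_una)
    - (sacked_bytes + lost_bytes) + snd_rxt_bytes;
  printf ("flight size: %d bytes\n", flight);	/* 10000 - 4000 + 500 = 6500 */
  return 0;
}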
TCP_INVALID_SACK_HOLE_INDEX) + return pool_elt_at_index (sb->holes, hole->prev); + return 0; +} + always_inline sack_scoreboard_hole_t * scoreboard_first_hole (sack_scoreboard_t * sb) { @@ -643,15 +714,19 @@ scoreboard_last_hole (sack_scoreboard_t * sb) always_inline void scoreboard_clear (sack_scoreboard_t * sb) { - sack_scoreboard_hole_t *hole = scoreboard_first_hole (sb); + sack_scoreboard_hole_t *hole; while ((hole = scoreboard_first_hole (sb))) { scoreboard_remove_hole (sb, hole); } sb->sacked_bytes = 0; sb->last_sacked_bytes = 0; + sb->last_bytes_delivered = 0; sb->snd_una_adv = 0; - sb->max_byte_sacked = 0; + sb->high_sacked = 0; + sb->high_rxt = 0; + sb->lost_bytes = 0; + sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; } always_inline u32 @@ -671,6 +746,7 @@ scoreboard_init (sack_scoreboard_t * sb) { sb->head = TCP_INVALID_SACK_HOLE_INDEX; sb->tail = TCP_INVALID_SACK_HOLE_INDEX; + sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; } void tcp_rcv_sacks (tcp_connection_t * tc, u32 ack); diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index b4497a3b..3a16cf63 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -393,7 +393,7 @@ typedef enum _tcp_dbg_evt DECLARE_ETD(_tc, _e, 4); \ ed->data[0] = _seq - _tc->irs; \ ed->data[1] = _end - _tc->irs; \ - ed->data[2] = _tc->opt.tsval; \ + ed->data[2] = _tc->rcv_opts.tsval; \ ed->data[3] = _tc->tsval_recent; \ } @@ -427,27 +427,27 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "rtx: snd_nxt %u offset %u snd %u rtx %u", \ + .format = "rxt: snd_nxt %u offset %u snd %u rxt %u", \ .format_args = "i4i4i4i4", \ }; \ DECLARE_ETD(_tc, _e, 4); \ ed->data[0] = _tc->snd_nxt - _tc->iss; \ ed->data[1] = offset; \ ed->data[2] = n_bytes; \ - ed->data[3] = _tc->rtx_bytes; \ + ed->data[3] = _tc->snd_rxt_bytes; \ } #define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \ { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "cc: %s wnd %u snd_cong %u rtx_bytes %u", \ + .format = "cc: %s wnd %u snd_cong %u rxt_bytes %u", \ .format_args = "t4i4i4i4", \ .n_enum_strings = 5, \ .enum_strings = { \ - "fast-rtx", \ - "rtx-timeout", \ - "first-rtx", \ + "fast-rxt", \ + "rxt-timeout", \ + "first-rxt", \ "recovered", \ "congestion", \ }, \ @@ -456,7 +456,7 @@ typedef enum _tcp_dbg_evt ed->data[0] = _sub_evt; \ ed->data[1] = tcp_available_snd_space (_tc); \ ed->data[2] = _tc->snd_congestion - _tc->iss; \ - ed->data[3] = _tc->rtx_bytes; \ + ed->data[3] = _tc->snd_rxt_bytes; \ } #define TCP_EVT_CC_PACK_HANDLER(_tc, ...) \ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 35bc9094..ff2229b3 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -231,8 +231,8 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) always_inline int tcp_segment_check_paws (tcp_connection_t * tc) { - return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent - && timestamp_lt (tc->opt.tsval, tc->tsval_recent); + return tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent + && timestamp_lt (tc->rcv_opts.tsval, tc->tsval_recent); } /** @@ -248,10 +248,10 @@ tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end) * then the TSval from the segment is copied to TS.Recent; * otherwise, the TSval is ignored. 
*/ - if (tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end)) { - tc->tsval_recent = tc->opt.tsval; + tc->tsval_recent = tc->rcv_opts.tsval; tc->tsval_recent_age = tcp_time_now (); } } @@ -272,14 +272,21 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) return -1; - if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->opt))) + if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts))) { return -1; } if (tcp_segment_check_paws (tc0)) { - clib_warning ("paws failed"); + if (CLIB_DEBUG > 2) + { + clib_warning ("paws failed\n%U", format_tcp_connection, tc0, 2); + clib_warning ("seq %u seq_end %u ack %u", + vnet_buffer (b0)->tcp.seq_number - tc0->irs, + vnet_buffer (b0)->tcp.seq_end - tc0->irs, + vnet_buffer (b0)->tcp.ack_number - tc0->iss); + } TCP_EVT_DBG (TCP_EVT_PAWS_FAIL, tc0, vnet_buffer (b0)->tcp.seq_number, vnet_buffer (b0)->tcp.seq_end); @@ -348,7 +355,6 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, /* If segment in window, save timestamp */ tcp_update_timestamp (tc0, vnet_buffer (b0)->tcp.seq_number, vnet_buffer (b0)->tcp.seq_end); - return 0; } @@ -391,6 +397,12 @@ tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt) } } +void +tcp_update_rto (tcp_connection_t * tc) +{ + tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); +} + /** Update RTT estimate and RTO timer * * Measure RTT: We have two sources of RTT measurements: TSOPT and ACK @@ -405,7 +417,7 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) u32 mrtt = 0; u8 rtx_acked; - /* Determine if only rtx bytes are acked. TODO fast retransmit */ + /* Determine if only rtx bytes are acked. TODO XXX fast retransmit */ rtx_acked = tc->rto_boff && (tc->bytes_acked <= tc->snd_mss); /* Karn's rule, part 1. Don't use retransmitted segments to estimate @@ -418,9 +430,10 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) * snd_una, i.e., the left side of the send window: * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't * try to update rtt for dupacks */ - else if (tcp_opts_tstamp (&tc->opt) && tc->opt.tsecr && tc->bytes_acked) + else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr + && tc->bytes_acked) { - mrtt = tcp_time_now () - tc->opt.tsecr; + mrtt = tcp_time_now () - tc->rcv_opts.tsecr; } /* Allow measuring of a new RTT */ @@ -436,7 +449,7 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) return 0; tcp_estimate_rtt (tc, mrtt); - tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX); + tcp_update_rto (tc); return 0; } @@ -447,25 +460,46 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack) static void tcp_dequeue_acked (tcp_connection_t * tc, u32 ack) { - /* Dequeue the newly ACKed bytes */ - stream_session_dequeue_drop (&tc->connection, tc->bytes_acked); + /* Dequeue the newly ACKed add SACKed bytes */ + stream_session_dequeue_drop (&tc->connection, + tc->bytes_acked + tc->sack_sb.snd_una_adv); + + tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); /* Update rtt and rto */ tcp_update_rtt (tc, ack); + + /* If everything has been acked, stop retransmit timer + * otherwise update. */ + tcp_retransmit_timer_update (tc); } /** - * Check if dupack as per RFC5681 Sec. 2 - * - * This works only if called before updating snd_wnd. 
- * */ -always_inline u8 -tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 new_snd_wnd) + * Check if duplicate ack as per RFC5681 Sec. 2 + */ +static u8 +tcp_ack_is_dupack (tcp_connection_t * tc, vlib_buffer_t * b, u32 prev_snd_wnd, + u32 prev_snd_una) { - return ((vnet_buffer (b)->tcp.ack_number == tc->snd_una) + return ((vnet_buffer (b)->tcp.ack_number == prev_snd_una) && seq_gt (tc->snd_una_max, tc->snd_una) && (vnet_buffer (b)->tcp.seq_end == vnet_buffer (b)->tcp.seq_number) - && (new_snd_wnd == tc->snd_wnd)); + && (prev_snd_wnd == tc->snd_wnd)); +} + +/** + * Checks if ack is a congestion control event. + */ +static u8 +tcp_ack_is_cc_event (tcp_connection_t * tc, vlib_buffer_t * b, + u32 prev_snd_wnd, u32 prev_snd_una, u8 * is_dack) +{ + /* Check if ack is duplicate. Per RFC 6675, ACKs that SACK new data are + * defined to be 'duplicate' */ + *is_dack = tc->sack_sb.last_sacked_bytes + || tcp_ack_is_dupack (tc, b, prev_snd_wnd, prev_snd_una); + + return (*is_dack || tcp_in_cong_recovery (tc)); } void @@ -478,6 +512,10 @@ scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) next = pool_elt_at_index (sb->holes, hole->next); next->prev = hole->prev; } + else + { + sb->tail = hole->prev; + } if (hole->prev != TCP_INVALID_SACK_HOLE_INDEX) { @@ -489,6 +527,9 @@ scoreboard_remove_hole (sack_scoreboard_t * sb, sack_scoreboard_hole_t * hole) sb->head = hole->next; } + if (scoreboard_hole_index (sb, hole) == sb->cur_rxt_hole) + sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; + pool_put (sb->holes, hole); } @@ -527,26 +568,131 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index, return hole; } +void +scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole, *prev; + u32 bytes = 0, blks = 0; + + sb->lost_bytes = 0; + hole = scoreboard_last_hole (sb); + if (!hole) + return; + + if (seq_gt (sb->high_sacked, hole->end)) + { + bytes = sb->high_sacked - hole->end; + blks = 1; + } + + while ((prev = scoreboard_prev_hole (sb, hole)) + && (bytes < (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss + && blks < TCP_DUPACK_THRESHOLD)) + { + bytes += hole->start - prev->end; + blks++; + hole = prev; + } + + hole = prev; + while (hole) + { + sb->lost_bytes += scoreboard_hole_bytes (hole); + hole->is_lost = 1; + hole = scoreboard_prev_hole (sb, hole); + } +} + +/** + * Figure out the next hole to retransmit + * + * Follows logic proposed in RFC6675 Sec. 4, NextSeg() + */ +sack_scoreboard_hole_t * +scoreboard_next_rxt_hole (sack_scoreboard_t * sb, + sack_scoreboard_hole_t * start, + u8 have_sent_1_smss, + u8 * can_rescue, u8 * snd_limited) +{ + sack_scoreboard_hole_t *hole = 0; + + hole = start ? 
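tcp_ack_is_dupack above is the RFC 5681 Sec. 2 test, and tcp_ack_is_cc_event additionally treats any ACK that SACKs new data as a duplicate, per RFC 6675. A standalone restatement of the dupack predicate (names are stand-ins for the connection and buffer fields used by the real function):

#include <stdint.h>

static int
example_is_dupack (uint32_t ack, uint32_t prev_snd_una,
		   int have_unacked_data, uint32_t seg_len,
		   uint32_t snd_wnd, uint32_t prev_snd_wnd)
{
  /* same cumulative ack, data still outstanding, no payload,
   * and an unchanged send window */
  return ack == prev_snd_una && have_unacked_data
    && seg_len == 0 && snd_wnd == prev_snd_wnd;
}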
start : scoreboard_first_hole (sb); + while (hole && seq_leq (hole->end, sb->high_rxt) && hole->is_lost) + hole = scoreboard_next_hole (sb, hole); + + /* Nothing, return */ + if (!hole) + { + sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; + return 0; + } + + /* Rule (1): if higher than rxt, less than high_sacked and lost */ + if (hole->is_lost && seq_lt (hole->start, sb->high_sacked)) + { + sb->cur_rxt_hole = scoreboard_hole_index (sb, hole); + } + else + { + /* Rule (2): output takes care of transmitting new data */ + if (!have_sent_1_smss) + { + hole = 0; + sb->cur_rxt_hole = TCP_INVALID_SACK_HOLE_INDEX; + } + /* Rule (3): if hole not lost */ + else if (seq_lt (hole->start, sb->high_sacked)) + { + *snd_limited = 1; + sb->cur_rxt_hole = scoreboard_hole_index (sb, hole); + } + /* Rule (4): if hole beyond high_sacked */ + else + { + ASSERT (seq_geq (hole->start, sb->high_sacked)); + *snd_limited = 1; + *can_rescue = 1; + /* HighRxt MUST NOT be updated */ + return 0; + } + } + + if (hole && seq_lt (sb->high_rxt, hole->start)) + sb->high_rxt = hole->start; + + return hole; +} + +void +scoreboard_init_high_rxt (sack_scoreboard_t * sb) +{ + sack_scoreboard_hole_t *hole; + hole = scoreboard_first_hole (sb); + sb->high_rxt = hole->start; + sb->cur_rxt_hole = sb->head; +} + void tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { sack_scoreboard_t *sb = &tc->sack_sb; sack_block_t *blk, tmp; sack_scoreboard_hole_t *hole, *next_hole, *last_hole, *new_hole; - u32 blk_index = 0, old_sacked_bytes, delivered_bytes, hole_index; + u32 blk_index = 0, old_sacked_bytes, hole_index; int i, j; sb->last_sacked_bytes = 0; sb->snd_una_adv = 0; old_sacked_bytes = sb->sacked_bytes; - delivered_bytes = 0; + sb->last_bytes_delivered = 0; - if (!tcp_opts_sack (&tc->opt) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) + if (!tcp_opts_sack (&tc->rcv_opts) + && sb->head == TCP_INVALID_SACK_HOLE_INDEX) return; /* Remove invalid blocks */ - blk = tc->opt.sacks; - while (blk < vec_end (tc->opt.sacks)) + blk = tc->rcv_opts.sacks; + while (blk < vec_end (tc->rcv_opts.sacks)) { if (seq_lt (blk->start, blk->end) && seq_gt (blk->start, tc->snd_una) @@ -555,7 +701,7 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) blk++; continue; } - vec_del1 (tc->opt.sacks, blk - tc->opt.sacks); + vec_del1 (tc->rcv_opts.sacks, blk - tc->rcv_opts.sacks); } /* Add block for cumulative ack */ @@ -563,20 +709,20 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { tmp.start = tc->snd_una; tmp.end = ack; - vec_add1 (tc->opt.sacks, tmp); + vec_add1 (tc->rcv_opts.sacks, tmp); } - if (vec_len (tc->opt.sacks) == 0) + if (vec_len (tc->rcv_opts.sacks) == 0) return; /* Make sure blocks are ordered */ - for (i = 0; i < vec_len (tc->opt.sacks); i++) - for (j = i + 1; j < vec_len (tc->opt.sacks); j++) - if (seq_lt (tc->opt.sacks[j].start, tc->opt.sacks[i].start)) + for (i = 0; i < vec_len (tc->rcv_opts.sacks); i++) + for (j = i + 1; j < vec_len (tc->rcv_opts.sacks); j++) + if (seq_lt (tc->rcv_opts.sacks[j].start, tc->rcv_opts.sacks[i].start)) { - tmp = tc->opt.sacks[i]; - tc->opt.sacks[i] = tc->opt.sacks[j]; - tc->opt.sacks[j] = tmp; + tmp = tc->rcv_opts.sacks[i]; + tc->rcv_opts.sacks[i] = tc->rcv_opts.sacks[j]; + tc->rcv_opts.sacks[j] = tmp; } if (sb->head == TCP_INVALID_SACK_HOLE_INDEX) @@ -585,25 +731,25 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, tc->snd_una, tc->snd_una_max); sb->tail = scoreboard_hole_index (sb, last_hole); - tmp = tc->opt.sacks[vec_len (tc->opt.sacks) - 1]; - 
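A condensed restatement of the RFC 6675 NextSeg() decision that scoreboard_next_rxt_hole implements; this sketch only models the four rules, not the scoreboard walk itself, and the names are hypothetical:

typedef enum
{
  EX_RXT_HOLE,			/* retransmit from the hole */
  EX_SEND_NEW,			/* let output transmit new data instead */
  EX_RESCUE,			/* one rescue retransmit, high_rxt unchanged */
} example_next_seg_t;

static example_next_seg_t
example_next_seg (int hole_is_lost, int below_high_sacked,
		  int have_sent_1_smss)
{
  if (hole_is_lost && below_high_sacked)
    return EX_RXT_HOLE;		/* rule (1) */
  if (!have_sent_1_smss)
    return EX_SEND_NEW;		/* rule (2) */
  if (below_high_sacked)
    return EX_RXT_HOLE;		/* rule (3), sender is snd_limited */
  return EX_RESCUE;		/* rule (4), hole beyond high_sacked */
}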
sb->max_byte_sacked = tmp.end; + tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1]; + sb->high_sacked = tmp.end; } else { /* If we have holes but snd_una_max is beyond the last hole, update * last hole end */ - tmp = tc->opt.sacks[vec_len (tc->opt.sacks) - 1]; + tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1]; last_hole = scoreboard_last_hole (sb); - if (seq_gt (tc->snd_una_max, sb->max_byte_sacked) + if (seq_gt (tc->snd_una_max, sb->high_sacked) && seq_gt (tc->snd_una_max, last_hole->end)) last_hole->end = tc->snd_una_max; } /* Walk the holes with the SACK blocks */ hole = pool_elt_at_index (sb->holes, sb->head); - while (hole && blk_index < vec_len (tc->opt.sacks)) + while (hole && blk_index < vec_len (tc->rcv_opts.sacks)) { - blk = &tc->opt.sacks[blk_index]; + blk = &tc->rcv_opts.sacks[blk_index]; if (seq_leq (blk->start, hole->start)) { @@ -617,9 +763,9 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { /* Bytes lost because snd_wnd left edge advances */ if (next_hole && seq_leq (next_hole->start, ack)) - delivered_bytes += next_hole->start - hole->end; + sb->last_bytes_delivered += next_hole->start - hole->end; else - delivered_bytes += ack - hole->end; + sb->last_bytes_delivered += ack - hole->end; } else { @@ -633,8 +779,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) last_hole = scoreboard_last_hole (sb); /* keep track of max byte sacked for when the last hole * is acked */ - if (seq_gt (hole->end, sb->max_byte_sacked)) - sb->max_byte_sacked = hole->end; + if (seq_gt (hole->end, sb->high_sacked)) + sb->high_sacked = hole->end; } /* snd_una needs to be advanced */ @@ -645,12 +791,12 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) sb->snd_una_adv = next_hole->start - ack; /* all these can be delivered */ - delivered_bytes += sb->snd_una_adv; + sb->last_bytes_delivered += sb->snd_una_adv; } else if (!next_hole) { - sb->snd_una_adv = sb->max_byte_sacked - ack; - delivered_bytes += sb->snd_una_adv; + sb->snd_una_adv = sb->high_sacked - ack; + sb->last_bytes_delivered += sb->snd_una_adv; } } @@ -691,28 +837,33 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) } blk_index++; - hole = scoreboard_next_hole (sb, hole); } - else + else if (seq_leq (blk->start, hole->end)) { sb->sacked_bytes += hole->end - blk->start; hole->end = blk->start; - hole = scoreboard_next_hole (sb, hole); } + + hole = scoreboard_next_hole (sb, hole); } } sb->last_sacked_bytes = sb->sacked_bytes - old_sacked_bytes; - sb->sacked_bytes -= delivered_bytes; + sb->sacked_bytes -= sb->last_bytes_delivered; + scoreboard_update_lost (tc, sb); } -/** Update snd_wnd +/** + * Try to update snd_wnd based on feedback received from peer. * - * If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set - * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ + * If successful, and new window is 'effectively' 0, activate persist + * timer. 
+ */ static void tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) { + /* If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and SND.WL2 =< SEG.ACK)), set + * SND.WND <- SEG.WND, set SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK */ if (seq_lt (tc->snd_wl1, seq) || (tc->snd_wl1 == seq && seq_leq (tc->snd_wl2, ack))) { @@ -721,138 +872,269 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd) tc->snd_wl2 = ack; TCP_EVT_DBG (TCP_EVT_SND_WND, tc); - /* Set probe timer if we just got 0 wnd */ if (tc->snd_wnd < tc->snd_mss) { - if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST)) + /* Set persist timer if not set and we just got 0 wnd */ + if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST) + && !tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)) tcp_persist_timer_set (tc); } else - tcp_persist_timer_reset (tc); + { + tcp_persist_timer_reset (tc); + if (!tcp_in_recovery (tc) && tc->rto_boff > 0) + { + tc->rto_boff = 0; + tcp_update_rto (tc); + } + } } } void -tcp_cc_congestion (tcp_connection_t * tc) +tcp_cc_init_congestion (tcp_connection_t * tc) { - tc->snd_congestion = tc->snd_nxt; + tcp_fastrecovery_on (tc); + tc->snd_congestion = tc->snd_una_max; tc->cc_algo->congestion (tc); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 4); } -void -tcp_cc_recover (tcp_connection_t * tc) +static void +tcp_cc_recovery_exit (tcp_connection_t * tc) { - /* TODO: check if time to recover was small. It might be that RTO popped - * too soon. - */ + /* Deflate rto */ + tcp_update_rto (tc); + tc->rto_boff = 0; + tc->snd_rxt_ts = 0; + tcp_recovery_off (tc); +} +void +tcp_cc_fastrecovery_exit (tcp_connection_t * tc) +{ tc->cc_algo->recovered (tc); + tc->snd_rxt_bytes = 0; + tc->rcv_dupacks = 0; + tcp_fastrecovery_off (tc); + tcp_fastrecovery_1_smss_off (tc); +} - tc->rtx_bytes = 0; +static void +tcp_cc_congestion_undo (tcp_connection_t * tc) +{ + tc->cwnd = tc->prev_cwnd; + tc->ssthresh = tc->prev_ssthresh; + tc->snd_nxt = tc->snd_una_max; tc->rcv_dupacks = 0; - tc->snd_nxt = tc->snd_una; + if (tcp_in_recovery (tc)) + tcp_cc_recovery_exit (tc); + ASSERT (tc->rto_boff == 0); + /* TODO extend for fastrecovery */ +} - tc->cc_algo->rcv_ack (tc); - tc->tsecr_last_ack = tc->opt.tsecr; +static u8 +tcp_cc_is_spurious_retransmit (tcp_connection_t * tc) +{ + return (tc->snd_rxt_ts + && tcp_opts_tstamp (&tc->rcv_opts) + && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts)); +} - tcp_cong_recovery_off (tc); +int +tcp_cc_recover (tcp_connection_t * tc) +{ + ASSERT (tcp_in_cong_recovery (tc)); + if (tcp_cc_is_spurious_retransmit (tc)) + { + tcp_cc_congestion_undo (tc); + return 1; + } + + if (tcp_in_recovery (tc)) + tcp_cc_recovery_exit (tc); + else if (tcp_in_fastrecovery (tc)) + tcp_cc_fastrecovery_exit (tc); + + ASSERT (tc->rto_boff == 0); + ASSERT (!tcp_in_cong_recovery (tc)); TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3); + return 0; } static void -tcp_cc_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b) +tcp_cc_update (tcp_connection_t * tc, vlib_buffer_t * b) +{ + ASSERT (!tcp_in_cong_recovery (tc)); + + /* Congestion avoidance */ + tc->cc_algo->rcv_ack (tc); + tc->tsecr_last_ack = tc->rcv_opts.tsecr; + + /* If a cumulative ack, make sure dupacks is 0 */ + tc->rcv_dupacks = 0; + + /* When dupacks hits the threshold we only enter fast retransmit if + * cumulative ack covers more than snd_congestion. Should snd_una + * wrap this test may fail under otherwise valid circumstances. + * Therefore, proactively update snd_congestion when wrap detected. 
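tcp_cc_is_spurious_retransmit above is the Eifel check from RFC 3522 named in the commit message: if the peer echoes a timestamp taken before the first retransmission was sent, the ACK was triggered by the original transmission, so the retransmit was spurious and tcp_cc_congestion_undo restores cwnd and ssthresh. A standalone sketch with hypothetical names, using the usual serial timestamp comparison:

#include <stdint.h>

static int
example_retransmit_was_spurious (uint32_t tsecr, uint32_t first_rxt_ts)
{
  /* first_rxt_ts == 0 means nothing has been retransmitted yet;
   * (int32_t)(a - b) < 0 is the wrap-safe "a before b" test */
  return first_rxt_ts != 0 && (int32_t) (tsecr - first_rxt_ts) < 0;
}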
*/
+  if (PREDICT_FALSE
+      (seq_leq (tc->snd_congestion, tc->snd_una - tc->bytes_acked)
+       && seq_gt (tc->snd_congestion, tc->snd_una)))
+    tc->snd_congestion = tc->snd_una - 1;
+}
+
+static u8
+tcp_should_fastrecover_sack (tcp_connection_t * tc)
 {
-  u8 partial_ack;
-  u32 bytes_advanced;
+  return (TCP_DUPACK_THRESHOLD - 1) * tc->snd_mss < tc->sack_sb.sacked_bytes;
+}

-  if (tcp_in_fastrecovery (tc))
+static u8
+tcp_should_fastrecover (tcp_connection_t * tc)
+{
+  return (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD
+          || tcp_should_fastrecover_sack (tc));
+}
+
+static void
+tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
+{
+  /*
+   * Duplicate ACK. Check if we should enter fast recovery, or if already in
+   * it account for the bytes that left the network.
+   */
+  if (is_dack)
     {
-      partial_ack = seq_lt (tc->snd_una, tc->snd_congestion);
-      if (!partial_ack)
+      ASSERT (tc->snd_una != tc->snd_una_max
+              || tc->sack_sb.last_sacked_bytes);
+      tc->rcv_dupacks++;
+
+      if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD && !tc->bytes_acked)
        {
-         /* Clear retransmitted bytes. */
-         tcp_cc_recover (tc);
+         ASSERT (tcp_in_fastrecovery (tc));
+         /* Pure duplicate ack. If some data got acked, it's handled below */
+         tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+         return;
        }
-      else
+      else if (tcp_should_fastrecover (tc))
        {
-         TCP_EVT_DBG (TCP_EVT_CC_PACK, tc);
+         /* Things are already bad */
+         if (tcp_in_cong_recovery (tc))
+           {
+             tc->rcv_dupacks = 0;
+             goto partial_ack_test;
+           }

-         /* Clear retransmitted bytes. XXX should we clear all? */
-         tc->rtx_bytes = 0;
+         /* If one of the two conditions below holds, reset dupacks
+          * 1) Cumulative ack does not cover more than congestion threshold
+          * 2) RFC6582 heuristic to avoid multiple fast retransmits
+          */
+         if (seq_leq (tc->snd_una, tc->snd_congestion)
+             || tc->rcv_opts.tsecr != tc->tsecr_last_ack)
+           {
+             tc->rcv_dupacks = 0;
+             return;
+           }
+
+         tcp_cc_init_congestion (tc);
+         tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);

-         tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK);
+         /* The first segment MUST be retransmitted */
+         tcp_retransmit_first_unacked (tc);

-         /* In case snd_nxt is still in the past and output tries to
-          * shove some new bytes */
-         tc->snd_nxt = tc->snd_una_max;
+         /* Post retransmit update cwnd to ssthresh and account for the
+          * three segments that have left the network and should've been
+          * buffered at the receiver XXX */
+         tc->cwnd = tc->ssthresh + tc->rcv_dupacks * tc->snd_mss;

-         /* XXX need proper RFC6675 support */
-         if (tc->sack_sb.last_sacked_bytes && !tcp_in_recovery (tc))
+         /* If cwnd allows, send more data */
+         if (tcp_opts_sack_permitted (&tc->rcv_opts)
+             && scoreboard_first_hole (&tc->sack_sb))
            {
-             tcp_fast_retransmit (tc);
+             scoreboard_init_high_rxt (&tc->sack_sb);
+             tcp_fast_retransmit_sack (tc);
            }
          else
            {
-             /* Retransmit first unacked segment */
-             tcp_retransmit_first_unacked (tc);
+             tcp_fast_retransmit_no_sack (tc);
            }
+
+         return;
        }
-    }
-  else
-    {
-      tc->cc_algo->rcv_ack (tc);
-      tc->tsecr_last_ack = tc->opt.tsecr;
-      tc->rcv_dupacks = 0;
-      if (tcp_in_recovery (tc))
+      else if (!tc->bytes_acked
+               || (tc->bytes_acked && !tcp_in_cong_recovery (tc)))
        {
-         bytes_advanced = tc->bytes_acked + tc->sack_sb.snd_una_adv;
-         tc->rtx_bytes -= clib_min (bytes_advanced, tc->rtx_bytes);
-         tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX);
-         if (seq_geq (tc->snd_una, tc->snd_congestion))
-           {
-             tc->rtx_bytes = 0;
-             tcp_recovery_off (tc);
-           }
+         tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
+         return;
        }
+      else
+       goto partial_ack;
     }
-}

-static void
-tcp_cc_rcv_dupack
(tcp_connection_t * tc, u32 ack) -{ -// ASSERT (seq_geq(tc->snd_una, ack)); +partial_ack_test: + + if (!tc->bytes_acked) + return; + +partial_ack: + /* + * Legitimate ACK. 1) See if we can exit recovery + */ + /* XXX limit this only to first partial ack? */ + tcp_retransmit_timer_update (tc); - tc->rcv_dupacks++; - if (tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) + if (seq_geq (tc->snd_una, tc->snd_congestion)) { - /* RFC6582 NewReno heuristic to avoid multiple fast retransmits */ - if (tc->opt.tsecr != tc->tsecr_last_ack) - { - tc->rcv_dupacks = 0; - return; - } + /* If spurious return, we've already updated everything */ + if (tcp_cc_recover (tc)) + return; + + tc->snd_nxt = tc->snd_una_max; - tcp_fastrecovery_on (tc); + /* Treat as congestion avoidance ack */ + tc->cc_algo->rcv_ack (tc); + tc->tsecr_last_ack = tc->rcv_opts.tsecr; + return; + } + + /* + * Legitimate ACK. 2) If PARTIAL ACK try to retransmit + */ + TCP_EVT_DBG (TCP_EVT_CC_PACK, tc); + + /* RFC6675: If the incoming ACK is a cumulative acknowledgment, + * reset dupacks to 0 */ + tc->rcv_dupacks = 0; - /* Handle congestion and dupack */ - tcp_cc_congestion (tc); - tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + tcp_retransmit_first_unacked (tc); - tcp_fast_retransmit (tc); + /* Post RTO timeout don't try anything fancy */ + if (tcp_in_recovery (tc)) + return; - /* Post retransmit update cwnd to ssthresh and account for the - * three segments that have left the network and should've been - * buffered at the receiver */ - tc->cwnd = tc->ssthresh + TCP_DUPACK_THRESHOLD * tc->snd_mss; + /* Remove retransmitted bytes that have been delivered */ + if (tc->sack_sb.last_bytes_delivered + && seq_gt (tc->sack_sb.high_rxt, tc->snd_una)) + { + /* If we have sacks and we haven't gotten an ack beyond high_rxt, + * remove sacked bytes delivered */ + tc->snd_rxt_bytes -= tc->sack_sb.last_bytes_delivered; } - else if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD) + else { - ASSERT (tcp_in_fastrecovery (tc)); - - tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK); + /* Either all retransmitted holes have been acked, or we're + * "in the blind" and retransmitting segment by segment */ + tc->snd_rxt_bytes = 0; } + + tc->cc_algo->rcv_cong_ack (tc, TCP_CC_PARTIALACK); + + /* + * Since this was a partial ack, try to retransmit some more data + */ + tcp_fast_retransmit (tc); } void @@ -862,14 +1144,18 @@ tcp_cc_init (tcp_connection_t * tc) tc->cc_algo->init (tc); } +/** + * Process incoming ACK + */ static int tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, tcp_header_t * th, u32 * next, u32 * error) { - u32 new_snd_wnd; + u32 prev_snd_wnd, prev_snd_una; + u8 is_dack; /* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */ - if (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)) + if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt))) { /* If we have outstanding data and this is within the window, accept it, * probably retransmit has timed out. 
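 * (Editor's note, illustration only: e.g. snd_una = 1000, snd_una_max =
 * 1400, and snd_nxt pulled back to 1000 by the retransmit handler; an
 * incoming ack_number of 1400 then satisfies SEG.ACK > SND.NXT yet
 * covers bytes that really were sent, so it is accepted rather than
 * dropped. The concrete numbers are invented for the example.)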
Otherwise ACK segment and then @@ -892,7 +1178,7 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, } /* If old ACK, probably it's an old dupack */ - if (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una)) + if (PREDICT_FALSE (seq_lt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))) { *error = TCP_ERROR_ACK_OLD; TCP_EVT_DBG (TCP_EVT_ACK_RCV_ERR, tc, 1, @@ -900,54 +1186,50 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, if (tcp_in_fastrecovery (tc) && tc->rcv_dupacks == TCP_DUPACK_THRESHOLD) { TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc); - tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); + tcp_cc_handle_event (tc, 1); } /* Don't drop yet */ return 0; } - if (tcp_opts_sack_permitted (&tc->opt)) - tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); - - new_snd_wnd = clib_net_to_host_u16 (th->window) << tc->snd_wscale; - - if (tcp_ack_is_dupack (tc, b, new_snd_wnd)) - { - TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1); - tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); - *error = TCP_ERROR_ACK_DUP; - return -1; - } - /* - * Valid ACK + * Looks okay, process feedback */ - tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; - tc->snd_una = vnet_buffer (b)->tcp.ack_number + tc->sack_sb.snd_una_adv; + TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc); + + if (tcp_opts_sack_permitted (&tc->rcv_opts)) + tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number); - /* Dequeue ACKed data and update RTT */ - tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); + prev_snd_wnd = tc->snd_wnd; + prev_snd_una = tc->snd_una; tcp_update_snd_wnd (tc, vnet_buffer (b)->tcp.seq_number, - vnet_buffer (b)->tcp.ack_number, new_snd_wnd); + vnet_buffer (b)->tcp.ack_number, + clib_net_to_host_u16 (th->window) << tc->snd_wscale); + tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una; + tc->snd_una = vnet_buffer (b)->tcp.ack_number + tc->sack_sb.snd_una_adv; + tcp_validate_txf_size (tc, tc->bytes_acked); - /* If some of our sent bytes have been acked, update cc and retransmit - * timer. */ if (tc->bytes_acked) - { - TCP_EVT_DBG (TCP_EVT_ACK_RCVD, tc); + tcp_dequeue_acked (tc, vnet_buffer (b)->tcp.ack_number); - /* Updates congestion control (slow start/congestion avoidance) */ - tcp_cc_rcv_ack (tc, b); + /* + * Check if we have congestion event + */ - /* If everything has been acked, stop retransmit timer - * otherwise update. */ - if (tc->snd_una == tc->snd_una_max) - tcp_retransmit_timer_reset (tc); - else - tcp_retransmit_timer_update (tc); + if (tcp_ack_is_cc_event (tc, b, prev_snd_wnd, prev_snd_una, &is_dack)) + { + tcp_cc_handle_event (tc, is_dack); + *error = TCP_ERROR_ACK_DUP; + TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc, 1); + return vnet_buffer (b)->tcp.data_len ? 
0 : -1; } + /* + * Update congestion control (slow start/congestion avoidance) + */ + tcp_cc_update (tc, b); + return 0; } @@ -1059,7 +1341,7 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b, } /* Update SACK list if need be */ - if (tcp_opts_sack_permitted (&tc->opt)) + if (tcp_opts_sack_permitted (&tc->rcv_opts)) { /* Remove SACK blocks that have been delivered */ tcp_update_sack_list (tc, tc->rcv_nxt, tc->rcv_nxt); @@ -1097,7 +1379,7 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, TCP_EVT_DBG (TCP_EVT_INPUT, tc, 1, data_len, data_len); /* Update SACK list if in use */ - if (tcp_opts_sack_permitted (&tc->opt)) + if (tcp_opts_sack_permitted (&tc->rcv_opts)) { ooo_segment_t *newest; u32 start, end; @@ -1294,7 +1576,6 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_left_to_next; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from > 0 && n_left_to_next > 0) { u32 bi0; @@ -1321,7 +1602,6 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } th0 = tcp_buffer_hdr (b0); - is_fin = (th0->flags & TCP_FLAG_FIN) != 0; /* SYNs, FINs and data consume sequence numbers */ @@ -1387,7 +1667,6 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, errors = session_manager_flush_enqueue_events (my_thread_index); tcp_established_inc_counter (vm, is_ip4, TCP_ERROR_EVENT_FIFO_FULL, errors); - return from_frame->n_vectors; } @@ -1582,17 +1861,17 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_tc0->irs = seq0; /* Parse options */ - if (tcp_options_parse (tcp0, &new_tc0->opt)) + if (tcp_options_parse (tcp0, &new_tc0->rcv_opts)) goto drop; - if (tcp_opts_tstamp (&new_tc0->opt)) + if (tcp_opts_tstamp (&new_tc0->rcv_opts)) { - new_tc0->tsval_recent = new_tc0->opt.tsval; + new_tc0->tsval_recent = new_tc0->rcv_opts.tsval; new_tc0->tsval_recent_age = tcp_time_now (); } - if (tcp_opts_wscale (&new_tc0->opt)) - new_tc0->snd_wscale = new_tc0->opt.wscale; + if (tcp_opts_wscale (&new_tc0->rcv_opts)) + new_tc0->snd_wscale = new_tc0->rcv_opts.wscale; /* No scaling */ new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window); @@ -1845,7 +2124,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Initialize session variables */ tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window) - << tc0->opt.wscale; + << tc0->rcv_opts.wscale; tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number; tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; @@ -1903,13 +2182,21 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, break; case TCP_STATE_LAST_ACK: - /* The only thing that can arrive in this state is an + /* The only thing that [should] arrive in this state is an * acknowledgment of our FIN. If our FIN is now acknowledged, * delete the TCB, enter the CLOSED state, and return. */ if (!tcp_rcv_ack_is_acceptable (tc0, b0)) goto drop; + /* Apparently our FIN was lost */ + if (tcp_fin (tcp0)) + { + /* Don't "make" fin since that increments snd_nxt */ + tcp_send_fin (tc0); + goto drop; + } + tc0->state = TCP_STATE_CLOSED; /* Don't delete the connection/session yet. Instead, wait a @@ -1929,8 +2216,15 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * retransmission of the remote FIN. Acknowledge it, and restart * the 2 MSL timeout. 
*/ - /* TODO */ + if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) + goto drop; + + tcp_make_ack (tc0, b0); + tcp_timer_reset (tc0, TCP_TIMER_WAITCLOSE); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + goto drop; + break; default: ASSERT (0); @@ -2194,7 +2488,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto drop; } - if (tcp_options_parse (th0, &child0->opt)) + if (tcp_options_parse (th0, &child0->rcv_opts)) { goto drop; } @@ -2205,14 +2499,14 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* RFC1323: TSval timestamps sent on {SYN} and {SYN,ACK} * segments are used to initialize PAWS. */ - if (tcp_opts_tstamp (&child0->opt)) + if (tcp_opts_tstamp (&child0->rcv_opts)) { - child0->tsval_recent = child0->opt.tsval; + child0->tsval_recent = child0->rcv_opts.tsval; child0->tsval_recent_age = tcp_time_now (); } - if (tcp_opts_wscale (&child0->opt)) - child0->snd_wscale = child0->opt.wscale; + if (tcp_opts_wscale (&child0->rcv_opts)) + child0->snd_wscale = child0->rcv_opts.wscale; /* No scaling */ child0->snd_wnd = clib_net_to_host_u16 (th0->window); @@ -2477,7 +2771,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_add_trace (vm, node, b0, sizeof (*t0)); tcp_set_rx_trace_data (t0, tc0, tcp0, b0, is_ip4); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } @@ -2600,7 +2893,13 @@ do { \ _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(TIME_WAIT, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(TIME_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, + TCP_ERROR_NONE); _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED); _(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED); #undef _ diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c index 3525f4e5..c66250e4 100644 --- a/src/vnet/tcp/tcp_newreno.c +++ b/src/vnet/tcp/tcp_newreno.c @@ -51,9 +51,23 @@ newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type) } else if (ack_type == TCP_CC_PARTIALACK) { - tc->cwnd -= tc->bytes_acked; - if (tc->bytes_acked > tc->snd_mss) - tc->bytes_acked += tc->snd_mss; + /* RFC 6582 Sec. 3.2 */ + if (!tcp_opts_sack_permitted (&tc->rcv_opts)) + { + /* Deflate the congestion window by the amount of new data + * acknowledged by the Cumulative Acknowledgment field. + * If the partial ACK acknowledges at least one SMSS of new data, + * then add back SMSS bytes to the congestion window. This + * artificially inflates the congestion window in order to reflect + * the additional segment that has left the network. This "partial + * window deflation" attempts to ensure that, when fast recovery + * eventually ends, approximately ssthresh amount of data will be + * outstanding in the network.*/ + tc->cwnd = (tc->cwnd > tc->bytes_acked) ? 
+ tc->cwnd - tc->bytes_acked : 0; + if (tc->bytes_acked > tc->snd_mss) + tc->cwnd += tc->snd_mss; + } } } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 49fd6bef..47c94e6d 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -136,10 +136,10 @@ tcp_update_rcv_wnd (tcp_connection_t * tc) * Figure out how much space we have available */ available_space = stream_session_max_rx_enqueue (&tc->connection); - max_fifo = stream_session_fifo_size (&tc->connection); + max_fifo = stream_session_rx_fifo_size (&tc->connection); - ASSERT (tc->opt.mss < max_fifo); - if (available_space < tc->opt.mss && available_space < max_fifo >> 3) + ASSERT (tc->rcv_opts.mss < max_fifo); + if (available_space < tc->rcv_opts.mss && available_space < max_fifo >> 3) available_space = 0; /* @@ -276,8 +276,11 @@ tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale) opts->tsecr = 0; len += TCP_OPTION_LEN_TIMESTAMP; - opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; - len += TCP_OPTION_LEN_SACK_PERMITTED; + if (TCP_USE_SACKS) + { + opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; + len += TCP_OPTION_LEN_SACK_PERMITTED; + } /* Align to needed boundary */ len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN; @@ -293,14 +296,14 @@ tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) opts->mss = tc->mss; len += TCP_OPTION_LEN_MSS; - if (tcp_opts_wscale (&tc->opt)) + if (tcp_opts_wscale (&tc->rcv_opts)) { opts->flags |= TCP_OPTS_FLAG_WSCALE; opts->wscale = tc->rcv_wscale; len += TCP_OPTION_LEN_WINDOW_SCALE; } - if (tcp_opts_tstamp (&tc->opt)) + if (tcp_opts_tstamp (&tc->rcv_opts)) { opts->flags |= TCP_OPTS_FLAG_TSTAMP; opts->tsval = tcp_time_now (); @@ -308,7 +311,7 @@ tcp_make_synack_options (tcp_connection_t * tc, tcp_options_t * opts) len += TCP_OPTION_LEN_TIMESTAMP; } - if (tcp_opts_sack_permitted (&tc->opt)) + if (tcp_opts_sack_permitted (&tc->rcv_opts)) { opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; len += TCP_OPTION_LEN_SACK_PERMITTED; @@ -326,14 +329,14 @@ tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts) opts->flags = 0; - if (tcp_opts_tstamp (&tc->opt)) + if (tcp_opts_tstamp (&tc->rcv_opts)) { opts->flags |= TCP_OPTS_FLAG_TSTAMP; opts->tsval = tcp_time_now (); opts->tsecr = tc->tsval_recent; len += TCP_OPTION_LEN_TIMESTAMP; } - if (tcp_opts_sack_permitted (&tc->opt)) + if (tcp_opts_sack_permitted (&tc->rcv_opts)) { if (vec_len (tc->snd_sacks)) { @@ -395,7 +398,7 @@ tcp_update_snd_mss (tcp_connection_t * tc) tcp_make_options (tc, &tc->snd_opts, TCP_STATE_ESTABLISHED); /* XXX check if MTU has been updated */ - tc->snd_mss = clib_min (tc->mss, tc->opt.mss) - tc->snd_opts_len; + tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len; ASSERT (tc->snd_mss > 0); } @@ -406,21 +409,21 @@ tcp_init_mss (tcp_connection_t * tc) tcp_update_rcv_mss (tc); /* TODO cache mss and consider PMTU discovery */ - tc->snd_mss = clib_min (tc->opt.mss, tc->mss); + tc->snd_mss = clib_min (tc->rcv_opts.mss, tc->mss); if (tc->snd_mss < 45) { clib_warning ("snd mss is 0"); /* Assume that at least the min default mss works */ tc->snd_mss = default_min_mss; - tc->opt.mss = default_min_mss; + tc->rcv_opts.mss = default_min_mss; } /* We should have enough space for 40 bytes of options */ ASSERT (tc->snd_mss > 45); /* If we use timestamp option, account for it */ - if (tcp_opts_tstamp (&tc->opt)) + if (tcp_opts_tstamp (&tc->rcv_opts)) tc->snd_mss -= TCP_OPTION_LEN_TIMESTAMP; } @@ -879,6 +882,7 @@ tcp_send_fin (tcp_connection_t * tc) tcp_make_fin 
(tc, b); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); tc->flags |= TCP_CONN_FINSNT; + tcp_retransmit_timer_force_update (tc); TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); } @@ -919,10 +923,7 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, if (compute_opts) tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); - /* Write pre-computed options */ tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t); - - /* Get rcv window to advertise */ advertise_wnd = tcp_window_to_advertise (tc, next_state); flags = tcp_make_state_flags (next_state); @@ -930,26 +931,25 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, tc->rcv_nxt, tcp_hdr_opts_len, flags, advertise_wnd); - opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); ASSERT (opts_write_len == tc->snd_opts_len); - - /* Tag the buffer with the connection index */ vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + /* + * Update connection variables + */ + tc->snd_nxt += data_len; tc->rcv_las = tc->rcv_nxt; /* TODO this is updated in output as well ... */ - if (tc->snd_nxt > tc->snd_una_max) - tc->snd_una_max = tc->snd_nxt; - - if (tc->rtt_ts == 0) + if (seq_gt (tc->snd_nxt, tc->snd_una_max)) { - tc->rtt_ts = tcp_time_now (); - tc->rtt_seq = tc->snd_nxt; + tc->snd_una_max = tc->snd_nxt; + tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); } + TCP_EVT_DBG (TCP_EVT_PKTIZE, tc); } @@ -987,13 +987,14 @@ tcp_timer_delack_handler (u32 index) * * @return the number of bytes in the segment or 0 if there's nothing to * retransmit - * */ + */ u32 tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, u32 offset, u32 max_bytes) { vlib_main_t *vm = vlib_get_main (); - u32 n_bytes = 0; + int n_bytes = 0; + u32 start; tcp_reuse_buffer (vm, b); @@ -1001,15 +1002,16 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, ASSERT (max_bytes != 0); max_bytes = clib_min (tc->snd_mss, max_bytes); + start = tc->snd_una + offset; /* Start is beyond snd_congestion */ - if (seq_geq (tc->snd_una + offset, tc->snd_congestion)) + if (seq_geq (start, tc->snd_congestion)) goto done; /* Don't overshoot snd_congestion */ - if (seq_gt (tc->snd_nxt + max_bytes, tc->snd_congestion)) + if (seq_gt (start + max_bytes, tc->snd_congestion)) { - max_bytes = tc->snd_congestion - tc->snd_nxt; + max_bytes = tc->snd_congestion - start; if (max_bytes == 0) goto done; } @@ -1021,15 +1023,12 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, n_bytes = stream_session_peek_bytes (&tc->connection, vlib_buffer_get_current (b), offset, max_bytes); - ASSERT (n_bytes != 0); + ASSERT (n_bytes > 0); b->current_length = n_bytes; tcp_push_hdr_i (tc, b, tc->state, 0); - /* Don't count multiple retransmits of the same segment */ - if (tc->rto_boff > 1) - goto done; - - tc->rtx_bytes += n_bytes; + if (tcp_in_fastrecovery (tc)) + tc->snd_rxt_bytes += n_bytes; done: TCP_EVT_DBG (TCP_EVT_CC_RTX, tc, offset, n_bytes); @@ -1042,18 +1041,15 @@ done: static void tcp_rtx_timeout_cc (tcp_connection_t * tc) { + tc->prev_ssthresh = tc->ssthresh; + tc->prev_cwnd = tc->cwnd; + /* Cleanly recover cc (also clears up fast retransmit) */ if (tcp_in_fastrecovery (tc)) - { - tcp_cc_recover (tc); - } - else - { - tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); - } + tcp_cc_fastrecovery_exit (tc); /* Start again from the beginning */ - + tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss); 
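/* (Editor's note, illustration only: the line above is the RFC 5681
 * loss response, ssthresh = max (FlightSize / 2, 2 * SMSS). With
 * snd_mss = 1460 and 100000 bytes in flight, ssthresh becomes 50000;
 * with only 2000 bytes in flight the 2 * 1460 = 2920 floor wins.
 * tcp_loss_wnd () on the next line is then expected to drop cwnd to
 * the loss window, conventionally one segment.) */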
tc->cwnd = tcp_loss_wnd (tc); tc->snd_congestion = tc->snd_una_max; tcp_recovery_on (tc); @@ -1081,18 +1077,31 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Make sure timer handle is set to invalid */ tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; + if (!tcp_in_recovery (tc) && tc->rto_boff > 0 + && tc->state >= TCP_STATE_ESTABLISHED) + { + tc->rto_boff = 0; + tcp_update_rto (tc); + } + /* Increment RTO backoff (also equal to number of retries) */ tc->rto_boff += 1; /* Go back to first un-acked byte */ tc->snd_nxt = tc->snd_una; - /* Get buffer */ tcp_get_free_buffer_index (tm, &bi); b = vlib_get_buffer (vm, bi); if (tc->state >= TCP_STATE_ESTABLISHED) { + /* Lost FIN, retransmit and return */ + if (tc->flags & TCP_CONN_FINSNT) + { + tcp_send_fin (tc); + return; + } + /* First retransmit timeout */ if (tc->rto_boff == 1) tcp_rtx_timeout_cc (tc); @@ -1102,24 +1111,30 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1); - /* Send one segment. No fancy recovery for now! */ + /* Send one segment */ n_bytes = tcp_prepare_retransmit_segment (tc, b, 0, tc->snd_mss); + /* TODO be less aggressive about this */ scoreboard_clear (&tc->sack_sb); if (n_bytes == 0) { clib_warning ("could not retransmit anything"); + clib_warning ("%U", format_tcp_connection, tc, 2); + /* Try again eventually */ tcp_retransmit_timer_set (tc); + ASSERT (0 || (tc->rto_boff > 1 + && tc->snd_una == tc->snd_congestion)); return; } + + /* For first retransmit, record timestamp (Eifel detection RFC3522) */ + if (tc->rto_boff == 1) + tc->snd_rxt_ts = tcp_time_now (); } - else + /* Retransmit for SYN/SYNACK */ + else if (tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_SYN_SENT) { - /* Retransmit for SYN/SYNACK */ - ASSERT (tc->state == TCP_STATE_SYN_RCVD - || tc->state == TCP_STATE_SYN_SENT); - /* Try without increasing RTO a number of times. 
If this fails, * start growing RTO exponentially */ if (tc->rto_boff > TCP_RTO_SYN_RETRIES) @@ -1132,6 +1147,12 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Account for the SYN */ tc->snd_nxt += 1; } + else + { + ASSERT (tc->state == TCP_STATE_CLOSED); + clib_warning ("connection closed ..."); + return; + } if (!is_syn) { @@ -1180,7 +1201,8 @@ tcp_timer_persist_handler (u32 index) u32 thread_index = vlib_get_thread_index (); tcp_connection_t *tc; vlib_buffer_t *b; - u32 bi, n_bytes; + u32 bi, old_snd_nxt; + int n_bytes = 0; tc = tcp_connection_get_if_valid (index, thread_index); @@ -1202,13 +1224,15 @@ tcp_timer_persist_handler (u32 index) /* Try to force the first unsent segment */ tcp_get_free_buffer_index (tm, &bi); b = vlib_get_buffer (vm, bi); + + tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); n_bytes = stream_session_peek_bytes (&tc->connection, vlib_buffer_get_current (b), tc->snd_una_max - tc->snd_una, tc->snd_mss); /* Nothing to send */ - if (n_bytes == 0) + if (n_bytes <= 0) { clib_warning ("persist found nothing to send"); tcp_return_buffer (tm); @@ -1216,7 +1240,13 @@ tcp_timer_persist_handler (u32 index) } b->current_length = n_bytes; + ASSERT (tc->snd_nxt == tc->snd_una_max || tc->rto_boff > 1 + || tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)); + + /* Allow updating of snd_una_max but don't update snd_nxt */ + old_snd_nxt = tc->snd_nxt; tcp_push_hdr_i (tc, b, tc->state, 0); + tc->snd_nxt = old_snd_nxt; tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); /* Re-enable persist timer */ @@ -1232,8 +1262,9 @@ tcp_retransmit_first_unacked (tcp_connection_t * tc) tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); vlib_buffer_t *b; - u32 bi, n_bytes; + u32 bi, n_bytes, old_snd_nxt; + old_snd_nxt = tc->snd_nxt; tc->snd_nxt = tc->snd_una; /* Get buffer */ @@ -1244,75 +1275,117 @@ tcp_retransmit_first_unacked (tcp_connection_t * tc) n_bytes = tcp_prepare_retransmit_segment (tc, b, 0, tc->snd_mss); if (n_bytes == 0) - goto done; + { + tcp_return_buffer (tm); + goto done; + } tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); done: - tc->snd_nxt = tc->snd_una_max; + tc->snd_nxt = old_snd_nxt; } -sack_scoreboard_hole_t * -scoreboard_first_rtx_hole (sack_scoreboard_t * sb) +/** + * Do fast retransmit with SACKs + */ +void +tcp_fast_retransmit_sack (tcp_connection_t * tc) { - sack_scoreboard_hole_t *hole = 0; - -// hole = scoreboard_first_hole (&tc->sack_sb); -// if (hole) -// { -// -// offset = hole->start - tc->snd_una; -// hole_size = hole->end - hole->start; -// -// ASSERT(hole_size); -// -// if (hole_size < max_bytes) -// max_bytes = hole_size; -// } - return hole; + tcp_main_t *tm = vnet_get_tcp_main (); + vlib_main_t *vm = vlib_get_main (); + u32 n_written = 0, offset = 0, max_bytes; + vlib_buffer_t *b; + sack_scoreboard_hole_t *hole; + sack_scoreboard_t *sb; + u32 bi, old_snd_nxt; + int snd_space; + u8 snd_limited = 0, can_rescue = 0; + + ASSERT (tcp_in_fastrecovery (tc)); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0); + + old_snd_nxt = tc->snd_nxt; + sb = &tc->sack_sb; + snd_space = tcp_available_snd_space (tc); + + hole = scoreboard_get_hole (sb, sb->cur_rxt_hole); + while (hole && snd_space > 0) + { + tcp_get_free_buffer_index (tm, &bi); + b = vlib_get_buffer (vm, bi); + + hole = scoreboard_next_rxt_hole (sb, hole, + tcp_fastrecovery_sent_1_smss (tc), + &can_rescue, &snd_limited); + if (!hole) + { + if (!can_rescue || !(seq_lt (sb->rescue_rxt, tc->snd_una) + || seq_gt 
(sb->rescue_rxt, + tc->snd_congestion))) + break; + + /* If rescue rxt undefined or less than snd_una then one segment of + * up to SMSS octets that MUST include the highest outstanding + * unSACKed sequence number SHOULD be returned, and RescueRxt set to + * RecoveryPoint. HighRxt MUST NOT be updated. + */ + max_bytes = clib_min (tc->snd_mss, snd_space); + offset = tc->snd_congestion - tc->snd_una - max_bytes; + sb->rescue_rxt = tc->snd_congestion; + tc->snd_nxt = tc->snd_una + offset; + tcp_prepare_retransmit_segment (tc, b, offset, max_bytes); + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + break; + } + + max_bytes = snd_limited ? tc->snd_mss : hole->end - sb->high_rxt; + offset = sb->high_rxt - tc->snd_una; + tc->snd_nxt = tc->snd_una + offset; + n_written = tcp_prepare_retransmit_segment (tc, b, offset, max_bytes); + + /* Nothing left to retransmit */ + if (n_written == 0) + { + tcp_return_buffer (tm); + break; + } + + sb->high_rxt += n_written; + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + snd_space -= n_written; + } + + /* If window allows, send 1 SMSS of new data */ + tc->snd_nxt = old_snd_nxt; } /** - * Do fast retransmit. + * Fast retransmit without SACK info */ void -tcp_fast_retransmit (tcp_connection_t * tc) +tcp_fast_retransmit_no_sack (tcp_connection_t * tc) { tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u32 bi; + u32 n_written = 0, offset = 0, bi, old_snd_nxt; int snd_space; - u32 n_written = 0, offset = 0; vlib_buffer_t *b; - u8 use_sacks = 0; ASSERT (tcp_in_fastrecovery (tc)); + TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0); /* Start resending from first un-acked segment */ + old_snd_nxt = tc->snd_nxt; tc->snd_nxt = tc->snd_una; - snd_space = tcp_available_snd_space (tc); - TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 0); - - /* If we have SACKs use them */ - if (tcp_opts_sack_permitted (&tc->opt) - && scoreboard_first_hole (&tc->sack_sb)) - use_sacks = 0; while (snd_space > 0) { tcp_get_free_buffer_index (tm, &bi); b = vlib_get_buffer (vm, bi); - if (use_sacks) - { - scoreboard_first_rtx_hole (&tc->sack_sb); - } - else - { - offset += n_written; - } - + offset += n_written; n_written = tcp_prepare_retransmit_segment (tc, b, offset, snd_space); /* Nothing left to retransmit */ @@ -1326,9 +1399,21 @@ tcp_fast_retransmit (tcp_connection_t * tc) snd_space -= n_written; } - /* If window allows, send 1 SMSS of new data */ - if (seq_lt (tc->snd_nxt, tc->snd_congestion)) - tc->snd_nxt = tc->snd_congestion; + /* Restore snd_nxt. 
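 * (Editor's note, illustration only: in the SACK loop above, with
 * snd_una = 1000, a hole covering [1000, 3000) and high_rxt = 1000,
 * the first pass retransmits from offset high_rxt - snd_una = 0 and
 * then advances high_rxt by n_written, so the next pass resumes
 * exactly where the previous one stopped. The concrete numbers are
 * invented for the example.)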
If window allows, send 1 SMSS of new data */ + tc->snd_nxt = old_snd_nxt; +} + +/** + * Do fast retransmit + */ +void +tcp_fast_retransmit (tcp_connection_t * tc) +{ + if (tcp_opts_sack_permitted (&tc->rcv_opts) + && scoreboard_first_hole (&tc->sack_sb)) + tcp_fast_retransmit_sack (tc); + else + tcp_fast_retransmit_no_sack (tc); } always_inline u32 @@ -1544,6 +1629,12 @@ tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) tc = (tcp_connection_t *) tconn; tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0); + + if (tc->rtt_ts == 0) + { + tc->rtt_ts = tcp_time_now (); + tc->rtt_seq = tc->snd_nxt; + } return 0; } diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index 2af38484..3f8afa40 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -54,7 +54,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->snd_una = 0; tc->snd_una_max = 1000; tc->snd_nxt = 1000; - tc->opt.flags |= TCP_OPTS_FLAG_SACK; + tc->rcv_opts.flags |= TCP_OPTS_FLAG_SACK; scoreboard_init (&tc->sack_sb); for (i = 0; i < 1000 / 100; i++) @@ -70,9 +70,9 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) for (i = 0; i < 1000 / 200; i++) { - vec_add1 (tc->opt.sacks, sacks[i * 2]); + vec_add1 (tc->rcv_opts.sacks, sacks[i * 2]); } - tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); + tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 0); if (verbose) @@ -93,18 +93,17 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); TCP_TEST ((sb->last_sacked_bytes == 400), "last sacked bytes %d", sb->last_sacked_bytes); - TCP_TEST ((sb->max_byte_sacked == 900), - "max byte sacked %u", sb->max_byte_sacked); + TCP_TEST ((sb->high_sacked == 900), "max byte sacked %u", sb->high_sacked); /* * Inject odd blocks */ - vec_reset_length (tc->opt.sacks); + vec_reset_length (tc->rcv_opts.sacks); for (i = 0; i < 1000 / 200; i++) { - vec_add1 (tc->opt.sacks, sacks[i * 2 + 1]); + vec_add1 (tc->rcv_opts.sacks, sacks[i * 2 + 1]); } - tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); + tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 0); if (verbose) @@ -118,8 +117,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) "first hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->sacked_bytes == 900), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); - TCP_TEST ((sb->max_byte_sacked == 1000), - "max sacked byte %u", sb->max_byte_sacked); + TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked); TCP_TEST ((sb->last_sacked_bytes == 500), "last sacked bytes %d", sb->last_sacked_bytes); @@ -135,8 +133,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) "scoreboard has %d elements", pool_elts (sb->holes)); TCP_TEST ((sb->snd_una_adv == 900), "snd_una_adv after ack %u", sb->snd_una_adv); - TCP_TEST ((sb->max_byte_sacked == 1000), - "max sacked byte %u", sb->max_byte_sacked); + TCP_TEST ((sb->high_sacked == 1000), "max sacked byte %u", sb->high_sacked); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); TCP_TEST ((sb->last_sacked_bytes == 0), "last sacked bytes %d", sb->last_sacked_bytes); @@ -145,11 +142,11 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) * Add new block */ - vec_reset_length (tc->opt.sacks); + vec_reset_length (tc->rcv_opts.sacks); block.start = 1200; block.end = 1300; - vec_add1 (tc->opt.sacks, 
block); + vec_add1 (tc->rcv_opts.sacks, block); if (verbose) vlib_cli_output (vm, "add [1200, 1300]:\n%U", format_tcp_scoreboard, sb); @@ -171,8 +168,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) "first hole start %u end %u", hole->start, hole->end); TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv after ack %u", sb->snd_una_adv); - TCP_TEST ((sb->max_byte_sacked == 1300), - "max sacked byte %u", sb->max_byte_sacked); + TCP_TEST ((sb->high_sacked == 1300), "max sacked byte %u", sb->high_sacked); hole = scoreboard_last_hole (sb); TCP_TEST ((hole->start == 1300 && hole->end == 1500), "last hole start %u end %u", hole->start, hole->end); @@ -182,7 +178,7 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) * Ack first hole */ - vec_reset_length (tc->opt.sacks); + vec_reset_length (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 1200); if (verbose) @@ -196,8 +192,16 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) "scoreboard has %d elements", pool_elts (sb->holes)); /* - * Remove all + * Add some more blocks and then remove all */ + vec_reset_length (tc->rcv_opts.sacks); + for (i = 0; i < 5; i++) + { + block.start = i * 100 + 1200; + block.end = (i + 1) * 100 + 1200; + vec_add1 (tc->rcv_opts.sacks, block); + } + tcp_rcv_sacks (tc, 1900); scoreboard_clear (sb); if (verbose) @@ -205,6 +209,9 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) TCP_TEST ((pool_elts (sb->holes) == 0), "number of holes %d", pool_elts (sb->holes)); + TCP_TEST ((sb->head == TCP_INVALID_SACK_HOLE_INDEX), "head %u", sb->head); + TCP_TEST ((sb->tail == TCP_INVALID_SACK_HOLE_INDEX), "tail %u", sb->tail); + /* * Re-inject odd blocks and ack them all */ @@ -214,9 +221,9 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) tc->snd_nxt = 1000; for (i = 0; i < 5; i++) { - vec_add1 (tc->opt.sacks, sacks[i * 2 + 1]); + vec_add1 (tc->rcv_opts.sacks, sacks[i * 2 + 1]); } - tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); + tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks); tcp_rcv_sacks (tc, 0); if (verbose) vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U", @@ -740,6 +747,10 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input) TCP_TEST (0, "[%d] peeked %u expected %u", j, data_buf[j], data[j]); } + /* Try to peek beyond the data */ + rv = svm_fifo_peek (f, svm_fifo_max_dequeue (f), vec_len (data), data_buf); + TCP_TEST ((rv == 0), "peeked %u expected 0", rv); + vec_free (data_buf); svm_fifo_free (f); vec_free (test_data); @@ -1239,7 +1250,7 @@ tcp_test_session (vlib_main_t * vm, unformat_input_t * input) tc0->c_thread_index = 0; tc0->c_lcl_ip4.as_u32 = local.as_u32; tc0->c_rmt_ip4.as_u32 = remote.as_u32; - tc0->opt.mss = 1450; + tc0->rcv_opts.mss = 1450; tcp_connection_init_vars (tc0); TCP_EVT_DBG (TCP_EVT_OPEN, tc0); -- cgit 1.2.3-korg From 215961829c4ae5f738ffcd01a8d1afcab13bd0e2 Mon Sep 17 00:00:00 2001 From: Colin Tregenza Dancer Date: Mon, 4 Sep 2017 15:27:49 +0100 Subject: Refork worker thread data structures in parallel (VPP-970) Change the rebuilding of worker thread clone datastructures to run in parallel on the workers, instead of serially on main. 
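(Editor's illustration, condensed from the diff below rather than copied
verbatim: the main thread now only syncs stats and arms a per-worker
refork counter before lifting the barrier, and every worker rebuilds its
own clone while it spins in the barrier check.)

  /* main thread, in vlib_worker_thread_barrier_release () */
  if (vm->need_vlib_worker_thread_node_runtime_update)
    {
      /* stats and next-frame sync still happen once, on main */
      worker_thread_node_runtime_update_internal ();
      vm->need_vlib_worker_thread_node_runtime_update = 0;
      clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
                           vec_len (vlib_mains) - 1);
    }

  /* each worker, in vlib_worker_thread_barrier_check () */
  if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required))
    {
      vlib_worker_thread_node_refork ();  /* rebuild this thread's clone */
      clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, -1);
    }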
Change-Id: Ib76bcfbef1e51f2399972090f4057be7aaa84e08 Signed-off-by: Colin Tregenza Dancer --- src/vlib/main.h | 6 + src/vlib/threads.c | 333 +++++++++++++++++++++++++++++++---------------------- src/vlib/threads.h | 11 ++ 3 files changed, 214 insertions(+), 136 deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/vlib/main.h b/src/vlib/main.h index bfa7ddbe..b63c63fa 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -181,6 +181,12 @@ typedef struct vlib_main_t /* Attempt to do a post-mortem elog dump */ int elog_post_mortem_dump; + /* + * Need to call vlib_worker_thread_node_runtime_update before + * releasing worker thread barrier. Only valid in vlib_global_main. + */ + int need_vlib_worker_thread_node_runtime_update; + } vlib_main_t; /* Global main structure. */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 0661d89a..6cd325b3 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -547,10 +547,17 @@ start_workers (vlib_main_t * vm) vlib_worker_threads->workers_at_barrier = clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); + vlib_worker_threads->node_reforks_required = + clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); + /* Ask for an initial barrier sync */ *vlib_worker_threads->workers_at_barrier = 0; *vlib_worker_threads->wait_at_barrier = 1; + /* Without update or refork */ + *vlib_worker_threads->node_reforks_required = 0; + vm->need_vlib_worker_thread_node_runtime_update = 0; + worker_thread_index = 1; for (i = 0; i < vec_len (tm->registrations); i++) @@ -568,6 +575,8 @@ start_workers (vlib_main_t * vm) for (k = 0; k < tr->count; k++) { + vlib_node_t *n; + vec_add2 (vlib_worker_threads, w, 1); if (tr->mheap_size) w->thread_mheap = @@ -628,10 +637,12 @@ start_workers (vlib_main_t * vm) /* fork nodes */ nm_clone->nodes = 0; + + /* Allocate all nodes in single block for speed */ + n = clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*n)); + for (j = 0; j < vec_len (nm->nodes); j++) { - vlib_node_t *n; - n = clib_mem_alloc_no_fail (sizeof (*n)); clib_memcpy (n, nm->nodes[j], sizeof (*n)); /* none of the copied nodes have enqueue rights given out */ n->owner_node_index = VLIB_INVALID_NODE_INDEX; @@ -639,6 +650,7 @@ start_workers (vlib_main_t * vm) memset (&n->stats_last_clear, 0, sizeof (n->stats_last_clear)); vec_add1 (nm_clone->nodes, n); + n++; } nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); @@ -778,17 +790,14 @@ start_workers (vlib_main_t * vm) VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers); -void -vlib_worker_thread_node_runtime_update (void) +static inline void +worker_thread_node_runtime_update_internal (void) { int i, j; - vlib_worker_thread_t *w; vlib_main_t *vm; vlib_node_main_t *nm, *nm_clone; - vlib_node_t **old_nodes_clone; vlib_main_t *vm_clone; - vlib_node_runtime_t *rt, *old_rt; - void *oldheap; + vlib_node_runtime_t *rt; never_inline void vlib_node_runtime_sync_stats (vlib_main_t * vm, vlib_node_runtime_t * r, @@ -797,13 +806,9 @@ vlib_worker_thread_node_runtime_update (void) ASSERT (vlib_get_thread_index () == 0); - if (vec_len (vlib_mains) == 1) - return; - vm = vlib_mains[0]; nm = &vm->node_main; - ASSERT (vlib_get_thread_index () == 0); ASSERT (*vlib_worker_threads->wait_at_barrier == 1); /* @@ -833,146 +838,170 @@ vlib_worker_thread_node_runtime_update (void) } } - for (i = 1; i < vec_len (vlib_mains); i++) - { - vlib_node_runtime_t *rt; - w = vlib_worker_threads + i; - oldheap = clib_mem_set_heap (w->thread_mheap); + /* Per-worker clone rebuilds 
are now done on each thread */ +} - vm_clone = vlib_mains[i]; - /* Re-clone error heap */ - u64 *old_counters = vm_clone->error_main.counters; - u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear; - clib_memcpy (&vm_clone->error_main, &vm->error_main, - sizeof (vm->error_main)); - j = vec_len (vm->error_main.counters) - 1; - vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES); - vm_clone->error_main.counters = old_counters; - vm_clone->error_main.counters_last_clear = old_counters_all_clear; +void +vlib_worker_thread_node_refork (void) +{ + vlib_main_t *vm, *vm_clone; + vlib_node_main_t *nm, *nm_clone; + vlib_node_t **old_nodes_clone; + vlib_node_runtime_t *rt, *old_rt; - nm_clone = &vm_clone->node_main; - vec_free (nm_clone->next_frames); - nm_clone->next_frames = vec_dup (nm->next_frames); + vlib_node_t *new_n_clone; - for (j = 0; j < vec_len (nm_clone->next_frames); j++) - { - vlib_next_frame_t *nf = &nm_clone->next_frames[j]; - u32 save_node_runtime_index; - u32 save_flags; - - save_node_runtime_index = nf->node_runtime_index; - save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH; - vlib_next_frame_init (nf); - nf->node_runtime_index = save_node_runtime_index; - nf->flags = save_flags; - } + int j; - old_nodes_clone = nm_clone->nodes; - nm_clone->nodes = 0; + vm = vlib_mains[0]; + nm = &vm->node_main; + vm_clone = vlib_get_main (); + nm_clone = &vm_clone->node_main; + + /* Re-clone error heap */ + u64 *old_counters = vm_clone->error_main.counters; + u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear; + + clib_memcpy (&vm_clone->error_main, &vm->error_main, + sizeof (vm->error_main)); + j = vec_len (vm->error_main.counters) - 1; + vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES); + vm_clone->error_main.counters = old_counters; + vm_clone->error_main.counters_last_clear = old_counters_all_clear; + + nm_clone = &vm_clone->node_main; + vec_free (nm_clone->next_frames); + nm_clone->next_frames = vec_dup (nm->next_frames); + + for (j = 0; j < vec_len (nm_clone->next_frames); j++) + { + vlib_next_frame_t *nf = &nm_clone->next_frames[j]; + u32 save_node_runtime_index; + u32 save_flags; + + save_node_runtime_index = nf->node_runtime_index; + save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH; + vlib_next_frame_init (nf); + nf->node_runtime_index = save_node_runtime_index; + nf->flags = save_flags; + } - /* re-fork nodes */ - for (j = 0; j < vec_len (nm->nodes); j++) - { - vlib_node_t *old_n_clone; - vlib_node_t *new_n, *new_n_clone; + old_nodes_clone = nm_clone->nodes; + nm_clone->nodes = 0; - new_n = nm->nodes[j]; - old_n_clone = old_nodes_clone[j]; + /* re-fork nodes */ - new_n_clone = clib_mem_alloc_no_fail (sizeof (*new_n_clone)); - clib_memcpy (new_n_clone, new_n, sizeof (*new_n)); - /* none of the copied nodes have enqueue rights given out */ - new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX; + /* Allocate all nodes in single block for speed */ + new_n_clone = + clib_mem_alloc_no_fail (vec_len (nm->nodes) * sizeof (*new_n_clone)); + for (j = 0; j < vec_len (nm->nodes); j++) + { + vlib_node_t *old_n_clone; + vlib_node_t *new_n; - if (j >= vec_len (old_nodes_clone)) - { - /* new node, set to zero */ - memset (&new_n_clone->stats_total, 0, - sizeof (new_n_clone->stats_total)); - memset (&new_n_clone->stats_last_clear, 0, - sizeof 
(new_n_clone->stats_last_clear)); - } - else - { - /* Copy stats if the old data is valid */ - clib_memcpy (&new_n_clone->stats_total, - &old_n_clone->stats_total, - sizeof (new_n_clone->stats_total)); - clib_memcpy (&new_n_clone->stats_last_clear, - &old_n_clone->stats_last_clear, - sizeof (new_n_clone->stats_last_clear)); - - /* keep previous node state */ - new_n_clone->state = old_n_clone->state; - } - vec_add1 (nm_clone->nodes, new_n_clone); - } - /* Free the old node clone */ - for (j = 0; j < vec_len (old_nodes_clone); j++) - clib_mem_free (old_nodes_clone[j]); - vec_free (old_nodes_clone); - - - /* re-clone internal nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = - vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - for (j = 0; j < vec_len (old_rt); j++) + new_n = nm->nodes[j]; + old_n_clone = old_nodes_clone[j]; + + clib_memcpy (new_n_clone, new_n, sizeof (*new_n)); + /* none of the copied nodes have enqueue rights given out */ + new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX; + + if (j >= vec_len (old_nodes_clone)) { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + /* new node, set to zero */ + memset (&new_n_clone->stats_total, 0, + sizeof (new_n_clone->stats_total)); + memset (&new_n_clone->stats_last_clear, 0, + sizeof (new_n_clone->stats_last_clear)); } - - vec_free (old_rt); - - /* re-clone input nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = - vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - for (j = 0; j < vec_len (old_rt); j++) + else { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + /* Copy stats if the old data is valid */ + clib_memcpy (&new_n_clone->stats_total, + &old_n_clone->stats_total, + sizeof (new_n_clone->stats_total)); + clib_memcpy (&new_n_clone->stats_last_clear, + &old_n_clone->stats_last_clear, + sizeof (new_n_clone->stats_last_clear)); + + /* keep previous node state */ + new_n_clone->state = old_n_clone->state; } + vec_add1 (nm_clone->nodes, new_n_clone); + new_n_clone++; + } + /* Free the old node clones */ + clib_mem_free (old_nodes_clone[0]); + + vec_free (old_nodes_clone); - vec_free (old_rt); - nm_clone->processes = vec_dup (nm->processes); + /* re-clone internal nodes */ + old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]; + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = + vec_dup 
(nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); - clib_mem_set_heap (oldheap); + vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + rt->thread_index = vm_clone->thread_index; + /* copy runtime_data, will be overwritten later for existing rt */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } - // vnet_main_fork_fixup (i); + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); } + + vec_free (old_rt); + + /* re-clone input nodes */ + old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]; + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = + vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]); + + vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + rt->thread_index = vm_clone->thread_index; + /* copy runtime_data, will be overwritten later for existing rt */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + clib_memcpy (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); + } + + vec_free (old_rt); + + nm_clone->processes = vec_dup (nm->processes); +} + + +void +vlib_worker_thread_node_runtime_update (void) +{ + /* + * Make a note that we need to do a node runtime update + * prior to releasing the barrier. 
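 * (Editor's note, illustration only, using names from this patch: a
 * typical caller sequence is
 *
 *   vlib_worker_thread_barrier_sync (vm);
 *   ... add or remove graph nodes ...
 *   vlib_worker_thread_node_runtime_update ();  sets the flag only
 *   vlib_worker_thread_barrier_release (vm);    does the actual rebuild
 *
 * so several updates made under one barrier collapse into a single
 * refork on release.)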
+ */ + vlib_global_main.need_vlib_worker_thread_node_runtime_update = 1; } u32 @@ -1172,6 +1201,8 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) if (vec_len (vlib_mains) < 2) return; + ASSERT (vlib_get_thread_index () == 0); + count = vec_len (vlib_mains) - 1; /* Tolerate recursive calls */ @@ -1180,8 +1211,6 @@ vlib_worker_thread_barrier_sync (vlib_main_t * vm) vlib_worker_threads[0].barrier_sync_count++; - ASSERT (vlib_get_thread_index () == 0); - deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; *vlib_worker_threads->wait_at_barrier = 1; @@ -1199,13 +1228,29 @@ void vlib_worker_thread_barrier_release (vlib_main_t * vm) { f64 deadline; + int refork_needed = 0; if (vec_len (vlib_mains) < 2) return; + ASSERT (vlib_get_thread_index () == 0); + if (--vlib_worker_threads[0].recursion_level > 0) return; + /* Update (all) node runtimes before releasing the barrier, if needed */ + if (vm->need_vlib_worker_thread_node_runtime_update) + { + /* Do stats elements on main thread */ + worker_thread_node_runtime_update_internal (); + vm->need_vlib_worker_thread_node_runtime_update = 0; + + /* Do per thread rebuilds in parallel */ + refork_needed = 1; + clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, + (vec_len (vlib_mains) - 1)); + } + deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; *vlib_worker_threads->wait_at_barrier = 0; @@ -1218,6 +1263,22 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) os_panic (); } } + + /* Wait for reforks before continuing */ + if (refork_needed) + { + deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT; + + while (*vlib_worker_threads->node_reforks_required > 0) + { + if (vlib_time_now (vm) > deadline) + { + fformat (stderr, "%s: worker thread refork deadlock\n", + __FUNCTION__); + os_panic (); + } + } + } } /* diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 572ce77f..c3f1cade 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -102,6 +102,7 @@ typedef struct vlib_thread_registration_t *registration; u8 *name; u64 barrier_sync_count; + volatile u32 *node_reforks_required; long lwp; int lcore_id; @@ -180,6 +181,7 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); void vlib_worker_thread_barrier_sync (vlib_main_t * vm); void vlib_worker_thread_barrier_release (vlib_main_t * vm); +void vlib_worker_thread_node_refork (void); static_always_inline uword vlib_get_thread_index (void) @@ -369,6 +371,15 @@ vlib_worker_thread_barrier_check (void) if (CLIB_DEBUG > 0) vm->parked_at_barrier = 0; clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); + + if (PREDICT_FALSE (*vlib_worker_threads->node_reforks_required)) + { + vlib_worker_thread_node_refork (); + clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, + -1); + while (*vlib_worker_threads->node_reforks_required) + ; + } } } -- cgit 1.2.3-korg From eb1ac1732f15f9a99edbeffeb94c525b9ff25c1d Mon Sep 17 00:00:00 2001 From: Colin Tregenza Dancer Date: Wed, 6 Sep 2017 20:23:24 +0100 Subject: Recombine diags and minimum barrier open time changes (VPP-968) Support logging to both syslog and elog Also include DaveB is_mp_safe fix, which had been lost Change-Id: If82f7969e2f43c63c3fed5b1a0c7434c90c1f380 Signed-off-by: Colin Tregenza Dancer --- src/vlib/main.h | 9 ++ src/vlib/threads.c | 313 +++++++++++++++++++++++++++++++++++++++++-- src/vlib/threads.h | 29 +++- src/vlibapi/api_common.h | 6 + src/vlibapi/api_shared.c | 10 +- src/vlibmemory/memory_vlib.c | 5 + src/vpp/vnet/main.c | 8 ++ 7 files changed, 367 insertions(+), 13 
deletions(-) (limited to 'src/vlib/threads.h') diff --git a/src/vlib/main.h b/src/vlib/main.h index 4c0cde3f..fb67334e 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -199,6 +199,15 @@ typedef struct vlib_main_t */ int need_vlib_worker_thread_node_runtime_update; + /* + * Barrier epoch - Set to current time, each time barrier_sync or + * barrier_release is called with zero recursion. + */ + f64 barrier_epoch; + + /* Earliest barrier can be closed again */ + f64 barrier_no_close_before; + } vlib_main_t; /* Global main structure. */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 6cd325b3..2d9ce84a 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -35,6 +35,222 @@ vl (void *p) vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; +/* + * Barrier tracing can be enabled on a normal build to collect information + * on barrier use, including timings and call stacks. Deliberately not + * keyed off CLIB_DEBUG, because that can add significant overhead which + * imapacts observed timings. + */ + +#ifdef BARRIER_TRACING + /* + * Output of barrier tracing can be to syslog or elog as suits + */ +#ifdef BARRIER_TRACING_ELOG +static u32 +elog_id_for_msg_name (const char *msg_name) +{ + uword *p, r; + static uword *h; + u8 *name_copy; + + if (!h) + h = hash_create_string (0, sizeof (uword)); + + p = hash_get_mem (h, msg_name); + if (p) + return p[0]; + r = elog_string (&vlib_global_main.elog_main, "%s", msg_name); + + name_copy = format (0, "%s%c", msg_name, 0); + + hash_set_mem (h, name_copy, r); + + return r; +} + + /* + * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't + * defined + */ + +static inline void +barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier <%d#%s(O:%dus:%dus)(%dus)", + .format_args = "i4T4i4i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 count, caller, t_entry, t_open, t_closed; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->count = (int) vlib_worker_threads[0].barrier_sync_count; + ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); + ed->t_entry = (int) (1000000.0 * t_entry); + ed->t_open = (int) (1000000.0 * t_open); + ed->t_closed = (int) (1000000.0 * t_closed); +} + +static inline void +barrier_trace_sync_rec (f64 t_entry) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier <%d(%dus)%s", + .format_args = "i4i4T4", + }; + /* *INDENT-ON* */ + struct + { + u32 depth, t_entry, caller; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->depth = (int) vlib_worker_threads[0].recursion_level - 1; + ed->t_entry = (int) (1000000.0 * t_entry); + ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); +} + +static inline void +barrier_trace_release_rec (f64 t_entry) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier (%dus)%d>", + .format_args = "i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 t_entry, depth; + } *ed = 0; + + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->t_entry = (int) (1000000.0 * t_entry); + ed->depth = (int) vlib_worker_threads[0].recursion_level; +} + +static inline void +barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) +{ + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "barrier (%dus){%d}(C:%dus)#%d>", + .format_args = "i4i4i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 t_entry, t_update_main, t_closed_total, count; 
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 6cd325b3..2d9ce84a 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,6 +35,222 @@ vl (void *p)
 vlib_worker_thread_t *vlib_worker_threads;
 vlib_thread_main_t vlib_thread_main;
 
+/*
+ * Barrier tracing can be enabled on a normal build to collect information
+ * on barrier use, including timings and call stacks. Deliberately not
+ * keyed off CLIB_DEBUG, because that can add significant overhead which
+ * impacts observed timings.
+ */
+
+#ifdef BARRIER_TRACING
+ /*
+  * Output of barrier tracing can be to syslog or elog as suits
+  */
+#ifdef BARRIER_TRACING_ELOG
+static u32
+elog_id_for_msg_name (const char *msg_name)
+{
+  uword *p, r;
+  static uword *h;
+  u8 *name_copy;
+
+  if (!h)
+    h = hash_create_string (0, sizeof (uword));
+
+  p = hash_get_mem (h, msg_name);
+  if (p)
+    return p[0];
+  r = elog_string (&vlib_global_main.elog_main, "%s", msg_name);
+
+  name_copy = format (0, "%s%c", msg_name, 0);
+
+  hash_set_mem (h, name_copy, r);
+
+  return r;
+}
+
+ /*
+  * elog Barrier trace functions, which are nulled out if BARRIER_TRACING
+  * isn't defined
+  */
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+  /* *INDENT-OFF* */
+  ELOG_TYPE_DECLARE (e) =
+    {
+      .format = "barrier <%d#%s(O:%dus:%dus)(%dus)",
+      .format_args = "i4T4i4i4i4",
+    };
+  /* *INDENT-ON* */
+  struct
+  {
+    u32 count, caller, t_entry, t_open, t_closed;
+  } *ed = 0;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+  ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+  ed->t_entry = (int) (1000000.0 * t_entry);
+  ed->t_open = (int) (1000000.0 * t_open);
+  ed->t_closed = (int) (1000000.0 * t_closed);
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+  /* *INDENT-OFF* */
+  ELOG_TYPE_DECLARE (e) =
+    {
+      .format = "barrier <%d(%dus)%s",
+      .format_args = "i4i4T4",
+    };
+  /* *INDENT-ON* */
+  struct
+  {
+    u32 depth, t_entry, caller;
+  } *ed = 0;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  ed->depth = (int) vlib_worker_threads[0].recursion_level - 1;
+  ed->t_entry = (int) (1000000.0 * t_entry);
+  ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+  /* *INDENT-OFF* */
+  ELOG_TYPE_DECLARE (e) =
+    {
+      .format = "barrier (%dus)%d>",
+      .format_args = "i4i4",
+    };
+  /* *INDENT-ON* */
+  struct
+  {
+    u32 t_entry, depth;
+  } *ed = 0;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  ed->t_entry = (int) (1000000.0 * t_entry);
+  ed->depth = (int) vlib_worker_threads[0].recursion_level;
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+  /* *INDENT-OFF* */
+  ELOG_TYPE_DECLARE (e) =
+    {
+      .format = "barrier (%dus){%d}(C:%dus)#%d>",
+      .format_args = "i4i4i4i4",
+    };
+  /* *INDENT-ON* */
+  struct
+  {
+    u32 t_entry, t_update_main, t_closed_total, count;
+  } *ed = 0;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  ed->t_entry = (int) (1000000.0 * t_entry);
+  ed->t_update_main = (int) (1000000.0 * t_update_main);
+  ed->t_closed_total = (int) (1000000.0 * t_closed_total);
+  ed->count = (int) vlib_worker_threads[0].barrier_sync_count;
+
+  /* Reset context for next trace */
+  vlib_worker_threads[0].barrier_context = NULL;
+}
+#else
+char barrier_trace[65536];
+char *btp = barrier_trace;
+
+ /*
+  * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING
+  * isn't defined
+  */
+
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+  btp += sprintf (btp, "<%u#%s",
+		  (unsigned int) vlib_worker_threads[0].barrier_sync_count,
+		  vlib_worker_threads[0].barrier_caller);
+
+  if (vlib_worker_threads[0].barrier_context)
+    {
+      btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context);
+
+    }
+
+  btp += sprintf (btp, "(O:%dus:%dus)(%dus):",
+		  (int) (1000000.0 * t_entry),
+		  (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed));
+
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+  btp += sprintf (btp, "<%u(%dus)%s:",
+		  (int) vlib_worker_threads[0].recursion_level - 1,
+		  (int) (1000000.0 * t_entry),
+		  vlib_worker_threads[0].barrier_caller);
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+  btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry),
+		  (int) vlib_worker_threads[0].recursion_level);
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+
+  btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry));
+  if (t_update_main > 0)
+    {
+      btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main));
+    }
+
+  btp += sprintf (btp, "(C:%dus)#%u>",
+		  (int) (1000000.0 * t_closed_total),
+		  (int) vlib_worker_threads[0].barrier_sync_count);
+
+  /* Dump buffer to syslog, and reset for next trace */
+  fformat (stderr, "BTRC %s\n", barrier_trace);
+  btp = barrier_trace;
+  vlib_worker_threads[0].barrier_context = NULL;
+}
+#endif
+#else
+
+ /* Null functions for default case where barrier tracing isn't used */
+static inline void
+barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed)
+{
+}
+
+static inline void
+barrier_trace_sync_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release_rec (f64 t_entry)
+{
+}
+
+static inline void
+barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main)
+{
+}
+#endif
+
 uword
 os_get_nthreads (void)
 {
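With the syslog flavour enabled, trace fragments accumulate in barrier_trace and each outermost release dumps one line to stderr. Reading the sprintf formats above, a single non-recursive sync/release cycle produces a line of roughly this shape (all values invented for illustration):

  BTRC <42#vl_api_foo_t_handler[foo](O:3us:650us)(120us)::(1us){80us}(C:121us)#42>

Here 42 is the running barrier_sync_count, vl_api_foo_t_handler is the barrier_caller captured at the call site, [foo] is the optional barrier_context, (O:3us:650us) gives the sync entry delay and the total time the barrier stayed open in microseconds, (120us) is the time taken to close it, (1us) is the release entry delay, {80us} is the optional node runtime update time, and (C:121us) is the total closed time.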
@@ -558,6 +774,10 @@ start_workers (vlib_main_t * vm)
       *vlib_worker_threads->node_reforks_required = 0;
       vm->need_vlib_worker_thread_node_runtime_update = 0;
 
+      /* init timing */
+      vm->barrier_epoch = 0;
+      vm->barrier_no_close_before = 0;
+
       worker_thread_index = 1;
 
       for (i = 0; i < vec_len (tm->registrations); i++)
@@ -790,6 +1010,7 @@ start_workers (vlib_main_t * vm)
 
 VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
 
+
 static inline void
 worker_thread_node_runtime_update_internal (void)
 {
@@ -993,7 +1214,6 @@ vlib_worker_thread_node_refork (void)
   nm_clone->processes = vec_dup (nm->processes);
 }
 
-
 void
 vlib_worker_thread_node_runtime_update (void)
 {
@@ -1192,10 +1412,29 @@ vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
   vlib_worker_thread_barrier_release (vm);
 }
 
+/*
+ * Enforce minimum open time to minimize packet loss due to Rx overflow,
+ * based on a test-based heuristic that the barrier should be open for at
+ * least three times as long as it is closed (with an upper bound of 1ms,
+ * because past that point it is probably too late to make a difference).
+ */
+
+#ifndef BARRIER_MINIMUM_OPEN_LIMIT
+#define BARRIER_MINIMUM_OPEN_LIMIT 0.001
+#endif
+
+#ifndef BARRIER_MINIMUM_OPEN_FACTOR
+#define BARRIER_MINIMUM_OPEN_FACTOR 3
+#endif
+
 void
-vlib_worker_thread_barrier_sync (vlib_main_t * vm)
+vlib_worker_thread_barrier_sync_int (vlib_main_t * vm)
 {
   f64 deadline;
+  f64 now;
+  f64 t_entry;
+  f64 t_open;
+  f64 t_closed;
   u32 count;
 
   if (vec_len (vlib_mains) < 2)
@@ -1205,29 +1444,55 @@
 
   count = vec_len (vlib_mains) - 1;
 
+  /* Record entry relative to last close */
+  now = vlib_time_now (vm);
+  t_entry = now - vm->barrier_epoch;
+
   /* Tolerate recursive calls */
   if (++vlib_worker_threads[0].recursion_level > 1)
-    return;
+    {
+      barrier_trace_sync_rec (t_entry);
+      return;
+    }
 
   vlib_worker_threads[0].barrier_sync_count++;
 
-  deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+  /* Enforce minimum barrier open time to minimize packet loss */
+  ASSERT (vm->barrier_no_close_before <= (now + BARRIER_MINIMUM_OPEN_LIMIT));
+  while ((now = vlib_time_now (vm)) < vm->barrier_no_close_before)
+    ;
+
+  /* Record time of closure */
+  t_open = now - vm->barrier_epoch;
+  vm->barrier_epoch = now;
+
+  deadline = now + BARRIER_SYNC_TIMEOUT;
 
   *vlib_worker_threads->wait_at_barrier = 1;
   while (*vlib_worker_threads->workers_at_barrier != count)
     {
-      if (vlib_time_now (vm) > deadline)
+      if ((now = vlib_time_now (vm)) > deadline)
	{
	  fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
	  os_panic ();
	}
     }
+
+  t_closed = now - vm->barrier_epoch;
+
+  barrier_trace_sync (t_entry, t_open, t_closed);
+
 }
 
 void
 vlib_worker_thread_barrier_release (vlib_main_t * vm)
 {
   f64 deadline;
+  f64 now;
+  f64 minimum_open;
+  f64 t_entry;
+  f64 t_closed_total;
+  f64 t_update_main = 0.0;
   int refork_needed = 0;
 
   if (vec_len (vlib_mains) < 2)
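The numbers behind the heuristic, with the defaults above: a barrier cycle that kept the workers stopped for 150us forces the next sync to wait until 3 * 150us = 450us of open time have elapsed; a 500us closure would demand 1500us, so the 1ms cap applies instead. The release-side computation in the hunks that follow is effectively:

  /* Illustrative fragment mirroring the release path below */
  minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
  if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
    minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;	/* cap at 1ms */
  vm->barrier_no_close_before = now + minimum_open;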
@@ -1235,8 +1500,15 @@
 
   ASSERT (vlib_get_thread_index () == 0);
 
+
+  now = vlib_time_now (vm);
+  t_entry = now - vm->barrier_epoch;
+
   if (--vlib_worker_threads[0].recursion_level > 0)
-    return;
+    {
+      barrier_trace_release_rec (t_entry);
+      return;
+    }
 
   /* Update (all) node runtimes before releasing the barrier, if needed */
   if (vm->need_vlib_worker_thread_node_runtime_update)
@@ -1249,15 +1521,17 @@
       refork_needed = 1;
       clib_smp_atomic_add (vlib_worker_threads->node_reforks_required,
			   (vec_len (vlib_mains) - 1));
+      now = vlib_time_now (vm);
+      t_update_main = now - vm->barrier_epoch;
     }
 
-  deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+  deadline = now + BARRIER_SYNC_TIMEOUT;
 
   *vlib_worker_threads->wait_at_barrier = 0;
 
   while (*vlib_worker_threads->workers_at_barrier > 0)
     {
-      if (vlib_time_now (vm) > deadline)
+      if ((now = vlib_time_now (vm)) > deadline)
	{
	  fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
	  os_panic ();
@@ -1267,11 +1541,13 @@
   /* Wait for reforks before continuing */
   if (refork_needed)
     {
-      deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+      now = vlib_time_now (vm);
+
+      deadline = now + BARRIER_SYNC_TIMEOUT;
 
       while (*vlib_worker_threads->node_reforks_required > 0)
	{
-	  if (vlib_time_now (vm) > deadline)
+	  if ((now = vlib_time_now (vm)) > deadline)
	    {
	      fformat (stderr, "%s: worker thread refork deadlock\n",
		       __FUNCTION__);
@@ -1279,6 +1555,23 @@
	    }
	}
     }
+
+  t_closed_total = now - vm->barrier_epoch;
+
+  minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR;
+
+  if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT)
+    {
+      minimum_open = BARRIER_MINIMUM_OPEN_LIMIT;
+    }
+
+  vm->barrier_no_close_before = now + minimum_open;
+
+  /* Record barrier epoch (used to enforce minimum open time) */
+  vm->barrier_epoch = now;
+
+  barrier_trace_release (t_entry, t_closed_total, t_update_main);
+
 }
 
 /*
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index c3f1cade..72340ee1 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -18,6 +18,22 @@
 #include
 #include
 
+/*
+ * To enable detailed tracing of barrier usage, including call stacks and
+ * timings, define BARRIER_TRACING here or in relevant TAGS. If also used
+ * with CLIB_DEBUG, timing will _not_ be representative of normal code
+ * execution.
+ */
+
+// #define BARRIER_TRACING 1
+
+/*
+ * Two options for barrier tracing output: syslog & elog.
+ */
+
+// #define BARRIER_TRACING_ELOG 1
+
 extern vlib_main_t **vlib_mains;
 
 void vlib_set_thread_name (char *name);
@@ -102,6 +118,10 @@ typedef struct
   vlib_thread_registration_t *registration;
   u8 *name;
   u64 barrier_sync_count;
+#ifdef BARRIER_TRACING
+  const char *barrier_caller;
+  const char *barrier_context;
+#endif
   volatile u32 *node_reforks_required;
 
   long lwp;
@@ -179,7 +199,14 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
 #define BARRIER_SYNC_TIMEOUT (1.0)
 #endif
 
-void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
+#ifdef BARRIER_TRACING
+#define vlib_worker_thread_barrier_sync(X) {vlib_worker_threads[0].barrier_caller=__FUNCTION__;vlib_worker_thread_barrier_sync_int(X);}
+#else
+#define vlib_worker_thread_barrier_sync(X) vlib_worker_thread_barrier_sync_int(X)
+#endif
+
+
+void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm);
 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
 void vlib_worker_thread_node_refork (void);
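Note how the tracing build turns the sync entry point into a macro, so every call site stamps its own name into barrier_caller before entering the real implementation. With BARRIER_TRACING defined, an unmodified call site such as:

  vlib_worker_thread_barrier_sync (vm);

expands to:

  {
    vlib_worker_threads[0].barrier_caller = __FUNCTION__;
    vlib_worker_thread_barrier_sync_int (vm);
  }

so no caller needs to change for the traces to name their origin.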
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
index 651566ae..bbeccfc2 100644
--- a/src/vlibapi/api_common.h
+++ b/src/vlibapi/api_common.h
@@ -144,6 +144,12 @@ void vl_msg_api_queue_handler (unix_shared_memory_queue_t * q);
 
 void vl_msg_api_barrier_sync (void) __attribute__ ((weak));
 void vl_msg_api_barrier_release (void) __attribute__ ((weak));
+#ifdef BARRIER_TRACING
+void vl_msg_api_barrier_trace_context (const char *context)
+  __attribute__ ((weak));
+#else
+#define vl_msg_api_barrier_trace_context(X)
+#endif
 void vl_msg_api_free (void *);
 void vl_noop_handler (void *mp);
 void vl_msg_api_increment_missing_client_counter (void);
diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c
index 5c1a9940..59dc2375 100644
--- a/src/vlibapi/api_shared.c
+++ b/src/vlibapi/api_shared.c
@@ -418,7 +418,10 @@ msg_handler_internal (api_main_t * am,
       if (do_it)
	{
	  if (!am->is_mp_safe[id])
-	    vl_msg_api_barrier_sync ();
+	    {
+	      vl_msg_api_barrier_trace_context (am->msg_names[id]);
+	      vl_msg_api_barrier_sync ();
+	    }
	  (*am->msg_handlers[id]) (the_msg);
	  if (!am->is_mp_safe[id])
	    vl_msg_api_barrier_release ();
@@ -498,7 +501,10 @@ vl_msg_api_handler_with_vm_node (api_main_t * am,
       vl_msg_api_trace (am, am->rx_trace, the_msg);
 
       if (!am->is_mp_safe[id])
-	vl_msg_api_barrier_sync ();
+	{
+	  vl_msg_api_barrier_trace_context (am->msg_names[id]);
+	  vl_msg_api_barrier_sync ();
+	}
       (*handler) (the_msg, vm, node);
       if (!am->is_mp_safe[id])
	vl_msg_api_barrier_release ();
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 688ce604..55a90d64 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1462,6 +1462,7 @@ _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
 static clib_error_t *
 rpc_api_hookup (vlib_main_t * vm)
 {
+  api_main_t *am = &api_main;
 #define _(N,n)                                                  \
     vl_msg_api_set_handlers(VL_API_##N, #n,                     \
			     vl_api_##n##_t_handler,            \
@@ -1481,6 +1482,10 @@ rpc_api_hookup (vlib_main_t * vm)
		       sizeof(vl_api_##n##_t), 1 /* do trace */);
   foreach_plugin_trace_msg;
 #undef _
+
+  /* No reason to halt the parade to create a trace record... */
+  am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1;
+
   return 0;
 }
 
diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c
index 76371dbe..b330f60f 100644
--- a/src/vpp/vnet/main.c
+++ b/src/vpp/vnet/main.c
@@ -294,6 +294,14 @@ os_exit (int code)
     exit (code);
 }
 
+#ifdef BARRIER_TRACING
+void
+vl_msg_api_barrier_trace_context (const char *context)
+{
+  vlib_worker_threads[0].barrier_context = context;
+}
+#endif
+
 void
 vl_msg_api_barrier_sync (void)
 {
-- 
cgit 1.2.3-korg
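The is_mp_safe flag used above is the general opt-out: any handler that does not touch shared forwarding state can skip the barrier entirely. A sketch of the same pattern applied elsewhere (the message id and hookup function are invented for illustration):

  /* Hypothetical example; VL_API_MY_READONLY_THING is not a real message */
  static clib_error_t *
  my_api_hookup (vlib_main_t * vm)
  {
    api_main_t *am = &api_main;

    /* msg_handler_internal () will skip barrier sync/release around it */
    am->is_mp_safe[VL_API_MY_READONLY_THING] = 1;
    return 0;
  }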
From 69128d0209ba6108430dca9cc78ab36a9b1c793e Mon Sep 17 00:00:00 2001
From: Dave Barach
Date: Tue, 26 Sep 2017 10:54:34 -0400
Subject: Add thread-safe event signaller, use RPC where required

Update ping code to use the new function

Change-Id: Ieb753b23f8402cbe5667c22747896784c8ece937
Signed-off-by: Florin Coras
Signed-off-by: Dave Barach
---
 src/vlib/node_funcs.h        | 23 +++++++++++++++++++++++
 src/vlib/threads.c           | 24 +++++++++++++++++++++++-
 src/vlib/threads.h           | 14 +++++++++++++-
 src/vlibmemory/memory_vlib.c | 13 ++++++++++++-
 src/vnet/ip/ping.c           |  4 ++--
 5 files changed, 73 insertions(+), 5 deletions(-)

(limited to 'src/vlib/threads.h')

diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 3ae4e541..0734476c 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -965,6 +965,29 @@ vlib_process_signal_event_pointer (vlib_main_t * vm,
   d[0] = data;
 }
 
+/**
+ * Signal event to process from any thread.
+ *
+ * When in doubt, use this.
+ */
+always_inline void
+vlib_process_signal_event_mt (vlib_main_t * vm,
+			      uword node_index, uword type_opaque, uword data)
+{
+  if (vlib_get_thread_index () != 0)
+    {
+      vlib_process_signal_event_mt_args_t args = {
+	.node_index = node_index,
+	.type_opaque = type_opaque,
+	.data = data,
+      };
+      vlib_rpc_call_main_thread (vlib_process_signal_event_mt_helper,
+				 (u8 *) & args, sizeof (args));
+    }
+  else
+    vlib_process_signal_event (vm, node_index, type_opaque, data);
+}
+
 always_inline void
 vlib_process_signal_one_time_event (vlib_main_t * vm,
				     uword node_index,
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index f9c7043c..be8daa64 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1767,7 +1767,6 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
   return (fqm - tm->frame_queue_mains);
 }
 
-
 int
 vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb)
 {
@@ -1781,6 +1780,29 @@ vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb)
   return 0;
 }
 
+void
+vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
+				     args)
+{
+  ASSERT (vlib_get_thread_index () == 0);
+  vlib_process_signal_event (vlib_get_main (), args->node_index,
+			     args->type_opaque, args->data);
+}
+
+void *rpc_call_main_thread_cb_fn;
+
+void
+vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size)
+{
+  if (rpc_call_main_thread_cb_fn)
+    {
+      void (*fp) (void *, u8 *, u32) = rpc_call_main_thread_cb_fn;
+      (*fp) (callback, args, arg_size);
+    }
+  else
+    clib_warning ("BUG: rpc_call_main_thread_cb_fn NULL!");
+}
+
 clib_error_t *
 threads_init (vlib_main_t * vm)
 {
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index 72340ee1..8931584b 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -171,6 +171,13 @@ typedef struct
   frame_queue_nelt_counter_t *frame_queue_histogram;
 } vlib_frame_queue_main_t;
 
+typedef struct
+{
+  uword node_index;
+  uword type_opaque;
+  uword data;
+} vlib_process_signal_event_mt_args_t;
+
 /* Called early, in thread 0's context */
 clib_error_t *vlib_thread_init (vlib_main_t * vm);
 
@@ -510,9 +517,14 @@ vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
 }
 
 u8 *vlib_thread_stack_init (uword thread_index);
-
 int vlib_thread_cb_register (struct vlib_main_t *vm,
			     vlib_thread_callbacks_t * cb);
+extern void *rpc_call_main_thread_cb_fn;
+
+void
+vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
+				     args);
+void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size);
 
 #endif /* included_vlib_threads_h */
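The helper/args pair above is the reusable pattern: pack the arguments into a flat struct, hand a helper plus the raw bytes to vlib_rpc_call_main_thread (), and let the main thread unpack them. A sketch of a caller-defined RPC under the same scheme (all my_* names are invented, not part of the patch):

  typedef struct
  {
    u32 sw_if_index;
    u32 enable;
  } my_rpc_args_t;

  static void
  my_rpc_helper (my_rpc_args_t * a)
  {
    /* Runs on the main thread, where shared state is safe to touch */
    ASSERT (vlib_get_thread_index () == 0);
    /* ... act on a->sw_if_index / a->enable ... */
  }

  static void
  my_rpc_from_worker (u32 sw_if_index, u32 enable)
  {
    my_rpc_args_t args = {.sw_if_index = sw_if_index,.enable = enable };
    /* The RPC path copies the argument bytes, so stack storage is fine */
    vlib_rpc_call_main_thread (my_rpc_helper, (u8 *) & args, sizeof (args));
  }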
diff --git a/src/vlibmemory/memory_vlib.c b/src/vlibmemory/memory_vlib.c
index 77959e6d..d305ea61 100644
--- a/src/vlibmemory/memory_vlib.c
+++ b/src/vlibmemory/memory_vlib.c
@@ -1573,6 +1573,17 @@ _(RPC_CALL_REPLY,rpc_call_reply)
 #define foreach_plugin_trace_msg                                \
 _(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
 
+/*
+ * Set the rpc callback at our earliest possible convenience.
+ * This avoids ordering issues between thread_init() -> start_workers and
+ * an init function which we could define here. If we ever intend to use
+ * vlib all by itself, we can't create a link-time dependency on
+ * an init function here and a typical "call foo_init first"
+ * guitar lick.
+ */
+
+extern void *rpc_call_main_thread_cb_fn;
+
 static clib_error_t *
 rpc_api_hookup (vlib_main_t * vm)
 {
@@ -1599,7 +1610,7 @@ rpc_api_hookup (vlib_main_t * vm)
 
   /* No reason to halt the parade to create a trace record... */
   am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1;
-
+  rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread;
   return 0;
 }
 
diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c
index c847e696..0fa537f6 100755
--- a/src/vnet/ip/ping.c
+++ b/src/vnet/ip/ping.c
@@ -97,7 +97,7 @@ signal_ip46_icmp_reply_event (u8 event_type, vlib_buffer_t * b0)
   clib_memcpy (vnet_buffer (vlib_get_buffer (vm, bi0_copy))->unused,
	       &nowts, sizeof (nowts));
 
-  vlib_process_signal_event (vm, pr->cli_process_id, event_type, bi0_copy);
+  vlib_process_signal_event_mt (vm, pr->cli_process_id, event_type, bi0_copy);
 
   return 1;
 }
@@ -646,7 +646,7 @@ run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
	      i = 1 + ping_repeat;
	      break;
	    }
-	  vec_free(event_data);
+	  vec_free (event_data);
	}
     }
   vlib_cli_output (vm, "\n");
-- 
cgit 1.2.3-korg
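The ping change above is the archetypal use: signal_ip46_icmp_reply_event () runs in the datapath on whichever worker received the reply, while the CLI process it signals lives on the main thread, which is exactly why the direct vlib_process_signal_event () call needed the thread-safe variant. Any similar event source can switch the same way; a sketch (node function and event code are invented for illustration):

  /* Hypothetical worker-side event source, for illustration only */
  #define MY_EVENT_LINK_FLAP 1

  static uword
  my_monitor_input (vlib_main_t * vm, vlib_node_runtime_t * node,
		    vlib_frame_t * frame)
  {
    uword monitor_process_index = 0;	/* looked up elsewhere in real code */

    /* Safe from any thread: workers route through an RPC to the main
     * thread; the main thread signals the process directly */
    vlib_process_signal_event_mt (vm, monitor_process_index,
				  MY_EVENT_LINK_FLAP, 0 /* data */ );
    return frame->n_vectors;
  }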