summaryrefslogtreecommitdiffstats
path: root/test
AgeCommit message (Expand)AuthorFilesLines
2021-09-13ip-neighbor: Handle local MAC address change for incomplete adjacenciesNeale Ranns1-1/+23
2021-09-09flowprobe: simplify test logicKlement Sekera1-55/+18
2021-09-08tests: fix DEBUG=attach multiple worker configKlement Sekera1-0/+3
2021-08-31vcl: switch part of the tests to sock apiFlorin Coras1-13/+47
2021-08-26ikev2: check for valid cipher + integrityBenoît Ganne1-1/+1
2021-08-19nat: handle ED in2out ICMP errors with workersMatthew Smith1-0/+45
2021-08-17nat: fix counters increment for output featureAlexander Chernavin1-1/+1
2021-08-13tests docs: upgrade python packagesDave Wallace16-354/+439
2021-08-09ip6-nd: only respond to RS if sending RA is enabledOle Troan1-1/+15
2021-07-29nat: fix ICMP checksum validationKlement Sekera1-0/+44
2021-07-28gre: set proper fib index for unnumbered interfaces, unset fib index before f...Stanislav Zaikin1-7/+9
2021-07-19nat: harden ICMP handlingKlement Sekera1-0/+4
2021-07-14nat: adding support for icmp-error msgFilip Varga1-0/+35
2021-07-14nat: refactoring NAT44ED cfg functionsFilip Varga1-11/+2
2021-07-13fib: Set the GLEAN flag on attached export routes so that the SAS worksNeale Ranns2-1/+14
2021-07-12papi: remove shared memory transportOle Troan2-2/+1
2021-07-09gso: add ipsec tunnel testsMohsin Kazmi1-0/+247
2021-07-06vrrp: Fix for test-all testcases failure.Rajaselvam1-18/+16
2021-06-30ipsec: Derive the TUNNEL_V6 flag from the configured address typesNeale Ranns1-2/+0
2021-06-29ipsec: Fix setting the hi-sequence number for decryptNeale Ranns2-192/+1016
2021-06-28ipsec: Split the SA add_del API into an separate add and delNeale Ranns1-20/+2
2021-06-28ipsec: Enable the extended Sequence Number IPSec tests for GCMNeale Ranns1-2/+72
2021-06-25stats: revert "add a retry mechanism in a symlink test"Beno�t Ganne1-15/+1
2021-06-22nat: don't drop packet with ttl=1 if output featureKlement Sekera1-0/+26
2021-06-22stats: add a retry mechanism in a symlink testArthur de Kerhor1-1/+15
2021-06-17tests: Tests for cross AF routingNeale Ranns1-0/+124
2021-06-17mpls: CLI to show the MPLS forwarding state of an interfaceNeale Ranns1-0/+7
2021-06-16api: remove custom dumpFilip Tehlar1-2/+0
2021-06-14linux-cp: Add tests for tun devicesNeale Ranns3-5/+252
2021-06-14pg: A Tunnel mode variant of a pg interfaceNeale Ranns3-4/+129
2021-06-08vppinfra: pool_free_elts() now supports fixed-size poolsDave Barach1-0/+13
2021-06-03fib: fix flags updates when adding routes with a udp encap pathArthur de Kerhor1-5/+5
2021-05-29udp: add udp decapsulationArthur de Kerhor2-4/+179
2021-05-28svm: fix inadequate atomic load for chunk mgrDave Wallace1-3/+1
2021-05-27ip: allow the 'ip6 enable' on tunnel interface typesNeale Ranns1-2/+78
2021-05-26tests: Fix 'test-wipe'Neale Ranns1-1/+0
2021-05-26fib: During the mfib lookup set the unicast FIB index in the packet so that a...Neale Ranns1-2/+39
2021-05-26tests: move vapi C/C++ test files into src/Damjan Marion6-1562/+9
2021-05-25nat: report correct EI per-user session limitMatthew Smith1-0/+6
2021-05-24tests: skip hoststack iperf3 cut thru test on aarch64Dave Wallace1-1/+3
2021-05-21bfd: use vnet cryptoKlement Sekera1-1/+1
2021-05-21tests: add role to VCLAppWorkerDave Wallace2-8/+9
2021-05-21quic: add extended vcl testFlorin Coras1-0/+42
2021-05-20tests: don't buffer worker stdin/stdoutDave Wallace1-2/+3
2021-05-18tls: fix dtls with no workersFlorin Coras1-2/+2
2021-05-18nat: refactor multiple vrf testsKlement Sekera1-25/+120
2021-05-13tests: move test source to vpp/testDave Wallace92-38/+55932
2021-05-08tests: fix another race condition in punt testsDave Wallace1-5/+7
2021-05-06vcl: add dtls testFlorin Coras1-0/+40
2021-05-06tests: fix race condition in punt testsDave Wallace1-1/+1
ghlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vppinfra/bitmap.h>
#include <vppinfra/hash.h>
#include <vppinfra/pool.h>
#include <vppinfra/timing_wheel.h>

void
timing_wheel_init (timing_wheel_t * w, u64 current_cpu_time,
		   f64 cpu_clocks_per_second)
{
  if (w->max_sched_time <= w->min_sched_time)
    {
      w->min_sched_time = 1e-6;
      w->max_sched_time = 1e-3;
    }

  w->cpu_clocks_per_second = cpu_clocks_per_second;
  w->log2_clocks_per_bin =
    max_log2 (w->cpu_clocks_per_second * w->min_sched_time);
  w->log2_bins_per_wheel =
    max_log2 (w->cpu_clocks_per_second * w->max_sched_time);
  w->log2_bins_per_wheel -= w->log2_clocks_per_bin;
  w->log2_clocks_per_wheel = w->log2_bins_per_wheel + w->log2_clocks_per_bin;
  w->bins_per_wheel = 1 << w->log2_bins_per_wheel;
  w->bins_per_wheel_mask = w->bins_per_wheel - 1;

  w->current_time_index = current_cpu_time >> w->log2_clocks_per_bin;

  if (w->n_wheel_elt_time_bits <= 0 ||
      w->n_wheel_elt_time_bits >= STRUCT_BITS_OF (timing_wheel_elt_t,
						  cpu_time_relative_to_base))
    w->n_wheel_elt_time_bits =
      STRUCT_BITS_OF (timing_wheel_elt_t, cpu_time_relative_to_base) - 1;

  w->cpu_time_base = current_cpu_time;
  w->time_index_next_cpu_time_base_update
    =
    w->current_time_index +
    ((u64) 1 << (w->n_wheel_elt_time_bits - w->log2_clocks_per_bin));
}

always_inline uword
get_level_and_relative_time (timing_wheel_t * w, u64 cpu_time,
			     uword * rtime_result)
{
  u64 dt, rtime;
  uword level_index;

  dt = (cpu_time >> w->log2_clocks_per_bin);

  /* Time should always move forward. */
  ASSERT (dt >= w->current_time_index);

  dt -= w->current_time_index;

  /* Find level and offset within level.  Level i has bins of size 2^((i+1)*M) */
  rtime = dt;
  for (level_index = 0; (rtime >> w->log2_bins_per_wheel) != 0; level_index++)
    rtime = (rtime >> w->log2_bins_per_wheel) - 1;

  /* Return offset within level and level index. */
  ASSERT (rtime < w->bins_per_wheel);
  *rtime_result = rtime;
  return level_index;
}

always_inline uword
time_index_to_wheel_index (timing_wheel_t * w, uword level_index, u64 ti)
{
  return (ti >> (level_index * w->log2_bins_per_wheel)) &
    w->bins_per_wheel_mask;
}

/* Find current time on this level. */
always_inline uword
current_time_wheel_index (timing_wheel_t * w, uword level_index)
{
  return time_index_to_wheel_index (w, level_index, w->current_time_index);
}

/* Circular wheel indexing. */
always_inline uword
wheel_add (timing_wheel_t * w, word x)
{
  return x & w->bins_per_wheel_mask;
}

always_inline uword
rtime_to_wheel_index (timing_wheel_t * w, uword level_index, uword rtime)
{
  uword t = current_time_wheel_index (w, level_index);
  return wheel_add (w, t + rtime);
}

static clib_error_t *
validate_level (timing_wheel_t * w, uword level_index, uword * n_elts)
{
  timing_wheel_level_t *level;
  timing_wheel_elt_t *e;
  uword wi;
  clib_error_t *error = 0;

#define _(x)					\
  do {						\
    error = CLIB_ERROR_ASSERT (x);		\
    ASSERT (! error);				\
    if (error) return error;			\
  } while (0)

  level = vec_elt_at_index (w->levels, level_index);
  for (wi = 0; wi < vec_len (level->elts); wi++)
    {
      /* Validate occupancy bitmap. */
      _(clib_bitmap_get_no_check (level->occupancy_bitmap, wi) ==
	(vec_len (level->elts[wi]) > 0));

      *n_elts += vec_len (level->elts[wi]);

      vec_foreach (e, level->elts[wi])
      {
	/* Validate time bin and level. */
	u64 e_time;
	uword e_ti, e_li, e_wi;

	e_time = e->cpu_time_relative_to_base + w->cpu_time_base;
	e_li = get_level_and_relative_time (w, e_time, &e_ti);
	e_wi = rtime_to_wheel_index (w, level_index, e_ti);

	if (e_li == level_index - 1)
	  /* If this element was scheduled on the previous level
	     it must be wrapped. */
	  _(e_ti + current_time_wheel_index (w, level_index - 1)
	    >= w->bins_per_wheel);
	else
	  {
	    _(e_li == level_index);
	    if (e_li == 0)
	      _(e_wi == wi);
	    else
	      _(e_wi == wi || e_wi + 1 == wi || e_wi - 1 == wi);
	  }
      }
    }

#undef _

  return error;
}

void
timing_wheel_validate (timing_wheel_t * w)
{
  uword l;
  clib_error_t *error = 0;
  uword n_elts;

  if (!w->validate)
    return;

  n_elts = pool_elts (w->overflow_pool);
  for (l = 0; l < vec_len (w->levels); l++)
    {
      error = validate_level (w, l, &n_elts);
      if (error)
	clib_error_report (error);
    }
}

always_inline void
free_elt_vector (timing_wheel_t * w, timing_wheel_elt_t * ev)
{
  /* Poison free elements so we never use them by mistake. */
  if (CLIB_DEBUG > 0)
    clib_memset (ev, ~0, vec_len (ev) * sizeof (ev[0]));
  _vec_len (ev) = 0;
  vec_add1 (w->free_elt_vectors, ev);
}

static timing_wheel_elt_t *
insert_helper (timing_wheel_t * w, uword level_index, uword rtime)
{
  timing_wheel_level_t *level;
  timing_wheel_elt_t *e;
  uword wheel_index;

  /* Circular buffer. */
  vec_validate (w->levels, level_index);
  level = vec_elt_at_index (w->levels, level_index);

  if (PREDICT_FALSE (!level->elts))
    {
      uword max = w->bins_per_wheel - 1;
      clib_bitmap_validate (level->occupancy_bitmap, max);
      vec_validate (level->elts, max);
    }

  wheel_index = rtime_to_wheel_index (w, level_index, rtime);

  level->occupancy_bitmap =
    clib_bitmap_ori (level->occupancy_bitmap, wheel_index);

  /* Allocate an elt vector from free list if there is one. */
  if (!level->elts[wheel_index] && vec_len (w->free_elt_vectors))
    level->elts[wheel_index] = vec_pop (w->free_elt_vectors);

  /* Add element to vector for this time bin. */
  vec_add2 (level->elts[wheel_index], e, 1);

  return e;
}

/* Insert user data on wheel at given CPU time stamp. */
static void
timing_wheel_insert_helper (timing_wheel_t * w, u64 insert_cpu_time,
			    u32 user_data)
{
  timing_wheel_elt_t *e;
  u64 dt;
  uword rtime, level_index;

  level_index = get_level_and_relative_time (w, insert_cpu_time, &rtime);

  dt = insert_cpu_time - w->cpu_time_base;
  if (PREDICT_TRUE (0 == (dt >> BITS (e->cpu_time_relative_to_base))))
    {
      e = insert_helper (w, level_index, rtime);
      e->user_data = user_data;
      e->cpu_time_relative_to_base = dt;
      if (insert_cpu_time < w->cached_min_cpu_time_on_wheel)
	w->cached_min_cpu_time_on_wheel = insert_cpu_time;
    }
  else
    {
      /* Time too far in the future: add to overflow vector. */
      timing_wheel_overflow_elt_t *oe;
      pool_get (w->overflow_pool, oe);
      oe->user_data = user_data;
      oe->cpu_time = insert_cpu_time;
    }
}

always_inline uword
elt_is_deleted (timing_wheel_t * w, u32 user_data)
{
  return (hash_elts (w->deleted_user_data_hash) > 0
	  && hash_get (w->deleted_user_data_hash, user_data));
}

static timing_wheel_elt_t *
delete_user_data (timing_wheel_elt_t * elts, u32 user_data)
{
  uword found_match;
  timing_wheel_elt_t *e, *new_elts;

  /* Quickly scan to see if there are any elements to delete
     in this bucket. */
  found_match = 0;
  vec_foreach (e, elts)
  {
    found_match = e->user_data == user_data;
    if (found_match)
      break;
  }
  if (!found_match)
    return elts;

  /* Re-scan to build vector of new elts with matching user_data deleted. */
  new_elts = 0;
  vec_foreach (e, elts)
  {
    if (e->user_data != user_data)
      vec_add1 (new_elts, e[0]);
  }

  vec_free (elts);
  return new_elts;
}

/* Insert user data on wheel at given CPU time stamp. */
void
timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time, u32 user_data)
{
  /* Remove previously deleted elements. */
  if (elt_is_deleted (w, user_data))
    {
      timing_wheel_level_t *l;
      uword wi;

      /* Delete elts with given user data so that stale events don't expire. */
      vec_foreach (l, w->levels)
      {
	  /* *INDENT-OFF* */
	  clib_bitmap_foreach (wi, l->occupancy_bitmap, ({
	    l->elts[wi] = delete_user_data (l->elts[wi], user_data);
	    if (vec_len (l->elts[wi]) == 0)
	      l->occupancy_bitmap = clib_bitmap_andnoti (l->occupancy_bitmap, wi);
	  }));
	  /* *INDENT-ON* */
      }

      {
	timing_wheel_overflow_elt_t *oe;
	/* *INDENT-OFF* */
	pool_foreach (oe, w->overflow_pool, ({
	  if (oe->user_data == user_data)
	    pool_put (w->overflow_pool, oe);
	}));
	/* *INDENT-ON* */
      }

      hash_unset (w->deleted_user_data_hash, user_data);
    }

  timing_wheel_insert_helper (w, insert_cpu_time, user_data);
}

void
timing_wheel_delete (timing_wheel_t * w, u32 user_data)
{
  if (!w->deleted_user_data_hash)
    w->deleted_user_data_hash =
      hash_create ( /* capacity */ 0, /* value bytes */ 0);

  hash_set1 (w->deleted_user_data_hash, user_data);
}

/* Returns time of next expiring element. */
u64
timing_wheel_next_expiring_elt_time (timing_wheel_t * w)
{
  timing_wheel_level_t *l;
  timing_wheel_elt_t *e;
  uword li, wi, wi0;
  u32 min_dt;
  u64 min_t;
  uword wrapped = 0;

  min_dt = ~0;
  min_t = ~0ULL;
  vec_foreach (l, w->levels)
  {
    if (!l->occupancy_bitmap)
      continue;

    li = l - w->levels;
    wi0 = wi = current_time_wheel_index (w, li);
    wrapped = 0;
    while (1)
      {
	if (clib_bitmap_get_no_check (l->occupancy_bitmap, wi))
	  {
	    vec_foreach (e, l->elts[wi])
	      min_dt = clib_min (min_dt, e->cpu_time_relative_to_base);

	    if (wrapped && li + 1 < vec_len (w->levels))
	      {
		uword wi1 = current_time_wheel_index (w, li + 1);
		if (l[1].occupancy_bitmap
		    && clib_bitmap_get_no_check (l[1].occupancy_bitmap, wi1))
		  {
		    vec_foreach (e, l[1].elts[wi1])
		    {
		      min_dt =
			clib_min (min_dt, e->cpu_time_relative_to_base);
		    }
		  }
	      }

	    min_t = w->cpu_time_base + min_dt;
	    goto done;
	  }

	wi = wheel_add (w, wi + 1);
	if (wi == wi0)
	  break;

	wrapped = wi != wi + 1;
      }
  }

  {
    timing_wheel_overflow_elt_t *oe;

    if (min_dt != ~0)
      min_t = w->cpu_time_base + min_dt;

    /* *INDENT-OFF* */
    pool_foreach (oe, w->overflow_pool,
		  ({ min_t = clib_min (min_t, oe->cpu_time); }));
    /* *INDENT-ON* */

  done:
    return min_t;
  }
}

static inline void
insert_elt (timing_wheel_t * w, timing_wheel_elt_t * e)
{
  u64 t = w->cpu_time_base + e->cpu_time_relative_to_base;
  timing_wheel_insert_helper (w, t, e->user_data);
}

always_inline u64
elt_cpu_time (timing_wheel_t * w, timing_wheel_elt_t * e)
{
  return w->cpu_time_base + e->cpu_time_relative_to_base;
}

always_inline void
validate_expired_elt (timing_wheel_t * w, timing_wheel_elt_t * e,
		      u64 current_cpu_time)
{
  if (CLIB_DEBUG > 0)
    {
      u64 e_time = elt_cpu_time (w, e);

      /* Verify that element is actually expired. */
      ASSERT ((e_time >> w->log2_clocks_per_bin)
	      <= (current_cpu_time >> w->log2_clocks_per_bin));
    }
}

static u32 *
expire_bin (timing_wheel_t * w,
	    uword level_index,
	    uword wheel_index, u64 advance_cpu_time, u32 * expired_user_data)
{
  timing_wheel_level_t *level = vec_elt_at_index (w->levels, level_index);
  timing_wheel_elt_t *e;
  u32 *x;
  uword i, j, e_len;

  e = vec_elt (level->elts, wheel_index);
  e_len = vec_len (e);

  vec_add2 (expired_user_data, x, e_len);
  for (i = j = 0; i < e_len; i++)
    {
      validate_expired_elt (w, &e[i], advance_cpu_time);
      x[j] = e[i].user_data;

      /* Only advance if elt is not to be deleted. */
      j += !elt_is_deleted (w, e[i].user_data);
    }

  /* Adjust for deleted elts. */
  if (j < e_len)
    _vec_len (expired_user_data) -= e_len - j;

  free_elt_vector (w, e);

  level->elts[wheel_index] = 0;
  clib_bitmap_set_no_check (level->occupancy_bitmap, wheel_index, 0);

  return expired_user_data;
}

/* Called rarely. 32 bit times should only overflow every 4 seconds or so on a fast machine. */
static u32 *
advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
{
  timing_wheel_level_t *l;
  timing_wheel_elt_t *e;
  u64 delta;

  w->stats.cpu_time_base_advances++;
  delta = ((u64) 1 << w->n_wheel_elt_time_bits);
  w->cpu_time_base += delta;
  w->time_index_next_cpu_time_base_update += delta >> w->log2_clocks_per_bin;

  vec_foreach (l, w->levels)
  {
    uword wi;
      /* *INDENT-OFF* */
      clib_bitmap_foreach (wi, l->occupancy_bitmap, ({
	vec_foreach (e, l->elts[wi])
	  {
	    /* This should always be true since otherwise we would have already expired
	       this element. Note that in the second half of this function we need
               to take care not to place the expired elements ourselves. */
	    ASSERT (e->cpu_time_relative_to_base >= delta);
	    e->cpu_time_relative_to_base -= delta;
	  }
      }));
      /* *INDENT-ON* */
  }

  /* See which overflow elements fit now. */
  {
    timing_wheel_overflow_elt_t *oe;
    /* *INDENT-OFF* */
    pool_foreach (oe, w->overflow_pool, ({
      /* It fits now into 32 bits. */
      if (0 == ((oe->cpu_time - w->cpu_time_base) >> BITS (e->cpu_time_relative_to_base)))
	{
	  u64 ti = oe->cpu_time >> w->log2_clocks_per_bin;
	  if (ti <= w->current_time_index)
	    {
	      /* This can happen when timing wheel is not advanced for a long time
		 (for example when at a gdb breakpoint for a while). */
              /* Note: the ti == w->current_time_index means it is also an expired timer */
	      if (! elt_is_deleted (w, oe->user_data))
		vec_add1 (expired_user_data, oe->user_data);
	    }
	  else
	    timing_wheel_insert_helper (w, oe->cpu_time, oe->user_data);
	  pool_put (w->overflow_pool, oe);
	}
    }));
    /* *INDENT-ON* */
  }
  return expired_user_data;
}

static u32 *
refill_level (timing_wheel_t * w,
	      uword level_index,
	      u64 advance_cpu_time,
	      uword from_wheel_index,
	      uword to_wheel_index, u32 * expired_user_data)
{
  timing_wheel_level_t *level;
  timing_wheel_elt_t *to_insert = w->unexpired_elts_pending_insert;
  u64 advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;

  vec_validate (w->stats.refills, level_index);
  w->stats.refills[level_index] += 1;

  if (level_index + 1 >= vec_len (w->levels))
    goto done;

  level = vec_elt_at_index (w->levels, level_index + 1);
  if (!level->occupancy_bitmap)
    goto done;

  while (1)
    {
      timing_wheel_elt_t *e, *es;

      if (clib_bitmap_get_no_check
	  (level->occupancy_bitmap, from_wheel_index))
	{
	  es = level->elts[from_wheel_index];
	  level->elts[from_wheel_index] = 0;
	  clib_bitmap_set_no_check (level->occupancy_bitmap, from_wheel_index,
				    0);

	  vec_foreach (e, es)
	  {
	    u64 e_time = elt_cpu_time (w, e);
	    u64 ti = e_time >> w->log2_clocks_per_bin;
	    if (ti <= advance_time_index)
	      {
		validate_expired_elt (w, e, advance_cpu_time);
		if (!elt_is_deleted (w, e->user_data))
		  vec_add1 (expired_user_data, e->user_data);
	      }
	    else
	      vec_add1 (to_insert, e[0]);
	  }
	  free_elt_vector (w, es);
	}

      if (from_wheel_index == to_wheel_index)
	break;

      from_wheel_index = wheel_add (w, from_wheel_index + 1);
    }

  timing_wheel_validate (w);
done:
  w->unexpired_elts_pending_insert = to_insert;
  return expired_user_data;
}

/* Advance wheel and return any expired user data in vector. */
u32 *
timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
		      u32 * expired_user_data,
		      u64 * next_expiring_element_cpu_time)
{
  timing_wheel_level_t *level;
  uword level_index, advance_rtime, advance_level_index, advance_wheel_index;
  uword n_expired_user_data_before;
  u64 current_time_index, advance_time_index;

  n_expired_user_data_before = vec_len (expired_user_data);

  /* Re-fill lower levels when time wraps. */
  current_time_index = w->current_time_index;
  advance_time_index = advance_cpu_time >> w->log2_clocks_per_bin;

  {
    u64 current_ti, advance_ti;

    current_ti = current_time_index >> w->log2_bins_per_wheel;
    advance_ti = advance_time_index >> w->log2_bins_per_wheel;

    if (PREDICT_FALSE (current_ti != advance_ti))
      {
	if (w->unexpired_elts_pending_insert)
	  _vec_len (w->unexpired_elts_pending_insert) = 0;

	level_index = 0;
	while (current_ti != advance_ti)
	  {
	    uword c, a;
	    c = current_ti & (w->bins_per_wheel - 1);
	    a = advance_ti & (w->bins_per_wheel - 1);
	    if (c != a)
	      expired_user_data = refill_level (w,
						level_index,
						advance_cpu_time,
						c, a, expired_user_data);
	    current_ti >>= w->log2_bins_per_wheel;
	    advance_ti >>= w->log2_bins_per_wheel;
	    level_index++;
	  }
      }
  }

  advance_level_index =
    get_level_and_relative_time (w, advance_cpu_time, &advance_rtime);
  advance_wheel_index =
    rtime_to_wheel_index (w, advance_level_index, advance_rtime);

  /* Empty all occupied bins for entire levels that we advance past. */
  for (level_index = 0; level_index < advance_level_index; level_index++)
    {
      uword wi;

      if (level_index >= vec_len (w->levels))
	break;

      level = vec_elt_at_index (w->levels, level_index);
      /* *INDENT-OFF* */
      clib_bitmap_foreach (wi, level->occupancy_bitmap, ({
        expired_user_data = expire_bin (w, level_index, wi, advance_cpu_time,
					expired_user_data);
      }));
      /* *INDENT-ON* */
    }

  if (PREDICT_TRUE (level_index < vec_len (w->levels)))
    {
      uword wi;
      level = vec_elt_at_index (w->levels, level_index);
      wi = current_time_wheel_index (w, level_index);
      if (level->occupancy_bitmap)
	while (1)
	  {
	    if (clib_bitmap_get_no_check (level->occupancy_bitmap, wi))
	      expired_user_data =
		expire_bin (w, advance_level_index, wi, advance_cpu_time,
			    expired_user_data);

	    /* When we jump out, we have already just expired the bin,
	       corresponding to advance_wheel_index */
	    if (wi == advance_wheel_index)
	      break;

	    wi = wheel_add (w, wi + 1);
	  }
    }

  /* Advance current time index. */
  w->current_time_index = advance_time_index;

  if (vec_len (w->unexpired_elts_pending_insert) > 0)
    {
      timing_wheel_elt_t *e;
      vec_foreach (e, w->unexpired_elts_pending_insert) insert_elt (w, e);
      _vec_len (w->unexpired_elts_pending_insert) = 0;
    }

  /* Don't advance until necessary. */
  /* However, if the timing_wheel_advance() hasn't been called for some time,
     the while() loop will ensure multiple calls to advance_cpu_time_base()
     in a row until the w->cpu_time_base is fresh enough. */
  while (PREDICT_FALSE
	 (advance_time_index >= w->time_index_next_cpu_time_base_update))
    expired_user_data = advance_cpu_time_base (w, expired_user_data);

  if (next_expiring_element_cpu_time)
    {
      u64 min_t;

      /* Anything expired?  If so we need to recompute next expiring elt time. */
      if (vec_len (expired_user_data) == n_expired_user_data_before
	  && w->cached_min_cpu_time_on_wheel != 0ULL)
	min_t = w->cached_min_cpu_time_on_wheel;
      else
	{
	  min_t = timing_wheel_next_expiring_elt_time (w);
	  w->cached_min_cpu_time_on_wheel = min_t;
	}

      *next_expiring_element_cpu_time = min_t;
    }

  return expired_user_data;
}

u8 *
format_timing_wheel (u8 * s, va_list * va)
{
  timing_wheel_t *w = va_arg (*va, timing_wheel_t *);
  int verbose = va_arg (*va, int);
  u32 indent = format_get_indent (s);

  s = format (s, "level 0: %.4e - %.4e secs, 2^%d - 2^%d clocks",
	      (f64) (1 << w->log2_clocks_per_bin) / w->cpu_clocks_per_second,
	      (f64) (1 << w->log2_clocks_per_wheel) /
	      w->cpu_clocks_per_second, w->log2_clocks_per_bin,
	      w->log2_clocks_per_wheel);

  if (verbose)
    {
      int l;

      s = format (s, "\n%Utime base advances %Ld, every %.4e secs",
		  format_white_space, indent + 2,
		  w->stats.cpu_time_base_advances,
		  (f64) ((u64) 1 << w->n_wheel_elt_time_bits) /
		  w->cpu_clocks_per_second);

      for (l = 0; l < vec_len (w->levels); l++)
	s = format (s, "\n%Ulevel %d: refills %Ld",
		    format_white_space, indent + 2,
		    l,
		    l <
		    vec_len (w->stats.refills) ? w->stats.
		    refills[l] : (u64) 0);
    }

  return s;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */