aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/bier/bier_imp.h
blob: fa53989fc93fc98eea965d2d6a4a7fe6947bed40 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * bier_imposition : The BIER imposition object
 *
 * A BIER imposition object is present in the IP mcast output list
 * and represents the imposition of a BIER bitmask. After BIER header
 * imposition the packet is forwarded within the appropriate/specified
 * BIER table
 */

#ifndef __BIER_IMPOSITION_H__
#define __BIER_IMPOSITION_H__

#include <vnet/bier/bier_types.h>
#include <vnet/fib/fib_types.h>
#include <vnet/dpo/dpo.h>

/**
 * The BIER imposition object
 *
 * Groups the BIER table ID used after imposition, a lock count, one DPO
 * per IP protocol, and the header plus bit-string that are imposed.
 */
typedef struct bier_imp_t_ {
    /**
     * The BIER table into which to forward the post imposed packet
     */
    bier_table_id_t bi_tbl;

    /**
     * number of locks
     */
    u32 bi_locks;

    /**
     * The DPO contributed from the resolving BIER table.
     * One per-IP protocol. This allows us to share a BIER imposition
     * object for an IPv4 and IPv6 mfib path.
     */
    dpo_id_t bi_dpo[FIB_PROTOCOL_IP_MAX];

    /**
     * The Header to impose.
     */
    bier_hdr_t bi_hdr;

    /**
     * The bit string.
     *  This is a memory v. speed tradeoff. We inline here the
     *  largest header type so that the bitstring is on the same
     *  cacheline as the header.
     */
    bier_bit_mask_4096_t bi_bits;
} bier_imp_t;

/**
 * Returns the index of an imposition object for the given table ID,
 * sender and bit-string.
 * NOTE(review): going by the name, a matching existing object is
 * presumably locked instead of a new one being created - confirm
 * against the definition, which is not in this header.
 */
extern index_t bier_imp_add_or_lock(const bier_table_id_t *bt,
                                    bier_bp_t sender,
                                    const bier_bit_string_t *bs);

/*
 * Lock/unlock the imposition object at index bii.
 * NOTE(review): presumably these adjust bier_imp_t.bi_locks - confirm.
 */
extern void bier_imp_unlock(index_t bii);
extern void bier_imp_lock(index_t bii);

/* format() style formatter; the arguments consumed from ap are defined
 * by the implementation, which is not visible here. */
extern u8* format_bier_imp(u8* s, va_list *ap);

/*
 * Writes into dpo the forwarding contribution of imposition object bii
 * for the given DPO protocol.
 * NOTE(review): semantics inferred from the signature - confirm.
 */
extern void bier_imp_contribute_forwarding(index_t bii,
                                           dpo_proto_t proto,
                                           dpo_id_t *dpo);

/* The pool of imposition objects that bier_imp_get() indexes into. */
extern bier_imp_t *bier_imp_pool;

/**
 * Look up the imposition object stored at index bii in bier_imp_pool.
 */
always_inline bier_imp_t*
bier_imp_get (index_t bii)
{
    bier_imp_t *bi;

    bi = pool_elt_at_index(bier_imp_pool, bii);

    return (bi);
}

#endif
ight .cs { color: #75715e } /* Comment.Special */ .highlight .gd { color: #f92672 } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gi { color: #a6e22e } /* Generic.Inserted */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #75715e } /* Generic.Subheading */ .highlight .kc { color: #66d9ef } /* Keyword.Constant */ .highlight .kd { color: #66d9ef } /* Keyword.Declaration */ .highlight .kn { color: #f92672 } /* Keyword.Namespace */ .highlight .kp { color: #66d9ef } /* Keyword.Pseudo */ .highlight .kr { color: #66d9ef } /* Keyword.Reserved */ .highlight .kt { color: #66d9ef } /* Keyword.Type */ .highlight .ld { color: #e6db74 } /* Literal.Date */ .highlight .m { color: #ae81ff } /* Literal.Number */ .highlight .s { color: #e6db74 } /* Literal.String */ .highlight .na { color: #a6e22e } /* Name.Attribute */ .highlight .nb { color: #f8f8f2 } /* Name.Builtin */ .highlight .nc { color: #a6e22e } /* Name.Class */ .highlight .no { color: #66d9ef } /* Name.Constant */ .highlight .nd { color: #a6e22e } /* Name.Decorator */ .highlight .ni { color: #f8f8f2 } /* Name.Entity */ .highlight .ne { color: #a6e22e } /* Name.Exception */ .highlight .nf { color: #a6e22e } /* Name.Function */ .highlight .nl { color: #f8f8f2 } /* Name.Label */ .highlight .nn { color: #f8f8f2 } /* Name.Namespace */ .highlight .nx { color: #a6e22e } /* Name.Other */ .highlight .py { color: #f8f8f2 } /* Name.Property */ .highlight .nt { color: #f92672 } /* Name.Tag */ .highlight .nv { color: #f8f8f2 } /* Name.Variable */ .highlight .ow { color: #f92672 } /* Operator.Word */ .highlight .w { color: #f8f8f2 } /* Text.Whitespace */ .highlight .mb { color: #ae81ff } /* Literal.Number.Bin */ .highlight .mf { color: #ae81ff } /* Literal.Number.Float */ .highlight .mh { color: #ae81ff } /* Literal.Number.Hex */ .highlight .mi { color: #ae81ff } /* Literal.Number.Integer */ .highlight .mo { color: #ae81ff } /* Literal.Number.Oct */ 
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */ .highlight .sb { color: #e6db74 } /* Literal.String.Backtick */ .highlight .sc { color: #e6db74 } /* Literal.String.Char */ .highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */ .highlight .sd { color: #e6db74 } /* Literal.String.Doc */ .highlight .s2 { color: #e6db74 } /* Literal.String.Double */ .highlight .se { color: #ae81ff } /* Literal.String.Escape */ .highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */ .highlight .si { color: #e6db74 } /* Literal.String.Interpol */ .highlight .sx { color: #e6db74 } /* Literal.String.Other */ .highlight .sr { color: #e6db74 } /* Literal.String.Regex */ .highlight .s1 { color: #e6db74 } /* Literal.String.Single */ .highlight .ss { color: #e6db74 } /* Literal.String.Symbol */ .highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #a6e22e } /* Name.Function.Magic */ .highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */ .highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */ .highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */ .highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */ .highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */ } @media (prefers-color-scheme: light) { .highlight .hll { background-color: #ffffcc } .highlight .c { color: #888888 } /* Comment */ .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ .highlight .k { color: #008800; font-weight: bold } /* Keyword */ .highlight .ch { color: #888888 } /* Comment.Hashbang */ .highlight .cm { color: #888888 } /* Comment.Multiline */ .highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */ .highlight .cpf { color: #888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted 
*/ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: 
#bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: 
#0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <svm/message_queue.h>
#include <vppinfra/mem.h>
#include <vppinfra/format.h>
#include <vppinfra/time.h>
#include <sys/eventfd.h>
#include <sys/socket.h>

/**
 * Return the local ring descriptor at position ring_index in the
 * queue's ring vector.
 */
static inline svm_msg_q_ring_t *
svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index)
{
  svm_msg_q_ring_t *ring;

  ring = vec_elt_at_index (mq->rings, ring_index);
  return ring;
}

/**
 * Non-inline accessor for the ring descriptor at position ring_index
 * in the queue's ring vector.
 */
svm_msg_q_ring_t *
svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index)
{
  return vec_elt_at_index (mq->rings, ring_index);
}

/**
 * Address of element elt_index inside a ring's data area. Elements are
 * ring->elsize bytes apart, starting at the shared ring's data member.
 */
static inline void *
svm_msg_q_ring_data (svm_msg_q_ring_t * ring, u32 elt_index)
{
  svm_msg_q_ring_shared_t *sr = ring->shr;

  ASSERT (elt_index < ring->nitems);
  return (sr->data + elt_index * ring->elsize);
}

/**
 * Initialize the mutex and condition variable embedded in a shared queue.
 *
 * Both are created with the PTHREAD_PROCESS_SHARED attribute and the
 * mutex is additionally marked PTHREAD_MUTEX_ROBUST. Failures are only
 * logged; each warning names the call that failed.
 *
 * @param sq shared queue whose mutex and condvar are initialized
 */
static void
svm_msg_q_init_mutex (svm_msg_q_shared_queue_t *sq)
{
  pthread_mutexattr_t attr;
  pthread_condattr_t cattr;

  clib_memset (&attr, 0, sizeof (attr));
  clib_memset (&cattr, 0, sizeof (cattr));

  /* mutex: process shared and robust */
  if (pthread_mutexattr_init (&attr))
    clib_unix_warning ("mutexattr_init");
  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("pthread_mutexattr_setpshared");
  if (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST))
    clib_unix_warning ("setrobust");
  if (pthread_mutex_init (&sq->mutex, &attr))
    clib_unix_warning ("mutex_init");
  if (pthread_mutexattr_destroy (&attr))
    clib_unix_warning ("mutexattr_destroy");

  /* condvar: process shared */
  if (pthread_condattr_init (&cattr))
    clib_unix_warning ("condattr_init");
  if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("condattr_setpshared");
  if (pthread_cond_init (&sq->condvar, &cattr))
    clib_unix_warning ("cond_init");
  if (pthread_condattr_destroy (&cattr))
    clib_unix_warning ("condattr_destroy");
}

/**
 * Lay out a message queue in the memory starting at base.
 *
 * The shared queue header is zeroed and sized from cfg, then one shared
 * ring header per configured ring is written back to back after the
 * queue's message slots, each followed by nitems * elsize bytes. Finally
 * the queue's mutex and condvar are initialized.
 *
 * @param base memory in which the queue is built
 * @param cfg  queue and ring configuration
 * @return base, typed as the shared message queue
 */
svm_msg_q_shared_t *
svm_msg_q_init (void *base, svm_msg_q_cfg_t *cfg)
{
  svm_msg_q_shared_t *smq = (svm_msg_q_shared_t *) base;
  svm_msg_q_shared_queue_t *sq = smq->q;
  svm_msg_q_ring_shared_t *sr;
  u32 queue_bytes, ring_bytes;
  int ri;

  /* queue header */
  clib_memset (sq, 0, sizeof (*sq));
  sq->maxsize = cfg->q_nitems;
  sq->elsize = sizeof (svm_msg_q_msg_t);
  smq->n_rings = cfg->n_rings;

  /* rings start right after the queue header and its message slots */
  queue_bytes = sizeof (*sq) + cfg->q_nitems * sizeof (svm_msg_q_msg_t);
  sr = (void *) ((u8 *) smq->q + queue_bytes);

  for (ri = 0; ri < cfg->n_rings; ri++)
    {
      sr->nitems = cfg->ring_cfgs[ri].nitems;
      sr->elsize = cfg->ring_cfgs[ri].elsize;
      sr->tail = 0;
      sr->head = 0;
      sr->cursize = 0;

      /* step over this ring's header and data to the next ring */
      ring_bytes = sizeof (*sr) + sr->nitems * sr->elsize;
      sr = (void *) ((u8 *) sr + ring_bytes);
    }

  svm_msg_q_init_mutex (sq);

  return smq;
}

/**
 * Compute the number of bytes needed to hold a message queue described
 * by cfg: the shared header, the queue header with its message slots,
 * one shared ring header per ring and the data of every ring whose
 * config has no data pointer set.
 *
 * @param cfg queue and ring configuration, must not be NULL
 * @return size in bytes
 */
uword
svm_msg_q_size_to_alloc (svm_msg_q_cfg_t *cfg)
{
  uword rings_bytes;
  u32 queue_bytes;
  int ri;

  ASSERT (cfg);

  rings_bytes = sizeof (svm_msg_q_ring_shared_t) * cfg->n_rings;
  for (ri = 0; ri < cfg->n_rings; ri++)
    {
      svm_msg_q_ring_cfg_t *rc = &cfg->ring_cfgs[ri];

      /* rings that come with a data pointer add no data bytes */
      if (!rc->data)
	rings_bytes += (uword) rc->nitems * rc->elsize;
    }

  queue_bytes = sizeof (svm_msg_q_shared_queue_t) +
		cfg->q_nitems * sizeof (svm_msg_q_msg_t);

  return sizeof (svm_msg_q_shared_t) + queue_bytes + rings_bytes;
}

/**
 * Allocate cache-line aligned memory sized by svm_msg_q_size_to_alloc()
 * and initialize a message queue in it.
 *
 * @param cfg queue and ring configuration
 * @return the initialized shared queue, or 0 if the allocation failed
 */
svm_msg_q_shared_t *
svm_msg_q_alloc (svm_msg_q_cfg_t *cfg)
{
  uword alloc_sz = svm_msg_q_size_to_alloc (cfg);
  u8 *mem = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);

  return mem ? svm_msg_q_init (mem, cfg) : 0;
}

/**
 * Bind a local queue descriptor to an already initialized shared queue.
 *
 * Points mq at the shared queue header, marks the eventfd as unset (-1)
 * and fills mq->rings with one local descriptor per shared ring, walking
 * the shared rings in the same back-to-back layout svm_msg_q_init()
 * writes.
 *
 * @param mq       local descriptor to populate
 * @param smq_base start of the shared message queue
 */
void
svm_msg_q_attach (svm_msg_q_t *mq, void *smq_base)
{
  svm_msg_q_shared_t *smq = (svm_msg_q_shared_t *) smq_base;
  svm_msg_q_ring_shared_t *sr;
  svm_msg_q_ring_t *lr;
  u32 ri, n_rings, queue_bytes, ring_bytes;

  mq->q.shr = smq->q;
  mq->q.evtfd = -1;

  n_rings = smq->n_rings;
  vec_validate (mq->rings, n_rings - 1);

  /* first shared ring follows the queue header and its message slots */
  queue_bytes = sizeof (svm_msg_q_shared_queue_t) +
		mq->q.shr->maxsize * sizeof (svm_msg_q_msg_t);
  sr = (void *) ((u8 *) smq->q + queue_bytes);

  for (ri = 0; ri < n_rings; ri++)
    {
      lr = &mq->rings[ri];
      lr->nitems = sr->nitems;
      lr->elsize = sr->elsize;
      lr->shr = sr;

      ring_bytes = sizeof (*sr) + sr->nitems * sr->elsize;
      sr = (void *) ((u8 *) sr + ring_bytes);
    }
}

/**
 * Release the shared queue pointer held by mq and then mq itself, both
 * through clib_mem_free().
 * NOTE(review): mq->q.shr is set from smq->q in svm_msg_q_attach();
 * confirm that pointer is one clib_mem_free() accepts.
 */
void
svm_msg_q_free (svm_msg_q_t * mq)
{
  svm_msg_q_shared_queue_t *sq = mq->q.shr;

  clib_mem_free (sq);
  clib_mem_free (mq);
}

/**
 * Notify waiters on the queue.
 *
 * With no eventfd configured (evtfd == -1) the shared condvar is
 * broadcast. Otherwise the value 1 is written to the eventfd; any other
 * negative fd is ignored and a failed write is only logged.
 */
static void
svm_msg_q_send_signal (svm_msg_q_t *mq)
{
  int __clib_unused rv;
  u64 data = 1;

  if (mq->q.evtfd == -1)
    {
      (void) pthread_cond_broadcast (&mq->q.shr->condvar);
      return;
    }

  if (mq->q.evtfd < 0)
    return;

  rv = write (mq->q.evtfd, &data, sizeof (data));
  if (PREDICT_FALSE (rv < 0))
    clib_unix_warning ("signal write on %d returned %d", mq->q.evtfd, rv);
}

/**
 * Reserve the element at the tail of ring ring_index.
 *
 * The ring must not be full (asserted). The tail is advanced with
 * wrap-around and the ring's cursize is incremented atomically.
 *
 * @return message handle carrying the ring index and element index
 */
svm_msg_q_msg_t
svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index)
{
  svm_msg_q_ring_t *ring = svm_msg_q_ring_inline (mq, ring_index);
  svm_msg_q_ring_shared_t *sr = ring->shr;
  svm_msg_q_msg_t msg;

  ASSERT (sr->cursize < ring->nitems);

  msg.elt_index = sr->tail;
  msg.ring_index = ring - mq->rings;

  sr->tail = (sr->tail + 1) % ring->nitems;
  clib_atomic_fetch_add_rel (&sr->cursize, 1);

  return msg;
}

/**
 * Take the queue lock and reserve an element on ring ring_index.
 *
 * Blocking mode (noblock == 0) locks and waits until neither the queue
 * nor the ring is full. Non-blocking mode gives up instead of waiting.
 * On success the lock is not released by this function.
 *
 * @return 0 on success with *msg set, -1 if the try-lock failed,
 *         -2 if the queue or ring is full (lock released)
 */
int
svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
				     u8 noblock, svm_msg_q_msg_t * msg)
{
  if (!noblock)
    {
      svm_msg_q_lock (mq);
      while (svm_msg_q_is_full (mq)
	     || svm_msg_q_ring_is_full (mq, ring_index))
	svm_msg_q_wait (mq);
      *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
      return 0;
    }

  if (svm_msg_q_try_lock (mq))
    return -1;

  if (PREDICT_FALSE (svm_msg_q_is_full (mq)
		     || svm_msg_q_ring_is_full (mq, ring_index)))
    {
      svm_msg_q_unlock (mq);
      return -2;
    }

  *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
  return 0;
}

/**
 * Reserve an element able to hold nbytes from the first suitable ring.
 *
 * Rings are scanned in vector order; a ring qualifies when its element
 * size is at least nbytes and it is not full.
 *
 * @return message handle for the reserved element, or a handle whose
 *         as_u64 is ~0 when no ring qualifies
 */
svm_msg_q_msg_t
svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes)
{
  svm_msg_q_msg_t msg = {.as_u64 = ~0 };
  uword ri, n_rings = vec_len (mq->rings);

  for (ri = 0; ri < n_rings; ri++)
    {
      svm_msg_q_ring_t *ring = mq->rings + ri;
      svm_msg_q_ring_shared_t *sr = ring->shr;

      if (ring->elsize >= nbytes && sr->cursize != ring->nitems)
	{
	  msg.ring_index = ring - mq->rings;
	  msg.elt_index = sr->tail;
	  sr->tail = (sr->tail + 1) % ring->nitems;
	  clib_atomic_fetch_add_rel (&sr->cursize, 1);
	  break;
	}
    }

  return msg;
}

/**
 * Resolve a message handle to the address of its element in the ring
 * data area.
 */
void *
svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
{
  return svm_msg_q_ring_data (svm_msg_q_ring_inline (mq, msg->ring_index),
			      msg->elt_index);
}

/**
 * Return a message's element to its ring.
 *
 * Elements must be released in order: the message has to be the one at
 * the ring head, otherwise a warning is logged and an assert fires.
 * The ring's cursize is decremented atomically and, if the ring was full
 * before the release, waiters are signalled.
 */
void
svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
{
  svm_msg_q_ring_shared_t *sr;
  svm_msg_q_ring_t *ring;
  int was_full;

  ASSERT (vec_len (mq->rings) > msg->ring_index);
  ring = svm_msg_q_ring_inline (mq, msg->ring_index);
  sr = ring->shr;

  if (msg->elt_index != sr->head)
    {
      clib_warning ("message out of order");
      /* only in-order processing is expected for now */
      ASSERT (0);
    }
  else
    {
      sr->head = (sr->head + 1) % ring->nitems;
    }

  /* sample fullness before the decrement */
  was_full = (sr->cursize == ring->nitems);
  clib_atomic_fetch_sub_rel (&sr->cursize, 1);

  if (PREDICT_FALSE (was_full))
    svm_msg_q_send_signal (mq);
}

/**
 * Check that a message handle refers to an element currently in use.
 *
 * The ring index must be within the ring vector and the element's
 * distance from the ring head must be smaller than the number of
 * elements between head and tail (0 or nitems when head == tail,
 * depending on cursize).
 *
 * @return non-zero if valid, 0 otherwise
 */
static int
svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
{
  u32 tail, head, msg_dist, used;
  svm_msg_q_ring_shared_t *sr;
  svm_msg_q_ring_t *ring;

  if (msg->ring_index >= vec_len (mq->rings))
    return 0;

  ring = svm_msg_q_ring_inline (mq, msg->ring_index);
  sr = ring->shr;
  tail = sr->tail;
  head = sr->head;

  msg_dist = ((ring->nitems + msg->elt_index) - head) % ring->nitems;

  if (tail != head)
    used = ((ring->nitems + tail) - head) % ring->nitems;
  else if (sr->cursize == 0)
    used = 0;
  else
    used = ring->nitems;

  return (msg_dist < used);
}

/**
 * Copy one element into the queue's tail slot and advance the tail.
 *
 * cursize is incremented atomically; if it was 0 before the increment
 * waiters are signalled. No locking or fullness check is done here.
 */
static void
svm_msg_q_add_raw (svm_msg_q_t *mq, u8 *elem)
{
  svm_msg_q_shared_queue_t *sq = mq->q.shr;
  i8 *slot = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
  u32 prev_sz;

  clib_memcpy_fast (slot, elem, sq->elsize);
  sq->tail = (sq->tail + 1) % sq->maxsize;

  prev_sz = clib_atomic_fetch_add_rel (&sq->cursize, 1);
  if (prev_sz == 0)
    svm_msg_q_send_signal (mq);
}

/**
 * Enqueue a message handle.
 *
 * Takes the queue lock (try-lock when nowait is set), waits for space
 * unless nowait is set, copies the handle into the queue and releases
 * the lock. The lock is never held when this function returns.
 *
 * @param mq     message queue
 * @param msg    handle to enqueue; must be valid for mq (asserted)
 * @param nowait non-zero to fail instead of blocking
 * @return 0 on success, -1 if the lock could not be taken without
 *         blocking, -2 if the queue is full and nowait is set
 */
int
svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
{
  ASSERT (svm_msq_q_msg_is_valid (mq, msg));

  if (nowait)
    {
      /* zero on success */
      if (svm_msg_q_try_lock (mq))
	{
	  return (-1);
	}
    }
  else
    svm_msg_q_lock (mq);

  if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
    {
      if (nowait)
	{
	  /* drop the lock taken above before bailing out */
	  svm_msg_q_unlock (mq);
	  return (-2);
	}
      while (svm_msg_q_is_full (mq))
	svm_msg_q_wait (mq);
    }

  svm_msg_q_add_raw (mq, (u8 *) msg);

  svm_msg_q_unlock (mq);

  return 0;
}

/**
 * Enqueue a message handle and release the queue lock.
 *
 * No lock is taken and no fullness check is made here; the handle must
 * be valid for mq (asserted).
 */
void
svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
{
  u8 *elem = (u8 *) msg;

  ASSERT (svm_msq_q_msg_is_valid (mq, msg));

  svm_msg_q_add_raw (mq, elem);
  svm_msg_q_unlock (mq);
}

/**
 * Copy the element at the queue head into elem and advance the head.
 *
 * The queue must not be empty (asserted). cursize is decremented
 * atomically; if the queue was full before the decrement waiters are
 * signalled. No locking is done here.
 *
 * @return always 0
 */
static int
svm_msg_q_sub_raw (svm_msg_q_t *mq, u8 *elem)
{
  svm_msg_q_shared_queue_t *sq = mq->q.shr;
  i8 *slot;
  u32 prev_sz;

  ASSERT (!svm_msg_q_is_empty (mq));

  slot = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
  clib_memcpy_fast (elem, slot, sq->elsize);
  sq->head = (sq->head + 1) % sq->maxsize;

  prev_sz = clib_atomic_fetch_sub_rel (&sq->cursize, 1);
  if (PREDICT_FALSE (prev_sz == sq->maxsize))
    svm_msg_q_send_signal (mq);

  return 0;
}

/**
 * Dequeue a message handle.
 *
 * Takes the queue lock (try-lock for SVM_Q_NOWAIT), optionally waits for
 * the queue to become non-empty, copies the head element into msg and
 * releases the lock. The lock is released on every return path taken
 * after it was acquired.
 *
 * @param mq   message queue
 * @param msg  receives the dequeued handle
 * @param cond SVM_Q_NOWAIT, SVM_Q_TIMEDWAIT, or anything else to wait
 *             indefinitely
 * @param time timeout passed to svm_msg_q_timedwait() for
 *             SVM_Q_TIMEDWAIT
 * @return 0 on success, -1 if the lock could not be taken without
 *         blocking, -2 if the queue is empty and cond is SVM_Q_NOWAIT,
 *         ETIMEDOUT on timeout, or another non-zero wait error if the
 *         timed wait failed and the queue is still empty
 */
int
svm_msg_q_sub (svm_msg_q_t * mq, svm_msg_q_msg_t * msg,
	       svm_q_conditional_wait_t cond, u32 time)
{
  int rc = 0;

  if (cond == SVM_Q_NOWAIT)
    {
      /* zero on success */
      if (svm_msg_q_try_lock (mq))
	{
	  return (-1);
	}
    }
  else
    svm_msg_q_lock (mq);

  if (PREDICT_FALSE (svm_msg_q_is_empty (mq)))
    {
      if (cond == SVM_Q_NOWAIT)
	{
	  svm_msg_q_unlock (mq);
	  return (-2);
	}
      else if (cond == SVM_Q_TIMEDWAIT)
	{
	  while (svm_msg_q_is_empty (mq) && rc == 0)
	    rc = svm_msg_q_timedwait (mq, time);

	  /*
	   * Besides the timeout, bail out on any other wait failure that
	   * left the queue empty: dequeuing now would read a stale slot
	   * and push cursize below zero.
	   */
	  if (rc == ETIMEDOUT || (rc != 0 && svm_msg_q_is_empty (mq)))
	    {
	      svm_msg_q_unlock (mq);
	      return rc;
	    }
	}
      else
	{
	  while (svm_msg_q_is_empty (mq))
	    svm_msg_q_wait (mq);
	}
    }

  svm_msg_q_sub_raw (mq, (u8 *) msg);

  svm_msg_q_unlock (mq);

  return 0;
}

/**
 * Dequeue the head element into msg without touching the queue lock.
 * The queue must not be empty (asserted by svm_msg_q_sub_raw()).
 */
void
svm_msg_q_sub_w_lock (svm_msg_q_t *mq, svm_msg_q_msg_t *msg)
{
  u8 *elem = (u8 *) msg;

  (void) svm_msg_q_sub_raw (mq, elem);
}

/**
 * Record fd as the queue's event file descriptor.
 *
 * Once evtfd differs from -1, svm_msg_q_send_signal(), svm_msg_q_wait()
 * and svm_msg_q_timedwait() use the fd instead of the shared condvar.
 */
void
svm_msg_q_set_eventfd (svm_msg_q_t *mq, int fd)
{
  mq->q.evtfd = fd;
}

/**
 * Create a non-blocking eventfd with initial value 0 and attach it to
 * the queue.
 *
 * @return 0 on success, -1 if eventfd() failed
 */
int
svm_msg_q_alloc_eventfd (svm_msg_q_t *mq)
{
  int fd = eventfd (0, EFD_NONBLOCK);

  if (fd < 0)
    return -1;

  svm_msg_q_set_eventfd (mq, fd);
  return 0;
}

/**
 * Block until the queue is signalled.
 *
 * Must be called with the queue lock held; the lock is held again when
 * the function returns, on every path.
 *
 * Without an eventfd (evtfd == -1) this waits on the shared condvar.
 * With an eventfd the lock is dropped, the fd is read until the read
 * succeeds (EAGAIN is retried) and the lock is taken again. Any other
 * read error is logged and ends the wait.
 */
void
svm_msg_q_wait (svm_msg_q_t *mq)
{
  if (mq->q.evtfd == -1)
    {
      pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
    }
  else
    {
      u64 buf;
      int rv;

      svm_msg_q_unlock (mq);
      while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
	{
	  if (errno != EAGAIN)
	    {
	      clib_unix_warning ("read error");
	      /* stop waiting, but fall through to re-take the lock:
	       * callers go on to touch the queue and unlock it */
	      break;
	    }
	}
      svm_msg_q_lock (mq);
    }
}

/**
 * Block until the queue is signalled or a timeout expires.
 *
 * Must be called with the queue lock held. Without an eventfd
 * (evtfd == -1) this is a pthread_cond_timedwait() on the shared condvar
 * with an absolute deadline of now + timeout. With an eventfd, a receive
 * timeout is requested on the fd, the lock is dropped around a single
 * read and re-taken afterwards.
 *
 * NOTE(review): SO_RCVTIMEO is a socket option; whether it has any
 * effect on the fd stored in evtfd depends on what callers install
 * there - confirm. The setsockopt() result is ignored as before.
 *
 * @param mq      message queue
 * @param timeout timeout in seconds, fractional part honoured
 * @return 0 if signalled, otherwise the pthread_cond_timedwait() result
 *         or the errno of the failed read
 */
int
svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
{
  if (mq->q.evtfd == -1)
    {
      struct timespec ts;
      ts.tv_sec = unix_time_now () + (u32) timeout;
      ts.tv_nsec = (timeout - (u32) timeout) * 1e9;
      return pthread_cond_timedwait (&mq->q.shr->condvar, &mq->q.shr->mutex,
				     &ts);
    }
  else
    {
      struct timeval tv;
      u64 buf;
      int rv, err = 0;

      tv.tv_sec = (u64) timeout;
      /* fractional seconds expressed in microseconds */
      tv.tv_usec = (timeout - (u64) timeout) * 1e6;
      setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO, (const char *) &tv,
		  sizeof tv);

      svm_msg_q_unlock (mq);
      rv = read (mq->q.evtfd, &buf, sizeof (buf));
      if (rv < 0)
	{
	  /* save errno before logging and locking can overwrite it */
	  err = errno;
	  clib_warning ("read %u", err);
	}
      svm_msg_q_lock (mq);

      return err;
    }
}

/**
 * format() helper printing queue occupancy as " [Q:cur/max]" followed by
 * one " [R<i>:cur/nitems]" entry per ring.
 *
 * Consumes one svm_msg_q_t pointer from args.
 */
u8 *
format_svm_msg_q (u8 * s, va_list * args)
{
  svm_msg_q_t *mq = va_arg (*args, svm_msg_q_t *);
  u32 ri = 0;

  s = format (s, " [Q:%d/%d]", mq->q.shr->cursize, mq->q.shr->maxsize);

  while (ri < vec_len (mq->rings))
    {
      s = format (s, " [R%d:%d/%d]", ri, mq->rings[ri].shr->cursize,
		  mq->rings[ri].nitems);
      ri++;
    }

  return s;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */