summaryrefslogtreecommitdiffstats
path: root/src/vnet/adj/adj_midchain_delegate.c
blob: 16129ff86ac8fa35929caffb6073d97aa43c9991 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/adj/adj_delegate.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/fib_entry_track.h>

/**
 * Midchain stacker delegate.
 *
 * Per-adjacency state stored in amd_pool and attached to the adjacency
 * as a delegate of type ADJ_DELEGATE_MIDCHAIN.
 */
typedef struct adj_midchain_delegate_t_
{
    /**
     * The FIB entry we are stacked on. Holds the value returned by
     * fib_entry_track(), or FIB_NODE_INDEX_INVALID before tracking starts.
     */
    fib_node_index_t amd_fei;

    /**
     * The sibling index on the FIB entry. Filled in by fib_entry_track()
     * and handed back to fib_entry_untrack() when the delegate goes away.
     */
    u32 amd_sibling;
} adj_midchain_delegate_t;

/**
 * Pool of delegates.
 * The ad_index of an ADJ_DELEGATE_MIDCHAIN adj_delegate_t is an index
 * into this pool.
 */
static adj_midchain_delegate_t *amd_pool;

/**
 * Resolve a generic adjacency delegate to its midchain delegate.
 *
 * @param ad the generic delegate, may be NULL
 * @return the pool element at ad->ad_index, or NULL when ad is NULL
 */
static inline const adj_midchain_delegate_t*
adj_midchain_from_const_base (const adj_delegate_t *ad)
{
    if (NULL == ad)
    {
        return (NULL);
    }

    return (pool_elt_at_index(amd_pool, ad->ad_index));
}

/**
 * (Re)stack the midchain adjacency on the FIB entry its delegate tracks.
 *
 * The adjacency is unstacked when its interface is not admin-up or when
 * the delegate holds no FIB entry; otherwise it is stacked on the tracked
 * entry using the forwarding chain type derived from the entry's prefix.
 *
 * @param ai  index of the midchain adjacency
 * @param amd the adjacency's midchain delegate
 */
static void
adj_midchain_delegate_restack_i (adj_index_t ai,
                                 adj_midchain_delegate_t *amd)
{
    const fib_prefix_t *tracked_pfx;

    if (!vnet_sw_interface_is_admin_up (vnet_get_main (),
                                        adj_get_sw_if_index(ai)) ||
        (FIB_NODE_INDEX_INVALID == amd->amd_fei))
    {
        adj_nbr_midchain_unstack (ai);
        return;
    }

    tracked_pfx = fib_entry_get_prefix(amd->amd_fei);

    adj_nbr_midchain_stack_on_fib_entry (
        ai,
        amd->amd_fei,
        fib_forw_chain_type_from_fib_proto(tracked_pfx->fp_proto));
}

/**
 * Restack a midchain adjacency using its existing delegate.
 * Does nothing when the adjacency has no midchain delegate.
 *
 * @param ai index of the midchain adjacency
 */
void
adj_midchain_delegate_restack (adj_index_t ai)
{
    adj_delegate_t *delegate;

    delegate = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_MIDCHAIN);

    if (NULL == delegate)
    {
        /* no delegate => nothing to stack */
        return;
    }

    adj_midchain_delegate_restack_i(
        ai,
        pool_elt_at_index(amd_pool, delegate->ad_index));
}

/**
 * Stack a midchain adjacency on the FIB entry for a prefix.
 *
 * If the adjacency already owns a midchain delegate it is reused as is
 * (fib_index and pfx are not consulted in that case). Otherwise a new
 * delegate is allocated, attached to the adjacency and set to track the
 * FIB entry for pfx in table fib_index. Either way the adjacency is then
 * restacked.
 *
 * @param ai        index of the midchain adjacency
 * @param fib_index FIB table in which to track the prefix
 * @param pfx       prefix to track
 */
void
adj_midchain_delegate_stack (adj_index_t ai,
                             u32 fib_index,
                             const fib_prefix_t *pfx)
{
    adj_midchain_delegate_t *amd;
    adj_delegate_t *existing;
    ip_adjacency_t *adj;

    adj = adj_get(ai);
    existing = adj_delegate_get(adj, ADJ_DELEGATE_MIDCHAIN);

    if (NULL == existing)
    {
        /* first time: create the delegate and start tracking */
        pool_get(amd_pool, amd);
        amd->amd_fei = FIB_NODE_INDEX_INVALID;
        adj_delegate_add(adj, ADJ_DELEGATE_MIDCHAIN, amd - amd_pool);

        amd->amd_fei = fib_entry_track(fib_index,
                                       pfx,
                                       FIB_NODE_TYPE_ADJ,
                                       ai,
                                       &amd->amd_sibling);
    }
    else
    {
        /* a delegate is already present, use that */
        amd = pool_elt_at_index(amd_pool, existing->ad_index);
    }

    adj_midchain_delegate_restack_i(ai, amd);
}

/**
 * Remove the midchain delegate from an adjacency, if it has one.
 *
 * The adjacency is unstacked, the FIB entry is untracked, the delegate is
 * returned to the pool and finally detached from the adjacency.
 *
 * @param ai index of the midchain adjacency
 */
void
adj_midchain_delegate_remove (adj_index_t ai)
{
    adj_midchain_delegate_t *amd;
    adj_delegate_t *delegate;

    delegate = adj_delegate_get(adj_get(ai), ADJ_DELEGATE_MIDCHAIN);

    if (NULL == delegate)
    {
        /* no delegate, nothing to remove */
        return;
    }

    adj_nbr_midchain_unstack(ai);

    amd = pool_elt_at_index(amd_pool, delegate->ad_index);
    fib_entry_untrack(amd->amd_fei, amd->amd_sibling);
    pool_put(amd_pool, amd);

    adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN);
}

/**
 * Unstack a midchain adjacency.
 * The delegate, if any, is left in place.
 *
 * @param ai index of the midchain adjacency
 */
void
adj_midchain_delegate_unstack (adj_index_t ai)
{
    adj_nbr_midchain_unstack (ai);
}

/**
 * Delegate callback, registered as adv_adj_deleted: stop tracking the
 * FIB entry and return the delegate to the pool.
 *
 * @param ad the generic delegate being destroyed
 */
static void
adj_midchain_delegate_adj_deleted (adj_delegate_t *ad)
{
    adj_midchain_delegate_t *doomed;

    doomed = pool_elt_at_index(amd_pool, ad->ad_index);

    fib_entry_untrack(doomed->amd_fei, doomed->amd_sibling);
    pool_put(amd_pool, doomed);
}

/**
 * Format a MIDCHAIN tracking delegate.
 *
 * @param aed the generic delegate to print
 * @param s   the vector to append to
 * @return s with "MIDCHAIN:[fib-entry:<index>]" appended
 */
static u8 *
adj_midchain_delegate_fmt (const adj_delegate_t *aed, u8 *s)
{
    const adj_midchain_delegate_t *amd;

    amd = adj_midchain_from_const_base(aed);

    return (format(s, "MIDCHAIN:[fib-entry:%d]", amd->amd_fei));
}

/**
 * Virtual function table registered for ADJ_DELEGATE_MIDCHAIN delegates.
 */
static const adj_delegate_vft_t adj_delegate_vft = {
  .adv_adj_deleted = adj_midchain_delegate_adj_deleted,
  .adv_format = adj_midchain_delegate_fmt,
};

/**
 * Module init: register the midchain delegate type and its function table.
 *
 * @param vm the vlib main (unused)
 * @return always NULL (no error)
 */
static clib_error_t *
adj_midchain_delegate_module_init (vlib_main_t * vm)
{
    adj_delegate_register_type (ADJ_DELEGATE_MIDCHAIN, &adj_delegate_vft);

    return (NULL);
}

VLIB_INIT_FUNCTION (adj_midchain_delegate_module_init);
.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */


#define _GNU_SOURCE
#include <stdint.h>
#include <net/if.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vpp/app/version.h>
#include <memif/memif.h>
#include <memif/private.h>

/* Plugin-wide memif state: the interface pool, the socket-file pool and
 * its by-filename index, and the per-thread rx buffer vectors. */
memif_main_t memif_main;

/*
 * Ethernet flag-change callback handed to ethernet_register_interface().
 * Flag changes are currently ignored; the function always returns 0.
 */
static u32
memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
{
  /* no flag handling implemented yet */
  return 0;
}

/*
 * Release the interrupt fd of a queue.
 *
 * If the fd has been registered (int_unix_file_index != ~0) the file is
 * removed by index; otherwise a still-open raw fd is closed directly.
 * In both cases int_fd ends up as -1.
 */
static void
memif_queue_intfd_close (memif_queue_t * mq)
{
  if (mq->int_unix_file_index == ~0)
    {
      /* never registered: close the bare descriptor, if there is one */
      if (mq->int_fd > -1)
	{
	  close (mq->int_fd);
	  mq->int_fd = -1;
	}
      return;
    }

  memif_file_del_by_index (mq->int_unix_file_index);
  mq->int_unix_file_index = ~0;
  mq->int_fd = -1;
}

/*
 * Tear down the connection state of a memif interface.
 *
 * mif: interface to disconnect; a null pointer is ignored.
 * err: optional reason. When set, err->what is duplicated into
 *      mif->local_disc_string and, if the control socket is still open,
 *      a disconnect message is sent to the peer.
 *
 * After the call the CONNECTED/CONNECTING flags are cleared, the control
 * socket, queue interrupt fds and shared memory regions are released and
 * the queue/region vectors and remote identity strings are freed.
 */
void
memif_disconnect (memif_if_t * mif, clib_error_t * err)
{
  memif_main_t *mm = &memif_main;
  vnet_main_t *vnm = vnet_get_main ();
  memif_region_t *mr;
  memif_queue_t *mq;
  int i;

  if (mif == 0)
    return;

  DBG ("disconnect %u (%v)", mif->dev_instance, err ? err->what : 0);

  if (err)
    {
      clib_error_t *e = 0;
      /* remember why we disconnected */
      mif->local_disc_string = vec_dup (err->what);
      /* tell the peer, but only while the control socket is still open */
      if (mif->conn_fd > -1)
	e = memif_msg_send_disconnect (mif, err);
      /* a failure to send is not reported to the caller */
      clib_error_free (e);
    }

  /* set interface down */
  mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING);
  if (mif->hw_if_index != ~0)
    vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0);

  /* close connection socket */
  if (mif->conn_unix_file_index != ~0)
    {
      /* registered fd: drop the fd -> dev_instance mapping, then remove
       * the file by index */
      memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files,
						   mif->socket_file_index);
      hash_unset (msf->dev_instance_by_fd, mif->conn_fd);
      memif_file_del_by_index (mif->conn_unix_file_index);
      mif->conn_unix_file_index = ~0;
    }
  else if (mif->conn_fd > -1)
    close (mif->conn_fd);
  mif->conn_fd = -1;

  /* detach every rx queue that has a ring from its rx thread */
  vec_foreach_index (i, mif->rx_queues)
  {
    mq = vec_elt_at_index (mif->rx_queues, i);
    if (mq->ring)
      {
	int rv;
	rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, i);
	if (rv)
	  DBG ("Warning: unable to unassign interface %d, "
	       "queue %d: rc=%d", mif->hw_if_index, i, rv);
	mq->ring = 0;
      }
  }

  /* free tx and rx queues */
  vec_foreach (mq, mif->rx_queues) memif_queue_intfd_close (mq);
  vec_free (mif->rx_queues);

  vec_foreach (mq, mif->tx_queues) memif_queue_intfd_close (mq);
  vec_free (mif->tx_queues);

  /* free memory regions */
  vec_foreach (mr, mif->regions)
  {
    int rv;
    /* a failed unmap is only logged; the region fd is closed regardless */
    if ((rv = munmap (mr->shm, mr->region_size)))
      clib_warning ("munmap failed, rv = %d", rv);
    if (mr->fd > -1)
      close (mr->fd);
  }
  vec_free (mif->regions);

  /* forget the remote peer's identity and any queued messages */
  mif->remote_pid = 0;
  vec_free (mif->remote_name);
  vec_free (mif->remote_if_name);
  clib_fifo_free (mif->msg_queue);
}

/*
 * Read callback for an rx queue interrupt fd.
 *
 * uf->private_data carries the queue id in its low 16 bits and the
 * interface's pool index in the bits above. One 64-bit value is read
 * from the fd; on success the queue is marked interrupt-pending and its
 * interrupt counter is bumped. A failed read is only logged.
 * Always returns 0.
 */
static clib_error_t *
memif_int_fd_read_ready (unix_file_t * uf)
{
  memif_main_t *mm = &memif_main;
  vnet_main_t *vnm = vnet_get_main ();
  u16 qid = uf->private_data & 0xFFFF;
  uword dev_instance = uf->private_data >> 16;
  memif_if_t *mif = vec_elt_at_index (mm->interfaces, dev_instance);
  memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
  u64 counter;
  ssize_t n_read;

  n_read = read (uf->file_descriptor, &counter, sizeof (counter));
  if (n_read >= 0)
    {
      vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, qid);
      mq->int_count++;
    }
  else
    {
      DBG_UNIX_LOG ("Failed to read from socket");
    }

  return 0;
}


/*
 * Finish connecting a memif interface once regions and queues are known.
 *
 * Maps every region that is not mapped yet, resolves each queue's ring
 * pointer inside its region and verifies the ring cookie, registers rx
 * interrupt fds, assigns rx queues to threads and finally marks the
 * interface connected with link up.
 *
 * Returns 0 on success or a clib error describing the failure. On failure
 * the interface is left partially set up; NOTE(review): callers are
 * presumably expected to call memif_disconnect() - confirm.
 */
clib_error_t *
memif_connect (memif_if_t * mif)
{
  vnet_main_t *vnm = vnet_get_main ();
  unix_file_t template = { 0 };
  memif_region_t *mr;
  int i;

  DBG ("connect %u", mif->dev_instance);

  vec_free (mif->local_disc_string);
  vec_free (mif->remote_disc_string);

  vec_foreach (mr, mif->regions)
  {
    void *shm;

    /* already mapped */
    if (mr->shm)
      continue;

    if (mr->fd < 0)
      return clib_error_return (0, "no memory region fd");

    /* only publish the mapping on success, so that mr->shm never holds
     * MAP_FAILED (which is non-NULL and would look like a valid mapping) */
    shm = mmap (NULL, mr->region_size, PROT_READ | PROT_WRITE,
		MAP_SHARED, mr->fd, 0);
    if (shm == MAP_FAILED)
      return clib_error_return_unix (0, "mmap");
    mr->shm = shm;
  }

  template.read_function = memif_int_fd_read_ready;

  vec_foreach_index (i, mif->tx_queues)
  {
    memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);

    mq->ring = mif->regions[mq->region].shm + mq->offset;
    if (mq->ring->cookie != MEMIF_COOKIE)
      return clib_error_return (0, "wrong cookie on tx ring %u", i);
  }

  vec_foreach_index (i, mif->rx_queues)
  {
    memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
    int rv;

    mq->ring = mif->regions[mq->region].shm + mq->offset;
    if (mq->ring->cookie != MEMIF_COOKIE)
      return clib_error_return (0, "wrong cookie on rx ring %u", i);

    if (mq->int_fd > -1)
      {
	/* private_data: interface index in the upper bits, queue id in
	 * the low 16 bits (decoded by memif_int_fd_read_ready) */
	template.file_descriptor = mq->int_fd;
	template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF);
	memif_file_add (&mq->int_unix_file_index, &template);
      }
    vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0);
    rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i,
					VNET_HW_INTERFACE_RX_MODE_DEFAULT);
    if (rv)
      clib_warning
	("Warning: unable to set rx mode for interface %d queue %d: "
	 "rc=%d", mif->hw_if_index, i, rv);
  }

  mif->flags &= ~MEMIF_IF_FLAG_CONNECTING;
  mif->flags |= MEMIF_IF_FLAG_CONNECTED;

  vnet_hw_interface_set_flags (vnm, mif->hw_if_index,
			       VNET_HW_INTERFACE_FLAG_LINK_UP);
  return 0;
}

/*
 * Locate a ring inside region 0.
 *
 * Rings are laid out back to back from the start of the region, each
 * occupying a ring header plus 2^log2_ring_size descriptors. The ring
 * index is ring_num + type * num_s2m_rings.
 *
 * Returns NULL when the interface has no regions.
 */
static_always_inline memif_ring_t *
memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num)
{
  if (vec_len (mif->regions) == 0)
    return NULL;

  int bytes_per_ring =
    sizeof (memif_ring_t) +
    sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size);
  void *base = mif->regions[0].shm;

  return (memif_ring_t *) (base +
			   (ring_num +
			    type * mif->run.num_s2m_rings) * bytes_per_ring);
}

/*
 * Create and initialise shared memory region 0 together with the tx and
 * rx queue vectors of an interface.
 *
 * Region layout: all S2M rings, then all M2S rings, then one buffer of
 * run.buffer_size bytes per descriptor, in ring order. The region is
 * backed by a sealed (F_SEAL_SHRINK) memfd and mapped shared.
 *
 * Every ring is stamped with MEMIF_COOKIE and its descriptors are pointed
 * at their buffers. tx queues are bound to the S2M rings and rx queues to
 * the M2S rings, each with its own non-blocking eventfd.
 *
 * Returns 0 on success or a clib (unix) error. Nothing is rolled back on
 * failure; NOTE(review): cleanup is presumably left to memif_disconnect()
 * - confirm against the caller.
 */
clib_error_t *
memif_init_regions_and_queues (memif_if_t * mif)
{
  memif_ring_t *ring = NULL;
  int i, j;
  u64 buffer_offset;
  memif_region_t *r;

  vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES);
  r = vec_elt_at_index (mif->regions, 0);

  /* buffers start right after the last ring */
  buffer_offset = (mif->run.num_s2m_rings + mif->run.num_m2s_rings) *
    (sizeof (memif_ring_t) +
     sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size));

  r->region_size = buffer_offset +
    mif->run.buffer_size * (1 << mif->run.log2_ring_size) *
    (mif->run.num_s2m_rings + mif->run.num_m2s_rings);

  if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1)
    return clib_error_return_unix (0, "memfd_create");

  if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
    return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)");

  if ((ftruncate (r->fd, r->region_size)) == -1)
    return clib_error_return_unix (0, "ftruncate");

  if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE,
		      MAP_SHARED, r->fd, 0)) == MAP_FAILED)
    return clib_error_return_unix (0, "mmap");

  for (i = 0; i < mif->run.num_s2m_rings; i++)
    {
      ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
      ring->head = ring->tail = 0;
      ring->cookie = MEMIF_COOKIE;
      for (j = 0; j < (1 << mif->run.log2_ring_size); j++)
	{
	  /* global slot number; 32 bits wide so that many large rings do
	   * not wrap around and hand out overlapping buffers */
	  u32 slot = i * (1 << mif->run.log2_ring_size) + j;
	  ring->desc[j].region = 0;
	  ring->desc[j].offset =
	    buffer_offset + (u32) (slot * mif->run.buffer_size);
	  ring->desc[j].buffer_length = mif->run.buffer_size;
	}
    }
  for (i = 0; i < mif->run.num_m2s_rings; i++)
    {
      ring = memif_get_ring (mif, MEMIF_RING_M2S, i);
      ring->head = ring->tail = 0;
      ring->cookie = MEMIF_COOKIE;
      for (j = 0; j < (1 << mif->run.log2_ring_size); j++)
	{
	  /* M2S buffers follow the buffers of all S2M rings */
	  u32 slot =
	    (i + mif->run.num_s2m_rings) * (1 << mif->run.log2_ring_size) + j;
	  ring->desc[j].region = 0;
	  ring->desc[j].offset =
	    buffer_offset + (u32) (slot * mif->run.buffer_size);
	  ring->desc[j].buffer_length = mif->run.buffer_size;
	}
    }

  ASSERT (mif->tx_queues == 0);
  vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_foreach_index (i, mif->tx_queues)
  {
    memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
    if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0)
      return clib_error_return_unix (0, "eventfd[tx queue %u]", i);
    mq->int_unix_file_index = ~0;
    mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
    mq->log2_ring_size = mif->cfg.log2_ring_size;
    mq->region = 0;
    /* ring position relative to the start of its region */
    mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm;
    mq->last_head = 0;
  }

  ASSERT (mif->rx_queues == 0);
  vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_foreach_index (i, mif->rx_queues)
  {
    memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
    if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0)
      return clib_error_return_unix (0, "eventfd[rx queue %u]", i);
    mq->int_unix_file_index = ~0;
    mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i);
    mq->log2_ring_size = mif->cfg.log2_ring_size;
    mq->region = 0;
    /* ring position relative to the start of its region */
    mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm;
    mq->last_head = 0;
  }

  return 0;
}

/*
 * memif connection process node.
 *
 * Sleeps until MEMIF_PROCESS_EVENT_START is received, then wakes up
 * roughly every 3 seconds (minus the duration of the previous pass) and
 * walks all interfaces. For every admin-up slave interface that is
 * neither connecting nor connected it tries to connect() a spare
 * SOCK_SEQPACKET unix socket to the interface's socket file; on success
 * the socket is registered with the slave read/write/error handlers and
 * a fresh spare socket is opened. MEMIF_PROCESS_EVENT_STOP returns the
 * process to the event-only wait.
 *
 * The process exits (returns 0) if a spare socket cannot be created.
 */
static uword
memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  memif_main_t *mm = &memif_main;
  memif_if_t *mif;
  /* zeroed so that sun_path stays NUL terminated: the strncpy below
   * writes at most sizeof (sun_path) - 1 bytes and never the last one */
  struct sockaddr_un sun = { 0 };
  int sockfd;
  uword *event_data = 0, event_type;
  u8 enabled = 0;
  f64 start_time, last_run_duration = 0, now;

  sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0);
  if (sockfd < 0)
    {
      DBG_UNIX_LOG ("socket AF_UNIX");
      return 0;
    }
  sun.sun_family = AF_UNIX;

  while (1)
    {
      if (enabled)
	vlib_process_wait_for_event_or_clock (vm, (f64) 3 -
					      last_run_duration);
      else
	vlib_process_wait_for_event (vm);

      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      switch (event_type)
	{
	case ~0:
	  /* timer expired, no event */
	  break;
	case MEMIF_PROCESS_EVENT_START:
	  enabled = 1;
	  break;
	case MEMIF_PROCESS_EVENT_STOP:
	  enabled = 0;
	  continue;
	default:
	  ASSERT (0);
	}

      last_run_duration = start_time = vlib_time_now (vm);
      /* *INDENT-OFF* */
      pool_foreach (mif, mm->interfaces,
        ({
	  memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index);
	  /* Allow no more than 10us without a pause */
	  now = vlib_time_now (vm);
	  if (now > start_time + 10e-6)
	    {
	      vlib_process_suspend (vm, 100e-6);	/* suspend for 100 us */
	      start_time = vlib_time_now (vm);
	    }

	  if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0)
	    continue;

	  if (mif->flags & MEMIF_IF_FLAG_CONNECTING)
	    continue;

	  if (mif->flags & MEMIF_IF_FLAG_CONNECTED)
	    continue;

	  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
	    {
	      strncpy (sun.sun_path, (char *) msf->filename,
		       sizeof (sun.sun_path) - 1);

	      if (connect
		  (sockfd, (struct sockaddr *) &sun,
		   sizeof (struct sockaddr_un)) == 0)
	        {
		  unix_file_t t = { 0 };

		  /* the spare socket now belongs to this interface */
		  mif->conn_fd = sockfd;
		  t.read_function = memif_slave_conn_fd_read_ready;
		  t.write_function = memif_slave_conn_fd_write_ready;
		  t.error_function = memif_slave_conn_fd_error;
		  t.file_descriptor = mif->conn_fd;
		  t.private_data = mif->dev_instance;
		  memif_file_add (&mif->conn_unix_file_index, &t);
		  hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance);

		  mif->flags |= MEMIF_IF_FLAG_CONNECTING;

		  /* grab another fd */
		  sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0);
		  if (sockfd < 0)
		    {
		      DBG_UNIX_LOG ("socket AF_UNIX");
		      return 0;
		    }
	        }
	    }
        }));
      /* *INDENT-ON* */
      last_run_duration = vlib_time_now (vm) - last_run_duration;
    }
  return 0;
}

/* Registers memif_process() as a vlib process node named "memif-process".
 * The node is signalled with MEMIF_PROCESS_EVENT_START/STOP events. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (memif_process_node,static) = {
  .function = memif_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "memif-process",
};
/* *INDENT-ON* */

/*
 * Delete a memif interface and release everything it owns.
 *
 * The interface is brought down and disconnected (the peer is told
 * "interface deleted"), its hw interface is removed, and its reference
 * on the socket file is dropped. When that was the last reference the
 * socket file is torn down as well (including a listener's pending
 * files). Finally the interface is returned to the pool and, if the pool
 * is now empty, the memif process is told to stop.
 *
 * Interfaces whose hw/sw interface was never created (index ~0) are
 * tolerated: the corresponding vnet calls are skipped.
 *
 * Always returns 0.
 */
int
memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
{
  vnet_main_t *vnm = vnet_get_main ();
  memif_main_t *mm = &memif_main;
  memif_socket_file_t *msf =
    vec_elt_at_index (mm->socket_files, mif->socket_file_index);
  clib_error_t *err;

  mif->flags |= MEMIF_IF_FLAG_DELETING;
  vec_free (mif->local_disc_string);
  vec_free (mif->remote_disc_string);

  /* bring down the interface */
  if (mif->hw_if_index != ~0)
    vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0);
  if (mif->sw_if_index != ~0)
    vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0);

  err = clib_error_return (0, "interface deleted");
  memif_disconnect (mif, err);
  clib_error_free (err);

  /* remove the interface */
  if (mif->hw_if_index != ~0)
    {
      if (mif->mode == MEMIF_INTERFACE_MODE_IP)
	vnet_delete_hw_interface (vnm, mif->hw_if_index);
      else
	ethernet_delete_interface (vnm, mif->hw_if_index);
      mif->hw_if_index = ~0;
    }

  /* free interface data structures */
  clib_spinlock_free (&mif->lockp);
  mhash_unset (&msf->dev_instance_by_id, &mif->id, 0);
  /* must happen before the memset below wipes the pointer */
  vec_free (mif->secret);

  /* remove socket file */
  if (--(msf->ref_cnt) == 0)
    {
      if (msf->is_listener)
	{
	  uword *x;
	  memif_file_del_by_index (msf->unix_file_index);
	  vec_foreach (x, msf->pending_file_indices)
	  {
	    memif_file_del_by_index (*x);
	  }
	  vec_free (msf->pending_file_indices);
	}
      mhash_free (&msf->dev_instance_by_id);
      hash_free (msf->dev_instance_by_fd);
      mhash_unset (&mm->socket_file_index_by_filename, msf->filename, 0);
      vec_free (msf->filename);
      pool_put (mm->socket_files, msf);
    }

  memset (mif, 0, sizeof (*mif));
  pool_put (mm->interfaces, mif);

  /* last interface gone: park the connection process */
  if (pool_elts (mm->interfaces) == 0)
    vlib_process_signal_event (vm, memif_process_node.index,
			       MEMIF_PROCESS_EVENT_STOP, 0);

  return 0;
}

/* Hardware interface class used for memif interfaces created in
 * MEMIF_INTERFACE_MODE_IP; flagged as point-to-point. */
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) =
{
  .name = "memif-ip",
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/* *INDENT-ON* */

/*
 * Create a memif interface.
 *
 * The socket filename is taken from args->socket_filename when it is an
 * absolute path; otherwise it is placed under MEMIF_DEFAULT_SOCKET_DIR
 * (created on demand), using MEMIF_DEFAULT_SOCKET_FILENAME when no name
 * was given. An existing socket file is shared as long as its role
 * (listener vs. non-listener) matches args->is_master and the interface
 * id is not already in use on it; otherwise a new socket file is created.
 *
 * The interface is registered as an ethernet or IP hw interface
 * depending on args->mode. For a master on a new socket file the
 * listening socket is created, bound and registered. On success
 * args->sw_if_index is set and, if this is the first interface, the
 * memif process is started.
 *
 * Returns 0 on success or a VNET_API_ERROR_* code:
 *   SYSCALL_ERROR_1       default socket directory could not be created
 *   SUBIF_ALREADY_EXISTS  role mismatch on the socket file or id in use
 *   SYSCALL_ERROR_3       path exists and is not a socket
 *   SYSCALL_ERROR_2       interface registration failed / unsupported mode
 *   SYSCALL_ERROR_4..8    socket / setsockopt / bind / listen / stat failed
 */
int
memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
{
  memif_main_t *mm = &memif_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vnet_main_t *vnm = vnet_get_main ();
  memif_if_t *mif = 0;
  vnet_sw_interface_t *sw;
  clib_error_t *error = 0;
  int ret = 0;
  uword *p;
  vnet_hw_interface_t *hw;
  memif_socket_file_t *msf = 0;
  u8 *socket_filename;
  int rv = 0;

  if (args->socket_filename == 0 || args->socket_filename[0] != '/')
    {
      rv = mkdir (MEMIF_DEFAULT_SOCKET_DIR, 0755);
      if (rv && errno != EEXIST)
	return VNET_API_ERROR_SYSCALL_ERROR_1;

      if (args->socket_filename == 0)
	socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR,
				  MEMIF_DEFAULT_SOCKET_FILENAME, 0);
      else
	socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR,
				  args->socket_filename, 0);

    }
  else
    socket_filename = vec_dup (args->socket_filename);

  p = mhash_get (&mm->socket_file_index_by_filename, socket_filename);

  if (p)
    {
      msf = vec_elt_at_index (mm->socket_files, p[0]);

      /* existing socket file can be either master or slave but cannot be both */
      if (!msf->is_listener != !args->is_master)
	{
	  rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
	  goto done;
	}

      p = mhash_get (&msf->dev_instance_by_id, &args->id);
      if (p)
	{
	  rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
	  goto done;
	}
    }

  /* Create new socket file */
  if (msf == 0)
    {
      struct stat file_stat;
      /* If we are creating listener make sure file doesn't exist or if it
       * exists then delete it if it is old socket file */
      if (args->is_master &&
	  (stat ((char *) socket_filename, &file_stat) == 0))
	{
	  if (S_ISSOCK (file_stat.st_mode))
	    {
	      unlink ((char *) socket_filename);
	    }
	  else
	    {
	      /* no interface has been allocated yet, so there is nothing
	       * for the error path to undo: just release the filename */
	      rv = VNET_API_ERROR_SYSCALL_ERROR_3;
	      goto done;
	    }
	}
      pool_get (mm->socket_files, msf);
      memset (msf, 0, sizeof (memif_socket_file_t));
      mhash_init (&msf->dev_instance_by_id, sizeof (uword),
		  sizeof (memif_interface_id_t));
      msf->dev_instance_by_fd = hash_create (0, sizeof (uword));
      /* the socket file takes ownership of the filename vector */
      msf->filename = socket_filename;
      msf->fd = -1;
      msf->is_listener = (args->is_master != 0);
      socket_filename = 0;
      mhash_set (&mm->socket_file_index_by_filename, msf->filename,
		 msf - mm->socket_files, 0);
      DBG ("creating socket file %s", msf->filename);
    }

  pool_get (mm->interfaces, mif);
  memset (mif, 0, sizeof (*mif));
  mif->dev_instance = mif - mm->interfaces;
  mif->socket_file_index = msf - mm->socket_files;
  mif->id = args->id;
  mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0;
  mif->conn_unix_file_index = ~0;
  mif->conn_fd = -1;
  mif->mode = args->mode;
  if (args->secret)
    mif->secret = vec_dup (args->secret);

  if (tm->n_vlib_mains > 1)
    clib_spinlock_init (&mif->lockp);


  if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
    {

      if (!args->hw_addr_set)
	{
	  /* generate a MAC address 02:fe:xx:xx:xx:xx from a
	   * time-seeded random number */
	  f64 now = vlib_time_now (vm);
	  u32 rnd;
	  rnd = (u32) (now * 1e6);
	  rnd = random_u32 (&rnd);

	  memcpy (args->hw_addr + 2, &rnd, sizeof (rnd));
	  args->hw_addr[0] = 2;
	  args->hw_addr[1] = 0xfe;
	}
      error = ethernet_register_interface (vnm, memif_device_class.index,
					   mif->dev_instance, args->hw_addr,
					   &mif->hw_if_index,
					   memif_eth_flag_change);
    }
  else if (mif->mode == MEMIF_INTERFACE_MODE_IP)
    {
      mif->hw_if_index =
	vnet_register_interface (vnm, memif_device_class.index,
				 mif->dev_instance,
				 memif_ip_hw_if_class.index,
				 mif->dev_instance);
    }
  else
    error = clib_error_return (0, "unsupported interface mode");

  if (error)
    {
      clib_error_report (error);
      ret = VNET_API_ERROR_SYSCALL_ERROR_2;
      goto error;
    }

  sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index);
  mif->sw_if_index = sw->sw_if_index;

  mif->cfg.log2_ring_size = args->log2_ring_size;
  mif->cfg.buffer_size = args->buffer_size;
  /* slave-to-master rings are rx for the master and tx for the slave */
  mif->cfg.num_s2m_rings =
    args->is_master ? args->rx_queues : args->tx_queues;
  mif->cfg.num_m2s_rings =
    args->is_master ? args->tx_queues : args->rx_queues;

  args->sw_if_index = mif->sw_if_index;

  /* If this is new one, start listening */
  if (msf->is_listener && msf->ref_cnt == 0)
    {
      struct sockaddr_un un = { 0 };
      struct stat file_stat;
      int on = 1;

      if ((msf->fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0)
	{
	  ret = VNET_API_ERROR_SYSCALL_ERROR_4;
	  goto error;
	}

      un.sun_family = AF_UNIX;
      strncpy ((char *) un.sun_path, (char *) msf->filename,
	       sizeof (un.sun_path) - 1);

      if (setsockopt (msf->fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof (on)) < 0)
	{
	  ret = VNET_API_ERROR_SYSCALL_ERROR_5;
	  goto error;
	}
      if (bind (msf->fd, (struct sockaddr *) &un, sizeof (un)) == -1)
	{
	  ret = VNET_API_ERROR_SYSCALL_ERROR_6;
	  goto error;
	}
      if (listen (msf->fd, 1) == -1)
	{
	  ret = VNET_API_ERROR_SYSCALL_ERROR_7;
	  goto error;
	}

      if (stat ((char *) msf->filename, &file_stat) == -1)
	{
	  ret = VNET_API_ERROR_SYSCALL_ERROR_8;
	  goto error;
	}

      msf->unix_file_index = ~0;
      unix_file_t template = { 0 };
      template.read_function = memif_conn_fd_accept_ready;
      template.file_descriptor = msf->fd;
      template.private_data = mif->socket_file_index;
      memif_file_add (&msf->unix_file_index, &template);
    }

  msf->ref_cnt++;

  if (args->is_master == 0)
    mif->flags |= MEMIF_IF_FLAG_IS_SLAVE;

  hw = vnet_get_hw_interface (vnm, mif->hw_if_index);
  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
  vnet_hw_interface_set_input_node (vnm, mif->hw_if_index,
				    memif_input_node.index);

  mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);

  /* first interface: wake up the connection process */
  if (pool_elts (mm->interfaces) == 1)
    {
      vlib_process_signal_event (vm, memif_process_node.index,
				 MEMIF_PROCESS_EVENT_START, 0);
    }
  goto done;

error:
  /* only reached once mif has been allocated */
  if (mif->hw_if_index != ~0)
    {
      if (mif->mode == MEMIF_INTERFACE_MODE_IP)
	vnet_delete_hw_interface (vnm, mif->hw_if_index);
      else
	ethernet_delete_interface (vnm, mif->hw_if_index);
      mif->hw_if_index = ~0;
    }
  /* NOTE(review): memif_delete_if() decrements msf->ref_cnt, which has not
   * been incremented yet on this path - verify the socket file reference
   * count stays balanced. */
  memif_delete_if (vm, mif);
  /* still owned by us when an existing socket file was reused */
  vec_free (socket_filename);
  return ret;

done:
  vec_free (socket_filename);
  return rv;
}


/*
 * Plugin init: reset the global memif state, hook up the binary API,
 * create the socket-file-by-filename index (keyed by C string) and size
 * the rx buffer vector to one cache-line-aligned slot per vlib main.
 * Always returns 0.
 */
static clib_error_t *
memif_init (vlib_main_t * vm)
{
  memif_main_t *mm = &memif_main;
  vlib_thread_main_t *thread_main = vlib_get_thread_main ();

  memset (mm, 0, sizeof (*mm));

  /* initialize binary API */
  memif_plugin_api_hookup (vm);

  mhash_init_c_string (&mm->socket_file_index_by_filename, sizeof (uword));

  vec_validate_aligned (mm->rx_buffers, thread_main->n_vlib_mains - 1,
			CLIB_CACHE_LINE_BYTES);

  return 0;
}

VLIB_INIT_FUNCTION (memif_init);

/* Plugin registration: build version and the human-readable description. */
/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
    .version = VPP_BUILD_VER,
    .description = "Packet Memory Interface (experimental)",
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */