summaryrefslogtreecommitdiffstats
path: root/src/vlib/unix/mc_socket.c
blob: 9800b1e744c56ed661c2f8c3d76ccb94a61ffd37 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498

@media only all and (prefers-color-scheme: dark) {
.highlight .hll { background-color: #49483e }
.highlight .c { color: #75715e } /* Comment */
.highlight .err { color: #960050; background-color: #1e0010 } /* Error */
.highlight .k { color: #66d9ef } /* Keyword */
.highlight .l { color: #ae81ff } /* Literal */
.highlight .n { color: #f8f8f2 } /* Name */
.highlight .o { color: #f92672 } /* Operator */
.highlight .p { color: #f8f8f2 } /* Punctuation */
.highlight .ch { color: #75715e } /* Comment.Hashbang */
.highlight .cm { color: #75715e } /* Comment.Multiline */
.highlight .cp { color: #75715e } /* Comment.Preproc */
.highlight .cpf { color: #75715e } /* Comment.PreprocFile */
.highlight .c1 { color: #75715e } /* Comment.Single */
.highlight .cs { color: #75715e } /* Comment.Special */
.highlight .gd { color: #f92672 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gi { color: #a6e22e } /* Generic.Inserted */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #75715e } /* Generic.Subheading */
.highlight .kc { color: #66d9ef } /* Keyword.Constant */
.highlight .kd { color: #66d9ef } /* Keyword.Declaration */
.highlight .kn { color: #f92672 } /* Keyword.Namespace */
.highlight .kp { color: #66d9ef } /* Keyword.Pseudo */
.highlight .kr { color: #66d9ef } /* Keyword.Reserved */
.highlight .kt { color: #66d9ef } /* Keyword.Type */
.highlight .ld { color: #e6db74 } /* Literal.Date */
.highlight .m { color: #ae81ff } /* Literal.Number */
.highlight .s { color: #e6db74 } /* Literal.String */
.highlight .na { color: #a6e22e } /* Name.Attribute */
.highlight .nb { color: #f8f8f2 } /* Name.Builtin */
.highlight .nc { color: #a6e22e } /* Name.Class */
.highlight .no { color: #66d9ef } /* Name.Constant */
.highlight .nd { color: #a6e22e } /* Name.Decorator */
.highlight .ni { color: #f8f8f2 } /* Name.Entity */
.highlight .ne { color: #a6e22e } /* Name.Exception */
.highlight .nf { color: #a6e22e } /* Name.Function */
.highlight .nl { color: #f8f8f2 } /* Name.Label */
.highlight .nn { color: #f8f8f2 } /* Name.Namespace */
.highlight .nx { color: #a6e22e } /* Name.Other */
.highlight .py { color: #f8f8f2 } /* Name.Property */
.highlight .nt { color: #f92672 } /* Name.Tag */
.highlight .nv { color: #f8f8f2 } /* Name.Variable */
.highlight .ow { color: #f92672 } /* Operator.Word */
.highlight .w { color: #f8f8f2 } /* Text.Whitespace */
.highlight .mb { color: #ae81ff } /* Literal.Number.Bin */
.highlight .mf { color: #ae81ff } /* Literal.Number.Float */
.highlight .mh { color: #ae81ff } /* Literal.Number.Hex */
.highlight .mi { color: #ae81ff } /* Literal.Number.Integer */
.highlight .mo { color: #ae81ff } /* Literal.Number.Oct */
.highlight .sa { color: #e6db74 } /* Literal.String.Affix */
.highlight .sb { color: #e6db74 } /* Literal.String.Backtick */
.highlight .sc { color: #e6db74 } /* Literal.String.Char */
.highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */
.highlight .sd { color: #e6db74 } /* Literal.String.Doc */
.highlight .s2 { color: #e6db74 } /* Literal.String.Double */
.highlight .se { color: #ae81ff } /* Literal.String.Escape */
.highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */
.highlight .si { color: #e6db74 } /* Literal.String.Interpol */
.highlight .sx { color: #e6db74 } /* Literal.String.Other */
.highlight .sr { color: #e6db74 } /* Literal.String.Regex */
.highlight .s1 { color: #e6db74 } /* Literal.String.Single */
.highlight .ss { color: #e6db74 } /* Literal.String.Symbol */
.highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #a6e22e } /* Name.Function.Magic */
.highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */
.highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */
.highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */
.highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */
.highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */
}
@media (prefers-color-scheme: light) {
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
}
#!/usr/bin/env python3
""" L2BD ARP term Test """

import unittest
import random
import copy

from socket import AF_INET, AF_INET6

from scapy.packet import Raw
from scapy.layers.l2 import Ether, ARP
from scapy.layers.inet import IP
from scapy.utils import inet_pton, inet_ntop
from scapy.utils6 import in6_getnsma, in6_getnsmac, in6_ptop, in6_islladdr, \
    in6_mactoifaceid, in6_ismaddr
from scapy.layers.inet6 import IPv6, UDP, ICMPv6ND_NS, ICMPv6ND_RS, \
    ICMPv6ND_RA, ICMPv6NDOptSrcLLAddr, getmacbyip6, ICMPv6MRD_Solicitation, \
    ICMPv6NDOptMTU, ICMPv6NDOptSrcLLAddr, ICMPv6NDOptPrefixInfo, \
    ICMPv6ND_NA, ICMPv6NDOptDstLLAddr, ICMPv6DestUnreach, icmp6types

from framework import VppTestCase, VppTestRunner
from util import Host, ppp


class TestL2bdArpTerm(VppTestCase):
    """ L2BD arp termination Test Case """

    @classmethod
    def setUpClass(cls):
        """
        Perform standard class setup (defined by class method setUpClass in
        class VppTestCase) before running the test case, set test case related
        variables and configure VPP.
        """
        super(TestL2bdArpTerm, cls).setUpClass()

        try:
            # Create pg interfaces
            n_bd = 1
            cls.ifs_per_bd = ifs_per_bd = 3
            n_ifs = n_bd * ifs_per_bd
            cls.create_pg_interfaces(range(n_ifs))

            # Set up all interfaces
            for i in cls.pg_interfaces:
                i.admin_up()

            cls.hosts = set()

        except Exception:
            super(TestL2bdArpTerm, cls).tearDownClass()
            raise

    @classmethod
    def tearDownClass(cls):
        super(TestL2bdArpTerm, cls).tearDownClass()

    def setUp(self):
        """
        Clear trace and packet infos before running each test.
        """
        self.reset_packet_infos()
        super(TestL2bdArpTerm, self).setUp()

    def tearDown(self):
        """
        Show various debug prints after each test.
        """
        super(TestL2bdArpTerm, self).tearDown()

    def show_commands_at_teardown(self):
        self.logger.info(self.vapi.ppcli("show l2fib verbose"))
        # many tests delete bridge-domain 1 as the last task.  don't output
        # the details of a non-existent bridge-domain.
        if self.vapi.l2_fib_table_dump(bd_id=1):
            self.logger.info(self.vapi.ppcli("show bridge-domain 1 detail"))

    def add_del_arp_term_hosts(self, entries, bd_id=1, is_add=1, is_ipv6=0):
        for e in entries:
            ip = e.ip4 if is_ipv6 == 0 else e.ip6
            self.vapi.bd_ip_mac_add_del(is_add=is_add,
                                        entry={
                                            'bd_id': bd_id,
                                            'ip': ip,
                                            'mac': e.mac})

    @classmethod
    def mac_list(cls, b6_range):
        return ["00:00:ca:fe:00:%02x" % b6 for b6 in b6_range]

    @classmethod
    def ip4_host(cls, subnet, host, mac):
        return Host(mac=mac,
                    ip4="172.17.1%02u.%u" % (subnet, host))

    @classmethod
    def ip4_hosts(cls, subnet, start, mac_list):
        return {cls.ip4_host(subnet, start + j, mac_list[j])
                for j in range(len(mac_list))}

    @classmethod
    def ip6_host(cls, subnet, host, mac):
        return Host(mac=mac,
                    ip6="fd01:%x::%x" % (subnet, host))

    @classmethod
    def ip6_hosts(cls, subnet, start, mac_list):
        return {cls.ip6_host(subnet, start + j, mac_list[j])
                for j in range(len(mac_list))}

    @classmethod
    def bd_swifs(cls, b):
        n = cls.ifs_per_bd
        start = (b - 1) * n
        return [cls.pg_interfaces[j] for j in range(start, start + n)]

    def bd_add_del(self, bd_id=1, is_add=1):
        if is_add:
            self.vapi.bridge_domain_add_del(bd_id=bd_id, is_add=is_add)
        for swif in self.bd_swifs(bd_id):
            swif_idx = swif.sw_if_index
            self.vapi.sw_interface_set_l2_bridge(rx_sw_if_index=swif_idx,
                                                 bd_id=bd_id, enable=is_add)
        if not is_add:
            self.vapi.bridge_domain_add_del(bd_id=bd_id, is_add=is_add)

    @classmethod
    def arp_req(cls, src_host, host):
        return (Ether
/*
 * mc_socket.c: socket based multicast for vlib mc
 *
 * Copyright (c) 2010 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vlib/unix/mc_socket.h>

#include <sys/ioctl.h>		/* for FIONBIO */
#include <netinet/tcp.h>	/* for TCP_NODELAY */
#include <net/if.h>		/* for struct ifreq */

static u8 *
format_socket_peer_id (u8 * s, va_list * args)
{
  u64 peer_id_as_u64 = va_arg (*args, u64);
  mc_peer_id_t peer_id;
  peer_id.as_u64 = peer_id_as_u64;
  u32 a = mc_socket_peer_id_get_address (peer_id);
  u32 p = mc_socket_peer_id_get_port (peer_id);

  s = format (s, "%U:%04x", format_network_address, AF_INET, &a, ntohs (p));

  return s;
}

typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
				 u32 buffer_index);

always_inline void
msg_handler (mc_main_t * mcm,
	     u32 buffer_index, u32 handler_frees_buffer, void *_h)
{
  vlib_main_t *vm = mcm->vlib_main;
  mc_msg_handler_t *h = _h;
  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
  void *the_msg = vlib_buffer_get_current (b);

  h (mcm, the_msg, buffer_index);
  if (!handler_frees_buffer)
    vlib_buffer_free_one (vm, buffer_index);
}

static uword
append_buffer_index_to_iovec (vlib_main_t * vm,
			      u32 buffer_index, struct iovec **iovs_return)
{
  struct iovec *i;
  vlib_buffer_t *b;
  u32 bi = buffer_index;
  u32 l = 0;

  while (1)
    {
      b = vlib_get_buffer (vm, bi);
      vec_add2 (*iovs_return, i, 1);
      i->iov_base = vlib_buffer_get_current (b);
      i->iov_len = b->current_length;
      l += i->iov_len;
      if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
	break;
      bi = b->next_buffer;
    }

  return l;
}

static clib_error_t *
sendmsg_helper (mc_socket_main_t * msm,
		int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
{
  vlib_main_t *vm = msm->mc_main.vlib_main;
  struct msghdr h;
  word n_bytes, n_bytes_tx, n_retries;

  clib_memset (&h, 0, sizeof (h));
  h.msg_name = tx_addr;
  h.msg_namelen = sizeof (tx_addr[0]);

  if (msm->iovecs)
    _vec_len (msm->iovecs) = 0;

  n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
  ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
  if (n_bytes > msm->mc_main.transport.max_packet_size)
    clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);

  h.msg_iov = msm->iovecs;
  h.msg_iovlen = vec_len (msm->iovecs);

  n_retries = 0;
  while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
	 && errno == EAGAIN)
    n_retries++;
  if (n_bytes_tx != n_bytes)
    {
      clib_unix_warning ("sendmsg");
      return 0;
    }
  if (n_retries)
    {
      ELOG_TYPE_DECLARE (e) =
      {
      .format = "sendmsg-helper: %d retries",.format_args = "i4",};
      struct
      {
	u32 retries;
      } *ed = 0;

      ed = ELOG_DATA (&vm->elog_main, e);
      ed->retries = n_retries;
    }
  return 0;
}

static clib_error_t *
tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) transport;
  vlib_main_t *vm = msm->mc_main.vlib_main;
  mc_multicast_socket_t *ms = &msm->multicast_sockets[type];
  clib_error_t *error;
  error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
  if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
    vlib_buffer_free_one (vm, buffer_index);
  return error;
}

static clib_error_t *
tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
{
  struct sockaddr_in tx_addr;
  mc_socket_main_t *msm = (mc_socket_main_t *) transport;
  vlib_main_t *vm = msm->mc_main.vlib_main;
  clib_error_t *error;

  clib_memset (&tx_addr, 0, sizeof (tx_addr));
  tx_addr.sin_family = AF_INET;
  tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
  tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);

  error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
  vlib_buffer_free_one (vm, buffer_index);
  return error;
}

static clib_error_t *
recvmsg_helper (mc_socket_main_t * msm,
		int socket,
		struct sockaddr_in *rx_addr,
		u32 * buffer_index, u32 drop_message)
{
  vlib_main_t *vm = msm->mc_main.vlib_main;
  vlib_buffer_t *b;
  uword n_left, n_alloc, n_mtu, i, i_rx;
  const uword buffer_size = vlib_buffer_get_default_data_size (vm);
  word n_bytes_left;

  /* Make sure we have at least a MTU worth of buffers. */
  n_mtu = msm->rx_mtu_n_buffers;
  n_left = vec_len (msm->rx_buffers);
  if (n_left < n_mtu)
    {
      uword max_alloc = 8 * n_mtu;
      vec_validate (msm->rx_buffers, max_alloc - 1);
      n_alloc =
	vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
      _vec_len (msm->rx_buffers) = n_left + n_alloc;
    }

  ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
  vec_validate (msm->iovecs, n_mtu - 1);

  /* Allocate RX buffers from end of rx_buffers.
     Turn them into iovecs to pass to readv. */
  i_rx = vec_len (msm->rx_buffers) - 1;
  for (i = 0; i < n_mtu; i++)
    {
      b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
      msm->iovecs[i].iov_base = b->data;
      msm->iovecs[i].iov_len = buffer_size;
    }
  _vec_len (msm->iovecs) = n_mtu;

  {
    struct msghdr h;

    clib_memset (&h, 0, sizeof (h));
    if (rx_addr)
      {
	h.msg_name = rx_addr;
	h.msg_namelen = sizeof (rx_addr[0]);
      }
    h.msg_iov = msm->iovecs;
    h.msg_iovlen = vec_len (msm->iovecs);

    n_bytes_left = recvmsg (socket, &h, 0);
    if (n_bytes_left < 0)
      return clib_error_return_unix (0, "recvmsg");
  }

  if (drop_message)
    {
      *buffer_index = ~0;
      return 0;
    }

  *buffer_index = msm->rx_buffers[i_rx];
  while (1)
    {
      b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);

      b->flags = 0;
      b->current_data = 0;
      b->current_length =
	n_bytes_left < buffer_size ? n_bytes_left : buffer_size;

      n_bytes_left -= buffer_size;

      if (n_bytes_left <= 0)
	break;

      i_rx--;
      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
      b->next_buffer = msm->rx_buffers[i_rx];
    }

  _vec_len (msm->rx_buffers) = i_rx;

  return 0 /* no error */ ;
}

static clib_error_t *
mastership_socket_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  mc_multicast_socket_t *ms =
    &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
  clib_error_t *error;
  u32 bi = 0;

  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi,	/* drop_message */
			  0);
  if (!error)
    msg_handler (mcm, bi,
		 /* handler_frees_buffer */ 0,
		 mc_msg_master_assert_handler);

  return error;
}

static clib_error_t *
to_relay_socket_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  vlib_main_t *vm = msm->mc_main.vlib_main;
  mc_multicast_socket_t *ms_to_relay =
    &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
  mc_multicast_socket_t *ms_from_relay =
    &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
  clib_error_t *error;
  u32 bi = 0;
  u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;

  /* Not the ordering master? Turf the msg */
  error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
			  /* drop_message */ !is_master);

  /* If we are the master, number and rebroadcast the msg. */
  if (!error && is_master)
    {
      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
      mc_msg_user_request_t *mp = vlib_buffer_get_current (b);
      mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
      mcm->relay_global_sequence++;
      error =
	sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr,
			bi);
      vlib_buffer_free_one (vm, bi);
    }

  return error;
}

static clib_error_t *
from_relay_socket_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  mc_multicast_socket_t *ms =
    &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
  clib_error_t *error;
  u32 bi = 0;

  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi,	/* drop_message */
			  0);
  if (!error)
    {
      msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
		   mc_msg_user_request_handler);
    }
  return error;
}

static clib_error_t *
join_socket_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  vlib_main_t *vm = mcm->vlib_main;
  mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
  clib_error_t *error;
  u32 bi = 0;

  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi,	/* drop_message */
			  0);
  if (!error)
    {
      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
      mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);

      switch (clib_host_to_net_u32 (mp->type))
	{
	case MC_MSG_TYPE_join_or_leave_request:
	  msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
		       mc_msg_join_or_leave_request_handler);
	  break;

	case MC_MSG_TYPE_join_reply:
	  msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
		       mc_msg_join_reply_handler);
	  break;

	default:
	  ASSERT (0);
	  break;
	}
    }
  return error;
}

static clib_error_t *
ack_socket_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  clib_error_t *error;
  u32 bi = 0;

  error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
			  /* drop_message */ 0);
  if (!error)
    msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
		 mc_msg_user_ack_handler);
  return error;
}

static void
catchup_cleanup (mc_socket_main_t * msm,
		 mc_socket_catchup_t * c, clib_file_main_t * um,
		 clib_file_t * uf)
{
  hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
  clib_file_del (um, uf);
  vec_free (c->input_vector);
  vec_free (c->output_vector);
  pool_put (msm->catchups, c);
}

static mc_socket_catchup_t *
find_catchup_from_file_descriptor (mc_socket_main_t * msm,
				   int file_descriptor)
{
  uword *p =
    hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
  return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
}

static clib_error_t *
catchup_socket_read_ready (clib_file_t * uf, int is_server)
{
  clib_file_main_t *um = &file_main;
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  mc_socket_catchup_t *c =
    find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  word l, n, is_eof;

  l = vec_len (c->input_vector);
  vec_resize (c->input_vector, 4096);
  n =
    read (uf->file_descriptor, c->input_vector + l,
	  vec_len (c->input_vector) - l);
  is_eof = n == 0;

  if (n < 0)
    {
      if (errno == EAGAIN)
	n = 0;
      else
	{
	  catchup_cleanup (msm, c, um, uf);
	  return clib_error_return_unix (0, "read");
	}
    }

  _vec_len (c->input_vector) = l + n;

  if (is_eof && vec_len (c->input_vector) > 0)
    {
      if (is_server)
	{
	  mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
					  c - msm->catchups);
	  _vec_len (c->input_vector) = 0;
	}
      else
	{
	  mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
					c - msm->catchups);
	  c->input_vector = 0;	/* reply handler is responsible for freeing vector */
	  catchup_cleanup (msm, c, um, uf);
	}
    }

  return 0 /* no error */ ;
}

static clib_error_t *
catchup_server_read_ready (clib_file_t * uf)
{
  return catchup_socket_read_ready (uf, /* is_server */ 1);
}

static clib_error_t *
catchup_client_read_ready (clib_file_t * uf)
{
  if (MC_EVENT_LOGGING)
    {
      mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
      vlib_main_t *vm = msm->mc_main.vlib_main;

      ELOG_TYPE (e, "catchup_client_read_ready");
      ELOG (&vm->elog_main, e, 0);
    }
  return catchup_socket_read_ready (uf, /* is_server */ 0);
}

static clib_error_t *
catchup_socket_write_ready (clib_file_t * uf, int is_server)
{
  clib_file_main_t *um = &file_main;
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_socket_catchup_t *c =
    find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  clib_error_t *error = 0;
  int n;

  if (c->connect_in_progress)
    {
      u32 len, value;

      c->connect_in_progress = 0;
      len = sizeof (value);
      if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &value, &len) < 0)
	{
	  error = clib_error_return_unix (0, "getsockopt SO_ERROR");
	  goto error_quit;
	}
      if (value != 0)
	{
	  error =
	    clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID,
				    "connect fails");
	  goto error_quit;
	}
    }

  while (1)
    {
      u32 n_this_write;

      n_this_write =
	clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
		  msm->rx_mtu_n_bytes -
		  64 /* ip + tcp + option allowance */ );

      if (n_this_write <= 0)
	break;

      do
	{
	  n = write (uf->file_descriptor,
		     c->output_vector + c->output_vector_n_written,
		     n_this_write);
	}
      while (n < 0 && errno == EAGAIN);

      if (n < 0)
	{
	  error = clib_error_return_unix (0, "write");
	  goto error_quit;
	}
      c->output_vector_n_written += n;
    }

  if (c->output_vector_n_written >= vec_len (c->output_vector))
    {
      if (!is_server)
	{
	  uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
	  file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
	  /* Send EOF to other side. */
	  shutdown (uf->file_descriptor, SHUT_WR);
	  return error;
	}
      else
	{
	error_quit:
	  catchup_cleanup (msm, c, um, uf);
	}
    }
  return error;
}

static clib_error_t *
catchup_server_write_ready (clib_file_t * uf)
{
  return catchup_socket_write_ready (uf, /* is_server */ 1);
}

static clib_error_t *
catchup_client_write_ready (clib_file_t * uf)
{
  return catchup_socket_write_ready (uf, /* is_server */ 0);
}

static clib_error_t *
catchup_socket_error_ready (clib_file_t * uf)
{
  clib_file_main_t *um = &file_main;
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  mc_socket_catchup_t *c =
    find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  catchup_cleanup (msm, c, um, uf);
  return clib_error_return (0, "error");
}

static clib_error_t *
catchup_listen_read_ready (clib_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
  struct sockaddr_in client_addr;
  int client_len;
  mc_socket_catchup_t *c;
  clib_file_t template = { 0 };

  pool_get (msm->catchups, c);
  clib_memset (c, 0, sizeof (c[0]));

  client_len = sizeof (client_addr);

  /* Acquires the non-blocking attrib from the server socket. */
  c->socket = accept (uf->file_descriptor,
		      (struct sockaddr *) &client_addr,
		      (socklen_t *) & client_len);

  if (c->socket < 0)
    {
      pool_put (msm->catchups, c);
      return clib_error_return_unix (0, "accept");
    }

  if (MC_EVENT_LOGGING)
    {
      mc_main_t *mcm = &msm->mc_main;
      vlib_main_t *vm = mcm->vlib_main;

      ELOG_TYPE_DECLARE (e) =
      {
      .format = "catchup accepted from 0x%lx",.format_args = "i4",};
      struct
      {
	u32 addr;
      } *ed = 0;

      ed = ELOG_DATA (&vm->elog_main, e);
      ed->addr = ntohl (client_addr.sin_addr.s_addr);
    }

  /* Disable the Nagle algorithm, ship catchup pkts immediately */
  {
    int one = 1;
    if ((setsockopt (c->socket, IPPROTO_TCP,
		     TCP_NODELAY, (void *) &one, sizeof (one))) < 0)
      {
	clib_unix_warning ("catchup socket: set TCP_NODELAY");
      }
  }

  template.read_function = catchup_server_read_ready;
  template.write_function = catchup_server_write_ready;
  template.error_function = catchup_socket_error_ready;
  template.file_descriptor = c->socket;
  template.description = format (0, "multicast catchup socket");
  template.private_data = pointer_to_uword (msm);
  c->clib_file_index = clib_file_add (&file_main, &template);
  hash_set (msm->catchup_index_by_file_descriptor, c->socket,
	    c - msm->catchups);

  return 0;
}

/* Return and bind to an unused port. */
static word
find_and_bind_to_free_port (word sock, word port)
{
  for (; port < 1 << 16; port++)
    {
      struct sockaddr_in a;

      clib_memset (&a, 0, sizeof (a));	/* Warnings be gone */

      a.sin_family = PF_INET;
      a.sin_addr.s_addr = INADDR_ANY;
      a.sin_port = htons (port);

      if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
	break;
    }

  return port < 1 << 16 ? port : -1;
}

static clib_error_t *
setup_mutlicast_socket (mc_socket_main_t * msm,
			mc_multicast_socket_t * ms,
			char *type, uword udp_port)
{
  int one = 1;
  struct ip_mreq mcast_req;

  if (!msm->multicast_ttl)
    msm->multicast_ttl = 1;

  /* mastership (multicast) TX socket */
  if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
    return clib_error_return_unix (0, "%s socket", type);

  {
    u8 ttl = msm->multicast_ttl;

    if ((setsockopt (ms->socket, IPPROTO_IP,
		     IP_MULTICAST_TTL, (void *) &ttl, sizeof (ttl))) < 0)
      return clib_error_return_unix (0, "%s set multicast ttl", type);
  }

  if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)) <
      0)
    return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);

  clib_memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
  ms->tx_addr.sin_family = AF_INET;
  ms->tx_addr.sin_addr.s_addr =
    htonl (msm->multicast_tx_ip4_address_host_byte_order);
  ms->tx_addr.sin_port = htons (udp_port);

  if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
	    sizeof (ms->tx_addr)) < 0)
    return clib_error_return_unix (0, "%s bind", type);

  clib_memset (&mcast_req, 0, sizeof (mcast_req));
  mcast_req.imr_multiaddr.s_addr =
    htonl (msm->multicast_tx_ip4_address_host_byte_order);
  mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;

  if ((setsockopt (ms->socket, IPPROTO_IP,
		   IP_ADD_MEMBERSHIP, (void *) &mcast_req,
		   sizeof (mcast_req))) < 0)
    return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
				   type);

  if (ioctl (ms->socket, FIONBIO, &one) < 0)
    return clib_error_return_unix (0, "%s set FIONBIO", type);

  /* FIXME remove this when we support tx_ready. */
  {
    u32 len = 1 << 20;
    socklen_t sl = sizeof (len);
    if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
      clib_unix_error ("setsockopt");
  }

  return 0;
}

static clib_error_t *
socket_setup (mc_socket_main_t * msm)
{
  int one = 1;
  clib_error_t *error;
  u32 port;

  if (!msm->base_multicast_udp_port_host_byte_order)
    msm->base_multicast_udp_port_host_byte_order =
      0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
		- 1);

  port = msm->base_multicast_udp_port_host_byte_order;

  error = setup_mutlicast_socket (msm,
				  &msm->multicast_sockets
				  [MC_TRANSPORT_MASTERSHIP], "mastership",
				  port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
				  &msm->multicast_sockets[MC_TRANSPORT_JOIN],
				  "join", port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
				  &msm->multicast_sockets
				  [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
				  "to relay", port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
				  &msm->multicast_sockets
				  [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
				  "from relay", port++);
  if (error)
    return error;

  /* ACK rx socket */
  msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
  if (msm->ack_socket < 0)
    return clib_error_return_unix (0, "ack socket");

  msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);

  if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
    return clib_error_return_unix (0, "ack socket FIONBIO");

  msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
  if (msm->catchup_server_socket < 0)
    return clib_error_return_unix (0, "catchup server socket");

  msm->catchup_tcp_port =
    find_and_bind_to_free_port (msm->catchup_server_socket, port++);

  if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
    return clib_error_return_unix (0, "catchup server socket FIONBIO");

  if (listen (msm->catchup_server_socket, 5) < 0)
    return clib_error_return_unix (0, "catchup server socket listen");

  /* epoll setup for multicast mastership socket */
  {
    clib_file_t template = { 0 };

    template.read_function = mastership_socket_read_ready;
    template.file_descriptor =
      msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
    template.private_data = (uword) msm;
    clib_file_add (&file_main, &template);

    /* epoll setup for multicast to_relay socket */
    template.read_function = to_relay_socket_read_ready;
    template.file_descriptor =
      msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast to_relay socket");
    clib_file_add (&file_main, &template);

    /* epoll setup for multicast from_relay socket */
    template.read_function = from_relay_socket_read_ready;
    template.file_descriptor =
      msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast from_relay socket");
    clib_file_add (&file_main, &template);

    template.read_function = join_socket_read_ready;
    template.file_descriptor =
      msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast join socket");
    clib_file_add (&file_main, &template);

    /* epoll setup for ack rx socket */
    template.read_function = ack_socket_read_ready;
    template.file_descriptor = msm->ack_socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast ack rx socket");
    clib_file_add (&file_main, &template);

    /* epoll setup for TCP catchup server */
    template.read_function = catchup_listen_read_ready;
    template.file_descriptor = msm->catchup_server_socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast tcp catchup socket");
    clib_file_add (&file_main, &template);
  }

  return 0;
}

static void *
catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
			    u8 * set_output_vector)
{
  clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
				       c->clib_file_index);
  u8 *result = 0;

  if (set_output_vector)
    c->output_vector = set_output_vector;
  else
    vec_add2 (c->output_vector, result, n_bytes);
  if (vec_len (c->output_vector) > 0)
    {
      int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
      uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
      if (!skip_update)
	file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
    }
  return result;
}

static uword
catchup_request_fun (void *transport_main,
		     u32 stream_index, mc_peer_id_t catchup_peer_id)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
  mc_main_t *mcm = &msm->mc_main;
  vlib_main_t *vm = mcm->vlib_main;
  mc_socket_catchup_t *c;
  struct sockaddr_in addr;
  clib_file_main_t *um = &file_main;
  int one = 1;

  pool_get (msm->catchups, c);
  clib_memset (c, 0, sizeof (*c));

  c->socket = socket (AF_INET, SOCK_STREAM, 0);
  if (c->socket < 0)
    {
      clib_unix_warning ("socket");
      return 0;
    }

  if (ioctl (c->socket, FIONBIO, &one) < 0)
    {
      clib_unix_warning ("FIONBIO");
      return 0;
    }

  clib_memset (&addr, 0, sizeof (addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
  addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);

  c->connect_in_progress = 1;

  if (MC_EVENT_LOGGING)
    {
      ELOG_TYPE_DECLARE (e) =
      {
      .format = "connecting to peer 0x%Lx",.format_args = "i8",};
      struct
      {
	u64 peer;
      } *ed;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->peer = catchup_peer_id.as_u64;
    }

  if (connect (c->socket, (const void *) &addr, sizeof (addr))
      < 0 && errno != EINPROGRESS)
    {
      clib_unix_warning ("connect to %U fails",
			 format_socket_peer_id, catchup_peer_id);
      return 0;
    }

  {
    clib_file_t template = { 0 };

    template.read_function = catchup_client_read_ready;
    template.write_function = catchup_client_write_ready;
    template.error_function = catchup_socket_error_ready;
    template.file_descriptor = c->socket;
    template.private_data = (uword) msm;
    template.description = format (0, "multicast socket");
    c->clib_file_index = clib_file_add (um, &template);

    hash_set (msm->catchup_index_by_file_descriptor, c->socket,
	      c - msm->catchups);
  }

  {
    mc_msg_catchup_request_t *mp;
    mp = catchup_add_pending_output (c, sizeof (mp[0]),	/* set_output_vector */
				     0);
    mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
    mp->stream_index = stream_index;
    mc_byte_swap_msg_catchup_request (mp);
  }

  return c - msm->catchups;
}

static void
catchup_send_fun (void *transport_main, uword opaque, u8 * data)
{
  mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
  mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
  catchup_add_pending_output (c, 0, data);
}

static int
find_interface_ip4_address (char *if_name, u32 * ip4_address, u32 * mtu)
{
  int fd;
  struct ifreq ifr;
  struct sockaddr_in *sa;

  /* Dig up our IP address */
  fd = socket (PF_INET, AF_INET, 0);
  if (fd < 0)
    {
      clib_unix_error ("socket");
      return -1;
    }

  ifr.ifr_addr.sa_family = AF_INET;
  strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
    {
      clib_unix_error ("ioctl(SIOCFIGADDR)");
      close (fd);
      return -1;
    }

  sa = (void *) &ifr.ifr_addr;
  clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));

  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
    {
      close (fd);
      return -1;
    }
  if (mtu)
    *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);

  close (fd);

  return 0;
}

clib_error_t *
mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
		     int n_intfcs_to_probe)
{
  clib_error_t *error;
  mc_main_t *mcm;
  u32 mtu;

  mcm = &msm->mc_main;

  /* 239.255.0.7 */
  if (!msm->multicast_tx_ip4_address_host_byte_order)
    msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;

  {
    u32 i, a, win;

    win = 0;
    if (msm->multicast_interface_name)
      {
	win =
	  !find_interface_ip4_address (msm->multicast_interface_name, &a,
				       &mtu);
      }
    else
      {
	for (i = 0; i < n_intfcs_to_probe; i++)
	  if (!find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
	    {
	      win = 1;
	      msm->multicast_interface_name = intfc_probe_list[i];
	      break;
	    }
      }

    if (!win)
      return clib_error_return (0, "can't find interface ip4 address");

    msm->if_ip4_address_net_byte_order = a;
  }

  msm->rx_mtu_n_bytes = mtu;
  msm->rx_mtu_n_buffers =
    msm->rx_mtu_n_bytes / vlib_buffer_get_default_data_size (vm);
  msm->rx_mtu_n_buffers +=
    (msm->rx_mtu_n_bytes % vlib_buffer_get_default_data_size (vm)) != 0;

  error = socket_setup (msm);
  if (error)
    return error;

  mcm->transport.our_ack_peer_id =
    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
			   msm->ack_udp_port);

  mcm->transport.our_catchup_peer_id =
    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
			   msm->catchup_tcp_port);

  mcm->transport.tx_buffer = tx_buffer;
  mcm->transport.tx_ack = tx_ack;
  mcm->transport.catchup_request_fun = catchup_request_fun;
  mcm->transport.catchup_send_fun = catchup_send_fun;
  mcm->transport.format_peer_id = format_socket_peer_id;
  mcm->transport.opaque = msm;
  mcm->transport.max_packet_size = mtu;

  mc_main_init (mcm, "socket");

  return error;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */