summaryrefslogtreecommitdiffstats
path: root/test/test_dhcp.py
blob: 04ab2e111ab5bb3a122a91c0f15101b2ff172988 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
#!/usr/bin/env python

import unittest
import socket

from framework import VppTestCase, VppTestRunner

from scapy.layers.l2 import Ether, getmacbyip
from scapy.layers.inet import IP, UDP, ICMP
from scapy.layers.inet6 import IPv6, in6_getnsmac, in6_mactoifaceid
from scapy.layers.dhcp import DHCP, BOOTP, DHCPTypes
from scapy.layers.dhcp6 import DHCP6, DHCP6_Solicit, DHCP6_RelayForward, \
    DHCP6_RelayReply, DHCP6_Advertise, DHCP6OptRelayMsg, DHCP6OptIfaceId, \
    DHCP6OptStatusCode, DHCP6OptVSS, DHCP6OptClientLinkLayerAddr
from socket import AF_INET, AF_INET6
from scapy.utils import inet_pton, inet_ntop
from scapy.utils6 import in6_ptop

# UDP port numbers used when building and verifying the DHCP packets below.
DHCP4_CLIENT_PORT = 68
DHCP4_SERVER_PORT = 67
# NOTE(review): RFC 8415 has DHCPv6 clients listening on UDP 546 and
# servers/relay agents on UDP 547, so these two names look swapped.
# The tests in this file use them in the same swapped sense (the client
# solicit is built with sport=DHCP6_SERVER_PORT, dport=DHCP6_CLIENT_PORT),
# so a rename must update every use at the same time - confirm first.
DHCP6_CLIENT_PORT = 547
DHCP6_SERVER_PORT = 546


def mk_ll_addr(mac):
    """Return the fe80:: link-local IPv6 address string for a MAC address.

    The interface identifier appended to the prefix is whatever scapy's
    in6_mactoifaceid() produces for *mac*.
    """
    return "fe80::" + in6_mactoifaceid(mac)


class TestDHCP(VppTestCase):
    """ DHCP Test Case """

    def setUp(self):
        """Create four pg interfaces and spread them over VRF 0 and VRF 1.

        pg0 and pg1 are given IPv4 and IPv6 configuration (followed by
        resolve_arp() / resolve_ndp()); pg2 and pg3 are only brought up
        and bound to the tables, with no addresses configured.
        """
        super(TestDHCP, self).setUp()

        # four packet-generator interfaces: pg0 .. pg3
        self.create_pg_interfaces(range(4))

        # pg0 -> VRF 0, pg1 -> VRF 1: fully IP configured
        for vrf, pg_if in enumerate(self.pg_interfaces[:2]):
            pg_if.admin_up()
            pg_if.set_table_ip4(vrf)
            pg_if.set_table_ip6(vrf)
            pg_if.config_ip4()
            pg_if.resolve_arp()
            pg_if.config_ip6()
            pg_if.resolve_ndp()

        # pg2 -> VRF 0, pg3 -> VRF 1: up, but no IP configuration
        for vrf, pg_if in enumerate(self.pg_interfaces[2:]):
            pg_if.admin_up()
            pg_if.set_table_ip4(vrf)
            pg_if.set_table_ip6(vrf)

    def send_and_assert_no_replies(self, intf, pkts, remark):
        """Send *pkts* on *intf* and assert that no interface captures any.

        :param intf: pg interface the stream is added to
        :param pkts: packets to send
        :param remark: text passed on to assert_nothing_captured()
        """
        intf.add_stream(pkts)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        # every pg interface, including the sender, must stay silent
        for pg_if in self.pg_interfaces:
            pg_if.assert_nothing_captured(remark=remark)

    def validate_option_82(self, pkt, intf, ip_addr):
        """Check the relay-agent-information option (82) of a DHCP packet.

        :param pkt: packet carrying a scapy DHCP layer
        :param intf: interface whose _sw_if_index is expected in
                     sub-option 1
        :param ip_addr: IPv4 address (string) expected in sub-option 5
        :returns: the raw payload of the option, as found in the packet
        """
        option_data = []
        seen = 0

        for opt in pkt[DHCP].options:
            # skip the plain string markers and every other option
            if type(opt) is not tuple:
                continue
            if opt[0] != "relay_agent_Information":
                continue

            #
            # Two sub-options are present - each 6 bytes long.
            #
            option_data = opt[1]
            self.assertEqual(len(option_data), 12)

            #
            # Sub-option ID 1, length 4, then the encoded sw_if_index.
            # The tests use low-valued indices, so only the last of the
            # four value bytes is non-zero.
            # The ID space is VPP internal - scapy has no matching value.
            #
            for pos, want in enumerate((1, 4, 0, 0, 0)):
                self.assertEqual(ord(option_data[pos]), want)
            self.assertEqual(ord(option_data[5]), intf._sw_if_index)

            #
            # Sub-option ID 5, length 4 (a v4 address): the address of
            # the client side interface.
            #
            client_addr = socket.inet_pton(AF_INET, ip_addr)

            self.assertEqual(ord(option_data[6]), 5)
            self.assertEqual(ord(option_data[7]), 4)
            for pos in range(4):
                self.assertEqual(option_data[8 + pos], client_addr[pos])

            seen = 1

        self.assertTrue(seen)
        return option_data

    def verify_dhcp_offer(self, pkt, intf, check_option_82=True):
        """Verify a DHCPv4 offer relayed by VPP towards the client.

        :param pkt: captured packet
        :param intf: client-facing interface the packet was captured on;
                     its local MAC and IPv4 address must be the source
        :param check_option_82: when True, also validate option 82
                                against *intf*
        """
        ether = pkt[Ether]
        self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff")
        self.assertEqual(ether.src, intf.local_mac)

        ip = pkt[IP]
        self.assertEqual(ip.dst, "255.255.255.255")
        self.assertEqual(ip.src, intf.local_ip4)

        udp = pkt[UDP]
        self.assertEqual(udp.dport, DHCP4_CLIENT_PORT)
        self.assertEqual(udp.sport, DHCP4_SERVER_PORT)

        # options are (name, value) tuples mixed with plain string markers;
        # only the tuples can carry the message type
        is_offer = any(type(o) is tuple and
                       o[0] == "message-type" and
                       DHCPTypes[o[1]] == "offer"
                       for o in pkt[DHCP].options)
        self.assertTrue(is_offer)

        if check_option_82:
            # the returned option payload is not needed here
            self.validate_option_82(pkt, intf, intf.local_ip4)

    def verify_dhcp_discover(self, pkt, intf, src_intf=None,
                             option_82_present=True):
        """Verify a DHCPv4 discover relayed by VPP towards the server.

        :param pkt: captured packet
        :param intf: server-facing interface the packet was captured on
        :param src_intf: client-facing interface; read only when
                         option 82 is expected
        :param option_82_present: whether option 82 must be present
                                  (and valid) or must be absent
        :returns: the option 82 payload when it is expected, else None
        """
        l2 = pkt[Ether]
        self.assertEqual(l2.dst, intf.remote_mac)
        self.assertEqual(l2.src, intf.local_mac)

        l3 = pkt[IP]
        self.assertEqual(l3.dst, intf.remote_ip4)
        self.assertEqual(l3.src, intf.local_ip4)

        l4 = pkt[UDP]
        self.assertEqual(l4.dport, DHCP4_SERVER_PORT)
        self.assertEqual(l4.sport, DHCP4_CLIENT_PORT)

        dhcp = pkt[DHCP]

        saw_discover = False
        for opt in dhcp.options:
            if type(opt) is not tuple or opt[0] != "message-type":
                continue
            if DHCPTypes[opt[1]] == "discover":
                saw_discover = True
        self.assertTrue(saw_discover)

        if not option_82_present:
            # no tuple option may be the relay agent information option
            for opt in dhcp.options:
                if type(opt) is tuple:
                    self.assertNotEqual(opt[0], "relay_agent_Information")
            return None

        return self.validate_option_82(pkt, src_intf, src_intf.local_ip4)

    def verify_dhcp6_solicit(self, pkt, intf,
                             peer_ip, peer_mac,
                             fib_id=0,
                             oui=0):
        """Verify a DHCPv6 solicit relayed by VPP towards the server.

        :param pkt: captured packet
        :param intf: server-facing interface the packet was captured on
        :param peer_ip: client address expected as the relay peer address
        :param peer_mac: client MAC expected in the link-layer option
        :param fib_id: FIB ID expected in the VSS option
        :param oui: OUI expected in the VSS option
        """
        l2 = pkt[Ether]
        self.assertEqual(l2.dst, intf.remote_mac)
        self.assertEqual(l2.src, intf.local_mac)

        l3 = pkt[IPv6]
        self.assertEqual(in6_ptop(l3.dst), in6_ptop(intf.remote_ip6))
        self.assertEqual(in6_ptop(l3.src), in6_ptop(intf.local_ip6))

        l4 = pkt[UDP]
        self.assertEqual(l4.dport, DHCP6_CLIENT_PORT)
        self.assertEqual(l4.sport, DHCP6_SERVER_PORT)

        relay_fwd = pkt[DHCP6_RelayForward]
        self.assertEqual(in6_ptop(relay_fwd.peeraddr), in6_ptop(peer_ip))

        # looked up but not inspected - presumably kept so that a packet
        # without the interface-id option fails here; confirm
        iface_id_opt = pkt[DHCP6OptIfaceId]

        ll_opt = pkt[DHCP6OptClientLinkLayerAddr]
        self.assertEqual(ll_opt.optlen, 8)
        self.assertEqual(ll_opt.lltype, 1)
        self.assertEqual(ll_opt.clladdr, peer_mac)

        vss = pkt[DHCP6OptVSS]
        self.assertEqual(vss.optlen, 8)
        self.assertEqual(vss.type, 1)
        # the OUI and FIB-id are really 3 and 4 bytes resp., but the
        # tested range is small, so only the last byte of each is set
        for pos, want in enumerate((0, 0, oui, 0, 0, 0, fib_id)):
            self.assertEqual(ord(vss.data[pos]), want)

        # the relay message option should hold an encoded Solicit
        relay_msg = pkt[DHCP6OptRelayMsg]
        encoded_solicit = str(DHCP6_Solicit())
        self.assertEqual(relay_msg.optlen, len(encoded_solicit))
        self.assertEqual(encoded_solicit,
                         (str(relay_msg[1]))[:relay_msg.optlen])

    def verify_dhcp6_advert(self, pkt, intf, peer):
        """Verify a DHCPv6 advertise relayed by VPP towards the client.

        :param pkt: captured packet
        :param intf: client-facing interface the packet was captured on
        :param peer: IPv6 address the packet must be destined to
        """
        l2 = pkt[Ether]
        self.assertEqual(l2.dst, "ff:ff:ff:ff:ff:ff")
        self.assertEqual(l2.src, intf.local_mac)

        l3 = pkt[IPv6]
        self.assertEqual(in6_ptop(l3.dst), in6_ptop(peer))
        self.assertEqual(in6_ptop(l3.src), in6_ptop(intf.local_ip6))

        l4 = pkt[UDP]
        self.assertEqual(l4.dport, DHCP6_SERVER_PORT)
        self.assertEqual(l4.sport, DHCP6_CLIENT_PORT)

        # the DHCP6_Advertise layer itself is not checked: the original
        # author noted that it does not decode (reason unknown)

    def test_dhcp_proxy(self):
        """ DHCPv4 Proxy """

        #
        # Verify no response to DHCP request without DHCP config
        #
        p_disc_vrf0 = (Ether(dst="ff:ff:ff:ff:ff:ff",
                             src=self.pg2.remote_mac) /
                       IP(src="0.0.0.0", dst="255.255.255.255") /
                       UDP(sport=DHCP4_CLIENT_PORT,
                           dport=DHCP4_SERVER_PORT) /
                       BOOTP(op=1) /
                       DHCP(options=[('message-type', 'discover'), ('end')]))
        pkts_disc_vrf0 = [p_disc_vrf0]
        p_disc_vrf1 = (Ether(dst="ff:ff:ff:ff:ff:ff",
                             src=self.pg3.remote_mac) /
                       IP(src="0.0.0.0", dst="255.255.255.255") /
                       UDP(sport=DHCP4_CLIENT_PORT,
                           dport=DHCP4_SERVER_PORT) /
                       BOOTP(op=1) /
                       DHCP(options=[('message-type', 'discover'), ('end')]))
        # the VRF 1 stream must carry the VRF 1 discover (sourced from
        # pg3's client MAC), not a second copy of the VRF 0 packet
        pkts_disc_vrf1 = [p_disc_vrf1]

        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
                                        "DHCP with no configuration")
        self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                        "DHCP with no configuration")

        #
        # Enable DHCP proxy in VRF 0
        #
        server_addr = self.pg0.remote_ip4n
        src_addr = self.pg0.local_ip4n

        self.vapi.dhcp_proxy_config(server_addr,
                                    src_addr,
                                    rx_table_id=0)

        #
        # Now a DHCP request on pg2, which is in the same VRF
        # as the DHCP config, will result in a relayed DHCP
        # message to the [fake] server
        #
        self.pg2.add_stream(pkts_disc_vrf0)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg0.get_capture(1)
        rx = rx[0]

        #
        # Rx'd packet should be to the server address and from the configured
        # source address
        # UDP source ports are unchanged
        # we've no option 82 config so that should be absent
        #
        self.verify_dhcp_discover(rx, self.pg0, option_82_present=False)

        #
        # Inject a response from the server
        #  VPP will only relay the offer if option 82 is present.
        #  so this one is dropped
        #
        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
             IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) /
             UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) /
             BOOTP(op=1) /
             DHCP(options=[('message-type', 'offer'), ('end')]))
        pkts = [p]

        self.send_and_assert_no_replies(self.pg0, pkts,
                                        "DHCP offer no option 82")

        #
        # Configure sending option 82 in relayed messages
        #
        self.vapi.dhcp_proxy_config(server_addr,
                                    src_addr,
                                    rx_table_id=0,
                                    insert_circuit_id=1)

        #
        # Send a request:
        #  again dropped, but this time because there is no IP address on
        #  the client interface to fill in the option.
        #
        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
                                        "DHCP no relay address")

        #
        # configure an IP address on the client facing interface
        #
        self.pg2.config_ip4()

        #
        # Try again with a discover packet
        # Rx'd packet should be to the server address and from the configured
        # source address
        # UDP source ports are unchanged
        # option 82 is now configured, so it must be present and valid
        #
        self.pg2.add_stream(pkts_disc_vrf0)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg0.get_capture(1)
        rx = rx[0]

        option_82 = self.verify_dhcp_discover(rx, self.pg0, src_intf=self.pg2)

        #
        # Create a DHCP offer reply from the server with a correctly
        # formatted option 82. i.e. send back what we just captured
        # The offer, sent mcast to the client, still has option 82.
        #
        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
             IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) /
             UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) /
             BOOTP(op=1) /
             DHCP(options=[('message-type', 'offer'),
                           ('relay_agent_Information', option_82),
                           ('end')]))
        pkts = [p]

        self.pg0.add_stream(pkts)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg2.get_capture(1)
        rx = rx[0]

        self.verify_dhcp_offer(rx, self.pg2)

        #
        # Bogus Option 82:
        #
        # 1. not our IP address = not checked by VPP? so offer is replayed
        #    to client
        #    (byte 8 is the first byte of the address in sub-option 5)
        bad_ip = option_82[0:8] + chr(33) + option_82[9:]

        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
             IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) /
             UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) /
             BOOTP(op=1) /
             DHCP(options=[('message-type', 'offer'),
                           ('relay_agent_Information', bad_ip),
                           ('end')]))
        pkts = [p]

        self.pg0.add_stream(pkts)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()
        rx = self.pg2.get_capture(1)
        rx = rx[0]

        self.verify_dhcp_offer(rx, self.pg2, check_option_82=False)
        self.pg0.assert_nothing_captured(remark="")

        # 2. Not a sw_if_index VPP knows
        #    (byte 2 is the first byte of the index in sub-option 1)
        bad_if_index = option_82[0:2] + chr(33) + option_82[3:]

        p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
             IP(src=self.pg0.remote_ip4, dst=self.pg0.local_ip4) /
             UDP(sport=DHCP4_SERVER_PORT, dport=DHCP4_SERVER_PORT) /
             BOOTP(op=1) /
             DHCP(options=[('message-type', 'offer'),
                           ('relay_agent_Information', bad_if_index),
                           ('end')]))
        pkts = [p]
        self.send_and_assert_no_replies(self.pg0, pkts,
                                        "DHCP offer option 82 bad if index")

        #
        # Send a DHCP request in VRF 1. should be dropped.
        #
        self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                        "DHCP with no configuration VRF 1")

        #
        # Delete the DHCP config in VRF 0
        # Should now drop requests.
        #
        self.vapi.dhcp_proxy_config(server_addr,
                                    src_addr,
                                    rx_table_id=0,
                                    is_add=0,
                                    insert_circuit_id=1)

        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
                                        "DHCP config removed VRF 0")
        self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                        "DHCP config removed VRF 1")

        #
        # Add DHCP config for VRF 1
        #
        server_addr = self.pg1.remote_ip4n
        src_addr = self.pg1.local_ip4n
        self.vapi.dhcp_proxy_config(server_addr,
                                    src_addr,
                                    rx_table_id=1,
                                    server_table_id=1,
                                    insert_circuit_id=1)

        #
        # Confirm DHCP requests in VRF 1 are still dropped:
        #  - there is no IP config on the client interface yet
        #
        self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                        "DHCP no relay address VRF 1")

        #
        # configure an IP address on the client facing interface
        #
        self.pg3.config_ip4()

        self.pg3.add_stream(pkts_disc_vrf1)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg1.get_capture(1)
        rx = rx[0]
        self.verify_dhcp_discover(rx, self.pg1, src_intf=self.pg3)

        #
        # remove DHCP config to cleanup
        #
        self.vapi.dhcp_proxy_config(server_addr,
                                    src_addr,
                                    rx_table_id=1,
                                    server_table_id=1,
                                    insert_circuit_id=1,
                                    is_add=0)

        self.send_and_assert_no_replies(self.pg2, pkts_disc_vrf0,
                                        "DHCP cleanup VRF 0")
        self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1,
                                        "DHCP cleanup VRF 1")

    def test_dhcp6_proxy(self):
        """ DHCPv6 Proxy"""
        #
        # Verify no response to DHCP request without DHCP config
        #
        dhcp_solicit_dst = "ff02::1:2"
        dhcp_solicit_src_vrf0 = mk_ll_addr(self.pg2.remote_mac)
        dhcp_solicit_src_vrf1 = mk_ll_addr(self.pg3.remote_mac)
        server_addr_vrf0 = self.pg0.remote_ip6n
        src_addr_vrf0 = self.pg0.local_ip6n
        server_addr_vrf1 = self.pg1.remote_ip6n
        src_addr_vrf1 = self.pg1.local_ip6n

        dmac = in6_getnsmac(inet_pton(socket.AF_INET6, dhcp_solicit_dst))
        p_solicit_vrf0 = (Ether(dst=dmac, src=self.pg2.remote_mac) /
                          IPv6(src=dhcp_solicit_src_vrf0,
                               dst=dhcp_solicit_dst) /
                          UDP(sport=DHCP6_SERVER_PORT,
                              dport=DHCP6_CLIENT_PORT) /
                          DHCP6_Solicit())
        pkts_solicit_vrf0 = [p_solicit_vrf0]
        p_solicit_vrf1 = (Ether(dst=dmac, src=self.pg3.remote_mac) /
                          IPv6(src=dhcp_solicit_src_vrf1,
                               dst=dhcp_solicit_dst) /
                          UDP(sport=DHCP6_SERVER_PORT,
                              dport=DHCP6_CLIENT_PORT) /
                          DHCP6_Solicit())
        pkts_solicit_vrf1 = [p_solicit_vrf1]

        self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0,
                                        "DHCP with no configuration")
        self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1,
                                        "DHCP with no configuration")

        #
        # DHCPv6 config in VRF 0.
        # Packets still dropped because the client facing interface has no
        # IPv6 config
        #
        self.vapi.dhcp_proxy_config(server_addr_vrf0,
                                    src_addr_vrf0,
                                    rx_table_id=0,
                                    server_table_id=0,
                                    insert_circuit_id=1,
                                    is_ipv6=1)

        self.send_and_assert_no_replies(self.pg2, pkts_solicit_vrf0,
                                        "DHCP with no configuration")
        self.send_and_assert_no_replies(self.pg3, pkts_solicit_vrf1,
                                        "DHCP with no configuration")

        #
        # configure an IP address on the client facing interface
        #
        self.pg2.config_ip6()

        #
        # Now the DHCP requests are relayed to the server
        #
        self.pg2.add_stream(pkts_solicit_vrf0)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg0.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_solicit(rx, self.pg0,
                                  dhcp_solicit_src_vrf0,
                                  self.pg2.remote_mac)

        #
        # Exception cases for rejected relay responses
        #
        # NOTE(review): these packets are addressed to pg0's local MAC
        # (i.e. built as if coming from the server) but are injected on
        # pg2. That looks like it may have been meant to be pg0 - confirm
        # against VPP before changing, since it alters what is exercised.
        #

        # 1 - not a relay reply
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_Advertise())
        pkts_adv_vrf0 = [p_adv_vrf0]
        self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0,
                                        "DHCP6 not a relay reply")

        # 2 - no relay message option
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply() /
                      DHCP6_Advertise())
        pkts_adv_vrf0 = [p_adv_vrf0]
        self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0,
                                        "DHCP not a relay message")

        # 3 - no circuit ID
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply() /
                      DHCP6OptRelayMsg(optlen=0) /
                      DHCP6_Advertise())
        pkts_adv_vrf0 = [p_adv_vrf0]
        self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0,
                                        "DHCP6 no circuit ID")
        # 4 - wrong circuit ID
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply() /
                      DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x05') /
                      DHCP6OptRelayMsg(optlen=0) /
                      DHCP6_Advertise())
        pkts_adv_vrf0 = [p_adv_vrf0]
        self.send_and_assert_no_replies(self.pg2, pkts_adv_vrf0,
                                        "DHCP6 wrong circuit ID")

        #
        # Send the relay response (the advertisement)
        #   - no peer address
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply() /
                      DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x03') /
                      DHCP6OptRelayMsg(optlen=0) /
                      DHCP6_Advertise(trid=1) /
                      DHCP6OptStatusCode(statuscode=0))
        pkts_adv_vrf0 = [p_adv_vrf0]

        self.pg0.add_stream(pkts_adv_vrf0)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg2.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_advert(rx, self.pg2, "::")

        #
        # Send the relay response (the advertisement)
        #   - with peer address
        p_adv_vrf0 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) /
                      IPv6(dst=self.pg0.local_ip6, src=self.pg0.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf0) /
                      DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x03') /
                      DHCP6OptRelayMsg(optlen=0) /
                      DHCP6_Advertise(trid=1) /
                      DHCP6OptStatusCode(statuscode=0))
        pkts_adv_vrf0 = [p_adv_vrf0]

        self.pg0.add_stream(pkts_adv_vrf0)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg2.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_advert(rx, self.pg2, dhcp_solicit_src_vrf0)

        #
        # Add all the config for VRF 1
        #
        self.vapi.dhcp_proxy_config(server_addr_vrf1,
                                    src_addr_vrf1,
                                    rx_table_id=1,
                                    server_table_id=1,
                                    insert_circuit_id=1,
                                    is_ipv6=1)
        self.pg3.config_ip6()

        #
        # VRF 1 solicit
        #
        self.pg3.add_stream(pkts_solicit_vrf1)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg1.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_solicit(rx, self.pg1,
                                  dhcp_solicit_src_vrf1,
                                  self.pg3.remote_mac)

        #
        # VRF 1 Advert
        #
        p_adv_vrf1 = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) /
                      IPv6(dst=self.pg1.local_ip6, src=self.pg1.remote_ip6) /
                      UDP(sport=DHCP6_SERVER_PORT, dport=DHCP6_SERVER_PORT) /
                      DHCP6_RelayReply(peeraddr=dhcp_solicit_src_vrf1) /
                      DHCP6OptIfaceId(optlen=4, ifaceid='\x00\x00\x00\x04') /
                      DHCP6OptRelayMsg(optlen=0) /
                      DHCP6_Advertise(trid=1) /
                      DHCP6OptStatusCode(statuscode=0))
        pkts_adv_vrf1 = [p_adv_vrf1]

        self.pg1.add_stream(pkts_adv_vrf1)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg3.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_advert(rx, self.pg3, dhcp_solicit_src_vrf1)

        #
        # Add VSS config
        #  table=1, fib-id=1, oui=4
        self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_ip6=1)

        self.pg3.add_stream(pkts_solicit_vrf1)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg1.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_solicit(rx, self.pg1,
                                  dhcp_solicit_src_vrf1,
                                  self.pg3.remote_mac,
                                  fib_id=1,
                                  oui=4)

        #
        # Remove the VSS config
        #  relayed DHCP has default values in the option.
        #
        self.vapi.dhcp_proxy_set_vss(1, 1, 4, is_ip6=1, is_add=0)

        self.pg3.add_stream(pkts_solicit_vrf1)
        self.pg_enable_capture(self.pg_interfaces)
        self.pg_start()

        rx = self.pg1.get_capture(1)
        rx = rx[0]
        self.verify_dhcp6_solicit(rx, self.pg1,
                                  dhcp_solicit_src_vrf1,
                                  self.pg3.remote_mac)

        #
        # Cleanup
        #  each delete mirrors the arguments of the matching add above
        #
        self.vapi.dhcp_proxy_config(server_addr_vrf1,
                                    src_addr_vrf1,
                                    rx_table_id=1,
                                    server_table_id=1,
                                    insert_circuit_id=1,
                                    is_ipv6=1,
                                    is_add=0)
        # VRF 0 was configured with the VRF 0 server/source addresses, so
        # it must be removed with those, not with the VRF 1 ones
        self.vapi.dhcp_proxy_config(server_addr_vrf0,
                                    src_addr_vrf0,
                                    rx_table_id=0,
                                    server_table_id=0,
                                    insert_circuit_id=1,
                                    is_ipv6=1,
                                    is_add=0)

# When executed as a script, run this module's tests through unittest
# using the VPP test runner imported from the framework module.
if __name__ == '__main__':
    unittest.main(testRunner=VppTestRunner)
an class="n">nbytes); if (size < 0) { errno = -size; size = -1; } } else { size = libc_write (fd, buf, nbytes); } return size; } ssize_t writev (int fd, const struct iovec * iov, int iovcnt) { ssize_t size = 0, total = 0; vls_handle_t vlsh; int i, rv = 0; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { for (i = 0; i < iovcnt; ++i) { rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len); if (rv < 0) break; else { total += rv; if (rv < iov[i].iov_len) break; } } if (rv < 0 && total == 0) { errno = -rv; size = -1; } else size = total; } else { size = libc_writev (fd, iov, iovcnt); } return size; } static int fcntl_internal (int fd, int cmd, va_list ap) { vls_handle_t vlsh; int rv = 0; vlsh = ldp_fd_to_vlsh (fd); LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd); if (vlsh != VLS_INVALID_HANDLE) { int flags = va_arg (ap, int); u32 size; size = sizeof (flags); rv = -EOPNOTSUPP; switch (cmd) { case F_SETFL: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); break; case F_GETFL: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size); if (rv == VPPCOM_OK) rv = flags; break; case F_SETFD: /* TODO handle this */ LDBG (0, "F_SETFD ignored flags %u", flags); rv = 0; break; default: rv = -EOPNOTSUPP; break; } if (rv < 0) { errno = -rv; rv = -1; } } else { #ifdef HAVE_FCNTL64 rv = libc_vfcntl64 (fd, cmd, ap); #else rv = libc_vfcntl (fd, cmd, ap); #endif } return rv; } int fcntl (int fd, int cmd, ...) { va_list ap; int rv; ldp_init_check (); va_start (ap, cmd); rv = fcntl_internal (fd, cmd, ap); va_end (ap); return rv; } int fcntl64 (int fd, int cmd, ...) { va_list ap; int rv; ldp_init_check (); va_start (ap, cmd); rv = fcntl_internal (fd, cmd, ap); va_end (ap); return rv; } int ioctl (int fd, unsigned long int cmd, ...) 
{ vls_handle_t vlsh; va_list ap; int rv; ldp_init_check (); va_start (ap, cmd); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { switch (cmd) { case FIONREAD: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); break; case FIONBIO: { u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0; u32 size = sizeof (flags); /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than * non-blocking, the flags should be read here and merged * with O_NONBLOCK. */ rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); } break; default: rv = -EOPNOTSUPP; break; } if (rv < 0) { errno = -rv; rv = -1; } } else { rv = libc_vioctl (fd, cmd, ap); } va_end (ap); return rv; } always_inline void ldp_select_init_maps (fd_set * __restrict original, clib_bitmap_t ** resultb, clib_bitmap_t ** libcb, clib_bitmap_t ** vclb, int nfds, u32 minbits, u32 n_bytes, uword * si_bits, uword * libc_bits) { uword si_bits_set, libc_bits_set; vls_handle_t vlsh; int fd; clib_bitmap_validate (*vclb, minbits); clib_bitmap_validate (*libcb, minbits); clib_bitmap_validate (*resultb, minbits); clib_memcpy_fast (*resultb, original, n_bytes); memset (original, 0, n_bytes); /* *INDENT-OFF* */ clib_bitmap_foreach (fd, *resultb) { if (fd > nfds) break; vlsh = ldp_fd_to_vlsh (fd); if (vlsh == VLS_INVALID_HANDLE) clib_bitmap_set_no_check (*libcb, fd, 1); else *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1); } /* *INDENT-ON* */ si_bits_set = clib_bitmap_last_set (*vclb) + 1; *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits; clib_bitmap_validate (*resultb, *si_bits); libc_bits_set = clib_bitmap_last_set (*libcb) + 1; *libc_bits = (libc_bits_set > *libc_bits) ? 
libc_bits_set : *libc_bits; } always_inline int ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb) { vls_handle_t vlsh; uword si; int fd; if (!libcb) return 0; /* *INDENT-OFF* */ clib_bitmap_foreach (si, vclb) { vlsh = vls_session_index_to_vlsh (si); ASSERT (vlsh != VLS_INVALID_HANDLE); fd = ldp_vlsh_to_fd (vlsh); if (PREDICT_FALSE (fd < 0)) { errno = EBADFD; return -1; } FD_SET (fd, libcb); } /* *INDENT-ON* */ return 0; } always_inline void ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb) { uword fd; if (!libcb) return; /* *INDENT-OFF* */ clib_bitmap_foreach (fd, result) FD_SET ((int)fd, libcb); /* *INDENT-ON* */ } int ldp_pselect (int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds, fd_set * __restrict exceptfds, const struct timespec *__restrict timeout, const __sigset_t * __restrict sigmask) { u32 minbits = clib_max (nfds, BITS (uword)), n_bytes; ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); struct timespec libc_tspec = { 0 }; f64 time_out, vcl_timeout = 0; uword si_bits, libc_bits; int rv, bits_set = 0; if (nfds < 0) { errno = EINVAL; return -1; } if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) clib_time_init (&ldpw->clib_time); if (timeout) { time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ? (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9; time_out += clib_time_now (&ldpw->clib_time); /* select as fine grained sleep */ if (!nfds) { while (clib_time_now (&ldpw->clib_time) < time_out) ; return 0; } } else if (!nfds) { errno = EINVAL; return -1; } else time_out = -1; if (nfds <= ldp->vlsh_bit_val) { rv = libc_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask); goto done; } si_bits = libc_bits = 0; n_bytes = nfds / 8 + ((nfds % 8) ? 
1 : 0); if (readfds) ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap, &ldpw->si_rd_bitmap, nfds, minbits, n_bytes, &si_bits, &libc_bits); if (writefds) ldp_select_init_maps (writefds, &ldpw->wr_bitmap, &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds, minbits, n_bytes, &si_bits, &libc_bits); if (exceptfds) ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap, &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds, minbits, n_bytes, &si_bits, &libc_bits); if (PREDICT_FALSE (!si_bits && !libc_bits)) { errno = EINVAL; rv = -1; goto done; } if (!si_bits) libc_tspec = timeout ? *timeout : libc_tspec; do { if (si_bits) { if (readfds) clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap, vec_len (ldpw->si_rd_bitmap) * sizeof (clib_bitmap_t)); if (writefds) clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap, vec_len (ldpw->si_wr_bitmap) * sizeof (clib_bitmap_t)); if (exceptfds) clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap, vec_len (ldpw->si_ex_bitmap) * sizeof (clib_bitmap_t)); rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, writefds ? ldpw->wr_bitmap : NULL, exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout); if (rv < 0) { errno = -rv; rv = -1; goto done; } else if (rv > 0) { if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds)) { rv = -1; goto done; } if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds)) { rv = -1; goto done; } if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds)) { rv = -1; goto done; } bits_set = rv; } } if (libc_bits) { if (readfds) clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap, vec_len (ldpw->libc_rd_bitmap) * sizeof (clib_bitmap_t)); if (writefds) clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap, vec_len (ldpw->libc_wr_bitmap) * sizeof (clib_bitmap_t)); if (exceptfds) clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap, vec_len (ldpw->libc_ex_bitmap) * sizeof (clib_bitmap_t)); rv = libc_pselect (libc_bits, readfds ? (fd_set *) ldpw->rd_bitmap : NULL, writefds ? 
(fd_set *) ldpw->wr_bitmap : NULL, exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL, &libc_tspec, sigmask); if (rv > 0) { ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds); ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds); ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds); bits_set += rv; } } if (bits_set) { rv = bits_set; goto done; } } while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out)); rv = 0; done: /* TBD: set timeout to amount of time left */ clib_bitmap_zero (ldpw->rd_bitmap); clib_bitmap_zero (ldpw->si_rd_bitmap); clib_bitmap_zero (ldpw->libc_rd_bitmap); clib_bitmap_zero (ldpw->wr_bitmap); clib_bitmap_zero (ldpw->si_wr_bitmap); clib_bitmap_zero (ldpw->libc_wr_bitmap); clib_bitmap_zero (ldpw->ex_bitmap); clib_bitmap_zero (ldpw->si_ex_bitmap); clib_bitmap_zero (ldpw->libc_ex_bitmap); return rv; } int select (int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds, fd_set * __restrict exceptfds, struct timeval *__restrict timeout) { struct timespec tspec; if (timeout) { tspec.tv_sec = timeout->tv_sec; tspec.tv_nsec = timeout->tv_usec * 1000; } return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout ? &tspec : NULL, NULL); } #ifdef __USE_XOPEN2K int pselect (int nfds, fd_set * __restrict readfds, fd_set * __restrict writefds, fd_set * __restrict exceptfds, const struct timespec *__restrict timeout, const __sigset_t * __restrict sigmask) { return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, 0); } #endif /* If transparent TLS mode is turned on, then ldp will load key and cert. 
*/ static int load_cert_key_pair (void) { char *cert_str = getenv (LDP_ENV_TLS_CERT); char *key_str = getenv (LDP_ENV_TLS_KEY); char cert_buf[4096], key_buf[4096]; int cert_size, key_size; vppcom_cert_key_pair_t crypto; int ckp_index; FILE *fp; if (!cert_str || !key_str) { LDBG (0, "ERROR: failed to read LDP environment %s\n", LDP_ENV_TLS_CERT); return -1; } fp = fopen (cert_str, "r"); if (fp == NULL) { LDBG (0, "ERROR: failed to open cert file %s \n", cert_str); return -1; } cert_size = fread (cert_buf, sizeof (char), sizeof (cert_buf), fp); fclose (fp); fp = fopen (key_str, "r"); if (fp == NULL) { LDBG (0, "ERROR: failed to open key file %s \n", key_str); return -1; } key_size = fread (key_buf, sizeof (char), sizeof (key_buf), fp); fclose (fp); crypto.cert = cert_buf; crypto.key = key_buf; crypto.cert_len = cert_size; crypto.key_len = key_size; ckp_index = vppcom_add_cert_key_pair (&crypto); if (ckp_index < 0) { LDBG (0, "ERROR: failed to add cert key pair\n"); return -1; } ldp->ckpair_index = ckp_index; return 0; } static int assign_cert_key_pair (vls_handle_t vlsh) { uint32_t ckp_len; if (ldp->ckpair_index == ~0 && load_cert_key_pair () < 0) return -1; ckp_len = sizeof (ldp->ckpair_index); return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len); } int socket (int domain, int type, int protocol) { int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0; vls_handle_t vlsh; ldp_init_check (); if (((domain == AF_INET) || (domain == AF_INET6)) && ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM))) { u8 proto; if (ldp->transparent_tls) { proto = VPPCOM_PROTO_TLS; } else proto = ((sock_type == SOCK_DGRAM) ? 
VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP); LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u", proto, vppcom_proto_str (proto), is_nonblocking); vlsh = vls_create (proto, is_nonblocking); if (vlsh < 0) { errno = -vlsh; rv = -1; } else { if (ldp->transparent_tls) { if (assign_cert_key_pair (vlsh) < 0) return -1; } rv = ldp_vlsh_to_fd (vlsh); } } else { LDBG (0, "calling libc_socket"); rv = libc_socket (domain, type, protocol); } return rv; } /* * Create two new sockets, of type TYPE in domain DOMAIN and using * protocol PROTOCOL, which are connected to each other, and put file * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, * one will be chosen automatically. * Returns 0 on success, -1 for errors. * */ int socketpair (int domain, int type, int protocol, int fds[2]) { int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); ldp_init_check (); if (((domain == AF_INET) || (domain == AF_INET6)) && ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM))) { LDBG (0, "LDP-TBD"); errno = ENOSYS; rv = -1; } else { LDBG (1, "calling libc_socketpair"); rv = libc_socketpair (domain, type, protocol, fds); } return rv; } int bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len) { const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { vppcom_endpt_t ep; switch (addr->sa_family) { case AF_INET: if (len != sizeof (struct sockaddr_in)) { LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!", fd, vlsh, len); errno = EINVAL; rv = -1; goto done; } ep.is_ip4 = VPPCOM_IS_IP4; ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; break; case AF_INET6: if (len != sizeof (struct sockaddr_in6)) { LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!", fd, vlsh, len); errno = EINVAL; rv = -1; goto done; } ep.is_ip4 = VPPCOM_IS_IP6; ep.ip = (u8 *) & ((const struct 
sockaddr_in6 *) addr)->sin6_addr; ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; break; default: LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!", fd, vlsh, addr->sa_family); errno = EAFNOSUPPORT; rv = -1; goto done; } LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh, addr, len); rv = vls_bind (vlsh, &ep); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len); rv = libc_bind (fd, addr, len); } done: LDBG (1, "fd %d: returning %d", fd, rv); return rv; } static inline int ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len, vppcom_endpt_t *ep) { int rv = 0, sa_len, copy_len; ldp_init_check (); if (addr && len && ep) { addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6; switch (addr->sa_family) { case AF_INET: ((struct sockaddr_in *) addr)->sin_port = ep->port; if (*len > sizeof (struct sockaddr_in)) *len = sizeof (struct sockaddr_in); sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr); copy_len = *len - sa_len; if (copy_len > 0) memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip, copy_len); break; case AF_INET6: ((struct sockaddr_in6 *) addr)->sin6_port = ep->port; if (*len > sizeof (struct sockaddr_in6)) *len = sizeof (struct sockaddr_in6); sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr); copy_len = *len - sa_len; if (copy_len > 0) memcpy (((struct sockaddr_in6 *) addr)->sin6_addr. 
__in6_u.__u6_addr8, ep->ip, copy_len); break; default: /* Not possible */ rv = -EAFNOSUPPORT; break; } } return rv; } int getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len) { struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { vppcom_endpt_t ep; u8 addr_buf[sizeof (struct in6_addr)]; u32 size = sizeof (ep); ep.ip = addr_buf; rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } else { rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } } else { rv = libc_getsockname (fd, _addr, len); } return rv; } int connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len) { const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t vlsh; int rv; ldp_init_check (); if (!addr) { LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len); errno = EINVAL; rv = -1; goto done; } vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { vppcom_endpt_t ep; switch (addr->sa_family) { case AF_INET: if (len != sizeof (struct sockaddr_in)) { LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!", fd, vlsh, len); errno = EINVAL; rv = -1; goto done; } ep.is_ip4 = VPPCOM_IS_IP4; ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; break; case AF_INET6: if (len != sizeof (struct sockaddr_in6)) { LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!", fd, vlsh, len); errno = EINVAL; rv = -1; goto done; } ep.is_ip4 = VPPCOM_IS_IP6; ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; break; default: LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!", fd, vlsh, addr->sa_family); errno = EAFNOSUPPORT; rv = -1; goto done; } LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd, 
vlsh, addr, len); rv = vls_connect (vlsh, &ep); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u", fd, addr, len); rv = libc_connect (fd, addr, len); } done: LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv); return rv; } int getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len) { struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { vppcom_endpt_t ep; u8 addr_buf[sizeof (struct in6_addr)]; u32 size = sizeof (ep); ep.ip = addr_buf; rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } else { rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } } else { rv = libc_getpeername (fd, addr, len); } return rv; } ssize_t send (int fd, const void *buf, size_t n, int flags) { vls_handle_t vlsh = ldp_fd_to_vlsh (fd); ssize_t size; ldp_init_check (); if (vlsh != VLS_INVALID_HANDLE) { size = vls_sendto (vlsh, (void *) buf, n, flags, NULL); if (size < VPPCOM_OK) { errno = -size; size = -1; } } else { size = libc_send (fd, buf, n, flags); } return size; } ssize_t sendfile (int out_fd, int in_fd, off_t * offset, size_t len) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); vls_handle_t vlsh; ssize_t size = 0; ldp_init_check (); vlsh = ldp_fd_to_vlsh (out_fd); if (vlsh != VLS_INVALID_HANDLE) { int rv; ssize_t results = 0; size_t n_bytes_left = len; size_t bytes_to_read; int nbytes; u8 eagain = 0; u32 flags, flags_len = sizeof (flags); rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len); if (PREDICT_FALSE (rv != VPPCOM_OK)) { LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!", out_fd, vlsh, rv, vppcom_retval_str (rv)); vec_reset_length (ldpw->io_buffer); errno = -rv; size = -1; goto done; } if (offset) { off_t off = lseek (in_fd, *offset, SEEK_SET); if (PREDICT_FALSE (off == 
-1)) { size = -1; goto done; } ASSERT (off == *offset); } do { size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0); if (size < 0) { LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!", out_fd, vlsh, size, vppcom_retval_str (size)); vec_reset_length (ldpw->io_buffer); errno = -size; size = -1; goto done; } bytes_to_read = size; if (bytes_to_read == 0) { if (flags & O_NONBLOCK) { if (!results) eagain = 1; goto update_offset; } else continue; } bytes_to_read = clib_min (n_bytes_left, bytes_to_read); vec_validate (ldpw->io_buffer, bytes_to_read); nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read); if (nbytes < 0) { if (results == 0) { vec_reset_length (ldpw->io_buffer); size = -1; goto done; } goto update_offset; } size = vls_write (vlsh, ldpw->io_buffer, nbytes); if (size < 0) { if (size == VPPCOM_EAGAIN) { if (flags & O_NONBLOCK) { if (!results) eagain = 1; goto update_offset; } else continue; } if (results == 0) { vec_reset_length (ldpw->io_buffer); errno = -size; size = -1; goto done; } goto update_offset; } results += nbytes; ASSERT (n_bytes_left >= nbytes); n_bytes_left = n_bytes_left - nbytes; } while (n_bytes_left > 0); update_offset: vec_reset_length (ldpw->io_buffer); if (offset) { off_t off = lseek (in_fd, *offset, SEEK_SET); if (PREDICT_FALSE (off == -1)) { size = -1; goto done; } ASSERT (off == *offset); *offset += results + 1; } if (eagain) { errno = EAGAIN; size = -1; } else size = results; } else { size = libc_sendfile (out_fd, in_fd, offset, len); } done: return size; } ssize_t sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len) { return sendfile (out_fd, in_fd, offset, len); } ssize_t recv (int fd, void *buf, size_t n, int flags) { vls_handle_t vlsh; ssize_t size; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { size = vls_recvfrom (vlsh, buf, n, flags, NULL); if (size < 0) { errno = -size; size = -1; } } else { size = libc_recv (fd, buf, n, flags); } return size; } ssize_t __recv_chk (int 
fd, void *buf, size_t n, size_t buflen, int flags) { if (n > buflen) return -1; return recv (fd, buf, n, flags); } static inline int ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, vppcom_endpt_tlv_t *ep_tlv, int flags, __CONST_SOCKADDR_ARG _addr, socklen_t addr_len) { const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vppcom_endpt_t *ep = 0; vppcom_endpt_t _ep; if (ep_tlv) { _ep.app_data = *ep_tlv; } if (addr) { ep = &_ep; switch (addr->sa_family) { case AF_INET: ep->is_ip4 = VPPCOM_IS_IP4; ep->ip = (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr; ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port; break; case AF_INET6: ep->is_ip4 = VPPCOM_IS_IP6; ep->ip = (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; ep->port = (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port; break; default: return EAFNOSUPPORT; } } return vls_sendto (vlsh, (void *) buf, n, flags, ep); } static int ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags, __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len) { u8 src_addr[sizeof (struct sockaddr_in6)]; struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vppcom_endpt_t ep; ssize_t size; int rv; if (addr) { ep.ip = src_addr; size = vls_recvfrom (vlsh, buf, n, flags, &ep); if (size > 0) { rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); if (rv < 0) size = rv; } } else size = vls_recvfrom (vlsh, buf, n, flags, NULL); return size; } ssize_t sendto (int fd, const void *buf, size_t n, int flags, __CONST_SOCKADDR_ARG _addr, socklen_t addr_len) { const struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t vlsh; ssize_t size; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len); if (size < 0) { errno = -size; size = -1; } } else { size = libc_sendto (fd, buf, n, flags, addr, addr_len); } return size; } ssize_t recvfrom (int fd, void *__restrict buf, size_t n, int 
flags, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) { vls_handle_t vlsh; ssize_t size; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len); if (size < 0) { errno = -size; size = -1; } } else { size = libc_recvfrom (fd, buf, n, flags, addr, addr_len); } return size; } ssize_t sendmsg (int fd, const struct msghdr * msg, int flags) { vls_handle_t vlsh; ssize_t size; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { struct iovec *iov = msg->msg_iov; ssize_t total = 0; int i, rv = 0; struct cmsghdr *cmsg; uint16_t *valp; vppcom_endpt_tlv_t _app_data; vppcom_endpt_tlv_t *p_app_data = NULL; cmsg = CMSG_FIRSTHDR (msg); if (cmsg && cmsg->cmsg_type == UDP_SEGMENT) { p_app_data = &_app_data; valp = (void *) CMSG_DATA (cmsg); p_app_data->data_type = VCL_UDP_SEGMENT; p_app_data->data_len = sizeof (*valp); p_app_data->value = *valp; } for (i = 0; i < msg->msg_iovlen; ++i) { rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, p_app_data, flags, msg->msg_name, msg->msg_namelen); if (rv < 0) break; else { total += rv; if (rv < iov[i].iov_len) break; } } if (rv < 0 && total == 0) { errno = -rv; size = -1; } else size = total; } else { size = libc_sendmsg (fd, msg, flags); } return size; } #ifdef _GNU_SOURCE int sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) { ssize_t size; const char *func_str; u32 sh = ldp_fd_to_vlsh (fd); ldp_init_check (); if (sh != VLS_INVALID_HANDLE) { clib_warning ("LDP<%d>: LDP-TBD", getpid ()); errno = ENOSYS; size = -1; } else { func_str = "libc_sendmmsg"; if (LDP_DEBUG > 2) clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): " "vmessages %p, vlen %u, flags 0x%x", getpid (), fd, fd, func_str, vmessages, vlen, flags); size = libc_sendmmsg (fd, vmessages, vlen, flags); } if (LDP_DEBUG > 2) { if (size < 0) { int errno_val = errno; perror (func_str); clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): 
%s() failed! " "rv %d, errno = %d", getpid (), fd, fd, func_str, size, errno_val); errno = errno_val; } else clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)", getpid (), fd, fd, size, size); } return size; } #endif ssize_t recvmsg (int fd, struct msghdr * msg, int flags) { vls_handle_t vlsh; ssize_t size; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { struct iovec *iov = msg->msg_iov; ssize_t max_deq, total = 0; int i, rv; max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); if (!max_deq) return 0; for (i = 0; i < msg->msg_iovlen; i++) { rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags, (i == 0 ? msg->msg_name : NULL), (i == 0 ? &msg->msg_namelen : NULL)); if (rv <= 0) break; else { total += rv; if (rv < iov[i].iov_len) break; } if (total >= max_deq) break; } if (rv < 0 && total == 0) { errno = -rv; size = -1; } else size = total; } else { size = libc_recvmsg (fd, msg, flags); } return size; } #ifdef _GNU_SOURCE int recvmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags, struct timespec *tmo) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); u32 sh; ldp_init_check (); sh = ldp_fd_to_vlsh (fd); if (sh != VLS_INVALID_HANDLE) { struct mmsghdr *mh; ssize_t rv = 0; u32 nvecs = 0; f64 time_out; if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) clib_time_init (&ldpw->clib_time); if (tmo) { time_out = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9; time_out += clib_time_now (&ldpw->clib_time); } else { time_out = (f64) ~0; } while (nvecs < vlen) { mh = &vmessages[nvecs]; rv = recvmsg (fd, &mh->msg_hdr, flags); if (rv > 0) { mh->msg_len = rv; nvecs += 1; continue; } if (!time_out || clib_time_now (&ldpw->clib_time) >= time_out) break; usleep (1); } return nvecs > 0 ? 
nvecs : rv; } else { return libc_recvmmsg (fd, vmessages, vlen, flags, tmo); } } #endif int getsockopt (int fd, int level, int optname, void *__restrict optval, socklen_t * __restrict optlen) { vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { rv = -EOPNOTSUPP; switch (level) { case SOL_TCP: switch (optname) { case TCP_NODELAY: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY, optval, optlen); break; case TCP_MAXSEG: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS, optval, optlen); break; case TCP_KEEPIDLE: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE, optval, optlen); break; case TCP_KEEPINTVL: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL, optval, optlen); break; case TCP_INFO: if (optval && optlen && (*optlen == sizeof (struct tcp_info))) { LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, " "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen); memset (optval, 0, *optlen); rv = VPPCOM_OK; } else rv = -EFAULT; break; case TCP_CONGESTION: *optlen = strlen ("cubic"); strncpy (optval, "cubic", *optlen + 1); rv = 0; break; default: LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, " "optname %d unsupported!", fd, vlsh, optname); break; } break; case SOL_IPV6: switch (optname) { case IPV6_V6ONLY: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen); break; default: LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u " "optname %d unsupported!", fd, vlsh, optname); break; } break; case SOL_SOCKET: switch (optname) { case SO_ACCEPTCONN: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen); break; case SO_KEEPALIVE: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen); break; case SO_PROTOCOL: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen); *(int *) optval = *(int *) optval ? 
SOCK_DGRAM : SOCK_STREAM; break; case SO_SNDBUF: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN, optval, optlen); break; case SO_RCVBUF: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN, optval, optlen); break; case SO_REUSEADDR: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen); break; case SO_REUSEPORT: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEPORT, optval, optlen); break; case SO_BROADCAST: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen); break; case SO_DOMAIN: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_DOMAIN, optval, optlen); break; case SO_ERROR: rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen); break; case SO_BINDTODEVICE: rv = 0; break; default: LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u " "optname %d unsupported!", fd, vlsh, optname); break; } break; default: break; } if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { rv = libc_getsockopt (fd, level, optname, optval, optlen); } return rv; } int setsockopt (int fd, int level, int optname, const void *optval, socklen_t optlen) { vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { rv = -EOPNOTSUPP; switch (level) { case SOL_TCP: switch (optname) { case TCP_NODELAY: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY, (void *) optval, &optlen); break; case TCP_MAXSEG: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS, (void *) optval, &optlen); break; case TCP_KEEPIDLE: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE, (void *) optval, &optlen); break; case TCP_KEEPINTVL: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL, (void *) optval, &optlen); break; case TCP_CONGESTION: case TCP_CORK: /* Ignore */ rv = 0; break; default: LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u" "optname %d unsupported!", fd, vlsh, optname); break; } break; case SOL_IPV6: switch (optname) { case IPV6_V6ONLY: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY, (void *) optval, &optlen); break; default: LDBG (0, "ERROR: fd 
%d: setsockopt SOL_IPV6: vlsh %u" "optname %d unsupported!", fd, vlsh, optname); break; } break; case SOL_SOCKET: switch (optname) { case SO_KEEPALIVE: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE, (void *) optval, &optlen); break; case SO_REUSEADDR: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR, (void *) optval, &optlen); break; case SO_REUSEPORT: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEPORT, (void *) optval, &optlen); break; case SO_BROADCAST: rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST, (void *) optval, &optlen); break; case SO_LINGER: rv = 0; break; default: LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u " "optname %d unsupported!", fd, vlsh, optname); break; } break; default: break; } if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { rv = libc_setsockopt (fd, level, optname, optval, optlen); } return rv; } int listen (int fd, int n) { vls_handle_t vlsh; int rv; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n); rv = vls_listen (vlsh, n); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n); rv = libc_listen (fd, n); } LDBG (1, "fd %d: returning %d", fd, rv); return rv; } static inline int ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len, int flags) { struct sockaddr *addr = SOCKADDR_GET_SA (_addr); vls_handle_t listen_vlsh, accept_vlsh; int rv; ldp_init_check (); listen_vlsh = ldp_fd_to_vlsh (listen_fd); if (listen_vlsh != VLS_INVALID_HANDLE) { vppcom_endpt_t ep; u8 src_addr[sizeof (struct sockaddr_in6)]; memset (&ep, 0, sizeof (ep)); ep.ip = src_addr; LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u," " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags); accept_vlsh = vls_accept (listen_vlsh, &ep, flags); if (accept_vlsh < 0) { errno = -accept_vlsh; rv = -1; } else { rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); if (rv != 
VPPCOM_OK) { (void) vls_close (accept_vlsh); errno = -rv; rv = -1; } else { rv = ldp_vlsh_to_fd (accept_vlsh); } } } else { LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p," " flags 0x%x", listen_fd, addr, addr_len, flags); rv = libc_accept4 (listen_fd, addr, addr_len, flags); } LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv); return rv; } int accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len, int flags) { return ldp_accept4 (fd, addr, addr_len, flags); } int accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) { return ldp_accept4 (fd, addr, addr_len, 0); } int shutdown (int fd, int how) { vls_handle_t vlsh; int rv = 0; ldp_init_check (); vlsh = ldp_fd_to_vlsh (fd); if (vlsh != VLS_INVALID_HANDLE) { LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how); rv = vls_shutdown (vlsh, how); } else { LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how); rv = libc_shutdown (fd, how); } return rv; } int epoll_create1 (int flags) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); vls_handle_t vlsh; int rv; ldp_init_check (); if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ()) { /* Make sure workers have been allocated */ if (!ldp->workers) { ldp_alloc_workers (); ldpw = ldp_worker_get_current (); } rv = libc_epoll_create1 (flags); ldp->vcl_needs_real_epoll = 0; ldpw->vcl_mq_epfd = rv; LDBG (0, "created vcl epfd %u", rv); return rv; } vlsh = vls_epoll_create (); if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE)) { errno = -vlsh; rv = -1; } else { rv = ldp_vlsh_to_fd (vlsh); } LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh); return rv; } int epoll_create (int size) { return epoll_create1 (0); } int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) { vls_handle_t vep_vlsh, vlsh; int rv; ldp_init_check (); vep_vlsh = ldp_fd_to_vlsh (epfd); if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE)) { /* The LDP epoll_create1 always creates VCL epfd's. 
* The app should never have a kernel base epoll fd unless it * was acquired outside of the LD_PRELOAD process context. * In any case, if we get one, punt it to libc_epoll_ctl. */ LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d" " event %p", epfd, op, fd, event); rv = libc_epoll_ctl (epfd, op, fd, event); goto done; } vlsh = ldp_fd_to_vlsh (fd); LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd, vlsh, op); if (vlsh != VLS_INVALID_HANDLE) { LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u," " event %p", epfd, vep_vlsh, op, vlsh, event); rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } } else { int libc_epfd; u32 size = sizeof (epfd); libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (!libc_epfd) { LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " "EPOLL_CLOEXEC", epfd, vep_vlsh); libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); if (libc_epfd < 0) { rv = libc_epfd; goto done; } rv = vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size); if (rv < 0) { errno = -rv; rv = -1; goto done; } } else if (PREDICT_FALSE (libc_epfd < 0)) { errno = -epfd; rv = -1; goto done; } LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d," " event %p", epfd, libc_epfd, op, fd, event); rv = libc_epoll_ctl (libc_epfd, op, fd, event); } done: return rv; } static inline int ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t * sigmask) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); double time_to_wait = (double) 0, max_time; int libc_epfd, rv = 0; vls_handle_t ep_vlsh; ldp_init_check (); if (PREDICT_FALSE (!events || (timeout < -1))) { errno = EFAULT; return -1; } if (epfd == ldpw->vcl_mq_epfd) return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); ep_vlsh = ldp_fd_to_vlsh (epfd); if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) { LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, 
ep_vlsh); errno = EBADFD; return -1; } if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) clib_time_init (&ldpw->clib_time); time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0); max_time = clib_time_now (&ldpw->clib_time) + time_to_wait; libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (PREDICT_FALSE (libc_epfd < 0)) { errno = -libc_epfd; rv = -1; goto done; } LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, " "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh, libc_epfd, events, maxevents, timeout, sigmask, time_to_wait); do { if (!ldpw->epoll_wait_vcl) { rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); if (rv > 0) { ldpw->epoll_wait_vcl = 1; goto done; } else if (rv < 0) { errno = -rv; rv = -1; goto done; } } else ldpw->epoll_wait_vcl = 0; if (libc_epfd > 0) { rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask); if (rv != 0) goto done; } } while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time)); done: return rv; } static inline int ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t * sigmask) { ldp_worker_ctx_t *ldpw; int libc_epfd, rv = 0, num_ev; vls_handle_t ep_vlsh; ldp_init_check (); if (PREDICT_FALSE (!events || (timeout < -1))) { errno = EFAULT; return -1; } /* Make sure the vcl worker is valid. 
Could be that epoll fd was created on * one thread but it is now used on another */ if (PREDICT_FALSE (vppcom_worker_index () == ~0)) vls_register_vcl_worker (); ldpw = ldp_worker_get_current (); if (epfd == ldpw->vcl_mq_epfd) return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); ep_vlsh = ldp_fd_to_vlsh (epfd); if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) { LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); errno = EBADFD; return -1; } libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (PREDICT_FALSE (!libc_epfd)) { u32 size = sizeof (epfd); LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " "EPOLL_CLOEXEC", epfd, ep_vlsh); libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); if (libc_epfd < 0) { rv = libc_epfd; goto done; } rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size); if (rv < 0) { errno = -rv; rv = -1; goto done; } } if (PREDICT_FALSE (libc_epfd <= 0)) { errno = -libc_epfd; rv = -1; goto done; } if (PREDICT_FALSE (!ldpw->mq_epfd_added)) { struct epoll_event e = { 0 }; e.events = EPOLLIN; e.data.fd = ldpw->vcl_mq_epfd; if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) < 0) { LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d", epfd, ldpw->vcl_mq_epfd, libc_epfd); rv = -1; goto done; } ldpw->mq_epfd_added = 1; } /* Request to only drain unhandled to prevent libc_epoll_wait starved */ rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2); if (rv > 0) goto done; else if (PREDICT_FALSE (rv < 0)) { errno = -rv; rv = -1; goto done; } rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask); if (rv <= 0) goto done; for (int i = 0; i < rv; i++) { if (events[i].data.fd == ldpw->vcl_mq_epfd) { /* We should remove mq epoll fd from events. 
*/ rv--; if (i != rv) { events[i].events = events[rv].events; events[i].data.u64 = events[rv].data.u64; } num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0); if (PREDICT_TRUE (num_ev > 0)) rv += num_ev; break; } } done: return rv; } int epoll_pwait (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t * sigmask) { if (vls_use_eventfd ()) return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, sigmask); else return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask); } int epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout) { if (vls_use_eventfd ()) return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL); else return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL); } int poll (struct pollfd *fds, nfds_t nfds, int timeout) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); int rv, i, n_revents = 0; vls_handle_t vlsh; vcl_poll_t *vp; double max_time; LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout); if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) clib_time_init (&ldpw->clib_time); max_time = (timeout >= 0) ? 
(f64) timeout / 1000 : 0; max_time += clib_time_now (&ldpw->clib_time); for (i = 0; i < nfds; i++) { if (fds[i].fd < 0) continue; vlsh = ldp_fd_to_vlsh (fds[i].fd); if (vlsh != VLS_INVALID_HANDLE) { fds[i].fd = -fds[i].fd; vec_add2 (ldpw->vcl_poll, vp, 1); vp->fds_ndx = i; vp->sh = vlsh_to_sh (vlsh); vp->events = fds[i].events; #ifdef __USE_XOPEN2K if (fds[i].events & POLLRDNORM) vp->events |= POLLIN; if (fds[i].events & POLLWRNORM) vp->events |= POLLOUT; #endif vp->revents = fds[i].revents; } else { vec_add1 (ldpw->libc_poll, fds[i]); vec_add1 (ldpw->libc_poll_idxs, i); } } do { if (vec_len (ldpw->vcl_poll)) { rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0); if (rv < 0) { errno = -rv; rv = -1; goto done; } else n_revents += rv; } if (vec_len (ldpw->libc_poll)) { rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0); if (rv < 0) goto done; else n_revents += rv; } if (n_revents) { rv = n_revents; goto done; } } while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time)); rv = 0; done: vec_foreach (vp, ldpw->vcl_poll) { fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd; fds[vp->fds_ndx].revents = vp->revents; #ifdef __USE_XOPEN2K if ((fds[vp->fds_ndx].revents & POLLIN) && (fds[vp->fds_ndx].events & POLLRDNORM)) fds[vp->fds_ndx].revents |= POLLRDNORM; if ((fds[vp->fds_ndx].revents & POLLOUT) && (fds[vp->fds_ndx].events & POLLWRNORM)) fds[vp->fds_ndx].revents |= POLLWRNORM; #endif } vec_reset_length (ldpw->vcl_poll); for (i = 0; i < vec_len (ldpw->libc_poll); i++) { fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents; } vec_reset_length (ldpw->libc_poll_idxs); vec_reset_length (ldpw->libc_poll); return rv; } #ifdef _GNU_SOURCE int ppoll (struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t * sigmask) { ldp_init_check (); clib_warning ("LDP<%d>: LDP-TBD", getpid ()); errno = ENOSYS; return -1; } #endif void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void); void DESTRUCTOR_ATTRIBUTE ldp_destructor (void); 
/*
 * Library load hook: runs swrap_constructor () and then ldp_init ().
 * If ldp_init () reports failure, an error is written to stderr and
 * the process exits with status 1.
 */
void
ldp_constructor (void)
{
  int init_rv;

  swrap_constructor ();

  init_rv = ldp_init ();
  if (init_rv == 0)
    {
      if (LDP_DEBUG > 0)
        clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ());
      return;
    }

  fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n",
           getpid ());
  _exit (1);
}

/*
 * Library unload hook: only emits a debug trace when LDP_DEBUG is
 * enabled; no teardown is performed.
 */
void
ldp_destructor (void)
{
  /* Teardown is intentionally left disabled:
     swrap_destructor ();
     if (ldp->init)
     ldp->init = 0;
   */

  if (!(LDP_DEBUG > 0))
    return;

  /* Don't use clib_warning() here because that calls writev()
   * which will call ldp_init().
   */
  fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n",
           __func__, __LINE__, getpid ());
}


/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */