1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
|
# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DUT setup library."""
from robot.api import logger
from resources.libraries.python.Constants import Constants
from resources.libraries.python.ssh import SSH, exec_cmd_no_error
from resources.libraries.python.topology import NodeType, Topology
class DUTSetup:
    """Contains methods for setting up DUTs.

    All methods are static. They run shell commands on topology nodes, either
    through exec_cmd_no_error or through an explicitly opened SSH connection.
    Names starting with an underscore are internal helpers.
    """

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _dut_nodes(nodes):
        """Lazily yield the nodes of type DUT from the topology.

        :param nodes: Nodes in the topology.
        :type nodes: dict
        :returns: Generator over nodes whose "type" is NodeType.DUT.
        :rtype: generator
        """
        return (
            node for node in nodes.values() if node[u"type"] == NodeType.DUT
        )

    @staticmethod
    def _escape_pci_addr(pci_addr):
        """Prefix every colon of a PCI address with a backslash.

        :param pci_addr: PCI device address, e.g. 0000:00:05.0.
        :type pci_addr: str
        :returns: Address usable inside the shell commands built here.
        :rtype: str
        """
        return pci_addr.replace(u":", r"\:")

    @staticmethod
    def _parse_pid_list(stdout):
        """Convert whitespace separated PIDs to a list of integers.

        :param stdout: Output of "pidof".
        :type stdout: str
        :returns: PIDs in the order printed, empty list for blank output.
        :rtype: list
        :raises ValueError: If any token is not an integer.
        """
        return [int(pid) for pid in stdout.split()]

    @staticmethod
    def _parse_lspci_driver(stdout):
        """Extract the value of the "Driver:" tag from "lspci -vmmk" output.

        Only lines of the form "<tag><TAB><value>" are considered; lines
        without a tab are ignored.

        :param stdout: Output of "lspci -vmmks <address>".
        :type stdout: str
        :returns: Driver name (may be empty), or None if no such tag exists.
        :rtype: str or None
        """
        for line in stdout.splitlines():
            name, separator, value = line.partition(u"\t")
            if separator and name == u"Driver:":
                return value
        return None

    @staticmethod
    def _is_hugetlbfs_mounted(mounts, huge_mnt):
        """Tell whether hugetlbfs is mounted on the given mount point.

        :param mounts: Content of /proc/mounts.
        :param huge_mnt: HugePage mount point to look for.
        :type mounts: str
        :type huge_mnt: str
        :returns: True if a matching hugetlbfs entry is present.
        :rtype: bool
        """
        for line in mounts.splitlines():
            # Try to find something like:
            # none /mnt/huge hugetlbfs rw,realtime,pagesize=2048k 0 0
            fields = line.split()
            # Lines with fewer than three fields cannot describe a mount.
            if len(fields) < 3:
                continue
            if fields[2] == u"hugetlbfs" and fields[1] == huge_mnt:
                return True
        return False

    @staticmethod
    def _run_service_action(node, service, action, description):
        """Run a service control action on node and then fetch service logs.

        Uses supervisorctl when the node runs in a container, "service"
        otherwise.

        :param node: Node in the topology.
        :param service: Service unit name.
        :param action: Action passed to supervisorctl / service.
        :param description: Verb used in the failure message.
        :type node: dict
        :type service: str
        :type action: str
        :type description: str
        """
        if DUTSetup.running_in_container(node):
            command = f"supervisorctl {action} {service}"
        else:
            command = f"service {service} {action}"
        message = \
            f"Node {node[u'host']} failed to {description} service {service}"

        exec_cmd_no_error(
            node, command, timeout=180, sudo=True, message=message
        )

        DUTSetup.get_service_logs(node, service)

    @staticmethod
    def _bind_via_override(node, pci_addr, driver, override_file, message):
        """Bind a PCI device to driver using its driver_override file.

        Three commands are executed in order: write the driver name to
        driver_override, write the device address to the driver's bind file,
        and finally write an empty line to driver_override again.

        :param node: DUT node.
        :param pci_addr: Address written to the driver's bind file.
        :param driver: Driver to bind.
        :param override_file: Path of the device's driver_override file.
        :param message: Failure message passed to exec_cmd_no_error.
        :type node: dict
        :type pci_addr: str
        :type driver: str
        :type override_file: str
        :type message: str
        """
        commands = (
            f"sh -c \"echo {driver} | tee {override_file}\"",
            f"sh -c \"echo {pci_addr} | "
            f"tee /sys/bus/pci/drivers/{driver}/bind\"",
            f"sh -c \"echo | tee {override_file}\"",
        )
        for command in commands:
            exec_cmd_no_error(
                node, command, timeout=120, sudo=True, message=message
            )

    @staticmethod
    def _read_int_with_retries(node, command, trace_message, error_message):
        """Run a command with sudo and parse its output as integer.

        The command is tried up to three times; a try counts as failed when
        the return code is non-zero or the output is not an integer.

        :param node: Node in the topology.
        :param command: Command to execute.
        :param trace_message: Message logged when the output is not a number.
        :param error_message: Message of the exception raised at the end.
        :type node: dict
        :type command: str
        :type trace_message: str
        :type error_message: str
        :returns: Parsed value.
        :rtype: int
        :raises RuntimeError: If all three tries failed.
        """
        ssh = SSH()
        ssh.connect(node)

        for _attempt in range(3):
            ret_code, stdout, _ = ssh.exec_command_sudo(command)
            if int(ret_code) != 0:
                continue
            try:
                return int(stdout)
            except ValueError:
                logger.trace(trace_message)
        raise RuntimeError(error_message)

    # ------------------------------------------------------------------
    # Services
    # ------------------------------------------------------------------

    @staticmethod
    def get_service_logs(node, service):
        """Get specific service unit logs from node.

        In a container the supervisor log file(s) under /tmp are echoed,
        otherwise journalctl is queried for the unit since the timestamp
        reported by "systemctl show -p ActiveEnterTimestamp". The command
        output is not returned by this method.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        if DUTSetup.running_in_container(node):
            command = u"echo $(< /tmp/*supervisor*.log)"
        else:
            command = (
                f"journalctl --no-pager --unit={service} "
                f"--since=\"$(echo `systemctl show -p ActiveEnterTimestamp "
                f"{service}` | awk \'{{print $2 $3}}\')\""
            )
        message = f"Node {node[u'host']} failed to get logs from unit {service}"

        exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )

    @staticmethod
    def get_service_logs_on_all_duts(nodes, service):
        """Get specific service unit logs from all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in DUTSetup._dut_nodes(nodes):
            DUTSetup.get_service_logs(node, service)

    @staticmethod
    def restart_service(node, service):
        """Restart the named service on node and fetch its logs.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        DUTSetup._run_service_action(node, service, u"restart", u"restart")

    @staticmethod
    def restart_service_on_all_duts(nodes, service):
        """Restart the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in DUTSetup._dut_nodes(nodes):
            DUTSetup.restart_service(node, service)

    @staticmethod
    def start_service(node, service):
        """Start up the named service on node and fetch its logs.

        Note that the command currently issued is "restart", not "start".

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        # TODO: change command to start once all parent function updated.
        DUTSetup._run_service_action(node, service, u"restart", u"start")

    @staticmethod
    def start_service_on_all_duts(nodes, service):
        """Start up the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in DUTSetup._dut_nodes(nodes):
            DUTSetup.start_service(node, service)

    @staticmethod
    def stop_service(node, service):
        """Stop the named service on node and fetch its logs.

        :param node: Node in the topology.
        :param service: Service unit name.
        :type node: dict
        :type service: str
        """
        DUTSetup._run_service_action(node, service, u"stop", u"stop")

    @staticmethod
    def stop_service_on_all_duts(nodes, service):
        """Stop the named service on all DUTs.

        :param nodes: Nodes in the topology.
        :param service: Service unit name.
        :type nodes: dict
        :type service: str
        """
        for node in DUTSetup._dut_nodes(nodes):
            DUTSetup.stop_service(node, service)

    # ------------------------------------------------------------------
    # VPP process
    # ------------------------------------------------------------------

    @staticmethod
    def get_vpp_pid(node):
        """Get PID of running VPP process.

        "pidof vpp" is executed up to three times; the first try that prints
        at least one PID determines the result.

        :param node: DUT node.
        :type node: dict
        :returns: PID, list of PIDs if more than one VPP process is running,
            or None if no PID was printed in any of the tries.
        :rtype: int or list or None
        :raises RuntimeError: If it is not possible to get the PID.
        """
        ssh = SSH()
        ssh.connect(node)

        for i in range(3):
            logger.trace(f"Try {i}: Get VPP PID")
            ret_code, stdout, stderr = ssh.exec_command(u"pidof vpp")

            if int(ret_code):
                raise RuntimeError(
                    f"Not possible to get PID of VPP process on node: "
                    f"{node[u'host']}\n {stdout + stderr}"
                )

            pid_list = DUTSetup._parse_pid_list(stdout)
            if len(pid_list) == 1:
                # Stop retrying as soon as the PID is known.
                return pid_list[0]
            if pid_list:
                logger.debug(
                    f"More then one VPP PID found on node {node[u'host']}"
                )
                return pid_list
            logger.debug(f"No VPP PID found on node {node[u'host']}")

        return None

    @staticmethod
    def get_vpp_pids(nodes):
        """Get PID of running VPP process on all DUTs.

        :param nodes: DUT nodes.
        :type nodes: dict
        :returns: PIDs keyed by the host of each DUT node.
        :rtype: dict
        """
        return {
            node[u"host"]: DUTSetup.get_vpp_pid(node)
            for node in DUTSetup._dut_nodes(nodes)
        }

    # ------------------------------------------------------------------
    # Crypto device / SR-IOV / PCI drivers
    # ------------------------------------------------------------------

    @staticmethod
    def crypto_device_verify(node, crypto_type, numvfs, force_init=False):
        """Verify if Crypto QAT device virtual functions are initialized on all
        DUTs. If parameter force initialization is set to True, then try to
        initialize or remove VFs on QAT.

        :param node: DUT node.
        :param crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :param force_init: If True then try to initialize to specific value.
        :type node: dict
        :type crypto_type: string
        :type numvfs: int
        :type force_init: bool
        :returns: nothing
        :raises RuntimeError: If QAT VFs are not created and force init is set
            to False.
        """
        pci_addr = Topology.get_cryptodev(node)
        sriov_numvfs = DUTSetup.get_sriov_numvfs(node, pci_addr)

        if sriov_numvfs == numvfs:
            return
        if not force_init:
            raise RuntimeError(
                f"QAT device failed to create VFs on {node[u'host']}"
            )
        # QAT is not initialized and we want to initialize with numvfs
        DUTSetup.crypto_device_init(node, crypto_type, numvfs)

    @staticmethod
    def crypto_device_init(node, crypto_type, numvfs):
        """Init Crypto QAT device virtual functions on DUT.

        The VPP service is stopped as part of the initialization and is not
        started again by this method.

        :param node: DUT node.
        :param crypto_type: Crypto device type - HW_DH895xcc or HW_C3xxx.
        :param numvfs: Number of VFs to initialize, 0 - disable the VFs.
        :type node: dict
        :type crypto_type: string
        :type numvfs: int
        :returns: nothing
        :raises RuntimeError: If failed to stop VPP or QAT failed to initialize.
        """
        if crypto_type == u"HW_DH895xcc":
            kernel_mod = u"qat_dh895xcc"
            kernel_drv = u"dh895xcc"
        elif crypto_type == u"HW_C3xxx":
            kernel_mod = u"qat_c3xxx"
            kernel_drv = u"c3xxx"
        else:
            raise RuntimeError(
                f"Unsupported crypto device type on {node[u'host']}"
            )

        pci_addr = Topology.get_cryptodev(node)

        # QAT device must be re-bound to kernel driver before initialization.
        DUTSetup.verify_kernel_module(node, kernel_mod, force_load=True)

        # Stop VPP to prevent deadlock.
        DUTSetup.stop_service(node, Constants.VPP_UNIT)

        current_driver = DUTSetup.get_pci_dev_driver(
            node, DUTSetup._escape_pci_addr(pci_addr)
        )
        if current_driver is not None:
            DUTSetup.pci_driver_unbind(node, pci_addr)

        # Bind to kernel driver.
        DUTSetup.pci_driver_bind(node, pci_addr, kernel_drv)

        # Initialize QAT VFs.
        if numvfs > 0:
            DUTSetup.set_sriov_numvfs(node, pci_addr, numvfs)

    @staticmethod
    def get_virtfn_pci_addr(node, pf_pci_addr, vf_id):
        """Get PCI address of Virtual Function.

        The address is the basename of the target of the
        /sys/bus/pci/devices/<pf>/virtfn<vf_id> link.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI address.
        :param vf_id: Virtual Function number.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :returns: Virtual Function PCI address.
        :rtype: str
        :raises RuntimeError: If failed to get Virtual Function PCI address.
        """
        command = f"sh -c \"basename $(readlink " \
            f"/sys/bus/pci/devices/{pf_pci_addr}/virtfn{vf_id})\""
        message = u"Failed to get virtual function PCI address."

        stdout, _ = exec_cmd_no_error(
            node, command, timeout=30, sudo=True, message=message
        )

        return stdout.strip()

    @staticmethod
    def get_sriov_numvfs(node, pf_pci_addr):
        """Get number of SR-IOV VFs.

        The sriov_numvfs file is read up to three times until its content
        parses as an integer.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :type node: dict
        :type pf_pci_addr: str
        :returns: Number of VFs, or None if no try produced an integer.
        :rtype: int or None
        :raises RuntimeError: If PCI device is not SR-IOV capable.
        """
        pci = DUTSetup._escape_pci_addr(pf_pci_addr)
        command = f"cat /sys/bus/pci/devices/{pci}/sriov_numvfs"
        message = f"PCI device {pf_pci_addr} is not a SR-IOV device."

        for _attempt in range(3):
            stdout, _ = exec_cmd_no_error(
                node, command, timeout=30, sudo=True, message=message
            )
            try:
                return int(stdout)
            except ValueError:
                logger.trace(
                    f"Reading sriov_numvfs info failed on {node[u'host']}"
                )
        return None

    @staticmethod
    def set_sriov_numvfs(node, pf_pci_addr, numvfs=0):
        """Init or reset SR-IOV virtual functions by setting its number on PCI
        device on DUT. Setting to zero removes all VFs.

        :param node: DUT node.
        :param pf_pci_addr: Physical Function PCI device address.
        :param numvfs: Number of VFs to initialize, 0 - removes the VFs.
        :type node: dict
        :type pf_pci_addr: str
        :type numvfs: int
        :raises RuntimeError: Failed to create VFs on PCI.
        """
        pci = DUTSetup._escape_pci_addr(pf_pci_addr)
        command = f"sh -c \"echo {numvfs} | " \
            f"tee /sys/bus/pci/devices/{pci}/sriov_numvfs\""
        message = f"Failed to create {numvfs} VFs on {pf_pci_addr} device " \
            f"on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_driver_unbind(node, pci_addr):
        """Unbind PCI device from current driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :raises RuntimeError: If PCI device unbind failed.
        """
        pci = DUTSetup._escape_pci_addr(pci_addr)
        command = f"sh -c \"echo {pci_addr} | " \
            f"tee /sys/bus/pci/devices/{pci}/driver/unbind\""
        message = f"Failed to unbind PCI device {pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_driver_bind(node, pci_addr, driver):
        """Bind PCI device to driver on node.

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :param driver: Driver to bind.
        :type node: dict
        :type pci_addr: str
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        message = f"Failed to bind PCI device {pci_addr} to {driver} " \
            f"on host {node[u'host']}"
        pci = DUTSetup._escape_pci_addr(pci_addr)

        DUTSetup._bind_via_override(
            node, pci_addr, driver,
            f"/sys/bus/pci/devices/{pci}/driver_override", message
        )

    @staticmethod
    def pci_vf_driver_unbind(node, pf_pci_addr, vf_id):
        """Unbind Virtual Function from driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :raises RuntimeError: If Virtual Function unbind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = DUTSetup._escape_pci_addr(pf_pci_addr)
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        command = f"sh -c \"echo {vf_pci_addr} | tee {vf_path}/driver/unbind\""
        message = f"Failed to unbind VF {vf_pci_addr} on {node[u'host']}"

        exec_cmd_no_error(
            node, command, timeout=120, sudo=True, message=message
        )

    @staticmethod
    def pci_vf_driver_bind(node, pf_pci_addr, vf_id, driver):
        """Bind Virtual Function to driver on node.

        :param node: DUT node.
        :param pf_pci_addr: PCI device address.
        :param vf_id: Virtual Function ID.
        :param driver: Driver to bind.
        :type node: dict
        :type pf_pci_addr: str
        :type vf_id: int
        :type driver: str
        :raises RuntimeError: If PCI device bind failed.
        """
        vf_pci_addr = DUTSetup.get_virtfn_pci_addr(node, pf_pci_addr, vf_id)
        pf_pci = DUTSetup._escape_pci_addr(pf_pci_addr)
        vf_path = f"/sys/bus/pci/devices/{pf_pci}/virtfn{vf_id}"

        message = f"Failed to bind VF {vf_pci_addr} to {driver} " \
            f"on {node[u'host']}"

        DUTSetup._bind_via_override(
            node, vf_pci_addr, driver, f"{vf_path}/driver_override", message
        )

    @staticmethod
    def get_pci_dev_driver(node, pci_addr):
        """Get current PCI device driver on node.

        lspci is queried up to three times. When its output has no "Driver:"
        tag, a PCI rescan is triggered before the next try.

        .. note::
            # lspci -vmmks 0000:00:05.0
            Slot:   00:05.0
            Class:  Ethernet controller
            Vendor: Red Hat, Inc
            Device: Virtio network device
            SVendor:        Red Hat, Inc
            SDevice:        Device 0001
            PhySlot:        5
            Driver: virtio-pci

        :param node: DUT node.
        :param pci_addr: PCI device address.
        :type node: dict
        :type pci_addr: str
        :returns: Driver or None
        :rtype: str or None
        :raises RuntimeError: If PCI rescan or lspci command execution failed.
        """
        ssh = SSH()
        ssh.connect(node)

        for i in range(3):
            logger.trace(f"Try number {i}: Get PCI device driver")

            cmd = f"lspci -vmmks {pci_addr}"
            ret_code, stdout, _ = ssh.exec_command(cmd)
            if int(ret_code):
                raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")

            driver = DUTSetup._parse_lspci_driver(stdout)
            if driver is not None:
                return driver

            # No rescan after the last try, nothing would read its result.
            if i < 2:
                logger.trace(
                    f"Driver for PCI device {pci_addr} not found, "
                    f"executing pci rescan and retrying"
                )
                cmd = u"sh -c \"echo 1 > /sys/bus/pci/rescan\""
                ret_code, _, _ = ssh.exec_command_sudo(cmd)
                if int(ret_code) != 0:
                    raise RuntimeError(f"'{cmd}' failed on '{node[u'host']}'")

        return None

    # ------------------------------------------------------------------
    # Kernel modules
    # ------------------------------------------------------------------

    @staticmethod
    def verify_kernel_module(node, module, force_load=False):
        """Verify if kernel module is loaded on node. If parameter force
        load is set to True, then try to load the modules.

        :param node: Node.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type node: dict
        :type module: str
        :type force_load: bool
        :raises RuntimeError: If module is not loaded or failed to load.
        """
        command = f"grep -w {module} /proc/modules"
        message = f"Kernel module {module} is not loaded " \
            f"on host {node[u'host']}"

        try:
            exec_cmd_no_error(
                node, command, timeout=30, sudo=False, message=message
            )
        except RuntimeError:
            if not force_load:
                raise
            # Module is not loaded and we want to load it
            DUTSetup.load_kernel_module(node, module)

    @staticmethod
    def verify_kernel_module_on_all_duts(nodes, module, force_load=False):
        """Verify if kernel module is loaded on all DUTs. If parameter force
        load is set to True, then try to load the modules.

        :param nodes: DUT nodes.
        :param module: Module to verify.
        :param force_load: If True then try to load module.
        :type nodes: dict
        :type module: str
        :type force_load: bool
        """
        for node in DUTSetup._dut_nodes(nodes):
            DUTSetup.verify_kernel_module(node, module, force_load)

    @staticmethod
    def verify_uio_driver_on_all_duts(nodes):
        """Verify if uio driver kernel module is loaded on all DUTs. If module
        is not present it will try to load it.

        :param nodes: DUT nodes.
        :type nodes: dict
        """
        for node in DUTSetup._dut_nodes(nodes):
            uio_driver = Topology.get_uio_driver(node)
            DUTSetup.verify_kernel_module(node, uio_driver, force_load=True)

    @staticmethod
    def load_kernel_module(node, module):
        """Load kernel module on node.

        :param node: DUT node.
        :param module: Module to load.
        :type node: dict
        :type module: str
        :returns: nothing
        :raises RuntimeError: If loading failed.
        """
        command = f"modprobe {module}"
        message = f"Failed to load {module} on host {node[u'host']}"

        exec_cmd_no_error(node, command, timeout=30, sudo=True, message=message)

    # ------------------------------------------------------------------
    # VPP installation / containers
    # ------------------------------------------------------------------

    @staticmethod
    def install_vpp_on_all_duts(nodes, vpp_pkg_dir):
        """Install VPP on all DUT nodes. Start the VPP service in case of
        systemd is not available or does not support autostart.

        Nodes reporting DISTRIB_ID "Ubuntu" are handled with apt-get/dpkg,
        all other nodes with yum/rpm. The vpp-api-python package file is
        deleted from vpp_pkg_dir before the installation.

        :param nodes: Nodes in the topology.
        :param vpp_pkg_dir: Path to directory where VPP packages are stored.
            It is used as a plain prefix of the package file names.
        :type nodes: dict
        :type vpp_pkg_dir: str
        :raises RuntimeError: If failed to remove or install VPP.
        """
        for node in DUTSetup._dut_nodes(nodes):
            message = f"Failed to install VPP on host {node[u'host']}!"

            command = u"ln -s /dev/null /etc/sysctl.d/80-vpp.conf || true"
            exec_cmd_no_error(node, command, sudo=True)

            command = u". /etc/lsb-release; echo \"${DISTRIB_ID}\""
            stdout, _ = exec_cmd_no_error(node, command)

            if stdout.strip() == u"Ubuntu":
                exec_cmd_no_error(
                    node, u"apt-get purge -y '*vpp*' || true",
                    timeout=120, sudo=True
                )
                # workaround to avoid installation of vpp-api-python
                # (f-string is required so the real directory is used)
                exec_cmd_no_error(
                    node, f"rm -f {vpp_pkg_dir}vpp-api-python.deb",
                    timeout=120, sudo=True
                )
                exec_cmd_no_error(
                    node, f"dpkg -i --force-all {vpp_pkg_dir}*.deb",
                    timeout=120, sudo=True, message=message
                )
                exec_cmd_no_error(node, u"dpkg -l | grep vpp", sudo=True)
                if DUTSetup.running_in_container(node):
                    DUTSetup.restart_service(node, Constants.VPP_UNIT)
            else:
                exec_cmd_no_error(
                    node, u"yum -y remove '*vpp*' || true",
                    timeout=120, sudo=True
                )
                # workaround to avoid installation of vpp-api-python
                # (f-string is required so the real directory is used)
                exec_cmd_no_error(
                    node, f"rm -f {vpp_pkg_dir}vpp-api-python.rpm",
                    timeout=120, sudo=True
                )
                exec_cmd_no_error(
                    node, f"rpm -ivh {vpp_pkg_dir}*.rpm",
                    timeout=120, sudo=True, message=message
                )
                exec_cmd_no_error(node, u"rpm -qai '*vpp*'", sudo=True)
                DUTSetup.restart_service(node, Constants.VPP_UNIT)

    @staticmethod
    def running_in_container(node):
        """This method tests if topology node is running inside container.

        The test greps /proc/1/cgroup for the string "docker".

        :param node: Topology node.
        :type node: dict
        :returns: True if running in docker container, false if not or failed
            to detect.
        :rtype: bool
        """
        command = u"fgrep docker /proc/1/cgroup"
        message = u"Failed to get cgroup settings."
        try:
            exec_cmd_no_error(
                node, command, timeout=30, sudo=False, message=message
            )
        except RuntimeError:
            return False
        return True

    @staticmethod
    def get_docker_mergeddir(node, uuid):
        """Get Docker overlay for MergedDir diff.

        :param node: DUT node.
        :param uuid: Docker UUID.
        :type node: dict
        :type uuid: str
        :returns: Docker container MergedDir.
        :rtype: str
        :raises RuntimeError: If getting output failed.
        """
        command = f"docker inspect " \
            f"--format='{{{{.GraphDriver.Data.MergedDir}}}}' {uuid}"
        message = f"Failed to get directory of {uuid} on host {node[u'host']}"

        stdout, _ = exec_cmd_no_error(node, command, sudo=True, message=message)
        return stdout.strip()

    # ------------------------------------------------------------------
    # Huge pages
    # ------------------------------------------------------------------

    @staticmethod
    def get_huge_page_size(node):
        """Get default size of huge pages in system.

        The value is the number printed on the Hugepagesize line of
        /proc/meminfo.

        :param node: Node in the topology.
        :type node: dict
        :returns: Default size of free huge pages in system.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        return DUTSetup._read_int_with_retries(
            node,
            u"grep Hugepagesize /proc/meminfo | awk '{ print $2 }'",
            u"Reading huge page size information failed",
            u"Getting huge page size information failed."
        )

    @staticmethod
    def get_huge_page_free(node, huge_size):
        """Get number of free huge pages in system.

        :param node: Node in the topology.
        :param huge_size: Size of hugepages, as used in the name of the
            /sys/kernel/mm/hugepages/hugepages-<size>kB directory.
        :type node: dict
        :type huge_size: int
        :returns: Number of free huge pages in system.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        # TODO: add numa aware option
        return DUTSetup._read_int_with_retries(
            node,
            f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
            f"free_hugepages",
            u"Reading free huge pages information failed",
            u"Getting free huge pages information failed."
        )

    @staticmethod
    def get_huge_page_total(node, huge_size):
        """Get total number of huge pages in system.

        :param node: Node in the topology.
        :param huge_size: Size of hugepages, as used in the name of the
            /sys/kernel/mm/hugepages/hugepages-<size>kB directory.
        :type node: dict
        :type huge_size: int
        :returns: Total number of huge pages in system.
        :rtype: int
        :raises RuntimeError: If reading failed for three times.
        """
        # TODO: add numa aware option
        return DUTSetup._read_int_with_retries(
            node,
            f"cat /sys/kernel/mm/hugepages/hugepages-{huge_size}kB/"
            f"nr_hugepages",
            u"Reading total huge pages information failed",
            u"Getting total huge pages information failed."
        )

    @staticmethod
    def check_huge_page(node, huge_mnt, mem_size, allocate=False):
        """Check if there is enough HugePages in system. If allocate is set to
        true, try to allocate more HugePages.

        Afterwards make sure hugetlbfs is mounted on huge_mnt, creating the
        directory and mounting it when no such entry is in /proc/mounts.

        :param node: Node in the topology.
        :param huge_mnt: HugePage mount point.
        :param mem_size: Requested memory in MB.
        :param allocate: Whether to allocate more memory if not enough.
        :type node: dict
        :type huge_mnt: str
        :type mem_size: str
        :type allocate: bool
        :raises RuntimeError: Mounting hugetlbfs failed or not enough HugePages
            or increasing map count failed.
        """
        # TODO: split function into smaller parts.
        ssh = SSH()
        ssh.connect(node)

        # Get huge pages information
        huge_size = DUTSetup.get_huge_page_size(node)
        huge_free = DUTSetup.get_huge_page_free(node, huge_size)
        huge_total = DUTSetup.get_huge_page_total(node, huge_size)

        # Check if memory requested is available on
        # (mem_size is in MB, huge_size in kB, so compare both in kB).
        mem_size = int(mem_size)
        requested_kb = mem_size * 1024
        free_kb = huge_free * huge_size
        if requested_kb > free_kb:
            # If we do not want to allocate dynamically end with error
            if not allocate:
                raise RuntimeError(
                    f"Not enough free huge pages: {huge_free}, "
                    f"{free_kb // 1024} MB"
                )
            # If we want to allocate hugepage dynamically
            mem_needed = requested_kb - free_kb
            huge_to_allocate = ((mem_needed // huge_size) * 2) + huge_total
            max_map_count = huge_to_allocate * 4
            # Increase maximum number of memory map areas a process may have
            ret_code, _, _ = ssh.exec_command_sudo(
                f"echo \"{max_map_count}\" | "
                f"sudo tee /proc/sys/vm/max_map_count"
            )
            if int(ret_code) != 0:
                raise RuntimeError(
                    f"Increase map count failed on {node[u'host']}"
                )
            # Increase hugepage count
            ret_code, _, _ = ssh.exec_command_sudo(
                f"echo \"{huge_to_allocate}\" | "
                f"sudo tee /proc/sys/vm/nr_hugepages"
            )
            if int(ret_code) != 0:
                raise RuntimeError(
                    f"Increase huge pages count failed on {node[u'host']}"
                )

        # Check if huge pages mount point exist
        ret_code, stdout, _ = ssh.exec_command(u"cat /proc/mounts")
        has_huge_mnt = int(ret_code) == 0 and \
            DUTSetup._is_hugetlbfs_mounted(stdout, huge_mnt)

        # If huge page mount point not exist create one
        if not has_huge_mnt:
            ret_code, _, _ = ssh.exec_command_sudo(f"mkdir -p {huge_mnt}")
            if int(ret_code) != 0:
                raise RuntimeError(
                    f"Create mount dir failed on {node[u'host']}"
                )
            # NOTE(review): page size of the mount is fixed to 2048k while
            # the checks above use the system default size - confirm.
            ret_code, _, _ = ssh.exec_command_sudo(
                f"mount -t hugetlbfs -o pagesize=2048k none {huge_mnt}"
            )
            if int(ret_code) != 0:
                raise RuntimeError(
                    f"Mount huge pages failed on {node[u'host']}"
                )
|