aboutsummaryrefslogtreecommitdiffstats
path: root/resources/libraries/python/ContainerUtils.py
diff options
context:
space:
mode:
authorVratko Polak <vrpolak@cisco.com>2020-12-15 13:39:56 +0100
committerVratko Polak <vrpolak@cisco.com>2020-12-17 12:28:57 +0000
commita33b52ae0f255021d89307ebc694f6e907906151 (patch)
tree133f7cc93cd5475d211982c952244e96a223ba59 /resources/libraries/python/ContainerUtils.py
parent37877d670e7e2d81673222b6c3d9e7649ccd5cd5 (diff)
PAPI: Cache connected client instances
Disconnect+connect cycle is expensive and slow. This change tracks connected client instances so later "connect" to the same target uses it. Explicit disconnects are allowed (and executed before VPP ends), but once again disconnected instances are cached and reused, as creating a new instance is more expensive than just connect. + Add missing checks on interfaces being up to appropriate keyword. + Use appropriate keywords. + Add a comment explaining why a simpler keyword is not appropriate. + Improve VPP checks in containers. + Fix the vppctl check to actually work. + Add PAPI check to ensure VPP is really ready. + Delay/reorder checks to make them faster with multiple containers. + Leave some TODOs to improve various lifecycles later. + As we do not stop VPP in test/suite teardown: + One final disconnect is needed, added to __init__.robot teardowns. - Import of the final disconnect keyword is ugly, but it works. - We could use a hashable class for distinguishing node+socket pairs. - Are we connecting to VPP inside VMs? Change-Id: I49cd726740c3e8cae1591c7c84b85a447241228f Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources/libraries/python/ContainerUtils.py')
-rw-r--r--resources/libraries/python/ContainerUtils.py118
1 files changed, 101 insertions, 17 deletions
diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py
index 59d98aa538..3d70684695 100644
--- a/resources/libraries/python/ContainerUtils.py
+++ b/resources/libraries/python/ContainerUtils.py
@@ -23,9 +23,11 @@ from robot.libraries.BuiltIn import BuiltIn
from resources.libraries.python.Constants import Constants
from resources.libraries.python.CpuUtils import CpuUtils
+from resources.libraries.python.PapiExecutor import PapiSocketExecutor
from resources.libraries.python.ssh import SSH
from resources.libraries.python.topology import Topology, SocketType
from resources.libraries.python.VppConfigGenerator import VppConfigGenerator
+from resources.libraries.python.VPPUtil import VPPUtil
__all__ = [
@@ -141,23 +143,52 @@ class ContainerManager:
self.engine.container = self.containers[container]
self.engine.execute(command)
- def start_vpp_in_all_containers(self):
+ def start_vpp_in_all_containers(self, verify=True):
"""Start VPP in all containers."""
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.start_vpp()
+ # For multiple containers, delayed verify is faster.
+ self.engine.start_vpp(verify=False)
+ if verify:
+ self.verify_vpp_in_all_containers()
- def restart_vpp_in_all_containers(self):
+ def _disconnect_papi_to_all_containers(self):
+ """Disconnect any open PAPI connections to VPPs in containers.
+
+ The current PAPI implementation caches open connections,
+ so explicit disconnect is needed before VPP becomes inaccessible.
+
+ Currently this is a protected method, as restart, stop and destroy
+ are the only dangerous methods, and all are handled by ContainerManager.
+ """
+ for container_object in self.containers.values():
+ PapiSocketExecutor.disconnect_by_node_and_socket(
+ container_object.node,
+ container_object.api_socket,
+ )
+
+ def restart_vpp_in_all_containers(self, verify=True):
"""Restart VPP in all containers."""
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.restart_vpp()
+ # For multiple containers, delayed verify is faster.
+ self.engine.restart_vpp(verify=False)
+ if verify:
+ self.verify_vpp_in_all_containers()
def verify_vpp_in_all_containers(self):
"""Verify that VPP is installed and running in all containers."""
+ # For multiple containers, multiple fors are faster.
+ for container in self.containers:
+ self.engine.container = self.containers[container]
+ self.engine.verify_vppctl()
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.verify_vpp()
+ self.engine.adjust_privileges()
+ for container in self.containers:
+ self.engine.container = self.containers[container]
+ self.engine.verify_vpp_papi()
def configure_vpp_in_all_containers(self, chain_topology, **kwargs):
"""Configure VPP in all containers.
@@ -481,12 +512,16 @@ class ContainerManager:
def stop_all_containers(self):
"""Stop all containers."""
+ # TODO: Rework if containers can be affected outside ContainerManager.
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
self.engine.stop()
def destroy_all_containers(self):
"""Destroy all containers."""
+ # TODO: Rework if containers can be affected outside ContainerManager.
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
self.engine.destroy()
@@ -543,7 +578,7 @@ class ContainerEngine:
"""System info."""
raise NotImplementedError
- def start_vpp(self):
+ def start_vpp(self, verify=True):
"""Start VPP inside a container."""
self.execute(
u"setsid /usr/bin/vpp -c /etc/vpp/startup.conf "
@@ -556,38 +591,51 @@ class ContainerEngine:
self.container.node,
SocketType.PAPI,
self.container.name,
- f"/tmp/vpp_sockets/{self.container.name}/api.sock"
+ self.container.api_socket,
)
topo_instance.add_new_socket(
self.container.node,
SocketType.STATS,
self.container.name,
- f"/tmp/vpp_sockets/{self.container.name}/stats.sock"
+ self.container.stats_socket,
)
- self.verify_vpp()
- self.adjust_privileges()
+ if verify:
+ self.verify_vpp()
- def restart_vpp(self):
+ def restart_vpp(self, verify=True):
"""Restart VPP service inside a container."""
self.execute(u"pkill vpp")
- self.start_vpp()
+ self.start_vpp(verify=verify)
+
+ def verify_vpp(self):
+ """Verify VPP is running and ready."""
+ self.verify_vppctl()
+ self.adjust_privileges()
+ self.verify_vpp_papi()
# TODO Rewrite to use the VPPUtil.py functionality and remove this.
- def verify_vpp(self, retries=120, retry_wait=1):
+ def verify_vppctl(self, retries=120, retry_wait=1):
"""Verify that VPP is installed and running inside container.
+ This function waits a while so VPP can start.
+ PCI interfaces are listed for debug purposes.
+ When the check passes, VPP API socket is created on remote side,
+ but perhaps its directory does not have the correct access rights yet.
+
:param retries: Check for VPP for this number of times Default: 120
:param retry_wait: Wait for this number of seconds between retries.
"""
for _ in range(retries + 1):
try:
+ # Execute puts the command into single quotes,
+ # so inner arguments are enclosed in qouble quotes here.
self.execute(
- u"vppctl show pci 2>&1 | "
- u"fgrep -v 'Connection refused' | "
- u"fgrep -v 'No such file or directory'"
+ u'vppctl show pci 2>&1 | '
+ u'fgrep -v "Connection refused" | '
+ u'fgrep -v "No such file or directory"'
)
break
- except RuntimeError:
+ except (RuntimeError, AssertionError):
sleep(retry_wait)
else:
self.execute(u"cat /tmp/vppd.log")
@@ -599,6 +647,32 @@ class ContainerEngine:
"""Adjust privileges to control VPP without sudo."""
self.execute("chmod -R o+rwx /run/vpp")
+ def verify_vpp_papi(self, retries=120, retry_wait=1):
+ """Verify that VPP is available for PAPI.
+
+ This also opens and caches PAPI connection for quick reuse.
+ The connection is disconnected when ContainerManager decides to do so.
+
+ :param retries: Check for VPP for this number of times Default: 120
+ :param retry_wait: Wait for this number of seconds between retries.
+ """
+ # Wait for success.
+ for _ in range(retries + 1):
+ try:
+ VPPUtil.vpp_show_version(
+ node=self.container.node,
+ remote_vpp_socket=self.container.api_socket,
+ log=False,
+ )
+ break
+ except (RuntimeError, AssertionError):
+ sleep(retry_wait)
+ else:
+ self.execute(u"cat /tmp/vppd.log")
+ raise RuntimeError(
+ f"VPP PAPI fails in container: {self.container.name}"
+ )
+
def create_base_vpp_startup_config(self, cpuset_cpus=None):
"""Create base startup configuration of VPP on container.
@@ -1188,8 +1262,18 @@ class Container:
except KeyError:
# Creating new attribute
if attr == u"node":
+ # Create and cache a connected SSH instance.
self.__dict__[u"ssh"] = SSH()
self.__dict__[u"ssh"].connect(value)
+ elif attr == u"name":
+ # Socket paths to not have mutable state,
+ # this just saves some horizontal space in callers.
+ # TODO: Rename the dir so other apps can add sockets easily.
+ # E.g. f"/tmp/app_sockets/{value}/vpp_api.sock"
+ path = f"/tmp/vpp_sockets/{value}"
+ self.__dict__[u"socket_dir"] = path
+ self.__dict__[u"api_socket"] = f"{path}/api.sock"
+ self.__dict__[u"stats_socket"] = f"{path}/stats.sock"
self.__dict__[attr] = value
else:
# Updating attribute base of type