aboutsummaryrefslogtreecommitdiffstats
path: root/resources/libraries/python
diff options
context:
space:
mode:
Diffstat (limited to 'resources/libraries/python')
-rw-r--r--resources/libraries/python/ContainerUtils.py118
-rw-r--r--resources/libraries/python/PapiExecutor.py430
-rw-r--r--resources/libraries/python/VPPUtil.py24
-rw-r--r--resources/libraries/python/topology.py1
4 files changed, 476 insertions, 97 deletions
diff --git a/resources/libraries/python/ContainerUtils.py b/resources/libraries/python/ContainerUtils.py
index 59d98aa538..3d70684695 100644
--- a/resources/libraries/python/ContainerUtils.py
+++ b/resources/libraries/python/ContainerUtils.py
@@ -23,9 +23,11 @@ from robot.libraries.BuiltIn import BuiltIn
from resources.libraries.python.Constants import Constants
from resources.libraries.python.CpuUtils import CpuUtils
+from resources.libraries.python.PapiExecutor import PapiSocketExecutor
from resources.libraries.python.ssh import SSH
from resources.libraries.python.topology import Topology, SocketType
from resources.libraries.python.VppConfigGenerator import VppConfigGenerator
+from resources.libraries.python.VPPUtil import VPPUtil
__all__ = [
@@ -141,23 +143,52 @@ class ContainerManager:
self.engine.container = self.containers[container]
self.engine.execute(command)
- def start_vpp_in_all_containers(self):
+ def start_vpp_in_all_containers(self, verify=True):
"""Start VPP in all containers."""
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.start_vpp()
+ # For multiple containers, delayed verify is faster.
+ self.engine.start_vpp(verify=False)
+ if verify:
+ self.verify_vpp_in_all_containers()
- def restart_vpp_in_all_containers(self):
+ def _disconnect_papi_to_all_containers(self):
+ """Disconnect any open PAPI connections to VPPs in containers.
+
+ The current PAPI implementation caches open connections,
+ so explicit disconnect is needed before VPP becomes inaccessible.
+
+ Currently this is a protected method, as restart, stop and destroy
+ are the only dangerous methods, and all are handled by ContainerManager.
+ """
+ for container_object in self.containers.values():
+ PapiSocketExecutor.disconnect_by_node_and_socket(
+ container_object.node,
+ container_object.api_socket,
+ )
+
+ def restart_vpp_in_all_containers(self, verify=True):
"""Restart VPP in all containers."""
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.restart_vpp()
+ # For multiple containers, delayed verify is faster.
+ self.engine.restart_vpp(verify=False)
+ if verify:
+ self.verify_vpp_in_all_containers()
def verify_vpp_in_all_containers(self):
"""Verify that VPP is installed and running in all containers."""
+ # For multiple containers, multiple fors are faster.
+ for container in self.containers:
+ self.engine.container = self.containers[container]
+ self.engine.verify_vppctl()
for container in self.containers:
self.engine.container = self.containers[container]
- self.engine.verify_vpp()
+ self.engine.adjust_privileges()
+ for container in self.containers:
+ self.engine.container = self.containers[container]
+ self.engine.verify_vpp_papi()
def configure_vpp_in_all_containers(self, chain_topology, **kwargs):
"""Configure VPP in all containers.
@@ -481,12 +512,16 @@ class ContainerManager:
def stop_all_containers(self):
"""Stop all containers."""
+ # TODO: Rework if containers can be affected outside ContainerManager.
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
self.engine.stop()
def destroy_all_containers(self):
"""Destroy all containers."""
+ # TODO: Rework if containers can be affected outside ContainerManager.
+ self._disconnect_papi_to_all_containers()
for container in self.containers:
self.engine.container = self.containers[container]
self.engine.destroy()
@@ -543,7 +578,7 @@ class ContainerEngine:
"""System info."""
raise NotImplementedError
- def start_vpp(self):
+ def start_vpp(self, verify=True):
"""Start VPP inside a container."""
self.execute(
u"setsid /usr/bin/vpp -c /etc/vpp/startup.conf "
@@ -556,38 +591,51 @@ class ContainerEngine:
self.container.node,
SocketType.PAPI,
self.container.name,
- f"/tmp/vpp_sockets/{self.container.name}/api.sock"
+ self.container.api_socket,
)
topo_instance.add_new_socket(
self.container.node,
SocketType.STATS,
self.container.name,
- f"/tmp/vpp_sockets/{self.container.name}/stats.sock"
+ self.container.stats_socket,
)
- self.verify_vpp()
- self.adjust_privileges()
+ if verify:
+ self.verify_vpp()
- def restart_vpp(self):
+ def restart_vpp(self, verify=True):
"""Restart VPP service inside a container."""
self.execute(u"pkill vpp")
- self.start_vpp()
+ self.start_vpp(verify=verify)
+
+ def verify_vpp(self):
+ """Verify VPP is running and ready."""
+ self.verify_vppctl()
+ self.adjust_privileges()
+ self.verify_vpp_papi()
# TODO Rewrite to use the VPPUtil.py functionality and remove this.
- def verify_vpp(self, retries=120, retry_wait=1):
+ def verify_vppctl(self, retries=120, retry_wait=1):
"""Verify that VPP is installed and running inside container.
+ This function waits a while so VPP can start.
+ PCI interfaces are listed for debug purposes.
+ When the check passes, VPP API socket is created on remote side,
+ but perhaps its directory does not have the correct access rights yet.
+
:param retries: Check for VPP for this number of times Default: 120
:param retry_wait: Wait for this number of seconds between retries.
"""
for _ in range(retries + 1):
try:
+ # Execute puts the command into single quotes,
+ # so inner arguments are enclosed in qouble quotes here.
self.execute(
- u"vppctl show pci 2>&1 | "
- u"fgrep -v 'Connection refused' | "
- u"fgrep -v 'No such file or directory'"
+ u'vppctl show pci 2>&1 | '
+ u'fgrep -v "Connection refused" | '
+ u'fgrep -v "No such file or directory"'
)
break
- except RuntimeError:
+ except (RuntimeError, AssertionError):
sleep(retry_wait)
else:
self.execute(u"cat /tmp/vppd.log")
@@ -599,6 +647,32 @@ class ContainerEngine:
"""Adjust privileges to control VPP without sudo."""
self.execute("chmod -R o+rwx /run/vpp")
+ def verify_vpp_papi(self, retries=120, retry_wait=1):
+ """Verify that VPP is available for PAPI.
+
+ This also opens and caches PAPI connection for quick reuse.
+ The connection is disconnected when ContainerManager decides to do so.
+
+ :param retries: Check for VPP for this number of times Default: 120
+ :param retry_wait: Wait for this number of seconds between retries.
+ """
+ # Wait for success.
+ for _ in range(retries + 1):
+ try:
+ VPPUtil.vpp_show_version(
+ node=self.container.node,
+ remote_vpp_socket=self.container.api_socket,
+ log=False,
+ )
+ break
+ except (RuntimeError, AssertionError):
+ sleep(retry_wait)
+ else:
+ self.execute(u"cat /tmp/vppd.log")
+ raise RuntimeError(
+ f"VPP PAPI fails in container: {self.container.name}"
+ )
+
def create_base_vpp_startup_config(self, cpuset_cpus=None):
"""Create base startup configuration of VPP on container.
@@ -1188,8 +1262,18 @@ class Container:
except KeyError:
# Creating new attribute
if attr == u"node":
+ # Create and cache a connected SSH instance.
self.__dict__[u"ssh"] = SSH()
self.__dict__[u"ssh"].connect(value)
+ elif attr == u"name":
+ # Socket paths to not have mutable state,
+ # this just saves some horizontal space in callers.
+ # TODO: Rename the dir so other apps can add sockets easily.
+ # E.g. f"/tmp/app_sockets/{value}/vpp_api.sock"
+ path = f"/tmp/vpp_sockets/{value}"
+ self.__dict__[u"socket_dir"] = path
+ self.__dict__[u"api_socket"] = f"{path}/api.sock"
+ self.__dict__[u"stats_socket"] = f"{path}/stats.sock"
self.__dict__[attr] = value
else:
# Updating attribute base of type
diff --git a/resources/libraries/python/PapiExecutor.py b/resources/libraries/python/PapiExecutor.py
index 46fe5d583b..6b21680526 100644
--- a/resources/libraries/python/PapiExecutor.py
+++ b/resources/libraries/python/PapiExecutor.py
@@ -37,7 +37,11 @@ from resources.libraries.python.topology import Topology, SocketType
from resources.libraries.python.VppApiCrc import VppApiCrcChecker
-__all__ = [u"PapiExecutor", u"PapiSocketExecutor"]
+__all__ = [
+ u"PapiExecutor",
+ u"PapiSocketExecutor",
+ u"Disconnector",
+]
def dictize(obj):
@@ -73,14 +77,33 @@ def dictize(obj):
class PapiSocketExecutor:
"""Methods for executing VPP Python API commands on forwarded socket.
- The current implementation connects for the duration of resource manager.
- Delay for accepting connection is 10s, and disconnect is explicit.
+ Previously, we used an implementation with single client instance
+ and connection being handled by a resource manager.
+ On "with" statement, the instance connected, and disconnected
+ on exit from the "with" block.
+ This was limiting (no nested with blocks) and mainly it was slow:
+ 0.7 seconds per disconnect cycle on Skylake, more than 3 second on Taishan.
+
+ The currently used implementation caches the connected client instances,
+ providing speedup and making "with" blocks unnecessary.
+ But with many call sites, "with" blocks are still the main usage pattern.
+ Documentation still lists that as the intended pattern.
+
+ As a downside, clients need to be explicitly told to disconnect
+ before VPP restart.
+ There is some amount of retries and disconnects on disconnect
+ (so unresponsive VPPs do not breach test much more than needed),
+ but it is hard to verify all that works correctly.
+ Especially, if Robot crashes, files and ssh processes may leak.
+
+ Delay for accepting socket connection is 10s.
TODO: Decrease 10s to value that is long enough for creating connection
and short enough to not affect performance.
The current implementation downloads and parses .api.json files only once
- and stores a VPPApiClient instance (disconnected) as a class variable.
- Accessing multiple nodes with different APIs is therefore not supported.
+ and caches client instances for reuse.
+ Cleanup metadata is added as additional attributes
+ directly to client instances.
The current implementation seems to run into read error occasionally.
Not sure if the error is in Python code on Robot side, ssh forwarding,
@@ -129,10 +152,25 @@ class PapiSocketExecutor:
"""
# Class cache for reuse between instances.
- vpp_instance = None
- """Takes long time to create, stores all PAPI functions and types."""
+ api_root_dir = None
+ """We copy .api json files and PAPI code from DUT to robot machine.
+ This class variable holds temporary directory once created.
+ When python exits, the directory is deleted, so no downloaded file leaks.
+ The value will be set to TemporaryDirectory class instance (not string path)
+ to ensure deletion at exit."""
+ api_json_path = None
+ """String path to .api.json files, a directory somewhere in api_root_dir."""
+ api_package_path = None
+ """String path to PAPI code, a different directory under api_root_dir."""
crc_checker = None
- """Accesses .api.json files at creation, caching allows deleting them."""
+ """Accesses .api.json files at creation, caching speeds up accessing it."""
+ reusable_vpp_client_list = list()
+ """Each connection needs a separate client instance,
+ and each client instance creation needs to parse all .api files,
+ which takes time. If a client instance disconnects, it is put here,
+ so on next connect we can reuse intead of creating new."""
+ conn_cache = dict()
+ """Mapping from node key to connected client instance."""
def __init__(self, node, remote_vpp_socket=Constants.SOCKSVR_PATH):
"""Store the given arguments, declare managed variables.
@@ -146,89 +184,214 @@ class PapiSocketExecutor:
self._remote_vpp_socket = remote_vpp_socket
# The list of PAPI commands to be executed on the node.
self._api_command_list = list()
- # The following values are set on enter, reset on exit.
- self._temp_dir = None
- self._ssh_control_socket = None
- self._local_vpp_socket = None
- self.initialize_vpp_instance()
- def initialize_vpp_instance(self):
- """Create VPP instance with bindings to API calls, store as class field.
+ def ensure_api_dirs(self):
+ """Copy files from DUT to local temporary directory.
- No-op if the instance had been stored already.
+ If the directory is still there, do not copy again.
+ If copying, also initialize CRC checker (this also performs
+ static checks), and remember PAPI package path.
+ Do not add that to PATH yet.
+ """
+ cls = self.__class__
+ if cls.api_package_path:
+ return
+ cls.api_root_dir = tempfile.TemporaryDirectory(dir=u"/tmp")
+ root_path = cls.api_root_dir.name
+ # Pack, copy and unpack Python part of VPP installation from _node.
+ # TODO: Use rsync or recursive version of ssh.scp_node instead?
+ node = self._node
+ exec_cmd_no_error(node, [u"rm", u"-rf", u"/tmp/papi.txz"])
+ # Papi python version depends on OS (and time).
+ # Python 2.7 or 3.4, site-packages or dist-packages.
+ installed_papi_glob = u"/usr/lib/python3*/*-packages/vpp_papi"
+ # We need to wrap this command in bash, in order to expand globs,
+ # and as ssh does join, the inner command has to be quoted.
+ inner_cmd = u" ".join([
+ u"tar", u"cJf", u"/tmp/papi.txz", u"--exclude=*.pyc",
+ installed_papi_glob, u"/usr/share/vpp/api"
+ ])
+ exec_cmd_no_error(node, [u"bash", u"-c", u"'" + inner_cmd + u"'"])
+ scp_node(node, root_path + u"/papi.txz", u"/tmp/papi.txz", get=True)
+ run([u"tar", u"xf", root_path + u"/papi.txz", u"-C", root_path])
+ cls.api_json_path = root_path + u"/usr/share/vpp/api"
+ # Perform initial checks before .api.json files are gone,
+ # by creating the checker instance.
+ cls.crc_checker = VppApiCrcChecker(cls.api_json_path)
+ # When present locally, we finally can find the installation path.
+ cls.api_package_path = glob.glob(root_path + installed_papi_glob)[0]
+ # Package path has to be one level above the vpp_papi directory.
+ cls.api_package_path = cls.api_package_path.rsplit(u"/", 1)[0]
+
+ def ensure_vpp_instance(self):
+ """Create or reuse a closed client instance, return it.
The instance is initialized for unix domain socket access,
- it has initialized all the bindings, but it is not connected
+ it has initialized all the bindings, it is removed from the internal
+ list of disconnected instances, but it is not connected
(to a local socket) yet.
- This method downloads .api.json files from self._node
- into a temporary directory, deletes them finally.
+ :returns: VPP client instance ready for connect.
+ :rtype: vpp_papi.VPPApiClient
"""
- if self.vpp_instance:
- return
- cls = self.__class__ # Shorthand for setting class fields.
- package_path = None
- tmp_dir = tempfile.mkdtemp(dir=u"/tmp")
+ self.ensure_api_dirs()
+ cls = self.__class__
+ if cls.reusable_vpp_client_list:
+ # Reuse in LIFO fashion.
+ *cls.reusable_vpp_client_list, ret = cls.reusable_vpp_client_list
+ return ret
+ # Creating an instance leads to dynamic imports from VPP PAPI code,
+ # so the package directory has to be present until the instance.
+ # But it is simpler to keep the package dir around.
try:
- # Pack, copy and unpack Python part of VPP installation from _node.
- # TODO: Use rsync or recursive version of ssh.scp_node instead?
- node = self._node
- exec_cmd_no_error(node, [u"rm", u"-rf", u"/tmp/papi.txz"])
- # Papi python version depends on OS (and time).
- # Python 2.7 or 3.4, site-packages or dist-packages.
- installed_papi_glob = u"/usr/lib/python3*/*-packages/vpp_papi"
- # We need to wrap this command in bash, in order to expand globs,
- # and as ssh does join, the inner command has to be quoted.
- inner_cmd = u" ".join([
- u"tar", u"cJf", u"/tmp/papi.txz", u"--exclude=*.pyc",
- installed_papi_glob, u"/usr/share/vpp/api"
- ])
- exec_cmd_no_error(node, [u"bash", u"-c", u"'" + inner_cmd + u"'"])
- scp_node(node, tmp_dir + u"/papi.txz", u"/tmp/papi.txz", get=True)
- run([u"tar", u"xf", tmp_dir + u"/papi.txz", u"-C", tmp_dir])
- api_json_directory = tmp_dir + u"/usr/share/vpp/api"
- # Perform initial checks before .api.json files are gone,
- # by creating the checker instance.
- cls.crc_checker = VppApiCrcChecker(api_json_directory)
- # When present locally, we finally can find the installation path.
- package_path = glob.glob(tmp_dir + installed_papi_glob)[0]
- # Package path has to be one level above the vpp_papi directory.
- package_path = package_path.rsplit(u"/", 1)[0]
- sys.path.append(package_path)
+ sys.path.append(cls.api_package_path)
# TODO: Pylint says import-outside-toplevel and import-error.
# It is right, we should refactor the code and move initialization
# of package outside.
from vpp_papi.vpp_papi import VPPApiClient as vpp_class
- vpp_class.apidir = api_json_directory
+ vpp_class.apidir = cls.api_json_path
# We need to create instance before removing from sys.path.
- cls.vpp_instance = vpp_class(
+ vpp_instance = vpp_class(
use_socket=True, server_address=u"TBD", async_thread=False,
- read_timeout=14, logger=FilteredLogger(logger, u"INFO"))
+ read_timeout=14, logger=FilteredLogger(logger, u"INFO")
+ )
# Cannot use loglevel parameter, robot.api.logger lacks support.
# TODO: Stop overriding read_timeout when VPP-1722 is fixed.
finally:
- shutil.rmtree(tmp_dir)
- if sys.path[-1] == package_path:
+ if sys.path[-1] == cls.api_package_path:
sys.path.pop()
+ return vpp_instance
+
+ @classmethod
+ def key_for_node_and_socket(cls, node, remote_socket):
+ """Return a hashable object to distinguish nodes.
+
+ The usual node object (of "dict" type) is not hashable,
+ and can contain mutable information (mostly virtual interfaces).
+ Use this method to get an object suitable for being a key in dict.
+
+ The fields to include are chosen by what ssh needs.
+
+ This class method is needed, for disconnect.
+
+ :param node: The node object to distinguish.
+ :param remote_socket: Path to remote socket.
+ :type node: dict
+ :type remote_socket: str
+ :return: Tuple of values distinguishing this node from similar ones.
+ :rtype: tuple of str
+ """
+ return (
+ node[u"host"],
+ node[u"port"],
+ remote_socket,
+ # TODO: Do we support sockets paths such as "~/vpp/api.socket"?
+ # If yes, add also:
+ # node[u"username"],
+ )
+
+ def key_for_self(self):
+ """Return a hashable object to distinguish nodes.
+
+ Just a wrapper around key_for_node_and_socket
+ which sets up proper arguments.
+
+ :return: Tuple of values distinguishing this node from similar ones.
+ :rtype: tuple of str
+ """
+ return self.__class__.key_for_node_and_socket(
+ self._node, self._remote_vpp_socket,
+ )
+
+ def set_connected_client(self, client):
+ """Add a connected client instance into cache.
+
+ This hides details of what the node key is.
+
+ If there already is a client for the computed key,
+ fail, as it is a sign of resource leakage.
+
+ :param client: VPP client instance in connected state.
+ :type client: vpp_papi.VPPApiClient
+ :raises RuntimeError: If related key already has a cached client.
+ """
+ key = self.key_for_self()
+ cache = self.__class__.conn_cache
+ if key in cache:
+ raise RuntimeError(f"Caching client with existing key: {key}")
+ cache[key] = client
+
+ def get_connected_client(self, check_connected=True):
+ """Return None or cached connected client.
+
+ If check_connected, RuntimeError is raised when the client is
+ not in cache. None is returned if client is not in cache
+ (and the check is disabled).
+
+ This hides details of what the node key is.
+
+ :param check_connected: Whether cache miss raises.
+ :type check_connected: bool
+ :returns: Connected client instance, or None if uncached and no check.
+ :rtype: Optional[vpp_papi.VPPApiClient]
+ :raises RuntimeError: If cache miss and check enabled.
+ """
+ key = self.key_for_self()
+ ret = self.__class__.conn_cache.get(key, None)
+
+ if ret is None:
+ if check_connected:
+ raise RuntimeError(f"Client not cached for key: {key}")
+ else:
+ # When reading logs, it is good to see which VPP is accessed.
+ logger.debug(f"Activated cached PAPI client for key: {key}")
+ return ret
def __enter__(self):
"""Create a tunnel, connect VPP instance.
+ If the connected client is in cache, return it.
+ Only if not, create a new (or reuse a disconnected) client instance.
+
Only at this point a local socket names are created
- in a temporary directory, because VIRL runs 3 pybots at once,
- so harcoding local filenames does not work.
+ in a temporary directory, as CSIT can connect to multiple VPPs.
+
+ The following attributes are added to the client instance
+ to simplify caching and cleanup:
+ csit_temp_dir
+ - Temporary socket files are created here.
+ csit_control_socket
+ - This socket controls the local ssh process doing the forwarding.
+ csit_local_vpp_socket
+ - This is the forwarded socket to talk with remote VPP.
+
+ The attribute names do not start with underscore,
+ so pylint does not complain about accessing private attribute.
+ The attribute names start with csit_ to avoid naming conflicts
+ with "real" attributes from VPP Python code.
:returns: self
:rtype: PapiSocketExecutor
"""
+ # Do we have the connected instance in the cache?
+ vpp_instance = self.get_connected_client(check_connected=False)
+ if vpp_instance is not None:
+ return self
+ # No luck, create and connect a new instance.
time_enter = time.time()
- # Parsing takes longer than connecting, prepare instance before tunnel.
- vpp_instance = self.vpp_instance
node = self._node
- self._temp_dir = tempfile.mkdtemp(dir=u"/tmp")
- self._local_vpp_socket = self._temp_dir + u"/vpp-api.sock"
- self._ssh_control_socket = self._temp_dir + u"/ssh.sock"
- ssh_socket = self._ssh_control_socket
+ # Parsing takes longer than connecting, prepare instance before tunnel.
+ vpp_instance = self.ensure_vpp_instance()
+ # Store into cache as soon as possible.
+ # If connection fails, it is better to attempt disconnect anyway.
+ self.set_connected_client(vpp_instance)
+ # Set additional attributes.
+ vpp_instance.csit_temp_dir = tempfile.TemporaryDirectory(dir=u"/tmp")
+ temp_path = vpp_instance.csit_temp_dir.name
+ api_socket = temp_path + u"/vpp-api.sock"
+ vpp_instance.csit_local_vpp_socket = api_socket
+ ssh_socket = temp_path + u"/ssh.sock"
+ vpp_instance.csit_control_socket = ssh_socket
# Cleanup possibilities.
ret_code, _ = run([u"ls", ssh_socket], check=False)
if ret_code != 2:
@@ -243,19 +406,21 @@ class PapiSocketExecutor:
run([u"rm", u"-vrf", ssh_socket])
# Even if ssh can perhaps reuse this file,
# we need to remove it for readiness detection to work correctly.
- run([u"rm", u"-rvf", self._local_vpp_socket])
+ run([u"rm", u"-rvf", api_socket])
# We use sleep command. The ssh command will exit in 30 second,
# unless a local socket connection is established,
# in which case the ssh command will exit only when
# the ssh connection is closed again (via control socket).
# The log level is to suppress "Warning: Permanently added" messages.
ssh_cmd = [
- u"ssh", u"-S", ssh_socket, u"-M",
- u"-o", u"LogLevel=ERROR", u"-o", u"UserKnownHostsFile=/dev/null",
+ u"ssh", u"-S", ssh_socket, u"-M", u"-L",
+ api_socket + u":" + self._remote_vpp_socket,
+ u"-p", str(node[u"port"]),
+ u"-o", u"LogLevel=ERROR",
+ u"-o", u"UserKnownHostsFile=/dev/null",
u"-o", u"StrictHostKeyChecking=no",
u"-o", u"ExitOnForwardFailure=yes",
- u"-L", self._local_vpp_socket + u":" + self._remote_vpp_socket,
- u"-p", str(node[u"port"]), node[u"username"] + u"@" + node[u"host"],
+ node[u"username"] + u"@" + node[u"host"],
u"sleep", u"30"
]
priv_key = node.get(u"priv_key")
@@ -282,7 +447,7 @@ class PapiSocketExecutor:
while time.time() < time_stop:
# It can take a moment for ssh to create the socket file.
ret_code, _ = run(
- [u"ls", u"-l", self._local_vpp_socket], check=False
+ [u"ls", u"-l", api_socket], check=False
)
if not ret_code:
break
@@ -293,7 +458,7 @@ class PapiSocketExecutor:
# Socket up means the key has been read. Delete file by closing it.
key_file.close()
# Everything is ready, set the local socket address and connect.
- vpp_instance.transport.server_address = self._local_vpp_socket
+ vpp_instance.transport.server_address = api_socket
# It seems we can get read error even if every preceding check passed.
# Single retry seems to help.
for _ in range(2):
@@ -312,16 +477,100 @@ class PapiSocketExecutor:
return self
def __exit__(self, exc_type, exc_val, exc_tb):
- """Disconnect the vpp instance, tear down the SHH tunnel.
+ """No-op, the client instance remains in cache in connected state."""
+
+ @classmethod
+ def disconnect_by_key(cls, key):
+ """Disconnect a connected client instance, noop it not connected.
Also remove the local sockets by deleting the temporary directory.
- Arguments related to possible exception are entirely ignored.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ This method is useful for disconnect_all type of work.
+
+ :param key: Tuple identifying the node (and socket).
+ :type key: tuple of str
"""
- self.vpp_instance.disconnect()
+ client_instance = cls.conn_cache.get(key, None)
+ if client_instance is None:
+ return
+ logger.debug(f"Disconnecting by key: {key}")
+ client_instance.disconnect()
run([
- u"ssh", u"-S", self._ssh_control_socket, u"-O", u"exit", u"0.0.0.0"
+ u"ssh", u"-S", client_instance.csit_control_socket, u"-O",
+ u"exit", u"0.0.0.0"
], check=False)
- shutil.rmtree(self._temp_dir)
+ # Temp dir has autoclean, but deleting explicitly
+ # as an error can happen.
+ try:
+ client_instance.csit_temp_dir.cleanup()
+ except FileNotFoundError:
+ # There is a race condition with ssh removing its ssh.sock file.
+ # Single retry should be enough to ensure the complete removal.
+ shutil.rmtree(client_instance.csit_temp_dir.name)
+ # Finally, put disconnected clients to reuse list.
+ cls.reusable_vpp_client_list.append(client_instance)
+ # Invalidate cache last. Repeated errors are better than silent leaks.
+ del cls.conn_cache[key]
+
+ @classmethod
+ def disconnect_by_node_and_socket(
+ cls, node, remote_socket=Constants.SOCKSVR_PATH
+ ):
+ """Disconnect a connected client instance, noop it not connected.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ Call this method just before killing/restarting remote VPP instance.
+ """
+ key = cls.key_for_node_and_socket(node, remote_socket)
+ return cls.disconnect_by_key(key)
+
+ @classmethod
+ def disconnect_all_sockets_by_node(cls, node):
+ """Disconnect all socket connected client instance.
+
+ Noop if not connected.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ Call this method just before killing/restarting remote VPP instance.
+ """
+ sockets = Topology.get_node_sockets(node, socket_type=SocketType.PAPI)
+ if sockets:
+ for socket in sockets.values():
+ # TODO: Remove sockets from topology.
+ PapiSocketExecutor.disconnect_by_node_and_socket(node, socket)
+ # Always attempt to disconnect the default socket.
+ return cls.disconnect_by_node_and_socket(node)
+
+ @staticmethod
+ def disconnect_all_papi_connections():
+ """Disconnect all connected client instances, tear down the SSH tunnels.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ This should be a class method,
+ but we prefer to call static methods from Robot.
+
+ Call this method just before killing/restarting all VPP instances.
+ """
+ cls = PapiSocketExecutor
+ # Iterate over copy of entries so deletions do not mess with iterator.
+ keys_copy = list(cls.conn_cache.keys())
+ for key in keys_copy:
+ cls.disconnect_by_key(key)
def add(self, csit_papi_command, history=True, **kwargs):
"""Add next command to internal command list; return self.
@@ -517,7 +766,7 @@ class PapiSocketExecutor:
:rtype: list of dict
:raises RuntimeError: If the replies are not all correct.
"""
- vpp_instance = self.vpp_instance
+ vpp_instance = self.get_connected_client()
local_list = self._api_command_list
# Clear first as execution may fail.
self._api_command_list = list()
@@ -531,10 +780,10 @@ class PapiSocketExecutor:
except (IOError, struct.error) as err:
# Occasionally an error happens, try reconnect.
logger.warn(f"Reconnect after error: {err!r}")
- self.vpp_instance.disconnect()
+ vpp_instance.disconnect()
# Testing shows immediate reconnect fails.
time.sleep(1)
- self.vpp_instance.connect_sync(u"csit_socket")
+ vpp_instance.connect_sync(u"csit_socket")
logger.trace(u"Reconnected.")
reply = papi_fn(**command[u"api_args"])
except (AttributeError, IOError, struct.error) as err:
@@ -558,6 +807,33 @@ class PapiSocketExecutor:
return replies
+class Disconnector:
+ """Class for holding a single keyword."""
+
+ @staticmethod
+ def disconnect_all_papi_connections():
+ """Disconnect all connected client instances, tear down the SSH tunnels.
+
+ Also remove the local sockets by deleting the temporary directory.
+ Put disconnected client instances to the reuse list.
+ The added attributes are not cleaned up,
+ as their values will get overwritten on next connect.
+
+ Call this method just before killing/restarting all VPP instances.
+
+ This could be a class method of PapiSocketExecutor.
+ But Robot calls methods on instances, and it would be weird
+ to give node argument for constructor in import.
+ Also, as we have a class of the same name as the module,
+ the keywords defined on module level are not accessible.
+ """
+ cls = PapiSocketExecutor
+ # Iterate over copy of entries so deletions do not mess with iterator.
+ keys_copy = list(cls.conn_cache.keys())
+ for key in keys_copy:
+ cls.disconnect_by_key(key)
+
+
class PapiExecutor:
"""Contains methods for executing VPP Python API commands on DUTs.
diff --git a/resources/libraries/python/VPPUtil.py b/resources/libraries/python/VPPUtil.py
index a7ec44c974..17043aa599 100644
--- a/resources/libraries/python/VPPUtil.py
+++ b/resources/libraries/python/VPPUtil.py
@@ -58,11 +58,15 @@ class VPPUtil:
def restart_vpp_service(node, node_key=None):
"""Restart VPP service on the specified topology node.
+ Disconnect possibly connected PAPI executor.
+
:param node: Topology node.
:param node_key: Topology node key.
:type node: dict
:type node_key: str
"""
+ # Containers have a separate lifecycle, but better be safe.
+ PapiSocketExecutor.disconnect_all_sockets_by_node(node)
DUTSetup.restart_service(node, Constants.VPP_UNIT)
if node_key:
Topology.add_new_socket(
@@ -85,11 +89,15 @@ class VPPUtil:
def stop_vpp_service(node, node_key=None):
"""Stop VPP service on the specified topology node.
+ Disconnect possibly connected PAPI executor.
+
:param node: Topology node.
:param node_key: Topology node key.
:type node: dict
:type node_key: str
"""
+ # Containers have a separate lifecycle, but better be safe.
+ PapiSocketExecutor.disconnect_all_sockets_by_node(node)
DUTSetup.stop_service(node, Constants.VPP_UNIT)
if node_key:
Topology.del_node_socket_id(node, SocketType.PAPI, node_key)
@@ -184,18 +192,28 @@ class VPPUtil:
VPPUtil.verify_vpp(node)
@staticmethod
- def vpp_show_version(node):
+ def vpp_show_version(
+ node, remote_vpp_socket=Constants.SOCKSVR_PATH, log=True):
"""Run "show_version" PAPI command.
+ Socket is configurable, so VPP inside container can be accessed.
+
:param node: Node to run command on.
+ :param remote_vpp_socket: Path to remote socket to target VPP.
+ :param log: If true, show the result in Robot log.
:type node: dict
+ :type remote_vpp_socket: str
+ :type log: bool
:returns: VPP version.
:rtype: str
+ :raises RuntimeError: If PAPI connection fails.
+ :raises AssertionError: If PAPI retcode is nonzero.
"""
cmd = u"show_version"
- with PapiSocketExecutor(node) as papi_exec:
+ with PapiSocketExecutor(node, remote_vpp_socket) as papi_exec:
reply = papi_exec.add(cmd).get_reply()
- logger.info(f"VPP version: {reply[u'version']}\n")
+ if log:
+ logger.info(f"VPP version: {reply[u'version']}\n")
return f"{reply[u'version']}"
@staticmethod
diff --git a/resources/libraries/python/topology.py b/resources/libraries/python/topology.py
index 2b930c08dc..c39e5afabb 100644
--- a/resources/libraries/python/topology.py
+++ b/resources/libraries/python/topology.py
@@ -1139,4 +1139,5 @@ class Topology:
"""
for node in nodes.values():
if u"sockets" in list(node.keys()):
+ # Containers are disconnected and destroyed already.
node.pop(u"sockets")