aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatej Klotton <mklotton@cisco.com>2016-07-18 14:12:38 +0200
committerMatej Klotton <mklotton@cisco.com>2016-07-25 13:20:36 +0000
commitef67d0dbd0aca3ea0c24800763f6da79f3671727 (patch)
tree47f4119ecafa46166b0f41a81162d4605af7141a
parent967ee17e64c1aff48339d35cbdc0e317f9e87d70 (diff)
SSH timeout problem
In case of large output or stderr output the SSH proces got stucked, because of output buffer fills. Fix with reading from buffers. - increase traffic script default timeout - add reconnect to ssh Change-Id: Ie07107c6ab0fb19c9361d7390467b9b693fe2fdb Signed-off-by: Matej Klotton <mklotton@cisco.com>
-rw-r--r--resources/libraries/python/TrafficScriptExecutor.py2
-rw-r--r--resources/libraries/python/ssh.py100
-rw-r--r--resources/libraries/robot/ipv4.robot2
-rw-r--r--resources/libraries/robot/ipv6.robot2
4 files changed, 69 insertions, 37 deletions
diff --git a/resources/libraries/python/TrafficScriptExecutor.py b/resources/libraries/python/TrafficScriptExecutor.py
index f4f026addf..8d899044fc 100644
--- a/resources/libraries/python/TrafficScriptExecutor.py
+++ b/resources/libraries/python/TrafficScriptExecutor.py
@@ -37,7 +37,7 @@ class TrafficScriptExecutor(object):
@staticmethod
def run_traffic_script_on_node(script_file_name, node, script_args,
- timeout=10):
+ timeout=60):
"""Run traffic script on the TG node.
:param script_file_name: Traffic script name.
diff --git a/resources/libraries/python/ssh.py b/resources/libraries/python/ssh.py
index a96a55f257..f35b925d10 100644
--- a/resources/libraries/python/ssh.py
+++ b/resources/libraries/python/ssh.py
@@ -12,11 +12,12 @@
# limitations under the License.
import StringIO
-from time import time
+from time import time, sleep
import socket
import paramiko
from paramiko import RSAKey
+from paramiko.ssh_exception import SSHException
from scp import SCPClient
from interruptingcow import timeout
from robot.api import logger
@@ -34,6 +35,7 @@ class SSH(object):
def __init__(self):
self._ssh = None
+ self._node = None
@staticmethod
def _node_hash(node):
@@ -44,6 +46,7 @@ class SSH(object):
If there already is a connection to the node, this method reuses it.
"""
+ self._node = node
node_hash = self._node_hash(node)
if node_hash in SSH.__existing_connections:
self._ssh = SSH.__existing_connections[node_hash]
@@ -79,53 +82,82 @@ class SSH(object):
"""
node_hash = self._node_hash(node)
if node_hash in SSH.__existing_connections:
+ logger.debug('Disconnecting peer: {}, {}'.
+ format(node['host'], node['port']))
ssh = SSH.__existing_connections.pop(node_hash)
ssh.close()
+ def _reconnect(self):
+ node = self._node
+ self.disconnect(node)
+ self.connect(node)
+ logger.debug('Reconnecting peer done: {}'.
+ format(self._ssh.get_transport().getpeername()))
+
def exec_command(self, cmd, timeout=10):
"""Execute SSH command on a new channel on the connected Node.
- Returns (return_code, stdout, stderr).
+ :param cmd: Command to run on the Node.
+ :param timeout: Maximal time in seconds to wait while the command is
+ done. If is None then wait forever.
+ :type cmd: str
+ :type timeout: int
+ :return return_code, stdout, stderr
+ :rtype: tuple(int, str, str)
+ :raise socket.timeout: If command is not finished in timeout time.
"""
+ start = time()
+ stdout = StringIO.StringIO()
+ stderr = StringIO.StringIO()
+ try:
+ chan = self._ssh.get_transport().open_session(timeout=5)
+ except AttributeError:
+ self._reconnect()
+ chan = self._ssh.get_transport().open_session(timeout=5)
+ except SSHException:
+ self._reconnect()
+ chan = self._ssh.get_transport().open_session(timeout=5)
+ chan.settimeout(timeout)
logger.trace('exec_command on {0}: {1}'
.format(self._ssh.get_transport().getpeername(), cmd))
- start = time()
- chan = self._ssh.get_transport().open_session()
- if timeout is not None:
- chan.settimeout(int(timeout))
+
chan.exec_command(cmd)
+ while not chan.exit_status_ready() and timeout is not None:
+ if chan.recv_ready():
+ stdout.write(chan.recv(self.__MAX_RECV_BUF))
+
+ if chan.recv_stderr_ready():
+ stderr.write(chan.recv_stderr(self.__MAX_RECV_BUF))
+
+ if time() - start > timeout:
+ raise socket.timeout(
+ 'Timeout exception.\n'
+ 'Current contents of stdout buffer: {0}\n'
+ 'Current contents of stderr buffer: {1}\n'
+ .format(stdout.getvalue(), stderr.getvalue())
+ )
+
+ sleep(0.1)
+ return_code = chan.recv_exit_status()
+
+ while chan.recv_ready():
+ stdout.write(chan.recv(self.__MAX_RECV_BUF))
+
+ while chan.recv_stderr_ready():
+ stderr.write(chan.recv_stderr(self.__MAX_RECV_BUF))
+
end = time()
logger.trace('exec_command on {0} took {1} seconds'.format(
self._ssh.get_transport().getpeername(), end-start))
- stdout = ""
- try:
- while True:
- buf = chan.recv(self.__MAX_RECV_BUF)
- stdout += buf
- if not buf:
- break
- except socket.timeout:
- logger.error('Caught timeout exception, current contents '
- 'of buffer: {0}'.format(stdout))
- raise
-
- stderr = ""
- while True:
- buf = chan.recv_stderr(self.__MAX_RECV_BUF)
- stderr += buf
- if not buf:
- break
-
- return_code = chan.recv_exit_status()
logger.trace('chan_recv/_stderr took {} seconds'.format(time()-end))
logger.trace('return RC {}'.format(return_code))
- logger.trace('return STDOUT {}'.format(stdout))
- logger.trace('return STDERR {}'.format(stderr))
- return return_code, stdout, stderr
+ logger.trace('return STDOUT {}'.format(stdout.getvalue()))
+ logger.trace('return STDERR {}'.format(stderr.getvalue()))
+ return return_code, stdout.getvalue(), stderr.getvalue()
- def exec_command_sudo(self, cmd, cmd_input=None, timeout=10):
+ def exec_command_sudo(self, cmd, cmd_input=None, timeout=30):
"""Execute SSH command with sudo on a new channel on the connected Node.
:param cmd: Command to be executed.
@@ -234,7 +266,7 @@ class SSH(object):
logger.trace('SCP took {0} seconds'.format(end-start))
-def exec_cmd(node, cmd, timeout=None, sudo=False):
+def exec_cmd(node, cmd, timeout=600, sudo=False):
"""Convenience function to ssh/exec/return rc, out & err.
Returns (rc, stdout, stderr).
@@ -251,7 +283,7 @@ def exec_cmd(node, cmd, timeout=None, sudo=False):
ssh.connect(node)
except Exception, e:
logger.error("Failed to connect to node" + str(e))
- return None
+ return None, None, None
try:
if not sudo:
@@ -261,12 +293,12 @@ def exec_cmd(node, cmd, timeout=None, sudo=False):
timeout=timeout)
except Exception, e:
logger.error(e)
- return None
+ return None, None, None
return ret_code, stdout, stderr
-def exec_cmd_no_error(node, cmd, timeout=None, sudo=False):
+def exec_cmd_no_error(node, cmd, timeout=600, sudo=False):
"""Convenience function to ssh/exec/return out & err.
Verifies that return code is zero.
diff --git a/resources/libraries/robot/ipv4.robot b/resources/libraries/robot/ipv4.robot
index 7db329d97f..6fb2b753ba 100644
--- a/resources/libraries/robot/ipv4.robot
+++ b/resources/libraries/robot/ipv4.robot
@@ -92,7 +92,7 @@
| | | ... | ${dst_mac} | ${src_ip} | ${dst_ip}
| | ${args}= | Set Variable
| | ... | ${args} --start_size ${start_size} --end_size ${end_size} --step ${step}
-| | Run Traffic Script On Node | ipv4_sweep_ping.py | ${src_node} | ${args} | ${20}
+| | Run Traffic Script On Node | ipv4_sweep_ping.py | ${src_node} | ${args}
| Send ARP request and validate response
| | [Arguments] | ${tg_node} | ${vpp_node}
diff --git a/resources/libraries/robot/ipv6.robot b/resources/libraries/robot/ipv6.robot
index 19c222e7c2..ed4259cb73 100644
--- a/resources/libraries/robot/ipv6.robot
+++ b/resources/libraries/robot/ipv6.robot
@@ -62,7 +62,7 @@
| | | ... | ${dst_mac} | ${src_ip} | ${dst_ip}
| | ${args}= | Set Variable
| | ... | ${args} --start_size ${start_size} --end_size ${end_size} --step ${step}
-| | Run Traffic Script On Node | ipv6_sweep_ping.py | ${src_node} | ${args} | ${20}
+| | Run Traffic Script On Node | ipv6_sweep_ping.py | ${src_node} | ${args}
| Ipv6 tg to dut1 egress
| | [Documentation] | Send traffic from TG to first DUT egress interface