From 3ea197e5df0de834151dccfa0c7f70651741cac9 Mon Sep 17 00:00:00 2001 From: Yaroslav Brustinov Date: Thu, 2 Mar 2017 01:25:51 +0200 Subject: t-rex-64: catch exit with trap to restore tty settings Change-Id: Id336aa95ceff5e958e9bce329c85eb7201772712 Signed-off-by: Yaroslav Brustinov --- .../server/trex_launch_thread.py | 2 +- .../trex_control_plane/server/trex_server.py | 5 +- .../server/zmq_monitor_thread.py | 4 +- scripts/t-rex-64 | 17 ++-- scripts/trex_daemon_server | 97 ++++++++++------------ 5 files changed, 56 insertions(+), 69 deletions(-) diff --git a/scripts/automation/trex_control_plane/server/trex_launch_thread.py b/scripts/automation/trex_control_plane/server/trex_launch_thread.py index a4a7a97c..9598bc98 100755 --- a/scripts/automation/trex_control_plane/server/trex_launch_thread.py +++ b/scripts/automation/trex_control_plane/server/trex_launch_thread.py @@ -77,7 +77,7 @@ class AsynchronousTRexSession(threading.Thread): logger.debug("Finished handling a single run of TRex.") self.trexObj.zmq_dump = None - def join (self, timeout = None): + def join (self, timeout = 5): self.stoprequest.set() super(AsynchronousTRexSession, self).join(timeout) diff --git a/scripts/automation/trex_control_plane/server/trex_server.py b/scripts/automation/trex_control_plane/server/trex_server.py index cd4af11a..bcbec069 100755 --- a/scripts/automation/trex_control_plane/server/trex_server.py +++ b/scripts/automation/trex_control_plane/server/trex_server.py @@ -256,7 +256,8 @@ class CTRexServer(object): def assert_zmq_ok(self): if self.trex.zmq_error: - raise Exception('ZMQ thread got error: %s' % self.trex.zmq_error) + self.trex.zmq_error, err = None, self.trex.zmq_error + raise Exception('ZMQ thread got error: %s' % err) if not self.zmq_monitor.is_alive(): if self.trex.get_status() != TRexStatus.Idle: self.force_trex_kill() @@ -326,6 +327,7 @@ class CTRexServer(object): return False def start_trex(self, trex_cmd_options, user, block_to_success = True, timeout = 40, stateless = False, debug_image = False, trex_args = ''): + self.trex.zmq_error = None self.assert_zmq_ok() with self.start_lock: logger.info("Processing start_trex() command.") @@ -418,7 +420,6 @@ class CTRexServer(object): def wait_until_kickoff_finish (self, timeout = 40): # block until TRex exits Starting state logger.info("Processing wait_until_kickoff_finish() command.") - trex_state = None start_time = time.time() while (time.time() - start_time) < timeout : self.assert_zmq_ok() diff --git a/scripts/automation/trex_control_plane/server/zmq_monitor_thread.py b/scripts/automation/trex_control_plane/server/zmq_monitor_thread.py index 172e2eb3..950e909f 100755 --- a/scripts/automation/trex_control_plane/server/zmq_monitor_thread.py +++ b/scripts/automation/trex_control_plane/server/zmq_monitor_thread.py @@ -48,12 +48,12 @@ class ZmqMonitorSession(threading.Thread): pass else: logger.error("ZMQ monitor thrown an exception. Received exception: {ex}".format(ex=e)) - raise + self.trexObj.zmq_error = e except Exception as e: logger.error('ZMQ monitor error: %s' % e) self.trexObj.zmq_error = e - def join(self, timeout=None): + def join(self, timeout=5): self.stoprequest.set() logger.debug("Handling termination of ZMQ monitor thread") self.socket.close() diff --git a/scripts/t-rex-64 b/scripts/t-rex-64 index c92d48b0..de83c4c8 100755 --- a/scripts/t-rex-64 +++ b/scripts/t-rex-64 @@ -33,11 +33,13 @@ cd $(dirname $0) export LD_LIBRARY_PATH=$PWD +function restore_tty { + stty $saveterm +} + if [ -t 0 ] && [ -t 1 ]; then - export is_tty=true saveterm="$(stty -g)" -else - export is_tty=false + trap restore_tty EXIT fi # if we have a new core run optimized trex @@ -56,12 +58,5 @@ else RESULT=$? fi -if $is_tty; then - stty $saveterm -fi - -if [ $RESULT -ne 0 ]; then - exit $RESULT -fi - +exit $RESULT diff --git a/scripts/trex_daemon_server b/scripts/trex_daemon_server index d7da283d..35dad86c 100755 --- a/scripts/trex_daemon_server +++ b/scripts/trex_daemon_server @@ -6,6 +6,7 @@ from time import time, sleep import subprocess, shlex, multiprocessing from argparse import ArgumentParser from distutils.dir_util import mkpath +import signal def fail(msg): print(msg) @@ -14,11 +15,22 @@ def fail(msg): if os.getuid() != 0: fail('Please run this program as root/with sudo') -sys.path.append(os.path.join('automation', 'trex_control_plane', 'server')) +cur_dir = os.path.abspath(os.path.dirname(__file__)) + +server_path = os.path.join(cur_dir, 'automation', 'trex_control_plane', 'server') +if server_path not in sys.path: + sys.path.append(server_path) + +ext_libs_path = os.path.join(cur_dir, 'external_libs') +if ext_libs_path not in sys.path: + sys.path.append(ext_libs_path) + if 'start-live' not in sys.argv: import CCustomLogger CCustomLogger.setup_daemon_logger('TRexServer', '/var/log/trex/trex_daemon_server.log') + import trex_server +import netstat try: from termstyle import termstyle @@ -26,49 +38,34 @@ except ImportError: import termstyle -def run_command(command, timeout = 10): - commmand = 'timeout %s %s' % (timeout, command) - # pipes might stuck, even with timeout - with tempfile.TemporaryFile() as stdout_file, tempfile.TemporaryFile() as stderr_file: - proc = subprocess.Popen(shlex.split(command), stdout = stdout_file, stderr = stderr_file, cwd = daemon_dir) - proc.wait() - stdout_file.seek(0) - stderr_file.seek(0) - return (proc.returncode, stdout_file.read().decode(errors = 'replace'), stderr_file.read().decode(errors = 'replace')) - - def get_daemon_pid(): - err = None - for i in range(5): - try: - return_code, stdout, stderr = run_command('netstat -tlnp') - if return_code: - raise Exception('Failed to run netstat.\nStdout: %s\nStderr: %s' % (stdout, stderr)) - for line in stdout.splitlines(): - if '0.0.0.0:%s' % args.daemon_port in line: - line_arr = line.split() - if '/' not in line_arr[-1]: - raise Exception('Expecting pid/program name in netstat line of using port %s, got: %s' % (args.daemon_port, line)) - pid, program = line_arr[-1].split('/') - if 'python' not in program and 'trex_server' not in program and 'trex_daemon_server' not in program: - raise Exception('Some other program holds port %s, not our daemon: %s. Please verify.' % (args.daemon_port, program)) - return int(pid) - return None - except Exception as e: - err = e - sleep(0.1) - fail('Could not determine daemon pid, err: %s' % err) + pid = None + for conn in netstat.netstat(): + if conn[2] == '0.0.0.0' and int(conn[3]) == args.daemon_port and conn[6] == 'LISTEN': + pid = conn[7] + if pid is None: + raise Exception('Found the connection, but could not determine pid: %s' % conn) + break + return pid + + +# faster variant of get_daemon_pid +def is_running(): + for conn in netstat.netstat(with_pid = False): + if conn[2] == '0.0.0.0' and int(conn[3]) == args.daemon_port and conn[6] == 'LISTEN': + return True + return False def show_daemon_status(): - if get_daemon_pid(): + if is_running(): print(termstyle.green('TRex server daemon is running')) else: print(termstyle.red('TRex server daemon is NOT running')) def start_daemon(): - if get_daemon_pid(): + if is_running(): print(termstyle.red('TRex server daemon is already running')) return # Usual daemon will die with current process, detach it with double fork @@ -76,7 +73,7 @@ def start_daemon(): pid = os.fork() if pid > 0: for i in range(50): - if get_daemon_pid(): + if is_running(): print(termstyle.green('TRex server daemon is started')) os._exit(0) sleep(0.1) @@ -89,37 +86,31 @@ def start_daemon(): def start_live(): - if get_daemon_pid(): + if is_running(): fail(termstyle.red('TRex server daemon is already running')) trex_server.do_main_program() + def restart_daemon(): - if get_daemon_pid(): + if is_running(): kill_daemon() sleep(0.5) start_daemon() + def kill_daemon(): pid = get_daemon_pid() if not pid: print(termstyle.red('TRex server daemon is NOT running')) return True - return_code, stdout, stderr = run_command('kill %s' % pid) # usual kill - #if return_code: - # fail('Failed to kill trex_daemon, error: %s' % stderr) - for i in range(50): - if not get_daemon_pid(): - print(termstyle.green('TRex server daemon is killed')) - return True - sleep(0.1) - return_code, stdout, stderr = run_command('kill -9 %s' % pid) # unconditional kill - #if return_code: - # fail('Failed to kill trex_daemon, error: %s' % stderr) - for i in range(50): - if not get_daemon_pid(): - print(termstyle.green('TRex server daemon is killed')) - return True - sleep(0.1) + pid = int(pid) + for sig in (signal.SIGTERM, signal.SIGKILL): + os.kill(pid, sig) + for i in range(50): + if not is_running(): + print(termstyle.green('TRex server daemon is killed')) + return True + sleep(0.1) fail('Failed to kill trex_daemon, even with -9. Please review manually.\n' \ 'Return code: %s\nStdout: %s\nStderr: %s' % return_code, stdout, stderr) # should not happen -- cgit 1.2.3-korg