diff options
40 files changed, 888 insertions, 311 deletions
diff --git a/linux/ws_main.py b/linux/ws_main.py index 58f5b661..3aee05db 100755 --- a/linux/ws_main.py +++ b/linux/ws_main.py @@ -121,7 +121,7 @@ main_src = SrcGroup(dir='src', 'latency.cpp', 'flow_stat.cpp', 'flow_stat_parser.cpp', - + 'trex_watchdog.cpp', 'pal/linux/pal_utl.cpp', 'pal/linux/mbuf.cpp', 'sim/trex_sim_stateless.cpp', @@ -371,7 +371,7 @@ class build_option: def get_link_flags(self): # add here basic flags - base_flags = ['-pthread']; + base_flags = []; if self.isPIE(): base_flags.append('-lstdc++') @@ -410,7 +410,7 @@ def build_prog (bld, build_obj): linkflags = build_obj.get_link_flags(), source = build_obj.get_src(), use = build_obj.get_use_libs(), - lib = ['z'], + lib = ['pthread', 'z', 'dl'], rpath = bld.env.RPATH + build_obj.get_rpath(), target = build_obj.get_target()) diff --git a/linux_dpdk/ws_main.py b/linux_dpdk/ws_main.py index 4c2d3c51..a13c8fd7 100755 --- a/linux_dpdk/ws_main.py +++ b/linux_dpdk/ws_main.py @@ -98,6 +98,7 @@ main_src = SrcGroup(dir='src', 'utl_term_io.cpp', 'global_io_mode.cpp', 'main_dpdk.cpp', + 'trex_watchdog.cpp', 'debug.cpp', 'flow_stat.cpp', 'flow_stat_parser.cpp', @@ -665,7 +666,7 @@ class build_option: return (flags) def get_link_flags(self): - base_flags = []; + base_flags = ['-rdynamic']; if self.is64Platform(): base_flags += ['-m64']; base_flags += ['-lrt']; diff --git a/scripts/automation/regression/functional_tests/stl_basic_tests.py b/scripts/automation/regression/functional_tests/stl_basic_tests.py index dbbf2530..863307f1 100644 --- a/scripts/automation/regression/functional_tests/stl_basic_tests.py +++ b/scripts/automation/regression/functional_tests/stl_basic_tests.py @@ -119,9 +119,9 @@ class CStlBasic_Test(functional_general_test.CGeneralFunctional_Test): def run_sim (self, yaml, output, options = "", silent = False, obj = None): if output: - user_cmd = "-f {0} -o {1} {2}".format(yaml, output, options) + user_cmd = "-f {0} -o {1} {2} -p {3}".format(yaml, output, options, self.scripts_path) else: - user_cmd = "-f {0} {1}".format(yaml, options) + user_cmd = "-f {0} {1} -p {2}".format(yaml, options, self.scripts_path) if silent: user_cmd += " --silent" diff --git a/scripts/automation/regression/setups/trex17/benchmark.yaml b/scripts/automation/regression/setups/trex17/benchmark.yaml index c6f588e6..3e1f7c9b 100644 --- a/scripts/automation/regression/setups/trex17/benchmark.yaml +++ b/scripts/automation/regression/setups/trex17/benchmark.yaml @@ -23,7 +23,7 @@ test_routing_imix_64: test_static_routing_imix_asymmetric: - multiplier : 0.8 + multiplier : 0.7 cores : 1 bw_per_core : 9.635 diff --git a/scripts/automation/regression/trex_unit_test.py b/scripts/automation/regression/trex_unit_test.py index 0762fc95..83650164 100755 --- a/scripts/automation/regression/trex_unit_test.py +++ b/scripts/automation/regression/trex_unit_test.py @@ -203,11 +203,10 @@ class CTRexTestConfiguringPlugin(Plugin): print('Could not restart TRex daemon server') sys.exit(-1) - trex_cmds = CTRexScenario.trex.get_trex_cmds() - if trex_cmds: - if self.kill_running: - CTRexScenario.trex.kill_all_trexes() - else: + if self.kill_running: + CTRexScenario.trex.kill_all_trexes() + else: + if CTRexScenario.trex.get_trex_cmds(): print('TRex is already running') sys.exit(-1) @@ -238,11 +237,11 @@ class CTRexTestConfiguringPlugin(Plugin): if self.stateful: CTRexScenario.trex = None if self.stateless: - if not self.no_daemon: + if self.no_daemon: + if CTRexScenario.stl_trex and CTRexScenario.stl_trex.is_connected(): + CTRexScenario.stl_trex.disconnect() + else: CTRexScenario.trex.force_kill(False) - if CTRexScenario.stl_trex and CTRexScenario.stl_trex.is_connected(): - CTRexScenario.stl_trex.disconnect() - #time.sleep(3) CTRexScenario.stl_trex = None diff --git a/scripts/automation/trex_control_plane/server/singleton_daemon.py b/scripts/automation/trex_control_plane/server/singleton_daemon.py index 7cfbc3bc..8fdedc6e 100755 --- a/scripts/automation/trex_control_plane/server/singleton_daemon.py +++ b/scripts/automation/trex_control_plane/server/singleton_daemon.py @@ -6,10 +6,12 @@ import tempfile import types from subprocess import Popen from time import sleep +import outer_packages +import jsonrpclib # uses Unix sockets for determine running process. # (assumes used daemons will register proper socket) -# all daemons should use -p argument as listening tcp port +# all daemons should use -p argument as listening tcp port and check_connectivity RPC method class SingletonDaemon(object): # run_cmd can be function of how to run daemon or a str to run at subprocess @@ -28,14 +30,14 @@ class SingletonDaemon(object): # returns True if daemon is running def is_running(self): - lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) try: - lock_socket.bind('\0' + self.tag) # the check is ~200000 faster and more reliable than checking via 'netstat' or 'ps' etc. + lock_socket = register_socket(self.tag) # the check is ~200000 faster and more reliable than checking via 'netstat' or 'ps' etc. + lock_socket.shutdown(socket.SHUT_RDWR) lock_socket.close() except socket.error: # Unix socket in use return True # Unix socket is not used, but maybe it's old version of daemon not using socket - return bool(self.get_pid()) + return bool(self.get_pid_by_listening_port()) # get pid of running daemon by registered Unix socket (most robust way) @@ -73,7 +75,7 @@ class SingletonDaemon(object): # kill daemon - def kill(self, timeout = 5): + def kill(self, timeout = 10): pid = self.get_pid() if not pid: return False @@ -88,17 +90,27 @@ class SingletonDaemon(object): ret_code, stdout, stderr = run_command('kill -9 %s' % pid) # unconditional kill if ret_code: raise Exception('Failed to run kill -9 command for %s: %s' % (self.name, [ret_code, stdout, stderr])) - poll_rate = 0.1 - for i in range(inr(timeout / poll_rate)): + for i in range(int(timeout / poll_rate)): if not self.is_running(): return True sleep(poll_rate) raise Exception('Could not kill %s, even with -9' % self.name) + # try connection as RPC client, return True upon success, False if fail + def check_connectivity(self, timeout = 5): + daemon = jsonrpclib.Server('http://127.0.0.1:%s/' % self.port) + poll_rate = 0.1 + for i in range(int(timeout/poll_rate)): + try: + daemon.check_connectivity() + return True + except socket.error: # daemon is not up yet + sleep(poll_rate) + return False # start daemon # returns True if success, False if already running - def start(self, timeout = 5): + def start(self, timeout = 20): if self.is_running(): raise Exception('%s is already running' % self.name) if not self.run_cmd: @@ -112,6 +124,8 @@ class SingletonDaemon(object): if timeout > 0: poll_rate = 0.1 for i in range(int(timeout/poll_rate)): + if self.is_running(): + break sleep(poll_rate) if bool(proc.poll()): # process ended with error stdout_file.seek(0) @@ -120,7 +134,9 @@ class SingletonDaemon(object): elif proc.poll() == 0: # process runs other process, and ended break if self.is_running(): - return True + if self.check_connectivity(): + return True + raise Exception('Daemon process is running, but no connectivity') raise Exception('%s failed to run.' % self.name) # restart the daemon @@ -136,8 +152,9 @@ def register_socket(tag): lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) try: lock_socket.bind('\0%s' % tag) + return lock_socket except socket.error: - raise Exception('Error: process with tag %s is already running.' % tag) + raise socket.error('Error: process with tag %s is already running.' % tag) # runs command def run_command(command, timeout = 15, cwd = None): @@ -153,6 +170,8 @@ def run_command(command, timeout = 15, cwd = None): if proc.poll() is None: proc.kill() # timeout return (errno.ETIME, '', 'Timeout on running: %s' % command) + else: + proc.wait() stdout_file.seek(0) stderr_file.seek(0) return (proc.returncode, stdout_file.read().decode(errors = 'replace'), stderr_file.read().decode(errors = 'replace')) diff --git a/scripts/automation/trex_control_plane/server/trex_launch_thread.py b/scripts/automation/trex_control_plane/server/trex_launch_thread.py index 74ce1750..82a7f996 100755 --- a/scripts/automation/trex_control_plane/server/trex_launch_thread.py +++ b/scripts/automation/trex_control_plane/server/trex_launch_thread.py @@ -6,6 +6,7 @@ import signal import socket
from common.trex_status_e import TRexStatus
import subprocess
+import shlex
import time
import threading
import logging
@@ -32,7 +33,7 @@ class AsynchronousTRexSession(threading.Thread): with open(os.devnull, 'w') as DEVNULL:
self.time_stamps['start'] = self.time_stamps['run_time'] = time.time()
- self.session = subprocess.Popen("exec "+self.cmd, cwd = self.launch_path, shell=True, stdin = DEVNULL, stderr = subprocess.PIPE, preexec_fn=os.setsid)
+ self.session = subprocess.Popen(shlex.split(self.cmd), cwd = self.launch_path, stdin = DEVNULL, stderr = subprocess.PIPE, preexec_fn=os.setsid, close_fds = True)
logger.info("TRex session initialized successfully, Parent process pid is {pid}.".format( pid = self.session.pid ))
while self.session.poll() is None: # subprocess is NOT finished
time.sleep(0.5)
diff --git a/scripts/automation/trex_control_plane/server/trex_server.py b/scripts/automation/trex_control_plane/server/trex_server.py index 45ef9ac1..8f7e99f0 100755 --- a/scripts/automation/trex_control_plane/server/trex_server.py +++ b/scripts/automation/trex_control_plane/server/trex_server.py @@ -30,9 +30,9 @@ import shlex import tempfile try: - from .singleton_daemon import register_socket + from .singleton_daemon import register_socket, run_command except: - from singleton_daemon import register_socket + from singleton_daemon import register_socket, run_command # setup the logger @@ -134,6 +134,7 @@ class CTRexServer(object): self.server.register_function(self.add) self.server.register_function(self.cancel_reservation) self.server.register_function(self.connectivity_check) + self.server.register_function(self.connectivity_check, 'check_connectivity') # alias self.server.register_function(self.force_trex_kill) self.server.register_function(self.get_file) self.server.register_function(self.get_files_list) @@ -164,16 +165,6 @@ class CTRexServer(object): self.server.shutdown() #self.server.server_close() - def _run_command(self, command, timeout = 15, cwd = None): - if timeout: - command = 'timeout %s %s' % (timeout, command) - # pipes might stuck, even with timeout - with tempfile.TemporaryFile() as stdout_file, tempfile.TemporaryFile() as stderr_file: - proc = subprocess.Popen(shlex.split(command), stdout=stdout_file, stderr=stderr_file, cwd = cwd) - proc.wait() - stdout_file.seek(0) - stderr_file.seek(0) - return (proc.returncode, stdout_file.read().decode(errors = 'replace'), stderr_file.read().decode(errors = 'replace')) # get files from Trex server and return their content (mainly for logs) @staticmethod @@ -234,7 +225,7 @@ class CTRexServer(object): try: logger.info("Processing get_trex_version() command.") if not self.trex_version: - ret_code, stdout, stderr = self._run_command('./t-rex-64 --help', cwd = self.TREX_PATH, timeout = 0) + ret_code, stdout, stderr = run_command('./t-rex-64 --help', cwd = self.TREX_PATH) search_result = re.search('\n\s*(Version\s*:.+)', stdout, re.DOTALL) if not search_result: raise Exception('Could not determine version from ./t-rex-64 --help') @@ -383,7 +374,7 @@ class CTRexServer(object): # returns list of tuples (pid, command line) of running TRex(es) def get_trex_cmds(self): logger.info('Processing get_trex_cmds() command.') - ret_code, stdout, stderr = self._run_command('ps -u root --format pid,comm,cmd') + ret_code, stdout, stderr = run_command('ps -u root --format pid,comm,cmd') if ret_code: raise Exception('Failed to determine running processes, stderr: %s' % stderr) trex_cmds_list = [] @@ -403,12 +394,12 @@ class CTRexServer(object): return False for pid, cmd in trex_cmds_list: logger.info('Killing process %s %s' % (pid, cmd)) - self._run_command('kill %s' % pid) - ret_code_ps, _, _ = self._run_command('ps -p %s' % pid) + run_command('kill %s' % pid) + ret_code_ps, _, _ = run_command('ps -p %s' % pid) if not ret_code_ps: logger.info('Killing with -9.') - self._run_command('kill -9 %s' % pid) - ret_code_ps, _, _ = self._run_command('ps -p %s' % pid) + run_command('kill -9 %s' % pid) + ret_code_ps, _, _ = run_command('ps -p %s' % pid) if not ret_code_ps: logger.info('Could not kill process.') raise Exception('Could not kill process %s %s' % (pid, cmd)) diff --git a/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py b/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py index eae0f18b..98af6134 100644 --- a/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py +++ b/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py @@ -39,14 +39,12 @@ def inject_pcap (pcap_file, server, port, loop_count, ipg_usec, use_vm, remove_f c.reset(ports = [port]) c.clear_stats() - d = c.push_pcap(pcap_file, + c.push_pcap(pcap_file, ipg_usec = ipg_usec, count = loop_count, vm = vm, packet_hook = packet_hook) - STLSim().run(d, outfile = 'test.cap') - c.wait_on_traffic() diff --git a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py index 11e80b9a..62724e64 100644 --- a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py +++ b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py @@ -40,18 +40,11 @@ class BpSimException(Exception): # stateless simulation class STLSim(object): - def __init__ (self, bp_sim_path = None, handler = 0, port_id = 0, api_h = "dummy"): + def __init__ (self, bp_sim_path, handler = 0, port_id = 0, api_h = "dummy"): - if not bp_sim_path: - # auto find scripts - m = re.match(".*/trex-core", os.getcwd()) - if not m: - raise STLError('cannot find BP sim path, please provide it') - - self.bp_sim_path = os.path.join(m.group(0), 'scripts') - - else: - self.bp_sim_path = bp_sim_path + self.bp_sim_path = os.path.abspath(bp_sim_path) + if not os.path.exists(self.bp_sim_path): + raise STLError('BP sim path %s does not exist' % self.bp_sim_path) # dummies self.handler = handler diff --git a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py index 94a45577..0ec98a0d 100644 --- a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py +++ b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py @@ -1020,19 +1020,20 @@ class CLatencyStats(CTRexStats): output[int_pg_id]['err_cntrs'] = current_pg['err_cntrs'] output[int_pg_id]['latency'] = {} - output[int_pg_id]['latency']['last_max'] = current_pg['latency']['last_max'] - output[int_pg_id]['latency']['jitter'] = current_pg['latency']['jitter'] - if current_pg['latency']['h'] != "": - output[int_pg_id]['latency']['average'] = current_pg['latency']['h']['s_avg'] - output[int_pg_id]['latency']['total_max'] = current_pg['latency']['h']['max_usec'] - output[int_pg_id]['latency']['histogram'] = {elem['key']: elem['val'] - for elem in current_pg['latency']['h']['histogram']} - zero_count = current_pg['latency']['h']['cnt'] - current_pg['latency']['h']['high_cnt'] - if zero_count != 0: - output[int_pg_id]['latency']['total_min'] = 1 - output[int_pg_id]['latency']['histogram'][0] = zero_count - elif output[int_pg_id]['latency']['histogram']: - output[int_pg_id]['latency']['total_min'] = min(output[int_pg_id]['latency']['histogram'].keys()) + if 'latency' in current_pg: + for field in ['jitter', 'average', 'total_max', 'last_max']: + if field in current_pg['latency']: + output[int_pg_id]['latency'][field] = current_pg['latency'][field] + else: + output[int_pg_id]['latency'][field] = StatNotAvailable(field) + + if 'histogram' in current_pg['latency']: + output[int_pg_id]['latency']['histogram'] = {int(elem): current_pg['latency']['histogram'][elem] + for elem in current_pg['latency']['histogram']} + min_val = min(output[int_pg_id]['latency']['histogram'].keys()) + if min_val == 0: + min_val = 2 + output[int_pg_id]['latency']['total_min'] = min_val else: output[int_pg_id]['latency']['total_min'] = StatNotAvailable('total_min') diff --git a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py index 32f0a2d1..729ab3fd 100644 --- a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py +++ b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py @@ -69,13 +69,14 @@ RTF_REJECT = 0x0200 LOOPBACK_NAME="lo" -with os.popen("tcpdump -V 2> /dev/null") as _f: - if _f.close() >> 8 == 0x7f: - log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH") - TCPDUMP=0 - else: - TCPDUMP=1 -del(_f) +#with os.popen("tcpdump -V 2> /dev/null") as _f: +# if _f.close() >> 8 == 0x7f: +# log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH") +# TCPDUMP=0 +# else: +# TCPDUMP=1 +#del(_f) +TCPDUMP=0 def get_if_raw_hwaddr(iff): diff --git a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py index 3eab16c6..40ff9e35 100644 --- a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py +++ b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py @@ -71,13 +71,14 @@ PCAP_ERRBUF_SIZE=256 LOOPBACK_NAME="lo" -with os.popen("tcpdump -V 2> /dev/null") as _f: - if _f.close() >> 8 == 0x7f: - log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH") - TCPDUMP=0 - else: - TCPDUMP=1 -del(_f) +#with os.popen("tcpdump -V 2> /dev/null") as _f: +# if _f.close() >> 8 == 0x7f: +# log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH") +# TCPDUMP=0 +# else: +# TCPDUMP=1 +#del(_f) +TCPDUMP=0 def get_if_raw_hwaddr(iff): diff --git a/scripts/ko/src/igb_uio.c b/scripts/ko/src/igb_uio.c index faeb0b68..27bec6a3 100755 --- a/scripts/ko/src/igb_uio.c +++ b/scripts/ko/src/igb_uio.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/msi.h> #include <linux/version.h> +#include <linux/slab.h> #ifdef CONFIG_XEN_DOM0 #include <xen/xen.h> @@ -39,15 +40,6 @@ #include "compat.h" -#ifdef RTE_PCI_CONFIG -#define PCI_SYS_FILE_BUF_SIZE 10 -#define PCI_DEV_CAP_REG 0xA4 -#define PCI_DEV_CTRL_REG 0xA8 -#define PCI_DEV_CAP_EXT_TAG_MASK 0x20 -#define PCI_DEV_CTRL_EXT_TAG_SHIFT 8 -#define PCI_DEV_CTRL_EXT_TAG_MASK (1 << PCI_DEV_CTRL_EXT_TAG_SHIFT) -#endif - /** * A structure describing the private information for a uio device. */ @@ -57,22 +49,15 @@ struct rte_uio_pci_dev { enum rte_intr_mode mode; }; -static char *intr_mode = NULL; +static char *intr_mode; static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; -static inline struct rte_uio_pci_dev * -igbuio_get_uio_pci_dev(struct uio_info *info) -{ - return container_of(info, struct rte_uio_pci_dev, info); -} - /* sriov sysfs */ static ssize_t show_max_vfs(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, 10, "%u\n", - pci_num_vf(container_of(dev, struct pci_dev, dev))); + return snprintf(buf, 10, "%u\n", dev_num_vf(dev)); } static ssize_t @@ -81,7 +66,7 @@ store_max_vfs(struct device *dev, struct device_attribute *attr, { int err = 0; unsigned long max_vfs; - struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); + struct pci_dev *pdev = to_pci_dev(dev); if (0 != kstrtoul(buf, 0, &max_vfs)) return -EINVAL; @@ -100,19 +85,9 @@ store_max_vfs(struct device *dev, struct device_attribute *attr, static ssize_t show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf) { - struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev); - uint32_t val = 0; - - pci_read_config_dword(pci_dev, PCI_DEV_CAP_REG, &val); - if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) /* Not supported */ - return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n", "invalid"); - - val = 0; - pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn, - PCI_DEV_CTRL_REG, &val); + dev_info(dev, "Deprecated\n"); - return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n", - (val & PCI_DEV_CTRL_EXT_TAG_MASK) ? "on" : "off"); + return 0; } static ssize_t @@ -121,36 +96,9 @@ store_extended_tag(struct device *dev, const char *buf, size_t count) { - struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev); - uint32_t val = 0, enable; - - if (strncmp(buf, "on", 2) == 0) - enable = 1; - else if (strncmp(buf, "off", 3) == 0) - enable = 0; - else - return -EINVAL; - - pci_cfg_access_lock(pci_dev); - pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn, - PCI_DEV_CAP_REG, &val); - if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) { /* Not supported */ - pci_cfg_access_unlock(pci_dev); - return -EPERM; - } - - val = 0; - pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn, - PCI_DEV_CTRL_REG, &val); - if (enable) - val |= PCI_DEV_CTRL_EXT_TAG_MASK; - else - val &= ~PCI_DEV_CTRL_EXT_TAG_MASK; - pci_bus_write_config_dword(pci_dev->bus, pci_dev->devfn, - PCI_DEV_CTRL_REG, val); - pci_cfg_access_unlock(pci_dev); + dev_info(dev, "Deprecated\n"); - return count; + return 0; } static ssize_t @@ -158,10 +106,9 @@ show_max_read_request_size(struct device *dev, struct device_attribute *attr, char *buf) { - struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev); - int val = pcie_get_readrq(pci_dev); + dev_info(dev, "Deprecated\n"); - return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%d\n", val); + return 0; } static ssize_t @@ -170,18 +117,9 @@ store_max_read_request_size(struct device *dev, const char *buf, size_t count) { - struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev); - unsigned long size = 0; - int ret; - - if (0 != kstrtoul(buf, 0, &size)) - return -EINVAL; - - ret = pcie_set_readrq(pci_dev, (int)size); - if (ret < 0) - return ret; + dev_info(dev, "Deprecated\n"); - return count; + return 0; } #endif @@ -243,7 +181,7 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) static int igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) { - struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info); + struct rte_uio_pci_dev *udev = info->priv; struct pci_dev *pdev = udev->pdev; pci_cfg_access_lock(pdev); @@ -253,8 +191,13 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) else if (udev->mode == RTE_INTR_MODE_MSIX) { struct msi_desc *desc; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) list_for_each_entry(desc, &pdev->msi_list, list) igbuio_msix_mask_irq(desc, irq_state); +#else + list_for_each_entry(desc, &pdev->dev.msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); +#endif } pci_cfg_access_unlock(pdev); @@ -268,7 +211,7 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) static irqreturn_t igbuio_pci_irqhandler(int irq, struct uio_info *info) { - struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info); + struct rte_uio_pci_dev *udev = info->priv; /* Legacy mode need to mask in hardware */ if (udev->mode == RTE_INTR_MODE_LEGACY && @@ -333,7 +276,7 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, unsigned long addr, len; void *internal_addr; - if (sizeof(info->mem) / sizeof(info->mem[0]) <= n) + if (n >= ARRAY_SIZE(info->mem)) return -EINVAL; addr = pci_resource_start(dev, pci_bar); @@ -358,7 +301,7 @@ igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info, { unsigned long addr, len; - if (sizeof(info->port) / sizeof(info->port[0]) <= n) + if (n >= ARRAY_SIZE(info->port)) return -EINVAL; addr = pci_resource_start(dev, pci_bar); @@ -403,7 +346,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) iom = 0; iop = 0; - for (i = 0; i != sizeof(bar_names) / sizeof(bar_names[0]); i++) { + for (i = 0; i < ARRAY_SIZE(bar_names); i++) { if (pci_resource_len(dev, i) != 0 && pci_resource_start(dev, i) != 0) { flags = pci_resource_flags(dev, i); @@ -562,23 +505,17 @@ fail_free: static void igbuio_pci_remove(struct pci_dev *dev) { - struct uio_info *info = pci_get_drvdata(dev); - struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info); - - if (info->priv == NULL) { - pr_notice("Not igbuio device\n"); - return; - } + struct rte_uio_pci_dev *udev = pci_get_drvdata(dev); sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); - uio_unregister_device(info); - igbuio_pci_release_iomem(info); + uio_unregister_device(&udev->info); + igbuio_pci_release_iomem(&udev->info); if (udev->mode == RTE_INTR_MODE_MSIX) pci_disable_msix(dev); pci_release_regions(dev); pci_disable_device(dev); pci_set_drvdata(dev, NULL); - kfree(info); + kfree(udev); } static int diff --git a/scripts/master_daemon.py b/scripts/master_daemon.py index 0b1b7363..390db0a3 100755 --- a/scripts/master_daemon.py +++ b/scripts/master_daemon.py @@ -9,6 +9,7 @@ from collections import OrderedDict from argparse import * from time import time, sleep from glob import glob +import signal sys.path.append(os.path.join('automation', 'trex_control_plane', 'server')) import outer_packages @@ -16,7 +17,7 @@ from singleton_daemon import SingletonDaemon, register_socket, run_command from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer import termstyle -logging.basicConfig(level = logging.FATAL) # keep quiet +logger = logging.getLogger('Master daemon') ### Server functions ### @@ -83,33 +84,52 @@ def start_master_daemon(): proc.start() for i in range(50): if master_daemon.is_running(): - print(termstyle.green('Master daemon is started')) + print(termstyle.green('Master daemon is started.')) os._exit(0) sleep(0.1) - fail(termstyle.red('Master daemon failed to run')) - + fail(termstyle.red('Master daemon failed to run. Please look in log: %s' % logging_file)) + +def set_logger(): + if os.path.exists(logging_file): + if os.path.exists(logging_file_bu): + os.unlink(logging_file_bu) + os.rename(logging_file, logging_file_bu) + hdlr = logging.FileHandler(logging_file) + formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt = '%Y-%m-%d %H:%M:%S') + hdlr.setFormatter(formatter) + logger.addHandler(hdlr) + logger.setLevel(logging.INFO) def start_master_daemon_func(): - register_socket(master_daemon.tag) - server = SimpleJSONRPCServer(('0.0.0.0', master_daemon.port)) - print('Started master daemon (port %s)' % master_daemon.port) - server.register_function(add) - server.register_function(check_connectivity) - server.register_function(get_trex_path) - server.register_function(update_trex) - # trex_daemon_server - server.register_function(trex_daemon_server.is_running, 'is_trex_daemon_running') - server.register_function(trex_daemon_server.restart, 'restart_trex_daemon') - server.register_function(trex_daemon_server.start, 'start_trex_daemon') - server.register_function(trex_daemon_server.stop, 'stop_trex_daemon') - # stl rpc proxy - server.register_function(stl_rpc_proxy.is_running, 'is_stl_rpc_proxy_running') - server.register_function(stl_rpc_proxy.restart, 'restart_stl_rpc_proxy') - server.register_function(stl_rpc_proxy.start, 'start_stl_rpc_proxy') - server.register_function(stl_rpc_proxy.stop, 'stop_stl_rpc_proxy') - server.register_function(server.funcs.keys, 'get_methods') # should be last - server.serve_forever() + try: + set_logger() + register_socket(master_daemon.tag) + server = SimpleJSONRPCServer(('0.0.0.0', master_daemon.port)) + logger.info('Started master daemon (port %s)' % master_daemon.port) + server.register_function(add) + server.register_function(check_connectivity) + server.register_function(get_trex_path) + server.register_function(update_trex) + # trex_daemon_server + server.register_function(trex_daemon_server.is_running, 'is_trex_daemon_running') + server.register_function(trex_daemon_server.restart, 'restart_trex_daemon') + server.register_function(trex_daemon_server.start, 'start_trex_daemon') + server.register_function(trex_daemon_server.stop, 'stop_trex_daemon') + # stl rpc proxy + server.register_function(stl_rpc_proxy.is_running, 'is_stl_rpc_proxy_running') + server.register_function(stl_rpc_proxy.restart, 'restart_stl_rpc_proxy') + server.register_function(stl_rpc_proxy.start, 'start_stl_rpc_proxy') + server.register_function(stl_rpc_proxy.stop, 'stop_stl_rpc_proxy') + server.register_function(server.funcs.keys, 'get_methods') # should be last + signal.signal(signal.SIGTSTP, stop_handler) + signal.signal(signal.SIGTERM, stop_handler) + server.serve_forever() + except Exception as e: + logger.error('Closing due to error: %s' % e) +def stop_handler(*args, **kwargs): + logger.info('Got killed explicitly.') + sys.exit(0) # returns True if given path is under current dir or /tmp def _check_path_under_current_or_temp(path): @@ -170,8 +190,9 @@ stl_rpc_proxy = SingletonDaemon('Stateless RPC proxy', 'trex_stl_rpc_proxy' trex_daemon_server = SingletonDaemon('TRex daemon server', 'trex_daemon_server', args.trex_daemon_port, './trex_daemon_server start', args.trex_dir) master_daemon = SingletonDaemon('Master daemon', 'trex_master_daemon', args.master_port, start_master_daemon) # add ourself for easier check if running, kill etc. -daemons_by_name = {} tmp_dir = '/tmp/trex-tmp' +logging_file = '/var/log/trex/master_daemon.log' +logging_file_bu = '/var/log/trex/master_daemon.log_bu' if not _check_path_under_current_or_temp(args.trex_dir): raise Exception('Only allowed to use path under /tmp or current directory') @@ -182,6 +203,7 @@ if not os.path.exists(args.trex_dir): os.makedirs(args.trex_dir) os.chmod(args.trex_dir, 0o777) elif args.allow_update: + print('Due to allow updates flag, setting mode 777 on given directory') os.chmod(args.trex_dir, 0o777) if not os.path.exists(tmp_dir): diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp index 86b7821b..b36ac6e1 100755 --- a/src/bp_gtest.cpp +++ b/src/bp_gtest.cpp @@ -897,7 +897,7 @@ TEST_F(basic, latency3) { EXPECT_EQ_UINT32(mg.is_active()?1:0, (uint32_t)0)<< "pass"; - mg.start(8); + mg.start(8, NULL); mg.stop(); mg.Dump(stdout); mg.DumpShort(stdout); diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp index 51023b90..c9171ae5 100755 --- a/src/bp_sim.cpp +++ b/src/bp_sim.cpp @@ -24,6 +24,8 @@ limitations under the License. #include "utl_json.h" #include "utl_yaml.h" #include "msg_manager.h" +#include "trex_watchdog.h" + #include <common/basic_utils.h> #include <trex_stream_node.h> @@ -3322,6 +3324,9 @@ bool CFlowGenListPerThread::Create(uint32_t thread_id, m_max_threads=max_threads; m_thread_id=thread_id; + m_watchdog = NULL; + m_watchdog_handle = -1; + m_cpu_cp_u.Create(&m_cpu_dp_u); uint32_t socket_id=rte_lcore_to_socket_id(m_core_id); @@ -3897,6 +3902,10 @@ void CNodeGenerator::handle_flow_pkt(CGenNode *node, CFlowGenListPerThread *thre } void CNodeGenerator::handle_flow_sync(CGenNode *node, CFlowGenListPerThread *thread, bool &exit_scheduler) { + + /* tickle the watchdog */ + thread->tickle(); + /* flow sync message is a sync point for time */ thread->m_cur_time_sec = node->m_time; diff --git a/src/bp_sim.h b/src/bp_sim.h index bb7dd928..d940080e 100755 --- a/src/bp_sim.h +++ b/src/bp_sim.h @@ -58,6 +58,7 @@ limitations under the License. #include <arpa/inet.h> #include "platform_cfg.h" #include "flow_stat.h" +#include "trex_watchdog.h" #include <trex_stateless_dp_core.h> @@ -3637,6 +3638,12 @@ public: m_node_gen.m_v_if->flush_tx_queue(); } + void tickle() { + if (m_watchdog) { + m_watchdog->tickle(m_watchdog_handle); + } + } + /* return the dual port ID this thread is attached to in 4 ports configuration there are 2 dual-ports @@ -3759,6 +3766,8 @@ public: CTupleGeneratorSmart m_smart_gen; + TrexWatchDog *m_watchdog; + int m_watchdog_handle; public: CNodeGenerator m_node_gen; diff --git a/src/flow_stat.cpp b/src/flow_stat.cpp index 2385e03f..98b9494b 100644 --- a/src/flow_stat.cpp +++ b/src/flow_stat.cpp @@ -464,6 +464,8 @@ CFlowStatRuleMgr::CFlowStatRuleMgr() { m_rx_core = NULL; m_hw_id_map.create(MAX_FLOW_STATS); m_hw_id_map_payload.create(MAX_FLOW_STATS_PAYLOAD); + memset(m_rx_cant_count_err, 0, sizeof(m_rx_cant_count_err)); + memset(m_tx_cant_count_err, 0, sizeof(m_tx_cant_count_err)); } CFlowStatRuleMgr::~CFlowStatRuleMgr() { @@ -959,6 +961,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo p_user_id->set_need_to_send_rx(port); } } else { + m_rx_cant_count_err[port] += rx_pkts.get_pkts(); std::cerr << __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx packets, on port " << (uint16_t)port << ", because no mapping was found." << std::endl; } @@ -972,6 +975,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo p_user_id->set_need_to_send_tx(port); } } else { + m_tx_cant_count_err[port] += tx_pkts.get_pkts();; std::cerr << __METHOD_NAME__ << i << ":Could not count " << tx_pkts << " tx packets on port " << (uint16_t)port << ", because no mapping was found." << std::endl; } @@ -990,6 +994,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo p_user_id->set_need_to_send_rx(port); } } else { + m_rx_cant_count_err[port] += rx_pkts.get_pkts();; std::cerr << __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx payload packets, on port " << (uint16_t)port << ", because no mapping was found." << std::endl; } @@ -1003,6 +1008,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo p_user_id->set_need_to_send_tx(port); } } else { + m_tx_cant_count_err[port] += tx_pkts.get_pkts();; std::cerr << __METHOD_NAME__ << i << ":Could not count " << tx_pkts << " tx packets on port " << (uint16_t)port << ", because no mapping was found." << std::endl; } @@ -1011,6 +1017,15 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo } // build json report + // general per port data + for (uint8_t port = 0; port < m_num_ports; port++) { + std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str(); + if (m_rx_cant_count_err[port] != 0) + s_data_section["port_data"][str_port]["rx_err"] = m_rx_cant_count_err[port]; + if (m_tx_cant_count_err[port] != 0) + s_data_section["port_data"][str_port]["tx_err"] = m_tx_cant_count_err[port]; + } + flow_stat_user_id_map_it_t it; for (it = m_user_id_map.begin(); it != m_user_id_map.end(); it++) { bool send_empty = true; @@ -1022,6 +1037,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo user_id_info->set_was_sent(true); send_empty = false; } + // flow stat json for (uint8_t port = 0; port < m_num_ports; port++) { std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str(); if (user_id_info->need_to_send_rx(port) || baseline) { @@ -1042,10 +1058,11 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo s_data_section[str_user_id] = Json::objectValue; } + // latency info json if (user_id_info->rfc2544_support()) { CFlowStatUserIdInfoPayload *user_id_info_p = (CFlowStatUserIdInfoPayload *)user_id_info; // payload object. Send also latency, jitter... - Json::Value lat_hist; + Json::Value lat_hist = Json::arrayValue; if (user_id_info->is_hw_id()) { // if mapped to hw_id, take info from what we just got from rx core uint16_t hw_id = user_id_info->get_hw_id(); @@ -1055,17 +1072,16 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo user_id_info_p->set_dup_cnt(rfc2544_info[hw_id].get_dup_cnt()); user_id_info_p->set_seq_err_big_cnt(rfc2544_info[hw_id].get_seq_err_ev_big()); user_id_info_p->set_seq_err_low_cnt(rfc2544_info[hw_id].get_seq_err_ev_low()); - l_data_section[str_user_id]["latency"]["h"] = lat_hist; - l_data_section[str_user_id]["latency"]["last_max"] = rfc2544_info[hw_id].get_last_max_usec(); + l_data_section[str_user_id]["latency"] = lat_hist; l_data_section[str_user_id]["latency"]["jitter"] = rfc2544_info[hw_id].get_jitter_usec(); } else { // Not mapped to hw_id. Get saved info. user_id_info_p->get_latency_json(lat_hist); - l_data_section[str_user_id]["latency"]["h"] = lat_hist; - l_data_section[str_user_id]["latency"]["last_max"] = 0; - l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec(); + if (lat_hist != Json::nullValue) { + l_data_section[str_user_id]["latency"] = lat_hist; + l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec(); + } } - //todo: add last 10 samples l_data_section[str_user_id]["err_cntrs"]["dropped"] = Json::Value::UInt64(user_id_info_p->get_seq_err_cnt()); l_data_section[str_user_id]["err_cntrs"]["out_of_order"] diff --git a/src/flow_stat.h b/src/flow_stat.h index 3387c2d3..8671b228 100644 --- a/src/flow_stat.h +++ b/src/flow_stat.h @@ -24,11 +24,12 @@ #include <stdio.h> #include <string> #include <map> +#include <json/json.h> #include "trex_defs.h" #include "trex_exception.h" #include "trex_stream.h" #include "msg_manager.h" -#include <internal_api/trex_platform_api.h> +#include "internal_api/trex_platform_api.h" // range reserved for rx stat measurement is from IP_ID_RESERVE_BASE to 0xffff // Do not change this value. In i350 cards, we filter according to first byte of IP ID @@ -132,7 +133,7 @@ class rfc2544_info_t_ { m_seq_err_ev_big = 0; m_seq_err_ev_low = 0; m_jitter = 0; - m_latency = Json::Value(""); + m_latency = Json::nullValue; m_last_max_latency = 0; } @@ -332,7 +333,7 @@ class CFlowStatUserIdInfoPayload : public CFlowStatUserIdInfo { json = m_rfc2544_info.m_latency; } - inline void set_latency_json(Json::Value json) { + inline void set_latency_json(const Json::Value &json) { m_rfc2544_info.m_latency = json; } @@ -474,7 +475,9 @@ class CFlowStatRuleMgr { uint32_t m_num_started_streams; // How many started (transmitting) streams we have CNodeRing *m_ring_to_rx; // handle for sending messages to Rx core CFlowStatParser *m_parser; - uint16_t m_cap; + uint16_t m_cap; // capabilities of the NIC driver we are using + uint32_t m_rx_cant_count_err[TREX_MAX_PORTS]; + uint32_t m_tx_cant_count_err[TREX_MAX_PORTS]; }; #endif diff --git a/src/latency.cpp b/src/latency.cpp index a7652bed..acbe26d4 100644 --- a/src/latency.cpp +++ b/src/latency.cpp @@ -22,6 +22,8 @@ limitations under the License. #include "latency.h" #include "bp_sim.h" #include "utl_json.h" +#include "trex_watchdog.h" + #include <common/basic_utils.h> const uint8_t sctp_pkt[]={ @@ -562,6 +564,10 @@ bool CLatencyManager::Create(CLatencyManagerCfg * cfg){ if ( CGlobalInfo::is_learn_mode() ){ m_nat_check_manager.Create(); } + + m_watchdog = NULL; + m_watchdog_handle = -1; + return (true); } @@ -711,7 +717,13 @@ void CLatencyManager::reset(){ } -void CLatencyManager::start(int iter) { +void CLatencyManager::tickle() { + if (m_watchdog) { + m_watchdog->tickle(m_watchdog_handle); + } +} + +void CLatencyManager::start(int iter, TrexWatchDog *watchdog) { m_do_stop =false; m_is_active =false; int cnt=0; @@ -728,6 +740,10 @@ void CLatencyManager::start(int iter) { m_p_queue.push(node); bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable()?true:false; + if (watchdog) { + m_watchdog = watchdog; + m_watchdog_handle = watchdog->register_monitor("STF RX CORE", 1); + } while ( !m_p_queue.empty() ) { node = m_p_queue.top(); @@ -748,6 +764,9 @@ void CLatencyManager::start(int iter) { switch (node->m_type) { case CGenNode::FLOW_SYNC: + + tickle(); + if ( CGlobalInfo::is_learn_mode() ) { m_nat_check_manager.handle_aging(); } @@ -792,6 +811,11 @@ void CLatencyManager::start(int iter) { m_rx_check_manager.tw_drain(); } + /* disable the monitor */ + if (m_watchdog) { + m_watchdog->disable_monitor(m_watchdog_handle); + } + } void CLatencyManager::stop(){ diff --git a/src/latency.h b/src/latency.h index eef7146a..2b74f737 100644 --- a/src/latency.h +++ b/src/latency.h @@ -28,6 +28,8 @@ limitations under the License. #define L_PKT_SUBMODE_REPLY 2 #define L_PKT_SUBMODE_0_SEQ 3 +class TrexWatchDog; + class CLatencyPktInfo { public: void Create(class CLatencyPktMode *m_l_pkt_info); @@ -337,7 +339,7 @@ public: bool Create(CLatencyManagerCfg * cfg); void Delete(); void reset(); - void start(int iter); + void start(int iter, TrexWatchDog *watchdog); void stop(); bool is_active(); void set_ip(uint32_t client_ip, @@ -374,6 +376,7 @@ public: CLatencyPktMode *c_l_pkt_mode; private: + void tickle(); void send_pkt_all_ports(); void try_rx(); void try_rx_queues(); @@ -400,6 +403,9 @@ private: CNatRxManager m_nat_check_manager; CCpuUtlDp m_cpu_dp_u; CCpuUtlCp m_cpu_cp_u; + TrexWatchDog *m_watchdog; + int m_watchdog_handle; + volatile bool m_do_stop __rte_cache_aligned ; }; diff --git a/src/main.cpp b/src/main.cpp index 701a65d2..62eee880 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -83,6 +83,11 @@ static CSimpleOpt::SOption parser_options[] = }; static TrexStateless *m_sim_statelss_obj; +static char *g_exe_name; + +const char *get_exe_name() { + return g_exe_name; +} static int usage(){ @@ -261,8 +266,8 @@ void set_stateless_obj(TrexStateless *obj) { m_sim_statelss_obj = obj; } - int main(int argc , char * argv[]){ + g_exe_name = argv[0]; std::unordered_map<std::string, int> params; diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp index bd3cac64..c72af57a 100644 --- a/src/main_dpdk.cpp +++ b/src/main_dpdk.cpp @@ -73,6 +73,7 @@ extern "C" { #include "debug.h" #include "internal_api/trex_platform_api.h" #include "main_dpdk.h" +#include "trex_watchdog.h" #define RX_CHECK_MIX_SAMPLE_RATE 8 #define RX_CHECK_MIX_SAMPLE_RATE_1G 2 @@ -2845,6 +2846,7 @@ private: std::mutex m_cp_lock; public: + TrexWatchDog m_watchdog; TrexStateless *m_trex_stateless; }; @@ -3272,14 +3274,16 @@ bool CGlobalTRex::Create(){ TrexStatelessCfg cfg; - TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP, global_platform_cfg_info.m_zmq_rpc_port); + TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP, + global_platform_cfg_info.m_zmq_rpc_port, + &m_cp_lock, + &m_watchdog); cfg.m_port_count = CGlobalInfo::m_options.m_expected_portd; cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg; cfg.m_rpc_server_verbose = false; cfg.m_platform_api = new TrexDpdkPlatformApi(); cfg.m_publisher = &m_zmq_publisher; - cfg.m_global_lock = &m_cp_lock; m_trex_stateless = new TrexStateless(cfg); } @@ -3975,6 +3979,9 @@ int CGlobalTRex::run_in_master() { const int FASTPATH_DELAY_MS = 10; const int SLOWPATH_DELAY_MS = 500; + int handle = m_watchdog.register_monitor("master", 2); + m_watchdog.start(); + while ( true ) { /* fast path */ @@ -3995,6 +4002,8 @@ int CGlobalTRex::run_in_master() { delay(FASTPATH_DELAY_MS); slow_path_counter += FASTPATH_DELAY_MS; cp_lock.lock(); + + m_watchdog.tickle(handle); } /* on exit release the lock */ @@ -4006,6 +4015,9 @@ int CGlobalTRex::run_in_master() { } m_mg.stop(); + + m_watchdog.stop(); + delay(1000); if ( was_stopped ){ /* we should stop latency and exit to stop agents */ @@ -4017,14 +4029,15 @@ int CGlobalTRex::run_in_master() { int CGlobalTRex::run_in_rx_core(void){ + if (get_is_stateless()) { m_sl_rx_running = true; - m_rx_sl.start(); + m_rx_sl.start(m_watchdog); m_sl_rx_running = false; } else { if ( CGlobalInfo::m_options.is_rx_enabled() ){ m_sl_rx_running = false; - m_mg.start(0); + m_mg.start(0, &m_watchdog); } } @@ -4032,6 +4045,8 @@ int CGlobalTRex::run_in_rx_core(void){ } int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){ + std::stringstream ss; + ss << "DP core " << int(virt_core_id); CPreviewMode *lp=&CGlobalInfo::m_options.preview; if ( lp->getSingleCore() && @@ -4045,14 +4060,23 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){ assert(m_fl_was_init); CFlowGenListPerThread * lpt; + lpt = m_fl.m_threads_info[virt_core_id-1]; + /* register a watchdog handle on current core */ + lpt->m_watchdog = &m_watchdog; + lpt->m_watchdog_handle = m_watchdog.register_monitor(ss.str(), 1); + + if (get_is_stateless()) { lpt->start_stateless_daemon(*lp); }else{ lpt->start_generate_stateful(CGlobalInfo::m_options.out_file,*lp); } + /* done - remove this from the watchdog (we might wait on join for a long time) */ + lpt->m_watchdog->disable_monitor(lpt->m_watchdog_handle); + m_signal[virt_core_id]=1; return (0); } @@ -4422,9 +4446,14 @@ uint32_t get_cores_mask(uint32_t cores,int offset){ } +static char *g_exe_name; +const char *get_exe_name() { + return g_exe_name; +} int main(int argc , char * argv[]){ + g_exe_name = argv[0]; return ( main_test(argc , argv)); } @@ -4733,13 +4762,17 @@ int main_test(int argc , char * argv[]){ uint32_t pkts = CGlobalInfo::m_options.m_latency_prev * CGlobalInfo::m_options.m_latency_rate; printf("Starting pre latency check for %d sec\n",CGlobalInfo::m_options.m_latency_prev); - g_trex.m_mg.start(pkts); + g_trex.m_mg.start(pkts, NULL); delay(CGlobalInfo::m_options.m_latency_prev* 1000); printf("Finished \n"); g_trex.m_mg.reset(); g_trex.reset_counters(); } + /* this will give us all cores - master + tx + latency */ + g_trex.m_watchdog.mark_pending_monitor(g_trex.m_max_cores); + + g_trex.m_sl_rx_running = false; if ( get_is_stateless() ) { g_trex.start_master_stateless(); diff --git a/src/rpc-server/trex_rpc_async_server.cpp b/src/rpc-server/trex_rpc_async_server.cpp index aee92539..82c42458 100644 --- a/src/rpc-server/trex_rpc_async_server.cpp +++ b/src/rpc-server/trex_rpc_async_server.cpp @@ -36,7 +36,7 @@ limitations under the License. * ZMQ based publisher server * */ -TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "publisher", lock) { +TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "publisher") { /* ZMQ is not thread safe - this should be outside */ m_context = zmq_ctx_new(); } diff --git a/src/rpc-server/trex_rpc_async_server.h b/src/rpc-server/trex_rpc_async_server.h index 80d92c2f..daefa174 100644 --- a/src/rpc-server/trex_rpc_async_server.h +++ b/src/rpc-server/trex_rpc_async_server.h @@ -33,7 +33,7 @@ limitations under the License. class TrexRpcServerAsync : public TrexRpcServerInterface { public: - TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL); + TrexRpcServerAsync(const TrexRpcServerConfig &cfg); protected: void _prepare(); diff --git a/src/rpc-server/trex_rpc_req_resp_server.cpp b/src/rpc-server/trex_rpc_req_resp_server.cpp index 5c587e0f..033f265c 100644 --- a/src/rpc-server/trex_rpc_req_resp_server.cpp +++ b/src/rpc-server/trex_rpc_req_resp_server.cpp @@ -32,11 +32,13 @@ limitations under the License. #include <zmq.h> #include <json/json.h> +#include "trex_watchdog.h" + /** * ZMQ based request-response server * */ -TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "req resp", lock) { +TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "ZMQ sync request-response") { } @@ -52,11 +54,19 @@ void TrexRpcServerReqRes::_prepare() { */ void TrexRpcServerReqRes::_rpc_thread_cb() { std::stringstream ss; + int zmq_rc; + + m_watchdog_handle = m_watchdog->register_monitor(m_name, 1); /* create a socket based on the configuration */ m_socket = zmq_socket (m_context, ZMQ_REP); + /* to make sure the watchdog gets tickles form time to time we give a timeout of 500ms */ + int timeout = 500; + zmq_rc = zmq_setsockopt (m_socket, ZMQ_RCVTIMEO, &timeout, sizeof(int)); + assert(zmq_rc == 0); + switch (m_cfg.get_protocol()) { case TrexRpcServerConfig::RPC_PROT_TCP: ss << "tcp://*:"; @@ -68,8 +78,8 @@ void TrexRpcServerReqRes::_rpc_thread_cb() { ss << m_cfg.get_port(); /* bind the scoket */ - int rc = zmq_bind (m_socket, ss.str().c_str()); - if (rc != 0) { + zmq_rc = zmq_bind (m_socket, ss.str().c_str()); + if (zmq_rc != 0) { throw TrexRpcException("Unable to start ZMQ server at: " + ss.str()); } @@ -90,6 +100,9 @@ void TrexRpcServerReqRes::_rpc_thread_cb() { /* must be done from the same thread */ zmq_close(m_socket); + + /* done */ + m_watchdog->disable_monitor(m_watchdog_handle); } bool @@ -101,10 +114,22 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) { rc = zmq_msg_init(&zmq_msg); assert(rc == 0); - rc = zmq_msg_recv (&zmq_msg, m_socket, 0); + while (true) { + m_watchdog->tickle(m_watchdog_handle); - if (rc == -1) { + rc = zmq_msg_recv (&zmq_msg, m_socket, 0); + if (rc != -1) { + break; + } + + /* timeout ? */ + if (errno == EAGAIN) { + continue; + } + + /* error ! */ zmq_msg_close(&zmq_msg); + /* normal shutdown and zmq_term was called */ if (errno == ETERM) { return false; @@ -113,7 +138,9 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) { } } - const char *data = (const char *)zmq_msg_data(&zmq_msg); + + + const char *data = (const char *)zmq_msg_data(&zmq_msg); size_t len = zmq_msg_size(&zmq_msg); msg.append(data, len); diff --git a/src/rpc-server/trex_rpc_req_resp_server.h b/src/rpc-server/trex_rpc_req_resp_server.h index 26b3248f..92d51a2a 100644 --- a/src/rpc-server/trex_rpc_req_resp_server.h +++ b/src/rpc-server/trex_rpc_req_resp_server.h @@ -32,7 +32,7 @@ limitations under the License. class TrexRpcServerReqRes : public TrexRpcServerInterface { public: - TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL); + TrexRpcServerReqRes(const TrexRpcServerConfig &cfg); /* for test purposes - bypass the ZMQ and inject a message */ std::string test_inject_request(const std::string &req); diff --git a/src/rpc-server/trex_rpc_server.cpp b/src/rpc-server/trex_rpc_server.cpp index 7d2e31a5..e4ca95c3 100644 --- a/src/rpc-server/trex_rpc_server.cpp +++ b/src/rpc-server/trex_rpc_server.cpp @@ -28,11 +28,20 @@ limitations under the License. #include <sstream> #include <iostream> +#include "trex_watchdog.h" + /************** RPC server interface ***************/ -TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *lock) : m_cfg(cfg), m_name(name), m_lock(lock) { +TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name) : m_cfg(cfg) { + m_name = name; + + m_lock = cfg.m_lock; + m_watchdog = cfg.m_watchdog; + m_watchdog_handle = -1; + m_is_running = false; m_is_verbose = false; + if (m_lock == NULL) { m_lock = &m_dummy_lock; } @@ -69,6 +78,7 @@ void TrexRpcServerInterface::start() { /* prepare for run */ _prepare(); + m_watchdog->mark_pending_monitor(); m_thread = new std::thread(&TrexRpcServerInterface::_rpc_thread_cb, this); if (!m_thread) { throw TrexRpcException("unable to create RPC thread"); @@ -119,8 +129,7 @@ get_current_date_time() { const std::string TrexRpcServer::s_server_uptime = get_current_date_time(); -TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg, - std::mutex *lock) { +TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg) { m_req_resp = NULL; @@ -130,7 +139,7 @@ TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg, if (req_resp_cfg->get_protocol() == TrexRpcServerConfig::RPC_PROT_MOCK) { m_req_resp = new TrexRpcServerReqResMock(*req_resp_cfg); } else { - m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg, lock); + m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg); } m_servers.push_back(m_req_resp); diff --git a/src/rpc-server/trex_rpc_server_api.h b/src/rpc-server/trex_rpc_server_api.h index a02b2cc0..3d9837ef 100644 --- a/src/rpc-server/trex_rpc_server_api.h +++ b/src/rpc-server/trex_rpc_server_api.h @@ -33,6 +33,7 @@ limitations under the License. class TrexRpcServerInterface; class TrexRpcServerReqRes; +class TrexWatchDog; /** * defines a configuration of generic RPC server @@ -47,8 +48,11 @@ public: RPC_PROT_MOCK }; - TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port) : m_protocol(protocol), m_port(port) { - + TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock, TrexWatchDog *watchdog) { + m_protocol = protocol; + m_port = port; + m_lock = lock; + m_watchdog = watchdog; } uint16_t get_port() const { @@ -62,6 +66,10 @@ public: private: rpc_prot_e m_protocol; uint16_t m_port; + +public: + std::mutex *m_lock; + TrexWatchDog *m_watchdog; }; /** @@ -72,7 +80,7 @@ private: class TrexRpcServerInterface { public: - TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *m_lock = NULL); + TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name); virtual ~TrexRpcServerInterface(); /** @@ -134,6 +142,8 @@ protected: std::string m_name; std::mutex *m_lock; std::mutex m_dummy_lock; + TrexWatchDog *m_watchdog; + int m_watchdog_handle; }; /** @@ -147,8 +157,7 @@ class TrexRpcServer { public: /* creates the collection of servers using configurations */ - TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg, - std::mutex *m_lock = NULL); + TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg); ~TrexRpcServer(); diff --git a/src/sim/trex_sim_stateless.cpp b/src/sim/trex_sim_stateless.cpp index acbeef69..d3981e97 100644 --- a/src/sim/trex_sim_stateless.cpp +++ b/src/sim/trex_sim_stateless.cpp @@ -200,7 +200,7 @@ SimStateless::prepare_control_plane() { m_publisher = new SimPublisher(); - TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0); + TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL, NULL); cfg.m_port_count = m_port_count; cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg; diff --git a/src/stateless/cp/trex_stateless.cpp b/src/stateless/cp/trex_stateless.cpp index 5bbe9faf..698ede90 100644 --- a/src/stateless/cp/trex_stateless.cpp +++ b/src/stateless/cp/trex_stateless.cpp @@ -40,7 +40,7 @@ TrexStateless::TrexStateless(const TrexStatelessCfg &cfg) { /* create RPC servers */ /* set both servers to mutex each other */ - m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg, cfg.m_global_lock); + m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg); m_rpc_server->set_verbose(cfg.m_rpc_server_verbose); /* configure ports */ diff --git a/src/stateless/cp/trex_stateless.h b/src/stateless/cp/trex_stateless.h index 033326ca..83ab6976 100644 --- a/src/stateless/cp/trex_stateless.h +++ b/src/stateless/cp/trex_stateless.h @@ -41,6 +41,7 @@ limitations under the License. #include "trex_api_class.h" class TrexStatelessPort; +class TrexWatchDog; /** * unified stats @@ -87,7 +88,6 @@ public: m_rpc_server_verbose = false; m_platform_api = NULL; m_publisher = NULL; - m_global_lock = NULL; } const TrexRpcServerConfig *m_rpc_req_resp_cfg; @@ -95,7 +95,6 @@ public: bool m_rpc_server_verbose; uint8_t m_port_count; TrexPublisher *m_publisher; - std::mutex *m_global_lock; }; diff --git a/src/stateless/dp/trex_stateless_dp_core.cpp b/src/stateless/dp/trex_stateless_dp_core.cpp index 21334363..fe78c5b2 100644 --- a/src/stateless/dp/trex_stateless_dp_core.cpp +++ b/src/stateless/dp/trex_stateless_dp_core.cpp @@ -656,6 +656,7 @@ TrexStatelessDpCore::idle_state_loop() { int counter = 0; while (m_state == STATE_IDLE) { + m_core->tickle(); m_core->m_node_gen.m_v_if->flush_dp_rx_queue(); bool had_msg = periodic_check_for_cp_messages(); if (had_msg) { diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp index f9150ff7..b3555c13 100644 --- a/src/stateless/rx/trex_stateless_rx_core.cpp +++ b/src/stateless/rx/trex_stateless_rx_core.cpp @@ -52,12 +52,8 @@ void CRFC2544Info::export_data(rfc2544_info_t_ &obj) { obj.set_err_cntrs(m_seq_err, m_ooo, m_dup, m_seq_err_events_too_big, m_seq_err_events_too_low); obj.set_jitter(m_jitter.get_jitter()); - json_str = ""; - m_latency.dump_json("", json_str); - // This is a hack. We need to make the dump_json return json object. - reader.parse( json_str.c_str(), json); + m_latency.dump_json(json); obj.set_latency_json(json); - obj.set_last_max(m_last_max.getMax()); }; void CCPortLatencyStl::reset() { @@ -76,6 +72,9 @@ void CRxCoreStateless::create(const CRxSlCfg &cfg) { m_ring_to_cp = cp_rx->getRingDpToCp(0); m_state = STATE_IDLE; + m_watchdog_handle = -1; + m_watchdog = NULL; + for (int i = 0; i < m_max_ports; i++) { CLatencyManagerPerPortStl * lp = &m_ports[i]; lp->m_io = cfg.m_ports[i]; @@ -93,7 +92,15 @@ void CRxCoreStateless::handle_cp_msg(TrexStatelessCpToRxMsgBase *msg) { delete msg; } +void CRxCoreStateless::tickle() { + m_watchdog->tickle(m_watchdog_handle); +} + bool CRxCoreStateless::periodic_check_for_cp_messages() { + + /* tickle the watchdog */ + tickle(); + /* fast path */ if ( likely ( m_ring_from_cp->isEmpty() ) ) { return false; @@ -140,11 +147,15 @@ void CRxCoreStateless::idle_state_loop() { } } -void CRxCoreStateless::start() { +void CRxCoreStateless::start(TrexWatchDog &watchdog) { int count = 0; int i = 0; bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable() ? true : false; + /* register a watchdog handle on current core */ + m_watchdog = &watchdog; + m_watchdog_handle = watchdog.register_monitor("STL RX CORE", 1); + while (true) { if (m_state == STATE_WORKING) { i++; @@ -167,6 +178,8 @@ void CRxCoreStateless::start() { count += try_rx(); } rte_pause(); + + m_watchdog->disable_monitor(m_watchdog_handle); } void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t *m) { diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h index d18356b6..ce1bc1ad 100644 --- a/src/stateless/rx/trex_stateless_rx_core.h +++ b/src/stateless/rx/trex_stateless_rx_core.h @@ -28,55 +28,6 @@ class TrexStatelessCpToRxMsgBase; -class CLastMax { - public: - CLastMax() { - m_max1 = 0; - m_max1 = 1; - m_choose = true; - } - - void update(dsec_t val) { - if (m_choose) { - if (val > m_max1) { - m_max1 = val; - sanb_smp_memory_barrier(); - } - } else { - if (val > m_max2) { - m_max2 = val; - sanb_smp_memory_barrier(); - } - } - } - - dsec_t getMax() { - if (m_choose) - return m_max2; - else - return m_max1; - } - - void switchMax() { - if (m_choose) { - m_max2 = 0; - m_choose = false; - sanb_smp_memory_barrier(); - } - else { - m_max1 = 0; - m_choose = true; - sanb_smp_memory_barrier(); - } - } - - private: - dsec_t m_max1; - dsec_t m_max2; - bool m_choose; -}; - - class CCPortLatencyStl { public: void reset(); @@ -112,12 +63,10 @@ class CRFC2544Info { void export_data(rfc2544_info_t_ &obj); inline void add_sample(double stime) { m_latency.Add(stime); - m_last_max.update(stime); m_jitter.calc(stime); } inline void sample_period_end() { m_latency.update(); - m_last_max.switchMax(); } inline uint32_t get_seq() {return m_seq;} inline void set_seq(uint32_t val) {m_seq = val;} @@ -136,7 +85,6 @@ class CRFC2544Info { uint64_t m_seq_err_events_too_low; // How many packet seq num lower than expected events we had uint64_t m_ooo; // Packets we got with seq num lower than expected (We guess they are out of order) uint64_t m_dup; // Packets we got with same seq num - CLastMax m_last_max; // maximum for last measurement period (reset whenever we read it). }; class CRxCoreStateless { @@ -147,7 +95,7 @@ class CRxCoreStateless { }; public: - void start(); + void start(TrexWatchDog &watchdog); void create(const CRxSlCfg &cfg); void reset_rx_stats(uint8_t port_id); int get_rx_stats(uint8_t port_id, rx_per_flow_t *rx_stats, int min, int max, bool reset @@ -165,6 +113,7 @@ class CRxCoreStateless { private: void handle_cp_msg(TrexStatelessCpToRxMsgBase *msg); bool periodic_check_for_cp_messages(); + void tickle(); void idle_state_loop(); void handle_rx_pkt(CLatencyManagerPerPortStl * lp, rte_mbuf_t * m); void handle_rx_queue_msgs(uint8_t thread_id, CNodeRing * r); @@ -176,6 +125,10 @@ class CRxCoreStateless { uint16_t get_hw_id(uint16_t id); private: + + TrexWatchDog *m_watchdog; + int m_watchdog_handle; + uint32_t m_max_ports; bool m_has_streams; CLatencyManagerPerPortStl m_ports[TREX_MAX_PORTS]; diff --git a/src/time_histogram.cpp b/src/time_histogram.cpp index 8a92cb6f..fefa59d6 100755 --- a/src/time_histogram.cpp +++ b/src/time_histogram.cpp @@ -66,10 +66,6 @@ bool CTimeHistogram::Add(dsec_t dt) { return false; } period_elem.inc_high_cnt(); - - if ( m_max_dt < dt) { - m_max_dt = dt; - } period_elem.update_max(dt); uint32_t d_10usec = (uint32_t)(dt*100000.0); @@ -118,6 +114,9 @@ void CTimeHistogram::update() { update_average(period_elem); m_total_cnt += period_elem.get_cnt(); m_total_cnt_high += period_elem.get_high_cnt(); + if ( m_max_dt < period_elem.get_max()) { + m_max_dt = period_elem.get_max(); + } } void CTimeHistogram::update_average(CTimeHistogramPerPeriodData &period_elem) { @@ -180,11 +179,7 @@ void CTimeHistogram::Dump(FILE *fd) { } } -/* - { "histogram" : [ {} ,{} ] } - -*/ - +// Used in statefull void CTimeHistogram::dump_json(std::string name,std::string & json ) { char buff[200]; if (name != "") @@ -222,3 +217,31 @@ void CTimeHistogram::dump_json(std::string name,std::string & json ) { } json+=" ] } ,"; } + +// Used in stateless +void CTimeHistogram::dump_json(Json::Value & json, bool add_histogram) { + int i, j; + uint32_t base=10; + CTimeHistogramPerPeriodData &period_elem = m_period_data[get_read_period_index()]; + + json["total_max"] = get_usec(m_max_dt); + json["last_max"] = get_usec(period_elem.get_max()); + json["average"] = get_average_latency(); + + if (add_histogram) { + for (j = 0; j < HISTOGRAM_SIZE_LOG; j++) { + for (i = 0; i < HISTOGRAM_SIZE; i++) { + if (m_hcnt[j][i] > 0) { + std::string key = static_cast<std::ostringstream*>( &(std::ostringstream() + << int(base * (i + 1)) ) )->str(); + json["histogram"][key] = Json::Value::UInt64(m_hcnt[j][i]); + } + } + base = base * 10; + } + if (m_total_cnt != m_total_cnt_high) { + json["histogram"]["0"] = Json::Value::UInt64(m_total_cnt - m_total_cnt_high); + } + } +} + diff --git a/src/time_histogram.h b/src/time_histogram.h index da70e677..0d532f1b 100755 --- a/src/time_histogram.h +++ b/src/time_histogram.h @@ -24,11 +24,12 @@ limitations under the License. #include <stdint.h> -#include "os_time.h" #include <stdio.h> #include <math.h> -#include "mbuf.h" #include <string> +#include <json/json.h> +#include "mbuf.h" +#include "os_time.h" class CTimeHistogramPerPeriodData { public: @@ -85,6 +86,7 @@ public: return period_elem.get_max_usec(); } void dump_json(std::string name,std::string & json ); + void dump_json(Json::Value & json, bool add_histogram = true); uint64_t get_count() {return m_total_cnt;} uint64_t get_high_count() {return m_total_cnt_high;} diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp new file mode 100644 index 00000000..e78e8e6d --- /dev/null +++ b/src/trex_watchdog.cpp @@ -0,0 +1,331 @@ +/* + Itay Marom + Cisco Systems, Inc. +*/ + +/* +Copyright (c) 2015-2015 Cisco Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "trex_watchdog.h" +#include "trex_exception.h" + +#include <assert.h> +#include <unistd.h> +#include <sstream> + +#include <sys/ptrace.h> +#include <execinfo.h> +#include <cxxabi.h> +#include <dlfcn.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include <iostream> +#include <stdexcept> + +#define DISABLE_WATCHDOG_ON_GDB + +static TrexWatchDog::monitor_st *global_monitor; + +const char *get_exe_name(); + +std::string exec(const char* cmd) { + char buffer[128]; + std::string result = ""; + std::shared_ptr<FILE> pipe(popen(cmd, "r"), pclose); + if (!pipe) throw std::runtime_error("popen() failed!"); + while (!feof(pipe.get())) { + if (fgets(buffer, 128, pipe.get()) != NULL) { + result += buffer; + } + } + return result; +} + +// This function produces a stack backtrace with demangled function & method names. +__attribute__((noinline)) +std::string Backtrace(int skip = 1) +{ + void *callstack[128]; + const int nMaxFrames = sizeof(callstack) / sizeof(callstack[0]); + char buf[1024]; + int nFrames = backtrace(callstack, nMaxFrames); + char **symbols = backtrace_symbols(callstack, nFrames); + + std::ostringstream trace_buf; + for (int i = skip; i < nFrames; i++) { + + Dl_info info; + if (dladdr(callstack[i], &info) && info.dli_sname) { + char *demangled = NULL; + int status = -1; + if (info.dli_sname[0] == '_') + demangled = abi::__cxa_demangle(info.dli_sname, NULL, 0, &status); + snprintf(buf, sizeof(buf), "%-3d %*p %s + %zd\n", + i, int(2 + sizeof(void*) * 2), callstack[i], + status == 0 ? demangled : + info.dli_sname == 0 ? symbols[i] : info.dli_sname, + (char *)callstack[i] - (char *)info.dli_saddr); + free(demangled); + } else { + snprintf(buf, sizeof(buf), "%-3d %*p %s\n", + i, int(2 + sizeof(void*) * 2), callstack[i], symbols[i]); + } + trace_buf << buf; + } + free(symbols); + if (nFrames == nMaxFrames) + trace_buf << "[truncated]\n"; + + /* add the addr2line info */ + std::stringstream addr2line; + + addr2line << "/usr/bin/addr2line -e " << get_exe_name() << " "; + for (int i = skip; i < nFrames; i++) { + addr2line << callstack[i] << " "; + } + + trace_buf << "\n\n*** addr2line information follows ***\n\n"; + try { + trace_buf << exec(addr2line.str().c_str()); + } catch (std::runtime_error &e) { + trace_buf << "\n" << e.what(); + } + + return trace_buf.str(); +} + +__attribute__((noinline)) +static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) { + std::stringstream ss; + + double now = now_sec(); + + ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds"; + + std::string backtrace = Backtrace(); + ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n"; + + throw std::runtime_error(ss.str()); +} + +void TrexWatchDog::mark_pending_monitor(int count) { + std::unique_lock<std::mutex> lock(m_lock); + m_pending += count; + lock.unlock(); +} + +void TrexWatchDog::block_on_pending(int max_block_time_ms) { + + int timeout_msec = max_block_time_ms; + + std::unique_lock<std::mutex> lock(m_lock); + + while (m_pending > 0) { + + lock.unlock(); + delay(1); + lock.lock(); + + timeout_msec -= 1; + if (timeout_msec == 0) { + throw TrexException("WATCHDOG: block on pending monitors timed out"); + } + } + + /* lock will be released */ +} + +/** + * register a monitor + * must be called from the relevant thread + * + * this function is thread safe + * + * @author imarom (01-Jun-16) + * + * @param name + * @param timeout_sec + * + * @return int + */ +int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) { + monitor_st monitor; + + /* cannot add monitors while active */ + assert(m_active == false); + + monitor.active = true; + monitor.tid = pthread_self(); + monitor.name = name; + monitor.timeout_sec = timeout_sec; + monitor.tickled = true; + monitor.ts = 0; + + /* critical section start */ + std::unique_lock<std::mutex> lock(m_lock); + + /* make sure no double register */ + for (auto &m : m_monitors) { + if (m.tid == pthread_self()) { + std::stringstream ss; + ss << "WATCHDOG: double register detected\n\n" << Backtrace(); + throw TrexException(ss.str()); + } + } + + monitor.handle = m_monitors.size(); + m_monitors.push_back(monitor); + + assert(m_pending > 0); + m_pending--; + + /* critical section end */ + lock.unlock(); + + return monitor.handle; +} + +/** + * will disable the monitor - it will no longer be watched + * + */ +void TrexWatchDog::disable_monitor(int handle) { + assert(handle < m_monitors.size()); + + m_monitors[handle].active = false; +} + +/** + * thread safe function + * + */ +void TrexWatchDog::tickle(int handle) { + + assert(handle < m_monitors.size()); + + /* not nesscary but write gets cache invalidate for nothing */ + if (m_monitors[handle].tickled) { + return; + } + + m_monitors[handle].tickled = true; +} + +void TrexWatchDog::register_signal() { + + /* do this once */ + if (g_signal_init) { + return; + } + + /* register a handler on SIG ALARM */ + struct sigaction sa; + memset (&sa, '\0', sizeof(sa)); + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = _callstack_signal_handler; + + int rc = sigaction(SIGALRM , &sa, NULL); + assert(rc == 0); + + g_signal_init = true; +} + +void TrexWatchDog::start() { + + block_on_pending(); + + /* no pending monitors */ + assert(m_pending == 0); + + /* under GDB - disable the watchdog */ + #ifdef DISABLE_WATCHDOG_ON_GDB + if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1) { + printf("\n\n*** GDB detected - disabling watchdog... ***\n\n"); + return; + } + #endif + + m_active = true; + m_thread = new std::thread(&TrexWatchDog::_main, this); + if (!m_thread) { + throw TrexException("unable to create watchdog thread"); + } +} + +void TrexWatchDog::stop() { + m_active = false; + + if (m_thread) { + m_thread->join(); + delete m_thread; + m_thread = NULL; + } +} + + + +/** + * main loop + * + */ +void TrexWatchDog::_main() { + + /* reset all the monitors */ + for (auto &monitor : m_monitors) { + monitor.tickled = true; + } + + /* start main loop */ + while (m_active) { + + dsec_t now = now_sec(); + + for (auto &monitor : m_monitors) { + + /* skip non active monitors */ + if (!monitor.active) { + continue; + } + + /* if its own - turn it off and write down the time */ + if (monitor.tickled) { + monitor.tickled = false; + monitor.ts = now; + continue; + } + + /* the bit is off - check the time first */ + if ( (now - monitor.ts) > monitor.timeout_sec ) { + global_monitor = &monitor; + + pthread_kill(monitor.tid, SIGALRM); + + /* nothing to do more... the other thread will terminate, but if not - we terminate */ + sleep(5); + printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str()); + exit(1); + } + + } + + /* the internal clock - 250 ms */ + delay(250); + } +} + +bool TrexWatchDog::g_signal_init = false; diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h new file mode 100644 index 00000000..63255180 --- /dev/null +++ b/src/trex_watchdog.h @@ -0,0 +1,141 @@ +/* + Itay Marom + Cisco Systems, Inc. +*/ + +/* +Copyright (c) 2015-2015 Cisco Systems, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef __TREX_WATCHDOG_H__ +#define __TREX_WATCHDOG_H__ + +#include <string> +#include <vector> +#include <thread> +#include <mutex> + +//#include "rte_memory.h" +#include "mbuf.h" +#include "os_time.h" + +class TrexWatchDog { +public: + TrexWatchDog() { + m_thread = NULL; + m_active = false; + m_pending = 0; + + register_signal(); + } + + /** + * registering a monitor happens from another thread + * this make sure that start will be able to block until + * all threads has registered + * + * @author imarom (01-Jun-16) + */ + void mark_pending_monitor(int count = 1); + + + /** + * blocks while monitors are pending registeration + * + * @author imarom (01-Jun-16) + */ + void block_on_pending(int max_block_time_ms = 200); + + + /** + * add a monitor to the watchdog + * this thread will be monitored and if timeout + * has passed without calling tick - an exception will be called + * + * @author imarom (31-May-16) + * + * @param name + * @param timeout + * + * @return int + */ + int register_monitor(const std::string &name, double timeout_sec); + + + /** + * disable a monitor - it will no longer be watched + * + */ + void disable_monitor(int handle); + + + /** + * should be called by each thread on it's handle + * + * @author imarom (31-May-16) + * + * @param handle + */ + void tickle(int handle); + + + /** + * start the watchdog + * + */ + void start(); + + + /** + * stop the watchdog + * + */ + void stop(); + + + /* should be cache aligned to avoid false sharing */ + struct monitor_st { + /* write fields are first */ + volatile bool active; + volatile bool tickled; + dsec_t ts; + + int handle; + double timeout_sec; + pthread_t tid; + std::string name; + + /* for for a full cacheline */ + uint8_t pad[15]; + }; + + +private: + void register_signal(); + void _main(); + + std::vector<monitor_st> m_monitors __rte_cache_aligned; + std::mutex m_lock; + + volatile bool m_active; + std::thread *m_thread; + volatile int m_pending; + + static bool g_signal_init; +}; + +static_assert(sizeof(TrexWatchDog::monitor_st) >= RTE_CACHE_LINE_SIZE, "sizeof(monitor_st) != RTE_CACHE_LINE_SIZE" ); + +#endif /* __TREX_WATCHDOG_H__ */ |