summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xlinux/ws_main.py6
-rwxr-xr-xlinux_dpdk/ws_main.py3
-rw-r--r--scripts/automation/regression/functional_tests/stl_basic_tests.py4
-rw-r--r--scripts/automation/regression/setups/trex17/benchmark.yaml2
-rwxr-xr-xscripts/automation/regression/trex_unit_test.py17
-rwxr-xr-xscripts/automation/trex_control_plane/server/singleton_daemon.py39
-rwxr-xr-xscripts/automation/trex_control_plane/server/trex_launch_thread.py3
-rwxr-xr-xscripts/automation/trex_control_plane/server/trex_server.py27
-rw-r--r--scripts/automation/trex_control_plane/stl/examples/stl_pcap.py4
-rw-r--r--scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py15
-rw-r--r--scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py27
-rw-r--r--scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py15
-rw-r--r--scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py15
-rwxr-xr-xscripts/ko/src/igb_uio.c115
-rwxr-xr-xscripts/master_daemon.py70
-rwxr-xr-xsrc/bp_gtest.cpp2
-rwxr-xr-xsrc/bp_sim.cpp9
-rwxr-xr-xsrc/bp_sim.h9
-rw-r--r--src/flow_stat.cpp30
-rw-r--r--src/flow_stat.h11
-rw-r--r--src/latency.cpp26
-rw-r--r--src/latency.h8
-rwxr-xr-xsrc/main.cpp7
-rw-r--r--src/main_dpdk.cpp43
-rw-r--r--src/rpc-server/trex_rpc_async_server.cpp2
-rw-r--r--src/rpc-server/trex_rpc_async_server.h2
-rw-r--r--src/rpc-server/trex_rpc_req_resp_server.cpp39
-rw-r--r--src/rpc-server/trex_rpc_req_resp_server.h2
-rw-r--r--src/rpc-server/trex_rpc_server.cpp17
-rw-r--r--src/rpc-server/trex_rpc_server_api.h19
-rw-r--r--src/sim/trex_sim_stateless.cpp2
-rw-r--r--src/stateless/cp/trex_stateless.cpp2
-rw-r--r--src/stateless/cp/trex_stateless.h3
-rw-r--r--src/stateless/dp/trex_stateless_dp_core.cpp1
-rw-r--r--src/stateless/rx/trex_stateless_rx_core.cpp25
-rw-r--r--src/stateless/rx/trex_stateless_rx_core.h59
-rwxr-xr-xsrc/time_histogram.cpp41
-rwxr-xr-xsrc/time_histogram.h6
-rw-r--r--src/trex_watchdog.cpp331
-rw-r--r--src/trex_watchdog.h141
40 files changed, 888 insertions, 311 deletions
diff --git a/linux/ws_main.py b/linux/ws_main.py
index 58f5b661..3aee05db 100755
--- a/linux/ws_main.py
+++ b/linux/ws_main.py
@@ -121,7 +121,7 @@ main_src = SrcGroup(dir='src',
'latency.cpp',
'flow_stat.cpp',
'flow_stat_parser.cpp',
-
+ 'trex_watchdog.cpp',
'pal/linux/pal_utl.cpp',
'pal/linux/mbuf.cpp',
'sim/trex_sim_stateless.cpp',
@@ -371,7 +371,7 @@ class build_option:
def get_link_flags(self):
# add here basic flags
- base_flags = ['-pthread'];
+ base_flags = [];
if self.isPIE():
base_flags.append('-lstdc++')
@@ -410,7 +410,7 @@ def build_prog (bld, build_obj):
linkflags = build_obj.get_link_flags(),
source = build_obj.get_src(),
use = build_obj.get_use_libs(),
- lib = ['z'],
+ lib = ['pthread', 'z', 'dl'],
rpath = bld.env.RPATH + build_obj.get_rpath(),
target = build_obj.get_target())
diff --git a/linux_dpdk/ws_main.py b/linux_dpdk/ws_main.py
index 4c2d3c51..a13c8fd7 100755
--- a/linux_dpdk/ws_main.py
+++ b/linux_dpdk/ws_main.py
@@ -98,6 +98,7 @@ main_src = SrcGroup(dir='src',
'utl_term_io.cpp',
'global_io_mode.cpp',
'main_dpdk.cpp',
+ 'trex_watchdog.cpp',
'debug.cpp',
'flow_stat.cpp',
'flow_stat_parser.cpp',
@@ -665,7 +666,7 @@ class build_option:
return (flags)
def get_link_flags(self):
- base_flags = [];
+ base_flags = ['-rdynamic'];
if self.is64Platform():
base_flags += ['-m64'];
base_flags += ['-lrt'];
diff --git a/scripts/automation/regression/functional_tests/stl_basic_tests.py b/scripts/automation/regression/functional_tests/stl_basic_tests.py
index dbbf2530..863307f1 100644
--- a/scripts/automation/regression/functional_tests/stl_basic_tests.py
+++ b/scripts/automation/regression/functional_tests/stl_basic_tests.py
@@ -119,9 +119,9 @@ class CStlBasic_Test(functional_general_test.CGeneralFunctional_Test):
def run_sim (self, yaml, output, options = "", silent = False, obj = None):
if output:
- user_cmd = "-f {0} -o {1} {2}".format(yaml, output, options)
+ user_cmd = "-f {0} -o {1} {2} -p {3}".format(yaml, output, options, self.scripts_path)
else:
- user_cmd = "-f {0} {1}".format(yaml, options)
+ user_cmd = "-f {0} {1} -p {2}".format(yaml, options, self.scripts_path)
if silent:
user_cmd += " --silent"
diff --git a/scripts/automation/regression/setups/trex17/benchmark.yaml b/scripts/automation/regression/setups/trex17/benchmark.yaml
index c6f588e6..3e1f7c9b 100644
--- a/scripts/automation/regression/setups/trex17/benchmark.yaml
+++ b/scripts/automation/regression/setups/trex17/benchmark.yaml
@@ -23,7 +23,7 @@ test_routing_imix_64:
test_static_routing_imix_asymmetric:
- multiplier : 0.8
+ multiplier : 0.7
cores : 1
bw_per_core : 9.635
diff --git a/scripts/automation/regression/trex_unit_test.py b/scripts/automation/regression/trex_unit_test.py
index 0762fc95..83650164 100755
--- a/scripts/automation/regression/trex_unit_test.py
+++ b/scripts/automation/regression/trex_unit_test.py
@@ -203,11 +203,10 @@ class CTRexTestConfiguringPlugin(Plugin):
print('Could not restart TRex daemon server')
sys.exit(-1)
- trex_cmds = CTRexScenario.trex.get_trex_cmds()
- if trex_cmds:
- if self.kill_running:
- CTRexScenario.trex.kill_all_trexes()
- else:
+ if self.kill_running:
+ CTRexScenario.trex.kill_all_trexes()
+ else:
+ if CTRexScenario.trex.get_trex_cmds():
print('TRex is already running')
sys.exit(-1)
@@ -238,11 +237,11 @@ class CTRexTestConfiguringPlugin(Plugin):
if self.stateful:
CTRexScenario.trex = None
if self.stateless:
- if not self.no_daemon:
+ if self.no_daemon:
+ if CTRexScenario.stl_trex and CTRexScenario.stl_trex.is_connected():
+ CTRexScenario.stl_trex.disconnect()
+ else:
CTRexScenario.trex.force_kill(False)
- if CTRexScenario.stl_trex and CTRexScenario.stl_trex.is_connected():
- CTRexScenario.stl_trex.disconnect()
- #time.sleep(3)
CTRexScenario.stl_trex = None
diff --git a/scripts/automation/trex_control_plane/server/singleton_daemon.py b/scripts/automation/trex_control_plane/server/singleton_daemon.py
index 7cfbc3bc..8fdedc6e 100755
--- a/scripts/automation/trex_control_plane/server/singleton_daemon.py
+++ b/scripts/automation/trex_control_plane/server/singleton_daemon.py
@@ -6,10 +6,12 @@ import tempfile
import types
from subprocess import Popen
from time import sleep
+import outer_packages
+import jsonrpclib
# uses Unix sockets for determine running process.
# (assumes used daemons will register proper socket)
-# all daemons should use -p argument as listening tcp port
+# all daemons should use -p argument as listening tcp port and check_connectivity RPC method
class SingletonDaemon(object):
# run_cmd can be function of how to run daemon or a str to run at subprocess
@@ -28,14 +30,14 @@ class SingletonDaemon(object):
# returns True if daemon is running
def is_running(self):
- lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
try:
- lock_socket.bind('\0' + self.tag) # the check is ~200000 faster and more reliable than checking via 'netstat' or 'ps' etc.
+ lock_socket = register_socket(self.tag) # the check is ~200000 faster and more reliable than checking via 'netstat' or 'ps' etc.
+ lock_socket.shutdown(socket.SHUT_RDWR)
lock_socket.close()
except socket.error: # Unix socket in use
return True
# Unix socket is not used, but maybe it's old version of daemon not using socket
- return bool(self.get_pid())
+ return bool(self.get_pid_by_listening_port())
# get pid of running daemon by registered Unix socket (most robust way)
@@ -73,7 +75,7 @@ class SingletonDaemon(object):
# kill daemon
- def kill(self, timeout = 5):
+ def kill(self, timeout = 10):
pid = self.get_pid()
if not pid:
return False
@@ -88,17 +90,27 @@ class SingletonDaemon(object):
ret_code, stdout, stderr = run_command('kill -9 %s' % pid) # unconditional kill
if ret_code:
raise Exception('Failed to run kill -9 command for %s: %s' % (self.name, [ret_code, stdout, stderr]))
- poll_rate = 0.1
- for i in range(inr(timeout / poll_rate)):
+ for i in range(int(timeout / poll_rate)):
if not self.is_running():
return True
sleep(poll_rate)
raise Exception('Could not kill %s, even with -9' % self.name)
+ # try connection as RPC client, return True upon success, False if fail
+ def check_connectivity(self, timeout = 5):
+ daemon = jsonrpclib.Server('http://127.0.0.1:%s/' % self.port)
+ poll_rate = 0.1
+ for i in range(int(timeout/poll_rate)):
+ try:
+ daemon.check_connectivity()
+ return True
+ except socket.error: # daemon is not up yet
+ sleep(poll_rate)
+ return False
# start daemon
# returns True if success, False if already running
- def start(self, timeout = 5):
+ def start(self, timeout = 20):
if self.is_running():
raise Exception('%s is already running' % self.name)
if not self.run_cmd:
@@ -112,6 +124,8 @@ class SingletonDaemon(object):
if timeout > 0:
poll_rate = 0.1
for i in range(int(timeout/poll_rate)):
+ if self.is_running():
+ break
sleep(poll_rate)
if bool(proc.poll()): # process ended with error
stdout_file.seek(0)
@@ -120,7 +134,9 @@ class SingletonDaemon(object):
elif proc.poll() == 0: # process runs other process, and ended
break
if self.is_running():
- return True
+ if self.check_connectivity():
+ return True
+ raise Exception('Daemon process is running, but no connectivity')
raise Exception('%s failed to run.' % self.name)
# restart the daemon
@@ -136,8 +152,9 @@ def register_socket(tag):
lock_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
try:
lock_socket.bind('\0%s' % tag)
+ return lock_socket
except socket.error:
- raise Exception('Error: process with tag %s is already running.' % tag)
+ raise socket.error('Error: process with tag %s is already running.' % tag)
# runs command
def run_command(command, timeout = 15, cwd = None):
@@ -153,6 +170,8 @@ def run_command(command, timeout = 15, cwd = None):
if proc.poll() is None:
proc.kill() # timeout
return (errno.ETIME, '', 'Timeout on running: %s' % command)
+ else:
+ proc.wait()
stdout_file.seek(0)
stderr_file.seek(0)
return (proc.returncode, stdout_file.read().decode(errors = 'replace'), stderr_file.read().decode(errors = 'replace'))
diff --git a/scripts/automation/trex_control_plane/server/trex_launch_thread.py b/scripts/automation/trex_control_plane/server/trex_launch_thread.py
index 74ce1750..82a7f996 100755
--- a/scripts/automation/trex_control_plane/server/trex_launch_thread.py
+++ b/scripts/automation/trex_control_plane/server/trex_launch_thread.py
@@ -6,6 +6,7 @@ import signal
import socket
from common.trex_status_e import TRexStatus
import subprocess
+import shlex
import time
import threading
import logging
@@ -32,7 +33,7 @@ class AsynchronousTRexSession(threading.Thread):
with open(os.devnull, 'w') as DEVNULL:
self.time_stamps['start'] = self.time_stamps['run_time'] = time.time()
- self.session = subprocess.Popen("exec "+self.cmd, cwd = self.launch_path, shell=True, stdin = DEVNULL, stderr = subprocess.PIPE, preexec_fn=os.setsid)
+ self.session = subprocess.Popen(shlex.split(self.cmd), cwd = self.launch_path, stdin = DEVNULL, stderr = subprocess.PIPE, preexec_fn=os.setsid, close_fds = True)
logger.info("TRex session initialized successfully, Parent process pid is {pid}.".format( pid = self.session.pid ))
while self.session.poll() is None: # subprocess is NOT finished
time.sleep(0.5)
diff --git a/scripts/automation/trex_control_plane/server/trex_server.py b/scripts/automation/trex_control_plane/server/trex_server.py
index 45ef9ac1..8f7e99f0 100755
--- a/scripts/automation/trex_control_plane/server/trex_server.py
+++ b/scripts/automation/trex_control_plane/server/trex_server.py
@@ -30,9 +30,9 @@ import shlex
import tempfile
try:
- from .singleton_daemon import register_socket
+ from .singleton_daemon import register_socket, run_command
except:
- from singleton_daemon import register_socket
+ from singleton_daemon import register_socket, run_command
# setup the logger
@@ -134,6 +134,7 @@ class CTRexServer(object):
self.server.register_function(self.add)
self.server.register_function(self.cancel_reservation)
self.server.register_function(self.connectivity_check)
+ self.server.register_function(self.connectivity_check, 'check_connectivity') # alias
self.server.register_function(self.force_trex_kill)
self.server.register_function(self.get_file)
self.server.register_function(self.get_files_list)
@@ -164,16 +165,6 @@ class CTRexServer(object):
self.server.shutdown()
#self.server.server_close()
- def _run_command(self, command, timeout = 15, cwd = None):
- if timeout:
- command = 'timeout %s %s' % (timeout, command)
- # pipes might stuck, even with timeout
- with tempfile.TemporaryFile() as stdout_file, tempfile.TemporaryFile() as stderr_file:
- proc = subprocess.Popen(shlex.split(command), stdout=stdout_file, stderr=stderr_file, cwd = cwd)
- proc.wait()
- stdout_file.seek(0)
- stderr_file.seek(0)
- return (proc.returncode, stdout_file.read().decode(errors = 'replace'), stderr_file.read().decode(errors = 'replace'))
# get files from Trex server and return their content (mainly for logs)
@staticmethod
@@ -234,7 +225,7 @@ class CTRexServer(object):
try:
logger.info("Processing get_trex_version() command.")
if not self.trex_version:
- ret_code, stdout, stderr = self._run_command('./t-rex-64 --help', cwd = self.TREX_PATH, timeout = 0)
+ ret_code, stdout, stderr = run_command('./t-rex-64 --help', cwd = self.TREX_PATH)
search_result = re.search('\n\s*(Version\s*:.+)', stdout, re.DOTALL)
if not search_result:
raise Exception('Could not determine version from ./t-rex-64 --help')
@@ -383,7 +374,7 @@ class CTRexServer(object):
# returns list of tuples (pid, command line) of running TRex(es)
def get_trex_cmds(self):
logger.info('Processing get_trex_cmds() command.')
- ret_code, stdout, stderr = self._run_command('ps -u root --format pid,comm,cmd')
+ ret_code, stdout, stderr = run_command('ps -u root --format pid,comm,cmd')
if ret_code:
raise Exception('Failed to determine running processes, stderr: %s' % stderr)
trex_cmds_list = []
@@ -403,12 +394,12 @@ class CTRexServer(object):
return False
for pid, cmd in trex_cmds_list:
logger.info('Killing process %s %s' % (pid, cmd))
- self._run_command('kill %s' % pid)
- ret_code_ps, _, _ = self._run_command('ps -p %s' % pid)
+ run_command('kill %s' % pid)
+ ret_code_ps, _, _ = run_command('ps -p %s' % pid)
if not ret_code_ps:
logger.info('Killing with -9.')
- self._run_command('kill -9 %s' % pid)
- ret_code_ps, _, _ = self._run_command('ps -p %s' % pid)
+ run_command('kill -9 %s' % pid)
+ ret_code_ps, _, _ = run_command('ps -p %s' % pid)
if not ret_code_ps:
logger.info('Could not kill process.')
raise Exception('Could not kill process %s %s' % (pid, cmd))
diff --git a/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py b/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py
index eae0f18b..98af6134 100644
--- a/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py
+++ b/scripts/automation/trex_control_plane/stl/examples/stl_pcap.py
@@ -39,14 +39,12 @@ def inject_pcap (pcap_file, server, port, loop_count, ipg_usec, use_vm, remove_f
c.reset(ports = [port])
c.clear_stats()
- d = c.push_pcap(pcap_file,
+ c.push_pcap(pcap_file,
ipg_usec = ipg_usec,
count = loop_count,
vm = vm,
packet_hook = packet_hook)
- STLSim().run(d, outfile = 'test.cap')
-
c.wait_on_traffic()
diff --git a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py
index 11e80b9a..62724e64 100644
--- a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py
+++ b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_sim.py
@@ -40,18 +40,11 @@ class BpSimException(Exception):
# stateless simulation
class STLSim(object):
- def __init__ (self, bp_sim_path = None, handler = 0, port_id = 0, api_h = "dummy"):
+ def __init__ (self, bp_sim_path, handler = 0, port_id = 0, api_h = "dummy"):
- if not bp_sim_path:
- # auto find scripts
- m = re.match(".*/trex-core", os.getcwd())
- if not m:
- raise STLError('cannot find BP sim path, please provide it')
-
- self.bp_sim_path = os.path.join(m.group(0), 'scripts')
-
- else:
- self.bp_sim_path = bp_sim_path
+ self.bp_sim_path = os.path.abspath(bp_sim_path)
+ if not os.path.exists(self.bp_sim_path):
+ raise STLError('BP sim path %s does not exist' % self.bp_sim_path)
# dummies
self.handler = handler
diff --git a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
index 94a45577..0ec98a0d 100644
--- a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
+++ b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
@@ -1020,19 +1020,20 @@ class CLatencyStats(CTRexStats):
output[int_pg_id]['err_cntrs'] = current_pg['err_cntrs']
output[int_pg_id]['latency'] = {}
- output[int_pg_id]['latency']['last_max'] = current_pg['latency']['last_max']
- output[int_pg_id]['latency']['jitter'] = current_pg['latency']['jitter']
- if current_pg['latency']['h'] != "":
- output[int_pg_id]['latency']['average'] = current_pg['latency']['h']['s_avg']
- output[int_pg_id]['latency']['total_max'] = current_pg['latency']['h']['max_usec']
- output[int_pg_id]['latency']['histogram'] = {elem['key']: elem['val']
- for elem in current_pg['latency']['h']['histogram']}
- zero_count = current_pg['latency']['h']['cnt'] - current_pg['latency']['h']['high_cnt']
- if zero_count != 0:
- output[int_pg_id]['latency']['total_min'] = 1
- output[int_pg_id]['latency']['histogram'][0] = zero_count
- elif output[int_pg_id]['latency']['histogram']:
- output[int_pg_id]['latency']['total_min'] = min(output[int_pg_id]['latency']['histogram'].keys())
+ if 'latency' in current_pg:
+ for field in ['jitter', 'average', 'total_max', 'last_max']:
+ if field in current_pg['latency']:
+ output[int_pg_id]['latency'][field] = current_pg['latency'][field]
+ else:
+ output[int_pg_id]['latency'][field] = StatNotAvailable(field)
+
+ if 'histogram' in current_pg['latency']:
+ output[int_pg_id]['latency']['histogram'] = {int(elem): current_pg['latency']['histogram'][elem]
+ for elem in current_pg['latency']['histogram']}
+ min_val = min(output[int_pg_id]['latency']['histogram'].keys())
+ if min_val == 0:
+ min_val = 2
+ output[int_pg_id]['latency']['total_min'] = min_val
else:
output[int_pg_id]['latency']['total_min'] = StatNotAvailable('total_min')
diff --git a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
index 32f0a2d1..729ab3fd 100644
--- a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
+++ b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
@@ -69,13 +69,14 @@ RTF_REJECT = 0x0200
LOOPBACK_NAME="lo"
-with os.popen("tcpdump -V 2> /dev/null") as _f:
- if _f.close() >> 8 == 0x7f:
- log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
- TCPDUMP=0
- else:
- TCPDUMP=1
-del(_f)
+#with os.popen("tcpdump -V 2> /dev/null") as _f:
+# if _f.close() >> 8 == 0x7f:
+# log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
+# TCPDUMP=0
+# else:
+# TCPDUMP=1
+#del(_f)
+TCPDUMP=0
def get_if_raw_hwaddr(iff):
diff --git a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
index 3eab16c6..40ff9e35 100644
--- a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
+++ b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
@@ -71,13 +71,14 @@ PCAP_ERRBUF_SIZE=256
LOOPBACK_NAME="lo"
-with os.popen("tcpdump -V 2> /dev/null") as _f:
- if _f.close() >> 8 == 0x7f:
- log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
- TCPDUMP=0
- else:
- TCPDUMP=1
-del(_f)
+#with os.popen("tcpdump -V 2> /dev/null") as _f:
+# if _f.close() >> 8 == 0x7f:
+# log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
+# TCPDUMP=0
+# else:
+# TCPDUMP=1
+#del(_f)
+TCPDUMP=0
def get_if_raw_hwaddr(iff):
diff --git a/scripts/ko/src/igb_uio.c b/scripts/ko/src/igb_uio.c
index faeb0b68..27bec6a3 100755
--- a/scripts/ko/src/igb_uio.c
+++ b/scripts/ko/src/igb_uio.c
@@ -31,6 +31,7 @@
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/version.h>
+#include <linux/slab.h>
#ifdef CONFIG_XEN_DOM0
#include <xen/xen.h>
@@ -39,15 +40,6 @@
#include "compat.h"
-#ifdef RTE_PCI_CONFIG
-#define PCI_SYS_FILE_BUF_SIZE 10
-#define PCI_DEV_CAP_REG 0xA4
-#define PCI_DEV_CTRL_REG 0xA8
-#define PCI_DEV_CAP_EXT_TAG_MASK 0x20
-#define PCI_DEV_CTRL_EXT_TAG_SHIFT 8
-#define PCI_DEV_CTRL_EXT_TAG_MASK (1 << PCI_DEV_CTRL_EXT_TAG_SHIFT)
-#endif
-
/**
* A structure describing the private information for a uio device.
*/
@@ -57,22 +49,15 @@ struct rte_uio_pci_dev {
enum rte_intr_mode mode;
};
-static char *intr_mode = NULL;
+static char *intr_mode;
static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
-static inline struct rte_uio_pci_dev *
-igbuio_get_uio_pci_dev(struct uio_info *info)
-{
- return container_of(info, struct rte_uio_pci_dev, info);
-}
-
/* sriov sysfs */
static ssize_t
show_max_vfs(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, 10, "%u\n",
- pci_num_vf(container_of(dev, struct pci_dev, dev)));
+ return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
}
static ssize_t
@@ -81,7 +66,7 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
{
int err = 0;
unsigned long max_vfs;
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
if (0 != kstrtoul(buf, 0, &max_vfs))
return -EINVAL;
@@ -100,19 +85,9 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
static ssize_t
show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
{
- struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
- uint32_t val = 0;
-
- pci_read_config_dword(pci_dev, PCI_DEV_CAP_REG, &val);
- if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) /* Not supported */
- return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n", "invalid");
-
- val = 0;
- pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
- PCI_DEV_CTRL_REG, &val);
+ dev_info(dev, "Deprecated\n");
- return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n",
- (val & PCI_DEV_CTRL_EXT_TAG_MASK) ? "on" : "off");
+ return 0;
}
static ssize_t
@@ -121,36 +96,9 @@ store_extended_tag(struct device *dev,
const char *buf,
size_t count)
{
- struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
- uint32_t val = 0, enable;
-
- if (strncmp(buf, "on", 2) == 0)
- enable = 1;
- else if (strncmp(buf, "off", 3) == 0)
- enable = 0;
- else
- return -EINVAL;
-
- pci_cfg_access_lock(pci_dev);
- pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
- PCI_DEV_CAP_REG, &val);
- if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) { /* Not supported */
- pci_cfg_access_unlock(pci_dev);
- return -EPERM;
- }
-
- val = 0;
- pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
- PCI_DEV_CTRL_REG, &val);
- if (enable)
- val |= PCI_DEV_CTRL_EXT_TAG_MASK;
- else
- val &= ~PCI_DEV_CTRL_EXT_TAG_MASK;
- pci_bus_write_config_dword(pci_dev->bus, pci_dev->devfn,
- PCI_DEV_CTRL_REG, val);
- pci_cfg_access_unlock(pci_dev);
+ dev_info(dev, "Deprecated\n");
- return count;
+ return 0;
}
static ssize_t
@@ -158,10 +106,9 @@ show_max_read_request_size(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
- int val = pcie_get_readrq(pci_dev);
+ dev_info(dev, "Deprecated\n");
- return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%d\n", val);
+ return 0;
}
static ssize_t
@@ -170,18 +117,9 @@ store_max_read_request_size(struct device *dev,
const char *buf,
size_t count)
{
- struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
- unsigned long size = 0;
- int ret;
-
- if (0 != kstrtoul(buf, 0, &size))
- return -EINVAL;
-
- ret = pcie_set_readrq(pci_dev, (int)size);
- if (ret < 0)
- return ret;
+ dev_info(dev, "Deprecated\n");
- return count;
+ return 0;
}
#endif
@@ -243,7 +181,7 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
static int
igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
{
- struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+ struct rte_uio_pci_dev *udev = info->priv;
struct pci_dev *pdev = udev->pdev;
pci_cfg_access_lock(pdev);
@@ -253,8 +191,13 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
else if (udev->mode == RTE_INTR_MODE_MSIX) {
struct msi_desc *desc;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
list_for_each_entry(desc, &pdev->msi_list, list)
igbuio_msix_mask_irq(desc, irq_state);
+#else
+ list_for_each_entry(desc, &pdev->dev.msi_list, list)
+ igbuio_msix_mask_irq(desc, irq_state);
+#endif
}
pci_cfg_access_unlock(pdev);
@@ -268,7 +211,7 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
static irqreturn_t
igbuio_pci_irqhandler(int irq, struct uio_info *info)
{
- struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+ struct rte_uio_pci_dev *udev = info->priv;
/* Legacy mode need to mask in hardware */
if (udev->mode == RTE_INTR_MODE_LEGACY &&
@@ -333,7 +276,7 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
unsigned long addr, len;
void *internal_addr;
- if (sizeof(info->mem) / sizeof(info->mem[0]) <= n)
+ if (n >= ARRAY_SIZE(info->mem))
return -EINVAL;
addr = pci_resource_start(dev, pci_bar);
@@ -358,7 +301,7 @@ igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
{
unsigned long addr, len;
- if (sizeof(info->port) / sizeof(info->port[0]) <= n)
+ if (n >= ARRAY_SIZE(info->port))
return -EINVAL;
addr = pci_resource_start(dev, pci_bar);
@@ -403,7 +346,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
iom = 0;
iop = 0;
- for (i = 0; i != sizeof(bar_names) / sizeof(bar_names[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
if (pci_resource_len(dev, i) != 0 &&
pci_resource_start(dev, i) != 0) {
flags = pci_resource_flags(dev, i);
@@ -562,23 +505,17 @@ fail_free:
static void
igbuio_pci_remove(struct pci_dev *dev)
{
- struct uio_info *info = pci_get_drvdata(dev);
- struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
-
- if (info->priv == NULL) {
- pr_notice("Not igbuio device\n");
- return;
- }
+ struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
- uio_unregister_device(info);
- igbuio_pci_release_iomem(info);
+ uio_unregister_device(&udev->info);
+ igbuio_pci_release_iomem(&udev->info);
if (udev->mode == RTE_INTR_MODE_MSIX)
pci_disable_msix(dev);
pci_release_regions(dev);
pci_disable_device(dev);
pci_set_drvdata(dev, NULL);
- kfree(info);
+ kfree(udev);
}
static int
diff --git a/scripts/master_daemon.py b/scripts/master_daemon.py
index 0b1b7363..390db0a3 100755
--- a/scripts/master_daemon.py
+++ b/scripts/master_daemon.py
@@ -9,6 +9,7 @@ from collections import OrderedDict
from argparse import *
from time import time, sleep
from glob import glob
+import signal
sys.path.append(os.path.join('automation', 'trex_control_plane', 'server'))
import outer_packages
@@ -16,7 +17,7 @@ from singleton_daemon import SingletonDaemon, register_socket, run_command
from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer
import termstyle
-logging.basicConfig(level = logging.FATAL) # keep quiet
+logger = logging.getLogger('Master daemon')
### Server functions ###
@@ -83,33 +84,52 @@ def start_master_daemon():
proc.start()
for i in range(50):
if master_daemon.is_running():
- print(termstyle.green('Master daemon is started'))
+ print(termstyle.green('Master daemon is started.'))
os._exit(0)
sleep(0.1)
- fail(termstyle.red('Master daemon failed to run'))
-
+ fail(termstyle.red('Master daemon failed to run. Please look in log: %s' % logging_file))
+
+def set_logger():
+ if os.path.exists(logging_file):
+ if os.path.exists(logging_file_bu):
+ os.unlink(logging_file_bu)
+ os.rename(logging_file, logging_file_bu)
+ hdlr = logging.FileHandler(logging_file)
+ formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt = '%Y-%m-%d %H:%M:%S')
+ hdlr.setFormatter(formatter)
+ logger.addHandler(hdlr)
+ logger.setLevel(logging.INFO)
def start_master_daemon_func():
- register_socket(master_daemon.tag)
- server = SimpleJSONRPCServer(('0.0.0.0', master_daemon.port))
- print('Started master daemon (port %s)' % master_daemon.port)
- server.register_function(add)
- server.register_function(check_connectivity)
- server.register_function(get_trex_path)
- server.register_function(update_trex)
- # trex_daemon_server
- server.register_function(trex_daemon_server.is_running, 'is_trex_daemon_running')
- server.register_function(trex_daemon_server.restart, 'restart_trex_daemon')
- server.register_function(trex_daemon_server.start, 'start_trex_daemon')
- server.register_function(trex_daemon_server.stop, 'stop_trex_daemon')
- # stl rpc proxy
- server.register_function(stl_rpc_proxy.is_running, 'is_stl_rpc_proxy_running')
- server.register_function(stl_rpc_proxy.restart, 'restart_stl_rpc_proxy')
- server.register_function(stl_rpc_proxy.start, 'start_stl_rpc_proxy')
- server.register_function(stl_rpc_proxy.stop, 'stop_stl_rpc_proxy')
- server.register_function(server.funcs.keys, 'get_methods') # should be last
- server.serve_forever()
+ try:
+ set_logger()
+ register_socket(master_daemon.tag)
+ server = SimpleJSONRPCServer(('0.0.0.0', master_daemon.port))
+ logger.info('Started master daemon (port %s)' % master_daemon.port)
+ server.register_function(add)
+ server.register_function(check_connectivity)
+ server.register_function(get_trex_path)
+ server.register_function(update_trex)
+ # trex_daemon_server
+ server.register_function(trex_daemon_server.is_running, 'is_trex_daemon_running')
+ server.register_function(trex_daemon_server.restart, 'restart_trex_daemon')
+ server.register_function(trex_daemon_server.start, 'start_trex_daemon')
+ server.register_function(trex_daemon_server.stop, 'stop_trex_daemon')
+ # stl rpc proxy
+ server.register_function(stl_rpc_proxy.is_running, 'is_stl_rpc_proxy_running')
+ server.register_function(stl_rpc_proxy.restart, 'restart_stl_rpc_proxy')
+ server.register_function(stl_rpc_proxy.start, 'start_stl_rpc_proxy')
+ server.register_function(stl_rpc_proxy.stop, 'stop_stl_rpc_proxy')
+ server.register_function(server.funcs.keys, 'get_methods') # should be last
+ signal.signal(signal.SIGTSTP, stop_handler)
+ signal.signal(signal.SIGTERM, stop_handler)
+ server.serve_forever()
+ except Exception as e:
+ logger.error('Closing due to error: %s' % e)
+def stop_handler(*args, **kwargs):
+ logger.info('Got killed explicitly.')
+ sys.exit(0)
# returns True if given path is under current dir or /tmp
def _check_path_under_current_or_temp(path):
@@ -170,8 +190,9 @@ stl_rpc_proxy = SingletonDaemon('Stateless RPC proxy', 'trex_stl_rpc_proxy'
trex_daemon_server = SingletonDaemon('TRex daemon server', 'trex_daemon_server', args.trex_daemon_port, './trex_daemon_server start', args.trex_dir)
master_daemon = SingletonDaemon('Master daemon', 'trex_master_daemon', args.master_port, start_master_daemon) # add ourself for easier check if running, kill etc.
-daemons_by_name = {}
tmp_dir = '/tmp/trex-tmp'
+logging_file = '/var/log/trex/master_daemon.log'
+logging_file_bu = '/var/log/trex/master_daemon.log_bu'
if not _check_path_under_current_or_temp(args.trex_dir):
raise Exception('Only allowed to use path under /tmp or current directory')
@@ -182,6 +203,7 @@ if not os.path.exists(args.trex_dir):
os.makedirs(args.trex_dir)
os.chmod(args.trex_dir, 0o777)
elif args.allow_update:
+ print('Due to allow updates flag, setting mode 777 on given directory')
os.chmod(args.trex_dir, 0o777)
if not os.path.exists(tmp_dir):
diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp
index 86b7821b..b36ac6e1 100755
--- a/src/bp_gtest.cpp
+++ b/src/bp_gtest.cpp
@@ -897,7 +897,7 @@ TEST_F(basic, latency3) {
EXPECT_EQ_UINT32(mg.is_active()?1:0, (uint32_t)0)<< "pass";
- mg.start(8);
+ mg.start(8, NULL);
mg.stop();
mg.Dump(stdout);
mg.DumpShort(stdout);
diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp
index 51023b90..c9171ae5 100755
--- a/src/bp_sim.cpp
+++ b/src/bp_sim.cpp
@@ -24,6 +24,8 @@ limitations under the License.
#include "utl_json.h"
#include "utl_yaml.h"
#include "msg_manager.h"
+#include "trex_watchdog.h"
+
#include <common/basic_utils.h>
#include <trex_stream_node.h>
@@ -3322,6 +3324,9 @@ bool CFlowGenListPerThread::Create(uint32_t thread_id,
m_max_threads=max_threads;
m_thread_id=thread_id;
+ m_watchdog = NULL;
+ m_watchdog_handle = -1;
+
m_cpu_cp_u.Create(&m_cpu_dp_u);
uint32_t socket_id=rte_lcore_to_socket_id(m_core_id);
@@ -3897,6 +3902,10 @@ void CNodeGenerator::handle_flow_pkt(CGenNode *node, CFlowGenListPerThread *thre
}
void CNodeGenerator::handle_flow_sync(CGenNode *node, CFlowGenListPerThread *thread, bool &exit_scheduler) {
+
+ /* tickle the watchdog */
+ thread->tickle();
+
/* flow sync message is a sync point for time */
thread->m_cur_time_sec = node->m_time;
diff --git a/src/bp_sim.h b/src/bp_sim.h
index bb7dd928..d940080e 100755
--- a/src/bp_sim.h
+++ b/src/bp_sim.h
@@ -58,6 +58,7 @@ limitations under the License.
#include <arpa/inet.h>
#include "platform_cfg.h"
#include "flow_stat.h"
+#include "trex_watchdog.h"
#include <trex_stateless_dp_core.h>
@@ -3637,6 +3638,12 @@ public:
m_node_gen.m_v_if->flush_tx_queue();
}
+ void tickle() {
+ if (m_watchdog) {
+ m_watchdog->tickle(m_watchdog_handle);
+ }
+ }
+
/* return the dual port ID this thread is attached to in 4 ports configuration
there are 2 dual-ports
@@ -3759,6 +3766,8 @@ public:
CTupleGeneratorSmart m_smart_gen;
+ TrexWatchDog *m_watchdog;
+ int m_watchdog_handle;
public:
CNodeGenerator m_node_gen;
diff --git a/src/flow_stat.cpp b/src/flow_stat.cpp
index 2385e03f..98b9494b 100644
--- a/src/flow_stat.cpp
+++ b/src/flow_stat.cpp
@@ -464,6 +464,8 @@ CFlowStatRuleMgr::CFlowStatRuleMgr() {
m_rx_core = NULL;
m_hw_id_map.create(MAX_FLOW_STATS);
m_hw_id_map_payload.create(MAX_FLOW_STATS_PAYLOAD);
+ memset(m_rx_cant_count_err, 0, sizeof(m_rx_cant_count_err));
+ memset(m_tx_cant_count_err, 0, sizeof(m_tx_cant_count_err));
}
CFlowStatRuleMgr::~CFlowStatRuleMgr() {
@@ -959,6 +961,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
p_user_id->set_need_to_send_rx(port);
}
} else {
+ m_rx_cant_count_err[port] += rx_pkts.get_pkts();
std::cerr << __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx packets, on port "
<< (uint16_t)port << ", because no mapping was found." << std::endl;
}
@@ -972,6 +975,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
p_user_id->set_need_to_send_tx(port);
}
} else {
+ m_tx_cant_count_err[port] += tx_pkts.get_pkts();;
std::cerr << __METHOD_NAME__ << i << ":Could not count " << tx_pkts << " tx packets on port "
<< (uint16_t)port << ", because no mapping was found." << std::endl;
}
@@ -990,6 +994,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
p_user_id->set_need_to_send_rx(port);
}
} else {
+ m_rx_cant_count_err[port] += rx_pkts.get_pkts();;
std::cerr << __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx payload packets, on port "
<< (uint16_t)port << ", because no mapping was found." << std::endl;
}
@@ -1003,6 +1008,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
p_user_id->set_need_to_send_tx(port);
}
} else {
+ m_tx_cant_count_err[port] += tx_pkts.get_pkts();;
std::cerr << __METHOD_NAME__ << i << ":Could not count " << tx_pkts << " tx packets on port "
<< (uint16_t)port << ", because no mapping was found." << std::endl;
}
@@ -1011,6 +1017,15 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
}
// build json report
+ // general per port data
+ for (uint8_t port = 0; port < m_num_ports; port++) {
+ std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str();
+ if (m_rx_cant_count_err[port] != 0)
+ s_data_section["port_data"][str_port]["rx_err"] = m_rx_cant_count_err[port];
+ if (m_tx_cant_count_err[port] != 0)
+ s_data_section["port_data"][str_port]["tx_err"] = m_tx_cant_count_err[port];
+ }
+
flow_stat_user_id_map_it_t it;
for (it = m_user_id_map.begin(); it != m_user_id_map.end(); it++) {
bool send_empty = true;
@@ -1022,6 +1037,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
user_id_info->set_was_sent(true);
send_empty = false;
}
+ // flow stat json
for (uint8_t port = 0; port < m_num_ports; port++) {
std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str();
if (user_id_info->need_to_send_rx(port) || baseline) {
@@ -1042,10 +1058,11 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
s_data_section[str_user_id] = Json::objectValue;
}
+ // latency info json
if (user_id_info->rfc2544_support()) {
CFlowStatUserIdInfoPayload *user_id_info_p = (CFlowStatUserIdInfoPayload *)user_id_info;
// payload object. Send also latency, jitter...
- Json::Value lat_hist;
+ Json::Value lat_hist = Json::arrayValue;
if (user_id_info->is_hw_id()) {
// if mapped to hw_id, take info from what we just got from rx core
uint16_t hw_id = user_id_info->get_hw_id();
@@ -1055,17 +1072,16 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
user_id_info_p->set_dup_cnt(rfc2544_info[hw_id].get_dup_cnt());
user_id_info_p->set_seq_err_big_cnt(rfc2544_info[hw_id].get_seq_err_ev_big());
user_id_info_p->set_seq_err_low_cnt(rfc2544_info[hw_id].get_seq_err_ev_low());
- l_data_section[str_user_id]["latency"]["h"] = lat_hist;
- l_data_section[str_user_id]["latency"]["last_max"] = rfc2544_info[hw_id].get_last_max_usec();
+ l_data_section[str_user_id]["latency"] = lat_hist;
l_data_section[str_user_id]["latency"]["jitter"] = rfc2544_info[hw_id].get_jitter_usec();
} else {
// Not mapped to hw_id. Get saved info.
user_id_info_p->get_latency_json(lat_hist);
- l_data_section[str_user_id]["latency"]["h"] = lat_hist;
- l_data_section[str_user_id]["latency"]["last_max"] = 0;
- l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec();
+ if (lat_hist != Json::nullValue) {
+ l_data_section[str_user_id]["latency"] = lat_hist;
+ l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec();
+ }
}
- //todo: add last 10 samples
l_data_section[str_user_id]["err_cntrs"]["dropped"]
= Json::Value::UInt64(user_id_info_p->get_seq_err_cnt());
l_data_section[str_user_id]["err_cntrs"]["out_of_order"]
diff --git a/src/flow_stat.h b/src/flow_stat.h
index 3387c2d3..8671b228 100644
--- a/src/flow_stat.h
+++ b/src/flow_stat.h
@@ -24,11 +24,12 @@
#include <stdio.h>
#include <string>
#include <map>
+#include <json/json.h>
#include "trex_defs.h"
#include "trex_exception.h"
#include "trex_stream.h"
#include "msg_manager.h"
-#include <internal_api/trex_platform_api.h>
+#include "internal_api/trex_platform_api.h"
// range reserved for rx stat measurement is from IP_ID_RESERVE_BASE to 0xffff
// Do not change this value. In i350 cards, we filter according to first byte of IP ID
@@ -132,7 +133,7 @@ class rfc2544_info_t_ {
m_seq_err_ev_big = 0;
m_seq_err_ev_low = 0;
m_jitter = 0;
- m_latency = Json::Value("");
+ m_latency = Json::nullValue;
m_last_max_latency = 0;
}
@@ -332,7 +333,7 @@ class CFlowStatUserIdInfoPayload : public CFlowStatUserIdInfo {
json = m_rfc2544_info.m_latency;
}
- inline void set_latency_json(Json::Value json) {
+ inline void set_latency_json(const Json::Value &json) {
m_rfc2544_info.m_latency = json;
}
@@ -474,7 +475,9 @@ class CFlowStatRuleMgr {
uint32_t m_num_started_streams; // How many started (transmitting) streams we have
CNodeRing *m_ring_to_rx; // handle for sending messages to Rx core
CFlowStatParser *m_parser;
- uint16_t m_cap;
+ uint16_t m_cap; // capabilities of the NIC driver we are using
+ uint32_t m_rx_cant_count_err[TREX_MAX_PORTS];
+ uint32_t m_tx_cant_count_err[TREX_MAX_PORTS];
};
#endif
diff --git a/src/latency.cpp b/src/latency.cpp
index a7652bed..acbe26d4 100644
--- a/src/latency.cpp
+++ b/src/latency.cpp
@@ -22,6 +22,8 @@ limitations under the License.
#include "latency.h"
#include "bp_sim.h"
#include "utl_json.h"
+#include "trex_watchdog.h"
+
#include <common/basic_utils.h>
const uint8_t sctp_pkt[]={
@@ -562,6 +564,10 @@ bool CLatencyManager::Create(CLatencyManagerCfg * cfg){
if ( CGlobalInfo::is_learn_mode() ){
m_nat_check_manager.Create();
}
+
+ m_watchdog = NULL;
+ m_watchdog_handle = -1;
+
return (true);
}
@@ -711,7 +717,13 @@ void CLatencyManager::reset(){
}
-void CLatencyManager::start(int iter) {
+void CLatencyManager::tickle() {
+ if (m_watchdog) {
+ m_watchdog->tickle(m_watchdog_handle);
+ }
+}
+
+void CLatencyManager::start(int iter, TrexWatchDog *watchdog) {
m_do_stop =false;
m_is_active =false;
int cnt=0;
@@ -728,6 +740,10 @@ void CLatencyManager::start(int iter) {
m_p_queue.push(node);
bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable()?true:false;
+ if (watchdog) {
+ m_watchdog = watchdog;
+ m_watchdog_handle = watchdog->register_monitor("STF RX CORE", 1);
+ }
while ( !m_p_queue.empty() ) {
node = m_p_queue.top();
@@ -748,6 +764,9 @@ void CLatencyManager::start(int iter) {
switch (node->m_type) {
case CGenNode::FLOW_SYNC:
+
+ tickle();
+
if ( CGlobalInfo::is_learn_mode() ) {
m_nat_check_manager.handle_aging();
}
@@ -792,6 +811,11 @@ void CLatencyManager::start(int iter) {
m_rx_check_manager.tw_drain();
}
+ /* disable the monitor */
+ if (m_watchdog) {
+ m_watchdog->disable_monitor(m_watchdog_handle);
+ }
+
}
void CLatencyManager::stop(){
diff --git a/src/latency.h b/src/latency.h
index eef7146a..2b74f737 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -28,6 +28,8 @@ limitations under the License.
#define L_PKT_SUBMODE_REPLY 2
#define L_PKT_SUBMODE_0_SEQ 3
+class TrexWatchDog;
+
class CLatencyPktInfo {
public:
void Create(class CLatencyPktMode *m_l_pkt_info);
@@ -337,7 +339,7 @@ public:
bool Create(CLatencyManagerCfg * cfg);
void Delete();
void reset();
- void start(int iter);
+ void start(int iter, TrexWatchDog *watchdog);
void stop();
bool is_active();
void set_ip(uint32_t client_ip,
@@ -374,6 +376,7 @@ public:
CLatencyPktMode *c_l_pkt_mode;
private:
+ void tickle();
void send_pkt_all_ports();
void try_rx();
void try_rx_queues();
@@ -400,6 +403,9 @@ private:
CNatRxManager m_nat_check_manager;
CCpuUtlDp m_cpu_dp_u;
CCpuUtlCp m_cpu_cp_u;
+ TrexWatchDog *m_watchdog;
+ int m_watchdog_handle;
+
volatile bool m_do_stop __rte_cache_aligned ;
};
diff --git a/src/main.cpp b/src/main.cpp
index 701a65d2..62eee880 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -83,6 +83,11 @@ static CSimpleOpt::SOption parser_options[] =
};
static TrexStateless *m_sim_statelss_obj;
+static char *g_exe_name;
+
+const char *get_exe_name() {
+ return g_exe_name;
+}
static int usage(){
@@ -261,8 +266,8 @@ void set_stateless_obj(TrexStateless *obj) {
m_sim_statelss_obj = obj;
}
-
int main(int argc , char * argv[]){
+ g_exe_name = argv[0];
std::unordered_map<std::string, int> params;
diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp
index bd3cac64..c72af57a 100644
--- a/src/main_dpdk.cpp
+++ b/src/main_dpdk.cpp
@@ -73,6 +73,7 @@ extern "C" {
#include "debug.h"
#include "internal_api/trex_platform_api.h"
#include "main_dpdk.h"
+#include "trex_watchdog.h"
#define RX_CHECK_MIX_SAMPLE_RATE 8
#define RX_CHECK_MIX_SAMPLE_RATE_1G 2
@@ -2845,6 +2846,7 @@ private:
std::mutex m_cp_lock;
public:
+ TrexWatchDog m_watchdog;
TrexStateless *m_trex_stateless;
};
@@ -3272,14 +3274,16 @@ bool CGlobalTRex::Create(){
TrexStatelessCfg cfg;
- TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP, global_platform_cfg_info.m_zmq_rpc_port);
+ TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP,
+ global_platform_cfg_info.m_zmq_rpc_port,
+ &m_cp_lock,
+ &m_watchdog);
cfg.m_port_count = CGlobalInfo::m_options.m_expected_portd;
cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg;
cfg.m_rpc_server_verbose = false;
cfg.m_platform_api = new TrexDpdkPlatformApi();
cfg.m_publisher = &m_zmq_publisher;
- cfg.m_global_lock = &m_cp_lock;
m_trex_stateless = new TrexStateless(cfg);
}
@@ -3975,6 +3979,9 @@ int CGlobalTRex::run_in_master() {
const int FASTPATH_DELAY_MS = 10;
const int SLOWPATH_DELAY_MS = 500;
+ int handle = m_watchdog.register_monitor("master", 2);
+ m_watchdog.start();
+
while ( true ) {
/* fast path */
@@ -3995,6 +4002,8 @@ int CGlobalTRex::run_in_master() {
delay(FASTPATH_DELAY_MS);
slow_path_counter += FASTPATH_DELAY_MS;
cp_lock.lock();
+
+ m_watchdog.tickle(handle);
}
/* on exit release the lock */
@@ -4006,6 +4015,9 @@ int CGlobalTRex::run_in_master() {
}
m_mg.stop();
+
+ m_watchdog.stop();
+
delay(1000);
if ( was_stopped ){
/* we should stop latency and exit to stop agents */
@@ -4017,14 +4029,15 @@ int CGlobalTRex::run_in_master() {
int CGlobalTRex::run_in_rx_core(void){
+
if (get_is_stateless()) {
m_sl_rx_running = true;
- m_rx_sl.start();
+ m_rx_sl.start(m_watchdog);
m_sl_rx_running = false;
} else {
if ( CGlobalInfo::m_options.is_rx_enabled() ){
m_sl_rx_running = false;
- m_mg.start(0);
+ m_mg.start(0, &m_watchdog);
}
}
@@ -4032,6 +4045,8 @@ int CGlobalTRex::run_in_rx_core(void){
}
int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
+ std::stringstream ss;
+ ss << "DP core " << int(virt_core_id);
CPreviewMode *lp=&CGlobalInfo::m_options.preview;
if ( lp->getSingleCore() &&
@@ -4045,14 +4060,23 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
assert(m_fl_was_init);
CFlowGenListPerThread * lpt;
+
lpt = m_fl.m_threads_info[virt_core_id-1];
+ /* register a watchdog handle on current core */
+ lpt->m_watchdog = &m_watchdog;
+ lpt->m_watchdog_handle = m_watchdog.register_monitor(ss.str(), 1);
+
+
if (get_is_stateless()) {
lpt->start_stateless_daemon(*lp);
}else{
lpt->start_generate_stateful(CGlobalInfo::m_options.out_file,*lp);
}
+ /* done - remove this from the watchdog (we might wait on join for a long time) */
+ lpt->m_watchdog->disable_monitor(lpt->m_watchdog_handle);
+
m_signal[virt_core_id]=1;
return (0);
}
@@ -4422,9 +4446,14 @@ uint32_t get_cores_mask(uint32_t cores,int offset){
}
+static char *g_exe_name;
+const char *get_exe_name() {
+ return g_exe_name;
+}
int main(int argc , char * argv[]){
+ g_exe_name = argv[0];
return ( main_test(argc , argv));
}
@@ -4733,13 +4762,17 @@ int main_test(int argc , char * argv[]){
uint32_t pkts = CGlobalInfo::m_options.m_latency_prev *
CGlobalInfo::m_options.m_latency_rate;
printf("Starting pre latency check for %d sec\n",CGlobalInfo::m_options.m_latency_prev);
- g_trex.m_mg.start(pkts);
+ g_trex.m_mg.start(pkts, NULL);
delay(CGlobalInfo::m_options.m_latency_prev* 1000);
printf("Finished \n");
g_trex.m_mg.reset();
g_trex.reset_counters();
}
+ /* this will give us all cores - master + tx + latency */
+ g_trex.m_watchdog.mark_pending_monitor(g_trex.m_max_cores);
+
+
g_trex.m_sl_rx_running = false;
if ( get_is_stateless() ) {
g_trex.start_master_stateless();
diff --git a/src/rpc-server/trex_rpc_async_server.cpp b/src/rpc-server/trex_rpc_async_server.cpp
index aee92539..82c42458 100644
--- a/src/rpc-server/trex_rpc_async_server.cpp
+++ b/src/rpc-server/trex_rpc_async_server.cpp
@@ -36,7 +36,7 @@ limitations under the License.
* ZMQ based publisher server
*
*/
-TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "publisher", lock) {
+TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "publisher") {
/* ZMQ is not thread safe - this should be outside */
m_context = zmq_ctx_new();
}
diff --git a/src/rpc-server/trex_rpc_async_server.h b/src/rpc-server/trex_rpc_async_server.h
index 80d92c2f..daefa174 100644
--- a/src/rpc-server/trex_rpc_async_server.h
+++ b/src/rpc-server/trex_rpc_async_server.h
@@ -33,7 +33,7 @@ limitations under the License.
class TrexRpcServerAsync : public TrexRpcServerInterface {
public:
- TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL);
+ TrexRpcServerAsync(const TrexRpcServerConfig &cfg);
protected:
void _prepare();
diff --git a/src/rpc-server/trex_rpc_req_resp_server.cpp b/src/rpc-server/trex_rpc_req_resp_server.cpp
index 5c587e0f..033f265c 100644
--- a/src/rpc-server/trex_rpc_req_resp_server.cpp
+++ b/src/rpc-server/trex_rpc_req_resp_server.cpp
@@ -32,11 +32,13 @@ limitations under the License.
#include <zmq.h>
#include <json/json.h>
+#include "trex_watchdog.h"
+
/**
* ZMQ based request-response server
*
*/
-TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "req resp", lock) {
+TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "ZMQ sync request-response") {
}
@@ -52,11 +54,19 @@ void TrexRpcServerReqRes::_prepare() {
*/
void TrexRpcServerReqRes::_rpc_thread_cb() {
std::stringstream ss;
+ int zmq_rc;
+
+ m_watchdog_handle = m_watchdog->register_monitor(m_name, 1);
/* create a socket based on the configuration */
m_socket = zmq_socket (m_context, ZMQ_REP);
+ /* to make sure the watchdog gets tickles form time to time we give a timeout of 500ms */
+ int timeout = 500;
+ zmq_rc = zmq_setsockopt (m_socket, ZMQ_RCVTIMEO, &timeout, sizeof(int));
+ assert(zmq_rc == 0);
+
switch (m_cfg.get_protocol()) {
case TrexRpcServerConfig::RPC_PROT_TCP:
ss << "tcp://*:";
@@ -68,8 +78,8 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
ss << m_cfg.get_port();
/* bind the scoket */
- int rc = zmq_bind (m_socket, ss.str().c_str());
- if (rc != 0) {
+ zmq_rc = zmq_bind (m_socket, ss.str().c_str());
+ if (zmq_rc != 0) {
throw TrexRpcException("Unable to start ZMQ server at: " + ss.str());
}
@@ -90,6 +100,9 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
/* must be done from the same thread */
zmq_close(m_socket);
+
+ /* done */
+ m_watchdog->disable_monitor(m_watchdog_handle);
}
bool
@@ -101,10 +114,22 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) {
rc = zmq_msg_init(&zmq_msg);
assert(rc == 0);
- rc = zmq_msg_recv (&zmq_msg, m_socket, 0);
+ while (true) {
+ m_watchdog->tickle(m_watchdog_handle);
- if (rc == -1) {
+ rc = zmq_msg_recv (&zmq_msg, m_socket, 0);
+ if (rc != -1) {
+ break;
+ }
+
+ /* timeout ? */
+ if (errno == EAGAIN) {
+ continue;
+ }
+
+ /* error ! */
zmq_msg_close(&zmq_msg);
+
/* normal shutdown and zmq_term was called */
if (errno == ETERM) {
return false;
@@ -113,7 +138,9 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) {
}
}
- const char *data = (const char *)zmq_msg_data(&zmq_msg);
+
+
+ const char *data = (const char *)zmq_msg_data(&zmq_msg);
size_t len = zmq_msg_size(&zmq_msg);
msg.append(data, len);
diff --git a/src/rpc-server/trex_rpc_req_resp_server.h b/src/rpc-server/trex_rpc_req_resp_server.h
index 26b3248f..92d51a2a 100644
--- a/src/rpc-server/trex_rpc_req_resp_server.h
+++ b/src/rpc-server/trex_rpc_req_resp_server.h
@@ -32,7 +32,7 @@ limitations under the License.
class TrexRpcServerReqRes : public TrexRpcServerInterface {
public:
- TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL);
+ TrexRpcServerReqRes(const TrexRpcServerConfig &cfg);
/* for test purposes - bypass the ZMQ and inject a message */
std::string test_inject_request(const std::string &req);
diff --git a/src/rpc-server/trex_rpc_server.cpp b/src/rpc-server/trex_rpc_server.cpp
index 7d2e31a5..e4ca95c3 100644
--- a/src/rpc-server/trex_rpc_server.cpp
+++ b/src/rpc-server/trex_rpc_server.cpp
@@ -28,11 +28,20 @@ limitations under the License.
#include <sstream>
#include <iostream>
+#include "trex_watchdog.h"
+
/************** RPC server interface ***************/
-TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *lock) : m_cfg(cfg), m_name(name), m_lock(lock) {
+TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name) : m_cfg(cfg) {
+ m_name = name;
+
+ m_lock = cfg.m_lock;
+ m_watchdog = cfg.m_watchdog;
+ m_watchdog_handle = -1;
+
m_is_running = false;
m_is_verbose = false;
+
if (m_lock == NULL) {
m_lock = &m_dummy_lock;
}
@@ -69,6 +78,7 @@ void TrexRpcServerInterface::start() {
/* prepare for run */
_prepare();
+ m_watchdog->mark_pending_monitor();
m_thread = new std::thread(&TrexRpcServerInterface::_rpc_thread_cb, this);
if (!m_thread) {
throw TrexRpcException("unable to create RPC thread");
@@ -119,8 +129,7 @@ get_current_date_time() {
const std::string TrexRpcServer::s_server_uptime = get_current_date_time();
-TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
- std::mutex *lock) {
+TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg) {
m_req_resp = NULL;
@@ -130,7 +139,7 @@ TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
if (req_resp_cfg->get_protocol() == TrexRpcServerConfig::RPC_PROT_MOCK) {
m_req_resp = new TrexRpcServerReqResMock(*req_resp_cfg);
} else {
- m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg, lock);
+ m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg);
}
m_servers.push_back(m_req_resp);
diff --git a/src/rpc-server/trex_rpc_server_api.h b/src/rpc-server/trex_rpc_server_api.h
index a02b2cc0..3d9837ef 100644
--- a/src/rpc-server/trex_rpc_server_api.h
+++ b/src/rpc-server/trex_rpc_server_api.h
@@ -33,6 +33,7 @@ limitations under the License.
class TrexRpcServerInterface;
class TrexRpcServerReqRes;
+class TrexWatchDog;
/**
* defines a configuration of generic RPC server
@@ -47,8 +48,11 @@ public:
RPC_PROT_MOCK
};
- TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port) : m_protocol(protocol), m_port(port) {
-
+ TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock, TrexWatchDog *watchdog) {
+ m_protocol = protocol;
+ m_port = port;
+ m_lock = lock;
+ m_watchdog = watchdog;
}
uint16_t get_port() const {
@@ -62,6 +66,10 @@ public:
private:
rpc_prot_e m_protocol;
uint16_t m_port;
+
+public:
+ std::mutex *m_lock;
+ TrexWatchDog *m_watchdog;
};
/**
@@ -72,7 +80,7 @@ private:
class TrexRpcServerInterface {
public:
- TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *m_lock = NULL);
+ TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name);
virtual ~TrexRpcServerInterface();
/**
@@ -134,6 +142,8 @@ protected:
std::string m_name;
std::mutex *m_lock;
std::mutex m_dummy_lock;
+ TrexWatchDog *m_watchdog;
+ int m_watchdog_handle;
};
/**
@@ -147,8 +157,7 @@ class TrexRpcServer {
public:
/* creates the collection of servers using configurations */
- TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
- std::mutex *m_lock = NULL);
+ TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg);
~TrexRpcServer();
diff --git a/src/sim/trex_sim_stateless.cpp b/src/sim/trex_sim_stateless.cpp
index acbeef69..d3981e97 100644
--- a/src/sim/trex_sim_stateless.cpp
+++ b/src/sim/trex_sim_stateless.cpp
@@ -200,7 +200,7 @@ SimStateless::prepare_control_plane() {
m_publisher = new SimPublisher();
- TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0);
+ TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL, NULL);
cfg.m_port_count = m_port_count;
cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg;
diff --git a/src/stateless/cp/trex_stateless.cpp b/src/stateless/cp/trex_stateless.cpp
index 5bbe9faf..698ede90 100644
--- a/src/stateless/cp/trex_stateless.cpp
+++ b/src/stateless/cp/trex_stateless.cpp
@@ -40,7 +40,7 @@ TrexStateless::TrexStateless(const TrexStatelessCfg &cfg) {
/* create RPC servers */
/* set both servers to mutex each other */
- m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg, cfg.m_global_lock);
+ m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg);
m_rpc_server->set_verbose(cfg.m_rpc_server_verbose);
/* configure ports */
diff --git a/src/stateless/cp/trex_stateless.h b/src/stateless/cp/trex_stateless.h
index 033326ca..83ab6976 100644
--- a/src/stateless/cp/trex_stateless.h
+++ b/src/stateless/cp/trex_stateless.h
@@ -41,6 +41,7 @@ limitations under the License.
#include "trex_api_class.h"
class TrexStatelessPort;
+class TrexWatchDog;
/**
* unified stats
@@ -87,7 +88,6 @@ public:
m_rpc_server_verbose = false;
m_platform_api = NULL;
m_publisher = NULL;
- m_global_lock = NULL;
}
const TrexRpcServerConfig *m_rpc_req_resp_cfg;
@@ -95,7 +95,6 @@ public:
bool m_rpc_server_verbose;
uint8_t m_port_count;
TrexPublisher *m_publisher;
- std::mutex *m_global_lock;
};
diff --git a/src/stateless/dp/trex_stateless_dp_core.cpp b/src/stateless/dp/trex_stateless_dp_core.cpp
index 21334363..fe78c5b2 100644
--- a/src/stateless/dp/trex_stateless_dp_core.cpp
+++ b/src/stateless/dp/trex_stateless_dp_core.cpp
@@ -656,6 +656,7 @@ TrexStatelessDpCore::idle_state_loop() {
int counter = 0;
while (m_state == STATE_IDLE) {
+ m_core->tickle();
m_core->m_node_gen.m_v_if->flush_dp_rx_queue();
bool had_msg = periodic_check_for_cp_messages();
if (had_msg) {
diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp
index f9150ff7..b3555c13 100644
--- a/src/stateless/rx/trex_stateless_rx_core.cpp
+++ b/src/stateless/rx/trex_stateless_rx_core.cpp
@@ -52,12 +52,8 @@ void CRFC2544Info::export_data(rfc2544_info_t_ &obj) {
obj.set_err_cntrs(m_seq_err, m_ooo, m_dup, m_seq_err_events_too_big, m_seq_err_events_too_low);
obj.set_jitter(m_jitter.get_jitter());
- json_str = "";
- m_latency.dump_json("", json_str);
- // This is a hack. We need to make the dump_json return json object.
- reader.parse( json_str.c_str(), json);
+ m_latency.dump_json(json);
obj.set_latency_json(json);
- obj.set_last_max(m_last_max.getMax());
};
void CCPortLatencyStl::reset() {
@@ -76,6 +72,9 @@ void CRxCoreStateless::create(const CRxSlCfg &cfg) {
m_ring_to_cp = cp_rx->getRingDpToCp(0);
m_state = STATE_IDLE;
+ m_watchdog_handle = -1;
+ m_watchdog = NULL;
+
for (int i = 0; i < m_max_ports; i++) {
CLatencyManagerPerPortStl * lp = &m_ports[i];
lp->m_io = cfg.m_ports[i];
@@ -93,7 +92,15 @@ void CRxCoreStateless::handle_cp_msg(TrexStatelessCpToRxMsgBase *msg) {
delete msg;
}
+void CRxCoreStateless::tickle() {
+ m_watchdog->tickle(m_watchdog_handle);
+}
+
bool CRxCoreStateless::periodic_check_for_cp_messages() {
+
+ /* tickle the watchdog */
+ tickle();
+
/* fast path */
if ( likely ( m_ring_from_cp->isEmpty() ) ) {
return false;
@@ -140,11 +147,15 @@ void CRxCoreStateless::idle_state_loop() {
}
}
-void CRxCoreStateless::start() {
+void CRxCoreStateless::start(TrexWatchDog &watchdog) {
int count = 0;
int i = 0;
bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable() ? true : false;
+ /* register a watchdog handle on current core */
+ m_watchdog = &watchdog;
+ m_watchdog_handle = watchdog.register_monitor("STL RX CORE", 1);
+
while (true) {
if (m_state == STATE_WORKING) {
i++;
@@ -167,6 +178,8 @@ void CRxCoreStateless::start() {
count += try_rx();
}
rte_pause();
+
+ m_watchdog->disable_monitor(m_watchdog_handle);
}
void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t *m) {
diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h
index d18356b6..ce1bc1ad 100644
--- a/src/stateless/rx/trex_stateless_rx_core.h
+++ b/src/stateless/rx/trex_stateless_rx_core.h
@@ -28,55 +28,6 @@
class TrexStatelessCpToRxMsgBase;
-class CLastMax {
- public:
- CLastMax() {
- m_max1 = 0;
- m_max1 = 1;
- m_choose = true;
- }
-
- void update(dsec_t val) {
- if (m_choose) {
- if (val > m_max1) {
- m_max1 = val;
- sanb_smp_memory_barrier();
- }
- } else {
- if (val > m_max2) {
- m_max2 = val;
- sanb_smp_memory_barrier();
- }
- }
- }
-
- dsec_t getMax() {
- if (m_choose)
- return m_max2;
- else
- return m_max1;
- }
-
- void switchMax() {
- if (m_choose) {
- m_max2 = 0;
- m_choose = false;
- sanb_smp_memory_barrier();
- }
- else {
- m_max1 = 0;
- m_choose = true;
- sanb_smp_memory_barrier();
- }
- }
-
- private:
- dsec_t m_max1;
- dsec_t m_max2;
- bool m_choose;
-};
-
-
class CCPortLatencyStl {
public:
void reset();
@@ -112,12 +63,10 @@ class CRFC2544Info {
void export_data(rfc2544_info_t_ &obj);
inline void add_sample(double stime) {
m_latency.Add(stime);
- m_last_max.update(stime);
m_jitter.calc(stime);
}
inline void sample_period_end() {
m_latency.update();
- m_last_max.switchMax();
}
inline uint32_t get_seq() {return m_seq;}
inline void set_seq(uint32_t val) {m_seq = val;}
@@ -136,7 +85,6 @@ class CRFC2544Info {
uint64_t m_seq_err_events_too_low; // How many packet seq num lower than expected events we had
uint64_t m_ooo; // Packets we got with seq num lower than expected (We guess they are out of order)
uint64_t m_dup; // Packets we got with same seq num
- CLastMax m_last_max; // maximum for last measurement period (reset whenever we read it).
};
class CRxCoreStateless {
@@ -147,7 +95,7 @@ class CRxCoreStateless {
};
public:
- void start();
+ void start(TrexWatchDog &watchdog);
void create(const CRxSlCfg &cfg);
void reset_rx_stats(uint8_t port_id);
int get_rx_stats(uint8_t port_id, rx_per_flow_t *rx_stats, int min, int max, bool reset
@@ -165,6 +113,7 @@ class CRxCoreStateless {
private:
void handle_cp_msg(TrexStatelessCpToRxMsgBase *msg);
bool periodic_check_for_cp_messages();
+ void tickle();
void idle_state_loop();
void handle_rx_pkt(CLatencyManagerPerPortStl * lp, rte_mbuf_t * m);
void handle_rx_queue_msgs(uint8_t thread_id, CNodeRing * r);
@@ -176,6 +125,10 @@ class CRxCoreStateless {
uint16_t get_hw_id(uint16_t id);
private:
+
+ TrexWatchDog *m_watchdog;
+ int m_watchdog_handle;
+
uint32_t m_max_ports;
bool m_has_streams;
CLatencyManagerPerPortStl m_ports[TREX_MAX_PORTS];
diff --git a/src/time_histogram.cpp b/src/time_histogram.cpp
index 8a92cb6f..fefa59d6 100755
--- a/src/time_histogram.cpp
+++ b/src/time_histogram.cpp
@@ -66,10 +66,6 @@ bool CTimeHistogram::Add(dsec_t dt) {
return false;
}
period_elem.inc_high_cnt();
-
- if ( m_max_dt < dt) {
- m_max_dt = dt;
- }
period_elem.update_max(dt);
uint32_t d_10usec = (uint32_t)(dt*100000.0);
@@ -118,6 +114,9 @@ void CTimeHistogram::update() {
update_average(period_elem);
m_total_cnt += period_elem.get_cnt();
m_total_cnt_high += period_elem.get_high_cnt();
+ if ( m_max_dt < period_elem.get_max()) {
+ m_max_dt = period_elem.get_max();
+ }
}
void CTimeHistogram::update_average(CTimeHistogramPerPeriodData &period_elem) {
@@ -180,11 +179,7 @@ void CTimeHistogram::Dump(FILE *fd) {
}
}
-/*
- { "histogram" : [ {} ,{} ] }
-
-*/
-
+// Used in statefull
void CTimeHistogram::dump_json(std::string name,std::string & json ) {
char buff[200];
if (name != "")
@@ -222,3 +217,31 @@ void CTimeHistogram::dump_json(std::string name,std::string & json ) {
}
json+=" ] } ,";
}
+
+// Used in stateless
+void CTimeHistogram::dump_json(Json::Value & json, bool add_histogram) {
+ int i, j;
+ uint32_t base=10;
+ CTimeHistogramPerPeriodData &period_elem = m_period_data[get_read_period_index()];
+
+ json["total_max"] = get_usec(m_max_dt);
+ json["last_max"] = get_usec(period_elem.get_max());
+ json["average"] = get_average_latency();
+
+ if (add_histogram) {
+ for (j = 0; j < HISTOGRAM_SIZE_LOG; j++) {
+ for (i = 0; i < HISTOGRAM_SIZE; i++) {
+ if (m_hcnt[j][i] > 0) {
+ std::string key = static_cast<std::ostringstream*>( &(std::ostringstream()
+ << int(base * (i + 1)) ) )->str();
+ json["histogram"][key] = Json::Value::UInt64(m_hcnt[j][i]);
+ }
+ }
+ base = base * 10;
+ }
+ if (m_total_cnt != m_total_cnt_high) {
+ json["histogram"]["0"] = Json::Value::UInt64(m_total_cnt - m_total_cnt_high);
+ }
+ }
+}
+
diff --git a/src/time_histogram.h b/src/time_histogram.h
index da70e677..0d532f1b 100755
--- a/src/time_histogram.h
+++ b/src/time_histogram.h
@@ -24,11 +24,12 @@ limitations under the License.
#include <stdint.h>
-#include "os_time.h"
#include <stdio.h>
#include <math.h>
-#include "mbuf.h"
#include <string>
+#include <json/json.h>
+#include "mbuf.h"
+#include "os_time.h"
class CTimeHistogramPerPeriodData {
public:
@@ -85,6 +86,7 @@ public:
return period_elem.get_max_usec();
}
void dump_json(std::string name,std::string & json );
+ void dump_json(Json::Value & json, bool add_histogram = true);
uint64_t get_count() {return m_total_cnt;}
uint64_t get_high_count() {return m_total_cnt_high;}
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp
new file mode 100644
index 00000000..e78e8e6d
--- /dev/null
+++ b/src/trex_watchdog.cpp
@@ -0,0 +1,331 @@
+/*
+ Itay Marom
+ Cisco Systems, Inc.
+*/
+
+/*
+Copyright (c) 2015-2015 Cisco Systems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "trex_watchdog.h"
+#include "trex_exception.h"
+
+#include <assert.h>
+#include <unistd.h>
+#include <sstream>
+
+#include <sys/ptrace.h>
+#include <execinfo.h>
+#include <cxxabi.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <iostream>
+#include <stdexcept>
+
+#define DISABLE_WATCHDOG_ON_GDB
+
+static TrexWatchDog::monitor_st *global_monitor;
+
+const char *get_exe_name();
+
+std::string exec(const char* cmd) {
+ char buffer[128];
+ std::string result = "";
+ std::shared_ptr<FILE> pipe(popen(cmd, "r"), pclose);
+ if (!pipe) throw std::runtime_error("popen() failed!");
+ while (!feof(pipe.get())) {
+ if (fgets(buffer, 128, pipe.get()) != NULL) {
+ result += buffer;
+ }
+ }
+ return result;
+}
+
+// This function produces a stack backtrace with demangled function & method names.
+__attribute__((noinline))
+std::string Backtrace(int skip = 1)
+{
+ void *callstack[128];
+ const int nMaxFrames = sizeof(callstack) / sizeof(callstack[0]);
+ char buf[1024];
+ int nFrames = backtrace(callstack, nMaxFrames);
+ char **symbols = backtrace_symbols(callstack, nFrames);
+
+ std::ostringstream trace_buf;
+ for (int i = skip; i < nFrames; i++) {
+
+ Dl_info info;
+ if (dladdr(callstack[i], &info) && info.dli_sname) {
+ char *demangled = NULL;
+ int status = -1;
+ if (info.dli_sname[0] == '_')
+ demangled = abi::__cxa_demangle(info.dli_sname, NULL, 0, &status);
+ snprintf(buf, sizeof(buf), "%-3d %*p %s + %zd\n",
+ i, int(2 + sizeof(void*) * 2), callstack[i],
+ status == 0 ? demangled :
+ info.dli_sname == 0 ? symbols[i] : info.dli_sname,
+ (char *)callstack[i] - (char *)info.dli_saddr);
+ free(demangled);
+ } else {
+ snprintf(buf, sizeof(buf), "%-3d %*p %s\n",
+ i, int(2 + sizeof(void*) * 2), callstack[i], symbols[i]);
+ }
+ trace_buf << buf;
+ }
+ free(symbols);
+ if (nFrames == nMaxFrames)
+ trace_buf << "[truncated]\n";
+
+ /* add the addr2line info */
+ std::stringstream addr2line;
+
+ addr2line << "/usr/bin/addr2line -e " << get_exe_name() << " ";
+ for (int i = skip; i < nFrames; i++) {
+ addr2line << callstack[i] << " ";
+ }
+
+ trace_buf << "\n\n*** addr2line information follows ***\n\n";
+ try {
+ trace_buf << exec(addr2line.str().c_str());
+ } catch (std::runtime_error &e) {
+ trace_buf << "\n" << e.what();
+ }
+
+ return trace_buf.str();
+}
+
+__attribute__((noinline))
+static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) {
+ std::stringstream ss;
+
+ double now = now_sec();
+
+ ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds";
+
+ std::string backtrace = Backtrace();
+ ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n";
+
+ throw std::runtime_error(ss.str());
+}
+
+void TrexWatchDog::mark_pending_monitor(int count) {
+ std::unique_lock<std::mutex> lock(m_lock);
+ m_pending += count;
+ lock.unlock();
+}
+
+void TrexWatchDog::block_on_pending(int max_block_time_ms) {
+
+ int timeout_msec = max_block_time_ms;
+
+ std::unique_lock<std::mutex> lock(m_lock);
+
+ while (m_pending > 0) {
+
+ lock.unlock();
+ delay(1);
+ lock.lock();
+
+ timeout_msec -= 1;
+ if (timeout_msec == 0) {
+ throw TrexException("WATCHDOG: block on pending monitors timed out");
+ }
+ }
+
+ /* lock will be released */
+}
+
+/**
+ * register a monitor
+ * must be called from the relevant thread
+ *
+ * this function is thread safe
+ *
+ * @author imarom (01-Jun-16)
+ *
+ * @param name
+ * @param timeout_sec
+ *
+ * @return int
+ */
+int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) {
+ monitor_st monitor;
+
+ /* cannot add monitors while active */
+ assert(m_active == false);
+
+ monitor.active = true;
+ monitor.tid = pthread_self();
+ monitor.name = name;
+ monitor.timeout_sec = timeout_sec;
+ monitor.tickled = true;
+ monitor.ts = 0;
+
+ /* critical section start */
+ std::unique_lock<std::mutex> lock(m_lock);
+
+ /* make sure no double register */
+ for (auto &m : m_monitors) {
+ if (m.tid == pthread_self()) {
+ std::stringstream ss;
+ ss << "WATCHDOG: double register detected\n\n" << Backtrace();
+ throw TrexException(ss.str());
+ }
+ }
+
+ monitor.handle = m_monitors.size();
+ m_monitors.push_back(monitor);
+
+ assert(m_pending > 0);
+ m_pending--;
+
+ /* critical section end */
+ lock.unlock();
+
+ return monitor.handle;
+}
+
+/**
+ * will disable the monitor - it will no longer be watched
+ *
+ */
+void TrexWatchDog::disable_monitor(int handle) {
+ assert(handle < m_monitors.size());
+
+ m_monitors[handle].active = false;
+}
+
+/**
+ * thread safe function
+ *
+ */
+void TrexWatchDog::tickle(int handle) {
+
+ assert(handle < m_monitors.size());
+
+ /* not nesscary but write gets cache invalidate for nothing */
+ if (m_monitors[handle].tickled) {
+ return;
+ }
+
+ m_monitors[handle].tickled = true;
+}
+
+void TrexWatchDog::register_signal() {
+
+ /* do this once */
+ if (g_signal_init) {
+ return;
+ }
+
+ /* register a handler on SIG ALARM */
+ struct sigaction sa;
+ memset (&sa, '\0', sizeof(sa));
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = _callstack_signal_handler;
+
+ int rc = sigaction(SIGALRM , &sa, NULL);
+ assert(rc == 0);
+
+ g_signal_init = true;
+}
+
+void TrexWatchDog::start() {
+
+ block_on_pending();
+
+ /* no pending monitors */
+ assert(m_pending == 0);
+
+ /* under GDB - disable the watchdog */
+ #ifdef DISABLE_WATCHDOG_ON_GDB
+ if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1) {
+ printf("\n\n*** GDB detected - disabling watchdog... ***\n\n");
+ return;
+ }
+ #endif
+
+ m_active = true;
+ m_thread = new std::thread(&TrexWatchDog::_main, this);
+ if (!m_thread) {
+ throw TrexException("unable to create watchdog thread");
+ }
+}
+
+void TrexWatchDog::stop() {
+ m_active = false;
+
+ if (m_thread) {
+ m_thread->join();
+ delete m_thread;
+ m_thread = NULL;
+ }
+}
+
+
+
+/**
+ * main loop
+ *
+ */
+void TrexWatchDog::_main() {
+
+ /* reset all the monitors */
+ for (auto &monitor : m_monitors) {
+ monitor.tickled = true;
+ }
+
+ /* start main loop */
+ while (m_active) {
+
+ dsec_t now = now_sec();
+
+ for (auto &monitor : m_monitors) {
+
+ /* skip non active monitors */
+ if (!monitor.active) {
+ continue;
+ }
+
+ /* if its own - turn it off and write down the time */
+ if (monitor.tickled) {
+ monitor.tickled = false;
+ monitor.ts = now;
+ continue;
+ }
+
+ /* the bit is off - check the time first */
+ if ( (now - monitor.ts) > monitor.timeout_sec ) {
+ global_monitor = &monitor;
+
+ pthread_kill(monitor.tid, SIGALRM);
+
+ /* nothing to do more... the other thread will terminate, but if not - we terminate */
+ sleep(5);
+ printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str());
+ exit(1);
+ }
+
+ }
+
+ /* the internal clock - 250 ms */
+ delay(250);
+ }
+}
+
+bool TrexWatchDog::g_signal_init = false;
diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h
new file mode 100644
index 00000000..63255180
--- /dev/null
+++ b/src/trex_watchdog.h
@@ -0,0 +1,141 @@
+/*
+ Itay Marom
+ Cisco Systems, Inc.
+*/
+
+/*
+Copyright (c) 2015-2015 Cisco Systems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef __TREX_WATCHDOG_H__
+#define __TREX_WATCHDOG_H__
+
+#include <string>
+#include <vector>
+#include <thread>
+#include <mutex>
+
+//#include "rte_memory.h"
+#include "mbuf.h"
+#include "os_time.h"
+
+class TrexWatchDog {
+public:
+ TrexWatchDog() {
+ m_thread = NULL;
+ m_active = false;
+ m_pending = 0;
+
+ register_signal();
+ }
+
+ /**
+ * registering a monitor happens from another thread
+ * this make sure that start will be able to block until
+ * all threads has registered
+ *
+ * @author imarom (01-Jun-16)
+ */
+ void mark_pending_monitor(int count = 1);
+
+
+ /**
+ * blocks while monitors are pending registeration
+ *
+ * @author imarom (01-Jun-16)
+ */
+ void block_on_pending(int max_block_time_ms = 200);
+
+
+ /**
+ * add a monitor to the watchdog
+ * this thread will be monitored and if timeout
+ * has passed without calling tick - an exception will be called
+ *
+ * @author imarom (31-May-16)
+ *
+ * @param name
+ * @param timeout
+ *
+ * @return int
+ */
+ int register_monitor(const std::string &name, double timeout_sec);
+
+
+ /**
+ * disable a monitor - it will no longer be watched
+ *
+ */
+ void disable_monitor(int handle);
+
+
+ /**
+ * should be called by each thread on it's handle
+ *
+ * @author imarom (31-May-16)
+ *
+ * @param handle
+ */
+ void tickle(int handle);
+
+
+ /**
+ * start the watchdog
+ *
+ */
+ void start();
+
+
+ /**
+ * stop the watchdog
+ *
+ */
+ void stop();
+
+
+ /* should be cache aligned to avoid false sharing */
+ struct monitor_st {
+ /* write fields are first */
+ volatile bool active;
+ volatile bool tickled;
+ dsec_t ts;
+
+ int handle;
+ double timeout_sec;
+ pthread_t tid;
+ std::string name;
+
+ /* for for a full cacheline */
+ uint8_t pad[15];
+ };
+
+
+private:
+ void register_signal();
+ void _main();
+
+ std::vector<monitor_st> m_monitors __rte_cache_aligned;
+ std::mutex m_lock;
+
+ volatile bool m_active;
+ std::thread *m_thread;
+ volatile int m_pending;
+
+ static bool g_signal_init;
+};
+
+static_assert(sizeof(TrexWatchDog::monitor_st) >= RTE_CACHE_LINE_SIZE, "sizeof(monitor_st) != RTE_CACHE_LINE_SIZE" );
+
+#endif /* __TREX_WATCHDOG_H__ */