Merge branch 'master' into cpu_per_core

author: Yaroslav Brustinov <ybrustin@cisco.com> 2016-06-05 10:44:40 +0300
committer: Yaroslav Brustinov <ybrustin@cisco.com> 2016-06-05 10:44:40 +0300
commit: 6d5144308445cb02373056668e3df5d962dc424c (patch)
tree: 41be395e6a06a0cb1cfe1fce070c536c0687d8de
parent: e0ca7a226fd37023ff8a9d80e46403a34492c1d7 (diff)
parent: 0400a9503137f9179eb92f15d4e3b985ffad562e (diff)
35 files changed, 884 insertions, 291 deletions
diff --git a/README.asciidoc b/README.asciidoc
index d4adea4e..a4cc808d 100755
--- a/README.asciidoc
+++ b/README.asciidoc
@@ -82,13 +82,13 @@ TRex addresses these problems through an innovative and extendable software impl
 
 The following example shows three streams configured for Continuous, Burst, and Multi-burst traffic.
 
-image::http://trex-tgn.cisco.com/trex/doc/images/stl_streams_example_02.png[title="",align="center",width=600, link="http://trex-tgn.cisco.com/trex/doc/images/stl_streams_example_02.png"]
+image::https://trex-tgn.cisco.com/trex/doc/images/stl_streams_example_02.png[title="",align="center",width=600, link="http://trex-tgn.cisco.com/trex/doc/images/stl_streams_example_02.png"]
 
 A new JSON-RPC2 Architecture  provides support for interactive mode
 
-image::http://trex-tgn.cisco.com/trex/doc/images/trex_architecture_01.png[title="",align="center",width=600, link="http://trex-tgn.cisco.com/trex/doc/images/trex_architecture_01.png"]
+image::https://trex-tgn.cisco.com/trex/doc/images/trex_architecture_01.png[title="",align="center",width=600, link="http://trex-tgn.cisco.com/trex/doc/images/trex_architecture_01.png"]
 
-more info can be found here link:http://trex-tgn.cisco.com/trex/doc/index.html[Documentation]
+more info can be found here link:https://trex-tgn.cisco.com/trex/doc/index.html[Documentation]
 
 
 === What it�s not
@@ -123,7 +123,7 @@ link:http://www.slideshare.net/harryvanhaaren/trex-traffig-gen-hanoch-haim[DPDK
 
 link:http://www.youtube.com/watch?v=U0gRalB7DOs[Video DPDK summit 2015] 
 
-link:http://trex-tgn.cisco.com/trex/doc/trex_preso.html[Presentation] 
+link:https://trex-tgn.cisco.com/trex/doc/trex_preso.html[Presentation] 
 
 === Wiki
 
@@ -135,7 +135,7 @@ Report bug/request feature link:http://trex-tgn.cisco.com/youtrack/issues[YouTra
 
 === Windows Stateful Client GUI
 
-image::http://trex-tgn.cisco.com/trex/doc/images/TrexViewer.png[title="",align="center",width=200, link="http://trex-tgn.cisco.com/trex/doc/images/TrexViewer.png"]
+image::https://trex-tgn.cisco.com/trex/doc/images/TrexViewer.png[title="",align="center",width=200, link="http://trex-tgn.cisco.com/trex/doc/images/TrexViewer.png"]
 
 === Sandbox for evaluation
 
diff --git a/VERSION b/VERSION
index 8b5f9afb..f85ec405 100755
--- a/VERSION
+++ b/VERSION
@@ -1,4 +1,4 @@
-v2.02
+v2.03
 
 
 
diff --git a/linux/ws_main.py b/linux/ws_main.py
index 58f5b661..3aee05db 100755
--- a/linux/ws_main.py
+++ b/linux/ws_main.py
@@ -121,7 +121,7 @@ main_src = SrcGroup(dir='src',
              'latency.cpp',
              'flow_stat.cpp',
              'flow_stat_parser.cpp',
-            
+             'trex_watchdog.cpp',
              'pal/linux/pal_utl.cpp',
              'pal/linux/mbuf.cpp',
              'sim/trex_sim_stateless.cpp',
@@ -371,7 +371,7 @@ class build_option:
 
     def get_link_flags(self):
         # add here basic flags
-        base_flags = ['-pthread'];
+        base_flags = [];
         if self.isPIE():
             base_flags.append('-lstdc++')
 
@@ -410,7 +410,7 @@ def build_prog (bld, build_obj):
                 linkflags = build_obj.get_link_flags(),
                 source = build_obj.get_src(),
                 use = build_obj.get_use_libs(),
-                lib = ['z'],
+                lib = ['pthread', 'z', 'dl'],
                 rpath  = bld.env.RPATH + build_obj.get_rpath(),
                 target = build_obj.get_target())
 
diff --git a/linux_dpdk/ws_main.py b/linux_dpdk/ws_main.py
index 4c2d3c51..a13c8fd7 100755
--- a/linux_dpdk/ws_main.py
+++ b/linux_dpdk/ws_main.py
@@ -98,6 +98,7 @@ main_src = SrcGroup(dir='src',
              'utl_term_io.cpp',
              'global_io_mode.cpp',
              'main_dpdk.cpp',
+             'trex_watchdog.cpp',
              'debug.cpp',
              'flow_stat.cpp',
              'flow_stat_parser.cpp',
@@ -665,7 +666,7 @@ class build_option:
         return (flags)
 
     def get_link_flags(self):
-        base_flags = [];
+        base_flags = ['-rdynamic'];
         if self.is64Platform():
             base_flags += ['-m64'];
             base_flags += ['-lrt'];
diff --git a/scripts/automation/regression/setups/trex17/benchmark.yaml b/scripts/automation/regression/setups/trex17/benchmark.yaml
index c6f588e6..3e1f7c9b 100644
--- a/scripts/automation/regression/setups/trex17/benchmark.yaml
+++ b/scripts/automation/regression/setups/trex17/benchmark.yaml
@@ -23,7 +23,7 @@ test_routing_imix_64:
 
 
 test_static_routing_imix_asymmetric:
-    multiplier          : 0.8
+    multiplier          : 0.7
     cores               : 1
     bw_per_core         : 9.635
 
diff --git a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
index 06009ac3..132d34ee 100644
--- a/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
+++ b/scripts/automation/trex_control_plane/stl/trex_stl_lib/trex_stl_stats.py
@@ -1089,19 +1089,20 @@ class CLatencyStats(CTRexStats):
             output[int_pg_id]['err_cntrs'] = current_pg['err_cntrs']
             output[int_pg_id]['latency'] = {}
 
-            output[int_pg_id]['latency']['last_max'] = current_pg['latency']['last_max']
-            output[int_pg_id]['latency']['jitter'] = current_pg['latency']['jitter']
-            if current_pg['latency']['h'] != "":
-                output[int_pg_id]['latency']['average'] = current_pg['latency']['h']['s_avg']
-                output[int_pg_id]['latency']['total_max'] = current_pg['latency']['h']['max_usec']
-                output[int_pg_id]['latency']['histogram'] = {elem['key']: elem['val']
-                                                             for elem in current_pg['latency']['h']['histogram']}
-                zero_count = current_pg['latency']['h']['cnt'] - current_pg['latency']['h']['high_cnt']
-                if zero_count != 0:
-                    output[int_pg_id]['latency']['total_min'] = 1
-                    output[int_pg_id]['latency']['histogram'][0] = zero_count
-                elif output[int_pg_id]['latency']['histogram']:
-                    output[int_pg_id]['latency']['total_min'] = min(output[int_pg_id]['latency']['histogram'].keys())
+            if 'latency' in current_pg:
+                for field in ['jitter', 'average', 'total_max', 'last_max']:
+                    if field in current_pg['latency']:
+                        output[int_pg_id]['latency'][field] = current_pg['latency'][field]
+                    else:
+                        output[int_pg_id]['latency'][field] = StatNotAvailable(field)
+
+                if 'histogram' in current_pg['latency']:
+                    output[int_pg_id]['latency']['histogram'] = {int(elem): current_pg['latency']['histogram'][elem]
+                                                                 for elem in current_pg['latency']['histogram']}
+                    min_val = min(output[int_pg_id]['latency']['histogram'].keys())
+                    if min_val == 0:
+                        min_val = 2
+                    output[int_pg_id]['latency']['total_min'] = min_val
                 else:
                     output[int_pg_id]['latency']['total_min'] = StatNotAvailable('total_min')
 
diff --git a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
index 32f0a2d1..729ab3fd 100644
--- a/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
+++ b/scripts/external_libs/scapy-2.3.1/python2/scapy/arch/linux.py
@@ -69,13 +69,14 @@ RTF_REJECT = 0x0200
 
 LOOPBACK_NAME="lo"
 
-with os.popen("tcpdump -V 2> /dev/null") as _f:
-    if _f.close() >> 8 == 0x7f:
-        log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
-        TCPDUMP=0
-    else:
-        TCPDUMP=1
-del(_f)
+#with os.popen("tcpdump -V 2> /dev/null") as _f:
+#    if _f.close() >> 8 == 0x7f:
+#        log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
+#        TCPDUMP=0
+#    else:
+#        TCPDUMP=1
+#del(_f)
+TCPDUMP=0
     
 
 def get_if_raw_hwaddr(iff):
diff --git a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
index 3eab16c6..40ff9e35 100644
--- a/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
+++ b/scripts/external_libs/scapy-2.3.1/python3/scapy/arch/linux.py
@@ -71,13 +71,14 @@ PCAP_ERRBUF_SIZE=256
 
 LOOPBACK_NAME="lo"
 
-with os.popen("tcpdump -V 2> /dev/null") as _f:
-    if _f.close() >> 8 == 0x7f:
-        log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
-        TCPDUMP=0
-    else:
-        TCPDUMP=1
-del(_f)
+#with os.popen("tcpdump -V 2> /dev/null") as _f:
+#    if _f.close() >> 8 == 0x7f:
+#        log_loading.warning("Failed to execute tcpdump. Check it is installed and in the PATH")
+#        TCPDUMP=0
+#    else:
+#        TCPDUMP=1
+#del(_f)
+TCPDUMP=0
 
 
 def get_if_raw_hwaddr(iff):
diff --git a/scripts/ko/src/igb_uio.c b/scripts/ko/src/igb_uio.c
index faeb0b68..27bec6a3 100755
--- a/scripts/ko/src/igb_uio.c
+++ b/scripts/ko/src/igb_uio.c
@@ -31,6 +31,7 @@
 #include <linux/io.h>
 #include <linux/msi.h>
 #include <linux/version.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_XEN_DOM0
 #include <xen/xen.h>
@@ -39,15 +40,6 @@
 
 #include "compat.h"
 
-#ifdef RTE_PCI_CONFIG
-#define PCI_SYS_FILE_BUF_SIZE      10
-#define PCI_DEV_CAP_REG            0xA4
-#define PCI_DEV_CTRL_REG           0xA8
-#define PCI_DEV_CAP_EXT_TAG_MASK   0x20
-#define PCI_DEV_CTRL_EXT_TAG_SHIFT 8
-#define PCI_DEV_CTRL_EXT_TAG_MASK  (1 << PCI_DEV_CTRL_EXT_TAG_SHIFT)
-#endif
-
 /**
  * A structure describing the private information for a uio device.
  */
@@ -57,22 +49,15 @@ struct rte_uio_pci_dev {
 	enum rte_intr_mode mode;
 };
 
-static char *intr_mode = NULL;
+static char *intr_mode;
 static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX;
 
-static inline struct rte_uio_pci_dev *
-igbuio_get_uio_pci_dev(struct uio_info *info)
-{
-	return container_of(info, struct rte_uio_pci_dev, info);
-}
-
 /* sriov sysfs */
 static ssize_t
 show_max_vfs(struct device *dev, struct device_attribute *attr,
 	     char *buf)
 {
-	return snprintf(buf, 10, "%u\n",
-			pci_num_vf(container_of(dev, struct pci_dev, dev)));
+	return snprintf(buf, 10, "%u\n", dev_num_vf(dev));
 }
 
 static ssize_t
@@ -81,7 +66,7 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
 {
 	int err = 0;
 	unsigned long max_vfs;
-	struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+	struct pci_dev *pdev = to_pci_dev(dev);
 
 	if (0 != kstrtoul(buf, 0, &max_vfs))
 		return -EINVAL;
@@ -100,19 +85,9 @@ store_max_vfs(struct device *dev, struct device_attribute *attr,
 static ssize_t
 show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
-	uint32_t val = 0;
-
-	pci_read_config_dword(pci_dev, PCI_DEV_CAP_REG, &val);
-	if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) /* Not supported */
-		return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n", "invalid");
-
-	val = 0;
-	pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
-					PCI_DEV_CTRL_REG, &val);
+	dev_info(dev, "Deprecated\n");
 
-	return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%s\n",
-		(val & PCI_DEV_CTRL_EXT_TAG_MASK) ? "on" : "off");
+	return 0;
 }
 
 static ssize_t
@@ -121,36 +96,9 @@ store_extended_tag(struct device *dev,
 		   const char *buf,
 		   size_t count)
 {
-	struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
-	uint32_t val = 0, enable;
-
-	if (strncmp(buf, "on", 2) == 0)
-		enable = 1;
-	else if (strncmp(buf, "off", 3) == 0)
-		enable = 0;
-	else
-		return -EINVAL;
-
-	pci_cfg_access_lock(pci_dev);
-	pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
-					PCI_DEV_CAP_REG, &val);
-	if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) { /* Not supported */
-		pci_cfg_access_unlock(pci_dev);
-		return -EPERM;
-	}
-
-	val = 0;
-	pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn,
-					PCI_DEV_CTRL_REG, &val);
-	if (enable)
-		val |= PCI_DEV_CTRL_EXT_TAG_MASK;
-	else
-		val &= ~PCI_DEV_CTRL_EXT_TAG_MASK;
-	pci_bus_write_config_dword(pci_dev->bus, pci_dev->devfn,
-					PCI_DEV_CTRL_REG, val);
-	pci_cfg_access_unlock(pci_dev);
+	dev_info(dev, "Deprecated\n");
 
-	return count;
+	return 0;
 }
 
 static ssize_t
@@ -158,10 +106,9 @@ show_max_read_request_size(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
 {
-	struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
-	int val = pcie_get_readrq(pci_dev);
+	dev_info(dev, "Deprecated\n");
 
-	return snprintf(buf, PCI_SYS_FILE_BUF_SIZE, "%d\n", val);
+	return 0;
 }
 
 static ssize_t
@@ -170,18 +117,9 @@ store_max_read_request_size(struct device *dev,
 			    const char *buf,
 			    size_t count)
 {
-	struct pci_dev *pci_dev = container_of(dev, struct pci_dev, dev);
-	unsigned long size = 0;
-	int ret;
-
-	if (0 != kstrtoul(buf, 0, &size))
-		return -EINVAL;
-
-	ret = pcie_set_readrq(pci_dev, (int)size);
-	if (ret < 0)
-		return ret;
+	dev_info(dev, "Deprecated\n");
 
-	return count;
+	return 0;
 }
 #endif
 
@@ -243,7 +181,7 @@ igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
 static int
 igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
 {
-	struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+	struct rte_uio_pci_dev *udev = info->priv;
 	struct pci_dev *pdev = udev->pdev;
 
 	pci_cfg_access_lock(pdev);
@@ -253,8 +191,13 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
 	else if (udev->mode == RTE_INTR_MODE_MSIX) {
 		struct msi_desc *desc;
 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
 		list_for_each_entry(desc, &pdev->msi_list, list)
 			igbuio_msix_mask_irq(desc, irq_state);
+#else
+		list_for_each_entry(desc, &pdev->dev.msi_list, list)
+			igbuio_msix_mask_irq(desc, irq_state);
+#endif
 	}
 	pci_cfg_access_unlock(pdev);
 
@@ -268,7 +211,7 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
 static irqreturn_t
 igbuio_pci_irqhandler(int irq, struct uio_info *info)
 {
-	struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
+	struct rte_uio_pci_dev *udev = info->priv;
 
 	/* Legacy mode need to mask in hardware */
 	if (udev->mode == RTE_INTR_MODE_LEGACY &&
@@ -333,7 +276,7 @@ igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
 	unsigned long addr, len;
 	void *internal_addr;
 
-	if (sizeof(info->mem) / sizeof(info->mem[0]) <= n)
+	if (n >= ARRAY_SIZE(info->mem))
 		return -EINVAL;
 
 	addr = pci_resource_start(dev, pci_bar);
@@ -358,7 +301,7 @@ igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
 {
 	unsigned long addr, len;
 
-	if (sizeof(info->port) / sizeof(info->port[0]) <= n)
+	if (n >= ARRAY_SIZE(info->port))
 		return -EINVAL;
 
 	addr = pci_resource_start(dev, pci_bar);
@@ -403,7 +346,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
 	iom = 0;
 	iop = 0;
 
-	for (i = 0; i != sizeof(bar_names) / sizeof(bar_names[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
 		if (pci_resource_len(dev, i) != 0 &&
 				pci_resource_start(dev, i) != 0) {
 			flags = pci_resource_flags(dev, i);
@@ -562,23 +505,17 @@ fail_free:
 static void
 igbuio_pci_remove(struct pci_dev *dev)
 {
-	struct uio_info *info = pci_get_drvdata(dev);
-	struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
-
-	if (info->priv == NULL) {
-		pr_notice("Not igbuio device\n");
-		return;
-	}
+	struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
 
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
-	uio_unregister_device(info);
-	igbuio_pci_release_iomem(info);
+	uio_unregister_device(&udev->info);
+	igbuio_pci_release_iomem(&udev->info);
 	if (udev->mode == RTE_INTR_MODE_MSIX)
 		pci_disable_msix(dev);
 	pci_release_regions(dev);
 	pci_disable_device(dev);
 	pci_set_drvdata(dev, NULL);
-	kfree(info);
+	kfree(udev);
 }
 
 static int
diff --git a/scripts/stl/flow_stats_latency.py b/scripts/stl/flow_stats_latency.py
index 334406e5..e1541272 100644
--- a/scripts/stl/flow_stats_latency.py
+++ b/scripts/stl/flow_stats_latency.py
@@ -9,11 +9,11 @@ class STLS1(object):
     def get_streams (self, direction = 0, **kwargs):
         return [STLStream(packet = STLPktBuilder(pkt = os.path.join(CP, "yaml/udp_64B_no_crc.pcap")), # path relative to pwd 
                           mode = STLTXCont(pps=1000),
-                          flow_stats = STLFlowLatencyStats(pg_id = 7)),
+                          flow_stats = STLFlowLatencyStats(pg_id = 1 + kwargs['port_id'])),
 
                 STLStream(packet = STLPktBuilder(pkt = os.path.join(CP, "yaml/udp_594B_no_crc.pcap")), # path relative to pwd 
                           mode = STLTXCont(pps=5000),
-                          flow_stats = STLFlowLatencyStats(pg_id = 12))
+                          flow_stats = STLFlowLatencyStats(pg_id = 50 + kwargs['port_id']))
                ]
 
 
diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp
index 7bb3da0c..b36ac6e1 100755
--- a/src/bp_gtest.cpp
+++ b/src/bp_gtest.cpp
@@ -897,7 +897,7 @@ TEST_F(basic, latency3) {
 
     EXPECT_EQ_UINT32(mg.is_active()?1:0, (uint32_t)0)<< "pass";
 
-    mg.start(8);
+    mg.start(8, NULL);
     mg.stop();
     mg.Dump(stdout);
     mg.DumpShort(stdout);
@@ -2657,19 +2657,17 @@ public:
 TEST_F(time_histogram, test_average) {
     int i;
     int j;
-    // Latency is calculated by low pass filter, so need to give it time to stabilize
-    for (j=0; j < 13; j++) {
-        for (i=0; i<100; i++) {
-            m_hist.Add(10e-6);
-        }
-        for (i=0; i<100; i++) {
-            m_hist.Add(10e-3);
+    for (j = 0; j < 10; j++) {
+        for (i = 0; i <= 2000; i++) {
+            m_hist.Add(10e-7 * i);
         }
         m_hist.update();
+        // Latency is calculated using low pass filter, with initial value of 0
+        EXPECT_EQ(m_hist.get_average_latency(), 1000.0 - (1000.0 / (2 << j)));
+        EXPECT_EQ(m_hist.get_count(), 2001 * (j+1));
+        EXPECT_EQ(m_hist.get_high_count(), 2001 * (j+1) - (11 * (j+1)));
+        EXPECT_EQ(m_hist.get_max_latency(), 2000);
     }
-
-    EXPECT_GT(m_hist.get_average_latency(), 5004);
-    EXPECT_LT(m_hist.get_average_latency(), 5005);
     
     m_hist.Dump(stdout);
 }
diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp
index cb70c572..dc41c8f2 100755
--- a/src/bp_sim.cpp
+++ b/src/bp_sim.cpp
@@ -24,6 +24,8 @@ limitations under the License.
 #include "utl_json.h"
 #include "utl_yaml.h"
 #include "msg_manager.h"
+#include "trex_watchdog.h"
+
 #include <common/basic_utils.h>
 
 #include <trex_stream_node.h>
@@ -3299,6 +3301,9 @@ bool CFlowGenListPerThread::Create(uint32_t           thread_id,
     m_max_threads=max_threads;
     m_thread_id=thread_id;
 
+    m_watchdog        = NULL;
+    m_watchdog_handle = -1;
+
     m_cpu_cp_u.Create(&m_cpu_dp_u);
 
     uint32_t socket_id=rte_lcore_to_socket_id(m_core_id);
@@ -3874,6 +3879,10 @@ void CNodeGenerator::handle_flow_pkt(CGenNode *node, CFlowGenListPerThread *thre
 }
 
 void CNodeGenerator::handle_flow_sync(CGenNode *node, CFlowGenListPerThread *thread, bool &exit_scheduler) {
+
+    /* tickle the watchdog */
+    thread->tickle();
+
     /* flow sync message is a sync point for time */
     thread->m_cur_time_sec = node->m_time;
 
diff --git a/src/bp_sim.h b/src/bp_sim.h
index 5d25506d..136381f9 100755
--- a/src/bp_sim.h
+++ b/src/bp_sim.h
@@ -58,6 +58,7 @@ limitations under the License.
 #include <arpa/inet.h>
 #include "platform_cfg.h"
 #include "flow_stat.h"
+#include "trex_watchdog.h"
 
 #include <trex_stateless_dp_core.h>
 
@@ -3635,6 +3636,12 @@ public:
         m_node_gen.m_v_if->flush_tx_queue();
     }
 
+    void tickle() {
+        if (m_watchdog) {
+            m_watchdog->tickle(m_watchdog_handle);
+        }
+    }
+
     /* return the dual port ID this thread is attached to in 4 ports configuration
        there are 2 dual-ports
 
@@ -3757,6 +3764,8 @@ public:
 
     CTupleGeneratorSmart             m_smart_gen;
 
+    TrexWatchDog                    *m_watchdog;
+    int                              m_watchdog_handle;
 
 public:
     CNodeGenerator                   m_node_gen;
diff --git a/src/flow_stat.cpp b/src/flow_stat.cpp
index 2385e03f..98b9494b 100644
--- a/src/flow_stat.cpp
+++ b/src/flow_stat.cpp
@@ -464,6 +464,8 @@ CFlowStatRuleMgr::CFlowStatRuleMgr() {
     m_rx_core = NULL;
     m_hw_id_map.create(MAX_FLOW_STATS);
     m_hw_id_map_payload.create(MAX_FLOW_STATS_PAYLOAD);
+    memset(m_rx_cant_count_err, 0, sizeof(m_rx_cant_count_err));
+    memset(m_tx_cant_count_err, 0, sizeof(m_tx_cant_count_err));
 }
 
 CFlowStatRuleMgr::~CFlowStatRuleMgr() {
@@ -959,6 +961,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
                         p_user_id->set_need_to_send_rx(port);
                     }
                 } else {
+                    m_rx_cant_count_err[port] += rx_pkts.get_pkts();
                     std::cerr <<  __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx packets, on port "
                               << (uint16_t)port << ", because no mapping was found." << std::endl;
                 }
@@ -972,6 +975,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
                         p_user_id->set_need_to_send_tx(port);
                     }
                 } else {
+                    m_tx_cant_count_err[port] += tx_pkts.get_pkts();;
                     std::cerr <<  __METHOD_NAME__ << i << ":Could not count " << tx_pkts <<  " tx packets on port "
                               << (uint16_t)port << ", because no mapping was found." << std::endl;
                 }
@@ -990,6 +994,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
                         p_user_id->set_need_to_send_rx(port);
                     }
                 } else {
+                    m_rx_cant_count_err[port] += rx_pkts.get_pkts();;
                     std::cerr <<  __METHOD_NAME__ << i << ":Could not count " << rx_pkts << " rx payload packets, on port "
                               << (uint16_t)port << ", because no mapping was found." << std::endl;
                 }
@@ -1003,6 +1008,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
                         p_user_id->set_need_to_send_tx(port);
                     }
                 } else {
+                    m_tx_cant_count_err[port] += tx_pkts.get_pkts();;
                     std::cerr <<  __METHOD_NAME__ << i << ":Could not count " << tx_pkts <<  " tx packets on port "
                               << (uint16_t)port << ", because no mapping was found." << std::endl;
                 }
@@ -1011,6 +1017,15 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
     }
 
     // build json report
+    // general per port data
+    for (uint8_t port = 0; port < m_num_ports; port++) {
+            std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str();
+            if (m_rx_cant_count_err[port] != 0)
+                s_data_section["port_data"][str_port]["rx_err"] = m_rx_cant_count_err[port];
+            if (m_tx_cant_count_err[port] != 0)
+                s_data_section["port_data"][str_port]["tx_err"] = m_tx_cant_count_err[port];
+    }
+
     flow_stat_user_id_map_it_t it;
     for (it = m_user_id_map.begin(); it != m_user_id_map.end(); it++) {
         bool send_empty = true;
@@ -1022,6 +1037,7 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
             user_id_info->set_was_sent(true);
             send_empty = false;
         }
+        // flow stat json
         for (uint8_t port = 0; port < m_num_ports; port++) {
             std::string str_port = static_cast<std::ostringstream*>( &(std::ostringstream() << int(port) ) )->str();
             if (user_id_info->need_to_send_rx(port) || baseline) {
@@ -1042,10 +1058,11 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
             s_data_section[str_user_id] = Json::objectValue;
         }
 
+        // latency info json
         if (user_id_info->rfc2544_support()) {
             CFlowStatUserIdInfoPayload *user_id_info_p = (CFlowStatUserIdInfoPayload *)user_id_info;
             // payload object. Send also latency, jitter...
-            Json::Value lat_hist;
+            Json::Value lat_hist = Json::arrayValue;
             if (user_id_info->is_hw_id()) {
                 // if mapped to hw_id, take info from what we just got from rx core
                 uint16_t hw_id = user_id_info->get_hw_id();
@@ -1055,17 +1072,16 @@ bool CFlowStatRuleMgr::dump_json(std::string & s_json, std::string & l_json, boo
                 user_id_info_p->set_dup_cnt(rfc2544_info[hw_id].get_dup_cnt());
                 user_id_info_p->set_seq_err_big_cnt(rfc2544_info[hw_id].get_seq_err_ev_big());
                 user_id_info_p->set_seq_err_low_cnt(rfc2544_info[hw_id].get_seq_err_ev_low());
-                l_data_section[str_user_id]["latency"]["h"] = lat_hist;
-                l_data_section[str_user_id]["latency"]["last_max"] = rfc2544_info[hw_id].get_last_max_usec();
+                l_data_section[str_user_id]["latency"] = lat_hist;
                 l_data_section[str_user_id]["latency"]["jitter"] = rfc2544_info[hw_id].get_jitter_usec();
             } else {
                 // Not mapped to hw_id. Get saved info.
                 user_id_info_p->get_latency_json(lat_hist);
-                l_data_section[str_user_id]["latency"]["h"] = lat_hist;
-                l_data_section[str_user_id]["latency"]["last_max"] = 0;
-                l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec();
+                if (lat_hist != Json::nullValue) {
+                    l_data_section[str_user_id]["latency"] = lat_hist;
+                    l_data_section[str_user_id]["latency"]["jitter"] = user_id_info_p->get_jitter_usec();
+                }
             }
-            //todo: add last 10 samples
             l_data_section[str_user_id]["err_cntrs"]["dropped"]
                 = Json::Value::UInt64(user_id_info_p->get_seq_err_cnt());
             l_data_section[str_user_id]["err_cntrs"]["out_of_order"]
diff --git a/src/flow_stat.h b/src/flow_stat.h
index 3387c2d3..8671b228 100644
--- a/src/flow_stat.h
+++ b/src/flow_stat.h
@@ -24,11 +24,12 @@
 #include <stdio.h>
 #include <string>
 #include <map>
+#include <json/json.h>
 #include "trex_defs.h"
 #include "trex_exception.h"
 #include "trex_stream.h"
 #include "msg_manager.h"
-#include <internal_api/trex_platform_api.h>
+#include "internal_api/trex_platform_api.h"
 
 // range reserved for rx stat measurement is from IP_ID_RESERVE_BASE to 0xffff
 // Do not change this value. In i350 cards, we filter according to first byte of IP ID
@@ -132,7 +133,7 @@ class rfc2544_info_t_ {
         m_seq_err_ev_big = 0;
         m_seq_err_ev_low = 0;
         m_jitter = 0;
-        m_latency = Json::Value("");
+        m_latency = Json::nullValue;
         m_last_max_latency = 0;
     }
 
@@ -332,7 +333,7 @@ class CFlowStatUserIdInfoPayload : public CFlowStatUserIdInfo {
         json = m_rfc2544_info.m_latency;
     }
 
-    inline void set_latency_json(Json::Value json) {
+    inline void set_latency_json(const Json::Value &json) {
         m_rfc2544_info.m_latency = json;
     }
 
@@ -474,7 +475,9 @@ class CFlowStatRuleMgr {
     uint32_t m_num_started_streams; // How many started (transmitting) streams we have
     CNodeRing *m_ring_to_rx; // handle for sending messages to Rx core
     CFlowStatParser *m_parser;
-    uint16_t m_cap;
+    uint16_t m_cap; // capabilities of the NIC driver we are using
+    uint32_t m_rx_cant_count_err[TREX_MAX_PORTS];
+    uint32_t m_tx_cant_count_err[TREX_MAX_PORTS];
 };
 
 #endif
diff --git a/src/latency.cpp b/src/latency.cpp
index a7652bed..acbe26d4 100644
--- a/src/latency.cpp
+++ b/src/latency.cpp
@@ -22,6 +22,8 @@ limitations under the License.
 #include "latency.h"
 #include "bp_sim.h"
 #include "utl_json.h"
+#include "trex_watchdog.h"
+
 #include <common/basic_utils.h> 
 
 const uint8_t sctp_pkt[]={ 
@@ -562,6 +564,10 @@ bool CLatencyManager::Create(CLatencyManagerCfg * cfg){
     if ( CGlobalInfo::is_learn_mode() ){
         m_nat_check_manager.Create();
     }
+
+    m_watchdog        = NULL;
+    m_watchdog_handle = -1;
+
     return (true);
 }
 
@@ -711,7 +717,13 @@ void  CLatencyManager::reset(){
 
 }
 
-void  CLatencyManager::start(int iter) {
+void CLatencyManager::tickle() {
+    if (m_watchdog) {
+        m_watchdog->tickle(m_watchdog_handle);
+    }
+}
+
+void  CLatencyManager::start(int iter, TrexWatchDog *watchdog) {
     m_do_stop =false;
     m_is_active =false;
     int cnt=0;
@@ -728,6 +740,10 @@ void  CLatencyManager::start(int iter) {
     m_p_queue.push(node);
     bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable()?true:false;
 
+    if (watchdog) {
+        m_watchdog        = watchdog;
+        m_watchdog_handle = watchdog->register_monitor("STF RX CORE", 1);
+    }
 
     while (  !m_p_queue.empty() ) {
         node = m_p_queue.top();
@@ -748,6 +764,9 @@ void  CLatencyManager::start(int iter) {
 
         switch (node->m_type) {
         case CGenNode::FLOW_SYNC:
+
+            tickle();
+
             if ( CGlobalInfo::is_learn_mode() ) {
                 m_nat_check_manager.handle_aging();
             }
@@ -792,6 +811,11 @@ void  CLatencyManager::start(int iter) {
         m_rx_check_manager.tw_drain();
     }
 
+    /* disable the monitor */
+    if (m_watchdog) {
+        m_watchdog->disable_monitor(m_watchdog_handle);
+    }
+
 }
 
 void  CLatencyManager::stop(){
diff --git a/src/latency.h b/src/latency.h
index eef7146a..2b74f737 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -28,6 +28,8 @@ limitations under the License.
 #define L_PKT_SUBMODE_REPLY 2
 #define L_PKT_SUBMODE_0_SEQ 3
 
+class TrexWatchDog;
+
 class CLatencyPktInfo {
 public:
     void Create(class CLatencyPktMode *m_l_pkt_info);
@@ -337,7 +339,7 @@ public:
     bool Create(CLatencyManagerCfg * cfg);
     void Delete();
     void  reset();
-    void  start(int iter);
+    void  start(int iter, TrexWatchDog *watchdog);
     void  stop();
     bool  is_active();
     void set_ip(uint32_t client_ip,
@@ -374,6 +376,7 @@ public:
     CLatencyPktMode *c_l_pkt_mode;
 
 private:
+    void  tickle();
     void  send_pkt_all_ports();
     void  try_rx();
     void  try_rx_queues();
@@ -400,6 +403,9 @@ private:
      CNatRxManager           m_nat_check_manager;
      CCpuUtlDp               m_cpu_dp_u;
      CCpuUtlCp               m_cpu_cp_u;
+     TrexWatchDog            *m_watchdog;
+     int                     m_watchdog_handle;
+
      volatile bool           m_do_stop __rte_cache_aligned ;
 };
 
diff --git a/src/main.cpp b/src/main.cpp
index 701a65d2..62eee880 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -83,6 +83,11 @@ static CSimpleOpt::SOption parser_options[] =
 };
 
 static TrexStateless *m_sim_statelss_obj;
+static char *g_exe_name;
+
+const char *get_exe_name() {
+    return g_exe_name;
+}
 
 static int usage(){
 
@@ -261,8 +266,8 @@ void set_stateless_obj(TrexStateless *obj) {
     m_sim_statelss_obj = obj;
 }
 
-
 int main(int argc , char * argv[]){
+    g_exe_name = argv[0];
 
     std::unordered_map<std::string, int> params;
 
diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp
index 75da27ca..f0c9c7e1 100644
--- a/src/main_dpdk.cpp
+++ b/src/main_dpdk.cpp
@@ -73,6 +73,7 @@ extern "C" {
 #include "debug.h"
 #include "internal_api/trex_platform_api.h"
 #include "main_dpdk.h"
+#include "trex_watchdog.h"
 
 #define RX_CHECK_MIX_SAMPLE_RATE 8
 #define RX_CHECK_MIX_SAMPLE_RATE_1G 2
@@ -80,7 +81,6 @@ extern "C" {
 
 #define SOCKET0         0
 
-#define BP_MAX_PKT      32
 #define MAX_PKT_BURST   32
 
 #define BP_MAX_CORES 32
@@ -1698,7 +1698,8 @@ public:
         }
         m_port=0;
     }
-    uint16_t                m_tx_queue_id;
+    uint8_t                 m_tx_queue_id;
+    uint8_t                 m_tx_queue_id_lat; // q id for tx of latency pkts
     uint16_t                m_len;
     rte_mbuf_t *            m_table[MAX_PKT_BURST];
     CPhyEthIF  *            m_port;
@@ -1710,6 +1711,10 @@ public:
 
 /* per core/gbe queue port for trasmitt */
 class CCoreEthIF : public CVirtualIF {
+public:
+    enum {
+     INVALID_Q_ID = 255
+    };
 
 public:
 
@@ -1717,13 +1722,12 @@ public:
         m_mbuf_cache=0;
     }
 
-public:
     bool Create(uint8_t             core_id,
-                uint16_t            tx_client_queue_id,
+                uint8_t            tx_client_queue_id,
                 CPhyEthIF  *        tx_client_port,
-
-                uint16_t            tx_server_queue_id,
-                CPhyEthIF  *        tx_server_port);
+                uint8_t            tx_server_queue_id,
+                CPhyEthIF  *        tx_server_port,
+                uint8_t             tx_q_id_lat);
     void Delete();
 
     virtual int open_file(std::string file_name){
@@ -1749,8 +1753,6 @@ public:
     virtual int update_mac_addr_from_global_cfg(pkt_dir_t       dir, uint8_t * p);
 
     virtual pkt_dir_t port_id_to_dir(uint8_t port_id);
-
-public:
     void GetCoreCounters(CVirtualIFPerSideStats *stats);
     void DumpCoreStats(FILE *fd);
     void DumpIfStats(FILE *fd);
@@ -1773,8 +1775,9 @@ protected:
     int send_pkt(CCorePerPort * lp_port,
                  rte_mbuf_t *m,
                  CVirtualIFPerSideStats  * lp_stats);
-
-
+    int send_pkt_lat(CCorePerPort * lp_port,
+                 rte_mbuf_t *m,
+                 CVirtualIFPerSideStats  * lp_stats);
 
 protected:
     uint8_t      m_core_id;
@@ -1795,16 +1798,17 @@ protected:
 };
 
 bool CCoreEthIF::Create(uint8_t             core_id,
-                        uint16_t            tx_client_queue_id,
+                        uint8_t             tx_client_queue_id,
                         CPhyEthIF  *        tx_client_port,
-
-                        uint16_t            tx_server_queue_id,
-                        CPhyEthIF  *        tx_server_port){
+                        uint8_t             tx_server_queue_id,
+                        CPhyEthIF  *        tx_server_port,
+                        uint8_t tx_q_id_lat ) {
     m_ports[CLIENT_SIDE].m_tx_queue_id = tx_client_queue_id;
     m_ports[CLIENT_SIDE].m_port        = tx_client_port;
-
+    m_ports[CLIENT_SIDE].m_tx_queue_id_lat = tx_q_id_lat;
     m_ports[SERVER_SIDE].m_tx_queue_id = tx_server_queue_id;
     m_ports[SERVER_SIDE].m_port        = tx_server_port;
+    m_ports[SERVER_SIDE].m_tx_queue_id_lat = tx_q_id_lat;
     m_core_id = core_id;
 
     CMessagingManager * rx_dp=CMsgIns::Ins()->getRxDp();
@@ -1892,16 +1896,17 @@ void CCoreEthIF::DumpCoreStats(FILE *fd){
 }
 
 void CCoreEthIF::DumpIfCfgHeader(FILE *fd){
-    fprintf (fd," core ,  c-port, c-queue , s-port, s-queue \n");
+    fprintf (fd," core, c-port, c-queue, s-port, s-queue, lat-queue\n");
     fprintf (fd," ------------------------------------------\n");
 }
 
 void CCoreEthIF::DumpIfCfg(FILE *fd){
-    fprintf (fd," %d,   %u , %u , %u , %u  \n",m_core_id,
+    fprintf (fd," %d   %6u %6u  %6u  %6u %6u  \n",m_core_id,
              m_ports[CLIENT_SIDE].m_port->get_port_id(),
              m_ports[CLIENT_SIDE].m_tx_queue_id,
              m_ports[SERVER_SIDE].m_port->get_port_id(),
-             m_ports[SERVER_SIDE].m_tx_queue_id
+             m_ports[SERVER_SIDE].m_tx_queue_id,
+             m_ports[SERVER_SIDE].m_tx_queue_id_lat
              );
 }
 
@@ -1933,17 +1938,14 @@ int CCoreEthIF::send_burst(CCorePerPort * lp_port,
 #ifdef DELAY_IF_NEEDED
     while ( unlikely( ret<len ) ){
         rte_delay_us(1);
-        //rte_pause();
-        //rte_pause();
         lp_stats->m_tx_queue_full += 1;
         uint16_t ret1=lp_port->m_port->tx_burst(lp_port->m_tx_queue_id,
                                                 &lp_port->m_table[ret],
                                                 len-ret);
         ret+=ret1;
     }
-#endif
-
-    /* CPU has burst of packets , more that TX can send need to drop them !!*/
+#else
+    /* CPU has burst of packets larger than TX can send. Need to drop packets */
     if ( unlikely(ret < len) ) {
         lp_stats->m_tx_drop += (len-ret);
         uint16_t i;
@@ -1952,6 +1954,7 @@ int CCoreEthIF::send_burst(CCorePerPort * lp_port,
             rte_pktmbuf_free(m);
         }
     }
+#endif
 
     return (0);
 }
@@ -1975,7 +1978,20 @@ int CCoreEthIF::send_pkt(CCorePerPort * lp_port,
     return (0);
 }
 
+int CCoreEthIF::send_pkt_lat(CCorePerPort *lp_port, rte_mbuf_t *m, CVirtualIFPerSideStats *lp_stats) {
+    // We allow sending only from first core of each port. This is serious internal bug otherwise.
+    assert(lp_port->m_tx_queue_id_lat != INVALID_Q_ID);
 
+    int ret = lp_port->m_port->tx_burst(lp_port->m_tx_queue_id_lat, &m, 1);
+
+    while ( unlikely( ret != 1 ) ){
+        rte_delay_us(1);
+        lp_stats->m_tx_queue_full += 1;
+        ret = lp_port->m_port->tx_burst(lp_port->m_tx_queue_id_lat, &m, 1);
+    }
+
+    return ret;
+}
 
 void CCoreEthIF::send_one_pkt(pkt_dir_t       dir,
                               rte_mbuf_t      *m){
@@ -2021,7 +2037,6 @@ int CCoreEthIFStateless::send_node_flow_stat(rte_mbuf *m, CGenNodeStateless * no
     struct flow_stat_payload_header *fsp_head = NULL;
 
     if (hw_id >= MAX_FLOW_STATS) {
-        flush_tx_queue();
         // payload rule hw_ids are in the range right above ip id rules
         uint16_t hw_id_payload = hw_id - MAX_FLOW_STATS;
         if (hw_id_payload > max_stat_hw_id_seen_payload) {
@@ -2055,8 +2070,7 @@ int CCoreEthIFStateless::send_node_flow_stat(rte_mbuf *m, CGenNodeStateless * no
 
     if (hw_id >= MAX_FLOW_STATS) {
         fsp_head->time_stamp = os_get_hr_tick_64();
-        send_pkt(lp_port, mi, lp_stats);
-        flush_tx_queue();
+        send_pkt_lat(lp_port, mi, lp_stats);
     } else {
         send_pkt(lp_port, mi, lp_stats);
     }
@@ -2093,7 +2107,8 @@ int CCoreEthIFStateless::send_node(CGenNode * no) {
     }
 
     if (unlikely(node_sl->is_stat_needed())) {
-        return send_node_flow_stat(m, node_sl, lp_port, lp_stats, (node_sl->get_cache_mbuf() || node_sl->is_cache_mbuf_array())? true:false);
+        return send_node_flow_stat(m, node_sl, lp_port, lp_stats,
+                                   (node_sl->get_cache_mbuf() || node_sl->is_cache_mbuf_array())? true:false);
     } else {
         send_pkt(lp_port,m,lp_stats);
     }
@@ -2118,7 +2133,7 @@ int CCoreEthIFStateless::send_pcap_node(CGenNodePCAP *pcap_node) {
 
 /**
  * slow path code goes here
- * 
+ *
  */
 int CCoreEthIFStateless::handle_slow_path_node(CGenNode * no) {
 
@@ -2829,6 +2844,7 @@ private:
     std::mutex          m_cp_lock;
 
 public:
+    TrexWatchDog         m_watchdog;
     TrexStateless       *m_trex_stateless;
 
 };
@@ -3162,6 +3178,13 @@ int  CGlobalTRex::ixgbe_start(void){
 
     */
     int port_offset=0;
+    uint8_t lat_q_id;
+
+    if ( get_vm_one_queue_enable() ) {
+        lat_q_id = 0;
+    } else {
+        lat_q_id = get_cores_tx() / get_base_num_cores();
+    }
     for (i=0; i<get_cores_tx(); i++) {
         int j=(i+1);
         int queue_id=((j-1)/get_base_num_cores() );   /* for the first min core queue 0 , then queue 1 etc */
@@ -3174,11 +3197,13 @@ int  CGlobalTRex::ixgbe_start(void){
                                queue_id,
                                &m_ports[port_offset], /* 0,2*/
                                queue_id,
-                               &m_ports[port_offset+1] /*1,3*/
-                               );
+                               &m_ports[port_offset+1], /*1,3*/
+                               lat_q_id);
         port_offset+=2;
         if (port_offset == m_max_ports) {
             port_offset = 0;
+            // We want to allow sending latency packets only from first core handling a port
+            lat_q_id = CCoreEthIF::INVALID_Q_ID;
         }
     }
 
@@ -3247,14 +3272,16 @@ bool CGlobalTRex::Create(){
 
         TrexStatelessCfg cfg;
 
-        TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP, global_platform_cfg_info.m_zmq_rpc_port);
+        TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP,
+                                             global_platform_cfg_info.m_zmq_rpc_port,
+                                             &m_cp_lock,
+                                             &m_watchdog);
 
         cfg.m_port_count         = CGlobalInfo::m_options.m_expected_portd;
         cfg.m_rpc_req_resp_cfg   = &rpc_req_resp_cfg;
         cfg.m_rpc_server_verbose = false;
         cfg.m_platform_api       = new TrexDpdkPlatformApi();
         cfg.m_publisher          = &m_zmq_publisher;
-        cfg.m_global_lock        = &m_cp_lock;
 
         m_trex_stateless = new TrexStateless(cfg);
     }
@@ -3922,7 +3949,7 @@ CGlobalTRex::handle_fast_path() {
     check_for_dp_messages();
     // update CPU%
     m_fl.UpdateFast();
-    
+
     if (get_is_stateless()) {
         m_rx_sl.update_cpu_util();
     }else{
@@ -3932,7 +3959,7 @@ CGlobalTRex::handle_fast_path() {
     if ( is_all_cores_finished() ) {
         return false;
     }
-   
+
     return true;
 }
 
@@ -3951,6 +3978,9 @@ int CGlobalTRex::run_in_master() {
     const int FASTPATH_DELAY_MS = 10;
     const int SLOWPATH_DELAY_MS = 500;
 
+    int handle = m_watchdog.register_monitor("master", 2);
+    m_watchdog.start();
+
     while ( true ) {
 
         /* fast path */
@@ -3965,12 +3995,14 @@ int CGlobalTRex::run_in_master() {
             }
             slow_path_counter = 0;
         }
-        
-       
+
+
         cp_lock.unlock();
         delay(FASTPATH_DELAY_MS);
         slow_path_counter += FASTPATH_DELAY_MS;
         cp_lock.lock();
+
+        m_watchdog.tickle(handle);
     }
 
     /* on exit release the lock */
@@ -3982,6 +4014,9 @@ int CGlobalTRex::run_in_master() {
     }
 
     m_mg.stop();
+
+    m_watchdog.stop();
+
     delay(1000);
     if ( was_stopped ){
         /* we should stop latency and exit to stop agents */
@@ -3993,14 +4028,15 @@ int CGlobalTRex::run_in_master() {
 
 
 int CGlobalTRex::run_in_rx_core(void){
+
     if (get_is_stateless()) {
         m_sl_rx_running = true;
-        m_rx_sl.start();
+        m_rx_sl.start(m_watchdog);
         m_sl_rx_running = false;
     } else {
         if ( CGlobalInfo::m_options.is_rx_enabled() ){
             m_sl_rx_running = false;
-            m_mg.start(0);
+            m_mg.start(0, &m_watchdog);
         }
     }
 
@@ -4008,6 +4044,8 @@ int CGlobalTRex::run_in_rx_core(void){
 }
 
 int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
+    std::stringstream ss;
+    ss << "DP core " << int(virt_core_id);
 
     CPreviewMode *lp=&CGlobalInfo::m_options.preview;
     if ( lp->getSingleCore() &&
@@ -4021,14 +4059,23 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
 
     assert(m_fl_was_init);
     CFlowGenListPerThread   * lpt;
+
     lpt = m_fl.m_threads_info[virt_core_id-1];
 
+    /* register a watchdog handle on current core */
+    lpt->m_watchdog        = &m_watchdog;
+    lpt->m_watchdog_handle = m_watchdog.register_monitor(ss.str(), 1);
+
+
     if (get_is_stateless()) {
         lpt->start_stateless_daemon(*lp);
     }else{
         lpt->start_generate_stateful(CGlobalInfo::m_options.out_file,*lp);
     }
 
+    /* done - remove this from the watchdog (we might wait on join for a long time) */
+    lpt->m_watchdog->disable_monitor(lpt->m_watchdog_handle);
+
     m_signal[virt_core_id]=1;
     return (0);
 }
@@ -4280,6 +4327,16 @@ bool CCoreEthIF::process_rx_pkt(pkt_dir_t   dir,
         // In stateless RX, we only care about flow stat packets
         if ((parser.getIpId() & 0xff00) == IP_ID_RESERVE_BASE) {
             send = true;
+            if (parser.getIpId() == FLOW_STAT_PAYLOAD_IP_ID) {
+                // e1000 on ESXI appends 4 bytes to the packet.
+                // This is a best effort hack to get our latency info which we put at the end of the packet
+                uint8_t *p = rte_pktmbuf_mtod(m, uint8_t*);
+                struct flow_stat_payload_header *fsp_head = (struct flow_stat_payload_header *)
+                    (p + m->pkt_len - sizeof(struct flow_stat_payload_header));
+                if (fsp_head->magic != FLOW_STAT_PAYLOAD_MAGIC) {
+                    rte_pktmbuf_trim(m, 4);
+                }
+            }
         }
     } else {
         CLatencyPktMode *c_l_pkt_mode = g_trex.m_mg.c_l_pkt_mode;
@@ -4388,9 +4445,14 @@ uint32_t get_cores_mask(uint32_t cores,int offset){
 }
 
 
+static char *g_exe_name;
+const char *get_exe_name() {
+    return g_exe_name;
+}
 
 
 int main(int argc , char * argv[]){
+    g_exe_name = argv[0];
 
     return ( main_test(argc , argv));
 }
@@ -4699,13 +4761,17 @@ int main_test(int argc , char * argv[]){
         uint32_t pkts = CGlobalInfo::m_options.m_latency_prev *
             CGlobalInfo::m_options.m_latency_rate;
         printf("Starting pre latency check for %d sec\n",CGlobalInfo::m_options.m_latency_prev);
-        g_trex.m_mg.start(pkts);
+        g_trex.m_mg.start(pkts, NULL);
         delay(CGlobalInfo::m_options.m_latency_prev* 1000);
         printf("Finished \n");
         g_trex.m_mg.reset();
         g_trex.reset_counters();
     }
 
+    /* this will give us all cores - master + tx + latency */
+    g_trex.m_watchdog.mark_pending_monitor(g_trex.m_max_cores);
+
+
     g_trex.m_sl_rx_running = false;
     if ( get_is_stateless() ) {
         g_trex.start_master_stateless();
diff --git a/src/rpc-server/trex_rpc_async_server.cpp b/src/rpc-server/trex_rpc_async_server.cpp
index aee92539..82c42458 100644
--- a/src/rpc-server/trex_rpc_async_server.cpp
+++ b/src/rpc-server/trex_rpc_async_server.cpp
@@ -36,7 +36,7 @@ limitations under the License.
  * ZMQ based publisher server
  * 
  */
-TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "publisher", lock) {
+TrexRpcServerAsync::TrexRpcServerAsync(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "publisher") {
     /* ZMQ is not thread safe - this should be outside */
     m_context = zmq_ctx_new();
 }
diff --git a/src/rpc-server/trex_rpc_async_server.h b/src/rpc-server/trex_rpc_async_server.h
index 80d92c2f..daefa174 100644
--- a/src/rpc-server/trex_rpc_async_server.h
+++ b/src/rpc-server/trex_rpc_async_server.h
@@ -33,7 +33,7 @@ limitations under the License.
 class TrexRpcServerAsync : public TrexRpcServerInterface  {
 public:
 
-    TrexRpcServerAsync(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL);
+    TrexRpcServerAsync(const TrexRpcServerConfig &cfg);
 
 protected:
     void _prepare();
diff --git a/src/rpc-server/trex_rpc_req_resp_server.cpp b/src/rpc-server/trex_rpc_req_resp_server.cpp
index 5c587e0f..033f265c 100644
--- a/src/rpc-server/trex_rpc_req_resp_server.cpp
+++ b/src/rpc-server/trex_rpc_req_resp_server.cpp
@@ -32,11 +32,13 @@ limitations under the License.
 #include <zmq.h>
 #include <json/json.h>
 
+#include "trex_watchdog.h"
+
 /**
  * ZMQ based request-response server
  * 
  */
-TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock) : TrexRpcServerInterface(cfg, "req resp", lock) {
+TrexRpcServerReqRes::TrexRpcServerReqRes(const TrexRpcServerConfig &cfg) : TrexRpcServerInterface(cfg, "ZMQ sync request-response") {
 
 }
 
@@ -52,11 +54,19 @@ void TrexRpcServerReqRes::_prepare() {
  */
 void TrexRpcServerReqRes::_rpc_thread_cb() {
     std::stringstream ss;
+    int zmq_rc;
+
+    m_watchdog_handle = m_watchdog->register_monitor(m_name, 1);
 
     /* create a socket based on the configuration */
 
     m_socket  = zmq_socket (m_context, ZMQ_REP);
 
+    /* to make sure the watchdog gets tickles form time to time we give a timeout of 500ms */
+    int timeout = 500;
+    zmq_rc = zmq_setsockopt (m_socket, ZMQ_RCVTIMEO, &timeout, sizeof(int));
+    assert(zmq_rc == 0);
+
     switch (m_cfg.get_protocol()) {
     case TrexRpcServerConfig::RPC_PROT_TCP:
         ss << "tcp://*:";
@@ -68,8 +78,8 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
     ss << m_cfg.get_port();
 
     /* bind the scoket */
-    int rc = zmq_bind (m_socket, ss.str().c_str());
-    if (rc != 0) {
+    zmq_rc = zmq_bind (m_socket, ss.str().c_str());
+    if (zmq_rc != 0) {
         throw TrexRpcException("Unable to start ZMQ server at: " + ss.str());
     }
 
@@ -90,6 +100,9 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
 
     /* must be done from the same thread */
     zmq_close(m_socket);
+
+    /* done */
+    m_watchdog->disable_monitor(m_watchdog_handle);
 }
 
 bool
@@ -101,10 +114,22 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) {
     rc = zmq_msg_init(&zmq_msg);
     assert(rc == 0);
 
-    rc = zmq_msg_recv (&zmq_msg, m_socket, 0);
+    while (true) {
+        m_watchdog->tickle(m_watchdog_handle);
 
-    if (rc == -1) {
+        rc = zmq_msg_recv (&zmq_msg, m_socket, 0);
+        if (rc != -1) {
+            break;
+        }
+
+        /* timeout ? */
+        if (errno == EAGAIN) {
+            continue;
+        }
+
+        /* error ! */
         zmq_msg_close(&zmq_msg);
+
         /* normal shutdown and zmq_term was called */
         if (errno == ETERM) {
             return false;
@@ -113,7 +138,9 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) {
         }
     }
 
-    const char *data =  (const char *)zmq_msg_data(&zmq_msg);
+  
+
+    const char *data = (const char *)zmq_msg_data(&zmq_msg);
     size_t len = zmq_msg_size(&zmq_msg);
     msg.append(data, len);
 
diff --git a/src/rpc-server/trex_rpc_req_resp_server.h b/src/rpc-server/trex_rpc_req_resp_server.h
index 26b3248f..92d51a2a 100644
--- a/src/rpc-server/trex_rpc_req_resp_server.h
+++ b/src/rpc-server/trex_rpc_req_resp_server.h
@@ -32,7 +32,7 @@ limitations under the License.
 class TrexRpcServerReqRes : public TrexRpcServerInterface  {
 public:
 
-    TrexRpcServerReqRes(const TrexRpcServerConfig &cfg, std::mutex *lock = NULL);
+    TrexRpcServerReqRes(const TrexRpcServerConfig &cfg);
 
     /* for test purposes - bypass the ZMQ and inject a message */
     std::string test_inject_request(const std::string &req);
diff --git a/src/rpc-server/trex_rpc_server.cpp b/src/rpc-server/trex_rpc_server.cpp
index 7d2e31a5..e4ca95c3 100644
--- a/src/rpc-server/trex_rpc_server.cpp
+++ b/src/rpc-server/trex_rpc_server.cpp
@@ -28,11 +28,20 @@ limitations under the License.
 #include <sstream>
 #include <iostream>
 
+#include "trex_watchdog.h"
+
 /************** RPC server interface ***************/
 
-TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *lock) : m_cfg(cfg), m_name(name), m_lock(lock)  {
+TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name) : m_cfg(cfg) {
+    m_name = name;
+
+    m_lock              = cfg.m_lock;
+    m_watchdog          = cfg.m_watchdog;
+    m_watchdog_handle   = -1;
+
     m_is_running = false;
     m_is_verbose = false;
+
     if (m_lock == NULL) {
         m_lock = &m_dummy_lock;
     }
@@ -69,6 +78,7 @@ void TrexRpcServerInterface::start() {
     /* prepare for run */
     _prepare();
 
+    m_watchdog->mark_pending_monitor();
     m_thread = new std::thread(&TrexRpcServerInterface::_rpc_thread_cb, this);
     if (!m_thread) {
         throw TrexRpcException("unable to create RPC thread");
@@ -119,8 +129,7 @@ get_current_date_time() {
 
 const std::string TrexRpcServer::s_server_uptime = get_current_date_time();
 
-TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
-                             std::mutex *lock) {
+TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg) {
 
     m_req_resp = NULL;
 
@@ -130,7 +139,7 @@ TrexRpcServer::TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
         if (req_resp_cfg->get_protocol() == TrexRpcServerConfig::RPC_PROT_MOCK) {
             m_req_resp = new TrexRpcServerReqResMock(*req_resp_cfg);
         } else {
-            m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg, lock);
+            m_req_resp = new TrexRpcServerReqRes(*req_resp_cfg);
         }
 
         m_servers.push_back(m_req_resp);
diff --git a/src/rpc-server/trex_rpc_server_api.h b/src/rpc-server/trex_rpc_server_api.h
index a02b2cc0..3d9837ef 100644
--- a/src/rpc-server/trex_rpc_server_api.h
+++ b/src/rpc-server/trex_rpc_server_api.h
@@ -33,6 +33,7 @@ limitations under the License.
 
 class TrexRpcServerInterface;
 class TrexRpcServerReqRes;
+class TrexWatchDog;
 
 /**
  * defines a configuration of generic RPC server
@@ -47,8 +48,11 @@ public:
         RPC_PROT_MOCK
     };
 
-    TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port) : m_protocol(protocol), m_port(port) {
-
+    TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock, TrexWatchDog *watchdog) {
+        m_protocol  = protocol;
+        m_port      = port;
+        m_lock      = lock;
+        m_watchdog  = watchdog;
     }
 
     uint16_t get_port() const {
@@ -62,6 +66,10 @@ public:
 private:
     rpc_prot_e       m_protocol;
     uint16_t         m_port;
+
+public:
+    std::mutex       *m_lock;
+    TrexWatchDog     *m_watchdog;
 };
 
 /**
@@ -72,7 +80,7 @@ private:
 class TrexRpcServerInterface {
 public:
   
-    TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name, std::mutex *m_lock = NULL);
+    TrexRpcServerInterface(const TrexRpcServerConfig &cfg, const std::string &name);
     virtual ~TrexRpcServerInterface();
 
     /**
@@ -134,6 +142,8 @@ protected:
     std::string                          m_name;
     std::mutex                           *m_lock;
     std::mutex                           m_dummy_lock;
+    TrexWatchDog                        *m_watchdog;
+    int                                  m_watchdog_handle;
 };
 
 /**
@@ -147,8 +157,7 @@ class TrexRpcServer {
 public:
   
     /* creates the collection of servers using configurations */
-    TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg,
-                  std::mutex *m_lock = NULL);
+    TrexRpcServer(const TrexRpcServerConfig *req_resp_cfg);
 
     ~TrexRpcServer();
 
diff --git a/src/sim/trex_sim_stateless.cpp b/src/sim/trex_sim_stateless.cpp
index acbeef69..d3981e97 100644
--- a/src/sim/trex_sim_stateless.cpp
+++ b/src/sim/trex_sim_stateless.cpp
@@ -200,7 +200,7 @@ SimStateless::prepare_control_plane() {
     
     m_publisher = new SimPublisher();
 
-    TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0);
+    TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL, NULL);
 
     cfg.m_port_count         = m_port_count;
     cfg.m_rpc_req_resp_cfg   = &rpc_req_resp_cfg;
diff --git a/src/stateless/cp/trex_stateless.cpp b/src/stateless/cp/trex_stateless.cpp
index 5bbe9faf..698ede90 100644
--- a/src/stateless/cp/trex_stateless.cpp
+++ b/src/stateless/cp/trex_stateless.cpp
@@ -40,7 +40,7 @@ TrexStateless::TrexStateless(const TrexStatelessCfg &cfg) {
     /* create RPC servers */
 
     /* set both servers to mutex each other */
-    m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg, cfg.m_global_lock);
+    m_rpc_server = new TrexRpcServer(cfg.m_rpc_req_resp_cfg);
     m_rpc_server->set_verbose(cfg.m_rpc_server_verbose);
 
     /* configure ports */
diff --git a/src/stateless/cp/trex_stateless.h b/src/stateless/cp/trex_stateless.h
index 033326ca..83ab6976 100644
--- a/src/stateless/cp/trex_stateless.h
+++ b/src/stateless/cp/trex_stateless.h
@@ -41,6 +41,7 @@ limitations under the License.
 #include "trex_api_class.h"
 
 class TrexStatelessPort;
+class TrexWatchDog;
 
 /**
  * unified stats
@@ -87,7 +88,6 @@ public:
         m_rpc_server_verbose  = false;
         m_platform_api        = NULL;
         m_publisher           = NULL;
-        m_global_lock         = NULL;
     }
 
     const TrexRpcServerConfig  *m_rpc_req_resp_cfg;
@@ -95,7 +95,6 @@ public:
     bool                        m_rpc_server_verbose;
     uint8_t                     m_port_count;
     TrexPublisher              *m_publisher;
-    std::mutex                 *m_global_lock;
 
 };
 
diff --git a/src/stateless/dp/trex_stateless_dp_core.cpp b/src/stateless/dp/trex_stateless_dp_core.cpp
index 21334363..fe78c5b2 100644
--- a/src/stateless/dp/trex_stateless_dp_core.cpp
+++ b/src/stateless/dp/trex_stateless_dp_core.cpp
@@ -656,6 +656,7 @@ TrexStatelessDpCore::idle_state_loop() {
     int counter = 0;
 
     while (m_state == STATE_IDLE) {
+        m_core->tickle();
         m_core->m_node_gen.m_v_if->flush_dp_rx_queue();
         bool had_msg = periodic_check_for_cp_messages();
         if (had_msg) {
diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp
index f9150ff7..b3555c13 100644
--- a/src/stateless/rx/trex_stateless_rx_core.cpp
+++ b/src/stateless/rx/trex_stateless_rx_core.cpp
@@ -52,12 +52,8 @@ void CRFC2544Info::export_data(rfc2544_info_t_ &obj) {
 
     obj.set_err_cntrs(m_seq_err, m_ooo, m_dup, m_seq_err_events_too_big, m_seq_err_events_too_low);
     obj.set_jitter(m_jitter.get_jitter());
-    json_str = "";
-    m_latency.dump_json("", json_str);
-    // This is a hack. We need to make the dump_json return json object.
-    reader.parse( json_str.c_str(), json);
+    m_latency.dump_json(json);
     obj.set_latency_json(json);
-    obj.set_last_max(m_last_max.getMax());
 };
 
 void CCPortLatencyStl::reset() {
@@ -76,6 +72,9 @@ void CRxCoreStateless::create(const CRxSlCfg &cfg) {
     m_ring_to_cp   = cp_rx->getRingDpToCp(0);
     m_state = STATE_IDLE;
 
+    m_watchdog_handle = -1;
+    m_watchdog        = NULL;
+
     for (int i = 0; i < m_max_ports; i++) {
         CLatencyManagerPerPortStl * lp = &m_ports[i];
         lp->m_io = cfg.m_ports[i];
@@ -93,7 +92,15 @@ void CRxCoreStateless::handle_cp_msg(TrexStatelessCpToRxMsgBase *msg) {
     delete msg;
 }
 
+void CRxCoreStateless::tickle() {
+    m_watchdog->tickle(m_watchdog_handle);
+}
+
 bool CRxCoreStateless::periodic_check_for_cp_messages() {
+
+    /* tickle the watchdog */
+    tickle();
+    
     /* fast path */
     if ( likely ( m_ring_from_cp->isEmpty() ) ) {
         return false;
@@ -140,11 +147,15 @@ void CRxCoreStateless::idle_state_loop() {
     }
 }
 
-void CRxCoreStateless::start() {
+void CRxCoreStateless::start(TrexWatchDog &watchdog) {
     int count = 0;
     int i = 0;
     bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable() ? true : false;
 
+    /* register a watchdog handle on current core */
+    m_watchdog        = &watchdog;
+    m_watchdog_handle = watchdog.register_monitor("STL RX CORE", 1);
+
     while (true) {
         if (m_state == STATE_WORKING) {
             i++;
@@ -167,6 +178,8 @@ void CRxCoreStateless::start() {
         count += try_rx();
     }
     rte_pause();
+
+    m_watchdog->disable_monitor(m_watchdog_handle);
 }
 
 void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t *m) {
diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h
index d18356b6..ce1bc1ad 100644
--- a/src/stateless/rx/trex_stateless_rx_core.h
+++ b/src/stateless/rx/trex_stateless_rx_core.h
@@ -28,55 +28,6 @@
 
 class TrexStatelessCpToRxMsgBase;
 
-class CLastMax {
- public:
-    CLastMax() {
-        m_max1 = 0;
-        m_max1 = 1;
-        m_choose = true;
-    }
-
-    void update(dsec_t val) {
-        if (m_choose) {
-            if (val > m_max1) {
-                m_max1 = val;
-                sanb_smp_memory_barrier();
-            }
-        } else {
-            if (val > m_max2) {
-                m_max2 = val;
-                sanb_smp_memory_barrier();
-            }
-        }
-    }
-
-    dsec_t getMax() {
-        if (m_choose)
-            return m_max2;
-        else
-            return m_max1;
-    }
-
-    void switchMax() {
-        if (m_choose) {
-            m_max2 = 0;
-            m_choose = false;
-            sanb_smp_memory_barrier();
-        }
-        else {
-            m_max1 = 0;
-            m_choose = true;
-            sanb_smp_memory_barrier();
-        }
-    }
-
- private:
-    dsec_t m_max1;
-    dsec_t m_max2;
-    bool m_choose;
-};
-
-
 class CCPortLatencyStl {
  public:
     void reset();
@@ -112,12 +63,10 @@ class CRFC2544Info {
     void export_data(rfc2544_info_t_ &obj);
     inline void add_sample(double stime) {
         m_latency.Add(stime);
-        m_last_max.update(stime);
         m_jitter.calc(stime);
     }
     inline void sample_period_end() {
         m_latency.update();
-        m_last_max.switchMax();
     }
     inline uint32_t get_seq() {return m_seq;}
     inline void set_seq(uint32_t val) {m_seq = val;}
@@ -136,7 +85,6 @@ class CRFC2544Info {
     uint64_t m_seq_err_events_too_low; // How many packet seq num lower than expected events we had
     uint64_t m_ooo; // Packets we got with seq num lower than expected (We guess they are out of order)
     uint64_t m_dup; // Packets we got with same seq num
-    CLastMax m_last_max; // maximum for last measurement period (reset whenever we read it).
 };
 
 class CRxCoreStateless {
@@ -147,7 +95,7 @@ class CRxCoreStateless {
     };
 
  public:
-    void start();
+    void start(TrexWatchDog &watchdog);
     void create(const CRxSlCfg &cfg);
     void reset_rx_stats(uint8_t port_id);
     int get_rx_stats(uint8_t port_id, rx_per_flow_t *rx_stats, int min, int max, bool reset
@@ -165,6 +113,7 @@ class CRxCoreStateless {
  private:
     void handle_cp_msg(TrexStatelessCpToRxMsgBase *msg);
     bool periodic_check_for_cp_messages();
+    void tickle();
     void idle_state_loop();
     void handle_rx_pkt(CLatencyManagerPerPortStl * lp, rte_mbuf_t * m);
     void handle_rx_queue_msgs(uint8_t thread_id, CNodeRing * r);
@@ -176,6 +125,10 @@ class CRxCoreStateless {
     uint16_t get_hw_id(uint16_t id);
 
  private:
+
+    TrexWatchDog   *m_watchdog;
+    int             m_watchdog_handle;
+
     uint32_t m_max_ports;
     bool m_has_streams;
     CLatencyManagerPerPortStl m_ports[TREX_MAX_PORTS];
diff --git a/src/time_histogram.cpp b/src/time_histogram.cpp
index dd15c4be..fefa59d6 100755
--- a/src/time_histogram.cpp
+++ b/src/time_histogram.cpp
@@ -30,6 +30,8 @@ void CTimeHistogram::Reset() {
     m_period_data[0].reset();
     m_period_data[1].reset();
     m_period = 0;
+    m_total_cnt = 0;
+    m_total_cnt_high = 0;
     m_max_dt = 0;
     m_average = 0;
     memset(&m_max_ar[0],0,sizeof(m_max_ar));
@@ -57,14 +59,13 @@ bool CTimeHistogram::Add(dsec_t dt) {
     CTimeHistogramPerPeriodData &period_elem = m_period_data[m_period];
 
     period_elem.inc_cnt();
+    period_elem.update_sum(dt);
+
+    // values smaller then certain threshold do not get into the histogram
     if (dt < m_min_delta) {
         return false;
     }
     period_elem.inc_high_cnt();
-
-    if ( m_max_dt < dt) {
-        m_max_dt = dt;
-    }
     period_elem.update_max(dt);
 
     uint32_t d_10usec = (uint32_t)(dt*100000.0);
@@ -87,8 +88,6 @@ bool CTimeHistogram::Add(dsec_t dt) {
         }
     }
 
-    period_elem.update_sum(dt);
-
     return true;
 }
 
@@ -113,6 +112,11 @@ void CTimeHistogram::update() {
         m_win_cnt = 0;
     }
     update_average(period_elem);
+    m_total_cnt += period_elem.get_cnt();
+    m_total_cnt_high += period_elem.get_high_cnt();
+    if ( m_max_dt < period_elem.get_max()) {
+        m_max_dt = period_elem.get_max();
+    }
 }
 
 void  CTimeHistogram::update_average(CTimeHistogramPerPeriodData &period_elem) {
@@ -175,13 +179,8 @@ void CTimeHistogram::Dump(FILE *fd) {
     }
 }
 
-/*
- { "histogram" : [ {} ,{} ]  }
-
-*/
-
+// Used in statefull
 void CTimeHistogram::dump_json(std::string name,std::string & json ) {
-    CTimeHistogramPerPeriodData &period_elem = m_period_data[get_read_period_index()];
     char buff[200];
     if (name != "")
         sprintf(buff,"\"%s\":{",name.c_str());
@@ -191,8 +190,8 @@ void CTimeHistogram::dump_json(std::string name,std::string & json ) {
 
     json += add_json("min_usec", get_usec(m_min_delta));
     json += add_json("max_usec", get_usec(m_max_dt));
-    json += add_json("high_cnt", period_elem.get_high_cnt());
-    json += add_json("cnt", period_elem.get_cnt());
+    json += add_json("high_cnt", m_total_cnt_high);
+    json += add_json("cnt", m_total_cnt);
     json+=add_json("s_avg", get_average_latency());
     int i;
     int j;
@@ -218,3 +217,31 @@ void CTimeHistogram::dump_json(std::string name,std::string & json ) {
     }
     json+="  ] } ,";
 }
+
+// Used in stateless
+void CTimeHistogram::dump_json(Json::Value & json, bool add_histogram) {
+    int i, j;
+    uint32_t base=10;
+    CTimeHistogramPerPeriodData &period_elem = m_period_data[get_read_period_index()];
+
+    json["total_max"] = get_usec(m_max_dt);
+    json["last_max"] = get_usec(period_elem.get_max());
+    json["average"] = get_average_latency();
+
+    if (add_histogram) {
+        for (j = 0; j < HISTOGRAM_SIZE_LOG; j++) {
+            for (i = 0; i < HISTOGRAM_SIZE; i++) {
+                if (m_hcnt[j][i] > 0) {
+                    std::string key = static_cast<std::ostringstream*>( &(std::ostringstream()
+                                                                          << int(base * (i + 1)) ) )->str();
+                    json["histogram"][key] = Json::Value::UInt64(m_hcnt[j][i]);
+                }
+            }
+            base = base * 10;
+        }
+        if (m_total_cnt != m_total_cnt_high) {
+            json["histogram"]["0"] = Json::Value::UInt64(m_total_cnt - m_total_cnt_high);
+        }
+    }
+}
+
diff --git a/src/time_histogram.h b/src/time_histogram.h
index 9bdf2c93..0d532f1b 100755
--- a/src/time_histogram.h
+++ b/src/time_histogram.h
@@ -24,11 +24,12 @@ limitations under the License.
 
 
 #include <stdint.h>
-#include "os_time.h"
 #include <stdio.h>
 #include <math.h>
-#include "mbuf.h"
 #include <string>
+#include <json/json.h>
+#include "mbuf.h"
+#include "os_time.h"
 
 class CTimeHistogramPerPeriodData {
  public:
@@ -85,6 +86,9 @@ public:
         return period_elem.get_max_usec();
     }
     void  dump_json(std::string name,std::string & json );
+    void dump_json(Json::Value & json, bool add_histogram = true);
+    uint64_t get_count() {return m_total_cnt;}
+    uint64_t get_high_count() {return m_total_cnt_high;}
 
 private:
     uint32_t get_usec(dsec_t d);
@@ -103,6 +107,8 @@ private:
     // Each period we switch between the two
     CTimeHistogramPerPeriodData m_period_data[2];
     uint8_t m_period; // 0 or 1 according to m_period_data element we currently use
+    uint64_t m_total_cnt;
+    uint64_t m_total_cnt_high;
     dsec_t   m_max_dt;  // Total maximum latency
     dsec_t   m_average; /* moving average */
     uint32_t m_win_cnt;
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp
new file mode 100644
index 00000000..e78e8e6d
--- /dev/null
+++ b/src/trex_watchdog.cpp
@@ -0,0 +1,331 @@
+/*
+ Itay Marom
+ Cisco Systems, Inc.
+*/
+
+/*
+Copyright (c) 2015-2015 Cisco Systems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "trex_watchdog.h"
+#include "trex_exception.h"
+
+#include <assert.h>
+#include <unistd.h>
+#include <sstream>
+
+#include <sys/ptrace.h>
+#include <execinfo.h>
+#include <cxxabi.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <iostream>
+#include  <stdexcept>
+
+#define DISABLE_WATCHDOG_ON_GDB
+
+static TrexWatchDog::monitor_st *global_monitor;
+
+const char *get_exe_name();
+
+std::string exec(const char* cmd) {
+    char buffer[128];
+    std::string result = "";
+    std::shared_ptr<FILE> pipe(popen(cmd, "r"), pclose);
+    if (!pipe) throw std::runtime_error("popen() failed!");
+    while (!feof(pipe.get())) {
+        if (fgets(buffer, 128, pipe.get()) != NULL) {
+            result += buffer;
+        }
+    }
+    return result;
+}
+
+// This function produces a stack backtrace with demangled function & method names.
+__attribute__((noinline))
+std::string Backtrace(int skip = 1)
+{
+    void *callstack[128];
+    const int nMaxFrames = sizeof(callstack) / sizeof(callstack[0]);
+    char buf[1024];
+    int nFrames = backtrace(callstack, nMaxFrames);
+    char **symbols = backtrace_symbols(callstack, nFrames);
+
+    std::ostringstream trace_buf;
+    for (int i = skip; i < nFrames; i++) {
+
+        Dl_info info;
+        if (dladdr(callstack[i], &info) && info.dli_sname) {
+            char *demangled = NULL;
+            int status = -1;
+            if (info.dli_sname[0] == '_')
+                demangled = abi::__cxa_demangle(info.dli_sname, NULL, 0, &status);
+            snprintf(buf, sizeof(buf), "%-3d %*p %s + %zd\n",
+                     i, int(2 + sizeof(void*) * 2), callstack[i],
+                     status == 0 ? demangled :
+                     info.dli_sname == 0 ? symbols[i] : info.dli_sname,
+                     (char *)callstack[i] - (char *)info.dli_saddr);
+            free(demangled);
+        } else {
+            snprintf(buf, sizeof(buf), "%-3d %*p %s\n",
+                     i, int(2 + sizeof(void*) * 2), callstack[i], symbols[i]);
+        }
+        trace_buf << buf;
+    }
+    free(symbols);
+    if (nFrames == nMaxFrames)
+        trace_buf << "[truncated]\n";
+
+    /* add the addr2line info */
+    std::stringstream addr2line;
+
+    addr2line << "/usr/bin/addr2line -e " << get_exe_name() << " ";
+    for (int i = skip; i < nFrames; i++) {
+        addr2line << callstack[i] << " ";
+    }
+
+    trace_buf << "\n\n*** addr2line information follows ***\n\n";
+    try {
+        trace_buf << exec(addr2line.str().c_str());
+    } catch (std::runtime_error &e) {
+        trace_buf << "\n" << e.what();
+    }
+
+    return trace_buf.str();
+}
+
+__attribute__((noinline))
+static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) {
+    std::stringstream ss;
+
+    double now = now_sec();
+
+    ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds";
+
+    std::string backtrace = Backtrace();
+    ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n";
+
+    throw std::runtime_error(ss.str());
+}
+
+void TrexWatchDog::mark_pending_monitor(int count) {
+    std::unique_lock<std::mutex> lock(m_lock);
+    m_pending += count;
+    lock.unlock();
+}
+
+void TrexWatchDog::block_on_pending(int max_block_time_ms) {
+
+    int timeout_msec = max_block_time_ms;
+
+    std::unique_lock<std::mutex> lock(m_lock);
+
+    while (m_pending > 0) {
+
+        lock.unlock();
+        delay(1);
+        lock.lock();
+
+        timeout_msec -= 1;
+        if (timeout_msec == 0) {
+            throw TrexException("WATCHDOG: block on pending monitors timed out");
+        }
+    }
+
+    /* lock will be released */
+}
+
+/**
+ * register a monitor 
+ * must be called from the relevant thread 
+ *  
+ * this function is thread safe 
+ * 
+ * @author imarom (01-Jun-16)
+ * 
+ * @param name 
+ * @param timeout_sec 
+ * 
+ * @return int 
+ */
+int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) {
+    monitor_st monitor;
+
+    /* cannot add monitors while active */
+    assert(m_active == false);
+
+    monitor.active       = true;   
+    monitor.tid          = pthread_self();
+    monitor.name         = name;
+    monitor.timeout_sec  = timeout_sec;
+    monitor.tickled      = true;
+    monitor.ts           = 0;
+
+    /* critical section start */
+    std::unique_lock<std::mutex> lock(m_lock);
+
+    /* make sure no double register */
+    for (auto &m : m_monitors) {
+        if (m.tid == pthread_self()) {
+            std::stringstream ss;
+            ss << "WATCHDOG: double register detected\n\n" << Backtrace();
+            throw TrexException(ss.str());
+        }
+    }
+
+    monitor.handle = m_monitors.size();
+    m_monitors.push_back(monitor);
+
+    assert(m_pending > 0);
+    m_pending--;
+
+    /* critical section end */
+    lock.unlock();
+
+    return monitor.handle;
+}
+
+/**
+ * will disable the monitor - it will no longer be watched
+ * 
+ */
+void TrexWatchDog::disable_monitor(int handle) {
+    assert(handle < m_monitors.size());
+
+    m_monitors[handle].active = false;
+}
+
+/**
+ * thread safe function
+ * 
+ */
+void TrexWatchDog::tickle(int handle) {
+
+    assert(handle < m_monitors.size());
+
+    /* not nesscary but write gets cache invalidate for nothing */
+    if (m_monitors[handle].tickled) {
+        return;
+    }
+
+    m_monitors[handle].tickled = true;
+}
+
+void TrexWatchDog::register_signal() {
+
+    /* do this once */
+    if (g_signal_init) {
+        return;
+    }
+
+    /* register a handler on SIG ALARM */
+    struct sigaction sa;
+    memset (&sa, '\0', sizeof(sa));
+
+    sa.sa_flags = SA_SIGINFO;
+    sa.sa_sigaction = _callstack_signal_handler;
+
+    int rc = sigaction(SIGALRM , &sa, NULL);
+    assert(rc == 0);
+
+    g_signal_init = true;
+}
+
+void TrexWatchDog::start() {
+
+    block_on_pending();
+
+    /* no pending monitors */
+    assert(m_pending == 0);
+
+    /* under GDB - disable the watchdog */
+    #ifdef DISABLE_WATCHDOG_ON_GDB
+    if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1) {
+        printf("\n\n*** GDB detected - disabling watchdog... ***\n\n");
+        return;
+    }
+    #endif
+
+    m_active = true;
+    m_thread = new std::thread(&TrexWatchDog::_main, this);
+    if (!m_thread) {
+        throw TrexException("unable to create watchdog thread");
+    }
+}
+
+void TrexWatchDog::stop() {
+    m_active = false;
+
+    if (m_thread) {
+        m_thread->join();
+        delete m_thread;
+        m_thread = NULL;
+    }
+}
+
+
+
+/**
+ * main loop
+ * 
+ */
+void TrexWatchDog::_main() {
+
+    /* reset all the monitors */
+    for (auto &monitor : m_monitors) {
+        monitor.tickled = true;
+    }
+
+    /* start main loop */
+    while (m_active) {
+
+        dsec_t now = now_sec();
+
+        for (auto &monitor : m_monitors) {
+
+            /* skip non active monitors */
+            if (!monitor.active) {
+                continue;
+            }
+
+            /* if its own - turn it off and write down the time */
+            if (monitor.tickled) {
+                monitor.tickled = false;
+                monitor.ts      = now;
+                continue;
+            }
+
+            /* the bit is off - check the time first */
+            if ( (now - monitor.ts) > monitor.timeout_sec ) {
+                global_monitor = &monitor;
+
+                pthread_kill(monitor.tid, SIGALRM);
+
+                /* nothing to do more... the other thread will terminate, but if not - we terminate */
+                sleep(5);
+                printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str());
+                exit(1);
+            }
+
+        }
+
+        /* the internal clock - 250 ms */
+        delay(250);
+    }
+}
+
+bool TrexWatchDog::g_signal_init = false;
diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h
new file mode 100644
index 00000000..63255180
--- /dev/null
+++ b/src/trex_watchdog.h
@@ -0,0 +1,141 @@
+/*
+ Itay Marom
+ Cisco Systems, Inc.
+*/
+
+/*
+Copyright (c) 2015-2015 Cisco Systems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef __TREX_WATCHDOG_H__
+#define __TREX_WATCHDOG_H__
+
+#include <string>
+#include <vector>
+#include <thread>
+#include <mutex>
+
+//#include "rte_memory.h"
+#include "mbuf.h"
+#include "os_time.h"
+
+class TrexWatchDog {
+public:
+    TrexWatchDog() {
+        m_thread  = NULL;
+        m_active  = false;
+        m_pending = 0;
+
+        register_signal();
+    }
+
+    /**
+     * registering a monitor happens from another thread 
+     * this make sure that start will be able to block until 
+     * all threads has registered 
+     * 
+     * @author imarom (01-Jun-16)
+     */
+    void mark_pending_monitor(int count = 1);
+
+
+    /**
+     * blocks while monitors are pending registeration
+     * 
+     * @author imarom (01-Jun-16)
+     */
+    void block_on_pending(int max_block_time_ms = 200);
+
+
+    /**
+     * add a monitor to the watchdog 
+     * this thread will be monitored and if timeout 
+     * has passed without calling tick - an exception will be called
+     * 
+     * @author imarom (31-May-16)
+     * 
+     * @param name 
+     * @param timeout 
+     * 
+     * @return int 
+     */
+    int register_monitor(const std::string &name, double timeout_sec);
+
+
+    /**
+     * disable a monitor - it will no longer be watched
+     * 
+     */
+    void disable_monitor(int handle);
+
+
+    /**
+     * should be called by each thread on it's handle
+     * 
+     * @author imarom (31-May-16)
+     * 
+     * @param handle 
+     */
+    void tickle(int handle);
+
+
+    /**
+     * start the watchdog
+     * 
+     */
+    void start();
+
+
+    /**
+     * stop the watchdog
+     * 
+     */
+    void stop();
+
+
+    /* should be cache aligned to avoid false sharing */
+    struct monitor_st {
+        /* write fields are first */
+        volatile bool   active;
+        volatile bool   tickled;
+        dsec_t          ts;
+
+        int             handle;
+        double          timeout_sec;
+        pthread_t       tid;
+        std::string     name;
+
+        /* for for a full cacheline */
+        uint8_t       pad[15];
+    };
+
+
+private:
+    void register_signal();
+    void _main();
+
+    std::vector<monitor_st> m_monitors __rte_cache_aligned;
+    std::mutex              m_lock;
+
+    volatile bool           m_active;
+    std::thread            *m_thread;
+    volatile int            m_pending;
+
+    static bool             g_signal_init;
+};
+
+static_assert(sizeof(TrexWatchDog::monitor_st) >= RTE_CACHE_LINE_SIZE, "sizeof(monitor_st) != RTE_CACHE_LINE_SIZE" );
+
+#endif /* __TREX_WATCHDOG_H__ */
author	Yaroslav Brustinov <ybrustin@cisco.com>	2016-06-05 10:44:40 +0300
committer	Yaroslav Brustinov <ybrustin@cisco.com>	2016-06-05 10:44:40 +0300
commit	6d5144308445cb02373056668e3df5d962dc424c (patch)
tree	41be395e6a06a0cb1cfe1fce070c536c0687d8de
parent	e0ca7a226fd37023ff8a9d80e46403a34492c1d7 (diff)
parent	0400a9503137f9179eb92f15d4e3b985ffad562e (diff)