summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorimarom <imarom@cisco.com>2016-06-19 18:05:54 +0300
committerimarom <imarom@cisco.com>2016-06-20 11:55:13 +0300
commit3ca8be805c26eddfe40c254bdca4e5ae71eee792 (patch)
treefd2b06c04de32b6bad4f5d71203422fa7292efe2
parent1bc9c49fa89a3942d3d1516217635d0c6e6b0c56 (diff)
WATCHDOG - refactor due to trex-211
-rwxr-xr-xsrc/bp_gtest.cpp2
-rwxr-xr-xsrc/bp_sim.cpp3
-rwxr-xr-xsrc/bp_sim.h7
-rw-r--r--src/latency.cpp19
-rw-r--r--src/latency.h5
-rw-r--r--src/main_dpdk.cpp33
-rw-r--r--src/rpc-server/trex_rpc_req_resp_server.cpp7
-rw-r--r--src/rpc-server/trex_rpc_server.cpp3
-rw-r--r--src/rpc-server/trex_rpc_server_api.h9
-rw-r--r--src/sim/trex_sim_stateless.cpp2
-rw-r--r--src/stateless/rx/trex_stateless_rx_core.cpp13
-rw-r--r--src/stateless/rx/trex_stateless_rx_core.h5
-rw-r--r--src/trex_watchdog.cpp209
-rw-r--r--src/trex_watchdog.h183
14 files changed, 234 insertions, 266 deletions
diff --git a/src/bp_gtest.cpp b/src/bp_gtest.cpp
index b36ac6e1..79ea2458 100755
--- a/src/bp_gtest.cpp
+++ b/src/bp_gtest.cpp
@@ -897,7 +897,7 @@ TEST_F(basic, latency3) {
EXPECT_EQ_UINT32(mg.is_active()?1:0, (uint32_t)0)<< "pass";
- mg.start(8, NULL);
+ mg.start(8, false);
mg.stop();
mg.Dump(stdout);
mg.DumpShort(stdout);
diff --git a/src/bp_sim.cpp b/src/bp_sim.cpp
index 78ae09d9..f4eb6f7e 100755
--- a/src/bp_sim.cpp
+++ b/src/bp_sim.cpp
@@ -3324,9 +3324,6 @@ bool CFlowGenListPerThread::Create(uint32_t thread_id,
m_max_threads=max_threads;
m_thread_id=thread_id;
- m_watchdog = NULL;
- m_watchdog_handle = -1;
-
m_cpu_cp_u.Create(&m_cpu_dp_u);
uint32_t socket_id=rte_lcore_to_socket_id(m_core_id);
diff --git a/src/bp_sim.h b/src/bp_sim.h
index 3c865eac..1f35faba 100755
--- a/src/bp_sim.h
+++ b/src/bp_sim.h
@@ -3647,9 +3647,7 @@ public:
}
void tickle() {
- if (m_watchdog) {
- m_watchdog->tickle(m_watchdog_handle);
- }
+ m_monitor.tickle();
}
/* return the dual port ID this thread is attached to in 4 ports configuration
@@ -3774,8 +3772,7 @@ public:
CTupleGeneratorSmart m_smart_gen;
- TrexWatchDog *m_watchdog;
- int m_watchdog_handle;
+ TrexMonitor m_monitor;
public:
CNodeGenerator m_node_gen;
diff --git a/src/latency.cpp b/src/latency.cpp
index 6e4ce643..841913cf 100644
--- a/src/latency.cpp
+++ b/src/latency.cpp
@@ -555,9 +555,6 @@ bool CLatencyManager::Create(CLatencyManagerCfg * cfg){
m_nat_check_manager.Create();
}
- m_watchdog = NULL;
- m_watchdog_handle = -1;
-
return (true);
}
@@ -708,12 +705,10 @@ void CLatencyManager::reset(){
}
void CLatencyManager::tickle() {
- if (m_watchdog) {
- m_watchdog->tickle(m_watchdog_handle);
- }
+ m_monitor.tickle();
}
-void CLatencyManager::start(int iter, TrexWatchDog *watchdog) {
+void CLatencyManager::start(int iter, bool activate_watchdog) {
m_do_stop =false;
m_is_active =false;
int cnt=0;
@@ -730,9 +725,9 @@ void CLatencyManager::start(int iter, TrexWatchDog *watchdog) {
m_p_queue.push(node);
bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable()?true:false;
- if (watchdog) {
- m_watchdog = watchdog;
- m_watchdog_handle = watchdog->register_monitor("STF RX CORE", 1);
+ if (activate_watchdog) {
+ m_monitor.create("STF RX CORE", 1);
+ TrexWatchDog::getInstance().register_monitor(&m_monitor);
}
while ( !m_p_queue.empty() ) {
@@ -802,8 +797,8 @@ void CLatencyManager::start(int iter, TrexWatchDog *watchdog) {
}
/* disable the monitor */
- if (m_watchdog) {
- m_watchdog->disable_monitor(m_watchdog_handle);
+ if (activate_watchdog) {
+ m_monitor.disable();
}
}
diff --git a/src/latency.h b/src/latency.h
index 6eddb3d7..724621f0 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -341,7 +341,7 @@ public:
bool Create(CLatencyManagerCfg * cfg);
void Delete();
void reset();
- void start(int iter, TrexWatchDog *watchdog);
+ void start(int iter, bool activate_watchdog);
void stop();
bool is_active();
void set_ip(uint32_t client_ip,
@@ -405,8 +405,7 @@ private:
CNatRxManager m_nat_check_manager;
CCpuUtlDp m_cpu_dp_u;
CCpuUtlCp m_cpu_cp_u;
- TrexWatchDog *m_watchdog;
- int m_watchdog_handle;
+ TrexMonitor m_monitor;
volatile bool m_do_stop __rte_cache_aligned ;
};
diff --git a/src/main_dpdk.cpp b/src/main_dpdk.cpp
index b959895d..ef1668ab 100644
--- a/src/main_dpdk.cpp
+++ b/src/main_dpdk.cpp
@@ -2857,8 +2857,9 @@ private:
uint32_t m_stats_cnt;
std::mutex m_cp_lock;
+ TrexMonitor m_monitor;
+
public:
- TrexWatchDog m_watchdog;
TrexStateless *m_trex_stateless;
};
@@ -3288,8 +3289,7 @@ bool CGlobalTRex::Create(){
TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_TCP,
global_platform_cfg_info.m_zmq_rpc_port,
- &m_cp_lock,
- &m_watchdog);
+ &m_cp_lock);
cfg.m_port_count = CGlobalInfo::m_options.m_expected_portd;
cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg;
@@ -3992,8 +3992,10 @@ int CGlobalTRex::run_in_master() {
const int FASTPATH_DELAY_MS = 10;
const int SLOWPATH_DELAY_MS = 500;
- int handle = m_watchdog.register_monitor("master", 2);
- m_watchdog.start();
+ m_monitor.create("master", 2);
+ TrexWatchDog::getInstance().register_monitor(&m_monitor);
+
+ TrexWatchDog::getInstance().start();
while ( true ) {
@@ -4016,14 +4018,14 @@ int CGlobalTRex::run_in_master() {
slow_path_counter += FASTPATH_DELAY_MS;
cp_lock.lock();
- m_watchdog.tickle(handle);
+ m_monitor.tickle();
}
/* on exit release the lock */
cp_lock.unlock();
/* first stop the WD */
- m_watchdog.stop();
+ TrexWatchDog::getInstance().stop();
if (!is_all_cores_finished()) {
/* probably CLTR-C */
@@ -4047,12 +4049,12 @@ int CGlobalTRex::run_in_rx_core(void){
if (get_is_stateless()) {
m_sl_rx_running = true;
- m_rx_sl.start(m_watchdog);
+ m_rx_sl.start();
m_sl_rx_running = false;
} else {
if ( CGlobalInfo::m_options.is_rx_enabled() ){
m_sl_rx_running = false;
- m_mg.start(0, &m_watchdog);
+ m_mg.start(0, true);
}
}
@@ -4079,9 +4081,8 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
lpt = m_fl.m_threads_info[virt_core_id-1];
/* register a watchdog handle on current core */
- lpt->m_watchdog = &m_watchdog;
- lpt->m_watchdog_handle = m_watchdog.register_monitor(ss.str(), 1);
-
+ lpt->m_monitor.create(ss.str(), 1);
+ TrexWatchDog::getInstance().register_monitor(&lpt->m_monitor);
if (get_is_stateless()) {
lpt->start_stateless_daemon(*lp);
@@ -4090,7 +4091,7 @@ int CGlobalTRex::run_in_core(virtual_thread_id_t virt_core_id){
}
/* done - remove this from the watchdog (we might wait on join for a long time) */
- lpt->m_watchdog->disable_monitor(lpt->m_watchdog_handle);
+ lpt->m_monitor.disable();
m_signal[virt_core_id]=1;
return (0);
@@ -4786,11 +4787,7 @@ int main_test(int argc , char * argv[]){
/* disable WD if needed */
//CGlobalInfo::m_options.preview.getWDDisable()?false:true
- g_trex.m_watchdog.init(false); /* always disable - due to trex-211 */
-
- /* this will give us all cores - master + tx + latency */
- g_trex.m_watchdog.mark_pending_monitor(g_trex.m_max_cores);
-
+ TrexWatchDog::getInstance().init(true); /* always disable - due to trex-211 */
g_trex.m_sl_rx_running = false;
if ( get_is_stateless() ) {
diff --git a/src/rpc-server/trex_rpc_req_resp_server.cpp b/src/rpc-server/trex_rpc_req_resp_server.cpp
index 033f265c..75eec856 100644
--- a/src/rpc-server/trex_rpc_req_resp_server.cpp
+++ b/src/rpc-server/trex_rpc_req_resp_server.cpp
@@ -56,7 +56,8 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
std::stringstream ss;
int zmq_rc;
- m_watchdog_handle = m_watchdog->register_monitor(m_name, 1);
+ m_monitor.create(m_name, 1);
+ TrexWatchDog::getInstance().register_monitor(&m_monitor);
/* create a socket based on the configuration */
@@ -102,7 +103,7 @@ void TrexRpcServerReqRes::_rpc_thread_cb() {
zmq_close(m_socket);
/* done */
- m_watchdog->disable_monitor(m_watchdog_handle);
+ m_monitor.disable();
}
bool
@@ -115,7 +116,7 @@ TrexRpcServerReqRes::fetch_one_request(std::string &msg) {
assert(rc == 0);
while (true) {
- m_watchdog->tickle(m_watchdog_handle);
+ m_monitor.tickle();
rc = zmq_msg_recv (&zmq_msg, m_socket, 0);
if (rc != -1) {
diff --git a/src/rpc-server/trex_rpc_server.cpp b/src/rpc-server/trex_rpc_server.cpp
index e4ca95c3..6c323c16 100644
--- a/src/rpc-server/trex_rpc_server.cpp
+++ b/src/rpc-server/trex_rpc_server.cpp
@@ -36,8 +36,6 @@ TrexRpcServerInterface::TrexRpcServerInterface(const TrexRpcServerConfig &cfg, c
m_name = name;
m_lock = cfg.m_lock;
- m_watchdog = cfg.m_watchdog;
- m_watchdog_handle = -1;
m_is_running = false;
m_is_verbose = false;
@@ -78,7 +76,6 @@ void TrexRpcServerInterface::start() {
/* prepare for run */
_prepare();
- m_watchdog->mark_pending_monitor();
m_thread = new std::thread(&TrexRpcServerInterface::_rpc_thread_cb, this);
if (!m_thread) {
throw TrexRpcException("unable to create RPC thread");
diff --git a/src/rpc-server/trex_rpc_server_api.h b/src/rpc-server/trex_rpc_server_api.h
index 3d9837ef..6df37b17 100644
--- a/src/rpc-server/trex_rpc_server_api.h
+++ b/src/rpc-server/trex_rpc_server_api.h
@@ -30,10 +30,10 @@ limitations under the License.
#include <stdexcept>
#include <trex_rpc_exception_api.h>
#include <json/json.h>
+#include "trex_watchdog.h"
class TrexRpcServerInterface;
class TrexRpcServerReqRes;
-class TrexWatchDog;
/**
* defines a configuration of generic RPC server
@@ -48,11 +48,10 @@ public:
RPC_PROT_MOCK
};
- TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock, TrexWatchDog *watchdog) {
+ TrexRpcServerConfig(rpc_prot_e protocol, uint16_t port, std::mutex *lock) {
m_protocol = protocol;
m_port = port;
m_lock = lock;
- m_watchdog = watchdog;
}
uint16_t get_port() const {
@@ -69,7 +68,6 @@ private:
public:
std::mutex *m_lock;
- TrexWatchDog *m_watchdog;
};
/**
@@ -142,8 +140,7 @@ protected:
std::string m_name;
std::mutex *m_lock;
std::mutex m_dummy_lock;
- TrexWatchDog *m_watchdog;
- int m_watchdog_handle;
+ TrexMonitor m_monitor;
};
/**
diff --git a/src/sim/trex_sim_stateless.cpp b/src/sim/trex_sim_stateless.cpp
index d3981e97..77bd4d70 100644
--- a/src/sim/trex_sim_stateless.cpp
+++ b/src/sim/trex_sim_stateless.cpp
@@ -200,7 +200,7 @@ SimStateless::prepare_control_plane() {
m_publisher = new SimPublisher();
- TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL, NULL);
+ TrexRpcServerConfig rpc_req_resp_cfg(TrexRpcServerConfig::RPC_PROT_MOCK, 0, NULL);
cfg.m_port_count = m_port_count;
cfg.m_rpc_req_resp_cfg = &rpc_req_resp_cfg;
diff --git a/src/stateless/rx/trex_stateless_rx_core.cpp b/src/stateless/rx/trex_stateless_rx_core.cpp
index b3555c13..847dbc97 100644
--- a/src/stateless/rx/trex_stateless_rx_core.cpp
+++ b/src/stateless/rx/trex_stateless_rx_core.cpp
@@ -72,9 +72,6 @@ void CRxCoreStateless::create(const CRxSlCfg &cfg) {
m_ring_to_cp = cp_rx->getRingDpToCp(0);
m_state = STATE_IDLE;
- m_watchdog_handle = -1;
- m_watchdog = NULL;
-
for (int i = 0; i < m_max_ports; i++) {
CLatencyManagerPerPortStl * lp = &m_ports[i];
lp->m_io = cfg.m_ports[i];
@@ -93,7 +90,7 @@ void CRxCoreStateless::handle_cp_msg(TrexStatelessCpToRxMsgBase *msg) {
}
void CRxCoreStateless::tickle() {
- m_watchdog->tickle(m_watchdog_handle);
+ m_monitor.tickle();
}
bool CRxCoreStateless::periodic_check_for_cp_messages() {
@@ -147,14 +144,14 @@ void CRxCoreStateless::idle_state_loop() {
}
}
-void CRxCoreStateless::start(TrexWatchDog &watchdog) {
+void CRxCoreStateless::start() {
int count = 0;
int i = 0;
bool do_try_rx_queue =CGlobalInfo::m_options.preview.get_vm_one_queue_enable() ? true : false;
/* register a watchdog handle on current core */
- m_watchdog = &watchdog;
- m_watchdog_handle = watchdog.register_monitor("STL RX CORE", 1);
+ m_monitor.create("STL RX CORE", 1);
+ TrexWatchDog::getInstance().register_monitor(&m_monitor);
while (true) {
if (m_state == STATE_WORKING) {
@@ -179,7 +176,7 @@ void CRxCoreStateless::start(TrexWatchDog &watchdog) {
}
rte_pause();
- m_watchdog->disable_monitor(m_watchdog_handle);
+ m_monitor.disable();
}
void CRxCoreStateless::handle_rx_pkt(CLatencyManagerPerPortStl *lp, rte_mbuf_t *m) {
diff --git a/src/stateless/rx/trex_stateless_rx_core.h b/src/stateless/rx/trex_stateless_rx_core.h
index ce1bc1ad..dfc56e4d 100644
--- a/src/stateless/rx/trex_stateless_rx_core.h
+++ b/src/stateless/rx/trex_stateless_rx_core.h
@@ -95,7 +95,7 @@ class CRxCoreStateless {
};
public:
- void start(TrexWatchDog &watchdog);
+ void start();
void create(const CRxSlCfg &cfg);
void reset_rx_stats(uint8_t port_id);
int get_rx_stats(uint8_t port_id, rx_per_flow_t *rx_stats, int min, int max, bool reset
@@ -126,8 +126,7 @@ class CRxCoreStateless {
private:
- TrexWatchDog *m_watchdog;
- int m_watchdog_handle;
+ TrexMonitor m_monitor;
uint32_t m_max_ports;
bool m_has_streams;
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp
index b320a1b3..79dadb16 100644
--- a/src/trex_watchdog.cpp
+++ b/src/trex_watchdog.cpp
@@ -37,7 +37,7 @@ limitations under the License.
#include <stdexcept>
-static TrexWatchDog::monitor_st *global_monitor;
+static TrexMonitor *global_monitor;
const char *get_exe_name();
@@ -113,7 +113,7 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret)
double now = now_sec();
- ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds";
+ ss << "WATCHDOG: task '" << global_monitor->get_name() << "' has not responded for more than " << global_monitor->get_interval(now) << " seconds - timeout is " << global_monitor->get_timeout_sec() << " seconds";
std::string backtrace = Backtrace();
ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n";
@@ -121,153 +121,65 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret)
throw std::runtime_error(ss.str());
}
+/**************************************
+ * Trex Monitor object
+ *************************************/
+
+void TrexMonitor::create(const std::string &name, double timeout_sec) {
+ m_active = true;
+ m_tid = pthread_self();
+ m_name = name;
+ m_timeout_sec = timeout_sec;
+ m_tickled = true;
+ m_ts = 0;
+}
+
+/**************************************
+ * Trex watchdog
+ *************************************/
void TrexWatchDog::init(bool enable){
- m_enable =enable;
+ m_enable = enable;
if (m_enable) {
register_signal();
}
}
-
-void TrexWatchDog::mark_pending_monitor(int count) {
- if (!m_enable){
- return;
- }
-
- std::unique_lock<std::mutex> lock(m_lock);
- m_pending += count;
- lock.unlock();
-}
-
-void TrexWatchDog::block_on_pending(int max_block_time_ms) {
-
- if (!m_enable){
- return;
- }
-
- int timeout_msec = max_block_time_ms;
-
- std::unique_lock<std::mutex> lock(m_lock);
-
- while (m_pending > 0) {
-
- lock.unlock();
- delay(1);
- lock.lock();
-
- timeout_msec -= 1;
- if (timeout_msec == 0) {
- throw TrexException("WATCHDOG: block on pending monitors timed out");
- }
- }
-
- /* lock will be released */
-}
-
/**
* register a monitor
- * must be called from the relevant thread
- *
* this function is thread safe
*
- * @author imarom (01-Jun-16)
- *
- * @param name
- * @param timeout_sec
- *
- * @return int
*/
-int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) {
+void TrexWatchDog::register_monitor(TrexMonitor *monitor) {
if (!m_enable){
- return 0;
+ return;
}
- monitor_st monitor;
-
-
- /* cannot add monitors while active */
- assert(m_active == false);
-
- monitor.active = true;
- monitor.tid = pthread_self();
- monitor.name = name;
- monitor.timeout_sec = timeout_sec;
- monitor.tickled = true;
- monitor.ts = 0;
/* critical section start */
std::unique_lock<std::mutex> lock(m_lock);
- /* make sure no double register */
- for (auto &m : m_monitors) {
- if (m.tid == pthread_self()) {
+ /* sanity - not a must but why not... */
+ for (int i = 0; i < m_mon_count; i++) {
+ if ( (monitor == m_monitors[i]) || (m_monitors[i]->get_tid() == pthread_self()) ) {
std::stringstream ss;
ss << "WATCHDOG: double register detected\n\n" << Backtrace();
throw TrexException(ss.str());
}
}
- monitor.handle = m_monitors.size();
- m_monitors.push_back(monitor);
+ /* check capacity */
+ if (m_mon_count == MAX_MONITORS) {
+ std::stringstream ss;
+ ss << "WATCHDOG: too many registered monitors\n\n" << Backtrace();
+ throw TrexException(ss.str());
+ }
- assert(m_pending > 0);
- m_pending--;
+ /* add monitor */
+ m_monitors[m_mon_count++] = monitor;
/* critical section end */
lock.unlock();
- return monitor.handle;
-}
-
-/**
- * will disable the monitor - it will no longer be watched
- *
- */
-void TrexWatchDog::disable_monitor(int handle) {
- if (!m_enable){
- return ;
- }
-
- assert(handle < m_monitors.size());
-
- m_monitors[handle].active = false;
-}
-
-/**
- * thread safe function
- *
- */
-void TrexWatchDog::tickle(int handle) {
- if (!m_enable){
- return ;
- }
- assert(handle < m_monitors.size());
-
- /* not nesscary but write gets cache invalidate for nothing */
- if (m_monitors[handle].tickled) {
- return;
- }
-
- m_monitors[handle].tickled = true;
-}
-
-void TrexWatchDog::register_signal() {
- /* do this once */
- if (g_signal_init) {
- return;
- }
-
- /* register a handler on SIG ALARM */
- struct sigaction sa;
- memset (&sa, '\0', sizeof(sa));
-
- sa.sa_flags = SA_SIGINFO;
- sa.sa_sigaction = _callstack_signal_handler;
-
- int rc = sigaction(SIGALRM , &sa, NULL);
- assert(rc == 0);
-
- g_signal_init = true;
}
void TrexWatchDog::start() {
@@ -276,11 +188,6 @@ void TrexWatchDog::start() {
return ;
}
- block_on_pending();
-
- /* no pending monitors */
- assert(m_pending == 0);
-
m_active = true;
m_thread = new std::thread(&TrexWatchDog::_main, this);
if (!m_thread) {
@@ -289,6 +196,7 @@ void TrexWatchDog::start() {
}
void TrexWatchDog::stop() {
+
if (!m_enable){
return ;
}
@@ -310,41 +218,39 @@ void TrexWatchDog::stop() {
*/
void TrexWatchDog::_main() {
- assert(m_enable==true);
-
- /* reset all the monitors */
- for (auto &monitor : m_monitors) {
- monitor.tickled = true;
- }
+ assert(m_enable == true);
/* start main loop */
while (m_active) {
dsec_t now = now_sec();
- for (auto &monitor : m_monitors) {
+ /* volatile are slow - read once per iteration */
+ int count = m_mon_count;
+
+ for (int i = 0; i < count; i++) {
+ TrexMonitor *monitor = m_monitors[i];
/* skip non active monitors */
- if (!monitor.active) {
+ if (!monitor->is_active()) {
continue;
}
/* if its own - turn it off and write down the time */
- if (monitor.tickled) {
- monitor.tickled = false;
- monitor.ts = now;
+ if (monitor->is_tickled()) {
+ monitor->reset(now);
continue;
}
- /* the bit is off - check the time first */
- if ( (now - monitor.ts) > monitor.timeout_sec ) {
- global_monitor = &monitor;
+ /* if the monitor has expired - crash */
+ if (monitor->is_expired(now)) {
+ global_monitor = monitor;
- pthread_kill(monitor.tid, SIGALRM);
+ pthread_kill(monitor->get_tid(), SIGALRM);
/* nothing to do more... the other thread will terminate, but if not - we terminate */
sleep(5);
- printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str());
+ printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str());
exit(1);
}
@@ -355,4 +261,25 @@ void TrexWatchDog::_main() {
}
}
+
+void TrexWatchDog::register_signal() {
+ /* do this once */
+ if (g_signal_init) {
+ return;
+ }
+
+ /* register a handler on SIG ALARM */
+ struct sigaction sa;
+ memset (&sa, '\0', sizeof(sa));
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = _callstack_signal_handler;
+
+ int rc = sigaction(SIGALRM , &sa, NULL);
+ assert(rc == 0);
+
+ g_signal_init = true;
+}
+
bool TrexWatchDog::g_signal_init = false;
+
diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h
index 0c1969b1..1c948d56 100644
--- a/src/trex_watchdog.h
+++ b/src/trex_watchdog.h
@@ -27,69 +27,142 @@ limitations under the License.
#include <thread>
#include <mutex>
-//#include "rte_memory.h"
#include "mbuf.h"
#include "os_time.h"
-class TrexWatchDog {
+/**
+ * every thread creates its own monitor from its own memory
+ *
+ * @author imarom (19-Jun-16)
+ */
+class TrexMonitor {
+
public:
- TrexWatchDog() {
- m_thread = NULL;
- m_active = false;
- m_pending = 0;
- m_enable = false;
- }
- void init(bool enable);
+ /**
+ * create a monitor
+ *
+ * @author imarom (31-May-16)
+ *
+ * @param name
+ * @param timeout
+ *
+ * @return int
+ */
+ void create(const std::string &name, double timeout_sec);
/**
- * registering a monitor happens from another thread
- * this make sure that start will be able to block until
- * all threads has registered
+ * disable the monitor - it will be ignored
*
- * @author imarom (01-Jun-16)
*/
- void mark_pending_monitor(int count = 1);
-
+ void disable() {
+ m_active = false;
+ }
/**
- * blocks while monitors are pending registeration
+ * tickle the monitor - this should be called from the thread
+ * to avoid the watchdog from detecting a stuck thread
*
- * @author imarom (01-Jun-16)
+ * @author imarom (19-Jun-16)
*/
- void block_on_pending(int max_block_time_ms = 200);
-
+ void tickle() {
+ /* to avoid useless writes - first check */
+ if (!m_tickled) {
+ m_tickled = true;
+ }
+ }
/**
- * add a monitor to the watchdog
- * this thread will be monitored and if timeout
- * has passed without calling tick - an exception will be called
- *
- * @author imarom (31-May-16)
- *
- * @param name
- * @param timeout
+ * called by the watchdog to reset the monitor for a new round
*
- * @return int
*/
- int register_monitor(const std::string &name, double timeout_sec);
+ void reset(dsec_t now) {
+ m_tickled = false;
+ m_ts = now;
+ }
+
+
+ /* return how much time has passed since last tickle */
+ dsec_t get_interval(dsec_t now) const {
+ return (now - m_ts);
+ }
+
+ pthread_t get_tid() const {
+ return m_tid;
+ }
+
+ const std::string &get_name() const {
+ return m_name;
+ }
+
+ dsec_t get_timeout_sec() const {
+ return m_timeout_sec;
+ }
+
+ volatile bool is_active() const {
+ return m_active;
+ }
+ volatile bool is_tickled() const {
+ return m_tickled;
+ }
+
+ bool is_expired(dsec_t now) const {
+ return ( get_interval(now) > m_timeout_sec );
+ }
+
+
+private:
+
+ /* write fields are first */
+ volatile bool m_active;
+ volatile bool m_tickled;
+ dsec_t m_ts;
+
+ int m_handle;
+ double m_timeout_sec;
+ pthread_t m_tid;
+ std::string m_name;
+
+ /* for for a full cacheline */
+ uint8_t pad[15];
+
+} __rte_cache_aligned;
+
+
+/**
+ * a watchdog is a list of registered monitors
+ *
+ * @author imarom (19-Jun-16)
+ */
+class TrexWatchDog {
+public:
/**
- * disable a monitor - it will no longer be watched
+ * singleton entry
+ *
+ * @author imarom (19-Jun-16)
*
+ * @return TrexWatchDog&
*/
- void disable_monitor(int handle);
+ static TrexWatchDog& getInstance() {
+ static TrexWatchDog instance;
+ return instance;
+ }
+ void init(bool enable);
+
/**
- * should be called by each thread on it's handle
+ * add a monitor to the watchdog
+ * from now on this monitor will be watched
+ *
+ * @author imarom (19-Jun-16)
*
- * @author imarom (31-May-16)
+ * @param monitor - a pointer to the object
*
- * @param handle
*/
- void tickle(int handle);
+ void register_monitor(TrexMonitor *monitor);
/**
@@ -106,38 +179,30 @@ public:
void stop();
- /* should be cache aligned to avoid false sharing */
- struct monitor_st {
- /* write fields are first */
- volatile bool active;
- volatile bool tickled;
- dsec_t ts;
-
- int handle;
- double timeout_sec;
- pthread_t tid;
- std::string name;
-
- /* for for a full cacheline */
- uint8_t pad[15];
- } __rte_cache_aligned ;
+private:
+ TrexWatchDog() {
+ m_thread = NULL;
+ m_enable = false;
+ m_active = false;
+ m_mon_count = 0;
+ }
-private:
void register_signal();
void _main();
+ static const int MAX_MONITORS = 100;
+ TrexMonitor *m_monitors[MAX_MONITORS];
+ volatile int m_mon_count;
+ std::mutex m_lock;
- std::vector<monitor_st> m_monitors __rte_cache_aligned;
- std::mutex m_lock;
- bool m_enable;
- volatile bool m_active;
- std::thread *m_thread;
- volatile int m_pending;
+ bool m_enable;
+ volatile bool m_active;
+ std::thread *m_thread;
- static bool g_signal_init;
+ static bool g_signal_init;
};
-static_assert(sizeof(TrexWatchDog::monitor_st) == RTE_CACHE_LINE_SIZE, "sizeof(monitor_st) != RTE_CACHE_LINE_SIZE" );
+static_assert(sizeof(TrexMonitor) == RTE_CACHE_LINE_SIZE, "sizeof(TrexMonitor) != RTE_CACHE_LINE_SIZE" );
#endif /* __TREX_WATCHDOG_H__ */