diff options
Diffstat (limited to 'src/trex_watchdog.cpp')
-rw-r--r-- | src/trex_watchdog.cpp | 206 |
1 files changed, 81 insertions, 125 deletions
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp index e78e8e6d..d099933b 100644 --- a/src/trex_watchdog.cpp +++ b/src/trex_watchdog.cpp @@ -36,9 +36,8 @@ limitations under the License. #include <iostream> #include <stdexcept> -#define DISABLE_WATCHDOG_ON_GDB -static TrexWatchDog::monitor_st *global_monitor; +static TrexMonitor *global_monitor; const char *get_exe_name(); @@ -114,7 +113,7 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) double now = now_sec(); - ss << "WATCHDOG: task '" << global_monitor->name << "' has not responded for more than " << (now - global_monitor->ts) << " seconds - timeout is " << global_monitor->timeout_sec << " seconds"; + ss << "WATCHDOG: task '" << global_monitor->get_name() << "' has not responded for more than " << global_monitor->get_interval(now) << " seconds - timeout is " << global_monitor->get_timeout_sec() << " seconds"; std::string backtrace = Backtrace(); ss << "\n\n*** traceback follows ***\n\n" << backtrace << "\n"; @@ -122,143 +121,72 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret) throw std::runtime_error(ss.str()); } -void TrexWatchDog::mark_pending_monitor(int count) { - std::unique_lock<std::mutex> lock(m_lock); - m_pending += count; - lock.unlock(); +/************************************** + * Trex Monitor object + *************************************/ + +void TrexMonitor::create(const std::string &name, double timeout_sec) { + m_active = true; + m_tid = pthread_self(); + m_name = name; + m_timeout_sec = timeout_sec; + m_tickled = true; + m_ts = 0; } -void TrexWatchDog::block_on_pending(int max_block_time_ms) { - - int timeout_msec = max_block_time_ms; - - std::unique_lock<std::mutex> lock(m_lock); - - while (m_pending > 0) { - - lock.unlock(); - delay(1); - lock.lock(); - - timeout_msec -= 1; - if (timeout_msec == 0) { - throw TrexException("WATCHDOG: block on pending monitors timed out"); - } - } +/************************************** + * Trex watchdog + *************************************/ - /* lock will be released */ +void TrexWatchDog::init(bool enable){ + m_enable = enable; + if (m_enable) { + register_signal(); + } } /** * register a monitor - * must be called from the relevant thread - * * this function is thread safe * - * @author imarom (01-Jun-16) - * - * @param name - * @param timeout_sec - * - * @return int */ -int TrexWatchDog::register_monitor(const std::string &name, double timeout_sec) { - monitor_st monitor; - - /* cannot add monitors while active */ - assert(m_active == false); - - monitor.active = true; - monitor.tid = pthread_self(); - monitor.name = name; - monitor.timeout_sec = timeout_sec; - monitor.tickled = true; - monitor.ts = 0; +void TrexWatchDog::register_monitor(TrexMonitor *monitor) { + if (!m_enable){ + return; + } /* critical section start */ std::unique_lock<std::mutex> lock(m_lock); - /* make sure no double register */ - for (auto &m : m_monitors) { - if (m.tid == pthread_self()) { + /* sanity - not a must but why not... */ + for (int i = 0; i < m_mon_count; i++) { + if ( (monitor == m_monitors[i]) || (m_monitors[i]->get_tid() == pthread_self()) ) { std::stringstream ss; ss << "WATCHDOG: double register detected\n\n" << Backtrace(); throw TrexException(ss.str()); } } - monitor.handle = m_monitors.size(); - m_monitors.push_back(monitor); + /* check capacity */ + if (m_mon_count == MAX_MONITORS) { + std::stringstream ss; + ss << "WATCHDOG: too many registered monitors\n\n" << Backtrace(); + throw TrexException(ss.str()); + } - assert(m_pending > 0); - m_pending--; + /* add monitor */ + m_monitors[m_mon_count++] = monitor; /* critical section end */ lock.unlock(); - return monitor.handle; -} - -/** - * will disable the monitor - it will no longer be watched - * - */ -void TrexWatchDog::disable_monitor(int handle) { - assert(handle < m_monitors.size()); - - m_monitors[handle].active = false; -} - -/** - * thread safe function - * - */ -void TrexWatchDog::tickle(int handle) { - - assert(handle < m_monitors.size()); - - /* not nesscary but write gets cache invalidate for nothing */ - if (m_monitors[handle].tickled) { - return; - } - - m_monitors[handle].tickled = true; -} - -void TrexWatchDog::register_signal() { - - /* do this once */ - if (g_signal_init) { - return; - } - - /* register a handler on SIG ALARM */ - struct sigaction sa; - memset (&sa, '\0', sizeof(sa)); - - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = _callstack_signal_handler; - - int rc = sigaction(SIGALRM , &sa, NULL); - assert(rc == 0); - - g_signal_init = true; } void TrexWatchDog::start() { - block_on_pending(); - - /* no pending monitors */ - assert(m_pending == 0); - - /* under GDB - disable the watchdog */ - #ifdef DISABLE_WATCHDOG_ON_GDB - if (ptrace(PTRACE_TRACEME, 0, NULL, 0) == -1) { - printf("\n\n*** GDB detected - disabling watchdog... ***\n\n"); - return; + if (!m_enable){ + return ; } - #endif m_active = true; m_thread = new std::thread(&TrexWatchDog::_main, this); @@ -268,6 +196,11 @@ void TrexWatchDog::start() { } void TrexWatchDog::stop() { + + if (!m_enable){ + return ; + } + m_active = false; if (m_thread) { @@ -285,40 +218,42 @@ void TrexWatchDog::stop() { */ void TrexWatchDog::_main() { - /* reset all the monitors */ - for (auto &monitor : m_monitors) { - monitor.tickled = true; - } + assert(m_enable == true); /* start main loop */ while (m_active) { dsec_t now = now_sec(); - for (auto &monitor : m_monitors) { + /* to be on the safe side - read the count with a lock */ + std::unique_lock<std::mutex> lock(m_lock); + int count = m_mon_count; + lock.unlock(); + + for (int i = 0; i < count; i++) { + TrexMonitor *monitor = m_monitors[i]; /* skip non active monitors */ - if (!monitor.active) { + if (!monitor->is_active()) { continue; } /* if its own - turn it off and write down the time */ - if (monitor.tickled) { - monitor.tickled = false; - monitor.ts = now; + if (monitor->is_tickled()) { + monitor->reset(now); continue; } - /* the bit is off - check the time first */ - if ( (now - monitor.ts) > monitor.timeout_sec ) { - global_monitor = &monitor; + /* if the monitor has expired - crash */ + if (monitor->is_expired(now)) { + global_monitor = monitor; - pthread_kill(monitor.tid, SIGALRM); + pthread_kill(monitor->get_tid(), SIGALRM); /* nothing to do more... the other thread will terminate, but if not - we terminate */ sleep(5); - printf("\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor.name.c_str()); - exit(1); + fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str()); + abort(); } } @@ -328,4 +263,25 @@ void TrexWatchDog::_main() { } } + +void TrexWatchDog::register_signal() { + /* do this once */ + if (g_signal_init) { + return; + } + + /* register a handler on SIG ALARM */ + struct sigaction sa; + memset (&sa, '\0', sizeof(sa)); + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = _callstack_signal_handler; + + int rc = sigaction(SIGALRM , &sa, NULL); + assert(rc == 0); + + g_signal_init = true; +} + bool TrexWatchDog::g_signal_init = false; + |