diff options
author | 2016-12-26 15:08:38 +0200 | |
---|---|---|
committer | 2016-12-26 15:09:06 +0200 | |
commit | 9ad36b3da3d33cad1d0fbb629707515381260648 (patch) | |
tree | 89c237fadf7c383577827f17bffaf3760310023d /src | |
parent | f14ca6aecb0b8236f474d6e7c1c19b9976dac7f4 (diff) |
watchdog - support for long IO operations
also reduce complexity of the watchdog
Signed-off-by: imarom <imarom@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/trex_watchdog.cpp | 28 | ||||
-rw-r--r-- | src/trex_watchdog.h | 52 |
2 files changed, 52 insertions, 28 deletions
```diff
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp
index f551a787..d2b6b803 100644
--- a/src/trex_watchdog.cpp
+++ b/src/trex_watchdog.cpp
@@ -126,12 +126,13 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret)
  *************************************/

 void TrexMonitor::create(const std::string &name, double timeout_sec) {
-    m_active_time_sec = now_sec();
     m_tid = pthread_self();
     m_name = name;
     m_timeout_sec = timeout_sec;
+    m_base_timeout_sec = timeout_sec;
     m_tickled = true;
     m_ts = 0;
+    m_io_ref_cnt = 0;

     /* the rare case of m_active_time_sec set out of order with tickled */
     asm volatile("mfence" ::: "memory");
@@ -256,29 +257,26 @@ void TrexWatchDog::_main() {
         for (int i = 0; i < count; i++) {
             TrexMonitor *monitor = m_monitors[i];

-            /* skip non active monitors */
-            if (!monitor->is_active(now)) {
+            /* skip non expired monitors */
+            if (!monitor->is_expired(now)) {
                 continue;
             }
-
-            /* if its own - turn it off and write down the time */
+
+            /* it has expired but it was tickled */
             if (monitor->is_tickled()) {
                 monitor->reset(now);
                 continue;
             }

-            /* if the monitor has expired - crash */
-            if (monitor->is_expired(now)) {
-                global_monitor = monitor;
-
-                pthread_kill(monitor->get_tid(), SIGALRM);
+            /* crash */
+            global_monitor = monitor;

-                /* nothing to do more... the other thread will terminate, but if not - we terminate */
-                sleep(5);
-                fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str());
-                abort();
-            }
+            pthread_kill(monitor->get_tid(), SIGALRM);

+            /* nothing to do more... the other thread will terminate, but if not - we terminate */
+            sleep(5);
+            fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str());
+            abort();
         }

         /* the internal clock - 250 ms */
diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h
index 390b565c..67dbb80d 100644
--- a/src/trex_watchdog.h
+++ b/src/trex_watchdog.h
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 #include <thread>
 #include <mutex>
+#include <assert.h>

 #include "mbuf.h"
 #include "os_time.h"
@@ -59,8 +60,7 @@ public:
      *
      */
     void disable(dsec_t time_sec = 1e9) {
-        /* double writes are atomic on x86_64 (aligned to 8 bytes) */
-        m_active_time_sec = now_sec() + time_sec;
+        set_timeout(time_sec);
     }

     /**
@@ -68,11 +68,32 @@ public:
      *
      */
     void enable() {
-        /* before enabling - must tickle o.w the watchdog might crash this thread */
-        tickle();
-        /* memory fence - make sure the main thread sees this by order */
-        asm volatile("mfence" ::: "memory");
-        m_active_time_sec = now_sec();
+        set_timeout(m_base_timeout_sec);
+    }
+
+    /**
+     * not thread safe
+     * call from current thread only
+     */
+    void io_begin() {
+        /**
+         * holds a ref cnt
+         * a thread might start many IO operations
+         */
+        m_io_ref_cnt++;
+        set_timeout(IO_TIMEOUT_SEC);
+    }
+
+    /**
+     * not thread safe
+     * call from current thread only
+     */
+    void io_end() {
+        assert(m_io_ref_cnt > 0);
+        m_io_ref_cnt--;
+        if (m_io_ref_cnt == 0) {
+            set_timeout(m_base_timeout_sec);
+        }
     }

     /**
@@ -119,11 +140,6 @@ private:
         return m_tid;
     }

-
-    volatile bool is_active(dsec_t now) const {
-        return ( (now - m_active_time_sec) > 0 );
-    }
-
     volatile bool is_tickled() const {
         return m_tickled;
     }
@@ -132,16 +148,26 @@ private:
         return ( get_interval(now) > m_timeout_sec );
     }

+    void set_timeout(double timeout_sec) {
+        /* before changing timeout we MUST tickle and memory fence o.w the main thread might crash */
+        tickle();
+        asm volatile("mfence" ::: "memory");
+        m_timeout_sec = timeout_sec;
+    }
+
     /* write fields are first */
-    volatile dsec_t m_active_time_sec;
     volatile bool m_tickled;
     int m_handle;
     dsec_t m_ts;
     double m_timeout_sec;
+    double m_base_timeout_sec;
     pthread_t m_tid;
     std::string m_name;


+    uint32_t m_io_ref_cnt;
+
+    static const int IO_TIMEOUT_SEC = 30;

 } __rte_cache_aligned;

```