summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: imarom <imarom@cisco.com>, 2016-12-26 15:08:38 +0200
committer: imarom <imarom@cisco.com>, 2016-12-26 15:09:06 +0200
commit: 9ad36b3da3d33cad1d0fbb629707515381260648 (patch)
tree: 89c237fadf7c383577827f17bffaf3760310023d /src
parent: f14ca6aecb0b8236f474d6e7c1c19b9976dac7f4 (diff)
watchdog - support for long IO operations
also reduce complexity of the watchdog

Signed-off-by: imarom <imarom@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- src/trex_watchdog.cpp 28
-rw-r--r-- src/trex_watchdog.h 52
2 files changed, 52 insertions, 28 deletions
diff --git a/src/trex_watchdog.cpp b/src/trex_watchdog.cpp
index f551a787..d2b6b803 100644
--- a/src/trex_watchdog.cpp
+++ b/src/trex_watchdog.cpp
@@ -126,12 +126,13 @@ static void _callstack_signal_handler(int signr, siginfo_t *info, void *secret)
*************************************/
void TrexMonitor::create(const std::string &name, double timeout_sec) {
- m_active_time_sec = now_sec();
m_tid = pthread_self();
m_name = name;
m_timeout_sec = timeout_sec;
+ m_base_timeout_sec = timeout_sec;
m_tickled = true;
m_ts = 0;
+ m_io_ref_cnt = 0;
/* the rare case of m_active_time_sec set out of order with tickled */
asm volatile("mfence" ::: "memory");
@@ -256,29 +257,26 @@ void TrexWatchDog::_main() {
for (int i = 0; i < count; i++) {
TrexMonitor *monitor = m_monitors[i];
- /* skip non active monitors */
- if (!monitor->is_active(now)) {
+ /* skip non expired monitors */
+ if (!monitor->is_expired(now)) {
continue;
}
-
- /* if its own - turn it off and write down the time */
+
+ /* it has expired but it was tickled */
if (monitor->is_tickled()) {
monitor->reset(now);
continue;
}
- /* if the monitor has expired - crash */
- if (monitor->is_expired(now)) {
- global_monitor = monitor;
-
- pthread_kill(monitor->get_tid(), SIGALRM);
+ /* crash */
+ global_monitor = monitor;
- /* nothing to do more... the other thread will terminate, but if not - we terminate */
- sleep(5);
- fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str());
- abort();
- }
+ pthread_kill(monitor->get_tid(), SIGALRM);
+ /* nothing to do more... the other thread will terminate, but if not - we terminate */
+ sleep(5);
+ fprintf(stderr, "\n\n*** WATCHDOG violation detected on task '%s' which have failed to response to the signal ***\n\n", monitor->get_name().c_str());
+ abort();
}
/* the internal clock - 250 ms */
diff --git a/src/trex_watchdog.h b/src/trex_watchdog.h
index 390b565c..67dbb80d 100644
--- a/src/trex_watchdog.h
+++ b/src/trex_watchdog.h
@@ -26,6 +26,7 @@ limitations under the License.
#include <vector>
#include <thread>
#include <mutex>
+#include <assert.h>
#include "mbuf.h"
#include "os_time.h"
@@ -59,8 +60,7 @@ public:
*
*/
void disable(dsec_t time_sec = 1e9) {
- /* double writes are atomic on x86_64 (aligned to 8 bytes) */
- m_active_time_sec = now_sec() + time_sec;
+ set_timeout(time_sec);
}
/**
@@ -68,11 +68,32 @@ public:
*
*/
void enable() {
- /* before enabling - must tickle o.w the watchdog might crash this thread */
- tickle();
- /* memory fence - make sure the main thread sees this by order */
- asm volatile("mfence" ::: "memory");
- m_active_time_sec = now_sec();
+ set_timeout(m_base_timeout_sec);
+ }
+
+ /**
+ * not thread safe
+ * call from current thread only
+ */
+ void io_begin() {
+ /**
+ * holds a ref cnt
+ * a thread might start many IO operations
+ */
+ m_io_ref_cnt++;
+ set_timeout(IO_TIMEOUT_SEC);
+ }
+
+ /**
+ * not thread safe
+ * call from current thread only
+ */
+ void io_end() {
+ assert(m_io_ref_cnt > 0);
+ m_io_ref_cnt--;
+ if (m_io_ref_cnt == 0) {
+ set_timeout(m_base_timeout_sec);
+ }
}
/**
@@ -119,11 +140,6 @@ private:
return m_tid;
}
-
- volatile bool is_active(dsec_t now) const {
- return ( (now - m_active_time_sec) > 0 );
- }
-
volatile bool is_tickled() const {
return m_tickled;
}
@@ -132,16 +148,26 @@ private:
return ( get_interval(now) > m_timeout_sec );
}
+ void set_timeout(double timeout_sec) {
+ /* before changing timeout we MUST tickle and memory fence o.w the main thread might crash */
+ tickle();
+ asm volatile("mfence" ::: "memory");
+ m_timeout_sec = timeout_sec;
+ }
+
/* write fields are first */
- volatile dsec_t m_active_time_sec;
volatile bool m_tickled;
int m_handle;
dsec_t m_ts;
double m_timeout_sec;
+ double m_base_timeout_sec;
pthread_t m_tid;
std::string m_name;
+ uint32_t m_io_ref_cnt;
+
+ static const int IO_TIMEOUT_SEC = 30;
} __rte_cache_aligned;