diff options
author | Hanoh Haim <hhaim@cisco.com> | 2016-12-07 15:24:38 +0200 |
---|---|---|
committer | Hanoh Haim <hhaim@cisco.com> | 2016-12-21 13:01:05 +0200 |
commit | eae78d4356b8834b78a91c52d869a7949f8f3e90 (patch) | |
tree | 184156f8e653adfa33eb0e70838f45d2a92355d0 /src/h_timer_w.h | |
parent | 539de1c6af63071c1da9ed5db668c500f8993a03 (diff) |
improve Stateful scheduler
Signed-off-by: Hanoh Haim <hhaim@cisco.com>
Diffstat (limited to 'src/h_timer_w.h')
-rw-r--r-- | src/h_timer_w.h | 533 |
1 files changed, 533 insertions, 0 deletions
diff --git a/src/h_timer_w.h b/src/h_timer_w.h new file mode 100644 index 00000000..552c7423 --- /dev/null +++ b/src/h_timer_w.h @@ -0,0 +1,533 @@ +// -*- mode: c++; c-basic-offset: 4 indent-tabs-mode: nil -*- */ +// +// Copyright 2016 Juho Snellman, released under a MIT license (see +// LICENSE). +// +// A timer queue which allows events to be scheduled for execution +// at some later point. Reasons you might want to use this implementation +// instead of some other are: +// +// - A single-file C++11 implementation with no external dependencies. +// - Optimized for high occupancy rates, on the assumption that the +// utilization of the timer queue is proportional to the utilization +// of the system as a whole. When a tradeoff needs to be made +// between efficiency of one operation at a low occupancy rate and +// another operation at a high rate, we choose the latter. +// - Tries to minimize the cost of event rescheduling or cancelation, +// on the assumption that a large percentage of events will never +// be triggered. The implementation avoids unnecessary work when an +// event is rescheduled, and provides a way for the user specify a +// range of acceptable execution times instead of just an exact one. +// - Facility for limiting the number of events to execute on a +// single invocation, to allow fine grained interleaving of timer +// processing and application logic. +// - An interface that at least the author finds convenient. +// +// The exact implementation strategy is a hierarchical timer +// wheel. A timer wheel is effectively a ring buffer of linked lists +// of events, and a pointer to the ring buffer. As the time advances, +// the pointer moves forward, and any events in the ring buffer slots +// that the pointer passed will get executed. +// +// A hierarchical timer wheel layers multiple timer wheels running at +// different resolutions on top of each other. When an event is +// scheduled so far in the future than it does not fit the innermost +// (core) wheel, it instead gets scheduled on one of the outer +// wheels. On each rotation of the inner wheel, one slot's worth of +// events are promoted from the second wheel to the core. On each +// rotation of the second wheel, one slot's worth of events is +// promoted from the third wheel to the second, and so on. +// +// The basic usage is to create a single TimerWheel object and +// multiple TimerEvent or MemberTimerEvent objects. The events are +// scheduled for execution using TimerWheel::schedule() or +// TimerWheel::schedule_in_range(), or unscheduled using the event's +// cancel() method. +// +// Example usage: +// +// typedef std::function<void()> Callback; +// TimerWheel timers; +// int count = 0; +// TimerEvent<Callback> timer([&count] () { ++count; }); +// +// timers.schedule(&timer, 5); +// timers.advance(4); +// assert(count == 0); +// timers.advance(1); +// assert(count == 1); +// +// timers.schedule(&timer, 5); +// timer.cancel(); +// timers.advance(4); +// assert(count == 1); +// +// To tie events to specific member functions of an object instead of +// a callback function, use MemberTimerEvent instead of TimerEvent. +// For example: +// +// class Test { +// public: +// Test() : inc_timer_(this) { +// } +// void start(TimerWheel* timers) { +// timers->schedule(&inc_timer_, 10); +// } +// void on_inc() { +// count_++; +// } +// int count() { return count_; } +// private: +// MemberTimerEvent<Test, &Test::on_inc> inc_timer_; +// int count_ = 0; +// }; + +#ifndef RATAS_TIMER_WHEEL_H +#define RATAS_TIMER_WHEEL_H + +#include <cassert> +#include <cstdlib> +#include <cstdint> +#include <cstdio> +#include <limits> +#include <memory> + +typedef uint64_t tick_t; + +class TimerWheelSlot; +class TimerWheel; + +// An abstract class representing an event that can be scheduled to +// happen at some later time. +struct TimerEventInterface { +public: + // Unschedule this event. It's safe to cancel an event that is inactive. + inline void cancel(); + + // Return true iff the event is currently scheduled for execution. + bool active() const { + return slot_ != NULL; + } + + // Return the absolute tick this event is scheduled to be executed on. + tick_t scheduled_at() const { return scheduled_at_; } + +private: + TimerEventInterface(const TimerEventInterface& other) = delete; + TimerEventInterface& operator=(const TimerEventInterface& other) = delete; + friend TimerWheelSlot; + friend TimerWheel; + + + void set_scheduled_at(tick_t ts) { scheduled_at_ = ts; } + // Move the event to another slot. (It's safe for either the current + // or new slot to be NULL). + inline void relink(TimerWheelSlot* slot); + +private: + /* one CACHELINE 32 byte in 64bit processor */ + + tick_t scheduled_at_; + // The slot this event is currently in (NULL if not currently scheduled). + TimerWheelSlot* slot_ = NULL; + // The events are linked together in the slot using an internal + // doubly-linked list; this iterator does double duty as the + // linked list node for this event. + TimerEventInterface* next_ = NULL; + TimerEventInterface* prev_ = NULL; +}; + +#if 0 +// An event that takes the callback (of type CBType) to execute as +// a constructor parameter. +template<typename CBType> +class TimerEvent : public TimerEventInterface { +public: + explicit TimerEvent<CBType>(const CBType& callback) + : callback_(callback) { + } + + void execute() { + callback_(); + } + +private: + TimerEvent<CBType>(const TimerEvent<CBType>& other) = delete; + TimerEvent<CBType>& operator=(const TimerEvent<CBType>& other) = delete; + CBType callback_; +}; + +// An event that's specialized with a (static) member function of class T, +// and a dynamic instance of T. Event execution causes an invocation of the +// member function on the instance. +template<typename T, void(T::*MFun)() > +class MemberTimerEvent : public TimerEventInterface { +public: + MemberTimerEvent(T* obj) : obj_(obj) { + } + + virtual void execute () { + (obj_->*MFun)(); + } + +private: + T* obj_; +}; + +#endif + +// Purely an implementation detail. +class TimerWheelSlot { +public: + TimerWheelSlot() { + } + +private: + // Return the first event queued in this slot. + const TimerEventInterface* events() const { return events_; } + // Deque the first event from the slot, and return it. + TimerEventInterface* pop_event() { + auto event = events_; + events_ = event->next_; + if (events_) { + events_->prev_ = NULL; + } + event->next_ = NULL; + event->slot_ = NULL; + return event; + } + + TimerWheelSlot(const TimerWheelSlot& other) = delete; + TimerWheelSlot& operator=(const TimerWheelSlot& other) = delete; + friend TimerEventInterface; + friend TimerWheel; + + // Doubly linked (inferior) list of events. + TimerEventInterface* events_ = NULL; +}; + +// A TimerWheel is the entity that TimerEvents can be scheduled on +// for execution (with schedule() or schedule_in_range()), and will +// eventually be executed once the time advances far enough with the +// advance() method. + +class TimerWheel { +public: + bool Create(tick_t now = 0){ + for (int i = 0; i < NUM_LEVELS; ++i) { + now_[i] = now >> (WIDTH_BITS * i); + } + ticks_pending_ = 0; + } + void Delete(){ + } + + // Advance the TimerWheel by the specified number of ticks, and execute + // any events scheduled for execution at or before that time. The + // number of events executed can be restricted using the max_execute + // parameter. If that limit is reached, the function will return false, + // and the excess events will be processed on a subsequent call. + // + // - It is safe to cancel or schedule events from within event callbacks. + // - During the execution of the callback the observable event tick will + // be the tick it was scheduled to run on; not the tick the clock will + // be advanced to. + // - Events will happen in order; all events scheduled for tick X will + // be executed before any event scheduled for tick X+1. + // + // Delta should be non-0. The only exception is if the previous + // call to advance() returned false. + // + // advance() should not be called from an event callback. + inline bool advance(tick_t delta, + size_t max_execute=std::numeric_limits<size_t>::max(), + int level = 0); + + // Schedule the event to be executed delta ticks from the current time. + // The delta must be non-0. + inline void schedule(TimerEventInterface* event, tick_t delta); + + // Schedule the event to happen at some time between start and end + // ticks from the current time. The actual time will be determined + // by the TimerWheel to minimize rescheduling and promotion overhead. + // Both start and end must be non-0, and the end must be greater than + // the start. + inline void schedule_in_range(TimerEventInterface* event, + tick_t start, tick_t end); + + // Return the current tick value. Note that if the time increases + // by multiple ticks during a single call to advance(), during the + // execution of the event callback now() will return the tick that + // the event was scheduled to run on. + tick_t now() const { return now_[0]; } + + // Return the number of ticks remaining until the next event will get + // executed. If the max parameter is passed, that will be the maximum + // tick value that gets returned. The max parameter's value will also + // be returned if no events have been scheduled. + // + // Will return 0 if the wheel still has unprocessed events from the + // previous call to advance(). + inline tick_t ticks_to_next_event(tick_t max = std::numeric_limits<tick_t>::max(), + int level = 0); + +private: + TimerWheel(const TimerWheel& other) = delete; + TimerWheel& operator=(const TimerWheel& other) = delete; + + // This handles the actual work of executing event callbacks and + // recursing to the outer wheels. + inline bool process_current_slot(tick_t now, size_t max_execute, int level); + + static const int WIDTH_BITS = 8; + static const int NUM_LEVELS = (64 + WIDTH_BITS - 1) / WIDTH_BITS; + static const int MAX_LEVEL = NUM_LEVELS - 1; + static const int NUM_SLOTS = 1 << WIDTH_BITS; + // A bitmask for looking at just the bits in the timestamp relevant to + // this wheel. + static const int MASK = (NUM_SLOTS - 1); + + // The current timestamp for this wheel. This will be right-shifted + // such that each slot is separated by exactly one tick even on + // the outermost wheels. + tick_t now_[NUM_LEVELS]; + // We've done a partial tick advance. This is how many ticks remain + // unprocessed. + tick_t ticks_pending_; + TimerWheelSlot slots_[NUM_LEVELS][NUM_SLOTS]; +}; + +// Implementation + +inline void TimerEventInterface::relink(TimerWheelSlot* new_slot) { + if (new_slot == slot_) { + return; + } + + // Unlink from old location. + if (slot_) { + auto prev = prev_; + auto next = next_; + if (next) { + next->prev_ = prev; + } + if (prev) { + prev->next_ = next; + } else { + // Must be at head of slot. Move the next item to the head. + slot_->events_ = next; + } + } + + // Insert in new slot. + { + if (new_slot) { + auto old = new_slot->events_; + next_ = old; + if (old) { + old->prev_ = this; + } + new_slot->events_ = this; + } else { + next_ = NULL; + } + prev_ = NULL; + } + slot_ = new_slot; +} + +inline void TimerEventInterface::cancel() { + // It's ok to cancel a event that's not scheduled. + if (!slot_) { + return; + } + + relink(NULL); +} + +inline bool TimerWheel::advance(tick_t delta, size_t max_events, int level) { + if (ticks_pending_) { + if (level == 0) { + // Continue collecting a backlog of ticks to process if + // we're called with non-zero deltas. + ticks_pending_ += delta; + } + // We only partially processed the last tick. Process the + // current slot, rather incrementing like advance() normally + // does. + tick_t now = now_[level]; + if (!process_current_slot(now, max_events, level)) { + // Outer layers are still not done, propagate that information + // back up. + return false; + } + if (level == 0) { + // The core wheel has been fully processed. We can now close + // down the partial tick and pretend that we've just been + // called with a delta containing both the new and original + // amounts. + delta = (ticks_pending_ - 1); + ticks_pending_ = 0; + } else { + return true; + } + } else { + // Zero deltas are only ok when in the middle of a partially + // processed tick. + assert(delta > 0); + } + + while (delta--) { + tick_t now = ++now_[level]; + if (!process_current_slot(now, max_events, level)) { + ticks_pending_ = (delta + 1); + return false; + } + } + return true; +} + +inline bool TimerWheel::process_current_slot(tick_t now, size_t max_events, int level) { + size_t slot_index = now & MASK; + auto slot = &slots_[level][slot_index]; + if (slot_index == 0 && level < MAX_LEVEL) { + if (!advance(1, max_events, level + 1)) { + return false; + } + } + while (slot->events()) { + auto event = slot->pop_event(); + if (level > 0) { + assert((now_[0] & MASK) == 0); + if (now_[0] >= event->scheduled_at()) { + event->execute(); + if (!--max_events) { + return false; + } + } else { + // There's a case to be made that promotion should + // also count as work done. And that would simplify + // this code since the max_events manipulation could + // move to the top of the loop. But it's an order of + // magnitude more expensive to execute a typical + // callback, and promotions will naturally clump while + // events triggering won't. + schedule(event, + event->scheduled_at() - now_[0]); + } + } else { + event->execute(); + if (!--max_events) { + return false; + } + } + } + return true; +} + +inline void TimerWheel::schedule(TimerEventInterface* event, tick_t delta) { + event->set_scheduled_at(now_[0] + delta); + + int level = 0; + while (delta >= NUM_SLOTS) { + delta = (delta + (now_[level] & MASK)) >> WIDTH_BITS; + ++level; + } + + size_t slot_index = (now_[level] + delta) & MASK; + auto slot = &slots_[level][slot_index]; + event->relink(slot); +} + +inline void TimerWheel::schedule_in_range(TimerEventInterface* event, + tick_t start, tick_t end) { + assert(end > start); + if (event->active()) { + auto current = event->scheduled_at() - now_[0]; + // Event is already scheduled to happen in this range. Instead + // of always using the old slot, we could check compute the + // new slot and switch iff it's aligned better than the old one. + // But it seems hard to believe that could be worthwhile. + if (current >= start && current <= end) { + return; + } + } + + // Zero as many bits (in WIDTH_BITS chunks) as possible + // from "end" while still keeping the output in the + // right range. + tick_t mask = ~0; + while ((start & mask) != (end & mask)) { + mask = (mask << WIDTH_BITS); + } + + tick_t delta = end & (mask >> WIDTH_BITS); + + schedule(event, delta); +} + +inline tick_t TimerWheel::ticks_to_next_event(tick_t max, int level) { + if (ticks_pending_) { + return 0; + } + // The actual current time (not the bitshifted time) + tick_t now = now_[0]; + + // Smallest tick (relative to now) we've found. + tick_t min = max; + for (int i = 0; i < NUM_SLOTS; ++i) { + // Note: Unlike the uses of "now", slot index calculations really + // need to use now_. + auto slot_index = (now_[level] + 1 + i) & MASK; + // We've reached slot 0. In normal scheduling this would + // mean advancing the next wheel and promoting or executing + // those events. So we need to look in that slot too + // before proceeding with the rest of this wheel. But we + // can't just accept those results outright, we need to + // check the best result there against the next slot on + // this wheel. + if (slot_index == 0 && level < MAX_LEVEL) { + // Exception: If we're in the core wheel, and slot 0 is + // not empty, there's no point in looking in the outer wheel. + // It's guaranteed that the events actually in slot 0 will be + // executed no later than anything in the outer wheel. + if (level > 0 || !slots_[level][slot_index].events()) { + auto up_slot_index = (now_[level + 1] + 1) & MASK; + const auto& slot = slots_[level + 1][up_slot_index]; + for (auto event = slot.events(); event != NULL; + event = event->next_) { + min = std::min(min, event->scheduled_at() - now); + } + } + } + bool found = false; + const auto& slot = slots_[level][slot_index]; + for (auto event = slot.events(); event != NULL; + event = event->next_) { + min = std::min(min, event->scheduled_at() - now); + // In the core wheel all the events in a slot are guaranteed to + // run at the same time, so it's enough to just look at the first + // one. + if (level == 0) { + return min; + } else { + found = true; + } + } + if (found) { + return min; + } + } + + // Nothing found on this wheel, try the next one (unless the wheel can't + // possibly contain an event scheduled earlier than "max"). + if (level < MAX_LEVEL && + (max >> (WIDTH_BITS * level + 1)) > 0) { + return ticks_to_next_event(max, level + 1); + } + + return max; +} + +#endif // RATAS_TIMER_WHEEL_H + |