summaryrefslogtreecommitdiffstats
path: root/lib/librte_timer
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_timer')
-rw-r--r--lib/librte_timer/Makefile52
-rw-r--r--lib/librte_timer/rte_timer.c637
-rw-r--r--lib/librte_timer/rte_timer.h335
-rw-r--r--lib/librte_timer/rte_timer_version.map15
4 files changed, 1039 insertions, 0 deletions
diff --git a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile
new file mode 100644
index 00000000..2aabef85
--- /dev/null
+++ b/lib/librte_timer/Makefile
@@ -0,0 +1,52 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_timer.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_timer_version.map
+
+LIBABIVER := 1
+
+# all sources are listed in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_TIMER) := rte_timer.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_TIMER)-include := rte_timer.h
+
+# this library depends on the EAL
+DEPDIRS-$(CONFIG_RTE_LIBRTE_TIMER) += lib/librte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
new file mode 100644
index 00000000..3dcdab58
--- /dev/null
+++ b/lib/librte_timer/rte_timer.c
@@ -0,0 +1,637 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <sys/queue.h>
+
+#include <rte_atomic.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_per_lcore.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_spinlock.h>
+#include <rte_random.h>
+
+#include "rte_timer.h"
+
+LIST_HEAD(rte_timer_list, rte_timer);
+
+struct priv_timer {
+ struct rte_timer pending_head; /**< dummy timer heading the pending skiplist; its expire field caches the expiry of the first pending timer */
+ rte_spinlock_t list_lock; /**< lock protecting the pending list of this lcore */
+
+ /** per-lcore flag: set to 1 when a timer that is RUNNING on this
+ * lcore is reset or stopped by this lcore (i.e. from its callback);
+ * cleared by rte_timer_manage() before each callback is invoked */
+ int updated;
+
+ /** number of skiplist levels currently in use for this lcore */
+ unsigned curr_skiplist_depth;
+
+ unsigned prev_lcore; /**< last lcore chosen by the LCORE_ID_ANY round robin */
+
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+ /** per-lcore statistics */
+ struct rte_timer_debug_stats stats;
+#endif
+} __rte_cache_aligned;
+
+/** per-lcore private timer state, indexed by lcore id */
+static struct priv_timer priv_timer[RTE_MAX_LCORE];
+
+/* When RTE_LIBRTE_TIMER_DEBUG is defined, add n to the statistics counter
+ * called name of the calling lcore; callers whose lcore id is not below
+ * RTE_MAX_LCORE are ignored. Without the debug option the macro expands
+ * to an empty statement. */
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+#define __TIMER_STAT_ADD(name, n) do { \
+ unsigned __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) \
+ priv_timer[__lcore_id].stats.name += (n); \
+ } while(0)
+#else
+#define __TIMER_STAT_ADD(name, n) do {} while(0)
+#endif
+
+/* Set up the per-lcore private state used by the timer library. */
+void
+rte_timer_subsystem_init(void)
+{
+	struct priv_timer *priv;
+	unsigned lcore_id = 0;
+
+	/* priv_timer[] has static storage and is therefore already zeroed;
+	 * only the fields that need a non-zero start value are written.
+	 */
+	while (lcore_id < RTE_MAX_LCORE) {
+		priv = &priv_timer[lcore_id];
+		rte_spinlock_init(&priv->list_lock);
+		/* the round robin of each lcore starts from the lcore itself */
+		priv->prev_lcore = lcore_id;
+		lcore_id++;
+	}
+}
+
+/* Put the timer handle tim in its initial state: stopped, with no owner. */
+void
+rte_timer_init(struct rte_timer *tim)
+{
+	union rte_timer_status initial;
+
+	initial.owner = RTE_TIMER_NO_OWNER;
+	initial.state = RTE_TIMER_STOP;
+
+	/* state and owner are published together with a single 32-bit store */
+	tim->status.u32 = initial.u32;
+}
+
+/*
+ * Try to take the timer for (re)configuration by the calling lcore.
+ *
+ * Succeeds when the timer is stopped, pending, or running on the calling
+ * lcore: the status is then atomically switched to CONFIG with the caller
+ * as owner, the status seen just before the switch is stored in
+ * *ret_prev_status, and 0 is returned.
+ * Returns -1 when the timer is running on another lcore or is already in
+ * the CONFIG state.
+ */
+static int
+timer_set_config_state(struct rte_timer *tim,
+		union rte_timer_status *ret_prev_status)
+{
+	const unsigned self = rte_lcore_id();
+	union rte_timer_status seen, wanted;
+
+	for (;;) {
+		/* snapshot state and owner in one read */
+		seen.u32 = tim->status.u32;
+
+		/* someone is already configuring this timer */
+		if (seen.state == RTE_TIMER_CONFIG)
+			return -1;
+
+		/* the callback is executing on a different lcore */
+		if (seen.state == RTE_TIMER_RUNNING &&
+				seen.owner != (uint16_t)self)
+			return -1;
+
+		/* claim the timer; retry if the status changed since the
+		 * snapshot was taken */
+		wanted.state = RTE_TIMER_CONFIG;
+		wanted.owner = (int16_t)self;
+		if (rte_atomic32_cmpset(&tim->status.u32, seen.u32,
+				wanted.u32) != 0)
+			break;
+	}
+
+	ret_prev_status->u32 = seen.u32;
+	return 0;
+}
+
+/*
+ * Move a pending timer to the RUNNING state, owned by the calling lcore.
+ * Returns 0 on success, or -1 if the timer is not (or no longer) PENDING.
+ */
+static int
+timer_set_running_state(struct rte_timer *tim)
+{
+	const unsigned self = rte_lcore_id();
+	union rte_timer_status seen, wanted;
+
+	do {
+		/* snapshot state and owner in one read */
+		seen.u32 = tim->status.u32;
+
+		/* timer is not pending anymore */
+		if (seen.state != RTE_TIMER_PENDING)
+			return -1;
+
+		/* the timer is pending: mark it atomically as running,
+		 * retrying if the status changed since the snapshot */
+		wanted.state = RTE_TIMER_RUNNING;
+		wanted.owner = (int16_t)self;
+	} while (rte_atomic32_cmpset(&tim->status.u32, seen.u32,
+			wanted.u32) == 0);
+
+	return 0;
+}
+
+/*
+ * Pick the skiplist level for a new entry.
+ * Levels are drawn so that an entry present at level n also appears at
+ * level n+1 with probability 1/4. The result never exceeds curr_depth and
+ * is always below MAX_SKIPLIST_DEPTH.
+ */
+static uint32_t
+timer_get_skiplist_level(unsigned curr_depth)
+{
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	static uint32_t i, count = 0;
+	static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
+#endif
+	/* bit 0 is masked off; the position of the lowest remaining set bit
+	 * then selects the level, two bit positions per level, which gives
+	 * the 1/4 ratio between consecutive levels.
+	 */
+	const uint32_t rnd = rte_rand() & (UINT32_MAX - 1);
+	uint32_t level;
+
+	if (rnd == 0)
+		level = MAX_SKIPLIST_DEPTH;
+	else
+		level = (rte_bsf32(rnd) - 1) / 2;
+
+	/* never jump more than one level above the levels already in use,
+	 * so that no empty level is left between two populated ones
+	 */
+	if (level > curr_depth)
+		level = curr_depth;
+	if (level >= MAX_SKIPLIST_DEPTH)
+		level = MAX_SKIPLIST_DEPTH - 1;
+
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	/* print the level histogram every 10000 draws */
+	count++;
+	levels[level]++;
+	if (count % 10000 == 0)
+		for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
+			printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
+#endif
+	return level;
+}
+
+/*
+ * For every skiplist level of lcore tim_lcore, store in prev[level] the
+ * last entry whose expiry is <= time_val, or the dummy list head when the
+ * level holds no such entry. prev[curr_skiplist_depth] is always set to
+ * the list head.
+ */
+static void
+timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
+		struct rte_timer **prev)
+{
+	struct rte_timer *cur = &priv_timer[tim_lcore].pending_head;
+	unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
+
+	prev[lvl] = cur;
+
+	/* walk from the top level down; each level resumes from the entry
+	 * reached on the level above */
+	while (lvl-- > 0) {
+		struct rte_timer *nxt;
+
+		while ((nxt = cur->sl_next[lvl]) != NULL &&
+				nxt->expire <= time_val)
+			cur = nxt;
+		prev[lvl] = cur;
+	}
+}
+
+/*
+ * For a timer that is linked in the skiplist of lcore tim_lcore, store in
+ * prev[level] the entry that precedes it at each level in use.
+ */
+static void
+timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
+		struct rte_timer **prev)
+{
+	int lvl;
+
+	/* start from the entries that expire strictly before the timer,
+	 * then step over entries with the same expiry until the timer
+	 * itself is the next one at that level.
+	 * NOTE(review): tim->expire - 1 wraps around when expire is 0.
+	 */
+	timer_get_prev_entries(tim->expire - 1, tim_lcore, prev);
+
+	lvl = priv_timer[tim_lcore].curr_skiplist_depth;
+	while (--lvl >= 0) {
+		struct rte_timer *nxt = prev[lvl]->sl_next[lvl];
+
+		while (nxt != NULL && nxt != tim &&
+				nxt->expire <= tim->expire) {
+			prev[lvl] = nxt;
+			nxt = nxt->sl_next[lvl];
+		}
+	}
+}
+
+/*
+ * Insert the timer into the pending skiplist of lcore tim_lcore, ordered
+ * by tim->expire, taking that lcore's list lock if needed.
+ * The timer must be in config state and must not be in a list.
+ * local_is_locked is non-zero when the caller already holds the list
+ * lock of the calling lcore.
+ */
+static void
+timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
+{
+ unsigned lcore_id = rte_lcore_id();
+ unsigned lvl;
+ struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
+
+ /* if the timer is scheduled on another core, the list of that core
+ * must be locked; if it is scheduled on the local core, the lock is
+ * taken unless the caller already holds it (local_is_locked) */
+ if (tim_lcore != lcore_id || !local_is_locked)
+ rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
+
+ /* find, for each level, the entry after which this timer has to be
+ * inserted. */
+ timer_get_prev_entries(tim->expire, tim_lcore, prev);
+
+ /* draw a level for the timer; if it equals the current depth, the
+ * list grows by one level */
+ const unsigned tim_level = timer_get_skiplist_level(
+ priv_timer[tim_lcore].curr_skiplist_depth);
+ if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
+ priv_timer[tim_lcore].curr_skiplist_depth++;
+
+ lvl = tim_level;
+ while (lvl > 0) {
+ tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
+ prev[lvl]->sl_next[lvl] = tim;
+ lvl--;
+ }
+ tim->sl_next[0] = prev[0]->sl_next[0];
+ prev[0]->sl_next[0] = tim;
+
+ /* cache the expiry of the first list entry in the expire field of the
+ * dummy head. NOTE: this is not atomic on 32-bit */
+ priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
+ pending_head.sl_next[0]->expire;
+
+ if (tim_lcore != lcore_id || !local_is_locked)
+ rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
+}
+
+/*
+ * Remove the timer from the pending skiplist of the lcore recorded as
+ * owner in prev_status, taking that lcore's list lock if needed.
+ * The timer must be in config state and must be in a list.
+ * local_is_locked is non-zero when the caller already holds the list
+ * lock of the calling lcore.
+ */
+static void
+timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
+ int local_is_locked)
+{
+ unsigned lcore_id = rte_lcore_id();
+ unsigned prev_owner = prev_status.owner;
+ int i;
+ struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
+
+ /* if the timer is pending on another core, the list of that core
+ * must be locked; if it is pending on the local core, the lock is
+ * taken unless the caller already holds it (local_is_locked) */
+ if (prev_owner != lcore_id || !local_is_locked)
+ rte_spinlock_lock(&priv_timer[prev_owner].list_lock);
+
+ /* if the timer is the first list entry, cache the expiry of its
+ * successor (0 if there is none) in the expire field of the dummy head.
+ * NOTE: this is not atomic on 32-bit */
+ if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
+ priv_timer[prev_owner].pending_head.expire =
+ ((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);
+
+ /* adjust pointers from previous entries to point past this */
+ timer_get_prev_entries_for_node(tim, prev_owner, prev);
+ for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
+ if (prev[i]->sl_next[i] == tim)
+ prev[i]->sl_next[i] = tim->sl_next[i];
+ }
+
+ /* in case we deleted the last entry at a level, adjust down the depth:
+ * drop every empty level from the top until a non-empty one is found */
+ for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
+ if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
+ priv_timer[prev_owner].curr_skiplist_depth --;
+ else
+ break;
+
+ if (prev_owner != lcore_id || !local_is_locked)
+ rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
+}
+
+/* Reset and start the timer associated with the timer handle (private func).
+ * expire is the absolute time of the next expiry, period is the reload
+ * period (0 for a one-shot timer), tim_lcore is the lcore that must run
+ * the callback (or LCORE_ID_ANY), and local_is_locked is non-zero when the
+ * caller already holds the list lock of the calling lcore.
+ * Returns 0 on success, -1 if the timer could not be put in config state. */
+static int
+__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
+ uint64_t period, unsigned tim_lcore,
+ rte_timer_cb_t fct, void *arg,
+ int local_is_locked)
+{
+ union rte_timer_status prev_status, status;
+ int ret;
+ unsigned lcore_id = rte_lcore_id();
+
+ /* round robin for tim_lcore */
+ if (tim_lcore == (unsigned)LCORE_ID_ANY) {
+ if (lcore_id < RTE_MAX_LCORE) {
+ /* EAL thread with valid lcore_id */
+ tim_lcore = rte_get_next_lcore(
+ priv_timer[lcore_id].prev_lcore,
+ 0, 1);
+ priv_timer[lcore_id].prev_lcore = tim_lcore;
+ } else
+ /* non-EAL thread do not run rte_timer_manage(),
+ * so schedule the timer on the first enabled lcore. */
+ tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
+ }
+
+ /* wait that the timer is in correct status before update,
+ * and mark it as being configured */
+ ret = timer_set_config_state(tim, &prev_status);
+ if (ret < 0)
+ return -1;
+
+ __TIMER_STAT_ADD(reset, 1);
+ /* the timer was running, which at this point means running on this
+ * lcore: record that it was updated so that rte_timer_manage() leaves
+ * its state alone once the callback returns */
+ if (prev_status.state == RTE_TIMER_RUNNING &&
+ lcore_id < RTE_MAX_LCORE) {
+ priv_timer[lcore_id].updated = 1;
+ }
+
+ /* remove it from list */
+ if (prev_status.state == RTE_TIMER_PENDING) {
+ timer_del(tim, prev_status, local_is_locked);
+ __TIMER_STAT_ADD(pending, -1);
+ }
+
+ tim->period = period;
+ tim->expire = expire;
+ tim->f = fct;
+ tim->arg = arg;
+
+ __TIMER_STAT_ADD(pending, 1);
+ timer_add(tim, tim_lcore, local_is_locked);
+
+ /* update state: as we are in CONFIG state, only us can modify
+ * the state so we don't need to use cmpset() here; the write barrier
+ * orders the updates above before the status store */
+ rte_wmb();
+ status.state = RTE_TIMER_PENDING;
+ status.owner = (int16_t)tim_lcore;
+ tim->status.u32 = status.u32;
+
+ return 0;
+}
+
+/*
+ * Reset and start the timer associated with the timer handle tim.
+ * The expiry is ticks cycles after the current value of
+ * rte_get_timer_cycles(); a PERIODICAL timer is reloaded with the same
+ * number of ticks. Returns -1 when tim_lcore is neither LCORE_ID_ANY nor
+ * an enabled lcore, otherwise the result of __rte_timer_reset().
+ */
+int
+rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
+		enum rte_timer_type type, unsigned tim_lcore,
+		rte_timer_cb_t fct, void *arg)
+{
+	const uint64_t now = rte_get_timer_cycles();
+	/* a period of 0 marks a one-shot timer */
+	const uint64_t period = (type == PERIODICAL) ? ticks : 0;
+
+	if (unlikely(tim_lcore != (unsigned)LCORE_ID_ANY &&
+			!rte_lcore_is_enabled(tim_lcore)))
+		return -1;
+
+	return __rte_timer_reset(tim, now + ticks, period, tim_lcore,
+			fct, arg, 0);
+}
+
+/* Call rte_timer_reset() repeatedly, pausing between attempts, until it
+ * succeeds. */
+void
+rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
+		enum rte_timer_type type, unsigned tim_lcore,
+		rte_timer_cb_t fct, void *arg)
+{
+	for (;;) {
+		if (rte_timer_reset(tim, ticks, type, tim_lcore, fct, arg) == 0)
+			return;
+		rte_pause();
+	}
+}
+
+/* Stop the timer associated with the timer handle tim.
+ * Returns 0 on success, -1 if the timer could not be put in config state
+ * (it is running on another lcore or is already being configured). */
+int
+rte_timer_stop(struct rte_timer *tim)
+{
+ union rte_timer_status prev_status, status;
+ unsigned lcore_id = rte_lcore_id();
+ int ret;
+
+ /* wait that the timer is in correct status before update,
+ * and mark it as being configured */
+ ret = timer_set_config_state(tim, &prev_status);
+ if (ret < 0)
+ return -1;
+
+ __TIMER_STAT_ADD(stop, 1);
+ /* the timer was running, which at this point means running on this
+ * lcore: record that it was updated so that rte_timer_manage() leaves
+ * its state alone once the callback returns */
+ if (prev_status.state == RTE_TIMER_RUNNING &&
+ lcore_id < RTE_MAX_LCORE) {
+ priv_timer[lcore_id].updated = 1;
+ }
+
+ /* remove it from list */
+ if (prev_status.state == RTE_TIMER_PENDING) {
+ timer_del(tim, prev_status, 0);
+ __TIMER_STAT_ADD(pending, -1);
+ }
+
+ /* mark timer as stopped; the write barrier orders the list update
+ * above before the status store */
+ rte_wmb();
+ status.state = RTE_TIMER_STOP;
+ status.owner = RTE_TIMER_NO_OWNER;
+ tim->status.u32 = status.u32;
+
+ return 0;
+}
+
+/* Call rte_timer_stop() repeatedly, pausing between attempts, until it
+ * succeeds. */
+void
+rte_timer_stop_sync(struct rte_timer *tim)
+{
+	for (;;) {
+		if (rte_timer_stop(tim) == 0)
+			return;
+		rte_pause();
+	}
+}
+
+/* Return 1 if the timer handle tim is in the PENDING state, 0 otherwise. */
+int
+rte_timer_pending(struct rte_timer *tim)
+{
+	if (tim->status.state != RTE_TIMER_PENDING)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Run all timers of the calling lcore that have expired.
+ *
+ * Must be called periodically from an EAL thread with a valid lcore id.
+ * The expired timers are detached from the pending skiplist and moved to
+ * the RUNNING state while the list lock is held; their callbacks are then
+ * invoked with the lock released. After its callback returns, a timer that
+ * was neither reset nor stopped by the callback is marked stopped if its
+ * period is 0, or re-armed one period after the time sampled by this call.
+ */
+void rte_timer_manage(void)
+{
+	union rte_timer_status status;
+	struct rte_timer *tim, *next_tim;
+	struct rte_timer *run_first_tim, **pprev;
+	unsigned lcore_id = rte_lcore_id();
+	struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
+	uint64_t cur_time;
+	int i, ret;
+
+	/* timer manager only runs on EAL thread with valid lcore_id */
+	assert(lcore_id < RTE_MAX_LCORE);
+
+	__TIMER_STAT_ADD(manage, 1);
+	/* optimize for the case where per-cpu list is empty */
+	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
+		return;
+	cur_time = rte_get_timer_cycles();
+
+#ifdef RTE_ARCH_X86_64
+	/* on 64-bit the value cached in the pending_head.expired will be
+	 * updated atomically, so we can consult that for a quick check here
+	 * outside the lock */
+	if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
+		return;
+#endif
+
+	/* browse ordered list, add expired timers in 'expired' list */
+	rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
+
+	/* if nothing to do just unlock and return */
+	if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
+	    priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
+		rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+		return;
+	}
+
+	/* save start of list of expired timers */
+	tim = priv_timer[lcore_id].pending_head.sl_next[0];
+
+	/* break the existing list at current time point */
+	timer_get_prev_entries(cur_time, lcore_id, prev);
+	for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
+		/* no entry of this level has expired: the level must be left
+		 * untouched, otherwise the head pointer would be cleared and
+		 * the timers still pending at this level would be orphaned */
+		if (prev[i] == &priv_timer[lcore_id].pending_head)
+			continue;
+		priv_timer[lcore_id].pending_head.sl_next[i] =
+		    prev[i]->sl_next[i];
+		if (prev[i]->sl_next[i] == NULL)
+			priv_timer[lcore_id].curr_skiplist_depth--;
+		prev[i]->sl_next[i] = NULL;
+	}
+
+	/* transition run-list from PENDING to RUNNING */
+	run_first_tim = tim;
+	pprev = &run_first_tim;
+
+	for ( ; tim != NULL; tim = next_tim) {
+		next_tim = tim->sl_next[0];
+
+		ret = timer_set_running_state(tim);
+		if (likely(ret == 0)) {
+			pprev = &tim->sl_next[0];
+		} else {
+			/* another core is trying to re-config this one,
+			 * remove it from local expired list and put it
+			 * back on the priv_timer[] skip list */
+			*pprev = next_tim;
+			timer_add(tim, lcore_id, 1);
+		}
+	}
+
+	/* update the next to expire timer value */
+	priv_timer[lcore_id].pending_head.expire =
+	    (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
+		priv_timer[lcore_id].pending_head.sl_next[0]->expire;
+
+	rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+
+	/* now scan expired list and call callbacks */
+	for (tim = run_first_tim; tim != NULL; tim = next_tim) {
+		/* fetch the successor first: the callback may reset or stop
+		 * the timer, which rewrites its list pointers */
+		next_tim = tim->sl_next[0];
+		priv_timer[lcore_id].updated = 0;
+
+		/* execute callback function with list unlocked */
+		tim->f(tim, tim->arg);
+
+		__TIMER_STAT_ADD(pending, -1);
+		/* the timer was stopped or reloaded by the callback
+		 * function, we have nothing to do here */
+		if (priv_timer[lcore_id].updated == 1)
+			continue;
+
+		if (tim->period == 0) {
+			/* remove from done list and mark timer as stopped */
+			status.state = RTE_TIMER_STOP;
+			status.owner = RTE_TIMER_NO_OWNER;
+			rte_wmb();
+			tim->status.u32 = status.u32;
+		}
+		else {
+			/* keep it in list and mark timer as pending */
+			rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
+			status.state = RTE_TIMER_PENDING;
+			__TIMER_STAT_ADD(pending, 1);
+			status.owner = (int16_t)lcore_id;
+			rte_wmb();
+			tim->status.u32 = status.u32;
+			/* the list lock is already held, hence the final 1 */
+			__rte_timer_reset(tim, cur_time + tim->period,
+				tim->period, lcore_id, tim->f, tim->arg, 1);
+			rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
+		}
+	}
+}
+
+/* Write the timer statistics, summed over all lcores, to the stream f.
+ * Without RTE_LIBRTE_TIMER_DEBUG only a notice is written. */
+void rte_timer_dump_stats(FILE *f)
+{
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+	struct rte_timer_debug_stats total;
+	const struct priv_timer *priv;
+	unsigned idx;
+
+	memset(&total, 0, sizeof(total));
+
+	/* accumulate the per-lcore counters */
+	for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+		priv = &priv_timer[idx];
+		total.reset += priv->stats.reset;
+		total.stop += priv->stats.stop;
+		total.manage += priv->stats.manage;
+		total.pending += priv->stats.pending;
+	}
+
+	fprintf(f, "Timer statistics:\n");
+	fprintf(f, " reset = %"PRIu64"\n", total.reset);
+	fprintf(f, " stop = %"PRIu64"\n", total.stop);
+	fprintf(f, " manage = %"PRIu64"\n", total.manage);
+	fprintf(f, " pending = %"PRIu64"\n", total.pending);
+#else
+	fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
+#endif
+}
diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h
new file mode 100644
index 00000000..77547c6b
--- /dev/null
+++ b/lib/librte_timer/rte_timer.h
@@ -0,0 +1,335 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_TIMER_H_
+#define _RTE_TIMER_H_
+
+/**
+ * @file
+ RTE Timer
+ *
+ * This library provides a timer service to RTE Data Plane execution
+ * units that allows the execution of callback functions asynchronously.
+ *
+ * - Timers can be periodic or single (one-shot).
+ * - The timers can be loaded from one core and executed on another. This has
+ * to be specified in the call to rte_timer_reset().
+ * - High precision is possible. NOTE: this depends on the call frequency to
+ * rte_timer_manage() that check the timer expiration for the local core.
+ * - If timers are not used in an application, the library can be
+ * disabled at compilation time by not calling rte_timer_manage(),
+ * which avoids its run-time cost.
+ *
+ * The timer library uses the rte_get_timer_cycles() function as its time
+ * reference. [Timer routines are provided by EAL; the HPET is used when
+ * available, and the chip TSC (time-stamp counter) is the fallback when
+ * HPET is not available]
+ *
+ * This library provides an interface to add, delete and restart a
+ * timer. The API is based on the BSD callout(9) API with a few
+ * differences.
+ *
+ * See the RTE architecture documentation for more information about the
+ * design of this library.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_TIMER_STOP 0 /**< State: timer is stopped. */
+#define RTE_TIMER_PENDING 1 /**< State: timer is scheduled. */
+#define RTE_TIMER_RUNNING 2 /**< State: timer function is running. */
+#define RTE_TIMER_CONFIG 3 /**< State: timer is being configured. */
+
+/* parenthesized so that the negative value expands safely in any expression */
+#define RTE_TIMER_NO_OWNER (-2) /**< Timer has no owner. */
+
+/**
+ * Timer type: Periodic or single (one-shot).
+ */
+enum rte_timer_type {
+ SINGLE, /**< One-shot: the timer is stopped after its callback has run. */
+ PERIODICAL /**< Periodic: the timer is reloaded after its callback has run. */
+};
+
+/**
+ * Timer status: A union of the state (stopped, pending, running,
+ * config) and an owner (the id of the lcore that owns the timer).
+ * Both fields overlay a single 32-bit word.
+ */
+union rte_timer_status {
+ struct {
+ uint16_t state; /**< One of RTE_TIMER_STOP, _PENDING, _RUNNING, _CONFIG. */
+ int16_t owner; /**< The lcore that owns the timer, or RTE_TIMER_NO_OWNER. */
+ };
+ uint32_t u32; /**< State and owner as one word, to set both atomically. */
+};
+
+#ifdef RTE_LIBRTE_TIMER_DEBUG
+/**
+ * A structure that stores the timer statistics (per-lcore).
+ */
+struct rte_timer_debug_stats {
+ uint64_t reset; /**< Number of successful calls to rte_timer_reset(). */
+ uint64_t stop; /**< Number of successful calls to rte_timer_stop(). */
+ uint64_t manage; /**< Number of calls to rte_timer_manage(). */
+ uint64_t pending; /**< Number of pending/running timers. */
+};
+#endif
+
+struct rte_timer;
+
+/**
+ * Callback function type for timer expiry.
+ */
+typedef void (*rte_timer_cb_t)(struct rte_timer *, void *);
+
+#define MAX_SKIPLIST_DEPTH 10
+
+/**
+ * A structure describing a timer in RTE.
+ */
+struct rte_timer
+{
+ uint64_t expire; /**< Time when timer expire. */
+ struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH]; /**< Next timer at each skiplist level. */
+ volatile union rte_timer_status status; /**< Status of timer. */
+ uint64_t period; /**< Period of timer (0 if not periodic). */
+ rte_timer_cb_t f; /**< Callback function. */
+ void *arg; /**< Argument to callback function. */
+};
+
+
+#ifdef __cplusplus
+/**
+ * A C++ static initializer for a timer structure.
+ */
+#define RTE_TIMER_INITIALIZER { \
+ 0, \
+ {NULL}, \
+ {{RTE_TIMER_STOP, RTE_TIMER_NO_OWNER}}, \
+ 0, \
+ NULL, \
+ NULL, \
+ }
+#else
+/**
+ * A static initializer for a timer structure.
+ */
+#define RTE_TIMER_INITIALIZER { \
+ .status = {{ \
+ .state = RTE_TIMER_STOP, \
+ .owner = RTE_TIMER_NO_OWNER, \
+ }}, \
+ }
+#endif
+
+/**
+ * Initialize the timer library.
+ *
+ * Initializes internal variables (list, locks and so on) for the RTE
+ * timer library.
+ */
+void rte_timer_subsystem_init(void);
+
+/**
+ * Initialize a timer handle.
+ *
+ * The rte_timer_init() function initializes the timer handle *tim*
+ * for use. No operations can be performed on a timer before it is
+ * initialized.
+ *
+ * @param tim
+ * The timer to initialize.
+ */
+void rte_timer_init(struct rte_timer *tim);
+
+/**
+ * Reset and start the timer associated with the timer handle.
+ *
+ * The rte_timer_reset() function resets and starts the timer
+ * associated with the timer handle *tim*. When the timer expires after
+ * *ticks* timer cycles, the function specified by *fct* will be called
+ * with the argument *arg* on core *tim_lcore*.
+ *
+ * If the timer associated with the timer handle is running on another
+ * lcore (in the RUNNING state), the function will fail. The user has to
+ * check the return value of the function to see if there is a chance
+ * that the timer is in the RUNNING state. A timer may be reset from
+ * within its own callback, i.e. while it is running on the calling lcore.
+ *
+ * If the timer is being configured on another core (the CONFIG state),
+ * it will also fail.
+ *
+ * If the timer is pending or stopped, it will be rescheduled with the
+ * new parameters.
+ *
+ * @param tim
+ * The timer handle.
+ * @param ticks
+ * The number of cycles, as counted by rte_get_timer_cycles(), before
+ * the callback function is called.
+ * @param type
+ * The type can be either:
+ * - PERIODICAL: The timer is automatically reloaded after execution
+ * (returns to the PENDING state)
+ * - SINGLE: The timer is one-shot, that is, the timer goes to a
+ * STOPPED state after execution.
+ * @param tim_lcore
+ * The ID of the lcore where the timer callback function has to be
+ * executed. If tim_lcore is LCORE_ID_ANY, the timer library will
+ * launch it on a different core for each call (round-robin).
+ * @param fct
+ * The callback function of the timer.
+ * @param arg
+ * The user argument of the callback function.
+ * @return
+ * - 0: Success; the timer is scheduled.
+ * - (-1): Timer is in the RUNNING state on another lcore or in the
+ * CONFIG state, or *tim_lcore* is neither LCORE_ID_ANY nor an
+ * enabled lcore.
+ */
+int rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
+ enum rte_timer_type type, unsigned tim_lcore,
+ rte_timer_cb_t fct, void *arg);
+
+
+/**
+ * Loop until rte_timer_reset() succeeds.
+ *
+ * Reset and start the timer associated with the timer handle. The call
+ * returns only once rte_timer_reset() has succeeded; it therefore never
+ * returns if *tim_lcore* is neither LCORE_ID_ANY nor an enabled lcore.
+ * See rte_timer_reset() for details.
+ *
+ * @param tim
+ * The timer handle.
+ * @param ticks
+ * The number of cycles, as counted by rte_get_timer_cycles(), before
+ * the callback function is called.
+ * @param type
+ * The type can be either:
+ * - PERIODICAL: The timer is automatically reloaded after execution
+ * (returns to the PENDING state)
+ * - SINGLE: The timer is one-shot, that is, the timer goes to a
+ * STOPPED state after execution.
+ * @param tim_lcore
+ * The ID of the lcore where the timer callback function has to be
+ * executed. If tim_lcore is LCORE_ID_ANY, the timer library will
+ * launch it on a different core for each call (round-robin).
+ * @param fct
+ * The callback function of the timer.
+ * @param arg
+ * The user argument of the callback function.
+ */
+void
+rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
+ enum rte_timer_type type, unsigned tim_lcore,
+ rte_timer_cb_t fct, void *arg);
+
+/**
+ * Stop a timer.
+ *
+ * The rte_timer_stop() function stops the timer associated with the
+ * timer handle *tim*. It may fail if the timer is currently running on
+ * another lcore or being configured.
+ *
+ * If the timer is pending or stopped (for instance, already expired),
+ * the function will succeed. The timer handle tim must have been
+ * initialized using rte_timer_init(), otherwise, undefined behavior
+ * will occur.
+ *
+ * This function can be called safely from a timer callback. If it
+ * succeeds, the timer is not referenced anymore by the timer library
+ * and the timer structure can be freed (even in the callback
+ * function).
+ *
+ * @param tim
+ * The timer handle.
+ * @return
+ * - 0: Success; the timer is stopped.
+ * - (-1): The timer is in the RUNNING state on another lcore or in
+ * the CONFIG state.
+ */
+int rte_timer_stop(struct rte_timer *tim);
+
+
+/**
+ * Loop until rte_timer_stop() succeeds.
+ *
+ * After a call to this function, the timer identified by *tim* is
+ * stopped. See rte_timer_stop() for details.
+ *
+ * @param tim
+ * The timer handle.
+ */
+void rte_timer_stop_sync(struct rte_timer *tim);
+
+/**
+ * Test if a timer is pending.
+ *
+ * The rte_timer_pending() function tests the PENDING status
+ * of the timer handle *tim*. A PENDING timer is one that has been
+ * scheduled and whose function has not yet been called.
+ *
+ * @param tim
+ * The timer handle.
+ * @return
+ * - 0: The timer is not pending.
+ * - 1: The timer is pending.
+ */
+int rte_timer_pending(struct rte_timer *tim);
+
+/**
+ * Manage the timer list and execute callback functions.
+ *
+ * This function must be called periodically from EAL lcores
+ * main_loop(). It browses the list of pending timers and runs all
+ * timers that are expired.
+ *
+ * The precision of the timer depends on the call frequency of this
+ * function. However, the more often the function is called, the more
+ * CPU resources it will use.
+ */
+void rte_timer_manage(void);
+
+/**
+ * Dump statistics about timers.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void rte_timer_dump_stats(FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_TIMER_H_ */
diff --git a/lib/librte_timer/rte_timer_version.map b/lib/librte_timer/rte_timer_version.map
new file mode 100644
index 00000000..9b2e4b86
--- /dev/null
+++ b/lib/librte_timer/rte_timer_version.map
@@ -0,0 +1,15 @@
+DPDK_2.0 {
+ global:
+
+ rte_timer_dump_stats;
+ rte_timer_init;
+ rte_timer_manage;
+ rte_timer_pending;
+ rte_timer_reset;
+ rte_timer_reset_sync;
+ rte_timer_stop;
+ rte_timer_stop_sync;
+ rte_timer_subsystem_init;
+
+ local: *;
+};