aboutsummaryrefslogtreecommitdiffstats
path: root/examples/performance-thread
diff options
context:
space:
mode:
Diffstat (limited to 'examples/performance-thread')
-rw-r--r--examples/performance-thread/Makefile4
-rw-r--r--examples/performance-thread/common/arch/arm64/ctx.c90
-rw-r--r--examples/performance-thread/common/arch/arm64/ctx.h83
-rw-r--r--examples/performance-thread/common/arch/arm64/stack.h84
-rw-r--r--examples/performance-thread/common/arch/x86/stack.h94
-rw-r--r--examples/performance-thread/common/common.mk10
-rw-r--r--examples/performance-thread/common/lthread.c11
-rw-r--r--examples/performance-thread/common/lthread_int.h1
-rw-r--r--examples/performance-thread/common/lthread_mutex.c2
-rw-r--r--examples/performance-thread/common/lthread_pool.h10
-rw-r--r--examples/performance-thread/common/lthread_queue.h10
-rw-r--r--examples/performance-thread/common/lthread_sched.c4
-rw-r--r--examples/performance-thread/common/lthread_sched.h12
-rw-r--r--examples/performance-thread/l3fwd-thread/main.c62
14 files changed, 408 insertions, 69 deletions
diff --git a/examples/performance-thread/Makefile b/examples/performance-thread/Makefile
index d19f8489..0c5edfdb 100644
--- a/examples/performance-thread/Makefile
+++ b/examples/performance-thread/Makefile
@@ -38,8 +38,8 @@ RTE_TARGET ?= x86_64-native-linuxapp-gcc
include $(RTE_SDK)/mk/rte.vars.mk
-ifneq ($(CONFIG_RTE_ARCH),"x86_64")
-$(error This application is only supported for x86_64 targets)
+ifeq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) $(CONFIG_RTE_ARCH_ARM64)),)
+$(error This application is only supported for x86_64 and arm64 targets)
endif
DIRS-y += l3fwd-thread
diff --git a/examples/performance-thread/common/arch/arm64/ctx.c b/examples/performance-thread/common/arch/arm64/ctx.c
new file mode 100644
index 00000000..d0eacaa6
--- /dev/null
+++ b/examples/performance-thread/common/arch/arm64/ctx.c
@@ -0,0 +1,90 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_common.h>
+#include <ctx.h>
+
+void
+ctx_switch(struct ctx *new_ctx __rte_unused, struct ctx *curr_ctx __rte_unused)
+{
+ /* SAVE CURRENT CONTEXT */
+ asm volatile (
+ /* Save SP */
+ "mov x3, sp\n"
+ "str x3, [x1, #0]\n"
+
+ /* Save FP and LR */
+ "stp x29, x30, [x1, #8]\n"
+
+ /* Save Callee Saved Regs x19 - x28 */
+ "stp x19, x20, [x1, #24]\n"
+ "stp x21, x22, [x1, #40]\n"
+ "stp x23, x24, [x1, #56]\n"
+ "stp x25, x26, [x1, #72]\n"
+ "stp x27, x28, [x1, #88]\n"
+
+ /*
+ * Save bottom 64-bits of Callee Saved
+ * SIMD Regs v8 - v15
+ */
+ "stp d8, d9, [x1, #104]\n"
+ "stp d10, d11, [x1, #120]\n"
+ "stp d12, d13, [x1, #136]\n"
+ "stp d14, d15, [x1, #152]\n"
+ );
+
+ /* RESTORE NEW CONTEXT */
+ asm volatile (
+ /* Restore SP */
+ "ldr x3, [x0, #0]\n"
+ "mov sp, x3\n"
+
+ /* Restore FP and LR */
+ "ldp x29, x30, [x0, #8]\n"
+
+ /* Restore Callee Saved Regs x19 - x28 */
+ "ldp x19, x20, [x0, #24]\n"
+ "ldp x21, x22, [x0, #40]\n"
+ "ldp x23, x24, [x0, #56]\n"
+ "ldp x25, x26, [x0, #72]\n"
+ "ldp x27, x28, [x0, #88]\n"
+
+ /*
+ * Restore bottom 64-bits of Callee Saved
+ * SIMD Regs v8 - v15
+ */
+ "ldp d8, d9, [x0, #104]\n"
+ "ldp d10, d11, [x0, #120]\n"
+ "ldp d12, d13, [x0, #136]\n"
+ "ldp d14, d15, [x0, #152]\n"
+ );
+}
diff --git a/examples/performance-thread/common/arch/arm64/ctx.h b/examples/performance-thread/common/arch/arm64/ctx.h
new file mode 100644
index 00000000..38c86ce6
--- /dev/null
+++ b/examples/performance-thread/common/arch/arm64/ctx.h
@@ -0,0 +1,83 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CTX_H
+#define CTX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * CPU context registers
+ */
+struct ctx {
+ void *sp; /* 0 */
+ void *fp; /* 8 */
+ void *lr; /* 16 */
+
+ /* Callee Saved Generic Registers */
+ void *r19; /* 24 */
+ void *r20; /* 32 */
+ void *r21; /* 40 */
+ void *r22; /* 48 */
+ void *r23; /* 56 */
+ void *r24; /* 64 */
+ void *r25; /* 72 */
+ void *r26; /* 80 */
+ void *r27; /* 88 */
+ void *r28; /* 96 */
+
+ /*
+ * Callee Saved SIMD Registers. Only the bottom 64-bits
+ * of these registers need to be saved.
+ */
+ void *v8; /* 104 */
+ void *v9; /* 112 */
+ void *v10; /* 120 */
+ void *v11; /* 128 */
+ void *v12; /* 136 */
+ void *v13; /* 144 */
+ void *v14; /* 152 */
+ void *v15; /* 160 */
+};
+
+
+void
+ctx_switch(struct ctx *new_ctx, struct ctx *curr_ctx);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CTX_H */
diff --git a/examples/performance-thread/common/arch/arm64/stack.h b/examples/performance-thread/common/arch/arm64/stack.h
new file mode 100644
index 00000000..fa3b31e9
--- /dev/null
+++ b/examples/performance-thread/common/arch/arm64/stack.h
@@ -0,0 +1,84 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium, Inc. 2017.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium, Inc nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STACK_H
+#define STACK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lthread_int.h"
+
+/*
+ * Sets up the initial stack for the lthread.
+ */
+static inline void
+arch_set_stack(struct lthread *lt, void *func)
+{
+ void **stack_top = (void *)((char *)(lt->stack) + lt->stack_size);
+
+ /*
+ * Align stack_top to 16 bytes. Arm64 has the constraint that the
+ * stack pointer must always be quad-word aligned.
+ */
+ stack_top = (void **)(((unsigned long)(stack_top)) & ~0xfUL);
+
+ /*
+ * First Stack Frame
+ */
+ stack_top[0] = NULL;
+ stack_top[-1] = NULL;
+
+ /*
+ * Initialize the context
+ */
+ lt->ctx.fp = &stack_top[-1];
+ lt->ctx.sp = &stack_top[-2];
+
+ /*
+ * Here only the address of _lthread_exec is saved as the link
+ * register value. The argument to _lthread_exec i.e the address of
+ * the lthread struct is not saved. This is because the first
+ * argument to ctx_switch is the address of the new context,
+ * which also happens to be the address of required lthread struct.
+ * So while returning from ctx_switch into _lthread_exec, parameter
+ * register x0 will always contain the required value.
+ */
+ lt->ctx.lr = func;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* STACK_H */
diff --git a/examples/performance-thread/common/arch/x86/stack.h b/examples/performance-thread/common/arch/x86/stack.h
new file mode 100644
index 00000000..98723ba3
--- /dev/null
+++ b/examples/performance-thread/common/arch/x86/stack.h
@@ -0,0 +1,94 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) Cavium, Inc. 2017.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software is derived from the
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#ifndef STACK_H
+#define STACK_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "lthread_int.h"
+
+/*
+ * Sets up the initial stack for the lthread.
+ */
+static inline void
+arch_set_stack(struct lthread *lt, void *func)
+{
+ char *stack_top = (char *)(lt->stack) + lt->stack_size;
+ void **s = (void **)stack_top;
+
+ /* set initial context */
+ s[-3] = NULL;
+ s[-2] = (void *)lt;
+ lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *)));
+ lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *)));
+ lt->ctx.rip = func;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* STACK_H */
diff --git a/examples/performance-thread/common/common.mk b/examples/performance-thread/common/common.mk
index f6cab771..f1f05fdd 100644
--- a/examples/performance-thread/common/common.mk
+++ b/examples/performance-thread/common/common.mk
@@ -37,8 +37,14 @@
MKFILE_PATH=$(abspath $(dir $(lastword $(MAKEFILE_LIST))))
-VPATH := $(MKFILE_PATH) $(MKFILE_PATH)/arch/x86
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+ARCH_PATH += $(MKFILE_PATH)/arch/x86
+else ifeq ($(CONFIG_RTE_ARCH_ARM64),y)
+ARCH_PATH += $(MKFILE_PATH)/arch/arm64
+endif
+
+VPATH := $(MKFILE_PATH) $(ARCH_PATH)
SRCS-y += lthread.c lthread_sched.c lthread_cond.c lthread_tls.c lthread_mutex.c lthread_diag.c ctx.c
-INCLUDES += -I$(MKFILE_PATH) -I$(MKFILE_PATH)/arch/x86/
+INCLUDES += -I$(MKFILE_PATH) -I$(ARCH_PATH)
diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
index 062275a4..7d76c8c4 100644
--- a/examples/performance-thread/common/lthread.c
+++ b/examples/performance-thread/common/lthread.c
@@ -76,6 +76,7 @@
#include <rte_log.h>
#include <ctx.h>
+#include <stack.h>
#include "lthread_api.h"
#include "lthread.h"
@@ -190,19 +191,11 @@ _lthread_init(struct lthread *lt,
*/
void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size)
{
- char *stack_top = (char *)stack + stack_size;
- void **s = (void **)stack_top;
-
/* set stack */
lt->stack = stack;
lt->stack_size = stack_size;
- /* set initial context */
- s[-3] = NULL;
- s[-2] = (void *)lt;
- lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *)));
- lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *)));
- lt->ctx.rip = (void *)_lthread_exec;
+ arch_set_stack(lt, _lthread_exec);
}
/*
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
index 3f7fb92d..e1da2462 100644
--- a/examples/performance-thread/common/lthread_int.h
+++ b/examples/performance-thread/common/lthread_int.h
@@ -59,7 +59,6 @@
* SUCH DAMAGE.
*/
#ifndef LTHREAD_INT_H
-#include <lthread_api.h>
#define LTHREAD_INT_H
#ifdef __cplusplus
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
index c1bc6271..c06d3d51 100644
--- a/examples/performance-thread/common/lthread_mutex.c
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -173,7 +173,7 @@ int lthread_mutex_lock(struct lthread_mutex *m)
return 0;
}
-/* try to lock a mutex but dont block */
+/* try to lock a mutex but don't block */
int lthread_mutex_trylock(struct lthread_mutex *m)
{
struct lthread *lt = THIS_LTHREAD;
diff --git a/examples/performance-thread/common/lthread_pool.h b/examples/performance-thread/common/lthread_pool.h
index fb0c578b..315a2e21 100644
--- a/examples/performance-thread/common/lthread_pool.h
+++ b/examples/performance-thread/common/lthread_pool.h
@@ -174,7 +174,7 @@ _qnode_pool_create(const char *name, int prealloc_size) {
/*
* Insert a node into the pool
*/
-static inline void __attribute__ ((always_inline))
+static __rte_always_inline void
_qnode_pool_insert(struct qnode_pool *p, struct qnode *n)
{
n->next = NULL;
@@ -198,7 +198,7 @@ _qnode_pool_insert(struct qnode_pool *p, struct qnode *n)
* last item from the queue incurs the penalty of an atomic exchange. Since the
* pool is maintained with a bulk pre-allocation the cost of this is amortised.
*/
-static inline struct qnode *__attribute__ ((always_inline))
+static __rte_always_inline struct qnode *
_pool_remove(struct qnode_pool *p)
{
struct qnode *head;
@@ -239,7 +239,7 @@ _pool_remove(struct qnode_pool *p)
* This adds a retry to the _pool_remove function
* defined above
*/
-static inline struct qnode *__attribute__ ((always_inline))
+static __rte_always_inline struct qnode *
_qnode_pool_remove(struct qnode_pool *p)
{
struct qnode *n;
@@ -259,7 +259,7 @@ _qnode_pool_remove(struct qnode_pool *p)
* Allocate a node from the pool
* If the pool is empty add mode nodes
*/
-static inline struct qnode *__attribute__ ((always_inline))
+static __rte_always_inline struct qnode *
_qnode_alloc(void)
{
struct qnode_pool *p = (THIS_SCHED)->qnode_pool;
@@ -304,7 +304,7 @@ _qnode_alloc(void)
/*
* free a queue node to the per scheduler pool from which it came
*/
-static inline void __attribute__ ((always_inline))
+static __rte_always_inline void
_qnode_free(struct qnode *n)
{
struct qnode_pool *p = n->pool;
diff --git a/examples/performance-thread/common/lthread_queue.h b/examples/performance-thread/common/lthread_queue.h
index 4fc2074e..833ed92b 100644
--- a/examples/performance-thread/common/lthread_queue.h
+++ b/examples/performance-thread/common/lthread_queue.h
@@ -154,7 +154,7 @@ _lthread_queue_create(const char *name)
/**
* Return true if the queue is empty
*/
-static inline int __attribute__ ((always_inline))
+static __rte_always_inline int
_lthread_queue_empty(struct lthread_queue *q)
{
return q->tail == q->head;
@@ -185,7 +185,7 @@ RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched);
* Insert a node into a queue
* this implementation is multi producer safe
*/
-static inline struct qnode *__attribute__ ((always_inline))
+static __rte_always_inline struct qnode *
_lthread_queue_insert_mp(struct lthread_queue
*q, void *data)
{
@@ -219,7 +219,7 @@ _lthread_queue_insert_mp(struct lthread_queue
* Insert an node into a queue in single producer mode
* this implementation is NOT mult producer safe
*/
-static inline struct qnode *__attribute__ ((always_inline))
+static __rte_always_inline struct qnode *
_lthread_queue_insert_sp(struct lthread_queue
*q, void *data)
{
@@ -247,7 +247,7 @@ _lthread_queue_insert_sp(struct lthread_queue
/*
* Remove a node from a queue
*/
-static inline void *__attribute__ ((always_inline))
+static __rte_always_inline void *
_lthread_queue_poll(struct lthread_queue *q)
{
void *data = NULL;
@@ -278,7 +278,7 @@ _lthread_queue_poll(struct lthread_queue *q)
/*
* Remove a node from a queue
*/
-static inline void *__attribute__ ((always_inline))
+static __rte_always_inline void *
_lthread_queue_remove(struct lthread_queue *q)
{
void *data = NULL;
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
index c64c21ff..98291478 100644
--- a/examples/performance-thread/common/lthread_sched.c
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -369,8 +369,8 @@ void lthread_scheduler_shutdown_all(void)
/*
* Resume a suspended lthread
*/
-static inline void
-_lthread_resume(struct lthread *lt) __attribute__ ((always_inline));
+static __rte_always_inline void
+_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
struct lthread_sched *sched = THIS_SCHED;
diff --git a/examples/performance-thread/common/lthread_sched.h b/examples/performance-thread/common/lthread_sched.h
index 7cddda9c..aa2f0c48 100644
--- a/examples/performance-thread/common/lthread_sched.h
+++ b/examples/performance-thread/common/lthread_sched.h
@@ -112,8 +112,8 @@ static inline uint64_t _sched_now(void)
return 1;
}
-static inline void
-_affinitize(void) __attribute__ ((always_inline));
+static __rte_always_inline void
+_affinitize(void);
static inline void
_affinitize(void)
{
@@ -123,8 +123,8 @@ _affinitize(void)
ctx_switch(&(THIS_SCHED)->ctx, &lt->ctx);
}
-static inline void
-_suspend(void) __attribute__ ((always_inline));
+static __rte_always_inline void
+_suspend(void);
static inline void
_suspend(void)
{
@@ -136,8 +136,8 @@ _suspend(void)
(THIS_SCHED)->nb_blocked_threads--;
}
-static inline void
-_reschedule(void) __attribute__ ((always_inline));
+static __rte_always_inline void
+_reschedule(void);
static inline void
_reschedule(void)
{
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 2d98473e..7954b974 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -52,7 +52,6 @@
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
@@ -73,6 +72,7 @@
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
+#include <rte_pause.h>
#include <cmdline_parse.h>
#include <cmdline_parse_etheraddr.h>
@@ -157,11 +157,7 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue,
* When set to one, optimized forwarding path is enabled.
* Note that LPM optimisation path uses SSE4.1 instructions.
*/
-#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
-#define ENABLE_MULTI_BUFFER_OPTIMIZE 0
-#else
#define ENABLE_MULTI_BUFFER_OPTIMIZE 1
-#endif
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
#include <rte_hash.h>
@@ -188,10 +184,10 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue,
*/
#define NB_MBUF RTE_MAX(\
- (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
- nb_ports*nb_lcores*MAX_PKT_BURST + \
- nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
- nb_lcores*MEMPOOL_CACHE_SIZE), \
+ (nb_ports*nb_rx_queue*nb_rxd + \
+ nb_ports*nb_lcores*MAX_PKT_BURST + \
+ nb_ports*n_tx_queue*nb_txd + \
+ nb_lcores*MEMPOOL_CACHE_SIZE), \
(unsigned)8192)
#define MAX_PKT_BURST 32
@@ -225,7 +221,7 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
-static __m128i val_eth[RTE_MAX_ETHPORTS];
+static xmm_t val_eth[RTE_MAX_ETHPORTS];
/* replace first 12B of the ethernet header. */
#define MASK_ETH 0x3f
@@ -362,13 +358,8 @@ static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
-#else
-#include <rte_jhash.h>
-#define DEFAULT_HASH_FUNC rte_jhash
-#endif
struct ipv4_5tuple {
uint32_t ip_dst;
@@ -485,17 +476,10 @@ ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
t = k->proto;
p = (const uint32_t *)&k->port_src;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
init_val = rte_hash_crc_4byte(t, init_val);
init_val = rte_hash_crc_4byte(k->ip_src, init_val);
init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
init_val = rte_hash_crc_4byte(*p, init_val);
-#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- init_val = rte_jhash_1word(t, init_val);
- init_val = rte_jhash_1word(k->ip_src, init_val);
- init_val = rte_jhash_1word(k->ip_dst, init_val);
- init_val = rte_jhash_1word(*p, init_val);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
return init_val;
}
@@ -506,16 +490,13 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
const union ipv6_5tuple_host *k;
uint32_t t;
const uint32_t *p;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
k = data;
t = k->proto;
p = (const uint32_t *)&k->port_src;
-#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
ip_src0 = (const uint32_t *) k->ip_src;
ip_src1 = (const uint32_t *)(k->ip_src + 4);
ip_src2 = (const uint32_t *)(k->ip_src + 8);
@@ -534,12 +515,6 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
init_val = rte_hash_crc_4byte(*p, init_val);
-#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
- init_val = rte_jhash_1word(t, init_val);
- init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
- init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
- init_val = rte_jhash_1word(*p, init_val);
-#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
return init_val;
}
@@ -720,7 +695,7 @@ send_single_packet(struct rte_mbuf *m, uint8_t port)
#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
send_packetsx4(uint8_t port,
struct rte_mbuf *m[], uint32_t num)
{
@@ -761,12 +736,15 @@ send_packetsx4(uint8_t port,
case 0:
qconf->tx_mbufs[port].m_table[len + j] = m[j];
j++;
+ /* fall-through */
case 3:
qconf->tx_mbufs[port].m_table[len + j] = m[j];
j++;
+ /* fall-through */
case 2:
qconf->tx_mbufs[port].m_table[len + j] = m[j];
j++;
+ /* fall-through */
case 1:
qconf->tx_mbufs[port].m_table[len + j] = m[j];
j++;
@@ -788,12 +766,15 @@ send_packetsx4(uint8_t port,
case 0:
qconf->tx_mbufs[port].m_table[j] = m[n + j];
j++;
+ /* fall-through */
case 3:
qconf->tx_mbufs[port].m_table[j] = m[n + j];
j++;
+ /* fall-through */
case 2:
qconf->tx_mbufs[port].m_table[j] = m[n + j];
j++;
+ /* fall-through */
case 1:
qconf->tx_mbufs[port].m_table[j] = m[n + j];
j++;
@@ -1281,7 +1262,7 @@ simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
}
#endif /* APP_LOOKUP_METHOD */
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
{
struct ether_hdr *eth_hdr;
@@ -1369,7 +1350,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
* If we encounter invalid IPV4 packet, then set destination port for it
* to BAD_PORT value.
*/
-static inline __attribute__((always_inline)) void
+static __rte_always_inline void
rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
{
uint8_t ihl;
@@ -1397,7 +1378,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
-static inline __attribute__((always_inline)) uint16_t
+static __rte_always_inline uint16_t
get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid)
{
uint32_t next_hop;
@@ -1598,7 +1579,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
* Suppose we have array of destionation ports:
* dst_port[] = {a, b, c, d,, e, ... }
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
- * We doing 4 comparisions at once and the result is 4 bit mask.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
* This mask is used as an index into prebuild array of pnum values.
*/
static inline uint16_t *
@@ -1860,10 +1841,12 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
process_packet(pkts_burst[j], dst_port + j, portid);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
j++;
+ /* fall-through */
case 2:
process_packet(pkts_burst[j], dst_port + j, portid);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
j++;
+ /* fall-through */
case 1:
process_packet(pkts_burst[j], dst_port + j, portid);
GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
@@ -3587,6 +3570,13 @@ main(int argc, char **argv)
rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
ret, portid);
+ ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
+ &nb_txd);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n",
+ ret, portid);
+
rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
print_ethaddr(" Address:", &ports_eth_addr[portid]);
printf(", ");