diff options
Diffstat (limited to 'examples/performance-thread')
-rw-r--r-- | examples/performance-thread/Makefile | 4 | ||||
-rw-r--r-- | examples/performance-thread/common/arch/arm64/ctx.c | 90 | ||||
-rw-r--r-- | examples/performance-thread/common/arch/arm64/ctx.h | 83 | ||||
-rw-r--r-- | examples/performance-thread/common/arch/arm64/stack.h | 84 | ||||
-rw-r--r-- | examples/performance-thread/common/arch/x86/stack.h | 94 | ||||
-rw-r--r-- | examples/performance-thread/common/common.mk | 10 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread.c | 11 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_int.h | 1 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_mutex.c | 2 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_pool.h | 10 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_queue.h | 10 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_sched.c | 4 | ||||
-rw-r--r-- | examples/performance-thread/common/lthread_sched.h | 12 | ||||
-rw-r--r-- | examples/performance-thread/l3fwd-thread/main.c | 62 |
14 files changed, 408 insertions, 69 deletions
diff --git a/examples/performance-thread/Makefile b/examples/performance-thread/Makefile index d19f8489..0c5edfdb 100644 --- a/examples/performance-thread/Makefile +++ b/examples/performance-thread/Makefile @@ -38,8 +38,8 @@ RTE_TARGET ?= x86_64-native-linuxapp-gcc include $(RTE_SDK)/mk/rte.vars.mk -ifneq ($(CONFIG_RTE_ARCH),"x86_64") -$(error This application is only supported for x86_64 targets) +ifeq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) $(CONFIG_RTE_ARCH_ARM64)),) +$(error This application is only supported for x86_64 and arm64 targets) endif DIRS-y += l3fwd-thread diff --git a/examples/performance-thread/common/arch/arm64/ctx.c b/examples/performance-thread/common/arch/arm64/ctx.c new file mode 100644 index 00000000..d0eacaa6 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/ctx.c @@ -0,0 +1,90 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_common.h> +#include <ctx.h> + +void +ctx_switch(struct ctx *new_ctx __rte_unused, struct ctx *curr_ctx __rte_unused) +{ + /* SAVE CURRENT CONTEXT */ + asm volatile ( + /* Save SP */ + "mov x3, sp\n" + "str x3, [x1, #0]\n" + + /* Save FP and LR */ + "stp x29, x30, [x1, #8]\n" + + /* Save Callee Saved Regs x19 - x28 */ + "stp x19, x20, [x1, #24]\n" + "stp x21, x22, [x1, #40]\n" + "stp x23, x24, [x1, #56]\n" + "stp x25, x26, [x1, #72]\n" + "stp x27, x28, [x1, #88]\n" + + /* + * Save bottom 64-bits of Callee Saved + * SIMD Regs v8 - v15 + */ + "stp d8, d9, [x1, #104]\n" + "stp d10, d11, [x1, #120]\n" + "stp d12, d13, [x1, #136]\n" + "stp d14, d15, [x1, #152]\n" + ); + + /* RESTORE NEW CONTEXT */ + asm volatile ( + /* Restore SP */ + "ldr x3, [x0, #0]\n" + "mov sp, x3\n" + + /* Restore FP and LR */ + "ldp x29, x30, [x0, #8]\n" + + /* Restore Callee Saved Regs x19 - x28 */ + "ldp x19, x20, [x0, #24]\n" + "ldp x21, x22, [x0, #40]\n" + "ldp x23, x24, [x0, #56]\n" + "ldp x25, x26, [x0, #72]\n" + "ldp x27, x28, [x0, #88]\n" + + /* + * Restore bottom 64-bits of Callee Saved + * SIMD Regs v8 - v15 + */ + "ldp d8, d9, [x0, #104]\n" + "ldp d10, d11, [x0, #120]\n" + "ldp d12, d13, [x0, #136]\n" + "ldp d14, d15, [x0, #152]\n" + ); +} diff --git a/examples/performance-thread/common/arch/arm64/ctx.h b/examples/performance-thread/common/arch/arm64/ctx.h new file mode 100644 index 00000000..38c86ce6 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/ctx.h @@ -0,0 +1,83 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CTX_H +#define CTX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * CPU context registers + */ +struct ctx { + void *sp; /* 0 */ + void *fp; /* 8 */ + void *lr; /* 16 */ + + /* Callee Saved Generic Registers */ + void *r19; /* 24 */ + void *r20; /* 32 */ + void *r21; /* 40 */ + void *r22; /* 48 */ + void *r23; /* 56 */ + void *r24; /* 64 */ + void *r25; /* 72 */ + void *r26; /* 80 */ + void *r27; /* 88 */ + void *r28; /* 96 */ + + /* + * Callee Saved SIMD Registers. Only the bottom 64-bits + * of these registers needs to be saved. + */ + void *v8; /* 104 */ + void *v9; /* 112 */ + void *v10; /* 120 */ + void *v11; /* 128 */ + void *v12; /* 136 */ + void *v13; /* 144 */ + void *v14; /* 152 */ + void *v15; /* 160 */ +}; + + +void +ctx_switch(struct ctx *new_ctx, struct ctx *curr_ctx); + + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_CTX_H_ */ diff --git a/examples/performance-thread/common/arch/arm64/stack.h b/examples/performance-thread/common/arch/arm64/stack.h new file mode 100644 index 00000000..fa3b31e9 --- /dev/null +++ b/examples/performance-thread/common/arch/arm64/stack.h @@ -0,0 +1,84 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium, Inc. 2017. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium, Inc nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STACK_H +#define STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "lthread_int.h" + +/* + * Sets up the initial stack for the lthread. + */ +static inline void +arch_set_stack(struct lthread *lt, void *func) +{ + void **stack_top = (void *)((char *)(lt->stack) + lt->stack_size); + + /* + * Align stack_top to 16 bytes. Arm64 has the constraint that the + * stack pointer must always be quad-word aligned. + */ + stack_top = (void **)(((unsigned long)(stack_top)) & ~0xfUL); + + /* + * First Stack Frame + */ + stack_top[0] = NULL; + stack_top[-1] = NULL; + + /* + * Initialize the context + */ + lt->ctx.fp = &stack_top[-1]; + lt->ctx.sp = &stack_top[-2]; + + /* + * Here only the address of _lthread_exec is saved as the link + * register value. The argument to _lthread_exec i.e the address of + * the lthread struct is not saved. This is because the first + * argument to ctx_switch is the address of the new context, + * which also happens to be the address of required lthread struct. + * So while returning from ctx_switch into _thread_exec, parameter + * register x0 will always contain the required value. + */ + lt->ctx.lr = func; +} + +#ifdef __cplusplus +} +#endif + +#endif /* STACK_H_ */ diff --git a/examples/performance-thread/common/arch/x86/stack.h b/examples/performance-thread/common/arch/x86/stack.h new file mode 100644 index 00000000..98723ba3 --- /dev/null +++ b/examples/performance-thread/common/arch/x86/stack.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * Copyright(c) Cavium, Inc. 2017. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Some portions of this software is derived from the + * https://github.com/halayli/lthread which carrys the following license. + * + * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + +#ifndef STACK_H +#define STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "lthread_int.h" + +/* + * Sets up the initial stack for the lthread. + */ +static inline void +arch_set_stack(struct lthread *lt, void *func) +{ + char *stack_top = (char *)(lt->stack) + lt->stack_size; + void **s = (void **)stack_top; + + /* set initial context */ + s[-3] = NULL; + s[-2] = (void *)lt; + lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *))); + lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *))); + lt->ctx.rip = func; +} + +#ifdef __cplusplus +} +#endif + +#endif /* STACK_H_ */ diff --git a/examples/performance-thread/common/common.mk b/examples/performance-thread/common/common.mk index f6cab771..f1f05fdd 100644 --- a/examples/performance-thread/common/common.mk +++ b/examples/performance-thread/common/common.mk @@ -37,8 +37,14 @@ MKFILE_PATH=$(abspath $(dir $(lastword $(MAKEFILE_LIST)))) -VPATH := $(MKFILE_PATH) $(MKFILE_PATH)/arch/x86 +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +ARCH_PATH += $(MKFILE_PATH)/arch/x86 +else ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +ARCH_PATH += $(MKFILE_PATH)/arch/arm64 +endif + +VPATH := $(MKFILE_PATH) $(ARCH_PATH) SRCS-y += lthread.c lthread_sched.c lthread_cond.c lthread_tls.c lthread_mutex.c lthread_diag.c ctx.c -INCLUDES += -I$(MKFILE_PATH) -I$(MKFILE_PATH)/arch/x86/ +INCLUDES += -I$(MKFILE_PATH) -I$(ARCH_PATH) diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c index 062275a4..7d76c8c4 100644 --- a/examples/performance-thread/common/lthread.c +++ b/examples/performance-thread/common/lthread.c @@ -76,6 +76,7 @@ #include <rte_log.h> #include <ctx.h> +#include <stack.h> #include "lthread_api.h" #include "lthread.h" @@ -190,19 +191,11 @@ _lthread_init(struct lthread *lt, */ void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size) { - char *stack_top = (char *)stack + stack_size; - void **s = (void **)stack_top; - /* set stack */ lt->stack = stack; lt->stack_size = stack_size; - /* set initial context */ - s[-3] = NULL; - s[-2] = (void *)lt; - lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *))); - lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *))); - lt->ctx.rip = (void *)_lthread_exec; + arch_set_stack(lt, _lthread_exec); } /* diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h index 3f7fb92d..e1da2462 100644 --- a/examples/performance-thread/common/lthread_int.h +++ b/examples/performance-thread/common/lthread_int.h @@ -59,7 +59,6 @@ * SUCH DAMAGE. */ #ifndef LTHREAD_INT_H -#include <lthread_api.h> #define LTHREAD_INT_H #ifdef __cplusplus diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c index c1bc6271..c06d3d51 100644 --- a/examples/performance-thread/common/lthread_mutex.c +++ b/examples/performance-thread/common/lthread_mutex.c @@ -173,7 +173,7 @@ int lthread_mutex_lock(struct lthread_mutex *m) return 0; } -/* try to lock a mutex but dont block */ +/* try to lock a mutex but don't block */ int lthread_mutex_trylock(struct lthread_mutex *m) { struct lthread *lt = THIS_LTHREAD; diff --git a/examples/performance-thread/common/lthread_pool.h b/examples/performance-thread/common/lthread_pool.h index fb0c578b..315a2e21 100644 --- a/examples/performance-thread/common/lthread_pool.h +++ b/examples/performance-thread/common/lthread_pool.h @@ -174,7 +174,7 @@ _qnode_pool_create(const char *name, int prealloc_size) { /* * Insert a node into the pool */ -static inline void __attribute__ ((always_inline)) +static __rte_always_inline void _qnode_pool_insert(struct qnode_pool *p, struct qnode *n) { n->next = NULL; @@ -198,7 +198,7 @@ _qnode_pool_insert(struct qnode_pool *p, struct qnode *n) * last item from the queue incurs the penalty of an atomic exchange. Since the * pool is maintained with a bulk pre-allocation the cost of this is amortised. */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _pool_remove(struct qnode_pool *p) { struct qnode *head; @@ -239,7 +239,7 @@ _pool_remove(struct qnode_pool *p) * This adds a retry to the _pool_remove function * defined above */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _qnode_pool_remove(struct qnode_pool *p) { struct qnode *n; @@ -259,7 +259,7 @@ _qnode_pool_remove(struct qnode_pool *p) * Allocate a node from the pool * If the pool is empty add mode nodes */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _qnode_alloc(void) { struct qnode_pool *p = (THIS_SCHED)->qnode_pool; @@ -304,7 +304,7 @@ _qnode_alloc(void) /* * free a queue node to the per scheduler pool from which it came */ -static inline void __attribute__ ((always_inline)) +static __rte_always_inline void _qnode_free(struct qnode *n) { struct qnode_pool *p = n->pool; diff --git a/examples/performance-thread/common/lthread_queue.h b/examples/performance-thread/common/lthread_queue.h index 4fc2074e..833ed92b 100644 --- a/examples/performance-thread/common/lthread_queue.h +++ b/examples/performance-thread/common/lthread_queue.h @@ -154,7 +154,7 @@ _lthread_queue_create(const char *name) /** * Return true if the queue is empty */ -static inline int __attribute__ ((always_inline)) +static __rte_always_inline int _lthread_queue_empty(struct lthread_queue *q) { return q->tail == q->head; @@ -185,7 +185,7 @@ RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched); * Insert a node into a queue * this implementation is multi producer safe */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _lthread_queue_insert_mp(struct lthread_queue *q, void *data) { @@ -219,7 +219,7 @@ _lthread_queue_insert_mp(struct lthread_queue * Insert an node into a queue in single producer mode * this implementation is NOT mult producer safe */ -static inline struct qnode *__attribute__ ((always_inline)) +static __rte_always_inline struct qnode * _lthread_queue_insert_sp(struct lthread_queue *q, void *data) { @@ -247,7 +247,7 @@ _lthread_queue_insert_sp(struct lthread_queue /* * Remove a node from a queue */ -static inline void *__attribute__ ((always_inline)) +static __rte_always_inline void * _lthread_queue_poll(struct lthread_queue *q) { void *data = NULL; @@ -278,7 +278,7 @@ _lthread_queue_poll(struct lthread_queue *q) /* * Remove a node from a queue */ -static inline void *__attribute__ ((always_inline)) +static __rte_always_inline void * _lthread_queue_remove(struct lthread_queue *q) { void *data = NULL; diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c index c64c21ff..98291478 100644 --- a/examples/performance-thread/common/lthread_sched.c +++ b/examples/performance-thread/common/lthread_sched.c @@ -369,8 +369,8 @@ void lthread_scheduler_shutdown_all(void) /* * Resume a suspended lthread */ -static inline void -_lthread_resume(struct lthread *lt) __attribute__ ((always_inline)); +static __rte_always_inline void +_lthread_resume(struct lthread *lt); static inline void _lthread_resume(struct lthread *lt) { struct lthread_sched *sched = THIS_SCHED; diff --git a/examples/performance-thread/common/lthread_sched.h b/examples/performance-thread/common/lthread_sched.h index 7cddda9c..aa2f0c48 100644 --- a/examples/performance-thread/common/lthread_sched.h +++ b/examples/performance-thread/common/lthread_sched.h @@ -112,8 +112,8 @@ static inline uint64_t _sched_now(void) return 1; } -static inline void -_affinitize(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_affinitize(void); static inline void _affinitize(void) { @@ -123,8 +123,8 @@ _affinitize(void) ctx_switch(&(THIS_SCHED)->ctx, <->ctx); } -static inline void -_suspend(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_suspend(void); static inline void _suspend(void) { @@ -136,8 +136,8 @@ _suspend(void) (THIS_SCHED)->nb_blocked_threads--; } -static inline void -_reschedule(void) __attribute__ ((always_inline)); +static __rte_always_inline void +_reschedule(void); static inline void _reschedule(void) { diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c index 2d98473e..7954b974 100644 --- a/examples/performance-thread/l3fwd-thread/main.c +++ b/examples/performance-thread/l3fwd-thread/main.c @@ -52,7 +52,6 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_eal.h> -#include <rte_per_lcore.h> #include <rte_launch.h> #include <rte_atomic.h> #include <rte_cycles.h> @@ -73,6 +72,7 @@ #include <rte_tcp.h> #include <rte_udp.h> #include <rte_string_fns.h> +#include <rte_pause.h> #include <cmdline_parse.h> #include <cmdline_parse_etheraddr.h> @@ -157,11 +157,7 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue, * When set to one, optimized forwarding path is enabled. * Note that LPM optimisation path uses SSE4.1 instructions. */ -#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__)) -#define ENABLE_MULTI_BUFFER_OPTIMIZE 0 -#else #define ENABLE_MULTI_BUFFER_OPTIMIZE 1 -#endif #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) #include <rte_hash.h> @@ -188,10 +184,10 @@ cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue, */ #define NB_MBUF RTE_MAX(\ - (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \ - nb_ports*nb_lcores*MAX_PKT_BURST + \ - nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \ - nb_lcores*MEMPOOL_CACHE_SIZE), \ + (nb_ports*nb_rx_queue*nb_rxd + \ + nb_ports*nb_lcores*MAX_PKT_BURST + \ + nb_ports*n_tx_queue*nb_txd + \ + nb_lcores*MEMPOOL_CACHE_SIZE), \ (unsigned)8192) #define MAX_PKT_BURST 32 @@ -225,7 +221,7 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS]; static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; -static __m128i val_eth[RTE_MAX_ETHPORTS]; +static xmm_t val_eth[RTE_MAX_ETHPORTS]; /* replace first 12B of the ethernet header. */ #define MASK_ETH 0x3f @@ -362,13 +358,8 @@ static struct rte_mempool *pktmbuf_pool[NB_SOCKETS]; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 #include <rte_hash_crc.h> #define DEFAULT_HASH_FUNC rte_hash_crc -#else -#include <rte_jhash.h> -#define DEFAULT_HASH_FUNC rte_jhash -#endif struct ipv4_5tuple { uint32_t ip_dst; @@ -485,17 +476,10 @@ ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len, t = k->proto; p = (const uint32_t *)&k->port_src; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 init_val = rte_hash_crc_4byte(t, init_val); init_val = rte_hash_crc_4byte(k->ip_src, init_val); init_val = rte_hash_crc_4byte(k->ip_dst, init_val); init_val = rte_hash_crc_4byte(*p, init_val); -#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - init_val = rte_jhash_1word(t, init_val); - init_val = rte_jhash_1word(k->ip_src, init_val); - init_val = rte_jhash_1word(k->ip_dst, init_val); - init_val = rte_jhash_1word(*p, init_val); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ return init_val; } @@ -506,16 +490,13 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, const union ipv6_5tuple_host *k; uint32_t t; const uint32_t *p; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3; const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3; -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ k = data; t = k->proto; p = (const uint32_t *)&k->port_src; -#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 ip_src0 = (const uint32_t *) k->ip_src; ip_src1 = (const uint32_t *)(k->ip_src + 4); ip_src2 = (const uint32_t *)(k->ip_src + 8); @@ -534,12 +515,6 @@ ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, init_val = rte_hash_crc_4byte(*ip_dst2, init_val); init_val = rte_hash_crc_4byte(*ip_dst3, init_val); init_val = rte_hash_crc_4byte(*p, init_val); -#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */ - init_val = rte_jhash_1word(t, init_val); - init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); - init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val); - init_val = rte_jhash_1word(*p, init_val); -#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ return init_val; } @@ -720,7 +695,7 @@ send_single_packet(struct rte_mbuf *m, uint8_t port) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline __attribute__((always_inline)) void +static __rte_always_inline void send_packetsx4(uint8_t port, struct rte_mbuf *m[], uint32_t num) { @@ -761,12 +736,15 @@ send_packetsx4(uint8_t port, case 0: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 3: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 2: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; + /* fall-through */ case 1: qconf->tx_mbufs[port].m_table[len + j] = m[j]; j++; @@ -788,12 +766,15 @@ send_packetsx4(uint8_t port, case 0: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 3: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 2: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; + /* fall-through */ case 1: qconf->tx_mbufs[port].m_table[j] = m[n + j]; j++; @@ -1281,7 +1262,7 @@ simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid) } #endif /* APP_LOOKUP_METHOD */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) { struct ether_hdr *eth_hdr; @@ -1369,7 +1350,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid) * If we encounter invalid IPV4 packet, then set destination port for it * to BAD_PORT value. */ -static inline __attribute__((always_inline)) void +static __rte_always_inline void rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) { uint8_t ihl; @@ -1397,7 +1378,7 @@ rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype) #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)) -static inline __attribute__((always_inline)) uint16_t +static __rte_always_inline uint16_t get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid) { uint32_t next_hop; @@ -1598,7 +1579,7 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP]) * Suppose we have array of destionation ports: * dst_port[] = {a, b, c, d,, e, ... } * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. - * We doing 4 comparisions at once and the result is 4 bit mask. + * We doing 4 comparisons at once and the result is 4 bit mask. * This mask is used as an index into prebuild array of pnum values. */ static inline uint16_t * @@ -1860,10 +1841,12 @@ process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx, process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 2: process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); j++; + /* fall-through */ case 1: process_packet(pkts_burst[j], dst_port + j, portid); GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j); @@ -3587,6 +3570,13 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n", ret, portid); + ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, + &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n", + ret, portid); + rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); print_ethaddr(" Address:", &ports_eth_addr[portid]); printf(", "); |