| author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2017-06-08 09:46:51 -0600 |
|---|---|---|
| committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2017-06-08 09:51:02 -0600 |
| commit | aab0c291a90f701b60f8c9a88cbcc265cba0ec8b (patch) | |
| tree | bfa7bf9663cb9a518d81f18e3a2c792eee8fa1c8 /examples/performance-thread | |
| parent | ce3d555e43e3795b5d9507fcfc76b7a0a92fd0d6 (diff) | |
Imported Upstream version 16.11.2
Change-Id: I947038e46a2c747296dc7aa7522239733ca2f659
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'examples/performance-thread')
5 files changed, 59 insertions, 13 deletions
```diff
diff --git a/examples/performance-thread/common/lthread_timer.h b/examples/performance-thread/common/lthread_timer.h
index b5e6fb0e..2d147577 100644
--- a/examples/performance-thread/common/lthread_timer.h
+++ b/examples/performance-thread/common/lthread_timer.h
@@ -42,11 +42,22 @@
 static inline uint64_t
 _ns_to_clks(uint64_t ns)
 {
-	unsigned __int128 clkns = rte_get_tsc_hz();
+	/*
+	 * clkns needs to be divided by 1E9 to get ns clocks. However,
+	 * dividing by this first would lose a lot of accuracy.
+	 * Dividing after a multiply by ns, could cause overflow of
+	 * uint64_t if ns is about 5 seconds [if we assume a max tsc
+	 * rate of 4GHz]. Therefore we first divide by 1E4, then
+	 * multiply and finally divide by 1E5. This allows ns to be
+	 * values many hours long, without overflow, while still keeping
+	 * reasonable accuracy.
+	 */
+	uint64_t clkns = rte_get_tsc_hz() / 1e4;
 
 	clkns *= ns;
-	clkns /= 1000000000;
-	return (uint64_t) clkns;
+	clkns /= 1e5;
+
+	return clkns;
 }
```
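The arithmetic described in that comment is easy to verify with concrete numbers. The short standalone program below contrasts a plain 64-bit multiply-first conversion with the staged divide from the hunk; it is only a sketch: the fixed 4 GHz constant standing in for rte_get_tsc_hz(), the two helper names, and the integer literals in place of the patch's 1e4/1e5 double literals are all illustrative assumptions.

```c
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Assumed TSC rate; stands in for rte_get_tsc_hz() so the sketch
 * builds without DPDK. 4 GHz is the worst case named in the comment. */
#define TSC_HZ UINT64_C(4000000000)

/* Multiply first, divide by 1e9 last, all in uint64_t: the product
 * tsc_hz * ns wraps once ns reaches about 4.6 seconds at 4 GHz. */
static uint64_t ns_to_clks_naive(uint64_t ns)
{
    uint64_t clkns = TSC_HZ;
    clkns *= ns;
    return clkns / UINT64_C(1000000000);
}

/* Patched approach: pre-divide the rate by 1e4, multiply, then divide
 * by the remaining 1e5. Overflow now needs ns of roughly 12.8 hours. */
static uint64_t ns_to_clks_staged(uint64_t ns)
{
    uint64_t clkns = TSC_HZ / UINT64_C(10000);
    clkns *= ns;
    return clkns / UINT64_C(100000);
}

int main(void)
{
    uint64_t ten_seconds = UINT64_C(10) * UINT64_C(1000000000);

    /* Expected answer is 4e10 clocks; the naive version has already
     * wrapped and prints a nonsense value. */
    printf("naive : %" PRIu64 "\n", ns_to_clks_naive(ten_seconds));
    printf("staged: %" PRIu64 "\n", ns_to_clks_staged(ten_seconds));
    return 0;
}
```

The removed code sidestepped the overflow with unsigned __int128, which not every supported compiler and target provides; the staged divide keeps the math in plain uint64_t. At 4 GHz, pre-dividing truncates the rate to 10 kHz granularity (a relative error below 3e-6) while raising the overflow ceiling from about 4.6 seconds' worth of nanoseconds to roughly 12.8 hours.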
```diff
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index fdc90b28..dd403ca8 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -90,6 +90,10 @@
 #define APP_LOOKUP_METHOD    APP_LOOKUP_LPM
 #endif
 
+#ifndef __GLIBC__ /* sched_getcpu() is glibc specific */
+#define sched_getcpu() rte_lcore_id()
+#endif
+
 /*
  * When set to zero, simple forwaring path is eanbled.
  * When set to one, optimized forwarding path is enabled.
@@ -282,7 +286,7 @@ static struct rte_eth_conf port_conf = {
 		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
-		.hw_strip_crc   = 0, /**< CRC stripped by hardware */
+		.hw_strip_crc   = 1, /**< CRC stripped by hardware */
 	},
 	.rx_adv_conf = {
 		.rss_conf = {
diff --git a/examples/performance-thread/pthread_shim/main.c b/examples/performance-thread/pthread_shim/main.c
index f0357218..850b009d 100644
--- a/examples/performance-thread/pthread_shim/main.c
+++ b/examples/performance-thread/pthread_shim/main.c
@@ -59,6 +59,10 @@
 #define DEBUG_APP 0
 #define HELLOW_WORLD_MAX_LTHREADS 10
 
+#ifndef __GLIBC__ /* sched_getcpu() is glibc-specific */
+#define sched_getcpu() rte_lcore_id()
+#endif
+
 __thread int print_count;
 __thread pthread_mutex_t print_lock;
@@ -175,12 +179,12 @@ static void initial_lthread(void *args __attribute__((unused)))
 		 * use an attribute to pass the desired lcore
 		 */
 		pthread_attr_t attr;
-		cpu_set_t cpuset;
+		rte_cpuset_t cpuset;
 
 		CPU_ZERO(&cpuset);
 		CPU_SET(lcore, &cpuset);
 		pthread_attr_init(&attr);
-		pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+		pthread_attr_setaffinity_np(&attr, sizeof(rte_cpuset_t), &cpuset);
 
 		/* create the thread */
 		pthread_create(&tid[i], &attr, helloworld_pthread, (void *) i);
diff --git a/examples/performance-thread/pthread_shim/pthread_shim.c b/examples/performance-thread/pthread_shim/pthread_shim.c
index 0d6100c9..113bafa0 100644
--- a/examples/performance-thread/pthread_shim/pthread_shim.c
+++ b/examples/performance-thread/pthread_shim/pthread_shim.c
@@ -48,6 +48,21 @@
 
 #define POSIX_ERRNO(x) (x)
 
+/* some releases of FreeBSD 10, e.g. 10.0, don't have CPU_COUNT macro */
+#ifndef CPU_COUNT
+#define CPU_COUNT(x) __cpu_count(x)
+
+static inline unsigned int
+__cpu_count(const rte_cpuset_t *cpuset)
+{
+	unsigned int i, count = 0;
+	for (i = 0; i < RTE_MAX_LCORE; i++)
+		if (CPU_ISSET(i, cpuset))
+			count++;
+	return count;
+}
+#endif
+
 /*
  * this flag determines at run time if we override pthread
  * calls and map then to equivalent lthread calls
@@ -159,7 +174,7 @@ int (*f_pthread_setschedparam)
 	int (*f_pthread_yield)
 	    (void);
 	int (*f_pthread_setaffinity_np)
-	    (pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset);
+	    (pthread_t thread, size_t cpusetsize, const rte_cpuset_t *cpuset);
 	int (*f_nanosleep)
 	    (const struct timespec *req, struct timespec *rem);
 } _sys_pthread_funcs = {
@@ -390,11 +405,11 @@ pthread_create(pthread_t *__restrict tid,
 	if (attr != NULL) {
 
 		/* determine CPU being requested */
-		cpu_set_t cpuset;
+		rte_cpuset_t cpuset;
 
 		CPU_ZERO(&cpuset);
 		pthread_attr_getaffinity_np(attr,
-				sizeof(cpu_set_t),
+				sizeof(rte_cpuset_t),
 				&cpuset);
 
 		if (CPU_COUNT(&cpuset) != 1)
@@ -576,15 +591,26 @@ int pthread_rwlock_wrlock(pthread_rwlock_t *a)
 	return _sys_pthread_funcs.f_pthread_rwlock_wrlock(a);
 }
 
-int pthread_yield(void)
+#ifdef RTE_EXEC_ENV_LINUXAPP
+int
+pthread_yield(void)
 {
 	if (override) {
 		lthread_yield();
 		return 0;
 	}
 	return _sys_pthread_funcs.f_pthread_yield();
-
 }
+#else
+void
+pthread_yield(void)
+{
+	if (override)
+		lthread_yield();
+	else
+		_sys_pthread_funcs.f_pthread_yield();
+}
+#endif
 
 pthread_t pthread_self(void)
 {
@@ -686,7 +712,7 @@ int nanosleep(const struct timespec *req, struct timespec *rem)
 
 int
 pthread_setaffinity_np(pthread_t thread, size_t cpusetsize,
-		const cpu_set_t *cpuset)
+		const rte_cpuset_t *cpuset)
 {
 	if (override) {
 		/* we only allow affinity with a single CPU */
diff --git a/examples/performance-thread/pthread_shim/pthread_shim.h b/examples/performance-thread/pthread_shim/pthread_shim.h
index 78bbb5ac..10f87894 100644
--- a/examples/performance-thread/pthread_shim/pthread_shim.h
+++ b/examples/performance-thread/pthread_shim/pthread_shim.h
@@ -33,7 +33,8 @@
 
 #ifndef _PTHREAD_SHIM_H_
 #define _PTHREAD_SHIM_H_
-#include <pthread.h>
+
+#include <rte_lcore.h>
 
 /*
  * This pthread shim is an example that demonstrates how legacy code
```
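Two of the portability fixes above lend themselves to tiny demos. (A third, the #ifdef split of pthread_yield(), follows directly from the differing prototypes: glibc's pthread_yield() returns int, FreeBSD's returns void.) First, the sched_getcpu() fallback: on non-glibc C libraries the examples substitute DPDK's rte_lcore_id(). The sketch below mimics that pattern; the rte_lcore_id() stub is an assumption so the file builds without DPDK headers (in the real examples it comes from <rte_lcore.h>).

```c
#define _GNU_SOURCE           /* glibc gates sched_getcpu() behind this */
#include <stdio.h>

#ifndef __GLIBC__
/* Stub standing in for DPDK's rte_lcore_id(); assumption for the
 * sketch only -- the examples get the real one from <rte_lcore.h>. */
static unsigned rte_lcore_id(void) { return 0; }
#define sched_getcpu() ((int)rte_lcore_id())
#else
#include <sched.h>            /* glibc provides the real sched_getcpu() */
#endif

int main(void)
{
    /* Either way, "which CPU am I on?" now compiles portably. */
    printf("running on cpu %d\n", sched_getcpu());
    return 0;
}
```

One caveat worth keeping in mind: rte_lcore_id() returns the EAL lcore id, which matches the physical CPU only under the usual 1:1 coremask mapping; for the diagnostic uses in these examples that is close enough.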
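Second, the CPU_COUNT fallback added to pthread_shim.c. The version below follows the same count-the-set-bits loop, but uses the POSIX cpu_set_t and CPU_SETSIZE rather than rte_cpuset_t and RTE_MAX_LCORE so that it compiles as a standalone file; treat those substitutions, and the my_cpu_count name, as assumptions of the sketch.

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

/* Same shape as the patch's __cpu_count(): walk the set and count the
 * CPUs one at a time. Only compiled where the C library lacks a
 * CPU_COUNT macro, e.g. early FreeBSD 10 releases. */
#ifndef CPU_COUNT
#define CPU_COUNT(s) my_cpu_count(s)
static inline unsigned int my_cpu_count(const cpu_set_t *set)
{
    unsigned int i, count = 0;
    for (i = 0; i < CPU_SETSIZE; i++)
        if (CPU_ISSET(i, set))
            count++;
    return count;
}
#endif

int main(void)
{
    cpu_set_t set;

    CPU_ZERO(&set);
    CPU_SET(0, &set);
    CPU_SET(3, &set);
    printf("%u cpus in set\n", (unsigned)CPU_COUNT(&set));  /* prints 2 */
    return 0;
}
```

As the pthread_create() hunk shows, the shim uses this count to enforce that a requested affinity set names exactly one CPU, since each lthread scheduler is pinned to a single lcore.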