diff options
author | hsandid <halsandi@cisco.com> | 2024-03-25 17:51:31 +0100 |
---|---|---|
committer | Damjan Marion <dmarion@0xa5.net> | 2024-03-29 16:29:44 +0000 |
commit | 71c32a898941e32b5d4f865b50fbe775560c582d (patch) | |
tree | ebddae6023dcdb8912fde7a32febfc19ee294f98 | |
parent | 3e147f08efc82c1c9d131bd03ee2efd493775570 (diff) |
vlib: improve automatic core pinning
Type: feature
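Automatic core pinning now fetches the CPU affinity list of the VPP
process from the kernel (via the sched_getaffinity system call). This
enables core pinning in environments where the host CPU list does not
necessarily align with the CPUs available to VPP, such as containers
running with a restricted cpuset.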
Change-Id: Ife8c2a2351c08c5c6c4fdf7c729eeff2697bc39a
Signed-off-by: hsandid <halsandi@cisco.com>
-rw-r--r-- | src/vlib/threads.c | 71 | ||||
-rw-r--r-- | src/vlib/threads.h | 5 | ||||
-rw-r--r-- | src/vppinfra/unix-misc.c | 31 | ||||
-rw-r--r-- | src/vppinfra/unix.h | 3 | ||||
-rwxr-xr-x | test/scripts/core_pinning.sh | 328 |
5 files changed, 431 insertions, 7 deletions
diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 713e1927d1f..3994afc2cea 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -179,7 +179,9 @@ vlib_thread_init (vlib_main_t * vm) u32 n_vlib_mains = 1; u32 first_index = 1; u32 i; - uword *avail_cpu; + pid_t pid; + uword *avail_cpu, *affinity_cpu; + uword n_cpus; u32 stats_num_worker_threads_dir_index; stats_num_worker_threads_dir_index = @@ -190,16 +192,39 @@ vlib_thread_init (vlib_main_t * vm) tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap (); tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap (); + /* get bitmap of active cpu cores vpp has affinity to */ + pid = getpid (); + tm->cpu_affinity_bitmap = os_get_cpu_affinity_bitmap (pid); + + /* if fetching affinity fails, return online cpu core bmp */ + if (tm->cpu_affinity_bitmap == 0) + tm->cpu_affinity_bitmap = os_get_online_cpu_core_bitmap (); + avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); + affinity_cpu = clib_bitmap_dup (tm->cpu_affinity_bitmap); /* skip cores */ + n_cpus = clib_bitmap_count_set_bits (avail_cpu); + if (tm->skip_cores >= n_cpus) + return clib_error_return (0, "skip-core greater than available cpus"); + n_cpus = clib_bitmap_count_set_bits (affinity_cpu); + if (tm->skip_cores >= n_cpus) + return clib_error_return (0, "skip-core greater than affinity cpus"); + for (i = 0; i < tm->skip_cores; i++) { - uword c = clib_bitmap_first_set (avail_cpu); + uword c; + c = clib_bitmap_first_set (avail_cpu); if (c == ~0) return clib_error_return (0, "no available cpus to skip"); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); + + c = clib_bitmap_first_set (affinity_cpu); + if (c == ~0) + return clib_error_return (0, "no available env cpus to skip"); + + affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0); } /* grab cpu for main thread */ @@ -209,6 +234,17 @@ vlib_thread_init (vlib_main_t * vm) return clib_error_return (0, "cpu %u is not available to be used" " for the main thread", tm->main_lcore); avail_cpu = clib_bitmap_set 
(avail_cpu, tm->main_lcore, 0); + affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0); + } + /* if auto enabled, grab first cpu vpp has affinity to for main thread */ + else if (tm->use_main_core_auto) + { + uword c = clib_bitmap_first_set (affinity_cpu); + if (c != ~0) + tm->main_lcore = c; + + avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); + affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0); } /* assume that there is socket 0 only if there is no data from sysfs */ @@ -289,13 +325,23 @@ vlib_thread_init (vlib_main_t * vm) } else { + /* for automatic pinning, use cpu affinity list */ + uword n_env_cpu = 0; + n_env_cpu = clib_bitmap_count_set_bits (affinity_cpu); + + if (n_env_cpu < tr->count) + return clib_error_return (0, + "no available cpus to be used for" + " the '%s' thread #%u", + tr->name, n_env_cpu); + for (j = 0; j < tr->count; j++) { /* Do not use CPU 0 by default - leave it to the host and IRQs */ - uword avail_c0 = clib_bitmap_get (avail_cpu, 0); - avail_cpu = clib_bitmap_set (avail_cpu, 0, 0); + uword avail_c0 = clib_bitmap_get (affinity_cpu, 0); + affinity_cpu = clib_bitmap_set (affinity_cpu, 0, 0); - uword c = clib_bitmap_first_set (avail_cpu); + uword c = clib_bitmap_first_set (affinity_cpu); /* Use CPU 0 as a last resort */ if (c == ~0 && avail_c0) { @@ -309,14 +355,15 @@ vlib_thread_init (vlib_main_t * vm) " the '%s' thread #%u", tr->name, tr->count); - avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0); - avail_cpu = clib_bitmap_set (avail_cpu, c, 0); + affinity_cpu = clib_bitmap_set (affinity_cpu, 0, avail_c0); + affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0); tr->coremask = clib_bitmap_set (tr->coremask, c, 1); } } } clib_bitmap_free (avail_cpu); + clib_bitmap_free (affinity_cpu); tm->n_vlib_mains = n_vlib_mains; vlib_stats_set_gauge (stats_num_worker_threads_dir_index, n_vlib_mains - 1); @@ -1118,6 +1165,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tm->sched_policy = ~0; 
tm->sched_priority = ~0; tm->main_lcore = ~0; + tm->use_main_core_auto = 0; tr = tm->next; @@ -1133,6 +1181,8 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tm->use_pthreads = 1; else if (unformat (input, "thread-prefix %v", &tm->thread_prefix)) ; + else if (unformat (input, "main-core auto")) + tm->use_main_core_auto = 1; else if (unformat (input, "main-core %u", &tm->main_lcore)) ; else if (unformat (input, "skip-cores %u", &tm->skip_cores)) @@ -1191,6 +1241,13 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) break; } + if (tm->main_lcore != ~0 && tm->use_main_core_auto) + { + return clib_error_return ( + 0, "cannot set both 'main-core %u' and 'main-core auto'", + tm->main_lcore); + } + if (tm->sched_priority != ~0) { if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index ac0c1d5d266..3072d0e67dd 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -255,6 +255,8 @@ typedef struct int use_pthreads; + int use_main_core_auto; + /* Number of vlib_main / vnet_main clones */ u32 n_vlib_mains; @@ -282,6 +284,9 @@ typedef struct /* Bitmap of available CPU sockets (NUMA nodes) */ uword *cpu_socket_bitmap; + /* Bitmap of CPU affinity for VPP process */ + uword *cpu_affinity_bitmap; + /* Worker handoff queues */ vlib_frame_queue_main_t *frame_queue_mains; diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index e0591ff4604..4dbc5ce98ce 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -46,6 +46,7 @@ #include <sys/stat.h> #include <sys/types.h> +#include <sys/syscall.h> #include <sys/uio.h> /* writev */ #include <fcntl.h> #include <stdio.h> /* for sprintf */ @@ -275,6 +276,36 @@ os_get_online_cpu_core_bitmap () } __clib_export clib_bitmap_t * +os_get_cpu_affinity_bitmap (int pid) +{ +#if __linux + int index, ret; + cpu_set_t cpuset; + uword *affinity_cpus; + + clib_bitmap_alloc (affinity_cpus, sizeof (cpu_set_t)); + clib_bitmap_zero 
(affinity_cpus); + + __CPU_ZERO_S (sizeof (cpu_set_t), &cpuset); + + ret = syscall (SYS_sched_getaffinity, 0, sizeof (cpu_set_t), &cpuset); + + if (ret < 0) + { + clib_bitmap_free (affinity_cpus); + return 0; + } + + for (index = 0; index < sizeof (cpu_set_t); index++) + if (__CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset)) + clib_bitmap_set (affinity_cpus, index, 1); + return affinity_cpus; +#else + return 0; +#endif +} + +__clib_export clib_bitmap_t * os_get_online_cpu_node_bitmap () { #if __linux__ diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h index 651f9bb99e0..3ad57b05e72 100644 --- a/src/vppinfra/unix.h +++ b/src/vppinfra/unix.h @@ -56,6 +56,9 @@ clib_error_t *unix_proc_file_contents (char *file, u8 ** result); /* Retrieve bitmap of online cpu cures */ clib_bitmap_t *os_get_online_cpu_core_bitmap (); +/* Retrieve bitmap of cpus vpp has affinity to */ +clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid); + /* Retrieve bitmap of online cpu nodes (sockets) */ clib_bitmap_t *os_get_online_cpu_node_bitmap (); diff --git a/test/scripts/core_pinning.sh b/test/scripts/core_pinning.sh new file mode 100755 index 00000000000..941d53871e5 --- /dev/null +++ b/test/scripts/core_pinning.sh @@ -0,0 +1,328 @@ +#!/bin/bash +# +# core_pinning_auto.sh -- script to test vpp (debug-build) core-pinning +# -- in bare-metal, containers (docker, lxc) +# +DOCKER_CONTAINER_NAME="vpp_core_pinning" +VPP_SOCK_PATH=/run/vpp +CONTAINER_CPU_RANGE="4-7" +TEST_SUCCESS=0 +TEST_FAIL=0 +if [ ! $WS_ROOT ] +then + if [ ! -d "../../../vpp" ]; then + echo "VPP workspace path invalid" + echo "Please execute script from vpp/test/scripts folder.." 
+ exit 1 + fi + WS_ROOT="$(dirname $(readlink -e "../../../vpp"))/$(basename "../../../vpp")" +fi +# Get available CPU count on host machine +host_cpulist=$(cat /sys/devices/system/cpu/online) +startcpu="${host_cpulist%-*}" +endcpu="${host_cpulist#*\-}" +cpucount="$(($endcpu - $startcpu + 1))" +if [ $cpucount -lt 8 ] +then + echo "Current host machine has $cpucount CPUs" + echo "A minimum of 8 CPUs is required to run testcases, exiting.." + exit 1 +fi +# Check that container 'vpp_core_pinning' does not already exist +count=$(docker ps -a | grep -c "$DOCKER_CONTAINER_NAME") +if [ $count -ne 0 ] +then + echo "Error: docker container $DOCKER_CONTAINER_NAME already exists" + echo "Remove it using 'docker stop/docker rm', then re-run test" + exit 1 +fi +# Check that there is no vpp instance currently running on the machine +count=$(pgrep vpp | wc -l) +if [ $count -ne 0 ] +then + echo "Error: a vpp instance is currently running on this machine" + echo "Please stop the running instance, then re-run test" + exit 1 +fi +mkdir -p $VPP_SOCK_PATH + +# Function to parse main core +parse_maincore () { + main_core_args=$1 + main_core_parsed=$main_core_args + if [ $main_core_args = "auto" ]; + then + main_core_parsed="0" + if [ -n "$SKIP_CORE" ] + then + main_core_parsed=$(($main_core_parsed + $SKIP_CORE)) + fi + if [ -n "$CONTAINER_RESTRAIN_CPUSET" ] + then + main_core_parsed=(${container_cpus[ $main_core_parsed ]}) + fi + fi + echo $main_core_parsed +} + +# Function to parse n workers range to an array +# e.g. 
"4" is parsed to ('0','1','2','3') +parse_workers_n () { + workers_n_args=$1 + workers_n_parsed=() + main_core_increment="0" + skip_core_increment="0" + if [ -n "$SKIP_CORE" ] + then + skip_core_increment=$(($SKIP_CORE)) + fi + + for ((i=0;i<$workers_n_args;i++)); do + + if [ -n "$CONTAINER_RESTRAIN_CPUSET" ] + then + if [ $(( ${container_cpus[ $(($i + $skip_core_increment)) ]})) -eq $(("$parsed_main_core")) ] + then + main_core_increment=$(($main_core_increment + 1)) + fi + workers_n_parsed+=" ${container_cpus[ $(($i + $main_core_increment + $skip_core_increment)) ]}" + else + if [ $(( $skip_core_increment + $i)) -eq $(("$parsed_main_core")) ] + then + main_core_increment=$(($main_core_increment + 1)) + fi + workers_n_parsed+=" $(($i + $main_core_increment + $skip_core_increment))" + fi + done + echo $workers_n_parsed +} + +# Function to parse corelist range to an array +# e.g. "0,3-5,7" is parsed to ('0','3','4','5','7') +parse_corelist () { + corelist_args=$1 + corelist_args=$(echo $corelist_args | grep -Po '[0-9]+-[0-9]+|[0-9]+') + corelist_parsed=() + for corelist_elt in ${corelist_args[@]};do + if [ $(echo $corelist_elt | grep -Po '[0-9]+-[0-9]+') ] + then + startcpu="${corelist_elt%-*}" + endcpu="${corelist_elt#*\-}" + cpucount="$(($endcpu - $startcpu))" + for ((i=0;i<=$cpucount;i++)); do + corelist_parsed+=" $(($i+$startcpu))" + done + elif [ $(echo $corelist_elt | grep -Po '[0-9]+') ] + then + corelist_parsed+=" ${corelist_elt}" + fi + done + echo $corelist_parsed +} +# Test VPP core pinning configuration +test_pinning_conf () { + VPP_CPU_EXTRA_OPTIONS="" + if [ -n "$CORELIST_WORKERS" ]; + then + VPP_CPU_EXTRA_OPTIONS=" corelist-workers ${CORELIST_WORKERS}" + fi + if [ -n "$WORKERS_AUTO" ]; + then + VPP_CPU_EXTRA_OPTIONS=" workers ${WORKERS_AUTO}" + fi + if [ -n "$SKIP_CORE" ]; + then + VPP_CPU_EXTRA_OPTIONS="${VPP_CPU_EXTRA_OPTIONS} skip-cores ${SKIP_CORE}" + fi + echo "TEST - conf 'cpu {main-core ${MAIN_CORE} ${VPP_CPU_EXTRA_OPTIONS}}'" + if [ -z 
"$CONTAINER_RESTRAIN_CPUSET" ]; + then + VPP_CONTAINER_CPUSET="" + echo "(Running vpp in container with full host cpuset $host_cpulist)" + else + VPP_CONTAINER_CPUSET="--cpuset-cpus $CONTAINER_CPU_RANGE" + echo "(Running vpp in container with limited cpuset $CONTAINER_CPU_RANGE)" + fi + (docker run -d ${VPP_CONTAINER_CPUSET} --name="$DOCKER_CONTAINER_NAME" \ + -e LD_LIBRARY_PATH="/vpp/build-root/build-vpp_debug-native/vpp/lib/x86_64-linux-gnu/" -v $VPP_SOCK_PATH:$VPP_SOCK_PATH \ + -v $WS_ROOT:/vpp ubuntu:22.04 sh -c "/vpp/build-root/build-vpp_debug-native/vpp/bin/vpp unix {interactive \ + nodaemon cli-listen $VPP_SOCK_PATH/cli.sock} cpu {main-core ${MAIN_CORE} ${VPP_CPU_EXTRA_OPTIONS} } plugins \ + { plugin dpdk_plugin.so {disable } }" > /dev/null ) + sleep 3 # wait for VPP to initialize socket + # Change access permissions on vpp cli socket + # docker exec -it "$DOCKER_CONTAINER_NAME" /bin/bash -c "chmod 777 $VPP_SOCK_PATH/cli.sock" > /dev/null + # check if vppctl can connect to vpp container instance + $WS_ROOT/build-root/build-vpp_debug-native/vpp/bin/vppctl -s $VPP_SOCK_PATH/cli.sock show threads 1> /dev/null + # get CPUs vpp instance in container is running on + taskset_vpp_cpus=($( taskset --all-tasks -pc $(pgrep vpp) | grep -e ".$" -o)) + rc=$? 
+ # parse list of user requested CPUs for vpp + requested_cpus=() + parsed_main_core=$(parse_maincore ${MAIN_CORE}) + requested_cpus+=($parsed_main_core) + if [ -n "$CORELIST_WORKERS" ]; + then + requested_cpus+=($(parse_corelist ${CORELIST_WORKERS})) + fi + if [ -n "$WORKERS_AUTO" ]; + then + requested_cpus+=($(parse_workers_n ${WORKERS_AUTO})) + fi + + # parse list of expected CPUs used by vpp + expected_cpu_mapping=() + expected_cpu_mapping=("${requested_cpus[@]}") + echo "CPUs requested by user: [${requested_cpus[@]}]" + echo "--------------------" + echo "Expected CPU Mapping: [${expected_cpu_mapping[@]}]" + echo "VPP pinning (taskset): [${taskset_vpp_cpus[@]}]" + #check if expected CPU mapping matches CPUs vpp instance in container is running on + failure_cond="" + for index in ${!taskset_vpp_cpus[@]}; do + if [ ${taskset_vpp_cpus[$index]} -ne ${expected_cpu_mapping[ $index ]} ] + then + failure_cond="t" + fi + done + if [ $rc -eq 0 ] && [ -z "$failure_cond" ] + then + echo "Test Successful" + TEST_SUCCESS=$(($TEST_SUCCESS+1)) + else + echo "Test Failed" + TEST_FAIL=$(($TEST_FAIL+1)) + fi + echo "==============================================" + echo " " + # Stop & destroy container instance + docker stop $DOCKER_CONTAINER_NAME &> /dev/null + docker rm -f $DOCKER_CONTAINER_NAME &> /dev/null +} +test_invalid_conf () { + if [ -n "$CORELIST_WORKERS" ]; + then + VPP_CPU_EXTRA_OPTIONS=" corelist-workers ${CORELIST_WORKERS}" + fi + if [ -n "$WORKERS_AUTO" ]; + then + VPP_CPU_EXTRA_OPTIONS=" workers ${WORKERS_AUTO}" + fi + if [ -n "$SKIP_CORE" ]; + then + VPP_CPU_EXTRA_OPTIONS="${VPP_CPU_EXTRA_OPTIONS} skip-cores ${SKIP_CORE}" + fi + echo "TEST - conf 'cpu {main-core ${MAIN_CORE} ${VPP_CPU_EXTRA_OPTIONS}}'" + if [ -z "$CONTAINER_RESTRAIN_CPUSET" ]; + then + VPP_CONTAINER_CPUSET="" + echo "(Running vpp in container with full host cpuset $host_cpulist)" + else + VPP_CONTAINER_CPUSET="--cpuset-cpus $CONTAINER_CPU_RANGE" + echo "(Running vpp in container with limited 
cpuset $CONTAINER_CPU_RANGE)" + fi + (docker run -d --cpuset-cpus $CONTAINER_CPU_RANGE --name="$DOCKER_CONTAINER_NAME" \ + -e LD_LIBRARY_PATH="/vpp/build-root/build-vpp_debug-native/vpp/lib/x86_64-linux-gnu/" -v $VPP_SOCK_PATH:$VPP_SOCK_PATH \ + -v $WS_ROOT:/vpp ubuntu:22.04 sh -c "/vpp/build-root/build-vpp_debug-native/vpp/bin/vpp unix {interactive \ + nodaemon cli-listen $VPP_SOCK_PATH/cli.sock} cpu {main-core ${MAIN_CORE} ${VPP_CPU_EXTRA_OPTIONS}} plugins \ + { plugin dpdk_plugin.so {disable } }" > /dev/null) + sleep 3 # wait for vpp to initialize socket + # check if vpp launched with invalid configuration + taskset --all-tasks -pc $(pgrep vpp) &> /dev/null + rc=$? + if [ $rc -eq 1 ] + then + echo " " + echo "OK... VPP did not launch with invalid configuration" + TEST_SUCCESS=$(($TEST_SUCCESS+1)) + else + echo " " + echo "Failure... VPP launched with wrong configuration" + TEST_FAIL=$(($TEST_FAIL+1)) + fi + echo "==============================================" + echo " " + # Stop & destroy container instance + docker stop $DOCKER_CONTAINER_NAME &> /dev/null + docker rm -f $DOCKER_CONTAINER_NAME &> /dev/null +} +run_tests () { + container_cpus=($(parse_corelist ${CONTAINER_CPU_RANGE})) + echo "TESTING VALID CORE PINNING CONFIGURATIONS" + echo " " + WORKERS_AUTO="" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="" + CORELIST_WORKERS="1-3" + MAIN_CORE="0" + test_pinning_conf + WORKERS_AUTO="" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="" + CORELIST_WORKERS="0,2-3" + MAIN_CORE="1" + test_pinning_conf + WORKERS_AUTO="" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="" + CORELIST_WORKERS="0-2" + MAIN_CORE="3" + test_pinning_conf + WORKERS_AUTO="2" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="" + CORELIST_WORKERS="" + MAIN_CORE="auto" + test_pinning_conf + WORKERS_AUTO="3" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + MAIN_CORE="auto" + test_pinning_conf + WORKERS_AUTO="2" + SKIP_CORE="1" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + 
MAIN_CORE="auto" + test_pinning_conf + WORKERS_AUTO="2" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + MAIN_CORE="5" + test_pinning_conf + echo "TESTING NON-VALID CORE PINNING CONFIGURATIONS" + echo " " + WORKERS_AUTO="" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="1-3" + MAIN_CORE="0" + test_invalid_conf + WORKERS_AUTO="3" + SKIP_CORE="1" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + MAIN_CORE="auto" + test_invalid_conf + WORKERS_AUTO="5" + SKIP_CORE="" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + MAIN_CORE="auto" + test_invalid_conf + WORKERS_AUTO="" + SKIP_CORE="4" + CONTAINER_RESTRAIN_CPUSET="t" + CORELIST_WORKERS="" + MAIN_CORE="auto" + test_invalid_conf + echo " " + echo "========================" + echo "RESULTS:" + echo "SUCCESS: $TEST_SUCCESS" + echo "FAILURE: $TEST_FAIL" + echo "========================" + echo " " +} +run_tests
\ No newline at end of file |