diff options
-rw-r--r-- | extras/hs-test/Makefile | 12 | ||||
-rw-r--r-- | extras/hs-test/framework_test.go | 6 | ||||
-rw-r--r-- | extras/hs-test/hs_test.sh | 6 | ||||
-rw-r--r-- | extras/hs-test/infra/cpu.go | 195 | ||||
-rw-r--r-- | extras/hs-test/infra/hst_suite.go | 7 |
5 files changed, 179 insertions, 47 deletions
diff --git a/extras/hs-test/Makefile b/extras/hs-test/Makefile index 596acb1b57d..e326e9f500c 100644 --- a/extras/hs-test/Makefile +++ b/extras/hs-test/Makefile @@ -41,6 +41,10 @@ ifeq ($(REPEAT),) REPEAT=0 endif +ifeq ($(CPU0),) +CPU0=false +endif + ifeq ($(VPPSRC),) VPPSRC=$(shell pwd)/../.. endif @@ -81,6 +85,7 @@ help: @echo " VPPSRC=[path-to-vpp-src] - path to vpp source files (for gdb)" @echo " PARALLEL=[n-cpus] - number of test processes to spawn to run in parallel" @echo " REPEAT=[n] - repeat tests up to N times or until a failure occurs" + @echo " CPU0=[true|false] - use cpu0" @echo @echo "List of all tests:" @$(MAKE) list-tests @@ -117,7 +122,7 @@ test: .deps.ok .build.ok @# necessary so gmake won't skip executing the bash script @-bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST) --cpus=$(CPUS) \ - --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) + --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) --cpu0=$(CPU0) @bash ./script/compress.sh .PHONY: test-debug @@ -126,14 +131,15 @@ test-debug: .deps.ok .build_debug.ok @# necessary so gmake won't skip executing the bash script @-bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST) --cpus=$(CPUS) \ - --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) --debug_build=true + --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) --debug_build=true \ + --cpu0=$(CPU0) @bash ./script/compress.sh .PHONY: test-cov test-cov: .deps.ok .build.cov.ok @-bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST-HS) --cpus=$(CPUS) \ - --vppsrc=$(VPPSRC) + --vppsrc=$(VPPSRC) --cpu0=$(CPU0) @$(MAKE) -C ../.. test-cov-post HS_TEST=1 @bash ./script/compress.sh diff --git a/extras/hs-test/framework_test.go b/extras/hs-test/framework_test.go index a086f75a5fc..7c8c5648d7b 100644 --- a/extras/hs-test/framework_test.go +++ b/extras/hs-test/framework_test.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "runtime" + "strings" "testing" "time" @@ -26,6 +27,11 @@ func TestHst(t *testing.T) { SuiteTimeout = time.Minute * 5 } + output, err := os.ReadFile("/sys/devices/system/node/online") + fmt.Println(string(output)) + if err == nil && strings.Contains(string(output), "-") { + NumaAwareCpuAlloc = true + } // creates a file with PPID, used for 'make cleanup-hst' ppid := fmt.Sprint(os.Getppid()) ppid = ppid[:len(ppid)-1] diff --git a/extras/hs-test/hs_test.sh b/extras/hs-test/hs_test.sh index 107fc686176..803b8f717da 100644 --- a/extras/hs-test/hs_test.sh +++ b/extras/hs-test/hs_test.sh @@ -68,6 +68,12 @@ case "${i}" in --repeat=*) ginkgo_args="$ginkgo_args --repeat=${i#*=}" ;; + --cpu0=*) + cpu0="${i#*=}" + if [ "$cpu0" = "true" ]; then + args="$args -cpu0" + fi + ;; esac done diff --git a/extras/hs-test/infra/cpu.go b/extras/hs-test/infra/cpu.go index b5555d85b98..b26a06c98db 100644 --- a/extras/hs-test/infra/cpu.go +++ b/extras/hs-test/infra/cpu.go @@ -7,6 +7,7 @@ import ( . "github.com/onsi/ginkgo/v2" "os" "os/exec" + "strconv" "strings" ) @@ -18,80 +19,188 @@ type CpuContext struct { } type CpuAllocatorT struct { - cpus []int + cpus []int + runningInCi bool + buildNumber int + maxContainerCount int +} + +func iterateAndAppend(start int, end int, slice []int) []int { + for i := start; i <= end; i++ { + slice = append(slice, i) + } + return slice } var cpuAllocator *CpuAllocatorT = nil func (c *CpuAllocatorT) Allocate(containerCount int, nCpus int) (*CpuContext, error) { var cpuCtx CpuContext + // indexes, not actual cores + var minCpu, maxCpu int - // splitting cpus into equal parts; this will over-allocate cores but it's good enough for now - maxContainerCount := 4 - // skip CPU 0 - minCpu := ((GinkgoParallelProcess() - 1) * maxContainerCount * nCpus) + 1 - maxCpu := (GinkgoParallelProcess() * maxContainerCount * nCpus) + if c.runningInCi { + minCpu = ((c.buildNumber) * c.maxContainerCount * nCpus) + maxCpu = ((c.buildNumber + 1) * c.maxContainerCount * nCpus) - 1 + } else { + minCpu = ((GinkgoParallelProcess() - 1) * c.maxContainerCount * nCpus) + maxCpu = (GinkgoParallelProcess() * c.maxContainerCount * nCpus) - 1 + } if len(c.cpus)-1 < maxCpu { - err := fmt.Errorf("could not allocate %d CPUs; available: %d; attempted to allocate cores %d-%d", - nCpus*containerCount, len(c.cpus)-1, minCpu, maxCpu) + err := fmt.Errorf("could not allocate %d CPUs; available count: %d; attempted to allocate cores with index %d-%d; max index: %d;\n"+ + "available cores: %v", nCpus*containerCount, len(c.cpus), minCpu, maxCpu, len(c.cpus)-1, c.cpus) return nil, err } + if containerCount == 1 { cpuCtx.cpus = c.cpus[minCpu : minCpu+nCpus] - } else if containerCount > 1 && containerCount <= maxContainerCount { + } else if containerCount > 1 && containerCount <= c.maxContainerCount { cpuCtx.cpus = c.cpus[minCpu+(nCpus*(containerCount-1)) : minCpu+(nCpus*containerCount)] } else { - return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", maxContainerCount) + return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", c.maxContainerCount) } - cpuCtx.cpuAllocator = c return &cpuCtx, nil } func (c *CpuAllocatorT) readCpus() error { - var first, last int - - // Path depends on cgroup version. We need to check which version is in use. - // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/' - // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1. - cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/") - byteOutput, err := cmd.CombinedOutput() - if err != nil { - return err - } - CpuPath := CgroupPath - if strings.Contains(string(byteOutput), "tmpfs") { - CpuPath += "cpuset/cpuset.effective_cpus" - } else if strings.Contains(string(byteOutput), "cgroup2fs") { - CpuPath += "cpuset.cpus.effective" + var first, second, third, fourth int + var file *os.File + var err error + + if c.runningInCi { + // non-debug build runs on node0, debug on node1 + if *IsDebugBuild { + file, err = os.Open("/sys/devices/system/node/node1/cpulist") + } else { + file, err = os.Open("/sys/devices/system/node/node0/cpulist") + } + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &first, &second, &third, &fourth) + if err != nil { + return err + } + + c.cpus = iterateAndAppend(first, second, c.cpus) + c.cpus = iterateAndAppend(third, fourth, c.cpus) + } else if NumaAwareCpuAlloc { + var fifth, sixth int + var tmpCpus []int + + file, err := os.Open("/sys/devices/system/node/online") + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + // get numa node range + _, err = fmt.Sscanf(line, "%d-%d", &first, &second) + if err != nil { + return err + } + + for i := first; i <= second; i++ { + file, err := os.Open("/sys/devices/system/node/node" + fmt.Sprint(i) + "/cpulist") + if err != nil { + return err + } + defer file.Close() + + // get numa node cores + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &third, &fourth, &fifth, &sixth) + if err != nil { + return err + } + + // get numa node cores from first range + tmpCpus = iterateAndAppend(third, fourth, tmpCpus) + + // discard cpu 0 + if tmpCpus[0] == 0 && !*UseCpu0{ + tmpCpus = tmpCpus[1:] + } + + // get numa node cores from second range + tmpCpus = iterateAndAppend(fifth, sixth, tmpCpus) + + // make c.cpus divisible by maxContainerCount * nCpus, so we don't have to check which numa will be used + // and we can use offsets + count_to_remove := len(tmpCpus) % (c.maxContainerCount * *NConfiguredCpus) + c.cpus = append(c.cpus, tmpCpus[:len(tmpCpus)-count_to_remove]...) + tmpCpus = tmpCpus[:0] + } } else { - return errors.New("cgroup unknown fs: " + string(byteOutput)) - } + // Path depends on cgroup version. We need to check which version is in use. + // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/' + // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1. + cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/") + byteOutput, err := cmd.CombinedOutput() + if err != nil { + return err + } - file, err := os.Open(CpuPath) - if err != nil { - return err - } - defer file.Close() - - sc := bufio.NewScanner(file) - sc.Scan() - line := sc.Text() - _, err = fmt.Sscanf(line, "%d-%d", &first, &last) - if err != nil { - return err + CpuPath := CgroupPath + if strings.Contains(string(byteOutput), "tmpfs") { + CpuPath += "cpuset/cpuset.effective_cpus" + } else if strings.Contains(string(byteOutput), "cgroup2fs") { + CpuPath += "cpuset.cpus.effective" + } else { + return errors.New("cgroup unknown fs: " + string(byteOutput)) + } + + file, err := os.Open(CpuPath) + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d", &first, &second) + if err != nil { + return err + } + c.cpus = iterateAndAppend(first, second, c.cpus) } - for i := first; i <= last; i++ { - c.cpus = append(c.cpus, i) + + // discard cpu 0 + if c.cpus[0] == 0 && !*UseCpu0 { + c.cpus = c.cpus[1:] } return nil } func CpuAllocator() (*CpuAllocatorT, error) { if cpuAllocator == nil { + var err error cpuAllocator = new(CpuAllocatorT) - err := cpuAllocator.readCpus() + cpuAllocator.maxContainerCount = 4 + buildNumberStr := os.Getenv("BUILD_NUMBER") + + if buildNumberStr != "" { + cpuAllocator.runningInCi = true + // get last digit of build number + cpuAllocator.buildNumber, err = strconv.Atoi(buildNumberStr[len(buildNumberStr)-1:]) + if err != nil { + return nil, err + } + } + err = cpuAllocator.readCpus() if err != nil { return nil, err } diff --git a/extras/hs-test/infra/hst_suite.go b/extras/hs-test/infra/hst_suite.go index a6ba14676d0..b2e069343a1 100644 --- a/extras/hs-test/infra/hst_suite.go +++ b/extras/hs-test/infra/hst_suite.go @@ -33,6 +33,8 @@ var IsVppDebug = flag.Bool("debug", false, "attach gdb to vpp") var NConfiguredCpus = flag.Int("cpus", 1, "number of CPUs assigned to vpp") var VppSourceFileDir = flag.String("vppsrc", "", "vpp source file directory") var IsDebugBuild = flag.Bool("debug_build", false, "some paths are different with debug build") +var UseCpu0 = flag.Bool("cpu0", false, "use cpu0") +var NumaAwareCpuAlloc bool var SuiteTimeout time.Duration type HstSuite struct { @@ -78,7 +80,10 @@ func (s *HstSuite) SetupSuite() { func (s *HstSuite) AllocateCpus() []int { cpuCtx, err := s.CpuAllocator.Allocate(len(s.StartedContainers), s.CpuPerVpp) - s.AssertNil(err) + // using Fail instead of AssertNil to make error message more readable + if err != nil { + Fail(fmt.Sprint(err)) + } s.AddCpuContext(cpuCtx) return cpuCtx.cpus } |