diff options
Diffstat (limited to 'extras/hs-test/infra')
-rw-r--r-- | extras/hs-test/infra/cpu.go | 195 | ||||
-rw-r--r-- | extras/hs-test/infra/hst_suite.go | 7 |
2 files changed, 158 insertions, 44 deletions
diff --git a/extras/hs-test/infra/cpu.go b/extras/hs-test/infra/cpu.go index b5555d85b98..b26a06c98db 100644 --- a/extras/hs-test/infra/cpu.go +++ b/extras/hs-test/infra/cpu.go @@ -7,6 +7,7 @@ import ( . "github.com/onsi/ginkgo/v2" "os" "os/exec" + "strconv" "strings" ) @@ -18,80 +19,188 @@ type CpuContext struct { } type CpuAllocatorT struct { - cpus []int + cpus []int + runningInCi bool + buildNumber int + maxContainerCount int +} + +func iterateAndAppend(start int, end int, slice []int) []int { + for i := start; i <= end; i++ { + slice = append(slice, i) + } + return slice } var cpuAllocator *CpuAllocatorT = nil func (c *CpuAllocatorT) Allocate(containerCount int, nCpus int) (*CpuContext, error) { var cpuCtx CpuContext + // indexes, not actual cores + var minCpu, maxCpu int - // splitting cpus into equal parts; this will over-allocate cores but it's good enough for now - maxContainerCount := 4 - // skip CPU 0 - minCpu := ((GinkgoParallelProcess() - 1) * maxContainerCount * nCpus) + 1 - maxCpu := (GinkgoParallelProcess() * maxContainerCount * nCpus) + if c.runningInCi { + minCpu = ((c.buildNumber) * c.maxContainerCount * nCpus) + maxCpu = ((c.buildNumber + 1) * c.maxContainerCount * nCpus) - 1 + } else { + minCpu = ((GinkgoParallelProcess() - 1) * c.maxContainerCount * nCpus) + maxCpu = (GinkgoParallelProcess() * c.maxContainerCount * nCpus) - 1 + } if len(c.cpus)-1 < maxCpu { - err := fmt.Errorf("could not allocate %d CPUs; available: %d; attempted to allocate cores %d-%d", - nCpus*containerCount, len(c.cpus)-1, minCpu, maxCpu) + err := fmt.Errorf("could not allocate %d CPUs; available count: %d; attempted to allocate cores with index %d-%d; max index: %d;\n"+ + "available cores: %v", nCpus*containerCount, len(c.cpus), minCpu, maxCpu, len(c.cpus)-1, c.cpus) return nil, err } + if containerCount == 1 { cpuCtx.cpus = c.cpus[minCpu : minCpu+nCpus] - } else if containerCount > 1 && containerCount <= maxContainerCount { + } else if containerCount > 1 && containerCount <= c.maxContainerCount { cpuCtx.cpus = c.cpus[minCpu+(nCpus*(containerCount-1)) : minCpu+(nCpus*containerCount)] } else { - return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", maxContainerCount) + return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", c.maxContainerCount) } - cpuCtx.cpuAllocator = c return &cpuCtx, nil } func (c *CpuAllocatorT) readCpus() error { - var first, last int - - // Path depends on cgroup version. We need to check which version is in use. - // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/' - // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1. - cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/") - byteOutput, err := cmd.CombinedOutput() - if err != nil { - return err - } - CpuPath := CgroupPath - if strings.Contains(string(byteOutput), "tmpfs") { - CpuPath += "cpuset/cpuset.effective_cpus" - } else if strings.Contains(string(byteOutput), "cgroup2fs") { - CpuPath += "cpuset.cpus.effective" + var first, second, third, fourth int + var file *os.File + var err error + + if c.runningInCi { + // non-debug build runs on node0, debug on node1 + if *IsDebugBuild { + file, err = os.Open("/sys/devices/system/node/node1/cpulist") + } else { + file, err = os.Open("/sys/devices/system/node/node0/cpulist") + } + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &first, &second, &third, &fourth) + if err != nil { + return err + } + + c.cpus = iterateAndAppend(first, second, c.cpus) + c.cpus = iterateAndAppend(third, fourth, c.cpus) + } else if NumaAwareCpuAlloc { + var fifth, sixth int + var tmpCpus []int + + file, err := os.Open("/sys/devices/system/node/online") + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + // get numa node range + _, err = fmt.Sscanf(line, "%d-%d", &first, &second) + if err != nil { + return err + } + + for i := first; i <= second; i++ { + file, err := os.Open("/sys/devices/system/node/node" + fmt.Sprint(i) + "/cpulist") + if err != nil { + return err + } + defer file.Close() + + // get numa node cores + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &third, &fourth, &fifth, &sixth) + if err != nil { + return err + } + + // get numa node cores from first range + tmpCpus = iterateAndAppend(third, fourth, tmpCpus) + + // discard cpu 0 + if tmpCpus[0] == 0 && !*UseCpu0{ + tmpCpus = tmpCpus[1:] + } + + // get numa node cores from second range + tmpCpus = iterateAndAppend(fifth, sixth, tmpCpus) + + // make c.cpus divisible by maxContainerCount * nCpus, so we don't have to check which numa will be used + // and we can use offsets + count_to_remove := len(tmpCpus) % (c.maxContainerCount * *NConfiguredCpus) + c.cpus = append(c.cpus, tmpCpus[:len(tmpCpus)-count_to_remove]...) + tmpCpus = tmpCpus[:0] + } } else { - return errors.New("cgroup unknown fs: " + string(byteOutput)) - } + // Path depends on cgroup version. We need to check which version is in use. + // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/' + // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1. + cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/") + byteOutput, err := cmd.CombinedOutput() + if err != nil { + return err + } - file, err := os.Open(CpuPath) - if err != nil { - return err - } - defer file.Close() - - sc := bufio.NewScanner(file) - sc.Scan() - line := sc.Text() - _, err = fmt.Sscanf(line, "%d-%d", &first, &last) - if err != nil { - return err + CpuPath := CgroupPath + if strings.Contains(string(byteOutput), "tmpfs") { + CpuPath += "cpuset/cpuset.effective_cpus" + } else if strings.Contains(string(byteOutput), "cgroup2fs") { + CpuPath += "cpuset.cpus.effective" + } else { + return errors.New("cgroup unknown fs: " + string(byteOutput)) + } + + file, err := os.Open(CpuPath) + if err != nil { + return err + } + defer file.Close() + + sc := bufio.NewScanner(file) + sc.Scan() + line := sc.Text() + _, err = fmt.Sscanf(line, "%d-%d", &first, &second) + if err != nil { + return err + } + c.cpus = iterateAndAppend(first, second, c.cpus) } - for i := first; i <= last; i++ { - c.cpus = append(c.cpus, i) + + // discard cpu 0 + if c.cpus[0] == 0 && !*UseCpu0 { + c.cpus = c.cpus[1:] } return nil } func CpuAllocator() (*CpuAllocatorT, error) { if cpuAllocator == nil { + var err error cpuAllocator = new(CpuAllocatorT) - err := cpuAllocator.readCpus() + cpuAllocator.maxContainerCount = 4 + buildNumberStr := os.Getenv("BUILD_NUMBER") + + if buildNumberStr != "" { + cpuAllocator.runningInCi = true + // get last digit of build number + cpuAllocator.buildNumber, err = strconv.Atoi(buildNumberStr[len(buildNumberStr)-1:]) + if err != nil { + return nil, err + } + } + err = cpuAllocator.readCpus() if err != nil { return nil, err } diff --git a/extras/hs-test/infra/hst_suite.go b/extras/hs-test/infra/hst_suite.go index a6ba14676d0..b2e069343a1 100644 --- a/extras/hs-test/infra/hst_suite.go +++ b/extras/hs-test/infra/hst_suite.go @@ -33,6 +33,8 @@ var IsVppDebug = flag.Bool("debug", false, "attach gdb to vpp") var NConfiguredCpus = flag.Int("cpus", 1, "number of CPUs assigned to vpp") var VppSourceFileDir = flag.String("vppsrc", "", "vpp source file directory") var IsDebugBuild = flag.Bool("debug_build", false, "some paths are different with debug build") +var UseCpu0 = flag.Bool("cpu0", false, "use cpu0") +var NumaAwareCpuAlloc bool var SuiteTimeout time.Duration type HstSuite struct { @@ -78,7 +80,10 @@ func (s *HstSuite) SetupSuite() { func (s *HstSuite) AllocateCpus() []int { cpuCtx, err := s.CpuAllocator.Allocate(len(s.StartedContainers), s.CpuPerVpp) - s.AssertNil(err) + // using Fail instead of AssertNil to make error message more readable + if err != nil { + Fail(fmt.Sprint(err)) + } s.AddCpuContext(cpuCtx) return cpuCtx.cpus } |