summaryrefslogtreecommitdiffstats
path: root/extras/hs-test/infra/cpu.go
diff options
context:
space:
mode:
authorAdrian Villin <avillin@cisco.com>2024-06-17 08:51:27 +0200
committerDave Wallace <dwallacelf@gmail.com>2024-07-08 16:27:38 +0000
commit5d171ebdc21efa4085e2c2130f595d7e0e1d2f59 (patch)
tree9b1e967d20e8c83f9780d5a76579db0edd3c5373 /extras/hs-test/infra/cpu.go
parent75e8e1e948da182dbf4f6b3394f1b7fc44c1403a (diff)
hs-test: CPU allocation improvements
- Release build runs on numa node0, debug on node1. Using the last digit of a build number to reserve 4 cores per test mmeans we can run 20 jobs (10 release, 10 debug) on the same machine, assuming we have 111 cores available (not counting core 0). Can be increased if needed, there are still some cores left. - Added separate numa aware cpu allocation - Added CPU0=true|false (useful for users with 4c/8t) Type: test Change-Id: Iba8e492a4e01a7f457e49112303887a2a27f6af9 Signed-off-by: Adrian Villin <avillin@cisco.com>
Diffstat (limited to 'extras/hs-test/infra/cpu.go')
-rw-r--r--extras/hs-test/infra/cpu.go195
1 files changed, 152 insertions, 43 deletions
diff --git a/extras/hs-test/infra/cpu.go b/extras/hs-test/infra/cpu.go
index b5555d85b98..b26a06c98db 100644
--- a/extras/hs-test/infra/cpu.go
+++ b/extras/hs-test/infra/cpu.go
@@ -7,6 +7,7 @@ import (
. "github.com/onsi/ginkgo/v2"
"os"
"os/exec"
+ "strconv"
"strings"
)
@@ -18,80 +19,188 @@ type CpuContext struct {
}
type CpuAllocatorT struct {
- cpus []int
+ cpus []int
+ runningInCi bool
+ buildNumber int
+ maxContainerCount int
+}
+
+func iterateAndAppend(start int, end int, slice []int) []int {
+ for i := start; i <= end; i++ {
+ slice = append(slice, i)
+ }
+ return slice
}
var cpuAllocator *CpuAllocatorT = nil
func (c *CpuAllocatorT) Allocate(containerCount int, nCpus int) (*CpuContext, error) {
var cpuCtx CpuContext
+ // indexes, not actual cores
+ var minCpu, maxCpu int
- // splitting cpus into equal parts; this will over-allocate cores but it's good enough for now
- maxContainerCount := 4
- // skip CPU 0
- minCpu := ((GinkgoParallelProcess() - 1) * maxContainerCount * nCpus) + 1
- maxCpu := (GinkgoParallelProcess() * maxContainerCount * nCpus)
+ if c.runningInCi {
+ minCpu = ((c.buildNumber) * c.maxContainerCount * nCpus)
+ maxCpu = ((c.buildNumber + 1) * c.maxContainerCount * nCpus) - 1
+ } else {
+ minCpu = ((GinkgoParallelProcess() - 1) * c.maxContainerCount * nCpus)
+ maxCpu = (GinkgoParallelProcess() * c.maxContainerCount * nCpus) - 1
+ }
if len(c.cpus)-1 < maxCpu {
- err := fmt.Errorf("could not allocate %d CPUs; available: %d; attempted to allocate cores %d-%d",
- nCpus*containerCount, len(c.cpus)-1, minCpu, maxCpu)
+ err := fmt.Errorf("could not allocate %d CPUs; available count: %d; attempted to allocate cores with index %d-%d; max index: %d;\n"+
+ "available cores: %v", nCpus*containerCount, len(c.cpus), minCpu, maxCpu, len(c.cpus)-1, c.cpus)
return nil, err
}
+
if containerCount == 1 {
cpuCtx.cpus = c.cpus[minCpu : minCpu+nCpus]
- } else if containerCount > 1 && containerCount <= maxContainerCount {
+ } else if containerCount > 1 && containerCount <= c.maxContainerCount {
cpuCtx.cpus = c.cpus[minCpu+(nCpus*(containerCount-1)) : minCpu+(nCpus*containerCount)]
} else {
- return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", maxContainerCount)
+ return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", c.maxContainerCount)
}
-
cpuCtx.cpuAllocator = c
return &cpuCtx, nil
}
func (c *CpuAllocatorT) readCpus() error {
- var first, last int
-
- // Path depends on cgroup version. We need to check which version is in use.
- // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/'
- // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1.
- cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/")
- byteOutput, err := cmd.CombinedOutput()
- if err != nil {
- return err
- }
- CpuPath := CgroupPath
- if strings.Contains(string(byteOutput), "tmpfs") {
- CpuPath += "cpuset/cpuset.effective_cpus"
- } else if strings.Contains(string(byteOutput), "cgroup2fs") {
- CpuPath += "cpuset.cpus.effective"
+ var first, second, third, fourth int
+ var file *os.File
+ var err error
+
+ if c.runningInCi {
+ // non-debug build runs on node0, debug on node1
+ if *IsDebugBuild {
+ file, err = os.Open("/sys/devices/system/node/node1/cpulist")
+ } else {
+ file, err = os.Open("/sys/devices/system/node/node0/cpulist")
+ }
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ sc := bufio.NewScanner(file)
+ sc.Scan()
+ line := sc.Text()
+ _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &first, &second, &third, &fourth)
+ if err != nil {
+ return err
+ }
+
+ c.cpus = iterateAndAppend(first, second, c.cpus)
+ c.cpus = iterateAndAppend(third, fourth, c.cpus)
+ } else if NumaAwareCpuAlloc {
+ var fifth, sixth int
+ var tmpCpus []int
+
+ file, err := os.Open("/sys/devices/system/node/online")
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ sc := bufio.NewScanner(file)
+ sc.Scan()
+ line := sc.Text()
+ // get numa node range
+ _, err = fmt.Sscanf(line, "%d-%d", &first, &second)
+ if err != nil {
+ return err
+ }
+
+ for i := first; i <= second; i++ {
+ file, err := os.Open("/sys/devices/system/node/node" + fmt.Sprint(i) + "/cpulist")
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ // get numa node cores
+ sc := bufio.NewScanner(file)
+ sc.Scan()
+ line := sc.Text()
+ _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &third, &fourth, &fifth, &sixth)
+ if err != nil {
+ return err
+ }
+
+ // get numa node cores from first range
+ tmpCpus = iterateAndAppend(third, fourth, tmpCpus)
+
+ // discard cpu 0
+ if tmpCpus[0] == 0 && !*UseCpu0{
+ tmpCpus = tmpCpus[1:]
+ }
+
+ // get numa node cores from second range
+ tmpCpus = iterateAndAppend(fifth, sixth, tmpCpus)
+
+ // make c.cpus divisible by maxContainerCount * nCpus, so we don't have to check which numa will be used
+ // and we can use offsets
+ count_to_remove := len(tmpCpus) % (c.maxContainerCount * *NConfiguredCpus)
+ c.cpus = append(c.cpus, tmpCpus[:len(tmpCpus)-count_to_remove]...)
+ tmpCpus = tmpCpus[:0]
+ }
} else {
- return errors.New("cgroup unknown fs: " + string(byteOutput))
- }
+ // Path depends on cgroup version. We need to check which version is in use.
+ // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/'
+ // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1.
+ cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/")
+ byteOutput, err := cmd.CombinedOutput()
+ if err != nil {
+ return err
+ }
- file, err := os.Open(CpuPath)
- if err != nil {
- return err
- }
- defer file.Close()
-
- sc := bufio.NewScanner(file)
- sc.Scan()
- line := sc.Text()
- _, err = fmt.Sscanf(line, "%d-%d", &first, &last)
- if err != nil {
- return err
+ CpuPath := CgroupPath
+ if strings.Contains(string(byteOutput), "tmpfs") {
+ CpuPath += "cpuset/cpuset.effective_cpus"
+ } else if strings.Contains(string(byteOutput), "cgroup2fs") {
+ CpuPath += "cpuset.cpus.effective"
+ } else {
+ return errors.New("cgroup unknown fs: " + string(byteOutput))
+ }
+
+ file, err := os.Open(CpuPath)
+ if err != nil {
+ return err
+ }
+ defer file.Close()
+
+ sc := bufio.NewScanner(file)
+ sc.Scan()
+ line := sc.Text()
+ _, err = fmt.Sscanf(line, "%d-%d", &first, &second)
+ if err != nil {
+ return err
+ }
+ c.cpus = iterateAndAppend(first, second, c.cpus)
}
- for i := first; i <= last; i++ {
- c.cpus = append(c.cpus, i)
+
+ // discard cpu 0
+ if c.cpus[0] == 0 && !*UseCpu0 {
+ c.cpus = c.cpus[1:]
}
return nil
}
func CpuAllocator() (*CpuAllocatorT, error) {
if cpuAllocator == nil {
+ var err error
cpuAllocator = new(CpuAllocatorT)
- err := cpuAllocator.readCpus()
+ cpuAllocator.maxContainerCount = 4
+ buildNumberStr := os.Getenv("BUILD_NUMBER")
+
+ if buildNumberStr != "" {
+ cpuAllocator.runningInCi = true
+ // get last digit of build number
+ cpuAllocator.buildNumber, err = strconv.Atoi(buildNumberStr[len(buildNumberStr)-1:])
+ if err != nil {
+ return nil, err
+ }
+ }
+ err = cpuAllocator.readCpus()
if err != nil {
return nil, err
}