aboutsummaryrefslogtreecommitdiffstats
path: root/test/test/autotest_runner.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/test/autotest_runner.py')
-rw-r--r--test/test/autotest_runner.py519
1 files changed, 276 insertions, 243 deletions
diff --git a/test/test/autotest_runner.py b/test/test/autotest_runner.py
index a692f069..36941a40 100644
--- a/test/test/autotest_runner.py
+++ b/test/test/autotest_runner.py
@@ -3,18 +3,19 @@
# The main logic behind running autotests in parallel
+from __future__ import print_function
import StringIO
import csv
-import multiprocessing
+from multiprocessing import Pool, Queue
import pexpect
import re
import subprocess
import sys
import time
+import glob
+import os
# wait for prompt
-
-
def wait_prompt(child):
try:
child.sendline()
@@ -27,143 +28,150 @@ def wait_prompt(child):
else:
return False
-# run a test group
-# each result tuple in results list consists of:
-# result value (0 or -1)
-# result string
-# test name
-# total test run time (double)
-# raw test log
-# test report (if not available, should be None)
-#
-# this function needs to be outside AutotestRunner class
-# because otherwise Pool won't work (or rather it will require
-# quite a bit of effort to make it work).
+# get all valid NUMA nodes
+def get_numa_nodes():
+ return [
+ int(
+ re.match(r"node(\d+)", os.path.basename(node))
+ .group(1)
+ )
+ for node in glob.glob("/sys/devices/system/node/node*")
+ ]
-def run_test_group(cmdline, test_group):
- results = []
- child = None
+
+# find first (or any, really) CPU on a particular node, will be used to spread
+# processes around NUMA nodes to avoid exhausting memory on particular node
+def first_cpu_on_node(node_nr):
+ cpu_path = glob.glob("/sys/devices/system/node/node%d/cpu*" % node_nr)[0]
+ cpu_name = os.path.basename(cpu_path)
+ m = re.match(r"cpu(\d+)", cpu_name)
+ return int(m.group(1))
+
+
+pool_child = None # per-process child
+
+
+# we initialize each worker with a queue because we need per-pool unique
+# command-line arguments, but we cannot do different arguments in an initializer
+# because the API doesn't allow per-worker initializer arguments. so, instead,
+# we will initialize with a shared queue, and dequeue command-line arguments
+# from this queue
+def pool_init(queue, result_queue):
+ global pool_child
+
+ cmdline, prefix = queue.get()
start_time = time.time()
- startuplog = None
+ name = ("Start %s" % prefix) if prefix != "" else "Start"
+
+ # use default prefix if no prefix was specified
+ prefix_cmdline = "--file-prefix=%s" % prefix if prefix != "" else ""
+
+ # append prefix to cmdline
+ cmdline = "%s %s" % (cmdline, prefix_cmdline)
+
+ # prepare logging of init
+ startuplog = StringIO.StringIO()
# run test app
try:
- # prepare logging of init
- startuplog = StringIO.StringIO()
- print >>startuplog, "\n%s %s\n" % ("=" * 20, test_group["Prefix"])
- print >>startuplog, "\ncmdline=%s" % cmdline
+ print("\n%s %s\n" % ("=" * 20, prefix), file=startuplog)
+ print("\ncmdline=%s" % cmdline, file=startuplog)
- child = pexpect.spawn(cmdline, logfile=startuplog)
+ pool_child = pexpect.spawn(cmdline, logfile=startuplog)
# wait for target to boot
- if not wait_prompt(child):
- child.close()
+ if not wait_prompt(pool_child):
+ pool_child.close()
- results.append((-1,
+ result = tuple((-1,
"Fail [No prompt]",
- "Start %s" % test_group["Prefix"],
+ name,
+ time.time() - start_time,
+ startuplog.getvalue(),
+ None))
+ pool_child = None
+ else:
+ result = tuple((0,
+ "Success",
+ name,
time.time() - start_time,
startuplog.getvalue(),
None))
-
- # mark all tests as failed
- for test in test_group["Tests"]:
- results.append((-1, "Fail [No prompt]", test["Name"],
- time.time() - start_time, "", None))
- # exit test
- return results
-
except:
- results.append((-1,
+ result = tuple((-1,
"Fail [Can't run]",
- "Start %s" % test_group["Prefix"],
+ name,
time.time() - start_time,
startuplog.getvalue(),
None))
+ pool_child = None
- # mark all tests as failed
- for t in test_group["Tests"]:
- results.append((-1, "Fail [Can't run]", t["Name"],
- time.time() - start_time, "", None))
- # exit test
- return results
-
- # startup was successful
- results.append((0, "Success", "Start %s" % test_group["Prefix"],
- time.time() - start_time, startuplog.getvalue(), None))
+ result_queue.put(result)
- # parse the binary for available test commands
- binary = cmdline.split()[0]
- stripped = 'not stripped' not in subprocess.check_output(['file', binary])
- if not stripped:
- symbols = subprocess.check_output(['nm', binary]).decode('utf-8')
- avail_cmds = re.findall('test_register_(\w+)', symbols)
- # run all tests in test group
- for test in test_group["Tests"]:
+# run a test
+# each result tuple in results list consists of:
+# result value (0 or -1)
+# result string
+# test name
+# total test run time (double)
+# raw test log
+# test report (if not available, should be None)
+#
+# this function needs to be outside AutotestRunner class because otherwise Pool
+# won't work (or rather it will require quite a bit of effort to make it work).
+def run_test(target, test):
+ global pool_child
- # create log buffer for each test
- # in multiprocessing environment, the logging would be
- # interleaved and will create a mess, hence the buffering
- logfile = StringIO.StringIO()
- child.logfile = logfile
+ if pool_child is None:
+ return -1, "Fail [No test process]", test["Name"], 0, "", None
- result = ()
+ # create log buffer for each test
+ # in multiprocessing environment, the logging would be
+ # interleaved and will create a mess, hence the buffering
+ logfile = StringIO.StringIO()
+ pool_child.logfile = logfile
- # make a note when the test started
- start_time = time.time()
+ # make a note when the test started
+ start_time = time.time()
- try:
- # print test name to log buffer
- print >>logfile, "\n%s %s\n" % ("-" * 20, test["Name"])
+ try:
+ # print test name to log buffer
+ print("\n%s %s\n" % ("-" * 20, test["Name"]), file=logfile)
- # run test function associated with the test
- if stripped or test["Command"] in avail_cmds:
- result = test["Func"](child, test["Command"])
- else:
- result = (0, "Skipped [Not Available]")
+ # run test function associated with the test
+ result = test["Func"](pool_child, test["Command"])
- # make a note when the test was finished
- end_time = time.time()
+ # make a note when the test was finished
+ end_time = time.time()
- # append test data to the result tuple
- result += (test["Name"], end_time - start_time,
- logfile.getvalue())
+ log = logfile.getvalue()
- # call report function, if any defined, and supply it with
- # target and complete log for test run
- if test["Report"]:
- report = test["Report"](self.target, log)
+ # append test data to the result tuple
+ result += (test["Name"], end_time - start_time, log)
- # append report to results tuple
- result += (report,)
- else:
- # report is None
- result += (None,)
- except:
- # make a note when the test crashed
- end_time = time.time()
-
- # mark test as failed
- result = (-1, "Fail [Crash]", test["Name"],
- end_time - start_time, logfile.getvalue(), None)
- finally:
- # append the results to the results list
- results.append(result)
+ # call report function, if any defined, and supply it with
+ # target and complete log for test run
+ if test["Report"]:
+ report = test["Report"](target, log)
- # regardless of whether test has crashed, try quitting it
- try:
- child.sendline("quit")
- child.close()
- # if the test crashed, just do nothing instead
+ # append report to results tuple
+ result += (report,)
+ else:
+ # report is None
+ result += (None,)
except:
- # nop
- pass
+ # make a note when the test crashed
+ end_time = time.time()
+
+ # mark test as failed
+ result = (-1, "Fail [Crash]", test["Name"],
+ end_time - start_time, logfile.getvalue(), None)
# return test results
- return results
+ return result
# class representing an instance of autotests run
@@ -181,11 +189,17 @@ class AutotestRunner:
blacklist = []
whitelist = []
- def __init__(self, cmdline, target, blacklist, whitelist):
+ def __init__(self, cmdline, target, blacklist, whitelist, n_processes):
self.cmdline = cmdline
self.target = target
+ self.binary = cmdline.split()[0]
self.blacklist = blacklist
self.whitelist = whitelist
+ self.skipped = []
+ self.parallel_tests = []
+ self.non_parallel_tests = []
+ self.n_processes = n_processes
+ self.active_processes = 0
# log file filename
logfile = "%s.log" % target
@@ -199,177 +213,196 @@ class AutotestRunner:
self.csvwriter.writerow(["test_name", "test_result", "result_str"])
# set up cmdline string
- def __get_cmdline(self, test):
- cmdline = self.cmdline
-
- # append memory limitations for each test
- # otherwise tests won't run in parallel
- if "i686" not in self.target:
- cmdline += " --socket-mem=%s" % test["Memory"]
- else:
- # affinitize startup so that tests don't fail on i686
- cmdline = "taskset 1 " + cmdline
- cmdline += " -m " + str(sum(map(int, test["Memory"].split(","))))
-
- # set group prefix for autotest group
- # otherwise they won't run in parallel
- cmdline += " --file-prefix=%s" % test["Prefix"]
+ def __get_cmdline(self, cpu_nr):
+ cmdline = ("taskset -c %i " % cpu_nr) + self.cmdline
return cmdline
- def add_parallel_test_group(self, test_group):
- self.parallel_test_groups.append(test_group)
+ def __process_result(self, result):
- def add_non_parallel_test_group(self, test_group):
- self.non_parallel_test_groups.append(test_group)
+ # unpack result tuple
+ test_result, result_str, test_name, \
+ test_time, log, report = result
- def __process_results(self, results):
- # this iterates over individual test results
- for i, result in enumerate(results):
+ # get total run time
+ cur_time = time.time()
+ total_time = int(cur_time - self.start)
- # increase total number of tests that were run
- # do not include "start" test
- if i > 0:
- self.n_tests += 1
+ # print results, test run time and total time since start
+ result = ("%s:" % test_name).ljust(30)
+ result += result_str.ljust(29)
+ result += "[%02dm %02ds]" % (test_time / 60, test_time % 60)
- # unpack result tuple
- test_result, result_str, test_name, \
- test_time, log, report = result
+ # don't print out total time every line, it's the same anyway
+ print(result + "[%02dm %02ds]" % (total_time / 60, total_time % 60))
- # get total run time
- cur_time = time.time()
- total_time = int(cur_time - self.start)
+ # if test failed and it wasn't a "start" test
+ if test_result < 0:
+ self.fails += 1
- # print results, test run time and total time since start
- result = ("%s:" % test_name).ljust(30)
- result += result_str.ljust(29)
- result += "[%02dm %02ds]" % (test_time / 60, test_time % 60)
+ # collect logs
+ self.log_buffers.append(log)
- # don't print out total time every line, it's the same anyway
- if i == len(results) - 1:
- print(result,
- "[%02dm %02ds]" % (total_time / 60, total_time % 60))
+ # create report if it exists
+ if report:
+ try:
+ f = open("%s_%s_report.rst" %
+ (self.target, test_name), "w")
+ except IOError:
+ print("Report for %s could not be created!" % test_name)
else:
- print(result)
-
- # if test failed and it wasn't a "start" test
- if test_result < 0 and not i == 0:
- self.fails += 1
-
- # collect logs
- self.log_buffers.append(log)
-
- # create report if it exists
- if report:
- try:
- f = open("%s_%s_report.rst" %
- (self.target, test_name), "w")
- except IOError:
- print("Report for %s could not be created!" % test_name)
- else:
- with f:
- f.write(report)
-
- # write test result to CSV file
- if i != 0:
- self.csvwriter.writerow([test_name, test_result, result_str])
+ with f:
+ f.write(report)
+
+ # write test result to CSV file
+ self.csvwriter.writerow([test_name, test_result, result_str])
+
+ # this function checks individual test and decides if this test should be in
+ # the group by comparing it against whitelist/blacklist. it also checks if
+ # the test is compiled into the binary, and marks it as skipped if necessary
+ def __filter_test(self, test):
+ test_cmd = test["Command"]
+ test_id = test_cmd
+
+ # dump tests are specified in full e.g. "Dump_mempool"
+ if "_autotest" in test_id:
+ test_id = test_id[:-len("_autotest")]
+
+ # filter out blacklisted/whitelisted tests
+ if self.blacklist and test_id in self.blacklist:
+ return False
+ if self.whitelist and test_id not in self.whitelist:
+ return False
+
+ # if test wasn't compiled in, remove it as well
+
+ # parse the binary for available test commands
+ stripped = 'not stripped' not in \
+ subprocess.check_output(['file', self.binary])
+ if not stripped:
+ symbols = subprocess.check_output(['nm',
+ self.binary]).decode('utf-8')
+ avail_cmds = re.findall('test_register_(\w+)', symbols)
+
+ if test_cmd not in avail_cmds:
+ # notify user
+ result = 0, "Skipped [Not compiled]", test_id, 0, "", None
+ self.skipped.append(tuple(result))
+ return False
- # this function iterates over test groups and removes each
- # test that is not in whitelist/blacklist
- def __filter_groups(self, test_groups):
- groups_to_remove = []
+ return True
- # filter out tests from parallel test groups
- for i, test_group in enumerate(test_groups):
+ def __run_test_group(self, test_group, worker_cmdlines):
+ group_queue = Queue()
+ init_result_queue = Queue()
+ for proc, cmdline in enumerate(worker_cmdlines):
+ prefix = "test%i" % proc if len(worker_cmdlines) > 1 else ""
+ group_queue.put(tuple((cmdline, prefix)))
- # iterate over a copy so that we could safely delete individual
- # tests
- for test in test_group["Tests"][:]:
- test_id = test["Command"]
+ # create a pool of worker threads
+ # we will initialize child in the initializer, and we don't need to
+ # close the child because when the pool worker gets destroyed, child
+ # closes the process
+ pool = Pool(processes=len(worker_cmdlines),
+ initializer=pool_init,
+ initargs=(group_queue, init_result_queue))
+
+ results = []
- # dump tests are specified in full e.g. "Dump_mempool"
- if "_autotest" in test_id:
- test_id = test_id[:-len("_autotest")]
+ # process all initialization results
+ for _ in range(len(worker_cmdlines)):
+ self.__process_result(init_result_queue.get())
- # filter out blacklisted/whitelisted tests
- if self.blacklist and test_id in self.blacklist:
- test_group["Tests"].remove(test)
- continue
- if self.whitelist and test_id not in self.whitelist:
- test_group["Tests"].remove(test)
+ # run all tests asynchronously
+ for test in test_group:
+ result = pool.apply_async(run_test, (self.target, test))
+ results.append(result)
+
+ # tell the pool to stop all processes once done
+ pool.close()
+
+ # iterate while we have group execution results to get
+ while len(results) > 0:
+ # iterate over a copy to be able to safely delete results
+ # this iterates over a list of group results
+ for async_result in results[:]:
+ # if the thread hasn't finished yet, continue
+ if not async_result.ready():
continue
- # modify or remove original group
- if len(test_group["Tests"]) > 0:
- test_groups[i] = test_group
- else:
- # remember which groups should be deleted
- # put the numbers backwards so that we start
- # deleting from the end, not from the beginning
- groups_to_remove.insert(0, i)
+ res = async_result.get()
- # remove test groups that need to be removed
- for i in groups_to_remove:
- del test_groups[i]
+ self.__process_result(res)
- return test_groups
+ # remove result from results list once we're done with it
+ results.remove(async_result)
# iterate over test groups and run tests associated with them
def run_all_tests(self):
# filter groups
- self.parallel_test_groups = \
- self.__filter_groups(self.parallel_test_groups)
- self.non_parallel_test_groups = \
- self.__filter_groups(self.non_parallel_test_groups)
-
- # create a pool of worker threads
- pool = multiprocessing.Pool(processes=1)
-
- results = []
-
- # whatever happens, try to save as much logs as possible
- try:
-
- # create table header
- print("")
- print("Test name".ljust(30), "Test result".ljust(29),
- "Test".center(9), "Total".center(9))
- print("=" * 80)
-
- # make a note of tests start time
- self.start = time.time()
+ self.parallel_tests = list(
+ filter(self.__filter_test,
+ self.parallel_tests)
+ )
+ self.non_parallel_tests = list(
+ filter(self.__filter_test,
+ self.non_parallel_tests)
+ )
+
+ parallel_cmdlines = []
+ # FreeBSD doesn't have NUMA support
+ numa_nodes = get_numa_nodes()
+ if len(numa_nodes) > 0:
+ for proc in range(self.n_processes):
+ # spread cpu affinity between NUMA nodes to have less chance of
+ # running out of memory while running multiple test apps in
+ # parallel. to do that, alternate between NUMA nodes in a round
+ # robin fashion, and pick an arbitrary CPU from that node to
+ # taskset our execution to
+ numa_node = numa_nodes[self.active_processes % len(numa_nodes)]
+ cpu_nr = first_cpu_on_node(numa_node)
+ parallel_cmdlines += [self.__get_cmdline(cpu_nr)]
+ # increase number of active processes so that the next cmdline
+ # gets a different NUMA node
+ self.active_processes += 1
+ else:
+ parallel_cmdlines = [self.cmdline] * self.n_processes
- # assign worker threads to run test groups
- for test_group in self.parallel_test_groups:
- result = pool.apply_async(run_test_group,
- [self.__get_cmdline(test_group),
- test_group])
- results.append(result)
+ print("Running tests with %d workers" % self.n_processes)
- # iterate while we have group execution results to get
- while len(results) > 0:
+ # create table header
+ print("")
+ print("Test name".ljust(30) + "Test result".ljust(29) +
+ "Test".center(9) + "Total".center(9))
+ print("=" * 80)
- # iterate over a copy to be able to safely delete results
- # this iterates over a list of group results
- for group_result in results[:]:
+ if len(self.skipped):
+ print("Skipped autotests:")
- # if the thread hasn't finished yet, continue
- if not group_result.ready():
- continue
+ # print out any skipped tests
+ for result in self.skipped:
+ # unpack result tuple
+ test_result, result_str, test_name, _, _, _ = result
+ self.csvwriter.writerow([test_name, test_result, result_str])
- res = group_result.get()
+ t = ("%s:" % test_name).ljust(30)
+ t += result_str.ljust(29)
+ t += "[00m 00s]"
- self.__process_results(res)
+ print(t)
- # remove result from results list once we're done with it
- results.remove(group_result)
+ # make a note of tests start time
+ self.start = time.time()
- # run non_parallel tests. they are run one by one, synchronously
- for test_group in self.non_parallel_test_groups:
- group_result = run_test_group(
- self.__get_cmdline(test_group), test_group)
+ # whatever happens, try to save as much logs as possible
+ try:
+ if len(self.parallel_tests) > 0:
+ print("Parallel autotests:")
+ self.__run_test_group(self.parallel_tests, parallel_cmdlines)
- self.__process_results(group_result)
+ if len(self.non_parallel_tests) > 0:
+ print("Non-parallel autotests:")
+ self.__run_test_group(self.non_parallel_tests, [self.cmdline])
# get total run time
cur_time = time.time()