1 files changed, 276 insertions, 243 deletions
diff --git a/test/test/autotest_runner.py b/test/test/autotest_runner.py
index a692f069..36941a40 100644
--- a/test/test/autotest_runner.py
+++ b/test/test/autotest_runner.py
@@ -3,18 +3,19 @@
 
 # The main logic behind running autotests in parallel
 
+from __future__ import print_function
 import StringIO
 import csv
-import multiprocessing
+from multiprocessing import Pool, Queue
 import pexpect
 import re
 import subprocess
 import sys
 import time
+import glob
+import os
 
 # wait for prompt
-
-
 def wait_prompt(child):
     try:
         child.sendline()
@@ -27,143 +28,150 @@ def wait_prompt(child):
     else:
         return False
 
-# run a test group
-# each result tuple in results list consists of:
-#   result value (0 or -1)
-#   result string
-#   test name
-#   total test run time (double)
-#   raw test log
-#   test report (if not available, should be None)
-#
-# this function needs to be outside AutotestRunner class
-# because otherwise Pool won't work (or rather it will require
-# quite a bit of effort to make it work).
 
+# get all valid NUMA nodes
+def get_numa_nodes():
+    return [
+        int(
+            re.match(r"node(\d+)", os.path.basename(node))
+            .group(1)
+        )
+        for node in glob.glob("/sys/devices/system/node/node*")
+    ]
 
-def run_test_group(cmdline, test_group):
-    results = []
-    child = None
+
+# find first (or any, really) CPU on a particular node, will be used to spread
+# processes around NUMA nodes to avoid exhausting memory on particular node
+def first_cpu_on_node(node_nr):
+    cpu_path = glob.glob("/sys/devices/system/node/node%d/cpu*" % node_nr)[0]
+    cpu_name = os.path.basename(cpu_path)
+    m = re.match(r"cpu(\d+)", cpu_name)
+    return int(m.group(1))
+
+
+pool_child = None  # per-process child
+
+
+# we initialize each worker with a queue because we need per-pool unique
+# command-line arguments, but we cannot do different arguments in an initializer
+# because the API doesn't allow per-worker initializer arguments. so, instead,
+# we will initialize with a shared queue, and dequeue command-line arguments
+# from this queue
+def pool_init(queue, result_queue):
+    global pool_child
+
+    cmdline, prefix = queue.get()
     start_time = time.time()
-    startuplog = None
+    name = ("Start %s" % prefix) if prefix != "" else "Start"
+
+    # use default prefix if no prefix was specified
+    prefix_cmdline = "--file-prefix=%s" % prefix if prefix != "" else ""
+
+    # append prefix to cmdline
+    cmdline = "%s %s" % (cmdline, prefix_cmdline)
+
+    # prepare logging of init
+    startuplog = StringIO.StringIO()
 
     # run test app
     try:
-        # prepare logging of init
-        startuplog = StringIO.StringIO()
 
-        print >>startuplog, "\n%s %s\n" % ("=" * 20, test_group["Prefix"])
-        print >>startuplog, "\ncmdline=%s" % cmdline
+        print("\n%s %s\n" % ("=" * 20, prefix), file=startuplog)
+        print("\ncmdline=%s" % cmdline, file=startuplog)
 
-        child = pexpect.spawn(cmdline, logfile=startuplog)
+        pool_child = pexpect.spawn(cmdline, logfile=startuplog)
 
         # wait for target to boot
-        if not wait_prompt(child):
-            child.close()
+        if not wait_prompt(pool_child):
+            pool_child.close()
 
-            results.append((-1,
+            result = tuple((-1,
                             "Fail [No prompt]",
-                            "Start %s" % test_group["Prefix"],
+                            name,
+                            time.time() - start_time,
+                            startuplog.getvalue(),
+                            None))
+            pool_child = None
+        else:
+            result = tuple((0,
+                            "Success",
+                            name,
                             time.time() - start_time,
                             startuplog.getvalue(),
                             None))
-
-            # mark all tests as failed
-            for test in test_group["Tests"]:
-                results.append((-1, "Fail [No prompt]", test["Name"],
-                                time.time() - start_time, "", None))
-            # exit test
-            return results
-
     except:
-        results.append((-1,
+        result = tuple((-1,
                         "Fail [Can't run]",
-                        "Start %s" % test_group["Prefix"],
+                        name,
                         time.time() - start_time,
                         startuplog.getvalue(),
                         None))
+        pool_child = None
 
-        # mark all tests as failed
-        for t in test_group["Tests"]:
-            results.append((-1, "Fail [Can't run]", t["Name"],
-                            time.time() - start_time, "", None))
-        # exit test
-        return results
-
-    # startup was successful
-    results.append((0, "Success", "Start %s" % test_group["Prefix"],
-                    time.time() - start_time, startuplog.getvalue(), None))
+    result_queue.put(result)
 
-    # parse the binary for available test commands
-    binary = cmdline.split()[0]
-    stripped = 'not stripped' not in subprocess.check_output(['file', binary])
-    if not stripped:
-        symbols = subprocess.check_output(['nm', binary]).decode('utf-8')
-        avail_cmds = re.findall('test_register_(\w+)', symbols)
 
-    # run all tests in test group
-    for test in test_group["Tests"]:
+# run a test
+# each result tuple in results list consists of:
+#   result value (0 or -1)
+#   result string
+#   test name
+#   total test run time (double)
+#   raw test log
+#   test report (if not available, should be None)
+#
+# this function needs to be outside AutotestRunner class because otherwise Pool
+# won't work (or rather it will require quite a bit of effort to make it work).
+def run_test(target, test):
+    global pool_child
 
-        # create log buffer for each test
-        # in multiprocessing environment, the logging would be
-        # interleaved and will create a mess, hence the buffering
-        logfile = StringIO.StringIO()
-        child.logfile = logfile
+    if pool_child is None:
+        return -1, "Fail [No test process]", test["Name"], 0, "", None
 
-        result = ()
+    # create log buffer for each test
+    # in multiprocessing environment, the logging would be
+    # interleaved and will create a mess, hence the buffering
+    logfile = StringIO.StringIO()
+    pool_child.logfile = logfile
 
-        # make a note when the test started
-        start_time = time.time()
+    # make a note when the test started
+    start_time = time.time()
 
-        try:
-            # print test name to log buffer
-            print >>logfile, "\n%s %s\n" % ("-" * 20, test["Name"])
+    try:
+        # print test name to log buffer
+        print("\n%s %s\n" % ("-" * 20, test["Name"]), file=logfile)
 
-            # run test function associated with the test
-            if stripped or test["Command"] in avail_cmds:
-                result = test["Func"](child, test["Command"])
-            else:
-                result = (0, "Skipped [Not Available]")
+        # run test function associated with the test
+        result = test["Func"](pool_child, test["Command"])
 
-            # make a note when the test was finished
-            end_time = time.time()
+        # make a note when the test was finished
+        end_time = time.time()
 
-            # append test data to the result tuple
-            result += (test["Name"], end_time - start_time,
-                       logfile.getvalue())
+        log = logfile.getvalue()
 
-            # call report function, if any defined, and supply it with
-            # target and complete log for test run
-            if test["Report"]:
-                report = test["Report"](self.target, log)
+        # append test data to the result tuple
+        result += (test["Name"], end_time - start_time, log)
 
-                # append report to results tuple
-                result += (report,)
-            else:
-                # report is None
-                result += (None,)
-        except:
-            # make a note when the test crashed
-            end_time = time.time()
-
-            # mark test as failed
-            result = (-1, "Fail [Crash]", test["Name"],
-                      end_time - start_time, logfile.getvalue(), None)
-        finally:
-            # append the results to the results list
-            results.append(result)
+        # call report function, if any defined, and supply it with
+        # target and complete log for test run
+        if test["Report"]:
+            report = test["Report"](target, log)
 
-    # regardless of whether test has crashed, try quitting it
-    try:
-        child.sendline("quit")
-        child.close()
-    # if the test crashed, just do nothing instead
+            # append report to results tuple
+            result += (report,)
+        else:
+            # report is None
+            result += (None,)
     except:
-        # nop
-        pass
+        # make a note when the test crashed
+        end_time = time.time()
+
+        # mark test as failed
+        result = (-1, "Fail [Crash]", test["Name"],
+                  end_time - start_time, logfile.getvalue(), None)
 
     # return test results
-    return results
+    return result
 
 
 # class representing an instance of autotests run
@@ -181,11 +189,17 @@ class AutotestRunner:
     blacklist = []
     whitelist = []
 
-    def __init__(self, cmdline, target, blacklist, whitelist):
+    def __init__(self, cmdline, target, blacklist, whitelist, n_processes):
         self.cmdline = cmdline
         self.target = target
+        self.binary = cmdline.split()[0]
         self.blacklist = blacklist
         self.whitelist = whitelist
+        self.skipped = []
+        self.parallel_tests = []
+        self.non_parallel_tests = []
+        self.n_processes = n_processes
+        self.active_processes = 0
 
         # log file filename
         logfile = "%s.log" % target
@@ -199,177 +213,196 @@ class AutotestRunner:
         self.csvwriter.writerow(["test_name", "test_result", "result_str"])
 
     # set up cmdline string
-    def __get_cmdline(self, test):
-        cmdline = self.cmdline
-
-        # append memory limitations for each test
-        # otherwise tests won't run in parallel
-        if "i686" not in self.target:
-            cmdline += " --socket-mem=%s" % test["Memory"]
-        else:
-            # affinitize startup so that tests don't fail on i686
-            cmdline = "taskset 1 " + cmdline
-            cmdline += " -m " + str(sum(map(int, test["Memory"].split(","))))
-
-        # set group prefix for autotest group
-        # otherwise they won't run in parallel
-        cmdline += " --file-prefix=%s" % test["Prefix"]
+    def __get_cmdline(self, cpu_nr):
+        cmdline = ("taskset -c %i " % cpu_nr) + self.cmdline
 
         return cmdline
 
-    def add_parallel_test_group(self, test_group):
-        self.parallel_test_groups.append(test_group)
+    def __process_result(self, result):
 
-    def add_non_parallel_test_group(self, test_group):
-        self.non_parallel_test_groups.append(test_group)
+        # unpack result tuple
+        test_result, result_str, test_name, \
+            test_time, log, report = result
 
-    def __process_results(self, results):
-        # this iterates over individual test results
-        for i, result in enumerate(results):
+        # get total run time
+        cur_time = time.time()
+        total_time = int(cur_time - self.start)
 
-            # increase total number of tests that were run
-            # do not include "start" test
-            if i > 0:
-                self.n_tests += 1
+        # print results, test run time and total time since start
+        result = ("%s:" % test_name).ljust(30)
+        result += result_str.ljust(29)
+        result += "[%02dm %02ds]" % (test_time / 60, test_time % 60)
 
-            # unpack result tuple
-            test_result, result_str, test_name, \
-                test_time, log, report = result
+        # don't print out total time every line, it's the same anyway
+        print(result + "[%02dm %02ds]" % (total_time / 60, total_time % 60))
 
-            # get total run time
-            cur_time = time.time()
-            total_time = int(cur_time - self.start)
+        # if test failed and it wasn't a "start" test
+        if test_result < 0:
+            self.fails += 1
 
-            # print results, test run time and total time since start
-            result = ("%s:" % test_name).ljust(30)
-            result += result_str.ljust(29)
-            result += "[%02dm %02ds]" % (test_time / 60, test_time % 60)
+        # collect logs
+        self.log_buffers.append(log)
 
-            # don't print out total time every line, it's the same anyway
-            if i == len(results) - 1:
-                print(result,
-                      "[%02dm %02ds]" % (total_time / 60, total_time % 60))
+        # create report if it exists
+        if report:
+            try:
+                f = open("%s_%s_report.rst" %
+                         (self.target, test_name), "w")
+            except IOError:
+                print("Report for %s could not be created!" % test_name)
             else:
-                print(result)
-
-            # if test failed and it wasn't a "start" test
-            if test_result < 0 and not i == 0:
-                self.fails += 1
-
-            # collect logs
-            self.log_buffers.append(log)
-
-            # create report if it exists
-            if report:
-                try:
-                    f = open("%s_%s_report.rst" %
-                             (self.target, test_name), "w")
-                except IOError:
-                    print("Report for %s could not be created!" % test_name)
-                else:
-                    with f:
-                        f.write(report)
-
-            # write test result to CSV file
-            if i != 0:
-                self.csvwriter.writerow([test_name, test_result, result_str])
+                with f:
+                    f.write(report)
+
+        # write test result to CSV file
+        self.csvwriter.writerow([test_name, test_result, result_str])
+
+    # this function checks individual test and decides if this test should be in
+    # the group by comparing it against  whitelist/blacklist. it also checks if
+    # the test is compiled into the binary, and marks it as skipped if necessary
+    def __filter_test(self, test):
+        test_cmd = test["Command"]
+        test_id = test_cmd
+
+        # dump tests are specified in full e.g. "Dump_mempool"
+        if "_autotest" in test_id:
+            test_id = test_id[:-len("_autotest")]
+
+        # filter out blacklisted/whitelisted tests
+        if self.blacklist and test_id in self.blacklist:
+            return False
+        if self.whitelist and test_id not in self.whitelist:
+            return False
+
+        # if test wasn't compiled in, remove it as well
+
+        # parse the binary for available test commands
+        stripped = 'not stripped' not in \
+                   subprocess.check_output(['file', self.binary])
+        if not stripped:
+            symbols = subprocess.check_output(['nm',
+                                               self.binary]).decode('utf-8')
+            avail_cmds = re.findall('test_register_(\w+)', symbols)
+
+            if test_cmd not in avail_cmds:
+                # notify user
+                result = 0, "Skipped [Not compiled]", test_id, 0, "", None
+                self.skipped.append(tuple(result))
+                return False
 
-    # this function iterates over test groups and removes each
-    # test that is not in whitelist/blacklist
-    def __filter_groups(self, test_groups):
-        groups_to_remove = []
+        return True
 
-        # filter out tests from parallel test groups
-        for i, test_group in enumerate(test_groups):
+    def __run_test_group(self, test_group, worker_cmdlines):
+        group_queue = Queue()
+        init_result_queue = Queue()
+        for proc, cmdline in enumerate(worker_cmdlines):
+            prefix = "test%i" % proc if len(worker_cmdlines) > 1 else ""
+            group_queue.put(tuple((cmdline, prefix)))
 
-            # iterate over a copy so that we could safely delete individual
-            # tests
-            for test in test_group["Tests"][:]:
-                test_id = test["Command"]
+        # create a pool of worker threads
+        # we will initialize child in the initializer, and we don't need to
+        # close the child because when the pool worker gets destroyed, child
+        # closes the process
+        pool = Pool(processes=len(worker_cmdlines),
+                    initializer=pool_init,
+                    initargs=(group_queue, init_result_queue))
+
+        results = []
 
-                # dump tests are specified in full e.g. "Dump_mempool"
-                if "_autotest" in test_id:
-                    test_id = test_id[:-len("_autotest")]
+        # process all initialization results
+        for _ in range(len(worker_cmdlines)):
+            self.__process_result(init_result_queue.get())
 
-                # filter out blacklisted/whitelisted tests
-                if self.blacklist and test_id in self.blacklist:
-                    test_group["Tests"].remove(test)
-                    continue
-                if self.whitelist and test_id not in self.whitelist:
-                    test_group["Tests"].remove(test)
+        # run all tests asynchronously
+        for test in test_group:
+            result = pool.apply_async(run_test, (self.target, test))
+            results.append(result)
+
+        # tell the pool to stop all processes once done
+        pool.close()
+
+        # iterate while we have group execution results to get
+        while len(results) > 0:
+            # iterate over a copy to be able to safely delete results
+            # this iterates over a list of group results
+            for async_result in results[:]:
+                # if the thread hasn't finished yet, continue
+                if not async_result.ready():
                     continue
 
-            # modify or remove original group
-            if len(test_group["Tests"]) > 0:
-                test_groups[i] = test_group
-            else:
-                # remember which groups should be deleted
-                # put the numbers backwards so that we start
-                # deleting from the end, not from the beginning
-                groups_to_remove.insert(0, i)
+                res = async_result.get()
 
-        # remove test groups that need to be removed
-        for i in groups_to_remove:
-            del test_groups[i]
+                self.__process_result(res)
 
-        return test_groups
+                # remove result from results list once we're done with it
+                results.remove(async_result)
 
     # iterate over test groups and run tests associated with them
     def run_all_tests(self):
         # filter groups
-        self.parallel_test_groups = \
-            self.__filter_groups(self.parallel_test_groups)
-        self.non_parallel_test_groups = \
-            self.__filter_groups(self.non_parallel_test_groups)
-
-        # create a pool of worker threads
-        pool = multiprocessing.Pool(processes=1)
-
-        results = []
-
-        # whatever happens, try to save as much logs as possible
-        try:
-
-            # create table header
-            print("")
-            print("Test name".ljust(30), "Test result".ljust(29),
-                  "Test".center(9), "Total".center(9))
-            print("=" * 80)
-
-            # make a note of tests start time
-            self.start = time.time()
+        self.parallel_tests = list(
+            filter(self.__filter_test,
+                   self.parallel_tests)
+        )
+        self.non_parallel_tests = list(
+            filter(self.__filter_test,
+                   self.non_parallel_tests)
+        )
+
+        parallel_cmdlines = []
+        # FreeBSD doesn't have NUMA support
+        numa_nodes = get_numa_nodes()
+        if len(numa_nodes) > 0:
+            for proc in range(self.n_processes):
+                # spread cpu affinity between NUMA nodes to have less chance of
+                # running out of memory while running multiple test apps in
+                # parallel. to do that, alternate between NUMA nodes in a round
+                # robin fashion, and pick an arbitrary CPU from that node to
+                # taskset our execution to
+                numa_node = numa_nodes[self.active_processes % len(numa_nodes)]
+                cpu_nr = first_cpu_on_node(numa_node)
+                parallel_cmdlines += [self.__get_cmdline(cpu_nr)]
+                # increase number of active processes so that the next cmdline
+                # gets a different NUMA node
+                self.active_processes += 1
+        else:
+            parallel_cmdlines = [self.cmdline] * self.n_processes
 
-            # assign worker threads to run test groups
-            for test_group in self.parallel_test_groups:
-                result = pool.apply_async(run_test_group,
-                                          [self.__get_cmdline(test_group),
-                                           test_group])
-                results.append(result)
+        print("Running tests with %d workers" % self.n_processes)
 
-            # iterate while we have group execution results to get
-            while len(results) > 0:
+        # create table header
+        print("")
+        print("Test name".ljust(30) + "Test result".ljust(29) +
+              "Test".center(9) + "Total".center(9))
+        print("=" * 80)
 
-                # iterate over a copy to be able to safely delete results
-                # this iterates over a list of group results
-                for group_result in results[:]:
+        if len(self.skipped):
+            print("Skipped autotests:")
 
-                    # if the thread hasn't finished yet, continue
-                    if not group_result.ready():
-                        continue
+            # print out any skipped tests
+            for result in self.skipped:
+                # unpack result tuple
+                test_result, result_str, test_name, _, _, _ = result
+                self.csvwriter.writerow([test_name, test_result, result_str])
 
-                    res = group_result.get()
+                t = ("%s:" % test_name).ljust(30)
+                t += result_str.ljust(29)
+                t += "[00m 00s]"
 
-                    self.__process_results(res)
+                print(t)
 
-                    # remove result from results list once we're done with it
-                    results.remove(group_result)
+        # make a note of tests start time
+        self.start = time.time()
 
-            # run non_parallel tests. they are run one by one, synchronously
-            for test_group in self.non_parallel_test_groups:
-                group_result = run_test_group(
-                    self.__get_cmdline(test_group), test_group)
+        # whatever happens, try to save as much logs as possible
+        try:
+            if len(self.parallel_tests) > 0:
+                print("Parallel autotests:")
+                self.__run_test_group(self.parallel_tests, parallel_cmdlines)
 
-                self.__process_results(group_result)
+            if len(self.non_parallel_tests) > 0:
+                print("Non-parallel autotests:")
+                self.__run_test_group(self.non_parallel_tests, [self.cmdline])
 
             # get total run time
             cur_time = time.time()