author     Vratko Polak <vrpolak@cisco.com>    2023-09-08 17:22:20 +0200
committer  Vratko Polak <vrpolak@cisco.com>    2023-09-08 17:22:20 +0200
commit     d8ec3f8673346c0dc93e567159771f24c1bf74fc (patch)
tree       fcd41208a2844af34e9ac1308574342c189b854f /resources
parent     312cdcea80a8234a4a59b0dc8fc6d60a0c4ca73d (diff)
feat(perpatch): parse results from json
+ Use test names in output.
- Methodology updated in subsequent change.
Change-Id: I6a62f87249ea79262778f68d00f9bb81134f0b02
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources')
-rw-r--r--  resources/libraries/bash/entry/per_patch_perf.sh |   4
-rw-r--r--  resources/libraries/bash/function/per_patch.sh   | 238
-rw-r--r--  resources/tools/integrated/compare_perpatch.py   | 173
3 files changed, 118 insertions, 297 deletions
diff --git a/resources/libraries/bash/entry/per_patch_perf.sh b/resources/libraries/bash/entry/per_patch_perf.sh
index b1854992e2..aba899689e 100644
--- a/resources/libraries/bash/entry/per_patch_perf.sh
+++ b/resources/libraries/bash/entry/per_patch_perf.sh
@@ -73,13 +73,13 @@ for ((iter=0; iter<iterations; iter++)); do
     select_build "build_current" || die
     check_download_dir || die
     run_robot || die
-    archive_parse_test_results "csit_current/${iter}" || die
+    archive_test_results "csit_current/${iter}" || die
     # TODO: Use less heavy way to avoid apt remove failures.
     ansible_playbook "cleanup" || die
     select_build "build_parent" || die
     check_download_dir || die
     run_robot || die
-    archive_parse_test_results "csit_parent/${iter}" || die
+    archive_test_results "csit_parent/${iter}" || die
 done
 untrap_and_unreserve_testbed || die
 compare_test_results  # The error code becomes this script's error code.
diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh
index 0cf7a6503d..cfd22e7826 100644
--- a/resources/libraries/bash/function/per_patch.sh
+++ b/resources/libraries/bash/function/per_patch.sh
@@ -46,26 +46,6 @@ function archive_test_results () {
 }
 
 
-function archive_parse_test_results () {
-
-    # Arguments:
-    # - ${1}: Directory to archive to. Required. Parent has to exist.
-    # Variables read:
-    # - TARGET - Target directory.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-    # - archive_test_results - Archiving results.
-    # - parse_results - See definition in this file.
-
-    set -exuo pipefail
-
-    archive_test_results "$1" || die
-    parse_results "${TARGET}" || {
-        die "The function should have died on error."
-    }
-}
-
-
 function build_vpp_ubuntu_amd64 () {
 
     # This function is using make pkg-verify to build VPP with all dependencies
@@ -152,224 +132,6 @@ function initialize_csit_dirs () {
 }
 
 
-function parse_results () {
-
-    # Currently "parsing" is just few greps on output.xml.
-    # TODO: Parse json outputs properly.
-    #
-    # The current implementation attempts to parse for BMRR, PDR and passrate.
-    # If failures are present, they are reported as fake throughput values,
-    # enabling bisection to focus on the cause (or the fix) of the failures.
-    #
-    # The fake values are created with MRR multiplicity,
-    # otherwise jumpavg (which dislikes short groups) could misclassify them.
-    #
-    # Arguments:
-    # - ${1} - Path to (existing) directory holding robot output.xml result.
-    # Files read:
-    # - output.xml - From argument location.
-    # Files updated:
-    # - results.txt - (Re)created, in argument location.
-    # Variables read:
-    # - CSIT_PERF_TRIAL_MULTIPLICITY - To create fake results of this length.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-    # - parse_results_mrr - See definition in this file.
-    # - parse_results_ndrpdr - See definition in this file.
-    # - parse_results_passrate - See definition in this file.
-    # - parse_results_soak - See definition in this file.
-
-    set -exuo pipefail
-
-    rel_dir="$(readlink -e "${1}")" || die "Readlink failed."
-    in_file="${rel_dir}/output.xml" || die
-    out_file="${rel_dir}/results.txt" || die
-    echo "Parsing ${in_file} putting results into ${out_file}" || die
-    # Frst attempt: (B)MRR.
-    if parse_results_mrr "${in_file}" "${out_file}"; then
-        return 0
-    fi
-    # BMRR parsing failed. Attempt PDR/NDR.
-    if parse_results_ndrpdr "${in_file}" "${out_file}"; then
-        return 0
-    fi
-    # PDR/NDR parsing failed. Attempt soak.
-    if parse_results_soak "${in_file}" "${out_file}"; then
-        return 0
-    fi
-    # Soak parsing failed.
-    # Probably not a perf test at all (or a failed one),
-    # but we can still bisect by passrate.
-    parse_results_passrate "${in_file}" "${out_file}" || die
-}
-
-
-function parse_results_mrr () {
-
-    # Parse MRR test message(s) into JSON-readable output.
-    #
-    # Return non-zero if parsing fails.
-    #
-    # Arguments:
-    # - ${1} - Path to (existing) input file. Required.
-    # - ${2} - Path to (overwritten if exists) output file. Required.
-    # Files read:
-    # - output.xml - The input file from argument location.
-    # Files updated:
-    # - results.txt - (Re)created, in argument location.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-
-    set -exuo pipefail
-
-    in_file="${1}" || die "Two arguments needed."
-    out_file="${2}" || die "Two arguments needed."
-    pattern='Maximum Receive Rate trial results in .*' || die
-    pattern+=' per second: .*\]</status>' || die
-    # RC of the following line is returned.
-    grep -o "${pattern}" "${in_file}" | grep -o '\[.*\]' > "${out_file}"
-}
-
-
-function parse_results_ndrpdr () {
-
-    # Parse NDRPDR test message(s) for PDR_LOWER, into JSON-readable output.
-    #
-    # Return non-zero if parsing fails.
-    # Parse for PDR, unless environment variable says NDR.
-    #
-    # Arguments:
-    # - ${1} - Path to (existing) input file. Required.
-    # - ${2} - Path to (overwritten if exists) output file. Required.
-    # Variables read:
-    # - FDIO_CSIT_PERF_PARSE_NDR - If defined and "yes", parse for NDR, not PDR.
-    # Files read:
-    # - output.xml - The input file from argument location.
-    # Files updated:
-    # - results.txt - (Re)created, in argument location.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-
-    set -exuo pipefail
-
-    in_file="${1}" || die "Two arguments needed."
-    out_file="${2}" || die "Two arguments needed."
-    if [[ "${FDIO_CSIT_PERF_PARSE_NDR:-no}" == "yes" ]]; then
-        pattern1="Arguments: [ '\\nNDR_LOWER: " || die
-    else
-        pattern1="Arguments: [ '\\nPDR_LOWER: " || die
-    fi
-    # Adapted from https://superuser.com/a/377084
-    pattern2='(?<=R: ).*(?= pps)' || die
-    if fgrep "${pattern1}" "${in_file}" | grep -Po "${pattern2}" >> "${out_file}"
-    then
-        # Add bracket https://www.shellhacks.com/sed-awk-add-end-beginning-line/
-        sed -i 's/.*/[&]/' "${out_file}"
-        # Returns nonzero if fails.
-        return "$?"
-    fi
-    # Maybe it was CPS instead of pps?
-    pattern2='(?<=R: ).*(?= CPS)' || die
-    if fgrep "${pattern1}" "${in_file}" | grep -Po "${pattern2}" >> "${out_file}"
-    then
-        # Add bracket https://www.shellhacks.com/sed-awk-add-end-beginning-line/
-        sed -i 's/.*/[&]/' "${out_file}"
-        # Returns nonzero if fails.
-        return "$?"
-    else
-        return 1
-    fi
-}
-
-
-function parse_results_passrate () {
-
-    # Create fake values for failed tests.
-    #
-    # This function always passes (or dies).
-    #
-    # A non-zero but small value is chosen for failed run, to distinguish from
-    # real nonzero perf (which are big in general) and real zero values.
-    # A medium sized value is chosen for a passed run.
-    # This way bisect can search for breakages and fixes in device tests.
-    # At least in theory, as device tests are bootstrapped too differently.
-    #
-    # The fake value is repeated according to BMRR multiplicity,
-    # because a single value can be lost in high stdev data.
-    # (And it does not hurt for single value outputs such as NDR.)
-    #
-    # TODO: Count number of tests and generate fake results for every one.
-    # Currently that would interfere with test retry logic.
-    #
-    # Arguments:
-    # - ${1} - Path to (existing) input file. Required.
-    # - ${2} - Path to (overwritten if exists) output file. Required.
-    # Variables read:
-    # - CSIT_PERF_TRIAL_MULTIPLICITY - To create fake results of this length.
-    # Files read:
-    # - output.xml - The input file from argument location.
-    # Files updated:
-    # - results.txt - (Re)created, in argument location.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-
-    set -exuo pipefail
-
-    in_file="${1}" || die "Two arguments needed."
-    out_file="${2}" || die "Two arguments needed."
-    # The last status is the top level (global) robot status.
-    # It only passes if there were no (critical) test failures.
-    if fgrep '<status status=' "${out_file}" | tail -n 1 | fgrep '"PASS"'; then
-        fake_value="30.0" || die
-    else
-        fake_value="2.0" || die
-    fi
-    out_arr=("[") || die
-    for i in `seq "${CSIT_PERF_TRIAL_MULTIPLICITY:-1}"`; do
-        out_arr+=("${fake_value}" ",") || die
-    done
-    # The Python part uses JSON parser, the last comma has to be removed.
-    # Requires Bash 4.3 https://stackoverflow.com/a/36978740
-    out_arr[-1]="]" || die
-    # TODO: Is it possible to avoid space separation by manipulating IFS?
-    echo "${out_arr[@]}" > "${out_file}" || die
-}
-
-
-function parse_results_soak () {
-
-    # Parse soak test message(s) for lower bound, into JSON-readable output.
-    #
-    # Return non-zero if parsing fails.
-    #
-    # Arguments:
-    # - ${1} - Path to (existing) input file. Required.
-    # - ${2} - Path to (overwritten if exists) output file. Required.
-    # Files read:
-    # - output.xml - The input file from argument location.
-    # Files updated:
-    # - results.txt - (Re)created, in argument location.
-    # Functions called:
-    # - die - Print to stderr and exit, defined in common.sh
-
-    set -exuo pipefail
-
-    in_file="${1}" || die "Two arguments needed."
-    out_file="${2}" || die "Two arguments needed."
-    pattern1='PLRsearch lower bound: .*, .*<' || die
-    # Adapted from https://superuser.com/a/377084
-    pattern2='(?<=: ).*(?= pps)' || die
-    if grep "${pattern1}" "${in_file}" | grep -Po "${pattern2}" >> "${out_file}"
-    then
-        # Add bracket https://www.shellhacks.com/sed-awk-add-end-beginning-line/
-        sed -i 's/.*/[&]/' "${out_file}"
-        # Returns nonzero if fails.
-    else
-        return 1
-    fi
-}
-
-
 function select_build () {
 
     # Arguments:
diff --git a/resources/tools/integrated/compare_perpatch.py b/resources/tools/integrated/compare_perpatch.py
index b4d52dcdfe..0adb6ae73e 100644
--- a/resources/tools/integrated/compare_perpatch.py
+++ b/resources/tools/integrated/compare_perpatch.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -13,72 +13,127 @@
 """Script for determining whether per-patch perf test votes -1.
 
-This script assumes there exist two text files with processed BMRR results,
-located at hardcoded relative paths (subdirs thereof), having several lines
-of json-parseable lists of float values, corresponding to testcase results.
+This script expects a particular tree created on a filesystem by
+per_patch_perf.sh bootstrap script, including test results
+exported as json files according to a current model schema.
+This script extracts the results (according to result type)
+and joins them into one list of floats for parent and one for current.
+
 This script then uses jumpavg library to determine whether there was
 a regression, progression or no change for each testcase.
+
-If number of tests does not match, or there was a regression,
+If the set of test names does not match, or there was a regression,
 this script votes -1 (by exiting with code 1), otherwise it votes +1 (exit 0).
 """
 
 import json
+import os
 import sys
+
+from typing import Dict, List
 
 from resources.libraries.python import jumpavg
 
 
-def main():
-    """Execute the main logic, return the code to return as return code.
+def parse(dirpath: str, fake_value: float) -> Dict[str, List[float]]:
+    """Look for test jsons, extract scalar results.
+
+    Files other than .json are skipped, jsons without test_id are skipped.
+    If the test failed, four fake values are used as a fake result.
+
+    Units are ignored, as both parent and current are tested
+    with the same CSIT code so the unit should be identical.
+
+    :param dirpath: Path to the directory tree to examine.
+    :param fake_value: Fake value to use for test cases that failed.
+    :type dirpath: str
+    :type fake_value: float
+    :returns: Mapping from test IDs to list of measured values.
+    :rtype: Dict[str, List[float]]
+    :raises RuntimeError: On duplicate test ID or unknown test type.
+    """
+    results = {}
+    for root, _, files in os.walk(dirpath):
+        for filename in files:
+            if not filename.endswith(".json"):
+                continue
+            filepath = os.path.join(root, filename)
+            with open(filepath, "rt", encoding="utf8") as file_in:
+                data = json.load(file_in)
+            if "test_id" not in data:
+                continue
+            name = data["test_id"]
+            if name in results:
+                raise RuntimeError(f"Duplicate: {name}")
+            if not data["passed"]:
+                results[name] = [fake_value] * 4
+                continue
+            result_object = data["result"]
+            result_type = result_object["type"]
+            if result_type == "mrr":
+                results[name] = result_object["receive_rate"]["rate"]["values"]
+            elif result_type == "ndrpdr":
+                results[name] = [result_object["pdr"]["lower"]["rate"]["value"]]
+            elif result_type == "soak":
+                results[name] = [
+                    result_object["critical_rate"]["lower"]["rate"]["value"]
+                ]
+            elif result_type == "reconf":
+                results[name] = [result_object["loss"]["time"]["value"]]
+            elif result_type == "hoststack":
+                results[name] = [result_object["bandwidth"]["value"]]
+            else:
+                raise RuntimeError(f"Unknown result type: {result_type}")
+    return results
+
+
+def main() -> int:
+    """Execute the main logic, return the number to use as the return code.
+
+    Call parse to get parent and current data.
+    Use higher fake value for parent, so changes that keep a test failing
+    are marked as regressions.
+
+    If there are multiple iterations, the value lists are joined.
+    For each test, call jumpavg.classify to detect possible regression.
+
+    If there is at least one regression, return 3.
 
     :returns: Return code, 0 or 3 based on the comparison result.
     :rtype: int
     """
     iteration = -1
-    parent_iterations = list()
-    current_iterations = list()
-    num_tests = None
+    parent_aggregate = {}
+    current_aggregate = {}
+    test_names = None
     while 1:
         iteration += 1
-        parent_lines = list()
-        current_lines = list()
-        filename = f"csit_parent/{iteration}/results.txt"
-        try:
-            with open(filename) as parent_file:
-                parent_lines = parent_file.readlines()
-        except IOError:
+        parent_results = {}
+        current_results = {}
+        parent_results = parse(f"csit_parent/{iteration}", fake_value=2.0)
+        parent_names = set(parent_results.keys())
+        if test_names is None:
+            test_names = parent_names
+        if not parent_names:
+            # No more iterations.
             break
-        num_lines = len(parent_lines)
-        filename = f"csit_current/{iteration}/results.txt"
-        with open(filename) as current_file:
-            current_lines = current_file.readlines()
-        if num_lines != len(current_lines):
-            print(
-                f"Number of tests does not match within iteration {iteration}",
-                file=sys.stderr
-            )
-            return 1
-        if num_tests is None:
-            num_tests = num_lines
-        elif num_tests != num_lines:
-            print(
-                f"Number of tests does not match previous at iteration "
-                f"{iteration}", file=sys.stderr
-            )
-            return 1
-        parent_iterations.append(parent_lines)
-        current_iterations.append(current_lines)
+        assert parent_names == test_names, f"{parent_names} != {test_names}"
+        current_results = parse(f"csit_current/{iteration}", fake_value=1.0)
+        current_names = set(current_results.keys())
+        assert (
+            current_names == parent_names
+        ), f"{current_names} != {parent_names}"
+        for name in test_names:
+            if name not in parent_aggregate:
+                parent_aggregate[name] = []
+            if name not in current_aggregate:
+                current_aggregate[name] = []
+            parent_aggregate[name].extend(parent_results[name])
+            current_aggregate[name].extend(current_results[name])
     exit_code = 0
-    for test_index in range(num_tests):
-        parent_values = list()
-        current_values = list()
-        for iteration_index, _ in enumerate(parent_iterations):
-            parent_values.extend(
-                json.loads(parent_iterations[iteration_index][test_index])
-            )
-            current_values.extend(
-                json.loads(current_iterations[iteration_index][test_index])
-            )
+    for name in test_names:
+        print(f"Test name: {name}")
+        parent_values = parent_aggregate[name]
+        current_values = current_aggregate[name]
         print(f"Time-ordered MRR values for parent build: {parent_values}")
         print(f"Time-ordered MRR values for current build: {current_values}")
         parent_values = sorted(parent_values)
@@ -87,11 +142,14 @@ def main():
         parent_stats = jumpavg.AvgStdevStats.for_runs(parent_values)
         current_stats = jumpavg.AvgStdevStats.for_runs(current_values)
         parent_group_list = jumpavg.BitCountingGroupList(
-            max_value=max_value).append_group_of_runs([parent_stats])
-        combined_group_list = parent_group_list.copy(
-        ).extend_runs_to_last_group([current_stats])
+            max_value=max_value
+        ).append_group_of_runs([parent_stats])
+        combined_group_list = (
+            parent_group_list.copy().extend_runs_to_last_group([current_stats])
+        )
         separated_group_list = parent_group_list.append_group_of_runs(
-            [current_stats])
+            [current_stats]
+        )
         print(f"Value-ordered MRR values for parent build: {parent_values}")
         print(f"Value-ordered MRR values for current build: {current_values}")
         avg_diff = (current_stats.avg - parent_stats.avg) / parent_stats.avg
@@ -103,7 +161,7 @@ def main():
             f" {combined_group_list[0].stats}"
         )
         bits_diff = separated_group_list.bits - combined_group_list.bits
-        compared = u"longer" if bits_diff >= 0 else u"shorter"
+        compared = "longer" if bits_diff >= 0 else "shorter"
         print(
f"Separate groups are {compared} than single group" f" by {abs(bits_diff)} bits" @@ -112,16 +170,17 @@ def main(): # That matters if only stats (not list of floats) are given. classified_list = jumpavg.classify([parent_values, current_values]) if len(classified_list) < 2: - print(f"Test test_index {test_index}: normal (no anomaly)") + print(f"Test {name}: normal (no anomaly)") continue anomaly = classified_list[1].comment - if anomaly == u"regression": - print(f"Test test_index {test_index}: anomaly regression") + if anomaly == "regression": + print(f"Test {name}: anomaly regression") exit_code = 3 # 1 or 2 can be caused by other errors continue - print(f"Test test_index {test_index}: anomaly {anomaly}") + print(f"Test {name}: anomaly {anomaly}") print(f"Exit code: {exit_code}") return exit_code -if __name__ == u"__main__": + +if __name__ == "__main__": sys.exit(main()) |