diff options
Diffstat (limited to 'resources/libraries')
-rw-r--r-- | resources/libraries/bash/entry/bisect.sh | 188 | ||||
-rw-r--r-- | resources/libraries/bash/function/common.sh | 20 | ||||
-rw-r--r-- | resources/libraries/bash/function/per_patch.sh | 59 | ||||
-rw-r--r-- | resources/libraries/python/model/parse.py | 108 |
4 files changed, 374 insertions, 1 deletions
diff --git a/resources/libraries/bash/entry/bisect.sh b/resources/libraries/bash/entry/bisect.sh new file mode 100644 index 0000000000..d5cb1d51ba --- /dev/null +++ b/resources/libraries/bash/entry/bisect.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -exuo pipefail + +# This entry script does not change which CSIT branch is used, +# use "with_oper_for_vpp.sh" wrapper for that. +# +# This script is to be used for locating performance regressions +# (or breakages, or progressions, or fixes). +# It uses "git bisect" commands on the VPP repository, +# between the triggered VPP patch and a commit specified in the first argument +# of the gerrit comment text. +# The other arguments are used as tag expressions for selecting tests as usual. +# Many different result types are supported. +# +# Logs are present in the archive directory, but usually the main output +# is the offending commit as identified by "git bisect", visible in console. +# +# While selecting just one testcase is the intended use, +# this script should be able to deal with multiple testcases as well, +# grouping all the values together. This usually inflates +# the standard deviation, but it is not clear how that affects the bisection. +# +# For the bisection decision, jumpavg library is used, +# deciding whether shorter description is achieved by forcefully grouping +# the middle results with the old, or with the new ones. +# If the shortest description is achieved with 3 separate groups, +# bisect interval focuses on biggest relative change +# (with respect to pairwise maximum). +# +# If a test fails, an artificial result is used to distinguish +# from normal results. Currently, the value 1.0, with the multiplicity of 4. +# +# Note that if there was a VPP API change that affects tests in the interval, +# there frequently is no good way for single CSIT commit to work there. +# You can try manually reverting the CSIT changes to make tests pass, +# possibly needing to search over multiple subintervals. +# Using and older CSIT commit (possibly cherry-picking the bisect Change +# if it was not present in CSIT compatible with old enough VPP builds) +# is the fastest solution; but beware of CSIT-induced performance effects +# (e.g. TRex settings). +# +# If a regression happens during a subinterval where the test fails +# due to a bug in VPP, you may try to create a new commit chain +# with the fix cherry-picked to the start of the interval. +# Do not do that as a chain in Gerrit, it would be long and Gerrit will refuse +# edits of already merged Changes. +# Instead, add a block of bash code to do the manipulation +# on local git history between checkout and bisect. +# +# At the start, the script executes first bisect iteration in an attempt +# to avoid work if the search interval has only one commit (or is invalid). +# Only when the work is needed, earliest and latest commits are built +# and tested. Branches "earliest", "middle" and "latest" are temporarily created +# as a way to remember which commits to check out. +# +# Test results are parsed from json files, +# symlinks are used to tell python script which results to compare. +# +# Assumptions: +# + There is a directory holding VPP repo with patch under test checked out. +# + It contains csit subdirectory with CSIT code to use (this script is there). +# + Everything needed to build VPP is already installed locally. +# Consequences: +# + Working directory is switched to the VPP repo root. +# + At the end, VPP repo has checked out and built some commit, +# as chosen by "git bisect". +# + Directories build_root, build and csit are reset during the run. +# + The following directories (relative to VPP repo) are (re)created: +# ++ csit_{earliest,middle,latest}, build_{earliest,latest}, +# ++ archive, csit/archive, csit/download_dir. +# + Symlinks csit_{early,late,mid} are also created. +# Arguments: +# - ${1} - If present, override JOB_NAME to simplify manual usage. + +# "set -eu" handles failures from the following two lines. +BASH_ENTRY_DIR="$(dirname $(readlink -e "${BASH_SOURCE[0]}"))" +BASH_FUNCTION_DIR="$(readlink -e "${BASH_ENTRY_DIR}/../function")" +source "${BASH_FUNCTION_DIR}/common.sh" || { + echo "Source failed." >&2 + exit 1 +} +source "${BASH_FUNCTION_DIR}/per_patch.sh" || die "Source failed." +# Cleanup needs ansible. +source "${BASH_FUNCTION_DIR}/ansible.sh" || die "Source failed." +common_dirs || die +check_prerequisites || die +set_perpatch_vpp_dir || die +get_test_code "${1-}" || die +get_test_tag_string || die +# Unfortunately, git bisect only works at the top of the repo. +cd "${VPP_DIR}" || die + +# Save the current commit. +git checkout -b "latest" +# Save the lower bound commit. +git checkout -b "earliest" +git reset --hard "${GIT_BISECT_FROM}" + +# This is the place for custom code manipulating local git history. + +#git checkout -b "alter" +#... +#git checkout "latest" +#git rebase "alter" || git rebase --skip +#git branch -D "alter" + +git bisect start || die +# TODO: Can we add a trap for "git bisect reset" or even "deactivate", +# without affecting the inner trap for unreserve and cleanup? +git checkout "latest" +git status || die +git describe || die +git bisect new || die +# Performing first iteration early to avoid testing or even building. +git checkout "earliest" || die "Failed to checkout earliest commit." +git status || die +git describe || die +# The first iteration. +git bisect old | tee "git.log" || die "Invalid bisect interval?" +git checkout -b "middle" || die "Failed to create branch: middle" +git status || die +git describe || die +if head -n 1 "git.log" | cut -b -11 | fgrep -q "Bisecting:"; then + echo "Building and testing initial bounds." +else + echo "Single commit, no work needed." + exit 0 +fi +# Building latest first, good for avoiding DPDK rebuilds. +git checkout "latest" || die "Failed to checkout latest commit." +build_vpp_ubuntu "LATEST" || die +set_aside_build_artifacts "latest" || die +git checkout "earliest" || die "Failed to checkout earliest commit." +git status || die +git describe || die +build_vpp_ubuntu "EARLIEST" || die +set_aside_build_artifacts "earliest" || die +git checkout "middle" || die "Failed to checkout middle commit." +git branch -D "earliest" "latest" || die "Failed to remove branches." +# Done with repo manipulation for now, testing commences. +initialize_csit_dirs "earliest" "middle" "latest" || die +set_perpatch_dut || die +select_topology || die +select_arch_os || die +activate_virtualenv "${VPP_DIR}" || die +generate_tests || die +archive_tests || die + +# TODO: Does it matter which build is tested first? + +select_build "build_earliest" || die +check_download_dir || die +reserve_and_cleanup_testbed || die +run_robot || die +move_test_results "csit_earliest" || die +ln -s -T "csit_earliest" "csit_early" || die + +# Explicit cleanup, in case the previous test left the testbed in a bad shape. +ansible_playbook "cleanup" + +select_build "build_latest" || die +check_download_dir || die +run_robot || die +move_test_results "csit_latest" || die +ln -s -T "csit_latest" "csit_late" || die +untrap_and_unreserve_testbed || die + +# See function documentation for the logic in the loop. +main_bisect_loop || die +# In worst case, the middle branch is still checked out. +# TODO: Is there a way to ensure "middle" branch is always deleted? +git branch -D "middle" || true +# Delete symlinks to prevent duplicate archiving. +rm -vrf "csit_early" "csit_late" "csit_mid" diff --git a/resources/libraries/bash/function/common.sh b/resources/libraries/bash/function/common.sh index 44149ca6e1..c2b169f550 100644 --- a/resources/libraries/bash/function/common.sh +++ b/resources/libraries/bash/function/common.sh @@ -290,7 +290,7 @@ function compose_robot_arguments () { *"device"*) ROBOT_ARGS+=("--suite" "tests.${DUT}.device") ;; - *"perf"*) + *"perf"* | *"bisect"*) ROBOT_ARGS+=("--suite" "tests.${DUT}.perf") ;; *) @@ -557,6 +557,8 @@ function get_test_tag_string () { # Variables set: # - TEST_TAG_STRING - The string following trigger word in gerrit comment. # May be empty, or even not set on event types not adding comment. + # - GIT_BISECT_FROM - If bisecttest, the commit hash to bisect from. + # Else not set. # Variables exported optionally: # - GRAPH_NODE_VARIANT - Node variant to test with, set if found in trigger. @@ -566,6 +568,10 @@ function get_test_tag_string () { if [[ "${GERRIT_EVENT_TYPE-}" == "comment-added" ]]; then case "${TEST_CODE}" in + # Order matters, bisect job contains "perf" in its name. + *"bisect"*) + trigger="bisecttest" + ;; *"device"*) trigger="devicetest" ;; @@ -591,6 +597,18 @@ function get_test_tag_string () { comment=$(fgrep "${trigger}" <<< "${comment}" || true) TEST_TAG_STRING=$("${cmd[@]}" <<< "${comment}" || true) fi + if [[ "${trigger}" == "bisecttest" ]]; then + # Intentionally without quotes, so spaces delimit elements. + test_tag_array=(${TEST_TAG_STRING}) || die "How could this fail?" + # First "argument" of bisecttest is a commit hash. + GIT_BISECT_FROM="${test_tag_array[0]}" || { + die "Bisect job requires commit hash." + } + # Update the tag string (tag expressions only, no commit hash). + TEST_TAG_STRING="${test_tag_array[@]:1}" || { + die "Bisect job needs a single test, no default." + } + fi if [[ -n "${TEST_TAG_STRING-}" ]]; then test_tag_array=(${TEST_TAG_STRING}) if [[ "${test_tag_array[0]}" == "icl" ]]; then diff --git a/resources/libraries/bash/function/per_patch.sh b/resources/libraries/bash/function/per_patch.sh index b9680a1560..44bd57da80 100644 --- a/resources/libraries/bash/function/per_patch.sh +++ b/resources/libraries/bash/function/per_patch.sh @@ -110,6 +110,65 @@ function initialize_csit_dirs () { } +function main_bisect_loop () { + + # Perform the iterative part of bisect entry script. + # + # The logic is too complex to remain in the entry script. + # + # At the start, the loop assumes git bisect old/new has just been executed, + # and verified more iterations are needed. + # The iteration cleans the build directory and builds the new mid commit. + # Then, testbed is reserved, tests run, and testbed unreserved. + # Results are moved from default to archive location + # (indexed by iteration number) and analyzed. + # The new adjective ("old" or "new") is selected, + # and git bisect with the adjective is executed. + # The symlinks csit_early and csit_late are updated to tightest bounds. + # The git.log file is examined and if the bisect is finished, loop ends. + + iteration=0 + while true + do + let iteration+=1 + git clean -dffx "build"/ "build-root"/ || die + build_vpp_ubuntu "MIDDLE" || die + select_build "build-root" || die + check_download_dir || die + reserve_and_cleanup_testbed || die + run_robot || die + move_test_results "csit_middle/${iteration}" || die + untrap_and_unreserve_testbed || die + rm -vf "csit_mid" || die + ln -s -T "csit_middle/${iteration}" "csit_mid" || die + set +e + python3 "${TOOLS_DIR}/integrated/compare_bisect.py" + bisect_rc="${?}" + set -e + if [[ "${bisect_rc}" == "3" ]]; then + adjective="new" + rm -v "csit_late" || die + ln -s -T "csit_middle/${iteration}" "csit_late" || die + elif [[ "${bisect_rc}" == "0" ]]; then + adjective="old" + rm -v "csit_early" || die + ln -s -T "csit_middle/${iteration}" "csit_early" || die + else + die "Unexpected return code: ${bisect_rc}" + fi + git bisect "${adjective}" | tee "git.log" || die + git describe || die + git status || die + if head -n 1 "git.log" | cut -b -11 | fgrep -q "Bisecting:"; then + echo "Still bisecting..." + else + echo "Bisecting done." + break + fi + done +} + + function move_test_results () { # Arguments: diff --git a/resources/libraries/python/model/parse.py b/resources/libraries/python/model/parse.py new file mode 100644 index 0000000000..b2e8da67ea --- /dev/null +++ b/resources/libraries/python/model/parse.py @@ -0,0 +1,108 @@ +# Copyright (c) 2023 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library for parsing results from JSON back to python objects. + +This is useful for vpp-csit jobs like per-patch performance verify. +Such jobs invoke robot multiple times, each time on a different build. +Each robot invocation may execute several test cases. +How exactly are the results compared depends on the job type, +but extracting just the main results from jsons (file trees) is a common task, +so it is placed into this library. + +As such, the code in this file does not directly interact +with the code in other files in this directory +(result comparison is done outside robot invocation), +but all files share common assumptions about json structure. + +The function here expects a particular tree created on a filesystem by +a bootstrap script, including test results +exported as json files according to a current model schema. +This script extracts the results (according to result type) +and joins them mapping from test IDs to lists of floats. +Also, the result is cached into a results.json file, +so each tree is parsed only once. + +The cached result does not depend on tree placement, +so the bootstrap script may move and copy trees around +before or after parsing. +""" + +import json +import os +import pathlib + +from typing import Dict, List + + +def parse(dirpath: str, fake_value: float = 1.0) -> Dict[str, List[float]]: + """Look for test jsons, extract scalar results. + + Files other than .json are skipped, jsons without test_id are skipped. + If the test failed, four fake values are used as a fake result. + + Units are ignored, as both parent and current are tested + with the same CSIT code so the unit should be identical. + + The result is also cached as results.json file. + + :param dirpath: Path to the directory tree to examine. + :param fail_value: Fake value to use for test cases that failed. + :type dirpath: str + :type fail_falue: float + :returns: Mapping from test IDs to list of measured values. + :rtype: Dict[str, List[float]] + :raises RuntimeError: On duplicate test ID or unknown test type. + """ + if not pathlib.Path(dirpath).is_dir(): + # This happens when per-patch runs out of iterations. + return {} + resultpath = pathlib.Path(f"{dirpath}/results.json") + if resultpath.is_file(): + with open(resultpath, "rt", encoding="utf8") as file_in: + return json.load(file_in) + results = {} + for root, _, files in os.walk(dirpath): + for filename in files: + if not filename.endswith(".json"): + continue + filepath = os.path.join(root, filename) + with open(filepath, "rt", encoding="utf8") as file_in: + data = json.load(file_in) + if "test_id" not in data: + continue + name = data["test_id"] + if name in results: + raise RuntimeError(f"Duplicate: {name}") + if not data["passed"]: + results[name] = [fake_value] * 4 + continue + result_object = data["result"] + result_type = result_object["type"] + if result_type == "mrr": + results[name] = result_object["receive_rate"]["rate"]["values"] + elif result_type == "ndrpdr": + results[name] = [result_object["pdr"]["lower"]["rate"]["value"]] + elif result_type == "soak": + results[name] = [ + result_object["critical_rate"]["lower"]["rate"]["value"] + ] + elif result_type == "reconf": + results[name] = [result_object["loss"]["time"]["value"]] + elif result_type == "hoststack": + results[name] = [result_object["bandwidth"]["value"]] + else: + raise RuntimeError(f"Unknown result type: {result_type}") + with open(resultpath, "wt", encoding="utf8") as file_out: + json.dump(results, file_out, indent=1, separators=(", ", ": ")) + return results |