#!/usr/bin/env bash

# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -exuo pipefail

# This entry script does not change which CSIT branch is used,
# use "with_oper_for_vpp.sh" wrapper for that.
#
# This script is to be used for locating performance regressions
# (or breakages, or progressions, or fixes).
# It uses "git bisect" commands on the VPP repository,
# between the triggered VPP patch and a commit specified in the first argument
# of the gerrit comment text.
# The other arguments are used as tag expressions for selecting tests as usual.
# Many different result types are supported.
#
# Logs are present in the archive directory, but usually the main output
# is the offending commit as identified by "git bisect", visible in console.
#
# While selecting just one testcase is the intended use,
# this script should be able to deal with multiple testcases as well,
# grouping all the values together. This usually inflates
# the standard deviation, but it is not clear how that affects the bisection.
#
# For the bisection decision, jumpavg library is used,
# deciding whether shorter description is achieved by forcefully grouping
# the middle results with the old, or with the new ones.
# If the shortest description is achieved with 3 separate groups,
# bisect interval focuses on biggest relative change
# (with respect to pairwise maximum).
#
# If a test fails, an artificial result is used to distinguish
# from normal results. Currently, the value 1.0, with the multiplicity of 4.
#
# Note that if there was a VPP API change that affects tests in the interval,
# there frequently is no good way for single CSIT commit to work there.
# You can try manually reverting the CSIT changes to make tests pass,
# possibly needing to search over multiple subintervals.
# Using an older CSIT commit (possibly cherry-picking the bisect Change
# if it was not present in CSIT compatible with old enough VPP builds)
# is the fastest solution; but beware of CSIT-induced performance effects
# (e.g. TRex settings).
#
# If a regression happens during a subinterval where the test fails
# due to a bug in VPP, you may try to create a new commit chain
# with the fix cherry-picked to the start of the interval.
# Do not do that as a chain in Gerrit, it would be long and Gerrit will refuse
# edits of already merged Changes.
# Instead, add a block of bash code to do the manipulation
# on local git history between checkout and bisect.
#
# At the start, the script executes first bisect iteration in an attempt
# to avoid work if the search interval has only one commit (or is invalid).
# Only when the work is needed, earliest and latest commits are built
# and tested. Branches "earliest", "middle" and "latest" are temporarily created
# as a way to remember which commits to check out.
#
# Test results are parsed from json files,
# symlinks are used to tell python script which results to compare.
#
# Assumptions:
# + There is a directory holding VPP repo with patch under test checked out.
# + It contains csit subdirectory with CSIT code to use (this script is there).
# + Everything needed to build VPP is already installed locally.
# Consequences:
# + Working directory is switched to the VPP repo root.
# + At the end, VPP repo has checked out and built some commit,
#   as chosen by "git bisect".
# + Directories build_root, build and csit are reset during the run.
# + The following directories (relative to VPP repo) are (re)created:
# ++ csit_{earliest,middle,latest}, build_{earliest,latest},
# ++ archive, csit/archive, csit/download_dir.
# + Symlinks csit_{early,late,mid} are also created.
# Arguments:
# - ${1} - If present, override JOB_NAME to simplify manual usage.

# "set -eu" handles failures from the following two lines.
# The inner substitution is quoted so that a script path containing
# whitespace is not word-split before reaching dirname.
BASH_ENTRY_DIR="$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")"
BASH_FUNCTION_DIR="$(readlink -e "${BASH_ENTRY_DIR}/../function")"
# The "die" helper is used only after common.sh is sourced,
# so the first source needs a standalone error handler.
source "${BASH_FUNCTION_DIR}/common.sh" || {
    echo "Source failed." >&2
    exit 1
}
source "${BASH_FUNCTION_DIR}/per_patch.sh" || die "Source failed."
# Cleanup needs ansible.
source "${BASH_FUNCTION_DIR}/ansible.sh" || die "Source failed."
common_dirs || die
check_prerequisites || die
set_perpatch_vpp_dir || die
get_test_code "${1-}" || die
get_test_tag_string || die
# Unfortunately, git bisect only works at the top of the repo.
cd "${VPP_DIR}" || die

# Save the current commit.
git checkout -b "latest" || die "Failed to create branch: latest"
# Save the lower bound commit.
git checkout -b "earliest" || die "Failed to create branch: earliest"
git reset --hard "${GIT_BISECT_FROM}" || die "Failed to reset to lower bound."

# This is the place for custom code manipulating local git history.

#git checkout -b "alter"
#...
#git checkout "latest"
#git rebase "alter" || git rebase --skip
#git branch -D "alter"

git bisect start || die
# TODO: Can we add a trap for "git bisect reset" or even "deactivate",
# without affecting the inner trap for unreserve and cleanup?
git checkout "latest" || die "Failed to checkout latest commit."
git status || die
git describe || die
git bisect new || die
# Performing first iteration early to avoid testing or even building.
git checkout "earliest" || die "Failed to checkout earliest commit."
git status || die
git describe || die
# The first iteration.
# Thanks to "pipefail", a failing "git bisect old" fails the whole pipeline
# even though "tee" itself succeeds.
git bisect old | tee "git.log" || die "Invalid bisect interval?"
git checkout -b "middle" || die "Failed to create branch: middle"
git status || die
git describe || die
# Work is needed only if the first line of bisect output says "Bisecting:".
# "grep -F" replaces the obsolescent "fgrep" alias.
if head -n 1 "git.log" | cut -b -11 | grep -F -q "Bisecting:"; then
    echo "Building and testing initial bounds."
else
    echo "Single commit, no work needed."
    exit 0
fi
# Building latest first, good for avoiding DPDK rebuilds.
git checkout "latest" || die "Failed to checkout latest commit."
build_vpp_ubuntu "LATEST" || die
set_aside_build_artifacts "latest" || die
git checkout "earliest" || die "Failed to checkout earliest commit."
git status || die
git describe || die
build_vpp_ubuntu "EARLIEST" || die
set_aside_build_artifacts "earliest" || die
git checkout "middle" || die "Failed to checkout middle commit."
git branch -D "earliest" "latest" || die "Failed to remove branches."
# Done with repo manipulation for now, testing commences.
initialize_csit_dirs "earliest" "middle" "latest" || die
set_perpatch_dut || die
select_topology || die
select_arch_os || die
activate_virtualenv "${VPP_DIR}" || die
generate_tests || die
archive_tests || die

# TODO: Does it matter which build is tested first?

select_build "build_earliest" || die
check_download_dir || die
reserve_and_cleanup_testbed || die
run_robot || die
move_test_results "csit_earliest" || die
ln -s -T "csit_earliest" "csit_early" || die

# Explicit cleanup, in case the previous test left the testbed in a bad shape.
# Deliberately not followed by "|| die": calling a function in an "||" list
# disables errexit inside it, which would change how its failures are handled.
ansible_playbook "cleanup"

select_build "build_latest" || die
check_download_dir || die
run_robot || die
move_test_results "csit_latest" || die
ln -s -T "csit_latest" "csit_late" || die
untrap_and_unreserve_testbed || die

# See function documentation for the logic in the loop.
main_bisect_loop || die
# In worst case, the middle branch is still checked out.
# TODO: Is there a way to ensure "middle" branch is always deleted?
git branch -D "middle" || true
# Delete symlinks to prevent duplicate archiving.
rm -vrf "csit_early" "csit_late" "csit_mid"