From 153c9e1215f27ad166df0ce4bd2541d9f37a7afa Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Wed, 13 Dec 2023 17:17:27 +0100 Subject: feat(bisect): introduce scripts for VPP bisecting + Parsing common with per-patch job is moved to a library. Ticket: CSIT-1618 Change-Id: I185bea084a29e6a37ef94e9da42b192a6a81fc17 Signed-off-by: Vratko Polak --- resources/libraries/bash/entry/bisect.sh | 188 +++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 resources/libraries/bash/entry/bisect.sh (limited to 'resources/libraries/bash/entry') diff --git a/resources/libraries/bash/entry/bisect.sh b/resources/libraries/bash/entry/bisect.sh new file mode 100644 index 0000000000..d5cb1d51ba --- /dev/null +++ b/resources/libraries/bash/entry/bisect.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash + +# Copyright (c) 2023 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -exuo pipefail + +# This entry script does not change which CSIT branch is used, +# use "with_oper_for_vpp.sh" wrapper for that. +# +# This script is to be used for locating performance regressions +# (or breakages, or progressions, or fixes). +# It uses "git bisect" commands on the VPP repository, +# between the triggered VPP patch and a commit specified in the first argument +# of the gerrit comment text. +# The other arguments are used as tag expressions for selecting tests as usual. +# Many different result types are supported. 
+# +# Logs are present in the archive directory, but usually the main output +# is the offending commit as identified by "git bisect", visible in console. +# +# While selecting just one testcase is the intended use, +# this script should be able to deal with multiple testcases as well, +# grouping all the values together. This usually inflates +# the standard deviation, but it is not clear how that affects the bisection. +# +# For the bisection decision, the jumpavg library is used, +# deciding whether shorter description is achieved by forcefully grouping +# the middle results with the old, or with the new ones. +# If the shortest description is achieved with 3 separate groups, +# the bisect interval focuses on the biggest relative change +# (with respect to pairwise maximum). +# +# If a test fails, an artificial result is used to distinguish +# from normal results. Currently, the value 1.0, with the multiplicity of 4. +# +# Note that if there was a VPP API change that affects tests in the interval, +# there frequently is no good way for a single CSIT commit to work there. +# You can try manually reverting the CSIT changes to make tests pass, +# possibly needing to search over multiple subintervals. +# Using an older CSIT commit (possibly cherry-picking the bisect Change +# if it was not present in CSIT compatible with old enough VPP builds) +# is the fastest solution; but beware of CSIT-induced performance effects +# (e.g. TRex settings). +# +# If a regression happens during a subinterval where the test fails +# due to a bug in VPP, you may try to create a new commit chain +# with the fix cherry-picked to the start of the interval. +# Do not do that as a chain in Gerrit, it would be long and Gerrit will refuse +# edits of already merged Changes. +# Instead, add a block of bash code to do the manipulation +# on local git history between checkout and bisect. 
#
# At the start, the script executes the first bisect iteration in an attempt
# to avoid work if the search interval has only one commit (or is invalid).
# Only when the work is needed, earliest and latest commits are built
# and tested. Branches "earliest", "middle" and "latest" are temporarily created
# as a way to remember which commits to check out.
#
# Test results are parsed from json files,
# symlinks are used to tell python script which results to compare.
#
# Assumptions:
# + There is a directory holding VPP repo with patch under test checked out.
# + It contains csit subdirectory with CSIT code to use (this script is there).
# + Everything needed to build VPP is already installed locally.
# Consequences:
# + Working directory is switched to the VPP repo root.
# + At the end, VPP repo has checked out and built some commit,
#   as chosen by "git bisect".
# + Directories build_root, build and csit are reset during the run.
# + The following directories (relative to VPP repo) are (re)created:
# ++ csit_{earliest,middle,latest}, build_{earliest,latest},
# ++ archive, csit/archive, csit/download_dir.
# + Symlinks csit_{early,late,mid} are also created.
# Arguments:
# - ${1} - If present, override JOB_NAME to simplify manual usage.

# "set -eu" handles failures from the following three lines.
# The entry directory is resolved in two steps on purpose: every command
# substitution stays quoted (paths with whitespace are not word-split)
# and a failing readlink aborts the script directly, instead of handing
# an empty argument to dirname.
BASH_ENTRY_DIR="$(readlink -e "${BASH_SOURCE[0]}")"
BASH_ENTRY_DIR="$(dirname "${BASH_ENTRY_DIR}")"
BASH_FUNCTION_DIR="$(readlink -e "${BASH_ENTRY_DIR}/../function")"
# The "die" function is not available until common.sh is sourced,
# so the first source needs its own failure handling.
source "${BASH_FUNCTION_DIR}/common.sh" || {
    echo "Source failed." >&2
    exit 1
}
source "${BASH_FUNCTION_DIR}/per_patch.sh" || die "Source failed."
# Cleanup needs ansible.
source "${BASH_FUNCTION_DIR}/ansible.sh" || die "Source failed."
common_dirs || die
check_prerequisites || die
set_perpatch_vpp_dir || die
# An absent first argument is passed as an empty string ("set -u" safe).
get_test_code "${1-}" || die
get_test_tag_string || die
# Unfortunately, git bisect only works at the top of the repo.
cd "${VPP_DIR}" || die

# Save the current commit.
git checkout -b "latest" || die "Failed to create branch: latest"
# Save the lower bound commit.
git checkout -b "earliest" || die "Failed to create branch: earliest"
git reset --hard "${GIT_BISECT_FROM}" || {
    die "Failed to reset earliest branch to: ${GIT_BISECT_FROM}"
}

# This is the place for custom code manipulating local git history.

#git checkout -b "alter"
#...
#git checkout "latest"
#git rebase "alter" || git rebase --skip
#git branch -D "alter"

git bisect start || die
# TODO: Can we add a trap for "git bisect reset" or even "deactivate",
# without affecting the inner trap for unreserve and cleanup?
git checkout "latest" || die "Failed to checkout latest commit."
git status || die
git describe || die
git bisect new || die
# Performing first iteration early to avoid testing or even building.
git checkout "earliest" || die "Failed to checkout earliest commit."
git status || die
git describe || die
# The first iteration.
# The output is kept in git.log for the check below; "pipefail" (set at the
# top of the script) makes a git failure visible despite the trailing tee.
git bisect old | tee "git.log" || die "Invalid bisect interval?"
git checkout -b "middle" || die "Failed to create branch: middle"
git status || die
git describe || die
# Work is needed only if the first line of the bisect output starts with
# "Bisecting:". Using "grep -F" as fgrep is obsolescent in GNU grep.
if head -n 1 "git.log" | cut -b -11 | grep -F -q "Bisecting:"; then
    echo "Building and testing initial bounds."
else
    echo "Single commit, no work needed."
    exit 0
fi
# Building latest first, good for avoiding DPDK rebuilds.
git checkout "latest" || die "Failed to checkout latest commit."
build_vpp_ubuntu "LATEST" || die
set_aside_build_artifacts "latest" || die
git checkout "earliest" || die "Failed to checkout earliest commit."
git status || die
git describe || die
build_vpp_ubuntu "EARLIEST" || die
set_aside_build_artifacts "earliest" || die
# Only the "middle" branch is kept; the bounds are already built and set aside.
git checkout "middle" || die "Failed to checkout middle commit."
git branch -D "earliest" "latest" || die "Failed to remove branches."
# Done with repo manipulation for now, testing commences.
initialize_csit_dirs "earliest" "middle" "latest" || die
set_perpatch_dut || die
select_topology || die
select_arch_os || die
activate_virtualenv "${VPP_DIR}" || die
generate_tests || die
archive_tests || die

# TODO: Does it matter which build is tested first?

# First bound: test the earliest build. The testbed is reserved here
# and is released only after the latest build has been tested as well.
select_build "build_earliest" || die
check_download_dir || die
reserve_and_cleanup_testbed || die
run_robot || die
move_test_results "csit_earliest" || die
ln --symbolic --no-target-directory "csit_earliest" "csit_early" || die

# Explicit cleanup, in case the previous test left the testbed in a bad shape.
ansible_playbook "cleanup"

# Second bound: test the latest build, then release the testbed.
select_build "build_latest" || die
check_download_dir || die
run_robot || die
move_test_results "csit_latest" || die
ln --symbolic --no-target-directory "csit_latest" "csit_late" || die
untrap_and_unreserve_testbed || die

# See function documentation for the logic in the loop.
main_bisect_loop || die
# In worst case, the middle branch is still checked out.
# TODO: Is there a way to ensure "middle" branch is always deleted?
git branch -D "middle" || true
# Delete symlinks to prevent duplicate archiving.
rm --verbose --recursive --force "csit_early" "csit_late" "csit_mid"