diff options
author | Vratko Polak <vrpolak@cisco.com> | 2019-06-25 12:46:12 +0200 |
---|---|---|
committer | Vratko Polak <vrpolak@cisco.com> | 2019-06-28 10:59:49 +0000 |
commit | 83070f6f4a9a682f6bbe94304dbf0830529ff475 (patch) | |
tree | ce11d7ff44f3846d7fdd866d06236876f04db415 /resources | |
parent | 25e203b678e1cfd3691a450050115c66813165c3 (diff) |
Add copyright checker to tox
+ Only looks at files edited since HEAD~.
+ Only checks files of whitelisted extensions.
+ Distinguishes missing and outdated copyrights.
+ Any line with "Copyright" substring can work.
+ Many copyright lines are tolerated, if at least one has correct year.
+ Voting.
Change-Id: I2cda0459cb191eeec7aada69c508973568039d87
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
Diffstat (limited to 'resources')
-rw-r--r-- | resources/libraries/bash/entry/check/copyright.sh | 106 |
1 files changed, 106 insertions, 0 deletions
# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -exuo pipefail

# This file should be executed from tox, as the assumed working directory
# is different from where this file is located.
# This file does not have executable flag nor shebang,
# to dissuade non-tox callers.

# This script examines any file edited since HEAD~ (filtered by extension),
# and fails if no line with "Copyright" and the current year is found.
# The list of offending files is written to copyright.log (overwriting).
# The log also specifies whether the copyright is missing or outdated.
# Files deleted by the change are skipped, as there is nothing left to check.

# As copyright notice usually gives readers also a license to use it,
# any file without copyright is potentially unusable (by non-authors).
# In the interest of open source code reuse, CSIT wants any content
# to be available under appropriate license, which depends on whether the file
# is deemed "code" (Apache License, Version 2.0), "documentation"
# (Creative Commons Attribution 4.0 International), or something else.
# Note that this checker does not check licenses,
# it assumes any copyright notice includes the correct data
# (all contributing people/organizations, all applicable licenses).

# Unfortunately, some file types are not designed to hold copyright notice,
# or at least the correct way to include it is not known yet.
# Or the file in question is processed by code which is not ready
# for (otherwise allowed) comment lines holding the copyright.
# That is why currently we need to explicitly whitelist filename patterns.

# TODO: Figure out how to add copyright notice into .svg files.
#       Do the usual .xml style comments work?
# TODO: Make the code processing text files (example: VPP_REPO_URL)
#       tolerate comment lines.
# TODO: Verify more extensions are safe to whitelist (.virl)
#       and that tools processing some format allows comments (.json).
# TODO: Ultimately remove filtering to start checking every file.

# "set -eu" handles failures from the following two lines.
BASH_CHECKS_DIR="$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")"
BASH_FUNCTION_DIR="$(readlink -e "${BASH_CHECKS_DIR}/../../function")"
source "${BASH_FUNCTION_DIR}/common.sh" || {
    echo "Source failed." >&2
    exit 1
}

# Ask for the year explicitly. Taking the last word of plain "date" output
# is locale dependent (many locales print the time zone last).
year="$(date +%Y)"

# Regexp selecting only files we expect to allow copyright.
# Start from an array, each item is to be prepended by ., appended by $,
# and joined by |.
extensions_array=("gitignore" "ini" "md" "py" "robot" "rst" "sh" "txt" "yaml")
pattern=""
for extension in "${extensions_array[@]}"; do
    pattern+="\.${extension}"'$'"\|"
done
# One more "extension" just to avoid trailing pipe.
pattern+="/Dockerfile"'$'

# Get the list of edited files. A failing "git diff" is fatal,
# as without it we do not know what to check.
changed_files="$(git diff --name-only HEAD~)" || {
    die "Failure at getting list of files to check copyright in."
}

# Keep only whitelisted names. The pattern is passed as a single quoted
# argument, so no stray quote characters end up inside the regexp.
# grep returns 1 when nothing matches, which we need to tolerate
# (changes that only edit other files); anything higher is a real error.
grep_status="0"
files_to_check="$(grep -e "${pattern}" <<<"${changed_files}")" || {
    grep_status="$?"
}
if (( grep_status > 1 )); then
    die "Failure at getting list of files to check copyright in."
fi

logfile="copyright.log"
truncate --size 0 "${logfile}" || die "truncate failed"
# Temporary +x so big changes do not spam.
set +x
while IFS= read -r filename; do
    # Skips files deleted by the change, and also the single empty line
    # the here-string delivers when there is nothing to check.
    if [[ ! -f "${filename}" ]]; then
        continue
    fi
    # Single grep per file; no pipe, so "pipefail" cannot turn an early
    # exit of a downstream "grep -q" into a false violation.
    if ! copyright_lines="$(grep -F "Copyright" -- "${filename}")"; then
        echo "No copyright found in file: ${filename}" >> "${logfile}"
    elif [[ "${copyright_lines}" != *"${year}"* ]]; then
        echo "No year ${year} copyright found in: ${filename}" >> "${logfile}"
    fi
done <<<"${files_to_check}"
set -x

if [[ -s "${logfile}" ]]; then
    warn "Copyright violations detected."
    # TODO: Disable when output size does more harm than good.
    cat "${logfile}" >&2
    warn
    warn "Copyright checker: FAIL"
    exit 1
fi
warn
warn "Copyright checker: PASS"