diff options
Diffstat (limited to 'fdio.infra.ansible/roles/cleanup')
10 files changed, 436 insertions, 0 deletions
diff --git a/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh b/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh new file mode 100644 index 0000000000..ede2db1273 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash + +set -euo pipefail + +function die () { + # Print the message to standard error end exit with error code specified + # by the second argument. + # + # Hardcoded values: + # - The default error message. + # Arguments: + # - ${1} - The whole error message, be sure to quote. Optional + # - ${2} - the code to exit with, default: 1. + + set +eu + warn "${1:-Unspecified run-time error occurred!}" + exit "${2:-1}" +} + + +function set_eligibility_off { + # Set Nomad eligibility to ineligible for scheduling. Fail otherwise. + + set -euo pipefail + + node_id="$(nomad node status | grep $(hostname) | cut -d ' ' -f 1)" || die + node_status="$(nomad node status | grep $(hostname))" || die + + if [[ "${node_status}" != *"ineligible"* ]]; then + nomad node eligibility -disable "${node_id}" || die + node_status="$(nomad node status | grep $(hostname))" || die + if [[ "${node_status}" != *"ineligible"* ]]; then + die "Set eligibility off failed!" + fi + fi +} + + +function set_eligibility_on { + # Set Nomad eligibility to eligible for scheduling. Fail otherwise. + + set -euo pipefail + + node_id="$(nomad node status | grep $(hostname) | cut -d ' ' -f 1)" || die + node_status="$(nomad node status | grep $(hostname))" || die + + if [[ "${node_status}" == *"ineligible"* ]]; then + nomad node eligibility -enable "${node_id}" || die + node_status="$(nomad node status | grep $(hostname))" || die + if [[ "${node_status}" == *"ineligible"* ]]; then + die "Set eligibility on failed!" + fi + fi +} + + +function restart_vfs_service { + # Stop and start VF serice. This will reinitialize VFs and driver mappings. + + set -euo pipefail + + warn "Restarting VFs service (this may take few minutes)..." + sudo service csit-initialize-vfs stop || die "Failed to stop VFs service!" + sudo service csit-initialize-vfs start || die "Failed to start VFs service!" +} + + +function wait_for_pending_containers { + # Wait in loop for defined amount of time for pending containers to + # gracefully quit them. If parameter force is specified. Force kill them. + + # Arguments: + # - ${@} - Script parameters. + + set -euo pipefail + + retries=60 + wait_time=60 + containers=(docker ps --quiet --filter name=csit*) + + for i in $(seq 1 ${retries}); do + mapfile -t pending_containers < <( ${containers[@]} ) || die + warn "Waiting for pending containers [${pending_containers[@]}] ..." + if [ ${#pending_containers[@]} -eq 0 ]; then + break + fi + sleep "${wait_time}" || die + done + if [ ${#pending_containers[@]} -ne 0 ]; then + if [[ "${1-}" == "force" ]]; then + warn "Force killing [${pending_containers[@]}] ..." + docker rm --force ${pending_containers[@]} || die + else + die "Still few containers running!" + fi + fi +} + + +function warn () { + # Print the message to standard error. + # + # Arguments: + # - ${@} - The text of the message. + + echo "$@" >&2 +} + + +set_eligibility_off || die +wait_for_pending_containers "${@}" || die +restart_vfs_service || die +set_eligibility_on || die diff --git a/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml b/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml new file mode 100644 index 0000000000..76704ab50d --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml @@ -0,0 +1,36 @@ +--- +# file: tasks/clean_images.yaml + +- name: Clean Docker Images + block: + - name: Clean Images - Prefetch Docker Images + ansible.builtin.cron: + name: "Prefetch docker image {{ item }}" + minute: "10" + hour: "7" + job: "/usr/bin/docker pull {{ item }}" + loop: + "{{ images_to_prefetch_by_arch[ansible_machine] }}" + tags: + - prefetch-docker-images + + - name: Clean Images - Remove Dangling Docker Images + ansible.builtin.cron: + name: "Remove dangling docker images" + minute: "10" + hour: "5" + weekday: "7" + job: "/usr/bin/docker rmi $(/usr/bin/docker images --filter 'dangling=true' -q)" + tags: + - remove-docker-images-dangling + +# TODO: Disabled until all images will be in registry +# - name: Clean Images - Prune Docker Images +# cron: +# name: "Prune docker images" +# minute: "10" +# hour: "6" +# weekday: 7 +# job: "/usr/bin/docker image prune --all --force" +# tags: +# - prune-docker-images diff --git a/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml b/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml new file mode 100644 index 0000000000..dc739eb954 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml @@ -0,0 +1,42 @@ +--- +# file: tasks/kill_containers.yaml + +- name: Kill Docker Containers + block: + - name: Get Running Docker Containers + ansible.builtin.shell: "docker ps -a --filter name=DUT -q" + register: running_containers + changed_when: false + tags: + - kill-containers + + - name: Remove All Docker Containers + ansible.builtin.shell: "docker rm --force {{ item }}" + with_items: "{{ running_containers.stdout_lines }}" + tags: + - kill-containers + + rescue: + - name: Restart Docker Daemon + ansible.builtin.systemd: + name: "docker" + state: "restarted" + +- name: Kill LXC Containers + block: + - name: Get Running LXC Containers + ansible.builtin.shell: "lxc-ls" + register: running_containers + changed_when: false + tags: + - kill-containers + + - name: Remove All LXC Containers + ansible.builtin.shell: "lxc-destroy --force -n {{ item }}" + with_items: "{{ running_containers.stdout_lines }}" + tags: + - kill-containers + + rescue: + - fail: + msg: "Kill LXC containers failed!" diff --git a/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml b/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml new file mode 100644 index 0000000000..9ab98a8e57 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml @@ -0,0 +1,38 @@ +--- +# file: tasks/kill_process.yaml + +- name: Kill Process - {{ process }} + block: + - name: Get PID Of {{ process }} + ansible.builtin.shell: "ps -ef | grep -v grep | grep -w {{ process }} | awk '{print $2}'" + when: + - process is defined and process != "" + register: running_processes + tags: + - kill-process + + - name: Safe Kill {{ process }} + ansible.builtin.shell: "kill {{ item }}" + with_items: "{{ running_processes.stdout_lines }}" + ignore_errors: true + tags: + - kill-process + + - wait_for: + path: "/proc/{{ item }}/status" + state: "absent" + with_items: "{{ running_processes.stdout_lines }}" + ignore_errors: true + register: killed_processes + tags: + - kill-process + + - name: Kill Process - Force Kill {{ process }} + ansible.builtin.shell: "kill -9 {{ item }}" + with_items: "{{ killed_processes.results | select('failed') | map(attribute='item') | list }}" + tags: + - kill-process + + rescue: + - fail: + msg: "Kill process {{ process }} failed!" diff --git a/fdio.infra.ansible/roles/cleanup/tasks/main.yaml b/fdio.infra.ansible/roles/cleanup/tasks/main.yaml new file mode 100644 index 0000000000..c97b9c5d7e --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/main.yaml @@ -0,0 +1,26 @@ +--- +# file: tasks/main.yaml + +- name: tg specific + include_tasks: tg.yaml + when: "'tg' in group_names" + tags: + - cleanup + +- name: sut specific + include_tasks: sut.yaml + when: "'sut' in group_names" + tags: + - cleanup + +- name: vpp_device specific + include_tasks: vpp_device.yaml + when: "'vpp_device' in group_names" + tags: + - cleanup + +- name: nomad specific + include_tasks: nomad.yaml + when: "'nomad' in group_names" + tags: + - cleanup diff --git a/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml b/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml new file mode 100644 index 0000000000..086a4eff7d --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml @@ -0,0 +1,18 @@ +--- +# file: tasks/nomad.yaml + +- name: Host Cleanup + block: + - name: Clean Images + import_tasks: clean_images.yaml + vars: + images_to_prefetch_by_arch: + aarch64: + - "fdiotools/builder-ubuntu2204:prod-aarch64" + - "fdiotools/builder-ubuntu2004:prod-aarch64" + x86_64: + - "fdiotools/builder-ubuntu2204:prod-x86_64" + - "fdiotools/builder-ubuntu2004:prod-x86_64" + - "fdiotools/builder-debian11:prod-x86_64" + tags: + - clean-images diff --git a/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml b/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml new file mode 100644 index 0000000000..652729bc30 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml @@ -0,0 +1,21 @@ +--- +# file: tasks/remove_package.yaml + +- name: Fix Corrupted APT + ansible.builtin.shell: "dpkg --configure -a" + when: + - ansible_distribution == 'Ubuntu' + tags: + - remove-package + +- name: Remove Package - {{ package }} + ansible.builtin.apt: + name: "{{ package }}" + force: true + purge: true + state: "absent" + failed_when: false + when: + - ansible_distribution == 'Ubuntu' + tags: + - remove-package
\ No newline at end of file diff --git a/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml b/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml new file mode 100644 index 0000000000..22bf596369 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml @@ -0,0 +1,97 @@ +--- +# file: tasks/sut.yaml + +- name: Host Cleanup + block: + - name: Kill Processes - Qemu + import_tasks: kill_process.yaml + vars: + process: "qemu" + tags: + - kill-process + + - name: Kill Processes - L3fwd + import_tasks: kill_process.yaml + vars: + process: "l3fwd" + tags: + - kill-process + + - name: Kill Processes - Testpmd + import_tasks: kill_process.yaml + vars: + process: "testpmd" + tags: + - kill-process + + - name: Kill Processes - iPerf3 + import_tasks: kill_process.yaml + vars: + process: "iperf3" + tags: + - kill-process + + - name: Kill Processes - nohup + import_tasks: kill_process.yaml + vars: + process: "nohup" + tags: + - kill-process + + - name: Kill Processes - vpp + import_tasks: kill_process.yaml + vars: + process: "vpp" + tags: + - kill-process + + - name: Kill Processes - vpp_echo + import_tasks: kill_process.yaml + vars: + process: "vpp_echo" + tags: + - kill-process + + - name: Find File Or Dir - Core Zip File + ansible.builtin.find: + paths: "/tmp/" + patterns: "*tar.lzo.lrz.xz*" + register: files_to_delete + tags: + - remove-file-dir + + - name: Remove File Or Dir - Core Zip File + ansible.builtin.file: + path: "{{ item.path }}" + state: absent + with_items: "{{ files_to_delete.files }}" + tags: + - remove-file-dir + + - name: Find File Or Dir - Core Dump File + ansible.builtin.find: + paths: "/tmp/" + patterns: "*core*" + register: files_to_delete + tags: + - remove-file-dir + + - name: Remove File Or Dir - Core Dump File + ansible.builtin.file: + path: "{{ item.path }}" + state: absent + with_items: "{{ files_to_delete.files }}" + tags: + - remove-file-dir + + - name: Kill Containers - Remove All Containers + import_tasks: kill_containers.yaml + tags: + - kill-containers + + - name: Remove Packages - Remove VPP + import_tasks: remove_package.yaml + vars: + package: "*vpp*" + tags: + - remove-package diff --git a/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml b/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml new file mode 100644 index 0000000000..8c0162df2c --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml @@ -0,0 +1,13 @@ +--- +# file: tasks/tg.yaml + +- name: Host Cleanup + block: + - name: Kill Processes - TRex + import_tasks: kill_process.yaml + vars: + process: "_t-rex" + when: + - docker_tg is undefined + tags: + - kill-process diff --git a/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml b/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml new file mode 100644 index 0000000000..c97fa0cde5 --- /dev/null +++ b/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml @@ -0,0 +1,32 @@ +--- +# file: tasks/vpp_device.yaml + +- name: Host Cleanup + block: + - name: Reset vpp_device Binary + ansible.builtin.copy: + src: "files/reset_vppdevice.sh" + dest: "/usr/local/bin" + owner: "root" + group: "root" + mode: "744" + tags: + - reset-sriov + + - name: Clean Images + import_tasks: clean_images.yaml + vars: + images_to_prefetch_by_arch: + aarch64: + - "fdiotools/builder-ubuntu2004:prod-aarch64" + - "fdiotools/builder-ubuntu1804:prod-aarch64" + - "fdiotools/builder-centos8:prod-aarch64" + x86_64: + - "fdiotools/builder-ubuntu2004:prod-x86_64" + - "fdiotools/builder-ubuntu1804:prod-x86_64" + - "fdiotools/builder-debian10:prod-x86_64" + - "fdiotools/builder-debian9:prod-x86_64" + - "fdiotools/builder-centos8:prod-x86_64" + - "fdiotools/builder-centos7:prod-x86_64" + tags: + - clean-images |