Diffstat (limited to 'fdio.infra.ansible/roles/cleanup')
-rw-r--r--  fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh   | 113
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml    |  36
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml |  42
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml    |  37
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/main.yaml            |  43
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml           |  22
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml  |  21
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/sut.yaml             |  83
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/tg.yaml              |  13
-rw-r--r--  fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml      |  32
10 files changed, 442 insertions, 0 deletions
diff --git a/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh b/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh
new file mode 100644
index 0000000000..ede2db1273
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/files/reset_vppdevice.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+function die () {
+ # Print the message to standard error and exit with the error code specified
+ # by the second argument.
+ #
+ # Hardcoded values:
+ # - The default error message.
+ # Arguments:
+ # - ${1} - The whole error message, be sure to quote. Optional.
+ # - ${2} - The code to exit with, default: 1.
+
+ set +eu
+ warn "${1:-Unspecified run-time error occurred!}"
+ exit "${2:-1}"
+}
+
+
+function set_eligibility_off {
+ # Set Nomad eligibility to ineligible for scheduling. Die on failure.
+
+ set -euo pipefail
+
+ node_id="$(nomad node status | grep $(hostname) | cut -d ' ' -f 1)" || die
+ node_status="$(nomad node status | grep $(hostname))" || die
+
+ if [[ "${node_status}" != *"ineligible"* ]]; then
+ nomad node eligibility -disable "${node_id}" || die
+ node_status="$(nomad node status | grep $(hostname))" || die
+ if [[ "${node_status}" != *"ineligible"* ]]; then
+ die "Set eligibility off failed!"
+ fi
+ fi
+}
+
+
+function set_eligibility_on {
+ # Set Nomad eligibility to eligible for scheduling. Die on failure.
+
+ set -euo pipefail
+
+ node_id="$(nomad node status | grep $(hostname) | cut -d ' ' -f 1)" || die
+ node_status="$(nomad node status | grep $(hostname))" || die
+
+ if [[ "${node_status}" == *"ineligible"* ]]; then
+ nomad node eligibility -enable "${node_id}" || die
+ node_status="$(nomad node status | grep $(hostname))" || die
+ if [[ "${node_status}" == *"ineligible"* ]]; then
+ die "Set eligibility on failed!"
+ fi
+ fi
+}
+
+
+function restart_vfs_service {
+ # Stop and start the VF service. This will reinitialize VFs and driver mappings.
+
+ set -euo pipefail
+
+ warn "Restarting VFs service (this may take few minutes)..."
+ sudo service csit-initialize-vfs stop || die "Failed to stop VFs service!"
+ sudo service csit-initialize-vfs start || die "Failed to start VFs service!"
+}
+
+
+function wait_for_pending_containers {
+ # Wait in a loop for a defined amount of time for pending containers to
+ # quit gracefully. If the force parameter is specified, force kill them.
+
+ # Arguments:
+ # - ${@} - Script parameters.
+
+ set -euo pipefail
+
+ retries=60
+ wait_time=60
+ containers=(docker ps --quiet --filter "name=csit*")
+
+ for i in $(seq 1 ${retries}); do
+ mapfile -t pending_containers < <( ${containers[@]} ) || die
+ warn "Waiting for pending containers [${pending_containers[@]}] ..."
+ if [ ${#pending_containers[@]} -eq 0 ]; then
+ break
+ fi
+ sleep "${wait_time}" || die
+ done
+ if [ ${#pending_containers[@]} -ne 0 ]; then
+ if [[ "${1-}" == "force" ]]; then
+ warn "Force killing [${pending_containers[@]}] ..."
+ docker rm --force ${pending_containers[@]} || die
+ else
+ die "Still few containers running!"
+ fi
+ fi
+}
+
+
+function warn () {
+ # Print the message to standard error.
+ #
+ # Arguments:
+ # - ${@} - The text of the message.
+
+ echo "$@" >&2
+}
+
+
+set_eligibility_off || die
+wait_for_pending_containers "${@}" || die
+restart_vfs_service || die
+set_eligibility_on || die
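The script above is installed to /usr/local/bin by the vpp_device cleanup task further down (tasks/vpp_device.yaml). A minimal invocation sketch, assuming it runs on the host as a user with sudo rights and a local Nomad agent; the single optional argument is the literal word "force":

    # Mark the node ineligible, wait up to ~60 minutes for csit* containers
    # to exit, restart the VF service, then re-enable scheduling:
    /usr/local/bin/reset_vppdevice.sh

    # Same, but force-remove any containers still running after the wait:
    /usr/local/bin/reset_vppdevice.sh force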
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml b/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml
new file mode 100644
index 0000000000..e030acbff2
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/clean_images.yaml
@@ -0,0 +1,36 @@
+---
+# file: roles/cleanup/tasks/clean_images.yaml
+
+- name: Clean Docker Images
+ block:
+ - name: Clean Images - Prefetch Docker Images
+ cron:
+ name: "Prefetch docker image {{ item }}"
+ minute: "10"
+ hour: "7"
+ job: "/usr/bin/docker pull {{ item }}"
+ loop:
+ "{{ images_to_prefetch_by_arch[ansible_machine] }}"
+ tags:
+ - prefetch-docker-images
+
+ - name: Clean Images - Remove Dangling Docker Images
+ cron:
+ name: "Remove dangling docker images"
+ minute: "10"
+ hour: "5"
+ weekday: "7"
+ job: "/usr/bin/docker rmi $(/usr/bin/docker images --filter 'dangling=true' -q)"
+ tags:
+ - remove-docker-images-dangling
+
+ # TODO: Disabled until all images are in the registry
+ #- name: Clean Images - Prune Docker Images
+ # cron:
+ # name: "Prune docker images"
+ # minute: "10"
+ # hour: "6"
+ # weekday: 7
+ # job: "/usr/bin/docker image prune --all --force"
+ # tags:
+ # - prune-docker-images
\ No newline at end of file
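The tasks above rely on the Ansible cron module, so each item becomes a named entry in the target user's crontab. A rough sketch of the resulting entries (image name taken from the x86_64 list used later in this change; the crontab owner depends on how the role is applied):

    #Ansible: Prefetch docker image fdiotools/builder-ubuntu2004:prod-x86_64
    10 7 * * * /usr/bin/docker pull fdiotools/builder-ubuntu2004:prod-x86_64
    #Ansible: Remove dangling docker images
    10 5 * * 7 /usr/bin/docker rmi $(/usr/bin/docker images --filter 'dangling=true' -q)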
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml b/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml
new file mode 100644
index 0000000000..25fd48e420
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/kill_containers.yaml
@@ -0,0 +1,42 @@
+---
+# file: roles/cleanup/tasks/kill_containers.yaml
+
+- name: Kill Docker Containers
+ block:
+ - name: Kill Container - Get Running Docker Containers
+ shell: "docker ps -aq"
+ register: running_containers
+ changed_when: no
+ tags:
+ - kill-containers
+
+ - name: Kill Container - Remove All Docker Containers
+ shell: "docker rm --force {{ item }}"
+ with_items: "{{ running_containers.stdout_lines }}"
+ tags:
+ - kill-containers
+
+ rescue:
+ - name: Restart Docker Daemon
+ systemd:
+ name: "docker"
+ state: "restarted"
+
+- name: Kill LXC Containers
+ block:
+ - name: Kill Container - Get Running LXC Containers
+ shell: "lxc-ls"
+ register: running_containers
+ changed_when: no
+ tags:
+ - kill-containers
+
+ - name: Kill Container - Remove All LXC Containers
+ shell: "lxc-destroy --force -n {{ item }}"
+ with_items: "{{ running_containers.stdout_lines }}"
+ tags:
+ - kill-containers
+
+ rescue:
+ - fail:
+ msg: "Kill LXC containers failed!" \ No newline at end of file
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml b/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml
new file mode 100644
index 0000000000..c7cee37485
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/kill_process.yaml
@@ -0,0 +1,37 @@
+---
+# file: roles/cleanup/tasks/kill_process.yaml
+
+- name: Kill Process - {{ process }}
+ block:
+ - name: Get PID Of {{ process }}
+ shell: "ps -ef | grep -v grep | grep -w {{ process }} | awk '{print $2}'"
+ when:
+ - process is defined and process != ""
+ register: running_processes
+ tags:
+ - kill-process
+
+ - name: Safe Kill {{ process }}
+ shell: "kill {{ item }}"
+ with_items: "{{ running_processes.stdout_lines }}"
+ tags:
+ - kill-process
+
+ - wait_for:
+ path: "/proc/{{ item }}/status"
+ state: "absent"
+ with_items: "{{ running_processes.stdout_lines }}"
+ ignore_errors: yes
+ register: killed_processes
+ tags:
+ - kill-process
+
+ - name: Kill Process - Force Kill {{ process }}
+ shell: "kill -9 {{ item }}"
+ with_items: "{{ killed_processes.results | select('failed') | map(attribute='item') | list }}"
+ tags:
+ - kill-process
+
+ rescue:
+ - fail:
+ msg: "Kill process {{ process }} failed!"
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/main.yaml b/fdio.infra.ansible/roles/cleanup/tasks/main.yaml
new file mode 100644
index 0000000000..eeda0139b3
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/main.yaml
@@ -0,0 +1,43 @@
+---
+# file: roles/cleanup/tasks/main.yaml
+# purpose: Structured per-server cleanup tasks.
+# - main:
+# - tg:
+# - Run tasks on TG servers only.
+# - Cleanup processes (T-Rex).
+# - sut:
+# - Run tasks on SUT servers only.
+# - Cleanup file leftovers (logs).
+# - Cleanup packages (VPP, Honeycomb).
+# - Cleanup processes (qemu, l3fwd, testpmd, docker, kubernetes).
+# - Cleanup interfaces.
+# - vpp_device:
+# - Run tasks on vpp_device servers only.
+# - Reset SRIOV.
+# - Docker image cleanup.
+# - nomad:
+# - Docker image cleanup.
+
+- name: tg specific
+ include_tasks: tg.yaml
+ when: "'tg' in group_names"
+ tags:
+ - cleanup
+
+- name: sut specific
+ include_tasks: sut.yaml
+ when: "'sut' in group_names"
+ tags:
+ - cleanup
+
+- name: vpp_device specific
+ include_tasks: vpp_device.yaml
+ when: "'vpp_device' in group_names"
+ tags:
+ - cleanup
+
+- name: nomad specific
+ include_tasks: nomad.yaml
+ when: "'nomad' in group_names"
+ tags:
+ - cleanup
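The includes above are selected purely by inventory group membership. A hedged usage example, assuming a hypothetical site.yaml play that applies this role and a hypothetical inventory path whose groups include tg, sut, vpp_device and nomad:

    # Run the cleanup role against the SUT hosts only:
    ansible-playbook -i inventories/lf_inventory site.yaml --limit sut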
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml b/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml
new file mode 100644
index 0000000000..3c5bf6462d
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/nomad.yaml
@@ -0,0 +1,22 @@
+---
+# file: roles/cleanup/tasks/nomad.yaml
+
+- name: Host Cleanup
+ block:
+ - name: Clean Images
+ import_tasks: clean_images.yaml
+ vars:
+ images_to_prefetch_by_arch:
+ aarch64:
+ - "fdiotools/builder-ubuntu2004:prod-aarch64"
+ - "fdiotools/builder-ubuntu1804:prod-aarch64"
+ - "fdiotools/builder-centos8:prod-aarch64"
+ x86_64:
+ - "fdiotools/builder-ubuntu2004:prod-x86_64"
+ - "fdiotools/builder-ubuntu1804:prod-x86_64"
+ - "fdiotools/builder-debian10:prod-x86_64"
+ - "fdiotools/builder-debian9:prod-x86_64"
+ - "fdiotools/builder-centos8:prod-x86_64"
+ - "fdiotools/builder-centos7:prod-x86_64"
+ tags:
+ - clean-images
\ No newline at end of file
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml b/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml
new file mode 100644
index 0000000000..302b43c99a
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/remove_package.yaml
@@ -0,0 +1,21 @@
+---
+# file: roles/cleanup/tasks/remove_package.yaml
+
+- name: Remove Package - Fix Corrupted APT
+ shell: "dpkg --configure -a"
+ when:
+ - ansible_distribution == 'Ubuntu'
+ tags:
+ - remove-package
+
+- name: Remove Package - {{ package }}
+ apt:
+ name: "{{ package }}"
+ force: yes
+ purge: yes
+ state: "absent"
+ failed_when: no
+ when:
+ - ansible_distribution == 'Ubuntu'
+ tags:
+ - remove-package
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml b/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml
new file mode 100644
index 0000000000..d80a35b1cb
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/sut.yaml
@@ -0,0 +1,83 @@
+---
+# file: roles/cleanup/tasks/sut.yaml
+
+- name: Host Cleanup
+ block:
+ - name: Kill Processes - Qemu
+ import_tasks: kill_process.yaml
+ vars:
+ process: "qemu"
+ tags:
+ - kill-process
+
+ - name: Kill Processes - L3fwd
+ import_tasks: kill_process.yaml
+ vars:
+ process: "l3fwd"
+ tags:
+ - kill-process
+
+ - name: Kill Processes - Testpmd
+ import_tasks: kill_process.yaml
+ vars:
+ process: "testpmd"
+ tags:
+ - kill-process
+
+ - name: Kill Processes - iPerf3
+ import_tasks: kill_process.yaml
+ vars:
+ process: "iperf3"
+ tags:
+ - kill-process
+
+ - name: Kill Processes - vpp_echo
+ import_tasks: kill_process.yaml
+ vars:
+ process: "vpp_echo"
+ tags:
+ - kill-process
+
+ - name: Find File Or Dir - Core Zip File
+ find:
+ paths: "/tmp/"
+ patterns: "*tar.lzo.lrz.xz*"
+ register: files_to_delete
+ tags:
+ - remove-file-dir
+
+ - name: Remove File Or Dir - Core Zip File
+ file:
+ path: "{{ item.path }}"
+ state: absent
+ with_items: "{{ files_to_delete.files }}"
+ tags:
+ - remove-file-dir
+
+ - name: Find File Or Dir - Core Dump File
+ find:
+ paths: "/tmp/"
+ patterns: "*core*"
+ register: files_to_delete
+ tags:
+ - remove-file-dir
+
+ - name: Remove File Or Dir - Core Dump File
+ file:
+ path: "{{ item.path }}"
+ state: absent
+ with_items: "{{ files_to_delete.files }}"
+ tags:
+ - remove-file-dir
+
+ - name: Kill Containers - Remove All Containers
+ import_tasks: kill_containers.yaml
+ tags:
+ - kill-containers
+
+ - name: Remove Packages - Remove VPP
+ import_tasks: remove_package.yaml
+ vars:
+ package: "*vpp*"
+ tags:
+ - remove-package
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml b/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml
new file mode 100644
index 0000000000..fa2d2d2819
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/tg.yaml
@@ -0,0 +1,13 @@
+---
+# file: roles/cleanup/tasks/tg.yaml
+
+- name: Host Cleanup
+ block:
+ - name: Kill Processes - TRex
+ import_tasks: kill_process.yaml
+ vars:
+ process: "_t-rex"
+ when:
+ - docker_tg is undefined
+ tags:
+ - kill-process
diff --git a/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml b/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml
new file mode 100644
index 0000000000..41c4b29d37
--- /dev/null
+++ b/fdio.infra.ansible/roles/cleanup/tasks/vpp_device.yaml
@@ -0,0 +1,32 @@
+---
+# file: roles/cleanup/tasks/vpp_device.yaml
+
+- name: Host Cleanup
+ block:
+ - name: Reset vpp_device Binary
+ copy:
+ src: "files/reset_vppdevice.sh"
+ dest: "/usr/local/bin"
+ owner: "root"
+ group: "root"
+ mode: "744"
+ tags:
+ - reset-sriov
+
+ - name: Clean Images
+ import_tasks: clean_images.yaml
+ vars:
+ images_to_prefetch_by_arch:
+ aarch64:
+ - "fdiotools/builder-ubuntu2004:prod-aarch64"
+ - "fdiotools/builder-ubuntu1804:prod-aarch64"
+ - "fdiotools/builder-centos8:prod-aarch64"
+ x86_64:
+ - "fdiotools/builder-ubuntu2004:prod-x86_64"
+ - "fdiotools/builder-ubuntu1804:prod-x86_64"
+ - "fdiotools/builder-debian10:prod-x86_64"
+ - "fdiotools/builder-debian9:prod-x86_64"
+ - "fdiotools/builder-centos8:prod-x86_64"
+ - "fdiotools/builder-centos7:prod-x86_64"
+ tags:
+ - clean-images
\ No newline at end of file