aboutsummaryrefslogtreecommitdiffstats
path: root/resources/tools/testbed-setup/ansible/roles/cleanup
diff options
context:
space:
mode:
Diffstat (limited to 'resources/tools/testbed-setup/ansible/roles/cleanup')
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh113
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml29
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml32
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml31
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml19
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml60
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml16
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml15
8 files changed, 315 insertions, 0 deletions
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh b/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh
new file mode 100644
index 0000000000..ede2db1273
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+function die () {
+    # Report an error on standard error and terminate the script with the
+    # requested exit code.
+    #
+    # Hardcoded values:
+    # - The default error message.
+    # Arguments:
+    # - ${1} - The whole error message, be sure to quote. Optional.
+    # - ${2} - The code to exit with. Optional, default: 1.
+
+    # Relax strict mode so that the error path itself cannot abort early.
+    set +eu
+    local message="${1:-Unspecified run-time error occurred!}"
+    local exit_code="${2:-1}"
+    warn "${message}"
+    exit "${exit_code}"
+}
+
+
+function set_eligibility_off {
+    # Set Nomad eligibility of this host to ineligible for scheduling and
+    # verify that the change is visible afterwards. Fail otherwise.
+    #
+    # The node is found by matching the output of "hostname" against the
+    # lines printed by "nomad node status"; the first space-separated field
+    # of the matching line is used as the node ID.
+
+    set -euo pipefail
+
+    local host_name node_status node_id
+
+    host_name="$(hostname)" || die "Failed to get hostname!"
+    # Match the hostname as a quoted fixed string: unquoted it would be
+    # word-split, and as a regex every "." would match any character.
+    node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+    node_id="$(cut -d ' ' -f 1 <<< "${node_status}")" || die
+
+    if [[ "${node_status}" != *"ineligible"* ]]; then
+        nomad node eligibility -disable "${node_id}" || die
+        # Query again to confirm that the node really became ineligible.
+        node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+        if [[ "${node_status}" != *"ineligible"* ]]; then
+            die "Set eligibility off failed!"
+        fi
+    fi
+}
+
+
+function set_eligibility_on {
+    # Set Nomad eligibility of this host to eligible for scheduling and
+    # verify that the change is visible afterwards. Fail otherwise.
+    #
+    # The node is found by matching the output of "hostname" against the
+    # lines printed by "nomad node status"; the first space-separated field
+    # of the matching line is used as the node ID.
+
+    set -euo pipefail
+
+    local host_name node_status node_id
+
+    host_name="$(hostname)" || die "Failed to get hostname!"
+    # Match the hostname as a quoted fixed string: unquoted it would be
+    # word-split, and as a regex every "." would match any character.
+    node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+    node_id="$(cut -d ' ' -f 1 <<< "${node_status}")" || die
+
+    if [[ "${node_status}" == *"ineligible"* ]]; then
+        nomad node eligibility -enable "${node_id}" || die
+        # Query again to confirm that the node is no longer ineligible.
+        node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+        if [[ "${node_status}" == *"ineligible"* ]]; then
+            die "Set eligibility on failed!"
+        fi
+    fi
+}
+
+
+function restart_vfs_service {
+ # Stop and start VF serice. This will reinitialize VFs and driver mappings.
+
+ set -euo pipefail
+
+ warn "Restarting VFs service (this may take few minutes)..."
+ sudo service csit-initialize-vfs stop || die "Failed to stop VFs service!"
+ sudo service csit-initialize-vfs start || die "Failed to start VFs service!"
+}
+
+
+function wait_for_pending_containers {
+    # Poll for containers selected by the "name=csit*" docker filter and
+    # wait for them to quit. If some are still listed after the last retry,
+    # either force remove them (when requested) or fail.
+    #
+    # Hardcoded values:
+    # - retries - Maximum number of polls (60).
+    # - wait_time - Seconds to sleep after a poll that found containers (60).
+    # Arguments:
+    # - ${1} - Optional. "force" or "--force" requests forced removal of
+    #   the containers that are still listed after the last retry.
+
+    set -euo pipefail
+
+    local -r retries=60
+    local -r wait_time=60
+    local attempt listing
+    local -a pending_containers=()
+
+    for (( attempt = 1; attempt <= retries; attempt++ )); do
+        # Capture the listing in a variable so that a failing "docker ps" is
+        # detected; the exit status of a process substitution is not.
+        # The filter is quoted to keep the shell from globbing "csit*".
+        listing="$(docker ps --quiet --filter 'name=csit*')" \
+            || die "Failed to list containers!"
+        pending_containers=()
+        if [[ -n "${listing}" ]]; then
+            mapfile -t pending_containers <<< "${listing}"
+        fi
+        # Check for emptiness before expanding the array: older Bash
+        # versions treat an empty array as unset under "set -u".
+        if (( ${#pending_containers[@]} == 0 )); then
+            break
+        fi
+        warn "Waiting for pending containers [${pending_containers[*]}] ..."
+        sleep "${wait_time}" || die
+    done
+    if (( ${#pending_containers[@]} != 0 )); then
+        case "${1-}" in
+            force|--force)
+                warn "Force killing [${pending_containers[*]}] ..."
+                docker rm --force "${pending_containers[@]}" || die
+                ;;
+            *)
+                die "Still few containers running!"
+                ;;
+        esac
+    fi
+}
+
+
+function warn () {
+    # Print the message to standard error.
+    #
+    # Arguments:
+    # - ${@} - The text of the message. Multiple arguments are joined by
+    #   the first character of IFS (a space unless IFS was changed).
+
+    # printf is used instead of echo so that a message starting with "-n"
+    # or "-e" is printed verbatim rather than parsed as an echo option.
+    printf '%s\n' "$*" >&2
+}
+
+
+set_eligibility_off || die
+wait_for_pending_containers "${@}" || die
+restart_vfs_service || die
+set_eligibility_on || die
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml
new file mode 100644
index 0000000000..1cd64351a8
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml
@@ -0,0 +1,29 @@
+---
+# file: roles/cleanup/tasks/kill_containers.yaml
+# purpose: Force remove every Docker container and every LXC container
+# reported on the host. Any failure inside the block is reported through
+# the single message in the rescue section.
+
+- name: Kill containers
+ block:
+ # "docker ps -aq" is a read-only query, hence changed_when: no.
+ - name: Kill container - Get running Docker containers
+ shell: "docker ps -aq"
+ register: running_containers
+ changed_when: no
+ tags: kill-containers
+
+ # One "docker rm --force" call per line of the registered output.
+ - name: Kill container - Remove all Docker containers
+ shell: "docker rm --force {{ item }}"
+ with_items: "{{ running_containers.stdout_lines }}"
+ tags: kill-containers
+
+ # The register below reuses (overwrites) the running_containers variable.
+ - name: Kill container - Get running LXC containers
+ shell: "lxc-ls"
+ register: running_containers
+ changed_when: no
+ tags: kill-containers
+
+ # One "lxc-destroy --force" call per line of the registered output.
+ - name: Kill container - Remove all LXC containers
+ shell: "lxc-destroy --force -n {{ item }}"
+ with_items: "{{ running_containers.stdout_lines }}"
+ tags: kill-containers
+ # Runs when any task of the block fails; ends the play for the host with
+ # the message below.
+ rescue:
+ - fail:
+ msg: "Kill containers failed!"
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml
new file mode 100644
index 0000000000..a593fc7616
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml
@@ -0,0 +1,32 @@
+---
+# file: roles/cleanup/tasks/kill_process.yaml
+# purpose: Terminate all processes whose "ps -ef" line contains the word
+#          given in the "process" variable. A plain kill is tried first;
+#          processes that do not disappear are killed with signal 9.
+# variables:
+#   process - Word to look for in the process list. When it is undefined
+#             or empty the whole block is skipped.
+
+- name: Kill process - {{ process }}
+  block:
+    # Read-only query; "grep -v grep" drops the grep commands themselves and
+    # awk prints the PID column.
+    - name: Kill process - Get pid of {{ process }}
+      shell: "ps -ef | grep -v grep | grep -w {{ process }} | awk '{print $2}'"
+      register: running_processes
+      changed_when: no
+      tags: kill-process
+
+    - name: Kill process - Safe kill {{ process }}
+      shell: "kill {{ item }}"
+      with_items: "{{ running_processes.stdout_lines }}"
+      tags: kill-process
+
+    # Wait until /proc/<pid>/status is gone. Errors are ignored here so that
+    # the PIDs which did not exit can be collected for the force kill below.
+    - name: Kill process - Wait for {{ process }} to exit
+      wait_for:
+        path: "/proc/{{ item }}/status"
+        state: absent
+      with_items: "{{ running_processes.stdout_lines }}"
+      ignore_errors: yes
+      register: killed_processes
+      tags: kill-process
+
+    # Only the items whose wait above failed are force killed.
+    - name: Kill process - Force kill {{ process }}
+      shell: "kill -9 {{ item }}"
+      with_items: "{{ killed_processes.results | select('failed') | map(attribute='item') | list }}"
+      tags: kill-process
+  rescue:
+    - fail:
+        msg: "Kill process {{ process }} failed!"
+  # Guard the whole block, not only the lookup: with the guard on the first
+  # task alone, the later tasks would fail on the missing stdout_lines.
+  when: >
+    process is defined and process != ""
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml
new file mode 100644
index 0000000000..64a55c4672
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml
@@ -0,0 +1,31 @@
+---
+# file: roles/cleanup/tasks/main.yaml
+# purpose: Structured per server cleanup tasks.
+# - main:
+# - tg:
+# - Run tasks on TG servers only.
+# - Cleanup processes (T-Rex, WRK).
+# - sut:
+# - Run tasks on SUT servers only.
+# - Cleanup processes (qemu, l3fwd, testpmd, iperf3, vpp_echo).
+# - Cleanup file leftovers (core files in /tmp).
+# - Cleanup containers (Docker, LXC) and reset Kubernetes.
+# - Cleanup packages (VPP).
+# - vpp_device
+# - Run tasks on vpp_device servers only.
+# - Reset SRIOV
+#
+# Each include below is selected by the inventory group of the host.
+# NOTE(review): tags set on include_tasks presumably apply to the include
+# statement only and are not inherited by the included tasks - confirm that
+# running with "--tags cleanup" behaves as intended.
+
+- name: tg specific
+ include_tasks: tg.yaml
+ when: "'tg' in group_names"
+ tags: cleanup
+
+- name: sut specific
+ include_tasks: sut.yaml
+ when: "'sut' in group_names"
+ tags: cleanup
+
+- name: vpp_device specific
+ include_tasks: vpp_device.yaml
+ when: "'vpp_device' in group_names"
+ tags: cleanup
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml
new file mode 100644
index 0000000000..0c8816fe29
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml
@@ -0,0 +1,19 @@
+---
+# file: roles/cleanup/tasks/remove_package.yaml
+# purpose: Purge the package given in the "package" variable. Both tasks
+# run on Ubuntu hosts only.
+
+# Runs "dpkg --configure -a" before the removal is attempted.
+- name: Remove package - Fix corrupted apt
+ shell: 'dpkg --configure -a'
+ when: >
+ ansible_distribution == 'Ubuntu'
+ tags: remove-package
+
+# failed_when: no - a failing removal is never reported as a task failure.
+- name: Remove package - {{ package }}
+ apt:
+ name: '{{ package }}'
+ force: yes
+ purge: yes
+ state: absent
+ failed_when: no
+ when: >
+ ansible_distribution == 'Ubuntu'
+ tags: remove-package
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml
new file mode 100644
index 0000000000..c24b5e6a7f
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml
@@ -0,0 +1,60 @@
+---
+# file: roles/cleanup/tasks/sut.yaml
+# purpose: Cleanup of SUT hosts - leftover processes, core files in /tmp,
+#          containers, Kubernetes state and VPP packages.
+
+- name: Host cleanup
+  block:
+    - name: Kill processes - qemu
+      import_tasks: kill_process.yaml
+      vars:
+        process: "qemu"
+      tags: kill-process
+
+    - name: Kill processes - l3fwd
+      import_tasks: kill_process.yaml
+      vars:
+        process: "l3fwd"
+      tags: kill-process
+
+    - name: Kill processes - testpmd
+      import_tasks: kill_process.yaml
+      vars:
+        process: "testpmd"
+      tags: kill-process
+
+    - name: Kill processes - iperf3
+      import_tasks: kill_process.yaml
+      vars:
+        process: "iperf3"
+      tags: kill-process
+
+    - name: Kill processes - vpp_echo
+      import_tasks: kill_process.yaml
+      vars:
+        process: "vpp_echo"
+      tags: kill-process
+
+    # The file module takes "path" literally and does not expand wildcards,
+    # so matching entries are collected with find first and then removed.
+    - name: Remove file or dir - Find core zip files
+      find:
+        paths: "/tmp"
+        patterns: "*tar.lzo.lrz.xz*"
+        file_type: any
+      register: core_zip_files
+      tags: remove-file-dir
+
+    - name: Remove file or dir - Core zip file
+      file:
+        state: absent
+        path: "{{ item.path }}"
+      with_items: "{{ core_zip_files.files }}"
+      tags: remove-file-dir
+
+    - name: Remove file or dir - Find core dump files
+      find:
+        paths: "/tmp"
+        patterns: "*core*"
+        file_type: any
+      register: core_dump_files
+      tags: remove-file-dir
+
+    - name: Remove file or dir - Core dump file
+      file:
+        state: absent
+        path: "{{ item.path }}"
+      with_items: "{{ core_dump_files.files }}"
+      tags: remove-file-dir
+
+    - name: Kill containers - Remove all containers
+      import_tasks: kill_containers.yaml
+      tags: kill-containers
+
+    - name: Kubernetes - Reset
+      raw: 'kubeadm reset --force'
+      tags: kill-kubernetes
+
+    - name: Remove packages - Remove VPP
+      import_tasks: remove_package.yaml
+      vars:
+        package: "*vpp*"
+      tags: remove-package
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml
new file mode 100644
index 0000000000..a026ec2acd
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml
@@ -0,0 +1,16 @@
+---
+# file: roles/cleanup/tasks/tg.yaml
+# purpose: Kill traffic generator processes on TG hosts. Both imports are
+# skipped when the docker_tg variable is defined.
+
+# "process" is the word kill_process.yaml looks for in the process list.
+- name: Kill processes - TRex
+ import_tasks: kill_process.yaml
+ vars:
+ process: "_t-rex"
+ when: docker_tg is undefined
+ tags: kill-process
+
+- name: Kill processes - WRK
+ import_tasks: kill_process.yaml
+ vars:
+ process: "wrk"
+ tags: kill-process
+ when: docker_tg is undefined \ No newline at end of file
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml
new file mode 100644
index 0000000000..5b7713a554
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml
@@ -0,0 +1,15 @@
+---
+# file: roles/cleanup/tasks/vpp_device.yaml
+# purpose: Install the vpp_device reset script on the host and run it.
+
+# The script is installed verbatim with "copy". "template" would run it
+# through Jinja2, and the "${#...}" expansions in the script contain "{#",
+# which Jinja2 parses as the start of a comment.
+- name: Reset vpp_device binary
+  copy:
+    src: 'files/reset_vppdevice.sh'
+    dest: '/usr/local/bin/reset_vppdevice.sh'
+    owner: 'root'
+    group: 'root'
+    # The script is executed by the next task, so it must be executable.
+    mode: '0755'
+  tags: reset-sriov
+
+# The script force removes pending containers when its first argument is
+# the word "force".
+- name: Reset vpp_device
+  raw: 'reset_vppdevice.sh force'
+  tags: reset-sriov