aboutsummaryrefslogtreecommitdiffstats
path: root/resources/tools/testbed-setup/ansible/roles
diff options
context:
space:
mode:
authorPeter Mikus <pmikus@cisco.com>2019-10-10 15:31:28 +0000
committerPeter Mikus <pmikus@cisco.com>2019-11-05 07:23:56 +0000
commitd01411c3c4af6c724a3800c621804ea979818d6d (patch)
tree8c2745c25a575c7f637473fe98d3c39c1c8e2b28 /resources/tools/testbed-setup/ansible/roles
parent50d21f72ff61d06641954c22a8bc13c2468388f9 (diff)
Cleanup via Ansible
+ Remove dependency on topo_ scripts that depends on custom SSH() that depends on framework itself. This way the cleanup is independent of failure in our SSH libs. + Simple ansible command can do cleanup of a machine: ansible-playbook --inventory inventories/lf_inventory/hosts site.yaml \ --limit '10.32.8.18' --tags 'cleanup' + Add vpp_device reset and cleanup. + Remove historical scripts. - Still in testing beta phase. - Need to add SRIOV cleanup. Signed-off-by: Peter Mikus <pmikus@cisco.com> Change-Id: I68e23304c7ad01041f51263c328c6e8d9b555cb7
Diffstat (limited to 'resources/tools/testbed-setup/ansible/roles')
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh113
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml28
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml27
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml31
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml31
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml52
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml14
-rw-r--r--resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml15
8 files changed, 311 insertions, 0 deletions
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh b/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh
new file mode 100644
index 0000000000..ede2db1273
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/files/reset_vppdevice.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+function die () {
+    # Report a fatal error on standard error and terminate the script.
+    #
+    # Hardcoded values:
+    # - The default error message.
+    # Arguments:
+    # - ${1} - The whole error message, be sure to quote. Optional.
+    # - ${2} - The code to exit with, default: 1.
+
+    # Strict mode is switched off so the reporting below cannot itself abort.
+    set +eu
+    local message="${1:-Unspecified run-time error occurred!}"
+    local exit_code="${2:-1}"
+    warn "${message}"
+    exit "${exit_code}"
+}
+
+
+function set_eligibility_off {
+    # Set Nomad eligibility of this host to ineligible for scheduling.
+    # Nothing is changed when the node is already ineligible. The script
+    # dies when the node is not listed, when the nomad call fails, or when
+    # the node is still eligible after the change.
+
+    set -euo pipefail
+
+    local host_name
+    local node_status
+    local node_id
+
+    host_name="$(hostname)" || die
+    # Query the status once and match the host name as a fixed string, so
+    # that dots in the name are not treated as regex wildcards. Both the
+    # node ID and the eligibility are derived from the same output.
+    node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+    # The node ID is the text before the first space of the matched line.
+    node_id="${node_status%% *}"
+
+    if [[ "${node_status}" != *"ineligible"* ]]; then
+        nomad node eligibility -disable "${node_id}" || die
+        node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+        if [[ "${node_status}" != *"ineligible"* ]]; then
+            die "Set eligibility off failed!"
+        fi
+    fi
+}
+
+
+function set_eligibility_on {
+    # Set Nomad eligibility of this host to eligible for scheduling.
+    # Nothing is changed when the node is already eligible. The script
+    # dies when the node is not listed, when the nomad call fails, or when
+    # the node is still ineligible after the change.
+
+    set -euo pipefail
+
+    local host_name
+    local node_status
+    local node_id
+
+    host_name="$(hostname)" || die
+    # Query the status once and match the host name as a fixed string, so
+    # that dots in the name are not treated as regex wildcards. Both the
+    # node ID and the eligibility are derived from the same output.
+    node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+    # The node ID is the text before the first space of the matched line.
+    node_id="${node_status%% *}"
+
+    if [[ "${node_status}" == *"ineligible"* ]]; then
+        nomad node eligibility -enable "${node_id}" || die
+        node_status="$(nomad node status | grep -F -- "${host_name}")" || die
+        if [[ "${node_status}" == *"ineligible"* ]]; then
+            die "Set eligibility on failed!"
+        fi
+    fi
+}
+
+
+function restart_vfs_service {
+ # Stop and start VF serice. This will reinitialize VFs and driver mappings.
+
+ set -euo pipefail
+
+ warn "Restarting VFs service (this may take few minutes)..."
+ sudo service csit-initialize-vfs stop || die "Failed to stop VFs service!"
+ sudo service csit-initialize-vfs start || die "Failed to start VFs service!"
+}
+
+
+function wait_for_pending_containers {
+    # Poll for running containers whose name matches "csit*" and give them
+    # time to quit gracefully. If any remain after the last retry, either
+    # force remove them (when force was requested) or die.
+    #
+    # Hardcoded values:
+    # - Number of retries (60) and sleep between retries (60 seconds).
+    # Arguments:
+    # - ${1} - Optional. "force" or "--force" enables force removal of
+    #   containers that are still listed after the last retry.
+
+    set -euo pipefail
+
+    local retries=60
+    local wait_time=60
+    local force="${1-}"
+    local attempt
+    local container_ids
+    local -a pending_containers=()
+
+    for (( attempt = 1; attempt <= retries; attempt++ )); do
+        # Capture the listing first so that a docker failure is detected
+        # instead of being read as "no containers pending".
+        container_ids="$(docker ps --quiet --filter 'name=csit*')" \
+            || die "Failed to list pending containers!"
+        pending_containers=()
+        # A here-string always carries one line, so only split the output
+        # when docker really printed something.
+        if [[ -n "${container_ids}" ]]; then
+            mapfile -t pending_containers <<< "${container_ids}" || die
+        fi
+        if (( ${#pending_containers[@]} == 0 )); then
+            break
+        fi
+        warn "Waiting for pending containers [${pending_containers[*]}] ..."
+        sleep "${wait_time}" || die
+    done
+    if (( ${#pending_containers[@]} != 0 )); then
+        # Accept both spellings: the documented "force" and the "--force"
+        # form a command-line caller is likely to use.
+        if [[ "${force}" == "force" || "${force}" == "--force" ]]; then
+            warn "Force killing [${pending_containers[*]}] ..."
+            docker rm --force "${pending_containers[@]}" || die
+        else
+            die "Still few containers running!"
+        fi
+    fi
+}
+
+
+function warn () {
+    # Write the message to standard error.
+    #
+    # Arguments:
+    # - ${@} - The text of the message; echo joins the words with spaces.
+
+    1>&2 echo "${@}"
+}
+
+
+set_eligibility_off || die
+wait_for_pending_containers "${@}" || die
+restart_vfs_service || die
+set_eligibility_on || die
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml
new file mode 100644
index 0000000000..a61aa6ceee
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_containers.yaml
@@ -0,0 +1,28 @@
+---
+# file: roles/cleanup/tasks/kill_containers.yaml
+# purpose: Remove all Docker containers and all LXC containers on the host.
+#          Each runtime is handled by a lookup task followed by a removal
+#          loop over the lines the lookup printed.
+
+# Read-only lookup, hence never reported as changed.
+- name: Kill container - Get running Docker containers
+  shell: "docker ps -aq"
+  changed_when: false
+  register: running_containers
+  tags: kill-containers
+
+- name: Kill container - Remove all Docker containers
+  docker_container:
+    state: absent
+    name: "{{ item }}"
+  with_items: "{{ running_containers.stdout_lines }}"
+  tags: kill-containers
+
+# The registered variable is reused; from here on it holds LXC names.
+- name: Kill container - Get running LXC containers
+  shell: "lxc-ls"
+  changed_when: false
+  register: running_containers
+  tags: kill-containers
+
+- name: Kill container - Remove all LXC containers
+  lxc_container:
+    state: absent
+    name: "{{ item }}"
+  with_items: "{{ running_containers.stdout_lines }}"
+  tags: kill-containers
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml
new file mode 100644
index 0000000000..4a1180b77f
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/kill_process.yaml
@@ -0,0 +1,27 @@
+---
+# file: roles/cleanup/tasks/kill_process.yaml
+# purpose: Terminate every process matching the "process" variable.
+#          SIGTERM is sent first; processes whose /proc entry does not
+#          disappear afterwards are sent SIGKILL.
+# vars:
+#   - process: Word to look for in the "ps -ef" output.
+
+# Read-only lookup, hence never reported as changed.
+- name: Kill process - Get pid of {{ process }}
+  shell: "ps -ef | grep -v grep | grep -w {{ process }} | awk '{print $2}'"
+  when: >
+    process is defined and process != ""
+  register: running_processes
+  changed_when: no
+  tags: kill-process
+
+# When the lookup above is skipped, the registered result has no
+# stdout_lines; default([]) turns the loops below into no-ops instead of
+# failing on an undefined variable.
+- name: Kill process - Safe kill {{ process }}
+  shell: "kill {{ item }}"
+  with_items: "{{ running_processes.stdout_lines | default([]) }}"
+  tags: kill-process
+
+# Errors are ignored on purpose: a failed wait marks the pid for the force
+# kill in the next task.
+- name: Kill process - Wait for {{ process }} to exit
+  wait_for:
+    path: "/proc/{{ item }}/status"
+    state: absent
+  with_items: "{{ running_processes.stdout_lines | default([]) }}"
+  ignore_errors: yes
+  register: killed_processes
+  tags: kill-process
+
+- name: Kill process - Force kill {{ process }}
+  shell: "kill -9 {{ item }}"
+  with_items: "{{ killed_processes.results | select('failed') | map(attribute='item') | list }}"
+  tags: kill-process
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml
new file mode 100644
index 0000000000..64a55c4672
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/main.yaml
@@ -0,0 +1,31 @@
+---
+# file: roles/cleanup/tasks/main.yaml
+# purpose: Entry point of the cleanup role. Dispatches to one task file per
+#          inventory group the host belongs to.
+# - main:
+#     - tg:
+#         - Run tasks on TG servers only.
+#         - Cleanup processes (T-Rex).
+#     - sut:
+#         - Run tasks on SUT servers only.
+#         - Cleanup file leftovers (logs).
+#         - Cleanup packages (VPP, Honeycomb).
+#         - Cleanup processes (qemu, l3fwd, testpmd, docker, kubernetes)
+#         - Cleanup interfaces.
+#     - vpp_device
+#         - Run tasks on vpp_device servers only.
+#         - Reset SRIOV
+#
+# NOTE(review): tags set on include_tasks are presumably not inherited by
+# the included tasks - confirm that "--tags cleanup" reaches them (e.g. via
+# a role-level tag in the playbook).
+
+- name: tg specific
+  when: "'tg' in group_names"
+  include_tasks: tg.yaml
+  tags: cleanup
+
+- name: sut specific
+  when: "'sut' in group_names"
+  include_tasks: sut.yaml
+  tags: cleanup
+
+- name: vpp_device specific
+  when: "'vpp_device' in group_names"
+  include_tasks: vpp_device.yaml
+  tags: cleanup
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml
new file mode 100644
index 0000000000..8f5ec8fefe
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/remove_package.yaml
@@ -0,0 +1,31 @@
+---
+# file: roles/cleanup/tasks/remove_package.yaml
+# purpose: Purge the package (or package glob) given in the "package"
+#          variable on Ubuntu hosts, if dpkg reports it as installed.
+# vars:
+#   - package: Package name or glob, e.g. "*vpp*".
+
+- name: Remove package - Fix corrupted apt
+  shell: 'dpkg --configure -a'
+  when: >
+    ansible_distribution|lower == 'ubuntu'
+  tags: remove-package
+
+# The command must not be wrapped in quotes inside the folded scalar: the
+# quotes would be literal and the shell would treat the whole pipeline as a
+# single (non-existent) command name. The package pattern is shell-quoted so
+# that the glob reaches dpkg-query instead of being expanded by the shell.
+# Failure is expected when the package is absent, so rc is only recorded.
+- name: Remove package - Check if {{ package }} is installed
+  shell: >
+    dpkg-query -W -f='${Status}' {{ package | quote }} |
+    grep 'install ok installed'
+  register: package_is_installed
+  failed_when: no
+  changed_when: no
+  when: >
+    package is defined and
+    package != '' and
+    ansible_distribution|lower == 'ubuntu'
+  tags: remove-package
+
+# The rc test comes last: on hosts where the check above was skipped the
+# registered result has no rc, and the earlier conditions short-circuit
+# before it is evaluated.
+- name: Remove package - {{ package }}
+  apt:
+    name: '{{ package }}'
+    force: yes
+    purge: yes
+    state: absent
+  when: >
+    package is defined and
+    package != '' and
+    ansible_distribution|lower == 'ubuntu' and
+    package_is_installed.rc == 0
+  tags: remove-package
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml
new file mode 100644
index 0000000000..5083a96a29
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/sut.yaml
@@ -0,0 +1,52 @@
+---
+# file: roles/cleanup/tasks/sut.yaml
+# purpose: Cleanup of SUT servers: kill test processes, remove log and core
+#          leftovers, remove containers, reset Kubernetes, remove VPP
+#          packages.
+
+- name: Kill processes - qemu
+  import_tasks: kill_process.yaml
+  vars:
+    process: "qemu"
+  tags: kill-process
+
+- name: Kill processes - l3fwd
+  import_tasks: kill_process.yaml
+  vars:
+    process: "l3fwd"
+  tags: kill-process
+
+- name: Kill processes - testpmd
+  import_tasks: kill_process.yaml
+  vars:
+    process: "testpmd"
+  tags: kill-process
+
+- name: Remove file or dir - HoneyComb logs
+  file:
+    state: absent
+    path: "/var/log/honeycomb"
+  tags: remove-file-dir
+
+# The file module takes a literal path and does not expand globs, so the
+# matching entries in /tmp are collected with find first and then removed
+# one by one.
+- name: Remove file or dir - Find core zip and core dump files
+  find:
+    paths: "/tmp"
+    patterns:
+      - "*tar.lzo.lrz.xz*"
+      - "*core*"
+    file_type: any
+  register: core_leftovers
+  tags: remove-file-dir
+
+- name: Remove file or dir - Core zip and core dump files
+  file:
+    state: absent
+    path: "{{ item.path }}"
+  with_items: "{{ core_leftovers.files }}"
+  tags: remove-file-dir
+
+- name: Kill containers - Remove all containers
+  import_tasks: kill_containers.yaml
+  tags: kill-containers
+
+- name: Kubernetes - Reset
+  raw: 'kubeadm reset --force'
+  tags: kill-kubernetes
+
+- name: Remove packages - Remove VPP
+  import_tasks: remove_package.yaml
+  vars:
+    package: "*vpp*"
+  tags: remove-package
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml
new file mode 100644
index 0000000000..f58cb59a1a
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/tg.yaml
@@ -0,0 +1,14 @@
+---
+# file: roles/cleanup/tasks/tg.yaml
+# purpose: Cleanup of TG servers: kill leftover traffic generator
+#          processes through the shared kill_process.yaml task file.
+
+- name: Kill processes - TRex
+  vars:
+    process: "_t-rex"
+  import_tasks: kill_process.yaml
+  tags: kill-process
+
+- name: Kill processes - WRK
+  vars:
+    process: "wrk"
+  import_tasks: kill_process.yaml
+  tags: kill-process
diff --git a/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml
new file mode 100644
index 0000000000..5b7713a554
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/cleanup/tasks/vpp_device.yaml
@@ -0,0 +1,15 @@
+---
+# file: roles/cleanup/tasks/vpp_device.yaml
+# purpose: Install the vpp_device reset script on the host and run it.
+
+# copy is used instead of template: the script is plain bash with nothing
+# to render, and bash constructs such as ${#array[@]} contain "{#", which
+# Jinja2 reads as the start of a comment. The mode makes the script
+# executable so the next task can run it.
+- name: Reset vpp_device binary
+  copy:
+    src: 'reset_vppdevice.sh'
+    dest: '/usr/local/bin/reset_vppdevice.sh'
+    owner: 'root'
+    group: 'root'
+    mode: '0755'
+  tags: reset-sriov
+
+# The script compares its first argument with "force", so that exact word
+# is passed. The absolute path avoids depending on PATH of the raw session.
+- name: Reset vpp_device
+  raw: '/usr/local/bin/reset_vppdevice.sh force'
+  tags: reset-sriov