summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Barach <dbarach@cisco.com>2016-04-22 09:54:22 -0400
committerDave Barach <dbarach@cisco.com>2016-04-22 09:54:33 -0400
commit2b836cf4d1e4e59ca34229a9fdf49d79216da20e (patch)
treead9e5acabd3702c33a4247fce2adbcbd4a390859
parentdaede645441e72cdca631cef7b687e74183cc146 (diff)
Add a vpp_restart command
vpp_restart solves the problem of restarting vpp and not exiting until the new vpp instance is sufficiently established to allow immediate binary API connections. The point is to avoid using arbitrary "sleep N" commands e.g. in CSIT shell scripts. We send SIGTERM to the current vpp process, and expect / depend on the process-monitor du jour to remove the vpp-api shared-memory segment. vpp_restart exits w/ status 0 if all is well, non-zero upon failure. In trying to make vpp_restart reliable, we discovered a recently-introduced heap corruption bug in .../dpdk/init.c, which caused vpp to crash on startup once every 20 times on a particular rig. If possible, we should check /var/log/syslog for evidence of unexpected multiple restarts during regression-testing. Change-Id: Ic48c74b1a94a4368066ba2e326d4b12d51192088 Signed-off-by: Dave Barach <dbarach@cisco.com>
-rw-r--r--vpp-api-test/Makefile.am4
-rw-r--r--vpp-api-test/vat/restart.c234
-rw-r--r--vpp/api/gmon.c16
-rw-r--r--vpp/app/version.c1
4 files changed, 250 insertions, 5 deletions
diff --git a/vpp-api-test/Makefile.am b/vpp-api-test/Makefile.am
index 5079f8d49b9..d59c7e27f5c 100644
--- a/vpp-api-test/Makefile.am
+++ b/vpp-api-test/Makefile.am
@@ -20,7 +20,7 @@ libvatplugin_la_SOURCES = \
lib_LTLIBRARIES = libvatplugin.la
-bin_PROGRAMS = vpp_api_test vpp_json_test
+bin_PROGRAMS = vpp_api_test vpp_json_test vpp_restart
vpp_api_test_SOURCES = vat/api_format.c vat/main.c vat/plugin.c vat/vat.h vat/json_format.h vat/json_format.c
vpp_json_test_SOURCES = vat/json_format.h vat/json_format.c vat/json_test.c
@@ -31,5 +31,7 @@ vpp_api_test_LDADD = -lvlibmemoryclient -lvlibapi -lsvm \
vpp_json_test_LDADD = -lvppinfra -lm
nobase_include_HEADERS = vat/vat.h vat/json_format.h
+vpp_restart_SOURCES = vat/restart.c
+vpp_restart_LDADD = -lsvmdb -lsvm -lvppinfra -lpthread -lrt
dist_bin_SCRIPTS = scripts/vppctl
diff --git a/vpp-api-test/vat/restart.c b/vpp-api-test/vat/restart.c
new file mode 100644
index 00000000000..d94ddf89505
--- /dev/null
+++ b/vpp-api-test/vat/restart.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <svmdb.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/time.h>
+#include <vppinfra/macros.h>
+
+/*
+ * Sleep for nsec nanoseconds (callers pass values below one second).
+ * If nanosleep() fails, retry with the time it reported as remaining.
+ */
+static void
+restart_sleep_nsec (long nsec)
+{
+  struct timespec _req, *req = &_req;
+  struct timespec _rem, *rem = &_rem;
+
+  req->tv_sec = 0;
+  req->tv_nsec = nsec;
+  while (nanosleep (req, rem) < 0)
+    *req = *rem;
+}
+
+/*
+ * Restart vpp and do not return until the new instance has published
+ * its pid in the shared-memory database.
+ *
+ * Accepted input: "verbose" / "v" (progress output on stdout) and
+ * "chroot <path>" (passed through to svmdb_map_chroot).
+ *
+ * Returns 0 on success. Non-zero codes, as written below:
+ *   1  unknown command-line input
+ *   2  'vpp_pid' variable missing, or the recorded pid is not alive
+ *   3  vpp never published a pid, or SIGTERM could not be sent
+ *   4  stat of the API segment failed, or the new pid never appeared
+ *   5  no new API segment inode appeared
+ *
+ * NOTE(review): clib_error / clib_unix_error may terminate the process
+ * instead of returning, in which case the specific codes above are never
+ * seen by the caller -- confirm against vppinfra/error.h.
+ */
+int restart_main_fn (unformat_input_t * i)
+{
+  int verbose = 0;
+  int old_pid;
+  int wait;
+  u8 * chroot_path = 0;
+  svmdb_client_t * svmdb_client;
+  volatile pid_t *pidp;
+  struct stat statb;
+  ino_t old_inode;
+  int sleeps;
+
+  if (geteuid())
+    clib_error ("vpp_restart: must be root...");
+
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "verbose") || unformat (i, "v"))
+        verbose = 1;
+      else if (unformat (i, "chroot %s", &chroot_path))
+        ;
+      else
+        {
+          clib_error ("unknown input `%U'", format_unformat_error, i);
+          return 1;
+        }
+    }
+
+  /*
+   * Step 1: look up the current VPP pid in the shared-memory database
+   */
+  svmdb_client = svmdb_map_chroot ((char *) chroot_path);
+
+  pidp = svmdb_local_get_variable_reference (svmdb_client,
+                                             SVMDB_NAMESPACE_VEC,
+                                             "vpp_pid");
+  if (pidp == 0)
+    {
+      /* Release the mapping on every error path, not just one of them */
+      svmdb_unmap (svmdb_client);
+      clib_error ("'vpp_pid' svm variable not found, vpp has never run?");
+      return 2;
+    }
+
+  /* Spin for up to 10 seconds (1000 x 10 ms) for vpp to start */
+  for (wait = 0; wait < 1000; wait++)
+    {
+      restart_sleep_nsec (10000*1000); /* 10 ms */
+
+      if (*pidp)
+        goto found2;
+    }
+
+  svmdb_unmap (svmdb_client);
+  clib_error ("VPP not running...");
+  return 3;
+
+ found2:
+
+  old_pid = *pidp;
+
+  /*
+   * Step 2: sanity check the pid we discovered
+   */
+  if (verbose)
+    fformat(stdout, "Sanity check current vpp pid %d\n", old_pid);
+
+  /* Signal 0 performs the existence/permission check without signalling */
+  if (kill (old_pid, 0) < 0)
+    {
+      svmdb_unmap (svmdb_client);
+      clib_error ("vpp current pid %d not running...", old_pid);
+      return 2;
+    }
+
+  if (verbose)
+    fformat(stdout, "Sanity check vpp pid %d OK\n", old_pid);
+
+  /*
+   * Step 3: figure out the current vpp <--> client shared-VM file
+   * inode number
+   */
+  if (stat("/dev/shm/vpe-api", &statb) < 0)
+    {
+      /* Report first so errno still belongs to the failed stat() */
+      clib_unix_error ("stat fail");
+      svmdb_unmap (svmdb_client);
+      return 4;
+    }
+
+  old_inode = statb.st_ino;
+
+  /* ino_t may be wider than unsigned int: print via unsigned long long */
+  if (verbose)
+    fformat(stdout, "Old inode %llu\n", (unsigned long long) old_inode);
+
+  /* Note: restart wipes out the shared VM database*/
+  svmdb_unmap (svmdb_client);
+
+  /*
+   * Step 4: send SIGTERM to vpp.
+   * systemd et al. will restart vpp after wiping out the shared-VM
+   * database and (crucially) the shared API messaging segment
+   */
+
+  if (kill (old_pid, SIGTERM) < 0)
+    {
+      clib_unix_error ("SIGTERM fail");
+      return 3;
+    }
+
+  sleeps = 0;
+
+  /*
+   * Step 5: wait up to 15 seconds (150 x 100 ms) for a new incarnation
+   * of the shared-VM API segment to appear. A missing file or an
+   * unchanged inode both mean "not yet".
+   */
+  for (wait = 0; wait < 150; wait++)
+    {
+      if ((stat("/dev/shm/vpe-api", &statb) == 0)
+          && statb.st_ino != old_inode)
+        goto new_inode;
+
+      restart_sleep_nsec (100000*1000); /* 100 ms */
+      sleeps++;
+    }
+
+  clib_error ("Timeout waiting for new inode to appear...");
+  return 5;
+
+ new_inode:
+  if (verbose && sleeps > 0)
+    fformat(stdout, "Inode sleeps %d\n", sleeps);
+
+  if (verbose)
+    fformat(stdout, "New inode %llu\n", (unsigned long long) statb.st_ino);
+
+  /*
+   * Step 6: remap the SVM database
+   */
+  svmdb_client = svmdb_map_chroot ((char *) chroot_path);
+
+  pidp = svmdb_local_get_variable_reference (svmdb_client,
+                                             SVMDB_NAMESPACE_VEC,
+                                             "vpp_pid");
+  if (pidp == 0)
+    {
+      svmdb_unmap (svmdb_client);
+      clib_error ("post_restart: 'vpp_pid' svm variable not found, "
+                  "vpp did not restart?");
+      return 2;
+    }
+
+  sleeps = 0;
+
+  /*
+   * Step 7: wait for vpp to publish its new PID
+   */
+
+  /* Spin for up to 15 seconds (150 x 100 ms) */
+  for (wait = 0; wait < 150; wait++)
+    {
+      /* A zero or unchanged value means the new vpp has not written it */
+      if (*pidp && (*pidp != old_pid))
+        goto restarted;
+
+      restart_sleep_nsec (100000*1000); /* 100 ms */
+      sleeps++;
+    }
+
+  svmdb_unmap (svmdb_client);
+  clib_error ("Timeout waiting for vpp to publish pid after restart...");
+  return 4;
+
+ restarted:
+
+  /* Done... */
+
+  if (verbose && sleeps)
+    fformat(stdout, "pid sleeps %d\n", sleeps);
+
+  if (verbose)
+    fformat (stdout, "New PID %d... Restarted...\n", *pidp);
+
+  svmdb_unmap (svmdb_client);
+  return 0;
+}
+
+/*
+ * vpp_restart entry point: initialize the clib heap, turn argv into an
+ * unformat input, run restart_main_fn on it and use its result as the
+ * process exit status.
+ */
+int main (int argc, char **argv)
+{
+  unformat_input_t input;
+  int rv;
+
+  /* 64ULL<<20 is 64 MB; presumably the heap size -- confirm in vppinfra */
+  clib_mem_init (0, 64ULL<<20);
+
+  unformat_init_command_line (&input, argv);
+  rv = restart_main_fn (&input);
+  unformat_free (&input);
+
+  return rv;
+}
diff --git a/vpp/api/gmon.c b/vpp/api/gmon.c
index 32786d63b38..8ab890fceb4 100644
--- a/vpp/api/gmon.c
+++ b/vpp/api/gmon.c
@@ -92,6 +92,18 @@ static u64 get_significant_errors(gmon_main_t * gm)
return (significant_errors);
}
+/*
+ * Store this process's pid through gmon_main.vpef_pid_ptr.
+ * Registered with VLIB_API_INIT_FUNCTION; always returns 0 (no error).
+ */
+static clib_error_t *
+publish_pid (vlib_main_t *vm)
+{
+  *gmon_main.vpef_pid_ptr = getpid();
+  return 0;
+}
+VLIB_API_INIT_FUNCTION(publish_pid);
+
+
static uword
gmon_process (vlib_main_t * vm,
vlib_node_runtime_t * rt,
@@ -101,12 +113,8 @@ gmon_process (vlib_main_t * vm,
u64 input_packets, last_input_packets, new_sig_errors;
f64 last_runtime, dt, now;
gmon_main_t *gm = &gmon_main;
- pid_t vpefpid;
int i;
- vpefpid = getpid();
- *gm->vpef_pid_ptr = vpefpid;
-
last_runtime = 0.0;
last_input_packets = 0;
diff --git a/vpp/app/version.c b/vpp/app/version.c
index 5eb1fc5aad1..588aadf29e9 100644
--- a/vpp/app/version.c
+++ b/vpp/app/version.c
@@ -59,6 +59,7 @@ show_vpe_version_command_fn (vlib_main_t * vm,
_("Compiler", "%s", vpe_compiler);
_("CPU model name", "%U", format_cpu_model_name);
_("CPU microarchitecture", "%U", format_cpu_uarch);
+ _("Current PID", "%d", getpid());
#if DPDK > 0
_("DPDK Version", "%s", rte_version());
_("DPDK EAL init args", "%s", dpdk_main.eal_init_args_str);