summaryrefslogtreecommitdiffstats
path: root/vlib/vlib/unix
diff options
context:
space:
mode:
Diffstat (limited to 'vlib/vlib/unix')
-rw-r--r--vlib/vlib/unix/cj.c218
-rw-r--r--vlib/vlib/unix/cj.h68
-rw-r--r--vlib/vlib/unix/cli.c900
-rw-r--r--vlib/vlib/unix/input.c244
-rw-r--r--vlib/vlib/unix/main.c471
-rw-r--r--vlib/vlib/unix/mc_socket.c972
-rw-r--r--vlib/vlib/unix/mc_socket.h126
-rw-r--r--vlib/vlib/unix/pci.c577
-rw-r--r--vlib/vlib/unix/pci.h94
-rw-r--r--vlib/vlib/unix/physmem.c472
-rw-r--r--vlib/vlib/unix/physmem.h59
-rw-r--r--vlib/vlib/unix/plugin.c210
-rw-r--r--vlib/vlib/unix/plugin.h88
-rw-r--r--vlib/vlib/unix/unix.h177
14 files changed, 4676 insertions, 0 deletions
diff --git a/vlib/vlib/unix/cj.c b/vlib/vlib/unix/cj.c
new file mode 100644
index 00000000000..665a13fa4f5
--- /dev/null
+++ b/vlib/vlib/unix/cj.c
@@ -0,0 +1,218 @@
+/*
+ *------------------------------------------------------------------
+ * cj.c
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <vlib/vlib.h>
+
+#include <vlib/unix/cj.h>
+
+cj_main_t cj_main;
+
+/*
+ * Append one record to the circular journal.
+ *
+ * Does nothing while logging is disabled.  Otherwise the tail counter
+ * is advanced atomically, mapped onto a slot with the mask
+ * (num_records - 1), and the slot is filled with the current time, the
+ * CPU number, the caller's type code and the two data pointers stored
+ * as 64-bit words.
+ */
+void
+cj_log (u32 type, void * data0, void * data1)
+{
+  cj_main_t * journal = &cj_main;
+  cj_record_t * slot;
+  u64 sequence;
+
+  if (! journal->enable)
+    return;
+
+  sequence = __sync_add_and_fetch (&journal->tail, 1);
+  slot = (cj_record_t *) (journal->records
+                          + (sequence & (journal->num_records - 1)));
+
+  slot->time = vlib_time_now (journal->vlib_main);
+  slot->cpu = os_get_cpu_number();
+  slot->type = type;
+  slot->data[0] = (u64) data0;
+  slot->data[1] = (u64) data1;
+}
+
+/* Switch journal logging off; cj_log returns early while the flag is 0. */
+void cj_stop(void)
+{
+  cj_main.enable = 0;
+}
+
+
+/* Init hook: remember the vlib main pointer cj_log uses for timestamps. */
+clib_error_t * cj_init (vlib_main_t * vm)
+{
+  cj_main.vlib_main = vm;
+
+  /* No error. */
+  return 0;
+}
+VLIB_INIT_FUNCTION (cj_init);
+
+/*
+ * Parse the "cj" configuration section.
+ *
+ *   records <n>   requested journal size, passed through max_pow2()
+ *   on            enable logging once the journal has been allocated
+ *
+ * Without a non-zero "records" value the journal stays unallocated and
+ * disabled, so "on" by itself has no effect.
+ *
+ * Returns 0 on success, or an error for unrecognized input.
+ */
+static clib_error_t *
+cj_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  cj_main_t * cjm = &cj_main;
+  int matched = 0;
+  int enable = 0;
+
+  while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "records %d", &cjm->num_records))
+        matched = 1;
+      else if (unformat (input, "on"))
+        enable = 1;
+      else
+        return clib_error_return (0, "cj_config: unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  /*
+   * "records 0" is handled like a missing "records" clause.  A zero
+   * size would leave nothing to allocate while the slot mask
+   * (num_records - 1) used by cj_log became all ones; the rest of this
+   * file already reports num_records == 0 as "CJ not configured...".
+   */
+  if (matched == 0 || cjm->num_records == 0)
+    return 0;
+
+  cjm->num_records = max_pow2 (cjm->num_records);
+  vec_validate (cjm->records, cjm->num_records-1);
+
+  /* All-ones fill: cj_dump_internal treats cpu == ~0 as "never written". */
+  memset (cjm->records, 0xff, cjm->num_records * sizeof (cj_record_t));
+
+  /* Start at ~0 so the first increment in cj_log lands on slot 0. */
+  cjm->tail = ~0;
+  cjm->enable = enable;
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (cj_config, "cj");
+
+/*
+ * Set the journal enable flag to is_enable.  When no records have been
+ * configured the flag is left alone and a message is printed instead.
+ */
+void cj_enable_disable (int is_enable)
+{
+  cj_main_t * journal = &cj_main;
+
+  if (journal->num_records == 0)
+    {
+      vlib_cli_output (journal->vlib_main, "CJ not configured...");
+      return;
+    }
+
+  journal->enable = is_enable;
+}
+
+/* Print one record to stderr: cpu, time, type and both data words in hex. */
+static inline void cj_dump_one_record (cj_record_t * r)
+{
+  long long unsigned int word0 = (long long unsigned int) r->data[0];
+  long long unsigned int word1 = (long long unsigned int) r->data[1];
+
+  fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
+           r->cpu, r->time, r->type, word0, word1);
+}
+
+/*
+ * Dump the journal to stderr, oldest record first.
+ *
+ * filter0_enable / filter0: when enabled, only records whose data[0]
+ *                           equals filter0 are printed.
+ * filter1_enable / filter1: same test against data[1].
+ *
+ * Prints a short notice and returns when the journal has not been
+ * configured or nothing has been logged yet.
+ */
+static void cj_dump_internal (u8 filter0_enable, u64 filter0,
+                              u8 filter1_enable, u64 filter1)
+{
+  cj_main_t * cjm = &cj_main;
+  cj_record_t * r;
+  u64 tail;
+  u32 i, index;
+
+  if (cjm->num_records == 0)
+    {
+      fprintf (stderr, "CJ not configured...\n");
+      return;
+    }
+
+  /* Read the volatile tail once so both passes use the same split point. */
+  tail = cjm->tail;
+
+  if (tail == (u64)~0)
+    {
+      fprintf (stderr, "No data collected...\n");
+      return;
+    }
+
+  /*
+   * Has the trace wrapped?  The slot after the tail is the oldest one;
+   * it still holds the all-ones fill if it was never written.
+   */
+  index = (tail+1) & (cjm->num_records - 1);
+  r = &(cjm->records[index]);
+
+  if (r->cpu != (u32)~0)
+    {
+      /* Yes, dump from tail + 1 to the end */
+      for (i = index; i < cjm->num_records; i++, r++)
+        {
+          if (filter0_enable && (r->data[0] != filter0))
+            continue;
+          if (filter1_enable && (r->data[1] != filter1))
+            continue;
+          cj_dump_one_record (r);
+        }
+    }
+
+  /*
+   * Dump from the beginning through the final tail.  The bound is the
+   * masked slot index rather than the raw tail counter, which keeps
+   * growing and would run past the end of records[] after a wrap.
+   */
+  r = cjm->records;
+  for (i = 0; i < index; i++, r++)
+    {
+      if (filter0_enable && (r->data[0] != filter0))
+        continue;
+      if (filter1_enable && (r->data[1] != filter1))
+        continue;
+      cj_dump_one_record (r);
+    }
+}
+
+/* Dump every journal record to stderr with both filters disabled. */
+void cj_dump (void)
+{
+  const u8 filter_off = 0;
+
+  cj_dump_internal (filter_off, 0, filter_off, 0);
+}
+
+/* Dump only the records whose data[0] equals filter0. */
+void cj_dump_filter_data0 (u64 filter0)
+{
+  const u8 match_data0 = 1;
+  const u8 match_data1 = 0;
+
+  cj_dump_internal (match_data0, filter0, match_data1, 0);
+}
+
+/* Dump only the records whose data[1] equals filter1. */
+void cj_dump_filter_data1 (u64 filter1)
+{
+  const u8 match_data0 = 0;
+  const u8 match_data1 = 1;
+
+  cj_dump_internal (match_data0, 0, match_data1, filter1);
+}
+
+/*
+ * Dump only the records that match both filters: data[0] == filter0
+ * and data[1] == filter1.
+ */
+void cj_dump_filter_data12 (u64 filter0, u64 filter1)
+{
+  const u8 filter_on = 1;
+
+  cj_dump_internal (filter_on, filter0, filter_on, filter1);
+}
+
+/*
+ * CLI handler for "cj".
+ *
+ * Accepts any mix of "enable"/"on", "disable"/"off" and "dump".  The
+ * whole line is parsed first; then the enable state is applied (if one
+ * was given) and finally the journal is dumped (if requested).
+ *
+ * Returns 0, or an error for unrecognized input.
+ */
+static clib_error_t *
+cj_command_fn (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+  int want_enable = -1;         /* -1: leave the enable flag untouched */
+  int want_dump = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable") || unformat (input, "on"))
+        want_enable = 1;
+      else if (unformat (input, "disable") || unformat (input, "off"))
+        want_enable = 0;
+      else if (unformat (input, "dump"))
+        want_dump = 1;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (want_enable != -1)
+    cj_enable_disable (want_enable);
+
+  if (want_dump)
+    cj_dump ();
+
+  return 0;
+}
+
+/* CLI registration for "cj"; the help text lists the keywords cj_command_fn parses. */
+VLIB_CLI_COMMAND (cj_command,static) = {
+  .path = "cj",
+  .short_help = "cj [enable | on | disable | off] [dump]",
+  .function = cj_command_fn,
+};
+
diff --git a/vlib/vlib/unix/cj.h b/vlib/vlib/unix/cj.h
new file mode 100644
index 00000000000..3c37f2bf22f
--- /dev/null
+++ b/vlib/vlib/unix/cj.h
@@ -0,0 +1,68 @@
+/*
+ *------------------------------------------------------------------
+ * cj.h
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_cj_h__
+#define __included_cj_h__
+
+/* One journal entry, as filled in by cj_log(). */
+typedef struct {
+ f64 time; /* vlib_time_now() when the record was logged */
+ u32 cpu; /* os_get_cpu_number() of the logger; all ones in a slot never written */
+ u32 type; /* caller-supplied type code */
+ u64 data[2]; /* the two caller-supplied pointers, stored as 64-bit words */
+} cj_record_t;
+
+/* Global journal state; the single instance is cj_main in cj.c. */
+typedef struct {
+ volatile u64 tail; /* running count of the last record written; ~0 until the first cj_log */
+ cj_record_t * records; /* record vector, indexed by tail & (num_records - 1) */
+ u32 num_records; /* journal size after max_pow2(); 0 means not configured */
+ volatile u32 enable; /* non-zero while cj_log should record */
+
+ vlib_main_t * vlib_main; /* set by cj_init; used for timestamps and CLI output */
+} cj_main_t;
+
+void cj_log (u32 type, void * data0, void * data1);
+
+/*
+ * Supply in application main, so we can log from any library...
+ * Declare a weak reference in the library, off you go.
+ *
+ * Expanding DECLARE_CJ_GLOBAL_LOG declares cj_global_log() with the
+ * weak attribute and then defines it together with three globals
+ * (__cj_type, __cj_data0, __cj_data1).  The definition only stores its
+ * three arguments into those globals.
+ */
+
+#define DECLARE_CJ_GLOBAL_LOG \
+void cj_global_log (unsigned type, void * data0, void * data1) \
+ __attribute__ ((weak)); \
+ \
+unsigned __cj_type; \
+void * __cj_data0; \
+void * __cj_data1; \
+ \
+void \
+cj_global_log (unsigned type, void * data0, void * data1) \
+{ \
+ __cj_type = type; \
+ __cj_data0 = data0; \
+ __cj_data1 = data1; \
+}
+
+/*
+ * NOTE(review): the #define line below has no trailing backslash, so
+ * CJ_GLOBAL_LOG_PROTOTYPE expands to nothing and the weak prototype
+ * that follows is declared unconditionally for every includer of this
+ * header.  This looks unintended, but changing it would alter what
+ * includers see - confirm against users of the macro before fixing.
+ */
+#define CJ_GLOBAL_LOG_PROTOTYPE
+void cj_global_log (unsigned type, void * data0, void * data1) \
+ __attribute__ ((weak)); \
+
+void cj_stop(void);
+
+#endif /* __included_cj_h__ */
diff --git a/vlib/vlib/unix/cli.c b/vlib/vlib/unix/cli.c
new file mode 100644
index 00000000000..3cb13fc8550
--- /dev/null
+++ b/vlib/vlib/unix/cli.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: Unix stdin/socket CLI.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <termios.h>
+#include <unistd.h>
+#include <arpa/telnet.h>
+
+/* Per-session CLI state: one per stdin or accepted socket connection. */
+typedef struct {
+ /* Index of this session's entry in unix_main.file_pool. */
+ u32 unix_file_index;
+
+ /* Vector of output pending write to file descriptor. */
+ u8 * output_vector;
+
+ /* Vector of input saved by Unix input node to be processed by
+ CLI process. */
+ u8 * input_vector;
+
+ /* Non-zero when input goes through unix_cli_line_edit(). */
+ u8 has_history;
+ /* Vector of previously entered command lines (each a u8 vector). */
+ u8 ** command_history;
+ /* Line currently being edited. */
+ u8 * current_command;
+ /* Index into command_history used by ^P/^N and by history search. */
+ i32 excursion;
+ /* Number of history entries at which the oldest one is dropped. */
+ u32 history_limit;
+ /* Characters typed so far while in search mode. */
+ u8 * search_key;
+ /* 0: not searching, -1: reverse search (^R), 1: forward search (^S). */
+ int search_mode;
+
+ /* Node index of the CLI process signalled when this session has input. */
+ u32 process_node_index;
+} unix_cli_file_t;
+
+/*
+ * Release the session's input and output buffers.  The history and
+ * line-editing vectors are not touched here.
+ */
+always_inline void
+unix_cli_file_free (unix_cli_file_t * f)
+{
+  vec_free (f->input_vector);
+  vec_free (f->output_vector);
+}
+
+/* State shared by all CLI sessions. */
+typedef struct {
+ /* Prompt string for CLI. */
+ u8 * cli_prompt;
+
+ /* Pool of active sessions; pool indices serve as CLI file indices. */
+ unix_cli_file_t * cli_file_pool;
+
+ /* Node indices of finished CLI processes, kept so they can be reused. */
+ u32 * unused_cli_process_node_indices;
+
+ /* File pool index of current input. */
+ u32 current_input_file_index;
+} unix_cli_main_t;
+
+/* The single instance used throughout this file. */
+static unix_cli_main_t unix_cli_main;
+
+/*
+ * Queue buffer_bytes bytes from buffer on the session's output vector.
+ * If the vector is non-empty afterwards, make sure the file is flagged
+ * as having data to write, calling file_update only when the flag was
+ * not already set.
+ */
+static void
+unix_cli_add_pending_output (unix_file_t * uf,
+                             unix_cli_file_t * cf,
+                             u8 * buffer,
+                             uword buffer_bytes)
+{
+  unix_main_t * um = &unix_main;
+  int already_flagged;
+
+  vec_add (cf->output_vector, buffer, buffer_bytes);
+
+  if (! (vec_len (cf->output_vector) > 0))
+    return;
+
+  already_flagged = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) != 0;
+  uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+
+  if (already_flagged)
+    return;
+
+  um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+}
+
+/*
+ * Drop the first n_bytes bytes of the session's output vector.  Once
+ * the vector is empty, clear the data-to-write flag, calling
+ * file_update only when the flag had been set.
+ */
+static void
+unix_cli_del_pending_output (unix_file_t * uf,
+                             unix_cli_file_t * cf,
+                             uword n_bytes)
+{
+  unix_main_t * um = &unix_main;
+  int was_flagged;
+
+  vec_delete (cf->output_vector, n_bytes, 0);
+
+  if (! (vec_len (cf->output_vector) <= 0))
+    return;
+
+  was_flagged = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) != 0;
+  uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+
+  if (was_flagged)
+    um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+}
+
+/*
+ * VLIB cli output function.
+ *
+ * Writes straight to the session's descriptor when nothing is queued
+ * yet; whatever could not be written (or everything, if output is
+ * already pending) is appended to the session's output vector.  A
+ * write error other than EAGAIN is reported and the data is dropped.
+ */
+static void unix_vlib_cli_output (uword cli_file_index,
+                                  u8 * buffer,
+                                  uword buffer_bytes)
+{
+  unix_main_t * um = &unix_main;
+  unix_cli_main_t * cm = &unix_cli_main;
+  unix_cli_file_t * session;
+  unix_file_t * file;
+  int n_written = 0;
+
+  session = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+  file = pool_elt_at_index (um->file_pool, session->unix_file_index);
+
+  /* Only write directly when it cannot overtake queued output. */
+  if (vec_len (session->output_vector) == 0)
+    n_written = write (file->file_descriptor, buffer, buffer_bytes);
+
+  if (n_written < 0 && errno != EAGAIN)
+    {
+      clib_unix_warning ("write");
+      return;
+    }
+
+  if ((word) n_written < (word) buffer_bytes)
+    {
+      /* EAGAIN: nothing went out, queue the whole buffer. */
+      if (n_written < 0)
+        n_written = 0;
+      unix_cli_add_pending_output (file, session, buffer + n_written,
+                                   buffer_bytes - n_written);
+    }
+}
+
+/*
+ * Character-mode line editor for sessions with history enabled.
+ *
+ * Consumes cf->input_vector one byte at a time, echoing edits to the
+ * session and maintaining cf->current_command, the command history and
+ * the ^R/^S search state.
+ *
+ * Returns 0 once a line is complete: the line, terminated by "\r\n",
+ * has then been copied into cf->input_vector for execution.  Returns 1
+ * when the input ran out first; cf->input_vector is emptied.
+ */
+static int unix_cli_line_edit (unix_main_t * um, unix_cli_file_t * cf)
+{
+  unix_file_t * uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+  u8 * prev;
+  int i, j, delta;
+
+  for (i = 0; i < vec_len (cf->input_vector); i++)
+    {
+      switch (cf->input_vector[i])
+        {
+        case 0:
+          continue;
+
+        case '?':
+          /* Erase the current command (if any) plus ?*/
+          for (j = 0; j < (vec_len (cf->current_command)+1); j++)
+            unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+          unix_cli_add_pending_output (uf, cf, (u8 *) "\r\nHistory:\r\n", 12);
+
+          for (j = 0; j < vec_len (cf->command_history); j++)
+            {
+              unix_cli_add_pending_output (uf, cf, cf->command_history[j],
+                                           vec_len(cf->command_history[j]));
+              unix_cli_add_pending_output (uf, cf, (u8 *) "\r\n", 2);
+            }
+          goto crlf;
+
+        /* ^R - reverse search, ^S - forward search */
+        case 'R' - '@':
+        case 'S' - '@':
+          if (cf->search_mode == 0)
+            {
+              /* Erase the current command (if any) plus ^R */
+              for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+                unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+              vec_reset_length (cf->search_key);
+              vec_reset_length (cf->current_command);
+              if (cf->input_vector[i] == 'R' - '@')
+                cf->search_mode = -1;
+              else
+                cf->search_mode = 1;
+            }
+          else
+            {
+              /* Already searching: step to the next candidate. */
+              if (cf->input_vector[i] == 'R' - '@')
+                cf->search_mode = -1;
+              else
+                cf->search_mode = 1;
+
+              cf->excursion += cf->search_mode;
+              unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+              goto search_again;
+            }
+          break;
+
+        /* ^U - line-kill */
+        case 'U'-'@':
+          /* Erase the command, plus ^U */
+          for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+            unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+          vec_reset_length (cf->current_command);
+          cf->search_mode = 0;
+          continue;
+
+        /* ^P - previous, ^N - next */
+        case 'P' - '@':
+        case 'N' - '@':
+          cf->search_mode = 0;
+          /* Erase the command, plus ^P */
+          for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+            unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+          vec_reset_length (cf->current_command);
+          if (vec_len (cf->command_history))
+            {
+              if (cf->input_vector[i] == 'P' - '@')
+                delta = -1;
+              else
+                delta = 1;
+
+              cf->excursion += delta;
+
+              /* Wrap around at either end of the history. */
+              if (cf->excursion > (i32) vec_len (cf->command_history) -1)
+                cf->excursion = 0;
+              else if (cf->excursion < 0)
+                cf->excursion = vec_len (cf->command_history) -1;
+
+              prev = cf->command_history [cf->excursion];
+              vec_validate (cf->current_command, vec_len(prev)-1);
+
+              memcpy (cf->current_command, prev, vec_len(prev));
+              _vec_len (cf->current_command) = vec_len(prev);
+              unix_cli_add_pending_output (uf, cf, cf->current_command,
+                                           vec_len (cf->current_command));
+              break;
+            }
+          break;
+
+        /* DEL / ^H - erase one character */
+        case 0x7f:
+        case 'H' - '@':
+          for (j = 0; j < 2; j++)
+            unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+          if (vec_len (cf->current_command))
+            {
+              unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+              _vec_len (cf->current_command)--;
+            }
+          cf->search_mode = 0;
+          cf->excursion = 0;
+          vec_reset_length (cf->search_key);
+          break;
+
+        case '\r':
+        case '\n':
+        crlf:
+          vec_add1 (cf->current_command, '\r');
+          vec_add1 (cf->current_command, '\n');
+          unix_cli_add_pending_output (uf, cf, (u8 *) "\b\b \b\b\r\n", 8);
+
+          /* Hand the finished line back through the input vector. */
+          vec_validate (cf->input_vector, vec_len(cf->current_command)-1);
+          memcpy (cf->input_vector, cf->current_command,
+                  vec_len(cf->current_command));
+          _vec_len(cf->input_vector) = _vec_len (cf->current_command);
+
+          /* Don't add blank lines to the cmd history */
+          if (vec_len (cf->current_command) > 2)
+            {
+              /*
+               * Drop the oldest entry only when a line is actually
+               * being stored, so blank lines and failed searches do
+               * not erode a full history.
+               */
+              if (vec_len (cf->command_history) > 0
+                  && vec_len(cf->command_history) >= cf->history_limit)
+                {
+                  vec_free (cf->command_history[0]);
+                  vec_delete (cf->command_history, 1, 0);
+                }
+
+              /* Store the line without its "\r\n"; history now owns it. */
+              _vec_len (cf->current_command) -= 2;
+              vec_add1 (cf->command_history, cf->current_command);
+              cf->current_command = 0;
+            }
+          else
+            vec_reset_length (cf->current_command);
+          cf->excursion = 0;
+          cf->search_mode = 0;
+          vec_reset_length (cf->search_key);
+          return 0;
+
+        /* telnet "mode character" blort, echo but don't process. */
+        case 0xff:
+          {
+            /* Echo at most 6 bytes, and never past the end of the input. */
+            uword n_left = vec_len (cf->input_vector) - i;
+
+            unix_cli_add_pending_output (uf, cf, cf->input_vector + i,
+                                         n_left < 6 ? n_left : 6);
+          }
+          /*
+           * NOTE(review): together with the loop increment this skips
+           * 7 bytes, one more than were echoed - confirm that this is
+           * intended.
+           */
+          i += 6;
+          continue;
+
+        default:
+          if (cf->search_mode)
+            {
+              int j, k, limit, offset;
+              u8 * item;
+
+              vec_add1 (cf->search_key, cf->input_vector[i]);
+
+            search_again:
+              /* Try each history entry once, starting at the excursion. */
+              for (j = 0; j < vec_len(cf->command_history); j++)
+                {
+                  if (cf->excursion > (i32) vec_len (cf->command_history) -1)
+                    cf->excursion = 0;
+                  else if (cf->excursion < 0)
+                    cf->excursion = vec_len (cf->command_history) -1;
+
+                  item = cf->command_history[cf->excursion];
+
+                  limit = (vec_len(cf->search_key) > vec_len (item)) ?
+                    vec_len(item) : vec_len (cf->search_key);
+
+                  /* Look for the key at every offset within this entry. */
+                  for (offset = 0; offset <= vec_len(item) - limit; offset++)
+                    {
+                      for (k = 0; k < limit; k++)
+                        {
+                          if (item[k+offset] != cf->search_key[k])
+                            goto next_offset;
+                        }
+                      goto found_at_offset;
+
+                    next_offset:
+                      ;
+                    }
+                  goto next;
+
+                found_at_offset:
+                  for (j = 0; j < vec_len (cf->current_command)+1; j++)
+                    unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+                  vec_validate (cf->current_command, vec_len(item)-1);
+
+                  memcpy (cf->current_command, item, vec_len(item));
+                  _vec_len (cf->current_command) = vec_len(item);
+                  unix_cli_add_pending_output (uf, cf, cf->current_command,
+                                               vec_len (cf->current_command));
+                  goto found;
+
+                next:
+                  cf->excursion += cf->search_mode;
+                }
+
+              unix_cli_add_pending_output (uf, cf, (u8 *)"\r\nno match..", 12);
+              vec_reset_length (cf->search_key);
+              vec_reset_length (cf->current_command);
+              cf->search_mode = 0;
+              goto crlf;
+            }
+          else
+            vec_add1 (cf->current_command, cf->input_vector[i]);
+
+        found:
+
+          break;
+        }
+    }
+  vec_reset_length(cf->input_vector);
+  return 1;
+}
+
+/*
+ * Handle the input queued for one CLI session.
+ *
+ * Sessions with has_history set go through unix_cli_line_edit() first;
+ * a non-zero return from it ends processing here.  Otherwise the input
+ * is copied to the log descriptor when one is set, passed to
+ * vlib_cli_input() with unix_vlib_cli_output as the output function,
+ * and finally the input vector is emptied and the prompt is queued.
+ */
+static void unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t * um = &unix_main;
+ unix_file_t * uf;
+ unix_cli_file_t * cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ unformat_input_t input;
+ int vlib_parse_eval (u8 *);
+
+ /* Try vlibplex first. Someday... */
+ /* The constant 0 keeps this branch from ever running. */
+ if (0 && vlib_parse_eval (cf->input_vector) == 0)
+ goto done;
+
+ /* Line edit, echo, etc. */
+ if (cf->has_history && unix_cli_line_edit (um, cf))
+ return;
+
+ /* Command log: format_timeval output, session index, then the input. */
+ if (um->log_fd)
+ {
+ static u8 * lv;
+ vec_reset_length (lv);
+ lv = format (lv, "%U[%d]: %v",
+ format_timeval,
+ 0 /* current bat-time */,
+ 0 /* current bat-format */,
+ cli_file_index,
+ cf->input_vector);
+ {
+ /* The result of the log write is deliberately ignored. */
+ int rv __attribute__((unused)) =
+ write (um->log_fd, lv, vec_len(lv));
+ }
+ }
+
+ unformat_init_vector (&input, cf->input_vector);
+
+ /* Remove leading white space from input. */
+ (void) unformat (&input, "");
+
+ /* Recorded so that the "quit" command knows which session to close. */
+ cm->current_input_file_index = cli_file_index;
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ vlib_cli_input (um->vlib_main, &input, unix_vlib_cli_output, cli_file_index);
+
+ /* Re-fetch pointer since pool may have moved. */
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+
+ /* Zero buffer since otherwise unformat_free will call vec_free on it. */
+ input.buffer = 0;
+
+ unformat_free (&input);
+
+ /* Re-use input vector. */
+done:
+ _vec_len (cf->input_vector) = 0;
+
+ /* Prompt. */
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+ unix_cli_add_pending_output (uf, cf,
+ cm->cli_prompt,
+ vec_len (cm->cli_prompt));
+}
+
+/*
+ * Tear down one CLI session: free its editing and history vectors,
+ * remove its unix file, free its I/O buffers and return the session to
+ * the pool.  For the session on file descriptor 0 the main loop is
+ * asked to exit first.
+ */
+static void unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
+{
+  unix_main_t * um = &unix_main;
+  unix_cli_file_t * session;
+  unix_file_t * file;
+  int n;
+
+  session = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+  file = pool_elt_at_index (um->file_pool, session->unix_file_index);
+
+  /* Quit/EOF on stdin means quit program. */
+  if (file->file_descriptor == 0)
+    clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
+
+  vec_free (session->current_command);
+  vec_free (session->search_key);
+
+  /* Each history entry is its own vector. */
+  for (n = 0; n < vec_len (session->command_history); n++)
+    vec_free (session->command_history[n]);
+  vec_free (session->command_history);
+
+  unix_file_del (um, file);
+
+  unix_cli_file_free (session);
+  pool_put (cm->cli_file_pool, session);
+}
+
+/* Events signalled to a CLI process; the event data is a CLI file index. */
+typedef enum {
+ /* Input was read into the session's input vector. */
+ UNIX_CLI_PROCESS_EVENT_READ_READY,
+ /* The session should be closed (read returned 0, or "quit" was typed). */
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+} unix_cli_process_event_type_t;
+
+/*
+ * Body of a CLI process node.
+ *
+ * Waits for events in a loop.  READ_READY events run
+ * unix_cli_process_input() for every CLI file index delivered with the
+ * event.  A QUIT event kills the listed sessions and leaves the loop,
+ * after which the node is disabled and its index is saved for re-use.
+ */
+static uword
+unix_cli_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ unix_cli_main_t * cm = &unix_cli_main;
+ uword i, * data = 0;
+
+ while (1)
+ {
+ unix_cli_process_event_type_t event_type;
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &data);
+
+ switch (event_type)
+ {
+ case UNIX_CLI_PROCESS_EVENT_READ_READY:
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_process_input (cm, data[i]);
+ break;
+
+ case UNIX_CLI_PROCESS_EVENT_QUIT:
+ /* Kill this process. */
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_kill (cm, data[i]);
+ goto done;
+ }
+
+ /* Keep the vector allocated but empty for the next round of events. */
+ if (data)
+ _vec_len (data) = 0;
+ }
+
+ done:
+ vec_free (data);
+
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* Add node index so we can re-use this process later. */
+ vec_add1 (cm->unused_cli_process_node_indices, rt->node_index);
+
+ return 0;
+}
+
+/*
+ * Write callback: try to flush the session's whole output vector and
+ * drop whatever was written from the front of it.  Returns an error
+ * for a write failure other than EAGAIN, otherwise 0.
+ */
+static clib_error_t * unix_cli_write_ready (unix_file_t * uf)
+{
+  unix_cli_main_t * cm = &unix_cli_main;
+  unix_cli_file_t * session;
+  int n_written;
+
+  session = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+  /* Flush output vector. */
+  n_written = write (uf->file_descriptor,
+                     session->output_vector,
+                     vec_len (session->output_vector));
+
+  if (n_written > 0)
+    {
+      unix_cli_del_pending_output (uf, session, n_written);
+      return /* no error */ 0;
+    }
+
+  if (n_written < 0 && errno != EAGAIN)
+    return clib_error_return_unix (0, "write");
+
+  return /* no error */ 0;
+}
+
+/*
+ * Read callback: append everything currently readable to the session's
+ * input vector, 4096 bytes per read, then signal the session's CLI
+ * process.
+ *
+ * A read returning 0 signals QUIT.  Otherwise READ_READY is signalled
+ * whenever this call queued any input, including the case where a full
+ * read is followed by EAGAIN.
+ *
+ * Returns an error for a read failure other than EAGAIN, otherwise 0.
+ */
+static clib_error_t * unix_cli_read_ready (unix_file_t * uf)
+{
+  unix_main_t * um = &unix_main;
+  unix_cli_main_t * cm = &unix_cli_main;
+  unix_cli_file_t * cf;
+  uword l, n_total;
+  int n, n_read, n_try;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+  n_total = 0;
+  n = n_try = 4096;
+
+  /* Keep reading for as long as reads fill the whole chunk. */
+  while (n == n_try)
+    {
+      l = vec_len (cf->input_vector);
+      vec_resize (cf->input_vector, l + n_try);
+
+      n = read (uf->file_descriptor, cf->input_vector + l, n_try);
+
+      /* Error? */
+      if (n < 0 && errno != EAGAIN)
+        {
+          /* Take back the room made for this read; nothing filled it. */
+          _vec_len (cf->input_vector) = l;
+          return clib_error_return_unix (0, "read");
+        }
+
+      n_read = n < 0 ? 0 : n;
+      _vec_len (cf->input_vector) = l + n_read;
+      n_total += n_read;
+    }
+
+  if (n == 0)
+    vlib_process_signal_event (um->vlib_main,
+                               cf->process_node_index,
+                               UNIX_CLI_PROCESS_EVENT_QUIT,
+                               /* event data */ uf->private_data);
+  else if (n_total > 0)
+    vlib_process_signal_event (um->vlib_main,
+                               cf->process_node_index,
+                               UNIX_CLI_PROCESS_EVENT_READ_READY,
+                               /* event data */ uf->private_data);
+
+  return /* no error */ 0;
+}
+
+/*
+ * Create a CLI session on file descriptor fd.
+ *
+ * A process node named "unix-cli-<name>" is obtained, either by
+ * re-using a previously released node or by registering a new one.  A
+ * session is then taken from the pool, the descriptor is added to the
+ * unix file pool with the CLI read/write callbacks, the prompt is
+ * queued and the process is started.
+ *
+ * Returns the new session's index in cm->cli_file_pool.
+ */
+static u32 unix_cli_file_add (unix_cli_main_t * cm, char * name, int fd)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_file_t * cf;
+ unix_file_t * uf, template = {0};
+ vlib_main_t * vm = um->vlib_main;
+ vlib_node_t * n;
+
+ /* From here on "name" is the formatted vector, not the caller's string. */
+ name = (char *) format (0, "unix-cli-%s", name);
+
+ if (vec_len (cm->unused_cli_process_node_indices) > 0)
+ {
+ uword l = vec_len (cm->unused_cli_process_node_indices);
+
+ /* Find node and give it new name. */
+ n = vlib_get_node (vm, cm->unused_cli_process_node_indices[l - 1]);
+ vec_free (n->name);
+ n->name = (u8 *) name;
+
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+
+ /* Pop the index that was just re-used. */
+ _vec_len (cm->unused_cli_process_node_indices) = l - 1;
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = unix_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 14,
+ };
+
+ r.name = name;
+ vlib_register_node (vm, &r);
+ /* NOTE(review): r.name still points at the freed vector after this. */
+ vec_free (name);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ pool_get (cm->cli_file_pool, cf);
+ memset (cf, 0, sizeof (*cf));
+
+ template.read_function = unix_cli_read_ready;
+ template.write_function = unix_cli_write_ready;
+ template.file_descriptor = fd;
+ /* The callbacks find the session again through this pool index. */
+ template.private_data = cf - cm->cli_file_pool;
+
+ cf->process_node_index = n->index;
+ cf->unix_file_index = unix_file_add (um, &template);
+ cf->output_vector = 0;
+ cf->input_vector = 0;
+
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ /* Prompt. */
+ unix_cli_add_pending_output (uf, cf,
+ cm->cli_prompt, vec_len (cm->cli_prompt));
+
+ vlib_start_process (vm, n->runtime_index);
+ return cf - cm->cli_file_pool;
+}
+
+/*
+ * Read callback for the CLI listen socket: accept one connection and
+ * create a CLI session for it, named after the peer address.
+ *
+ * Unless cli_line_mode is set, the session gets line editing with
+ * history and a telnet option sequence is queued for the client.
+ *
+ * Returns the accept error, if any, otherwise 0.
+ */
+static clib_error_t * unix_cli_listen_read_ready (unix_file_t * uf)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ clib_socket_t * s = &um->cli_listen_socket;
+ clib_socket_t client;
+ char * client_name;
+ clib_error_t * error;
+ unix_cli_file_t * cf;
+ u32 cf_index;
+
+ error = clib_socket_accept (s, &client);
+ if (error)
+ return error;
+
+ /* The trailing %c with 0 NUL-terminates the formatted name. */
+ client_name = (char *) format (0, "%U%c", format_sockaddr, &client.peer, 0);
+
+ cf_index = unix_cli_file_add (cm, client_name, client.fd);
+ cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+
+ /* No longer need CLIB version of socket. */
+ clib_socket_free (&client);
+
+ vec_free (client_name);
+
+ /* if we're supposed to run telnet session in character mode (default) */
+ if (um->cli_line_mode == 0)
+ {
+ u8 charmode_option[6];
+
+ cf->has_history = 1;
+ /* Fall back to 50 entries when no limit has been configured. */
+ cf->history_limit = um->cli_history_limit ? um->cli_history_limit : 50;
+
+ /*
+ * Set telnet client character mode, echo on, suppress "go-ahead"
+ * Empirically, this sequence works. YMMV.
+ */
+
+ /* Tell the client no linemode, echo */
+ charmode_option[0] = IAC;
+ charmode_option[1] = DONT;
+ charmode_option[2] = TELOPT_LINEMODE;
+ charmode_option[3] = IAC;
+ charmode_option[4] = DO;
+ charmode_option[5] = TELOPT_SGA;
+
+ /* From here on uf is the new session's file, not the listen socket's. */
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ unix_cli_add_pending_output (uf, cf, charmode_option,
+ ARRAY_LEN(charmode_option));
+ }
+
+ return error;
+}
+
+/*
+ * Configuration hook for the "unix-cli" section.
+ *
+ * Runs the unix config function first.  In interactive mode stdin is
+ * switched to non-blocking and a CLI session is created on it.  The CLI
+ * listen socket is then initialized and added to the unix file pool,
+ * and the prompt defaults to "VLIB: " if none has been set.
+ *
+ * Returns 0, or the error from the unix config function or from the
+ * socket initialization.
+ */
+static clib_error_t *
+unix_cli_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ int flags, standard_input_fd;
+ clib_error_t * error;
+
+ /* We depend on unix flags being set. */
+ if ((error = vlib_call_config_function (vm, unix_config)))
+ return error;
+
+ if (um->flags & UNIX_FLAG_INTERACTIVE)
+ {
+ standard_input_fd = 0;
+
+ /* Set stdin to be non-blocking. */
+ /* If the flags cannot be read, only O_NONBLOCK ends up being set. */
+ if ((flags = fcntl (standard_input_fd, F_GETFL, 0)) < 0)
+ flags = 0;
+ fcntl (standard_input_fd, F_SETFL, flags | O_NONBLOCK);
+
+ unix_cli_file_add (cm, "stdin", standard_input_fd);
+ }
+
+ {
+ /* CLI listen. */
+ clib_socket_t * s = &um->cli_listen_socket;
+ unix_file_t template = {0};
+
+ s->flags = SOCKET_IS_SERVER; /* listen, don't connect */
+
+ error = clib_socket_init (s);
+ if (error)
+ return error;
+
+ /* Only a read callback is set: it accepts new connections. */
+ template.read_function = unix_cli_listen_read_ready;
+ template.file_descriptor = s->fd;
+
+ unix_file_add (um, &template);
+ }
+
+ /* Set CLI prompt. */
+ if (! cm->cli_prompt)
+ cm->cli_prompt = format (0, "VLIB: ");
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (unix_cli_config, "unix-cli");
+
+/*
+ * Replace the CLI prompt with a copy of prompt.  A single space is
+ * appended unless the string already ends in one; an empty string
+ * therefore yields a prompt of one space.
+ */
+void vlib_unix_cli_set_prompt (char * prompt)
+{
+  unix_cli_main_t * cm = &unix_cli_main;
+  uword len = strlen (prompt);
+  /* Test the length first: an empty string has no last character. */
+  char * fmt = (len > 0 && prompt[len - 1] == ' ') ? "%s" : "%s ";
+
+  if (cm->cli_prompt)
+    vec_free (cm->cli_prompt);
+  cm->cli_prompt = format (0, fmt, prompt);
+}
+
+/*
+ * "quit" command: signal a QUIT event to the current process for the
+ * session whose input is being processed.
+ */
+static clib_error_t *
+unix_cli_quit (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+  u32 session_index = unix_cli_main.current_input_file_index;
+
+  vlib_process_signal_event (vm, vlib_current_process (vm),
+                             UNIX_CLI_PROCESS_EVENT_QUIT, session_index);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (unix_cli_quit_command, static) = {
+  .path = "quit",
+  .short_help = "Exit CLI",
+  .function = unix_cli_quit,
+};
+
+/*
+ * "exec" command: run CLI commands read from a file.
+ *
+ * Expects a file name, opens it read-only, checks the file type and
+ * feeds the contents to vlib_cli_input().
+ *
+ * Returns 0, or an error when the name is missing, the file cannot be
+ * opened or stat'ed, or it is not a regular file.
+ */
+static clib_error_t *
+unix_cli_exec (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+  char * file_name;
+  int fd;
+  unformat_input_t sub_input;
+  clib_error_t * error;
+
+  file_name = 0;
+  fd = -1;
+  error = 0;
+
+  if (! unformat (input, "%s", &file_name))
+    {
+      error = clib_error_return (0, "expecting file name, got `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  fd = open (file_name, O_RDONLY);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "failed to open `%s'", file_name);
+      goto done;
+    }
+
+  /* Make sure its a regular file. */
+  {
+    struct stat s;
+
+    if (fstat (fd, &s) < 0)
+      {
+        error = clib_error_return_unix (0, "failed to stat `%s'", file_name);
+        goto done;
+      }
+
+    if (! (S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+      {
+        error = clib_error_return (0, "not a regular file `%s'", file_name);
+        goto done;
+      }
+  }
+
+  unformat_init_unix_file (&sub_input, fd);
+
+  vlib_cli_input (vm, &sub_input, 0, 0);
+  unformat_free (&sub_input);
+
+ done:
+  /* 0 is a valid descriptor; only the initial -1 means "nothing opened". */
+  if (fd >= 0)
+    close (fd);
+  vec_free (file_name);
+
+  return error;
+}
+
+VLIB_CLI_COMMAND (cli_exec, static) = {
+  .path = "exec",
+  .short_help = "Execute commands from file",
+  .function = unix_cli_exec,
+};
+
+/*
+ * "show unix-errors" command: print entries from the Unix error
+ * history, oldest first, followed by the current time.
+ *
+ * An optional integer argument limits how many of the most recent
+ * errors are shown.
+ *
+ * Returns 0, or an error when the argument is not an integer.
+ */
+static clib_error_t *
+unix_show_errors (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_main_t * um = &unix_main;
+ clib_error_t * error = 0;
+ int i, n_errors_to_show;
+ unix_error_history_t * unix_errors = 0;
+
+ /* With no argument this large value is clamped to the history size below. */
+ n_errors_to_show = 1 << 30;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (! unformat (input, "%d", &n_errors_to_show))
+ {
+ error = clib_error_return (0, "expecting integer number of errors to show, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ n_errors_to_show = clib_min (ARRAY_LEN (um->error_history), n_errors_to_show);
+
+ /* Start at the slot before error_history_index, wrapping at 0. */
+ i = um->error_history_index > 0 ? um->error_history_index - 1 : ARRAY_LEN (um->error_history) - 1;
+
+ /* Walk backwards, copying entries until the limit or an empty slot. */
+ while (n_errors_to_show > 0)
+ {
+ unix_error_history_t * eh = um->error_history + i;
+
+ if (! eh->error)
+ break;
+
+ vec_add1 (unix_errors, eh[0]);
+ n_errors_to_show -= 1;
+ if (i == 0)
+ i = ARRAY_LEN (um->error_history) - 1;
+ else
+ i--;
+ }
+
+ if (vec_len (unix_errors) == 0)
+ vlib_cli_output (vm, "no Unix errors so far");
+ else
+ {
+ vlib_cli_output (vm, "%Ld total errors seen", um->n_total_errors);
+ /* Entries were collected newest first; print them in reverse. */
+ for (i = vec_len (unix_errors) - 1; i >= 0; i--)
+ {
+ unix_error_history_t * eh = vec_elt_at_index (unix_errors, i);
+ vlib_cli_output (vm, "%U: %U",
+ format_time_interval, "h:m:s:u", eh->time,
+ format_clib_error, eh->error);
+ }
+ vlib_cli_output (vm, "%U: time now",
+ format_time_interval, "h:m:s:u", vlib_time_now (vm));
+ }
+
+ done:
+ vec_free (unix_errors);
+ return error;
+}
+
+VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
+ .path = "show unix-errors",
+ .short_help = "Show Unix system call error history",
+ .function = unix_show_errors,
+};
+
+static clib_error_t *
+unix_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (unix_cli_init);
diff --git a/vlib/vlib/unix/input.c b/vlib/vlib/unix/input.c
new file mode 100644
index 00000000000..ea10e4fc354
--- /dev/null
+++ b/vlib/vlib/unix/input.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * input.c: Unix file input
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <signal.h>
+
+/* FIXME autoconf */
+#define HAVE_LINUX_EPOLL
+
+#ifdef HAVE_LINUX_EPOLL
+
+#include <sys/epoll.h>
+
+/* State for the epoll-based file poller in this file. */
+typedef struct {
+ /* Descriptor returned by epoll_create in linux_epoll_input_init. */
+ int epoll_fd;
+ /* Vector of events handed to epoll_pwait/epoll_wait; its length bounds
+ how many ready files one call can report. */
+ struct epoll_event * epoll_events;
+
+ /* Statistics. */
+ /* Running total of ready descriptors reported by successful waits. */
+ u64 epoll_files_ready;
+ /* Number of waits that returned without error. */
+ u64 epoll_waits;
+} linux_epoll_main_t;
+
+static linux_epoll_main_t linux_epoll_main;
+
+/* File-update hook installed into unix_main.file_update by
+ * linux_epoll_input_init.
+ *
+ * Mirrors an add / modify / delete of unix file F into the epoll set.
+ * The file is always watched for EPOLLIN, and additionally for EPOLLOUT
+ * when UNIX_FILE_DATA_AVAILABLE_TO_WRITE is set in f->flags.  The event's
+ * user data is F's index in um->file_pool.
+ *
+ * A failing epoll_ctl is only logged; the warning now includes errno
+ * (clib_unix_warning) so the reason for the failure is visible. */
+static void
+linux_epoll_file_update (unix_file_t * f,
+                         unix_file_update_type_t update_type)
+{
+  unix_main_t * um = &unix_main;
+  linux_epoll_main_t * em = &linux_epoll_main;
+  struct epoll_event e;
+  int op;
+
+  memset (&e, 0, sizeof (e));
+
+  e.events = EPOLLIN;
+  if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+    e.events |= EPOLLOUT;
+  e.data.u32 = f - um->file_pool;
+
+  /* Any update type other than ADD or MODIFY is treated as a delete. */
+  if (update_type == UNIX_FILE_UPDATE_ADD)
+    op = EPOLL_CTL_ADD;
+  else if (update_type == UNIX_FILE_UPDATE_MODIFY)
+    op = EPOLL_CTL_MOD;
+  else
+    op = EPOLL_CTL_DEL;
+
+  if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
+    clib_unix_warning ("epoll_ctl");
+}
+
+/* Node function for "unix-epoll-input".
+ *
+ * Computes a sleep timeout from the time until the next process is ready
+ * (clamped to 1..10 ms, or 0 when input nodes are polling or the last
+ * vector rate exceeds 1), waits on the epoll set, then dispatches each
+ * ready file's read/write/error callback.  Errors returned by callbacks
+ * are recorded with unix_save_error.  Always returns 0. */
+static uword
+linux_epoll_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ unix_main_t * um = &unix_main;
+ linux_epoll_main_t * em = &linux_epoll_main;
+ struct epoll_event * e;
+ int n_fds_ready;
+
+ {
+ vlib_node_main_t * nm = &vm->node_main;
+ u64 t = nm->cpu_time_next_process_ready;
+ f64 timeout;
+ int timeout_ms, max_timeout_ms = 10;
+ f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
+
+ /* ~0 is handled as "no deadline": use the maximum timeout. */
+ if (t == ~0ULL)
+ {
+ timeout = 10e-3;
+ timeout_ms = max_timeout_ms;
+ }
+ else
+ {
+ timeout =
+ (((i64) t - (i64) clib_cpu_time_now ())
+ * vm->clib_time.seconds_per_clock)
+ /* subtract off some slop time */ - 50e-6;
+ timeout_ms = timeout * 1e3;
+
+ /* Must be between 1 and 10 ms. */
+ timeout_ms = clib_max (1, timeout_ms);
+ timeout_ms = clib_min (max_timeout_ms, timeout_ms);
+ }
+
+ /* If we still have input nodes polling (e.g. vnet packet generator)
+ don't sleep. */
+ if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0)
+ timeout_ms = 0;
+
+ if (vector_rate > 1)
+ {
+ /* When busy don't wait & only epoll for input every 1024 times
+ through main loop (the value assigned just below). */
+ timeout_ms = 0;
+ node->input_main_loops_per_call = 1024;
+ }
+ else
+ /* We're not busy; go to sleep for a while. */
+ node->input_main_loops_per_call = 0;
+
+ /* Allow any signal to wakeup our sleep. */
+ {
+ /* Static storage, never written here, so the mask passed to
+ epoll_pwait is all-zero bytes. */
+ static sigset_t unblock_all_signals;
+ n_fds_ready = epoll_pwait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms,
+ &unblock_all_signals);
+
+ /* This kludge is necessary to run over absurdly old kernels */
+ if (n_fds_ready < 0 && errno == ENOSYS)
+ {
+ n_fds_ready = epoll_wait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms);
+ }
+ }
+ }
+
+ if (n_fds_ready < 0)
+ {
+ if (unix_error_is_fatal (errno))
+ vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
+
+ /* non fatal error (e.g. EINTR). */
+ return 0;
+ }
+
+ /* Statistics are only updated for waits that did not fail. */
+ em->epoll_waits += 1;
+ em->epoll_files_ready += n_fds_ready;
+
+ for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
+ {
+ /* data.u32 holds the file_pool index stored by linux_epoll_file_update;
+ i is reused further down as the index into errors[]. */
+ u32 i = e->data.u32;
+ unix_file_t * f = pool_elt_at_index (um->file_pool, i);
+ clib_error_t * errors[4];
+ int n_errors = 0;
+
+ if (PREDICT_TRUE (! (e->events & EPOLLERR)))
+ {
+ /* NOTE(review): read_function/write_function are called without a
+ null check, and f is used for the write callback even if the
+ read callback removed the file -- confirm callers guarantee
+ both. */
+ if (e->events & EPOLLIN)
+ {
+ errors[n_errors] = f->read_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ if (e->events & EPOLLOUT)
+ {
+ errors[n_errors] = f->write_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ }
+ else
+ {
+ /* On EPOLLERR only the (optional) error callback runs. */
+ if (f->error_function)
+ {
+ errors[n_errors] = f->error_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ }
+
+ ASSERT (n_errors < ARRAY_LEN (errors));
+ for (i = 0; i < n_errors; i++)
+ {
+ unix_save_error (um, errors[i]);
+ }
+ }
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
+ .function = linux_epoll_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "unix-epoll-input",
+};
+
+/* Init function for the epoll poller: sizes the event vector to
+   VLIB_FRAME_SIZE entries, creates the epoll descriptor and, on success,
+   installs linux_epoll_file_update as unix_main.file_update.  Returns a
+   unix error if epoll_create fails. */
+clib_error_t *
+linux_epoll_input_init (vlib_main_t * vm)
+{
+  linux_epoll_main_t * em = &linux_epoll_main;
+
+  /* Allocate some events. */
+  vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+
+  em->epoll_fd = epoll_create (vec_len (em->epoll_events));
+  if (em->epoll_fd >= 0)
+    {
+      unix_main.file_update = linux_epoll_file_update;
+      return 0;
+    }
+
+  return clib_error_return_unix (0, "epoll_create");
+}
+
+VLIB_INIT_FUNCTION (linux_epoll_input_init);
+
+#endif /* HAVE_LINUX_EPOLL */
+
+/* Init function for Unix file input: delegates to linux_epoll_input_init
+   through vlib_call_init_function and returns its result. */
+static clib_error_t *
+unix_input_init (vlib_main_t * vm)
+{
+ return vlib_call_init_function (vm, linux_epoll_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_input_init);
diff --git a/vlib/vlib/unix/main.c b/vlib/vlib/unix/main.c
new file mode 100644
index 00000000000..b85f3e73326
--- /dev/null
+++ b/vlib/vlib/unix/main.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: Unix main routine
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/plugin.h>
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+unix_main_t unix_main;
+
+/* Init function: records VM in unix_main.vlib_main, then runs
+   unix_input_init and returns its result. */
+static clib_error_t *
+unix_main_init (vlib_main_t * vm)
+{
+  unix_main.vlib_main = vm;
+
+  return vlib_call_init_function (vm, unix_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_main_init);
+
+/* Signal handler installed by setup_signal_handlers.
+ *
+ * Builds a message naming the signal and PC (plus the faulting address for
+ * SIGSEGV).  SIGTERM, when the main loop exit jump buffer is set, logs to
+ * syslog and long-jumps out of the main loop.  The other listed signals
+ * (and SIGTERM without an exit buffer) are fatal: the message goes to
+ * syslog and the process exits with status 1.  Any other signal only
+ * produces a warning and the handler returns.
+ *
+ * NOTE(review): format()/syslog() are called from signal context here, as
+ * in the original; that is not async-signal-safe. */
+static void unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+  uword fatal;
+  u8 * msg = 0;
+
+  msg = format (msg, "received signal %U, PC %U",
+                format_signal, signum,
+                format_ucontext_pc, uc);
+
+  if (signum == SIGSEGV)
+    msg = format (msg, ", faulting address %p", si->si_addr);
+
+  switch (signum)
+    {
+      /* these (caught) signals cause the application to exit */
+    case SIGTERM:
+      if (unix_main.vlib_main->main_loop_exit_set)
+        {
+          syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting...");
+
+          /* The long jump leaves this frame, so release the message
+             first; it would otherwise be leaked. */
+          vec_free (msg);
+
+          clib_longjmp (&unix_main.vlib_main->main_loop_exit,
+                        VLIB_MAIN_LOOP_EXIT_CLI);
+        }
+      /* fall through: SIGTERM without an exit buffer is fatal */
+    case SIGQUIT:
+    case SIGINT:
+    case SIGILL:
+    case SIGBUS:
+    case SIGSEGV:
+    case SIGHUP:
+    case SIGFPE:
+      fatal = 1;
+      break;
+
+      /* by default, print a message and continue */
+    default:
+      fatal = 0;
+      break;
+    }
+
+  /* Null terminate. */
+  vec_add1 (msg, 0);
+
+  if (fatal)
+    {
+      syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+      os_exit (1);
+    }
+  else
+    clib_warning ("%s", msg);
+
+  vec_free (msg);
+}
+
+/* Install signal dispositions for signals 1..31.
+ *
+ * SIGABRT, SIGKILL, SIGSTOP, SIGUSR1 and SIGUSR2 keep their default
+ * action.  SIGPIPE and SIGCHLD are ignored.  Every other signal is routed
+ * to unix_signal_handler with SA_SIGINFO.
+ *
+ * SIG_IGN is stored through sa_handler with SA_SIGINFO clear: POSIX only
+ * defines sa_sigaction as a three-argument handler, so placing SIG_IGN
+ * there relied on the two members sharing storage.
+ *
+ * Returns a unix error naming the signal if sigaction fails, else 0.
+ * UM is currently unused. */
+static clib_error_t *
+setup_signal_handlers (unix_main_t * um)
+{
+  uword i;
+  struct sigaction sa;
+
+  for (i = 1; i < 32; i++)
+    {
+      memset (&sa, 0, sizeof (sa));
+
+      switch (i)
+        {
+          /* these signals take the default action */
+        case SIGABRT:
+        case SIGKILL:
+        case SIGSTOP:
+        case SIGUSR1:
+        case SIGUSR2:
+          continue;
+
+          /* ignore SIGPIPE, SIGCHLD */
+        case SIGPIPE:
+        case SIGCHLD:
+          sa.sa_handler = SIG_IGN;
+          break;
+
+          /* catch and handle all other signals */
+        default:
+          sa.sa_sigaction = (void *) unix_signal_handler;
+          sa.sa_flags = SA_SIGINFO;
+          break;
+        }
+
+      /* format_signal is given an int elsewhere in this file (the signal
+         handler), so pass the same type through the varargs here. */
+      if (sigaction (i, &sa, 0) < 0)
+        return clib_error_return_unix (0, "sigaction %U", format_signal, (int) i);
+    }
+
+  return 0;
+}
+
+/* Error handler registered with clib_error_register_handler in
+ * unix_config.  ARG is the unix_main_t passed at registration; MSG holds
+ * MSG_LEN bytes and is not NUL terminated.
+ *
+ * In interactive mode the bytes are written to stderr (fd 2).  Otherwise
+ * the last byte is temporarily replaced by NUL so the text can be handed
+ * to syslog, then restored.  An empty message is ignored, since there is
+ * no last byte to overwrite. */
+static void unix_error_handler (void * arg, u8 * msg, int msg_len)
+{
+  unix_main_t * um = arg;
+
+  /* Nothing to report; also keeps msg[msg_len - 1] in bounds below. */
+  if (msg == 0 || msg_len <= 0)
+    return;
+
+  /* Echo to stderr when interactive. */
+  if (um->flags & UNIX_FLAG_INTERACTIVE)
+    {
+      CLIB_UNUSED (int r) = write (2, msg, msg_len);
+    }
+  else
+    {
+      char save = msg[msg_len - 1];
+
+      /* Null Terminate. */
+      msg[msg_len-1] = 0;
+
+      syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+      msg[msg_len-1] = save;
+    }
+}
+
+/* Send the text of ERROR (error->what) to syslog.
+ *
+ * Does nothing in interactive mode, when ERROR is null, or when the
+ * message vector is empty.  The last byte of the message is temporarily
+ * overwritten with NUL for syslog and restored afterwards. */
+void vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+{
+  unix_main_t * um = &unix_main;
+
+  if (um->flags & UNIX_FLAG_INTERACTIVE || error == 0)
+    return;
+
+  {
+    char save;
+    u8 * msg;
+    u32 msg_len;
+
+    msg = error->what;
+    msg_len = vec_len(msg);
+
+    /* An empty (or null) message has no last byte to terminate;
+       indexing msg[msg_len-1] would be out of bounds. */
+    if (msg_len == 0)
+      return;
+
+    /* Null Terminate. */
+    save = msg[msg_len-1];
+    msg[msg_len-1] = 0;
+
+    syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+    msg[msg_len-1] = save;
+  }
+}
+
+/* Process node "startup-config-process".
+ *
+ * Suspends for 2 seconds, then polls every 0.1 s until unix_config has set
+ * um->unix_config_complete.  If a startup config file name was configured,
+ * the file is read into a vector, optionally echoed to the log file, and
+ * fed to the CLI with vlib_cli_input.  Always returns 0; failures are
+ * reported with warnings.
+ *
+ * Fixes relative to the previous version:
+ *  - the read count is signed, so a failing read() is detected instead of
+ *    being treated as a huge positive byte count;
+ *  - the vector length is trimmed to the bytes actually read on every
+ *    iteration, so the 4096 bytes appended by vec_resize are no longer
+ *    executed as config text on EOF;
+ *  - the buffer is freed when it is not handed to the CLI. */
+static uword
+startup_config_process (vlib_main_t * vm,
+                        vlib_node_runtime_t * rt,
+                        vlib_frame_t * f)
+{
+  unix_main_t * um = &unix_main;
+  u8 * buf = 0;
+  uword l;
+  word n;
+
+  vlib_process_suspend (vm, 2.0);
+
+  while (um->unix_config_complete == 0)
+    vlib_process_suspend (vm, 0.1);
+
+  if (um->startup_config_filename)
+    {
+      unformat_input_t sub_input;
+      int fd;
+      struct stat s;
+      char *fn = (char *)um->startup_config_filename;
+
+      fd = open (fn, O_RDONLY);
+      if (fd < 0)
+        {
+          clib_warning ("failed to open `%s'", fn);
+          return 0;
+        }
+
+      if (fstat (fd, &s) < 0)
+        {
+          clib_warning ("failed to stat `%s'", fn);
+          goto bail;
+        }
+
+      if (! (S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+        {
+          clib_warning ("not a regular file: `%s'", fn);
+          goto bail;
+        }
+
+      /* Read in 4096-byte chunks; a short read ends the loop, as before. */
+      while (1)
+        {
+          l = vec_len (buf);
+          vec_resize (buf, 4096);
+          n = read (fd, buf + l, 4096);
+
+          /* Keep only what was actually read (nothing on EOF or error). */
+          _vec_len (buf) = l + (n > 0 ? n : 0);
+
+          if (n < 0)
+            {
+              clib_unix_warning ("failed to read `%s'", fn);
+              goto bail;
+            }
+          if (n < 4096)
+            break;
+        }
+
+      if (um->log_fd && vec_len (buf))
+        {
+          u8 * lv = 0;
+          lv = format (lv, "%U: ***** Startup Config *****\n%v",
+                       format_timeval,
+                       0 /* current bat-time */,
+                       0 /* current bat-format */,
+                       buf);
+          {
+            int rv __attribute__((unused)) =
+              write (um->log_fd, lv, vec_len(lv));
+          }
+          vec_reset_length (lv);
+          lv = format (lv, "%U: ***** End Startup Config *****\n",
+                       format_timeval,
+                       0 /* current bat-time */,
+                       0 /* current bat-format */);
+          {
+            int rv __attribute__((unused)) =
+              write (um->log_fd, lv, vec_len(lv));
+          }
+          vec_free (lv);
+        }
+
+      if (vec_len(buf))
+        {
+          unformat_init_vector (&sub_input, buf);
+          vlib_cli_input (vm, &sub_input, 0, 0);
+          /* frees buf for us */
+          unformat_free (&sub_input);
+          buf = 0;
+        }
+
+    bail:
+      /* No-op when buf was handed to (and freed by) the unformat input. */
+      vec_free (buf);
+      close(fd);
+    }
+  return 0;
+}
+
+VLIB_REGISTER_NODE (startup_config_node,static) = {
+ .function = startup_config_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "startup-config-process",
+};
+
+/* Parser for the "unix { ... }" configuration section.
+ *
+ * Recognised keywords: interactive, nodaemon, cli-prompt <s>,
+ * cli-listen <s>, cli-line-mode, cli-history-limit <n>, full-coredump,
+ * startup-config <file>, exec <file> (same target as startup-config) and
+ * log <file>.  Anything else yields an "unknown input" error.
+ *
+ * When not interactive, signal handlers are installed, syslog is opened,
+ * unix_error_handler is registered and, unless nodaemon was given, the
+ * process daemonizes.  Finally um->unix_config_complete is set, which
+ * startup_config_process waits for.
+ *
+ * Fixes relative to the previous version:
+ *  - open() success is tested with fd >= 0, so a descriptor of 0 is
+ *    written to and closed rather than reported as a failure and leaked;
+ *  - the error built when daemon() fails is now returned to the caller
+ *    instead of being created and dropped. */
+static clib_error_t *
+unix_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  unix_main_t * um = &unix_main;
+  clib_error_t * error = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      char * cli_prompt;
+      if (unformat (input, "interactive"))
+        um->flags |= UNIX_FLAG_INTERACTIVE;
+      else if (unformat (input, "nodaemon"))
+        um->flags |= UNIX_FLAG_NODAEMON;
+      else if (unformat (input, "cli-prompt %s", &cli_prompt))
+        vlib_unix_cli_set_prompt (cli_prompt);
+      else if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
+        ;
+      else if (unformat (input, "cli-line-mode"))
+        um->cli_line_mode = 1;
+      else if (unformat (input, "cli-history-limit %d", &um->cli_history_limit))
+        ;
+      else if (unformat (input, "full-coredump"))
+        {
+          int fd;
+
+          fd = open ("/proc/self/coredump_filter", O_WRONLY);
+          if (fd >= 0)
+            {
+              if (write (fd, "0x6f\n", 5) != 5)
+                clib_unix_warning ("coredump filter write failed!");
+              close(fd);
+            }
+          else
+            clib_unix_warning ("couldn't open /proc/self/coredump_filter");
+        }
+      else if (unformat (input, "startup-config %s",
+                         &um->startup_config_filename))
+        ;
+      else if (unformat (input, "exec %s",
+                         &um->startup_config_filename))
+        ;
+      else if (unformat (input, "log %s", &um->log_filename))
+        {
+          um->log_fd = open ((char *) um->log_filename,
+                             O_CREAT | O_WRONLY | O_APPEND, 0644);
+          if (um->log_fd < 0)
+            {
+              clib_warning ("couldn't open log '%s'\n", um->log_filename);
+              /* 0 is used as the "no log file" marker elsewhere. */
+              um->log_fd = 0;
+            }
+          else
+            {
+              u8 * lv = 0;
+              lv = format (0, "%U: ***** Start: PID %d *****\n",
+                           format_timeval,
+                           0 /* current bat-time */,
+                           0 /* current bat-format */,
+                           getpid());
+              {
+                int rv __attribute__((unused)) =
+                  write (um->log_fd, lv, vec_len(lv));
+              }
+              vec_free (lv);
+            }
+        }
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (! (um->flags & UNIX_FLAG_INTERACTIVE))
+    {
+      error = setup_signal_handlers (um);
+      if (error)
+        return error;
+
+      openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON);
+      clib_error_register_handler (unix_error_handler, um);
+
+      if (! (um->flags & UNIX_FLAG_NODAEMON)
+          && daemon (/* chdir to / */ 0,
+                     /* stdin/stdout/stderr -> /dev/null */ 0) < 0)
+        return clib_error_return (0, "daemon () fails");
+    }
+  um->unix_config_complete = 1;
+
+  return 0;
+}
+
+/* unix { ... } configuration. */
+VLIB_CONFIG_FUNCTION (unix_config, "unix");
+
+/* Main-loop exit hook (registered below): closes the syslog connection
+   and returns 0. */
+static clib_error_t *
+unix_exit (vlib_main_t * vm)
+{
+ /* Close syslog connection. */
+ closelog ();
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_exit);
+
+u8 **vlib_thread_stacks;
+
+static char **argv_global;
+
+/* Entry point run on the first thread stack via clib_calljmp from
+   vlib_unix_main.  ARG is the vlib_main_t pointer.  Builds an unformat
+   input from the saved command line, runs vlib_main on it, frees the
+   input and returns vlib_main's result. */
+static uword thread0 (uword arg)
+{
+  unformat_input_t cmdline;
+  vlib_main_t * vm = (vlib_main_t *) arg;
+  int rv;
+
+  unformat_init_command_line (&cmdline, argv_global);
+  rv = vlib_main (vm, &cmdline);
+  unformat_free (&cmdline);
+
+  return rv;
+}
+
+/* Unix entry point for a vlib application.
+ *
+ * Saves argv, runs early plugin init and the early configuration
+ * functions, allocates and write-protects the bottom page of each thread
+ * stack, then switches to the first stack and runs thread0 (which calls
+ * vlib_main).  Returns thread0's result, the non-zero result of early
+ * plugin init, or 1 if early configuration fails. */
+int vlib_unix_main (int argc, char * argv[])
+{
+ vlib_main_t * vm = &vlib_global_main; /* one and only time for this! */
+
+ clib_smp_main_t * sm = &clib_smp_main;
+ vlib_thread_main_t * tm = &vlib_thread_main;
+ unformat_input_t input;
+ u8 * thread_stacks;
+ clib_error_t * e;
+ int i;
+
+ /* thread0 re-parses the command line from this saved pointer. */
+ argv_global = argv;
+ vm->name = argv[0];
+ vm->heap_base = clib_mem_get_heap ();
+ ASSERT(vm->heap_base);
+
+ i = vlib_plugin_early_init (vm);
+ if (i)
+ return i;
+
+ unformat_init_command_line (&input, argv_global);
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ e = vlib_call_all_config_functions (vm, &input, 1 /* early */);
+ if (e != 0)
+ {
+ /* NOTE(review): input is not released on this early-return path. */
+ clib_error_report(e);
+ return 1;
+ }
+ unformat_free (&input);
+
+ /* allocate N x 1mb stacks, aligned e.g. to a 16mb boundary */
+ thread_stacks = clib_mem_alloc_aligned
+ (tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
+ (VLIB_MAX_CPUS << VLIB_LOG2_THREAD_STACK_SIZE));
+
+ sm->vm_base = thread_stacks;
+ sm->log2_n_per_cpu_vm_bytes = VLIB_LOG2_THREAD_STACK_SIZE;
+
+ /* Record the base address of each stack in vlib_thread_stacks[]. */
+ vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
+ for (i = 0; i < vec_len (vlib_thread_stacks); i++)
+ {
+ vlib_thread_stacks[i] = thread_stacks;
+
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (thread_stacks, 4096, PROT_READ) < 0)
+ clib_unix_warning ("thread stack");
+
+ thread_stacks += VLIB_THREAD_STACK_SIZE;
+ }
+
+ /* Run thread0 with the stack pointer at the top of stack 0. */
+ i = clib_calljmp (thread0, (uword) vm,
+ (void *)(vlib_thread_stacks[0] + VLIB_THREAD_STACK_SIZE));
+ return i;
+}
diff --git a/vlib/vlib/unix/mc_socket.c b/vlib/vlib/unix/mc_socket.c
new file mode 100644
index 00000000000..1169203f855
--- /dev/null
+++ b/vlib/vlib/unix/mc_socket.c
@@ -0,0 +1,972 @@
+/*
+ * mc_socket.c: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/mc_socket.h>
+
+#include <sys/ioctl.h> /* for FIONBIO */
+#include <netinet/tcp.h> /* for TCP_NODELAY */
+#include <net/if.h> /* for struct ifreq */
+
+/* format() helper: takes one u64 peer id from ARGS and appends
+   "<address>:<port>" to S, with the address printed through
+   format_network_address (AF_INET) and the port as four hex digits after
+   ntohs.  Returns the extended vector. */
+static u8 * format_socket_peer_id (u8 * s, va_list * args)
+{
+  mc_peer_id_t peer_id;
+  u32 address, port;
+
+  peer_id.as_u64 = va_arg (*args, u64);
+  address = mc_socket_peer_id_get_address (peer_id);
+  port = mc_socket_peer_id_get_port (peer_id);
+
+  return format (s, "%U:%04x", format_network_address, AF_INET, &address,
+                 ntohs (port));
+}
+
+typedef void (mc_msg_handler_t) (mc_main_t * mcm, void * msg, u32 buffer_index);
+
+/* Invoke message handler _H (an mc_msg_handler_t) on the current data of
+   buffer BUFFER_INDEX.  Unless HANDLER_FREES_BUFFER is non-zero, the
+   buffer is freed here after the handler returns. */
+always_inline void msg_handler (mc_main_t * mcm,
+                                u32 buffer_index,
+                                u32 handler_frees_buffer,
+                                void * _h)
+{
+  mc_msg_handler_t * handler = _h;
+  vlib_main_t * vm = mcm->vlib_main;
+  void * payload = vlib_buffer_get_current (vlib_get_buffer (vm, buffer_index));
+
+  handler (mcm, payload, buffer_index);
+
+  if (handler_frees_buffer)
+    return;
+
+  vlib_buffer_free_one (vm, buffer_index);
+}
+
+/* Append one iovec per buffer of the chain starting at BUFFER_INDEX to
+   the vector *IOVS_RETURN, following next_buffer while
+   VLIB_BUFFER_NEXT_PRESENT is set.  Returns the summed current_length of
+   the chain. */
+static uword
+append_buffer_index_to_iovec (vlib_main_t * vm,
+                              u32 buffer_index,
+                              struct iovec ** iovs_return)
+{
+  u32 total = 0;
+  u32 cursor = buffer_index;
+
+  for (;;)
+    {
+      struct iovec * iov;
+      vlib_buffer_t * buf = vlib_get_buffer (vm, cursor);
+
+      vec_add2 (*iovs_return, iov, 1);
+      iov->iov_base = vlib_buffer_get_current (buf);
+      iov->iov_len = buf->current_length;
+      total += iov->iov_len;
+
+      if (buf->flags & VLIB_BUFFER_NEXT_PRESENT)
+        cursor = buf->next_buffer;
+      else
+        break;
+    }
+
+  return total;
+}
+
+/* Send the buffer chain BUFFER_INDEX as one message on SOCKET to TX_ADDR.
+ *
+ * The chain is turned into iovecs (reusing msm->iovecs) and sent with
+ * sendmsg, retrying while the call fails with EAGAIN.  A send that fails
+ * or is short is reported with a warning.  The number of retries, if any,
+ * is recorded in the event log.  The buffer is not freed here.
+ *
+ * Always returns 0, including after a failed send (as before).
+ *
+ * Fixes relative to the previous version:
+ *  - errno is only consulted when sendmsg actually failed; previously a
+ *    short (non-negative) result combined with a stale EAGAIN in errno
+ *    would retry indefinitely;
+ *  - the oversize message spelled "interface" incorrectly and passed a
+ *    word-sized value for %d. */
+static clib_error_t *
+sendmsg_helper (mc_socket_main_t * msm,
+                int socket,
+                struct sockaddr_in * tx_addr,
+                u32 buffer_index)
+{
+  vlib_main_t * vm = msm->mc_main.vlib_main;
+  struct msghdr h;
+  word n_bytes, n_bytes_tx, n_retries;
+
+  memset (&h, 0, sizeof (h));
+  h.msg_name = tx_addr;
+  h.msg_namelen = sizeof (tx_addr[0]);
+
+  /* Reuse the iovec vector from the previous call. */
+  if (msm->iovecs)
+    _vec_len (msm->iovecs) = 0;
+
+  n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
+  ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
+  if (n_bytes > msm->mc_main.transport.max_packet_size)
+    clib_error ("sending packet larger than interface MTU %d bytes", (int) n_bytes);
+
+  h.msg_iov = msm->iovecs;
+  h.msg_iovlen = vec_len (msm->iovecs);
+
+  n_retries = 0;
+  while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) < 0
+         && errno == EAGAIN)
+    n_retries++;
+
+  if (n_bytes_tx < 0)
+    {
+      clib_unix_warning ("sendmsg");
+      return 0;
+    }
+  if (n_bytes_tx != n_bytes)
+    {
+      /* errno is not meaningful for a short send, so do not print it. */
+      clib_warning ("sendmsg: sent %d of %d bytes",
+                    (int) n_bytes_tx, (int) n_bytes);
+      return 0;
+    }
+
+  if (n_retries)
+    {
+      ELOG_TYPE_DECLARE (e) = {
+        .format = "sendmsg-helper: %d retries",
+        .format_args = "i4",
+      };
+      struct { u32 retries; } * ed = 0;
+
+      ed = ELOG_DATA (&vm->elog_main, e);
+      ed->retries = n_retries;
+    }
+  return 0;
+}
+
+/* Transmit BUFFER_INDEX on the multicast socket selected by TYPE.
+   TRANSPORT is the mc_socket_main_t.  The buffer is freed afterwards for
+   every type except MC_TRANSPORT_USER_REQUEST_TO_RELAY.  Returns whatever
+   sendmsg_helper returns. */
+static clib_error_t *
+tx_buffer (void * transport, mc_transport_type_t type, u32 buffer_index)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) transport;
+  mc_multicast_socket_t * ms = &msm->multicast_sockets[type];
+  vlib_main_t * vm = msm->mc_main.vlib_main;
+  clib_error_t * error =
+    sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
+
+  if (type == MC_TRANSPORT_USER_REQUEST_TO_RELAY)
+    return error;
+
+  vlib_buffer_free_one (vm, buffer_index);
+  return error;
+}
+
+/* Send BUFFER_INDEX on the ack socket to the AF_INET address and port
+   extracted from DEST_PEER_ID, then free the buffer.  TRANSPORT is the
+   mc_socket_main_t.  Returns whatever sendmsg_helper returns. */
+static clib_error_t *
+tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) transport;
+  vlib_main_t * vm = msm->mc_main.vlib_main;
+  struct sockaddr_in dest;
+  clib_error_t * error;
+
+  memset (&dest, 0, sizeof (dest));
+  dest.sin_family = AF_INET;
+  dest.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
+  dest.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
+
+  error = sendmsg_helper (msm, msm->ack_socket, &dest, buffer_index);
+
+  vlib_buffer_free_one (vm, buffer_index);
+  return error;
+}
+
+/* Receive one message from SOCKET into vlib buffers.
+ *
+ * Tops up msm->rx_buffers when fewer than rx_mtu_n_buffers are cached,
+ * points one iovec at each of the last rx_mtu_n_buffers cached buffers
+ * and calls recvmsg.  If RX_ADDR is non-null it receives the sender
+ * address.
+ *
+ * When DROP_MESSAGE is set the data is discarded, *BUFFER_INDEX is set to
+ * ~0 and the cached buffers are left in place.  Otherwise the buffers
+ * that hold data are chained together, removed from the cache, and the
+ * head index is stored in *BUFFER_INDEX.
+ *
+ * Returns a unix error if recvmsg fails, else 0. */
+static clib_error_t *
+recvmsg_helper (mc_socket_main_t * msm,
+ int socket,
+ struct sockaddr_in * rx_addr,
+ u32 * buffer_index,
+ u32 drop_message)
+{
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ vlib_buffer_t * b;
+ uword n_left, n_alloc, n_mtu, i, i_rx;
+ const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ word n_bytes_left;
+
+ /* Make sure we have at least a MTU worth of buffers. */
+ n_mtu = msm->rx_mtu_n_buffers;
+ n_left = vec_len (msm->rx_buffers);
+ if (n_left < n_mtu)
+ {
+ uword max_alloc = 8 * n_mtu;
+ vec_validate (msm->rx_buffers, max_alloc - 1);
+ n_alloc = vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
+ /* Only the entries actually allocated count as cached. */
+ _vec_len (msm->rx_buffers) = n_left + n_alloc;
+ }
+
+ /* NOTE(review): this is only an ASSERT; if allocation falls short where
+ ASSERT is compiled out, i_rx - i below would index out of range. */
+ ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
+ vec_validate (msm->iovecs, n_mtu - 1);
+
+ /* Allocate RX buffers from end of rx_buffers.
+ Turn them into iovecs to pass to readv. */
+ i_rx = vec_len (msm->rx_buffers) - 1;
+ for (i = 0; i < n_mtu; i++)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
+ msm->iovecs[i].iov_base = b->data;
+ msm->iovecs[i].iov_len = buffer_size;
+ }
+ _vec_len (msm->iovecs) = n_mtu;
+
+ {
+ struct msghdr h;
+
+ memset (&h, 0, sizeof (h));
+ if (rx_addr)
+ {
+ h.msg_name = rx_addr;
+ h.msg_namelen = sizeof (rx_addr[0]);
+ }
+ h.msg_iov = msm->iovecs;
+ h.msg_iovlen = vec_len (msm->iovecs);
+
+ n_bytes_left = recvmsg (socket, &h, 0);
+ if (n_bytes_left < 0)
+ return clib_error_return_unix (0, "recvmsg");
+ }
+
+ /* Dropping: the message has been consumed from the socket but no
+ buffer leaves the cache. */
+ if (drop_message)
+ {
+ *buffer_index = ~0;
+ return 0;
+ }
+
+ /* Walk the buffers in the same order the iovecs were built (from the
+ end of rx_buffers downwards), chaining them while data remains. */
+ *buffer_index = msm->rx_buffers[i_rx];
+ while (1)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
+
+ b->flags = 0;
+ b->current_data = 0;
+ b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+
+ n_bytes_left -= buffer_size;
+
+ if (n_bytes_left <= 0)
+ break;
+
+ i_rx--;
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = msm->rx_buffers[i_rx];
+ }
+
+ /* Everything at index i_rx and above now belongs to the returned chain. */
+ _vec_len (msm->rx_buffers) = i_rx;
+
+ return 0 /* no error */;
+}
+
+/* Read callback for the mastership multicast socket: receives one message
+   and passes it to mc_msg_master_assert_handler; msg_handler frees the
+   buffer afterwards.  Returns the receive error, if any. */
+static clib_error_t * mastership_socket_read_ready (unix_file_t * uf)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) uf->private_data;
+  mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
+  u32 bi;
+  clib_error_t * error =
+    recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+
+  if (error)
+    return error;
+
+  msg_handler (&msm->mc_main, bi,
+               /* handler_frees_buffer */ 0,
+               mc_msg_master_assert_handler);
+  return error;
+}
+
+/* Read callback for the "to relay" multicast socket.  When this node is
+   not the relay master the message is read and dropped.  When it is the
+   master, the message's global_sequence is stamped with the next relay
+   sequence number (network byte order), the message is re-sent on the
+   "from relay" socket and the buffer is freed.  Returns the receive or
+   send error, if any. */
+static clib_error_t * to_relay_socket_read_ready (unix_file_t * uf)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) uf->private_data;
+  mc_main_t * mcm = &msm->mc_main;
+  vlib_main_t * vm = msm->mc_main.vlib_main;
+  mc_multicast_socket_t * rx_sock = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
+  mc_multicast_socket_t * tx_sock = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+  u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+  mc_msg_user_request_t * mp;
+  clib_error_t * error;
+  u32 bi;
+
+  /* Not the ordering master? Turf the msg */
+  error = recvmsg_helper (msm, rx_sock->socket, /* rx_addr */ 0, &bi,
+                          /* drop_message */ ! is_master);
+  if (error || ! is_master)
+    return error;
+
+  /* We are the master: number and rebroadcast the msg. */
+  mp = vlib_buffer_get_current (vlib_get_buffer (vm, bi));
+  mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
+  mcm->relay_global_sequence++;
+
+  error = sendmsg_helper (msm, tx_sock->socket, &tx_sock->tx_addr, bi);
+  vlib_buffer_free_one (vm, bi);
+
+  return error;
+}
+
+/* Read callback for the "from relay" multicast socket: receives one
+   message and hands it to mc_msg_user_request_handler, which takes
+   ownership of the buffer.  Returns the receive error, if any. */
+static clib_error_t * from_relay_socket_read_ready (unix_file_t * uf)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) uf->private_data;
+  mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+  u32 bi;
+  clib_error_t * error =
+    recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+
+  if (error)
+    return error;
+
+  msg_handler (&msm->mc_main, bi, /* handler_frees_buffer */ 1,
+               mc_msg_user_request_handler);
+  return error;
+}
+
+/* Read callback for the join multicast socket.
+ *
+ * Receives one message and dispatches on its type field (byte-swapped
+ * with clib_host_to_net_u32) to the join-or-leave request handler or the
+ * join reply handler; msg_handler frees the buffer after either one.
+ *
+ * An unrecognised type trips ASSERT (0).  Where ASSERT is compiled out
+ * the buffer is now freed explicitly, so a stray message on the socket
+ * no longer leaks a buffer.
+ *
+ * Returns the receive error, if any. */
+static clib_error_t * join_socket_read_ready (unix_file_t * uf)
+{
+  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+  mc_main_t * mcm = &msm->mc_main;
+  vlib_main_t * vm = mcm->vlib_main;
+  mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
+  clib_error_t * error;
+  u32 bi;
+
+  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+  if (! error)
+    {
+      vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+      mc_msg_join_or_leave_request_t * mp = vlib_buffer_get_current (b);
+
+      switch (clib_host_to_net_u32 (mp->type))
+        {
+        case MC_MSG_TYPE_join_or_leave_request:
+          msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+                       mc_msg_join_or_leave_request_handler);
+          break;
+
+        case MC_MSG_TYPE_join_reply:
+          msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+                       mc_msg_join_reply_handler);
+          break;
+
+        default:
+          ASSERT (0);
+          /* No handler consumed the buffer; release it here. */
+          vlib_buffer_free_one (vm, bi);
+          break;
+        }
+    }
+  return error;
+}
+
+/* Read callback for the ack socket: receives one message and passes it to
+   mc_msg_user_ack_handler; msg_handler frees the buffer afterwards.
+   Returns the receive error, if any. */
+static clib_error_t * ack_socket_read_ready (unix_file_t * uf)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) uf->private_data;
+  u32 bi;
+  clib_error_t * error =
+    recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+
+  if (error)
+    return error;
+
+  msg_handler (&msm->mc_main, bi, /* handler_frees_buffer */ 0,
+               mc_msg_user_ack_handler);
+  return error;
+}
+
+/* Tear down catchup connection C: remove its descriptor from the
+   fd -> catchup index hash, delete unix file UF, free C's input and
+   output vectors and return C to the msm->catchups pool. */
+static void catchup_cleanup (mc_socket_main_t *msm,
+ mc_socket_catchup_t *c,
+ unix_main_t *um, unix_file_t *uf)
+{
+ /* Unhash first: uf->file_descriptor is read before uf is deleted. */
+ hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
+ unix_file_del (um, uf);
+ vec_free (c->input_vector);
+ vec_free (c->output_vector);
+ pool_put (msm->catchups, c);
+}
+
+/* Look up the catchup connection registered for FILE_DESCRIPTOR.
+   Returns the pool element, or 0 when the descriptor is not in the
+   hash. */
+static mc_socket_catchup_t *
+find_catchup_from_file_descriptor (mc_socket_main_t * msm, int file_descriptor)
+{
+  uword * entry = hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
+
+  if (! entry)
+    return 0;
+
+  return pool_elt_at_index (msm->catchups, entry[0]);
+}
+
+/* Shared read callback for catchup connections.
+ *
+ * Appends up to 4096 bytes from the socket to c->input_vector.  Once the
+ * peer signals EOF and some data has accumulated, the whole vector is
+ * passed to the catchup request handler (IS_SERVER) or the catchup reply
+ * handler (client); in the client case the connection is then torn down.
+ *
+ * Returns a unix error (after cleaning up the connection) when read fails
+ * with anything other than EAGAIN, else 0.
+ *
+ * NOTE(review): the lookup result c is used without a null check, and an
+ * EOF that arrives with an empty input vector triggers neither handler
+ * nor cleanup -- confirm both are acceptable. */
+static clib_error_t * catchup_socket_read_ready (unix_file_t * uf, int is_server)
+{
+ unix_main_t * um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ mc_socket_catchup_t * c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ word l, n, is_eof;
+
+ /* Grow by 4096 bytes and read into the newly added tail. */
+ l = vec_len (c->input_vector);
+ vec_resize (c->input_vector, 4096);
+ n = read (uf->file_descriptor, c->input_vector + l, vec_len (c->input_vector) - l);
+ /* Captured before n is rewritten for EAGAIN below. */
+ is_eof = n == 0;
+
+ if (n < 0)
+ {
+ if (errno == EAGAIN)
+ n = 0;
+ else
+ {
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return_unix (0, "read");
+ }
+ }
+
+ /* Trim the vector back to the bytes actually received so far. */
+ _vec_len (c->input_vector) = l + n;
+
+ if (is_eof && vec_len (c->input_vector) > 0)
+ {
+ if (is_server)
+ {
+ mc_msg_catchup_request_handler (mcm, (void *) c->input_vector, c - msm->catchups);
+ _vec_len (c->input_vector) = 0;
+ }
+ else
+ {
+ mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector, c - msm->catchups);
+ c->input_vector = 0; /* reply handler is responsible for freeing vector */
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+
+ return 0 /* no error */;
+}
+
+/* Read callback for server-side catchup sockets: calls
+   catchup_socket_read_ready with is_server = 1. */
+static clib_error_t * catchup_server_read_ready (unix_file_t * uf)
+{ return catchup_socket_read_ready (uf, /* is_server */ 1); }
+
+/* Read callback for client-side catchup sockets: optionally logs an
+   event when MC_EVENT_LOGGING is enabled, then calls
+   catchup_socket_read_ready with is_server = 0. */
+static clib_error_t * catchup_client_read_ready (unix_file_t * uf)
+{
+ if (MC_EVENT_LOGGING)
+ {
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+
+ ELOG_TYPE (e, "catchup_client_read_ready");
+ ELOG (&vm->elog_main, e, 0);
+ }
+ return catchup_socket_read_ready (uf, /* is_server */ 0);
+}
+
+/* Shared write callback for catchup connections.
+ *
+ * If a connect was in progress, its outcome is fetched with SO_ERROR
+ * first.  Then c->output_vector is written out in chunks of at most
+ * rx_mtu_n_bytes - 64 bytes, retrying each write while it fails with
+ * EAGAIN.
+ *
+ * When everything has been written: a client stops watching for
+ * writability and shuts down its write side (sending EOF); a server tears
+ * the connection down.  Any error also tears the connection down and is
+ * returned.
+ *
+ * NOTE(review): the lookup result c is used without a null check. */
+static clib_error_t *
+catchup_socket_write_ready (unix_file_t * uf, int is_server)
+{
+ unix_main_t * um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ clib_error_t * error = 0;
+ int n;
+
+ if (c->connect_in_progress)
+ {
+ u32 len, value;
+
+ c->connect_in_progress = 0;
+ len = sizeof (value);
+ if (getsockopt (c->socket, SOL_SOCKET,
+ SO_ERROR, &value, &len) < 0)
+ {
+ error = clib_error_return_unix (0, "getsockopt SO_ERROR");
+ goto error_quit;
+ }
+ /* A non-zero SO_ERROR value is reported as the errno of the error. */
+ if (value != 0)
+ {
+ error = clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID, "connect fails");
+ goto error_quit;
+ }
+ }
+
+ while (1)
+ {
+ u32 n_this_write;
+
+ n_this_write =
+ clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
+ msm->rx_mtu_n_bytes - 64 /* ip + tcp + option allowance */);
+
+ /* n_this_write is unsigned, so this is effectively a test for zero. */
+ if (n_this_write <= 0)
+ break;
+
+ do {
+ n = write (uf->file_descriptor,
+ c->output_vector + c->output_vector_n_written,
+ n_this_write);
+ } while (n < 0 && errno == EAGAIN);
+
+ if (n < 0)
+ {
+ error = clib_error_return_unix (0, "write");
+ goto error_quit;
+ }
+ c->output_vector_n_written += n;
+ }
+
+ if (c->output_vector_n_written >= vec_len (c->output_vector))
+ {
+ if (! is_server)
+ {
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ /* Send EOF to other side. */
+ shutdown (uf->file_descriptor, SHUT_WR);
+ return error;
+ }
+ else
+ {
+ /* Error paths above jump here, into the server-side cleanup. */
+ error_quit:
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+ return error;
+}
+
+/* Write callback for server-side catchup sockets: calls
+   catchup_socket_write_ready with is_server = 1. */
+static clib_error_t *
+catchup_server_write_ready (unix_file_t * uf)
+{ return catchup_socket_write_ready (uf, /* is_server */ 1); }
+
+/* Write callback for client-side catchup sockets: calls
+   catchup_socket_write_ready with is_server = 0. */
+static clib_error_t *
+catchup_client_write_ready (unix_file_t * uf)
+{ return catchup_socket_write_ready (uf, /* is_server */ 0); }
+
+/* Error callback for catchup sockets: tears down the connection found
+   for uf's descriptor and returns a generic "error".
+   NOTE(review): the lookup result c is passed on without a null check. */
+static clib_error_t *catchup_socket_error_ready (unix_file_t *uf)
+{
+ unix_main_t *um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return (0, "error");
+}
+
+/* Read-ready handler for the TCP catchup listen socket.
+
+ Accepts one pending connection, allocates a catchup record for it,
+ disables Nagle on the new socket and registers it with the unix file
+ poller using the server-side catchup callbacks. Returns an error only
+ when accept() fails. */
+static clib_error_t *catchup_listen_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ struct sockaddr_in client_addr;
+ int client_len;
+ mc_socket_catchup_t *c;
+ unix_file_t template = {0};
+
+ /* Allocate the record up front; it is returned to the pool if accept()
+ fails. */
+ pool_get (msm->catchups, c);
+ memset(c, 0, sizeof (c[0]));
+
+ client_len = sizeof(client_addr);
+
+ /* Acquires the non-blocking attrib from the server socket. */
+ c->socket = accept (uf->file_descriptor,
+ (struct sockaddr *)&client_addr,
+ (socklen_t *)&client_len);
+
+ if (c->socket < 0)
+ {
+ pool_put (msm->catchups, c);
+ return clib_error_return_unix (0, "accept");
+ }
+
+ /* Optional event-log record carrying the client address in host byte
+ order. */
+ if (MC_EVENT_LOGGING)
+ {
+ mc_main_t * mcm = &msm->mc_main;
+ vlib_main_t * vm = mcm->vlib_main;
+
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "catchup accepted from 0x%lx",
+ .format_args = "i4",
+ };
+ struct { u32 addr; } * ed = 0;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->addr = ntohl(client_addr.sin_addr.s_addr);
+ }
+
+ /* Disable the Nagle algorithm, ship catchup pkts immediately */
+ {
+ int one = 1;
+ /* Failure here is only warned about; the connection is still used. */
+ if ((setsockopt(c->socket, IPPROTO_TCP,
+ TCP_NODELAY, (void *)&one, sizeof(one))) < 0) {
+ clib_unix_warning("catchup socket: set TCP_NODELAY");
+ }
+ }
+
+ /* Register the accepted socket and index the catchup record by its fd so
+ the read/write/error callbacks can find it again. */
+ template.read_function = catchup_server_read_ready;
+ template.write_function = catchup_server_write_ready;
+ template.error_function = catchup_socket_error_ready;
+ template.file_descriptor = c->socket;
+ template.private_data = pointer_to_uword (msm);
+ c->unix_file_index = unix_file_add (&unix_main, &template);
+ hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);
+
+ return 0;
+}
+
+/* Bind SOCK to the first port at or above PORT that accepts a wildcard
+   (INADDR_ANY) bind.  Returns the port that was bound, or -1 when every
+   port below 65536 was refused. */
+static word find_and_bind_to_free_port (word sock, word port)
+{
+  const word port_limit = 1 << 16;
+  struct sockaddr_in sa;
+
+  while (port < port_limit)
+    {
+      /* Start from a clean address each attempt. */
+      memset (&sa, 0, sizeof (sa));
+      sa.sin_family = PF_INET;
+      sa.sin_addr.s_addr = INADDR_ANY;
+      sa.sin_port = htons (port);
+
+      if (bind (sock, (struct sockaddr *) &sa, sizeof (sa)) >= 0)
+        return port;
+
+      port++;
+    }
+
+  return -1;
+}
+
+/* Create and configure one multicast UDP socket.
+
+   msm:      socket transport state; supplies the TTL, the multicast group
+             address and the interface address.  A zero TTL is replaced by 1.
+   ms:       receives the socket descriptor and the bound group address.
+   type:     short name used as a prefix in error messages.
+   udp_port: UDP port (host byte order) to bind the group address to.
+
+   The socket is given the multicast TTL and SO_REUSEADDR, bound to the
+   group address, joined to the group on the configured interface and made
+   non-blocking.  Returns 0 on success.  On failure the socket is closed,
+   ms->socket is set to -1 and a unix error is returned. */
+static clib_error_t *
+setup_mutlicast_socket (mc_socket_main_t * msm,
+                        mc_multicast_socket_t * ms,
+                        char * type,
+                        uword udp_port)
+{
+  int one = 1;
+  struct ip_mreq mcast_req;
+  clib_error_t * error = 0;
+
+  if (! msm->multicast_ttl)
+    msm->multicast_ttl = 1;
+
+  /* mastership (multicast) TX socket */
+  if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
+    return clib_error_return_unix(0, "%s socket", type);
+
+  {
+    u8 ttl = msm->multicast_ttl;
+
+    if ((setsockopt(ms->socket, IPPROTO_IP,
+                    IP_MULTICAST_TTL, (void *)&ttl, sizeof(ttl))) < 0)
+      {
+        error = clib_error_return_unix(0, "%s set multicast ttl", type);
+        goto fail;
+      }
+  }
+
+  if (setsockopt(ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
+    {
+      error = clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
+      goto fail;
+    }
+
+  memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
+  ms->tx_addr.sin_family = AF_INET;
+  ms->tx_addr.sin_addr.s_addr = htonl (msm->multicast_tx_ip4_address_host_byte_order);
+  ms->tx_addr.sin_port = htons (udp_port);
+
+  if (bind(ms->socket, (struct sockaddr *)&ms->tx_addr,
+           sizeof (ms->tx_addr)) < 0)
+    {
+      error = clib_error_return_unix(0, "%s bind", type);
+      goto fail;
+    }
+
+  memset (&mcast_req, 0, sizeof (mcast_req));
+  mcast_req.imr_multiaddr.s_addr = htonl (msm->multicast_tx_ip4_address_host_byte_order);
+  mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
+
+  if ((setsockopt(ms->socket, IPPROTO_IP,
+                  IP_ADD_MEMBERSHIP, (void *)&mcast_req,
+                  sizeof (mcast_req))) < 0)
+    {
+      error = clib_error_return_unix(0, "%s IP_ADD_MEMBERSHIP setsockopt", type);
+      goto fail;
+    }
+
+  if (ioctl (ms->socket, FIONBIO, &one) < 0)
+    {
+      error = clib_error_return_unix (0, "%s set FIONBIO", type);
+      goto fail;
+    }
+
+  /* FIXME remove this when we support tx_ready. */
+  {
+    u32 len = 1 << 20;
+    socklen_t sl = sizeof (len);
+    if (setsockopt(ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
+      clib_unix_error ("setsockopt");
+  }
+
+  return 0;
+
+ fail:
+  /* The error object was built above while errno was still valid; now
+     release the descriptor so a failed setup does not leak it. */
+  close (ms->socket);
+  ms->socket = -1;
+  return error;
+}
+
+/* Create every socket used by the socket-based multicast transport and
+   register them with the unix file poller.
+
+   Ports are assigned consecutively starting at
+   msm->base_multicast_udp_port_host_byte_order (defaulted so that all ports
+   fit below 65536): mastership, join, to-relay and from-relay multicast
+   sockets first, then the UDP ack socket and the TCP catchup listen socket.
+   The last two take the first free port at or above their slot.
+
+   Returns 0 on success or a unix error describing the first step that
+   failed.  NOTE(review): sockets opened by earlier steps are left open when
+   a later step fails. */
+static clib_error_t *
+socket_setup (mc_socket_main_t *msm)
+{
+  int one = 1;
+  clib_error_t * error;
+  u32 port;
+  word bound_port;
+
+  if (! msm->base_multicast_udp_port_host_byte_order)
+    msm->base_multicast_udp_port_host_byte_order =
+      0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */)
+                - 1);
+
+  port = msm->base_multicast_udp_port_host_byte_order;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP],
+                                  "mastership",
+                                  port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets[MC_TRANSPORT_JOIN],
+                                  "join",
+                                  port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY],
+                                  "to relay",
+                                  port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
+                                  "from relay",
+                                  port++);
+  if (error)
+    return error;
+
+  /* ACK rx socket */
+  msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+  if (msm->ack_socket < 0)
+    return clib_error_return_unix(0, "ack socket");
+
+  /* A negative result means no port could be bound; do not record it as a
+     port number. */
+  bound_port = find_and_bind_to_free_port (msm->ack_socket, port++);
+  if (bound_port < 0)
+    return clib_error_return_unix (0, "ack socket bind");
+  msm->ack_udp_port = bound_port;
+
+  if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
+    return clib_error_return_unix (0, "ack socket FIONBIO");
+
+  msm->catchup_server_socket = socket(AF_INET, SOCK_STREAM, 0);
+  if (msm->catchup_server_socket < 0)
+    return clib_error_return_unix (0, "catchup server socket");
+
+  bound_port = find_and_bind_to_free_port (msm->catchup_server_socket, port++);
+  if (bound_port < 0)
+    return clib_error_return_unix (0, "catchup server socket bind");
+  msm->catchup_tcp_port = bound_port;
+
+  if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
+    return clib_error_return_unix (0, "catchup server socket FIONBIO");
+
+  if (listen(msm->catchup_server_socket, 5) < 0)
+    return clib_error_return_unix (0, "catchup server socket listen");
+
+  /* epoll setup for multicast mastership socket */
+  {
+    unix_file_t template = {0};
+
+    template.read_function = mastership_socket_read_ready;
+    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast to_relay socket */
+    template.read_function = to_relay_socket_read_ready;
+    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast from_relay socket */
+    template.read_function = from_relay_socket_read_ready;
+    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast join socket */
+    template.read_function = join_socket_read_ready;
+    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for ack rx socket */
+    template.read_function = ack_socket_read_ready;
+    template.file_descriptor = msm->ack_socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for TCP catchup server */
+    template.read_function = catchup_listen_read_ready;
+    template.file_descriptor = msm->catchup_server_socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+  }
+
+  return 0;
+}
+
+/* Queue output on a catchup connection.
+
+   When SET_OUTPUT_VECTOR is non-null it becomes the connection's output
+   vector and 0 is returned.  Otherwise N_BYTES are appended to the existing
+   output vector and a pointer to the appended space is returned for the
+   caller to fill in.  If the output vector ends up non-empty, the file is
+   flagged as having data to write; the poller is told only when the flag
+   was not already set. */
+static void *
+catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector)
+{
+  unix_file_t * file = pool_elt_at_index (unix_main.file_pool,
+                                          c->unix_file_index);
+  u8 * appended = 0;
+
+  if (! set_output_vector)
+    {
+      vec_add2 (c->output_vector, appended, n_bytes);
+    }
+  else
+    {
+      c->output_vector = set_output_vector;
+    }
+
+  /* Nothing queued: leave the poll flags untouched. */
+  if (vec_len (c->output_vector) == 0)
+    return appended;
+
+  if (! (file->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE))
+    {
+      file->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      unix_main.file_update (file, UNIX_FILE_UPDATE_MODIFY);
+    }
+
+  return appended;
+}
+
+/* Transport hook: start a TCP catchup with a peer.
+
+   transport_main:  the mc_socket_main_t for this transport.
+   stream_index:    stream to request catchup data for.
+   catchup_peer_id: peer id carrying the peer's catchup address and port.
+
+   Opens a non-blocking TCP connection to the peer, registers it with the
+   unix file poller and queues a catchup request message on it.  Returns
+   the index of the new catchup record in msm->catchups.  On failure a
+   warning is logged, the record and socket are released, and 0 is
+   returned (the failure return value is unchanged from the original
+   interface). */
+static uword catchup_request_fun (void *transport_main,
+                                  u32 stream_index,
+                                  mc_peer_id_t catchup_peer_id)
+{
+  mc_socket_main_t *msm = (mc_socket_main_t *)transport_main;
+  mc_main_t * mcm = &msm->mc_main;
+  vlib_main_t * vm = mcm->vlib_main;
+  mc_socket_catchup_t *c;
+  struct sockaddr_in addr;
+  unix_main_t *um = &unix_main;
+  int one = 1;
+
+  pool_get (msm->catchups, c);
+  memset (c, 0, sizeof (*c));
+
+  c->socket = socket(AF_INET, SOCK_STREAM, 0);
+  if (c->socket < 0)
+    {
+      clib_unix_warning ("socket");
+      goto fail;
+    }
+
+  if (ioctl (c->socket, FIONBIO, &one) < 0)
+    {
+      clib_unix_warning ("FIONBIO");
+      goto fail_close;
+    }
+
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
+  addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
+
+  /* The connect result is collected later by the write-ready handler. */
+  c->connect_in_progress = 1;
+
+  if (MC_EVENT_LOGGING)
+    {
+      ELOG_TYPE_DECLARE (e) = {
+        .format = "connecting to peer 0x%Lx",
+        .format_args = "i8",
+      };
+      struct { u64 peer; } * ed;
+      ed = ELOG_DATA (&vm->elog_main, e);
+      ed->peer = catchup_peer_id.as_u64;
+    }
+
+  /* EINPROGRESS is the normal outcome for a non-blocking connect. */
+  if (connect(c->socket, (const void *)&addr,sizeof(addr))
+      < 0 && errno != EINPROGRESS)
+    {
+      clib_unix_warning ("connect to %U fails",
+                         format_socket_peer_id, catchup_peer_id);
+      goto fail_close;
+    }
+
+  {
+    unix_file_t template = {0};
+
+    template.read_function = catchup_client_read_ready;
+    template.write_function = catchup_client_write_ready;
+    template.error_function = catchup_socket_error_ready;
+    template.file_descriptor = c->socket;
+    template.private_data = (uword) msm;
+    c->unix_file_index = unix_file_add (um, &template);
+
+    hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);
+  }
+
+  {
+    mc_msg_catchup_request_t * mp;
+    mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */ 0);
+    mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
+    mp->stream_index = stream_index;
+    mc_byte_swap_msg_catchup_request (mp);
+  }
+
+  return c - msm->catchups;
+
+  /* Failure paths: the warning has already been logged (while errno was
+     valid); release what was acquired so nothing leaks. */
+ fail_close:
+  close (c->socket);
+ fail:
+  pool_put (msm->catchups, c);
+  return 0;
+}
+
+/* Transport hook: queue DATA as the complete output vector of the catchup
+   connection identified by OPAQUE (an index into msm->catchups). */
+static void catchup_send_fun (void *transport_main, uword opaque, u8 * data)
+{
+  mc_socket_main_t * msm = (mc_socket_main_t *) transport_main;
+  mc_socket_catchup_t * catchup;
+
+  catchup = pool_elt_at_index (msm->catchups, opaque);
+  catchup_add_pending_output (catchup, /* n_bytes */ 0,
+                              /* set_output_vector */ data);
+}
+
+/* Look up the IPv4 address and MTU of a network interface.
+
+   if_name:     interface name, e.g. "eth0".
+   ip4_address: receives the interface address exactly as the kernel returns
+                it in sockaddr_in.sin_addr.
+   mtu:         optional; receives the interface MTU minus 28 bytes of
+                IPv4 + UDP header.
+
+   Returns 0 on success and -1 on failure.  The probe socket is closed on
+   every path. */
+static int
+find_interface_ip4_address (char * if_name, u32 * ip4_address, u32 * mtu)
+{
+  int fd;
+  int rv = -1;
+  struct ifreq ifr;
+  struct sockaddr_in * sa;
+
+  /* Dig up our IP address.  Any datagram socket works as an ioctl handle. */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+  if (fd < 0) {
+    clib_unix_error ("socket");
+    return -1;
+  }
+
+  /* Zero the request so the strncpy() below always leaves ifr_name
+     NUL-terminated. */
+  memset (&ifr, 0, sizeof (ifr));
+  ifr.ifr_addr.sa_family = AF_INET;
+  strncpy (ifr.ifr_name, if_name, sizeof(ifr.ifr_name)-1);
+  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0) {
+    clib_unix_error ("ioctl(SIOCGIFADDR)");
+    goto done;
+  }
+
+  sa = (void *) &ifr.ifr_addr;
+  memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
+
+  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
+    goto done;
+  if (mtu)
+    *mtu = ifr.ifr_mtu - (/* IP4 header */ 20 + /* UDP header */ 8);
+
+  rv = 0;
+
+ done:
+  close (fd);
+  return rv;
+}
+
+/* Initialize the socket-based multicast transport.
+
+   msm:               transport state; unset fields get defaults.
+   intfc_probe_list:  interface names to try when
+                      msm->multicast_interface_name is not set.
+   n_intfcs_to_probe: number of entries in intfc_probe_list.
+
+   Picks the multicast interface, derives the receive MTU, creates the
+   sockets, fills in the mc transport callbacks and calls mc_main_init().
+   Returns an error when no interface address can be found or when socket
+   setup fails. */
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+                     int n_intfcs_to_probe)
+{
+  mc_main_t * mcm = &msm->mc_main;
+  clib_error_t * error;
+  u32 if_address, mtu;
+  u32 found = 0;
+
+  /* 239.255.0.7 */
+  if (msm->multicast_tx_ip4_address_host_byte_order == 0)
+    msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
+
+  if (msm->multicast_interface_name)
+    {
+      /* An explicitly configured interface is the only candidate. */
+      if (find_interface_ip4_address (msm->multicast_interface_name,
+                                      &if_address, &mtu) == 0)
+        found = 1;
+    }
+  else
+    {
+      /* Otherwise take the first probe-list entry that has an address. */
+      u32 idx = 0;
+
+      while (! found && idx < n_intfcs_to_probe)
+        {
+          if (find_interface_ip4_address (intfc_probe_list[idx],
+                                          &if_address, &mtu) == 0)
+            {
+              found = 1;
+              msm->multicast_interface_name = intfc_probe_list[idx];
+            }
+          idx++;
+        }
+    }
+
+  if (! found)
+    return clib_error_return (0, "can't find interface ip4 address");
+
+  msm->if_ip4_address_net_byte_order = if_address;
+
+  /* Number of default-size VLIB buffers needed to hold one MTU, rounded up. */
+  msm->rx_mtu_n_bytes = mtu;
+  msm->rx_mtu_n_buffers = msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+  if ((msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES) != 0)
+    msm->rx_mtu_n_buffers += 1;
+
+  error = socket_setup (msm);
+  if (error)
+    return error;
+
+  mcm->transport.our_ack_peer_id =
+    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->ack_udp_port);
+
+  mcm->transport.our_catchup_peer_id =
+    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->catchup_tcp_port);
+
+  mcm->transport.tx_buffer = tx_buffer;
+  mcm->transport.tx_ack = tx_ack;
+  mcm->transport.catchup_request_fun = catchup_request_fun;
+  mcm->transport.catchup_send_fun = catchup_send_fun;
+  mcm->transport.format_peer_id = format_socket_peer_id;
+  mcm->transport.opaque = msm;
+  mcm->transport.max_packet_size = mtu;
+
+  mc_main_init (mcm, "socket");
+
+  return error;
+}
diff --git a/vlib/vlib/unix/mc_socket.h b/vlib/vlib/unix/mc_socket.h
new file mode 100644
index 00000000000..7dd6b5e27b1
--- /dev/null
+++ b/vlib/vlib/unix/mc_socket.h
@@ -0,0 +1,126 @@
+/*
+ * mc_socket.h: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_mc_socket_h__
+#define __included_mc_socket_h__
+
+#include <vlib/unix/unix.h>
+#include <netinet/in.h>
+
+/* One multicast UDP socket and the group address/port it is bound to. */
+typedef struct {
+ /* Socket file descriptor. */
+ int socket;
+ /* Multicast group address and UDP port, in network byte order. */
+ struct sockaddr_in tx_addr;
+} mc_multicast_socket_t;
+
+/* TCP catchup socket: per-connection state for one catchup transfer. */
+typedef struct {
+ /* Connected (or connecting) TCP socket file descriptor. */
+ int socket;
+ /* Index of this socket's entry in the unix file pool. */
+ u32 unix_file_index;
+
+ /* Vector of bytes received so far. */
+ u8 * input_vector;
+ /* Vector of bytes queued for transmission. */
+ u8 * output_vector;
+ /* Number of bytes of output_vector already written to the socket. */
+ u32 output_vector_n_written;
+
+ /* Non-zero while a non-blocking connect() has not yet been checked. */
+ u32 connect_in_progress;
+} mc_socket_catchup_t;
+
+/* State of the socket-based transport for vlib mc. */
+typedef struct mc_socket_main_t {
+ /* Generic mc state; the transport callbacks are installed into it. */
+ mc_main_t mc_main;
+
+ /* Multicast mastership/join/to-relay/from-relay sockets, indexed by
+ transport type. */
+ mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE];
+
+ /* Unicast UDP ack socket */
+ int ack_socket;
+
+ /* TCP catchup server socket */
+ int catchup_server_socket;
+
+ /* Pool of stream-private catchup sockets */
+ mc_socket_catchup_t *catchups;
+
+ /* Hash mapping a catchup socket file descriptor to its index in the
+ catchups pool. */
+ uword * catchup_index_by_file_descriptor;
+
+ /* Receive MTU in bytes. */
+ u32 rx_mtu_n_bytes;
+
+ /* Receive MTU in VLIB buffers (rx_mtu_n_bytes rounded up to whole
+ default-size buffers). */
+ u32 rx_mtu_n_buffers;
+
+ /* Vector of RX VLIB buffers. */
+ u32 * rx_buffers;
+ /* Vector of scatter/gather descriptors for sending/receiving VLIB buffers
+ via kernel. */
+ struct iovec * iovecs;
+
+ /* IP address of interface to use for multicast. */
+ u32 if_ip4_address_net_byte_order;
+
+ /* Ports actually bound for the ack and catchup sockets (host byte
+ order). */
+ u32 ack_udp_port;
+ u32 catchup_tcp_port;
+
+ /* Interface on which to listen for multicasts. */
+ char * multicast_interface_name;
+
+ /* Multicast address to use (e.g. 0xefff0000).
+ Host byte order. */
+ u32 multicast_tx_ip4_address_host_byte_order;
+
+ /* TTL to use for multicasts. */
+ u32 multicast_ttl;
+
+ /* Multicast ports for mastership, joins, etc. will be chosen
+ starting at the given port in host byte order.
+ A total of MC_N_TRANSPORT_TYPE ports will be used. */
+ u32 base_multicast_udp_port_host_byte_order;
+} mc_socket_main_t;
+
+/* Extract the IPv4 address from a peer id.
+
+   Bytes 0..3 of the id hold the address, most significant byte first.
+   The result is returned in network byte order. */
+always_inline u32
+mc_socket_peer_id_get_address (mc_peer_id_t i)
+{
+  /* Widen each byte before shifting: a u8 promotes to (signed) int, and
+     shifting a value >= 0x80 left by 24 would overflow into the sign bit. */
+  u32 a = (((u32) i.as_u8[0] << 24)
+           | ((u32) i.as_u8[1] << 16)
+           | ((u32) i.as_u8[2] << 8)
+           | ((u32) i.as_u8[3] << 0));
+  return clib_host_to_net_u32 (a);
+}
+
+/* Extract the port from a peer id.  Bytes 4 and 5 hold the port, high byte
+   first; the result is returned in network byte order. */
+always_inline u32
+mc_socket_peer_id_get_port (mc_peer_id_t i)
+{
+  int port_host_byte_order = (i.as_u8[4] << 8) | i.as_u8[5];
+
+  return clib_host_to_net_u16 (port_host_byte_order);
+}
+
+/* Build a peer id from an IPv4 address (network byte order) and a port
+   (host byte order).  Layout: bytes 0..3 address, most significant byte
+   first; bytes 4..5 port, high byte first; bytes 6..7 zero. */
+static_always_inline mc_peer_id_t
+mc_socket_set_peer_id (u32 address_net_byte_order, u32 port_host_byte_order)
+{
+  const u32 address = ntohl (address_net_byte_order);
+  const u32 port = port_host_byte_order;
+  mc_peer_id_t id;
+  int k;
+
+  for (k = 0; k < 4; k++)
+    id.as_u8[k] = (address >> (24 - 8 * k)) & 0xff;
+
+  id.as_u8[4] = (port >> 8) & 0xff;
+  id.as_u8[5] = port & 0xff;
+
+  id.as_u8[6] = 0;
+  id.as_u8[7] = 0;
+
+  return id;
+}
+
+/* Initialize the socket-based mc transport.
+ msm: transport state to initialize.
+ intfc_probe_list: interface names to probe for an IPv4 address.
+ n_intfcs_to_probe: number of entries in intfc_probe_list.
+ Returns 0 on success, otherwise an error. */
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+ int n_intfcs_to_probe);
+#endif /* __included_mc_socket_h__ */
+
diff --git a/vlib/vlib/unix/pci.c b/vlib/vlib/unix/pci.c
new file mode 100644
index 00000000000..02c37f72707
--- /dev/null
+++ b/vlib/vlib/unix/pci.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/pci.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+linux_pci_main_t linux_pci_main;
+
+/* Call F once for every entry of directory DIR_NAME.
+
+   dir_name:  directory to scan.
+   f:         callback; receives ARG, the entry's path ("dir_name/entry")
+              and the bare entry name, both as clib vectors without a NUL
+              terminator.  A non-zero return stops the scan.
+   arg:       opaque pointer handed to F.
+   scan_dirs: when non-zero, sub-directories are visited too (except "."
+              and ".."); when zero, directories are skipped.
+
+   Returns 0 when the directory does not exist (ENOENT), the first error
+   returned by F, or a unix error if the directory cannot be opened. */
+static clib_error_t *
+foreach_directory_file (char * dir_name,
+                        clib_error_t * (* f) (void * arg, u8 * path_name, u8 * file_name),
+                        void * arg,
+                        int scan_dirs)
+{
+  DIR * d;
+  struct dirent * e;
+  clib_error_t * error = 0;
+  u8 * s, * t;
+
+  d = opendir (dir_name);
+  if (! d)
+    {
+      /* System has no PCI bus. */
+      if (errno == ENOENT)
+        return 0;
+      return clib_error_return_unix (0, "open `%s'", dir_name);
+    }
+
+  s = t = 0;
+  while (1)
+    {
+      e = readdir (d);
+      if (! e)
+        break;
+      if (scan_dirs)
+        {
+          if (e->d_type == DT_DIR
+              && (! strcmp (e->d_name, ".")
+                  || ! strcmp (e->d_name, "..")))
+            continue;
+        }
+      else
+        {
+          if (e->d_type == DT_DIR)
+            continue;
+        }
+
+      s = format (s, "%s/%s", dir_name, e->d_name);
+      t = format (t, "%s", e->d_name);
+      error = f (arg, s, t);
+      /* Keep the allocations, just reset the lengths for the next entry. */
+      _vec_len (s) = 0;
+      _vec_len (t) = 0;
+
+      if (error)
+        break;
+    }
+
+  /* Both scratch vectors are owned here; the original freed only s. */
+  vec_free (s);
+  vec_free (t);
+  closedir (d);
+
+  return error;
+}
+
+/* Write a formatted string to a sysfs file.
+
+   file_name: path of the file, opened write-only.
+   fmt, ...:  clib format string and arguments producing the bytes to write.
+
+   Returns 0 on success or a unix error if open() or write() fails.  The
+   file descriptor and the formatted string are released on every path. */
+static clib_error_t *
+write_sys_fs (char * file_name, char * fmt, ...)
+{
+  clib_error_t * error = 0;
+  u8 * s;
+  int fd;
+  va_list va;
+
+  fd = open (file_name, O_WRONLY);
+  if (fd < 0)
+    return clib_error_return_unix (0, "open `%s'", file_name);
+
+  va_start (va, fmt);
+  s = va_format (0, fmt, &va);
+  va_end (va);
+
+  /* Build the error while errno is still valid, then clean up. */
+  if (write (fd, s, vec_len (s)) < 0)
+    error = clib_error_return_unix (0, "write `%s'", file_name);
+
+  vec_free (s);
+  close (fd);
+  return error;
+}
+
+/* Directory-scan callback: parse the "uio<N>" entry name and record N as
+   the device's uio minor number.  ARG is the linux_pci_device_t to update.
+   Aborts the process if the name does not have that form. */
+static clib_error_t *
+scan_uio_dir (void * arg, u8 * path_name, u8 * file_name)
+{
+  linux_pci_device_t * device = arg;
+  unformat_input_t name_input;
+  uword matched;
+
+  unformat_init_string (&name_input, (char *) file_name, vec_len (file_name));
+
+  matched = unformat (&name_input, "uio%d", &device->uio_minor);
+  if (! matched)
+    abort ();
+
+  unformat_free (&name_input);
+  return 0;
+}
+
+/* Read-ready callback for a device's uio file descriptor.  Marks the
+   device's input node as having a pending interrupt and sets the device's
+   bit in the first word of the node's runtime data. */
+static clib_error_t * linux_pci_uio_read_ready (unix_file_t * uf)
+{
+  linux_pci_main_t * pm = &linux_pci_main;
+  vlib_main_t * vm = pm->vlib_main;
+  u32 device_pool_index = uf->private_data;
+  linux_pci_device_t * device;
+  vlib_node_runtime_t * runtime;
+
+  device = pool_elt_at_index (pm->pci_devices, device_pool_index);
+  vlib_node_set_interrupt_pending (vm, device->device_input_node_index);
+
+  /* Let node know which device is interrupting. */
+  runtime = vlib_node_get_runtime (vm, device->device_input_node_index);
+  runtime->runtime_data[0] |= 1 << device->device_index;
+
+  /* no error */
+  return 0;
+}
+
+static clib_error_t *linux_pci_uio_error_ready (unix_file_t *uf)
+{
+ u32 error_index = (u32) uf->private_data;
+
+ return clib_error_return (0, "pci device %d: error", error_index);
+}
+
+/* Return the size in bytes of the device's sysfs "resource<N>" file, or 0
+   when the file cannot be stat'ed. */
+static uword pci_resource_size (uword os_handle, uword resource)
+{
+  linux_pci_device_t * device;
+  struct stat st;
+  u8 * path;
+  uword size = 0;
+
+  device = pool_elt_at_index (linux_pci_main.pci_devices, os_handle);
+
+  /* NUL-terminate the path vector so it can be handed to stat(). */
+  path = format (0, "%v/resource%d%c", device->dev_dir_name, resource, 0);
+  if (stat ((char *) path, &st) >= 0)
+    size = st.st_size;
+
+  vec_free (path);
+  return size;
+}
+
+/* Write a register offset/value pair to the device's sysfs
+   "disable_interrupt_regs" file.
+
+   os_handle:  index of the device in the linux pci device pool.
+   resource:   must be 0.
+   reg_offset: register offset; must lie inside resource 0.
+   reg_value:  value written together with the offset.
+
+   Write failures are reported but not returned. */
+void os_add_pci_disable_interrupts_reg (uword os_handle, u32 resource,
+                                        u32 reg_offset, u32 reg_value)
+{
+  linux_pci_main_t * pm = &linux_pci_main;
+  linux_pci_device_t * l;
+  char * file_name;
+  clib_error_t * error;
+
+  l = pool_elt_at_index (pm->pci_devices, os_handle);
+  ASSERT (resource == 0);
+  ASSERT (reg_offset < pci_resource_size (os_handle, resource));
+  /* dev_dir_name is a clib vector without a NUL terminator, so it has to be
+     formatted with %v (as the other users of dev_dir_name do), not %s. */
+  file_name = (char *) format (0, "%v/disable_interrupt_regs%c", l->dev_dir_name, 0);
+  error = write_sys_fs (file_name, "%x %x", reg_offset, reg_value);
+  if (error)
+    clib_error_report (error);
+  vec_free (file_name);
+}
+
+/* Register a PCI device that has a matching user-space driver.
+
+   dev:  generic PCI device; its bus address and os_handle are filled in.
+   pdev: scan-time Linux state (config fd, sysfs directory name); copied
+         into a new pool entry.  The directory name is duplicated, so the
+         caller keeps ownership of its own copy.
+
+   Binds the device to the uio_pci_dma kernel driver, finds and opens its
+   /dev/uio<N> node and registers that descriptor with the unix file
+   poller. */
+static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
+{
+  linux_pci_main_t * pm = &linux_pci_main;
+  linux_pci_device_t * l;
+  pci_config_header_t * c;
+  u32 x[4];
+  clib_error_t * error;
+
+  c = &dev->config0.header;
+
+  pool_get (pm->pci_devices, l);
+  l[0] = pdev[0];
+
+  l->dev_dir_name = vec_dup (l->dev_dir_name);
+
+  /* Parse bus, dev, function from directory name. */
+  {
+    unformat_input_t input;
+
+    unformat_init_string (&input, (char *) l->dev_dir_name,
+                          vec_len (l->dev_dir_name));
+
+    if (! unformat (&input, "/sys/bus/pci/devices/%x:%x:%x.%x",
+                    &x[0], &x[1], &x[2], &x[3]))
+      abort ();
+
+    unformat_free (&input);
+
+    l->bus_address.bus = x[1];
+    l->bus_address.slot_function = (x[2] << 3) | x[3];
+    dev->bus_address = l->bus_address;
+  }
+
+  dev->os_handle = l - pm->pci_devices;
+
+  error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/new_id",
+                        "%x %x", c->vendor_id, c->device_id);
+  if (error)
+    clib_error_report (error);
+  error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/bind",
+                        "%04x:%02x:%02x.%x", x[0], x[1], x[2], x[3]);
+  /* Errors happen when re-binding so just ignore them. */
+  if (error)
+    clib_error_free (error);
+
+  {
+    /* dev_dir_name is an unterminated vector: format it with %v and add an
+       explicit NUL so the result is a valid C string for opendir(). */
+    u8 * uio_dir = format (0, "%v/uio%c", l->dev_dir_name, 0);
+    error = foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ 1);
+    if (error)
+      clib_error_report (error);
+    vec_free (uio_dir);
+  }
+
+  {
+    char * uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0);
+    l->uio_fd = open (uio_name, O_RDWR);
+    if (l->uio_fd < 0)
+      clib_unix_error ("open `%s'", uio_name);
+    vec_free (uio_name);
+  }
+
+  {
+    unix_file_t template = {0};
+    unix_main_t * um = &unix_main;
+
+    template.read_function = linux_pci_uio_read_ready;
+    template.file_descriptor = l->uio_fd;
+    template.error_function = linux_pci_uio_error_ready;
+    template.private_data = l - pm->pci_devices;
+
+    /* To be filled in by driver. */
+    l->device_input_node_index = ~0;
+    l->device_index = 0;
+
+    l->unix_file_index = unix_file_add (um, &template);
+  }
+}
+
+/* Release the OS resources held by a device: close its resource, config
+   and uio descriptors (those greater than zero) and free its vectors. */
+static void linux_pci_device_free (linux_pci_device_t * l)
+{
+  int idx = 0;
+
+  while (idx < vec_len (l->resource_fds))
+    {
+      if (l->resource_fds[idx] > 0)
+        close (l->resource_fds[idx]);
+      idx++;
+    }
+
+  if (l->config_fd > 0)
+    close (l->config_fd);
+
+  if (l->uio_fd > 0)
+    close (l->uio_fd);
+
+  vec_free (l->resource_fds);
+  vec_free (l->dev_dir_name);
+}
+
+/* Configuration space read/write.
+
+   Seeks the device's config-space descriptor to ADDRESS and transfers
+   N_BYTES between it and DATA in the direction given by READ_OR_WRITE.
+   Returns 0 on success, or a unix error when the seek fails or fewer than
+   N_BYTES were transferred. */
+clib_error_t *
+os_read_write_pci_config (uword os_handle,
+                          vlib_read_or_write_t read_or_write,
+                          uword address,
+                          void * data,
+                          u32 n_bytes)
+{
+  linux_pci_device_t * device;
+  int is_read = (read_or_write == VLIB_READ);
+  int n_transferred;
+
+  device = pool_elt_at_index (linux_pci_main.pci_devices, os_handle);
+
+  if (lseek (device->config_fd, address, SEEK_SET) != address)
+    return clib_error_return_unix (0, "seek offset %d", address);
+
+  if (is_read)
+    n_transferred = read (device->config_fd, data, n_bytes);
+  else
+    n_transferred = write (device->config_fd, data, n_bytes);
+
+  if (n_transferred == n_bytes)
+    return 0;
+
+  return clib_error_return_unix (0, "%s", is_read ? "read" : "write");
+}
+
+/* Map a PCI resource (BAR) of a device into the process.
+
+   os_handle: index of the device in the linux pci device pool.
+   resource:  resource number; selects the sysfs "resource<N>" file.
+   addr:      0 to let the kernel choose the address, otherwise the fixed
+              address to map at (MAP_FIXED).
+   result:    receives the mapping address.
+
+   The whole file is mapped shared, read/write.  On success the file
+   descriptor is recorded in the device's resource_fds vector.  On failure
+   the descriptor is closed, nothing is recorded and an error is
+   returned. */
+static clib_error_t *
+os_map_pci_resource_internal (uword os_handle,
+                              u32 resource,
+                              u8 *addr,
+                              void ** result)
+{
+  linux_pci_main_t * pm = &linux_pci_main;
+  linux_pci_device_t * p;
+  struct stat stat_buf;
+  u8 * file_name;
+  int fd;
+  clib_error_t * error;
+  int flags = MAP_SHARED;
+
+  error = 0;
+  p = pool_elt_at_index (pm->pci_devices, os_handle);
+
+  file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
+  fd = open ((char *) file_name, O_RDWR);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "open `%s'", file_name);
+      goto done;
+    }
+
+  if (fstat (fd, &stat_buf) < 0)
+    {
+      error = clib_error_return_unix (0, "fstat `%s'", file_name);
+      goto done;
+    }
+
+  if (addr != 0)
+    flags |= MAP_FIXED;
+
+  *result = mmap (addr,
+                  /* size */ stat_buf.st_size,
+                  PROT_READ | PROT_WRITE,
+                  flags,
+                  /* file */ fd,
+                  /* offset */ 0);
+  if (*result == MAP_FAILED)
+    {
+      error = clib_error_return_unix (0, "mmap `%s'", file_name);
+      goto done;
+    }
+
+  /* Record the descriptor only once the mapping exists, so a failed
+     attempt never leaves a closed descriptor behind to be closed again
+     when the device is freed. */
+  vec_validate (p->resource_fds, resource);
+  p->resource_fds[resource] = fd;
+
+ done:
+  if (error)
+    {
+      /* 0 is a valid descriptor; only a negative value means "not open". */
+      if (fd >= 0)
+        close (fd);
+    }
+  vec_free (file_name);
+  return error;
+}
+
+/* Map a PCI resource of a device at a kernel-chosen address.  RESULT
+   receives the mapping address. */
+clib_error_t *
+os_map_pci_resource (uword os_handle,
+                     u32 resource,
+                     void ** result)
+{
+  u8 * no_fixed_address = 0;
+
+  return os_map_pci_resource_internal (os_handle, resource,
+                                       no_fixed_address, result);
+}
+
+/* Map a PCI resource of a device at the caller-supplied address ADDR.
+   RESULT receives the mapping address. */
+clib_error_t *
+os_map_pci_resource_fixed (uword os_handle,
+                           u32 resource,
+                           u8 *addr,
+                           void ** result)
+{
+  clib_error_t * error;
+
+  error = os_map_pci_resource_internal (os_handle, resource, addr, result);
+  return error;
+}
+
+/* Release a device's OS resources and return its entry to the device
+   pool. */
+void os_free_pci_device (uword os_handle)
+{
+  linux_pci_main_t * pm = &linux_pci_main;
+  linux_pci_device_t * device = pool_elt_at_index (pm->pci_devices, os_handle);
+
+  linux_pci_device_free (device);
+  pool_put (pm->pci_devices, device);
+}
+
+/* Format function for an OS PCI handle (a uword): prints the device's bus
+   address as "bus/slot/function" in hex. */
+u8 * format_os_pci_handle (u8 * s, va_list * va)
+{
+  uword handle = va_arg (*va, uword);
+  linux_pci_device_t * device;
+
+  device = pool_elt_at_index (linux_pci_main.pci_devices, handle);
+
+  /* slot_function packs the slot in the upper bits and the function in the
+     low three bits. */
+  s = format (s, "%x/%x/%x", device->bus_address.bus,
+              (device->bus_address.slot_function >> 3),
+              (device->bus_address.slot_function & 0x7));
+  return s;
+}
+
+/* Return the registration that follows R in the ELF section, skipping over
+   R's variable-length list of supported device ids. */
+static inline pci_device_registration_t *
+pci_device_next_registered (pci_device_registration_t * r)
+{
+  uword n_ids = 0;
+
+  /* Null vendor id marks end of initialized list. */
+  while (r->supported_devices[n_ids].vendor_id != 0)
+    n_ids++;
+
+  return clib_elf_section_data_next (r, n_ids * sizeof (r->supported_devices[0]));
+}
+
+/* Check whether the Linux kernel driver named by the registration is
+   loaded, i.e. whether /sys/bus/pci/drivers/<name> exists.
+
+   Each positive check increments r->kernel_driver_running; a negative
+   check resets it to 0.  Returns the updated counter, so non-zero means
+   the driver is present. */
+static inline u8 kernel_driver_installed (pci_device_registration_t *r)
+{
+  u8 * link_name;
+  struct stat b;
+
+  /* Append an explicit NUL: format() builds an unterminated vector, but
+     stat() needs a C string. */
+  link_name = format (0, "/sys/bus/pci/drivers/%s%c", r->kernel_driver, 0);
+  if (stat ((char *)link_name, &b) >= 0)
+    r->kernel_driver_running++;
+  else
+    r->kernel_driver_running=0;
+
+  vec_free (link_name);
+  return r->kernel_driver_running;
+}
+
+/* Find a registered user-space driver for a scanned PCI device and
+ initialize the device with it.
+
+ Walks the registration list looking for an entry whose supported
+ vendor/device ids match the device's config header. A match whose kernel
+ driver is currently installed is skipped. The first usable match adds the
+ device and returns the result of the driver's init function. When no
+ driver takes the device, its config-space descriptor is closed and 0 is
+ returned. */
+static clib_error_t *
+init_device_from_registered (vlib_main_t * vm,
+ pci_device_t * dev,
+ linux_pci_device_t * pdev)
+{
+ unix_main_t * um = vlib_unix_get_main();
+ pci_device_registration_t * r;
+ pci_device_id_t * i;
+ pci_config_header_t * c;
+
+ c = &dev->config0.header;
+
+ r = um->pci_device_registrations;
+
+ while (r)
+ {
+ /* The id list of each registration ends with a zero vendor id. */
+ for (i = r->supported_devices; i->vendor_id != 0; i++)
+ if (i->vendor_id == c->vendor_id && i->device_id == c->device_id)
+ {
+ if (r->kernel_driver && kernel_driver_installed(r))
+ {
+ /* kernel_driver_installed() counts consecutive positive checks,
+ so the warning is printed only for the first one. */
+ if (r->kernel_driver_running == 1)
+ {
+ clib_warning("PCI device type [%04x:%04x] is busy!\n"
+ "\tUninstall the associated linux kernel "
+ "driver: sudo rmmod %s",
+ c->vendor_id, c->device_id, r->kernel_driver);
+ }
+ /* Skip this id; the scan moves on to the next supported id. */
+ continue;
+ }
+ /* add_device() copies pdev, including config_fd, into the device
+ pool. */
+ add_device (dev, pdev);
+ return r->init_function (vm, dev);
+ }
+ r = r->next_registration;
+ }
+ /* No driver, close the PCI config-space FD */
+ close (pdev->config_fd);
+ return 0;
+}
+
+/* Initialize a scanned PCI device; currently this only tries the
+   registered user-space drivers. */
+static clib_error_t *
+init_device (vlib_main_t * vm,
+             pci_device_t * dev,
+             linux_pci_device_t * pdev)
+{
+  clib_error_t * error;
+
+  error = init_device_from_registered (vm, dev, pdev);
+  return error;
+}
+
+/* Directory-scan callback for one /sys/bus/pci/devices entry.
+
+   arg:          the vlib_main_t.
+   dev_dir_name: sysfs directory of the device, as a clib vector.
+   ignored:      bare entry name; unused.
+
+   Opens the device's config space, reads and byte-swaps the config header
+   and hands the device to init_device().  From that point the config
+   descriptor belongs to init_device(): it is either kept with the device
+   or closed when no driver takes it.  On any earlier failure the
+   descriptor is closed here and an error is returned. */
+static clib_error_t *
+scan_device (void * arg, u8 * dev_dir_name, u8 * ignored)
+{
+  vlib_main_t * vm = arg;
+  int fd;
+  u8 * f;
+  clib_error_t * error = 0;
+  pci_device_t dev = {0};
+  linux_pci_device_t pdev = {0};
+
+  f = format (0, "%v/config%c", dev_dir_name, 0);
+  fd = open ((char *) f, O_RDWR);
+
+  /* Try read-only access if write fails. */
+  if (fd < 0)
+    fd = open ((char *) f, O_RDONLY);
+
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "open `%s'", f);
+      goto done;
+    }
+
+  /* You can only read more than 64 bytes of config space as root; so we try to
+     read the full space but fall back to just the first 64 bytes. */
+  if (read (fd, &dev.config_data, sizeof (dev.config_data)) != sizeof (dev.config_data))
+    {
+      /* A short first read has already advanced the file offset; rewind so
+         the fallback really reads the start of config space. */
+      if (lseek (fd, 0, SEEK_SET) != 0
+          || read (fd, &dev.config0, sizeof (dev.config0)) != sizeof (dev.config0))
+        {
+          error = clib_error_return_unix (0, "read `%s'", f);
+          goto close_fd;
+        }
+    }
+
+  {
+    /* An all-ones header means there is no usable device behind this
+       entry. */
+    static pci_config_header_t all_ones;
+    if (all_ones.vendor_id == 0)
+      memset (&all_ones, ~0, sizeof (all_ones));
+
+    if (! memcmp (&dev.config0.header, &all_ones, sizeof (all_ones)))
+      {
+        error = clib_error_return (0, "invalid PCI config for `%s'", f);
+        goto close_fd;
+      }
+  }
+
+  if (dev.config0.header.header_type == 0)
+    pci_config_type0_little_to_host (&dev.config0);
+  else
+    pci_config_type1_little_to_host (&dev.config1);
+
+  pdev.config_fd = fd;
+  pdev.dev_dir_name = dev_dir_name;
+
+  /* Ownership of fd passes to init_device() here. */
+  error = init_device (vm, &dev, &pdev);
+  goto done;
+
+ close_fd:
+  close (fd);
+
+ done:
+  vec_free (f);
+  return error;
+}
+
+/* vlib init function for the Linux user-space PCI bus layer.
+
+ Records the vlib main, makes sure unix_input_init has run, then scans
+ /sys/bus/pci/devices and tries to initialize every device found. */
+clib_error_t * pci_bus_init (vlib_main_t * vm)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ clib_error_t * error;
+
+ pm->vlib_main = vm;
+
+ if ((error = vlib_call_init_function (vm, unix_input_init)))
+ return error;
+
+ error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, /* scan_dirs */ 0);
+
+ /* Complain and continue. might not be root, etc. */
+ /* NOTE(review): the error is still returned after being reported, which
+ looks at odds with "continue"; confirm what clib_error_report() does
+ with the error object and whether 0 should be returned here. */
+ if (error)
+ clib_error_report (error);
+
+ return error;
+}
+
+/* Register pci_bus_init with the vlib init-function machinery. */
+VLIB_INIT_FUNCTION (pci_bus_init);
diff --git a/vlib/vlib/unix/pci.h b/vlib/vlib/unix/pci.h
new file mode 100644
index 00000000000..b384250eb47
--- /dev/null
+++ b/vlib/vlib/unix/pci.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix/pci.h: Linux specific pci state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_pci_h
+#define included_unix_pci_h
+
+#include <vlib/pci/pci.h>
+
+/* Linux-specific state kept for one PCI device.  Instances live in
+   linux_pci_main.pci_devices. */
+typedef struct {
+ /* /sys/bus/pci/devices/... directory name for this device. */
+ u8 * dev_dir_name;
+
+ /* Resource file descriptors. */
+ int * resource_fds;
+
+ /* File descriptor for config space read/write. */
+ int config_fd;
+
+ /* PCI bus address for this device, parsed from the /sys/bus/pci/devices name. */
+ pci_bus_address_t bus_address;
+
+ /* File descriptor for /dev/uio%d */
+ int uio_fd;
+
+ /* Minor device for uio device. */
+ u32 uio_minor;
+
+ /* Index given by unix_file_add. */
+ u32 unix_file_index;
+
+ /* Input node to handle interrupts for this device. */
+ u32 device_input_node_index;
+
+ /* Node runtime will be a bitmap of device indices with pending interrupts. */
+ u32 device_index;
+} linux_pci_device_t;
+
+/* Global Linux PCI state. */
+typedef struct {
+ /* Main vlib structure; stored by pci_bus_init. */
+ vlib_main_t * vlib_main;
+ /* Pool of PCI devices; pci_dev_for_linux indexes it by os_handle. */
+ linux_pci_device_t * pci_devices;
+} linux_pci_main_t;
+
+extern linux_pci_main_t linux_pci_main;
+
+/* Map a generic pci_device_t to its Linux-side record: dev->os_handle is
+   used as the index into the linux_pci_main.pci_devices pool. */
+always_inline linux_pci_device_t *
+pci_dev_for_linux (pci_device_t * dev)
+{
+  return pool_elt_at_index (linux_pci_main.pci_devices, dev->os_handle);
+}
+
+/* Call to allocate/initialize the pci subsystem.
+ This is not an init function so that users can explicitly enable
+ pci only when it's needed.
+ NOTE(review): vlib/unix/pci.c registers pci_bus_init with
+ VLIB_INIT_FUNCTION, which looks at odds with the sentence above --
+ confirm which behaviour is intended. */
+clib_error_t * pci_bus_init (vlib_main_t * vm);
+
+#endif /* included_unix_pci_h */
diff --git a/vlib/vlib/unix/physmem.c b/vlib/vlib/unix/physmem.c
new file mode 100644
index 00000000000..83b40be6449
--- /dev/null
+++ b/vlib/vlib/unix/physmem.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.c: Unix physical memory
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/unix/physmem.h>
+
+static physmem_main_t physmem_main;
+
+/* Allocate n_bytes from the physmem heap, aligned to at least a cache
+   line, such that the object does not straddle a page of size
+   2^vpm->log2_n_bytes_per_page.  Candidates that do straddle a page are
+   kept allocated until a good one is found (or the heap is exhausted)
+   and are then returned to the heap.
+   Returns a pointer into the heap, or 0 when no allocation succeeds. */
+static void *
+unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment)
+{
+  physmem_main_t * pm = &physmem_main;
+  uword * rejected = 0;
+  uword offset, last_byte, k;
+
+#if DPDK > 0
+  clib_warning ("unsafe alloc!");
+#endif
+
+  /* IO memory is always at least cache aligned. */
+  if (alignment < CLIB_CACHE_LINE_BYTES)
+    alignment = CLIB_CACHE_LINE_BYTES;
+
+  for (;;)
+    {
+      mheap_get_aligned (pm->heap, n_bytes,
+                         /* align */ alignment,
+                         /* align offset */ 0,
+                         &offset);
+
+      /* Heap could not satisfy the request. */
+      if (offset == ~0)
+        break;
+
+      /* First and last byte on the same page means their offsets agree
+         in every bit above the page-offset bits. */
+      last_byte = offset + n_bytes - 1;
+      if (((offset ^ last_byte) >> vpm->log2_n_bytes_per_page) == 0)
+        break;
+
+      /* Spans a page boundary: hold on to it so the next attempt lands
+         elsewhere, and give it back once the search is over. */
+      vec_add1 (rejected, offset);
+    }
+
+  for (k = 0; k < vec_len (rejected); k++)
+    mheap_put (pm->heap, rejected[k]);
+  vec_free (rejected);
+
+  if (offset == ~0)
+    return 0;
+
+  return pm->heap + offset;
+}
+
+/* Give an object obtained from unix_physmem_alloc_aligned back to the
+   physmem heap.  The heap is addressed by offset, so the pointer is
+   converted to its distance from the heap base. */
+static void unix_physmem_free (void * x)
+{
+  void * heap = physmem_main.heap;
+
+  mheap_put (heap, x - heap);
+}
+
+/* Exit handler: mark the huge-page shared memory segment for removal.
+   A shmid of 0 is treated as "no segment" and nothing is done. */
+static void htlb_shutdown(void)
+{
+  physmem_main_t * pm = &physmem_main;
+
+  if (pm->shmid)
+    {
+      shmctl (pm->shmid, IPC_RMID, 0);
+      pm->shmid = 0;
+    }
+}
+
+/* try to use huge TLB pgs if possible.
+
+   Creates and attaches a SHM_HUGETLB shared memory segment of
+   pm->mem_size bytes, builds the physmem heap inside it and fills
+   vpm->page_table with the physical address of each huge page as read
+   from /proc/self/pagemap.
+
+   Returns 1 on success (htlb_shutdown is then registered with atexit),
+   0 on any failure.  On failure after the segment was attached, the
+   segment is detached and removed and pm->mem / pm->heap / pm->shmid are
+   reset so no stale pointer into the segment is left behind. */
+static int htlb_init (vlib_main_t * vm)
+{
+  vlib_physmem_main_t * vpm = &vm->physmem_main;
+  physmem_main_t * pm = &physmem_main;
+  u64 hugepagesize, pagesize;
+  u64 pfn, seek_loc;
+  u64 cur, physaddr, ptbits;
+  u64 n_huge_pages;
+  int fd = -1, i;
+
+  pm->shmid = shmget (11 /* key, my amp goes to 11 */, pm->mem_size,
+                      IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
+  if (pm->shmid < 0)
+    {
+      clib_unix_warning ("shmget");
+      return 0;
+    }
+
+  pm->mem = shmat (pm->shmid, NULL, 0 /* flags */);
+  /* shmat signals failure with (void *) -1, never with a null pointer. */
+  if (pm->mem == (void *) -1)
+    {
+      clib_unix_warning ("shmat");
+      shmctl (pm->shmid, IPC_RMID, 0);
+      pm->shmid = 0;
+      return 0;
+    }
+
+  memset (pm->mem, 0, pm->mem_size);
+
+  /* $$$ get page size info from /proc/meminfo */
+  hugepagesize = 2<<20;
+  pagesize = 4<<10;
+  vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
+
+  /* Round up: the walk below visits one entry per started huge page, so
+     a size that is not a multiple of the huge page size needs the extra
+     slot. */
+  n_huge_pages = (pm->mem_size + hugepagesize - 1) / hugepagesize;
+  vec_resize (vpm->page_table, n_huge_pages);
+
+  vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
+  vpm->virtual.start = pointer_to_uword (pm->mem);
+  vpm->virtual.size = pm->mem_size;
+  vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
+
+  fd = open("/proc/self/pagemap", O_RDONLY);
+  if (fd < 0)
+    {
+      clib_unix_warning ("open /proc/self/pagemap");
+      goto fail;
+    }
+
+  pm->heap = mheap_alloc_with_flags
+    (pm->mem, pm->mem_size,
+     /* Don't want mheap mmap/munmap with IO memory. */
+     MHEAP_FLAG_DISABLE_VM);
+
+  cur = (u64) pm->mem;
+  i = 0;
+
+  while (cur < (u64) pm->mem + pm->mem_size)
+    {
+      /* pagemap holds one u64 per small page, indexed by virtual pfn. */
+      pfn = (u64) cur / pagesize;
+      seek_loc = pfn * sizeof (u64);
+      if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
+        {
+          clib_unix_warning ("lseek to 0x%llx", seek_loc);
+          goto fail;
+        }
+      if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof(ptbits)))
+        {
+          clib_unix_warning ("read ptbits");
+          goto fail;
+        }
+
+      /* bits 0-54 are the physical page number */
+      physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
+      if (CLIB_DEBUG > 1)
+        fformat(stderr, "pm: virtual 0x%llx physical 0x%llx\n",
+                cur, physaddr);
+      vpm->page_table[i++] = physaddr;
+
+      cur += hugepagesize;
+    }
+  close(fd);
+  atexit (htlb_shutdown);
+  return 1;
+
+ fail:
+  /* Common teardown for every failure after a successful attach. */
+  if (fd >= 0)
+    close (fd);
+  /* Any heap created above lived inside the segment being detached. */
+  pm->heap = 0;
+  (void) shmdt (pm->mem);
+  pm->mem = MAP_FAILED;
+  shmctl (pm->shmid, IPC_RMID, 0);
+  pm->shmid = 0;
+  return 0;
+}
+
+/* Application hook consulted by unix_physmem_init before it sets up any
+   memory itself.  This weak default does nothing and returns 0; a
+   non-zero return makes unix_physmem_init return immediately. */
+int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
+  __attribute__ ((weak));
+
+int
+vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
+{
+  return 0;
+}
+
+/* Set up the physical memory allocator for VM.
+
+   Installs the alloc/free callbacks, then tries in order: the
+   application hook vlib_app_physmem_init; huge pages (only when
+   physical_memory_required and not disabled by pm->no_hugepages); and
+   finally either /dev/uio-dma (now rejected as deprecated) or, when
+   physical memory is not required, an anonymous private mapping with an
+   identity single-page page table.
+
+   Only the first call does any work: later calls return 0 once the
+   allocation callback is installed.
+   Returns 0 on success, otherwise an error; on error the mapping and
+   the uio-dma descriptor are released. */
+clib_error_t * unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
+{
+  vlib_physmem_main_t * vpm = &vm->physmem_main;
+  physmem_main_t * pm = &physmem_main;
+  char * uio_dma_path = "/dev/uio-dma";
+  clib_error_t * err = 0;
+
+  /* Avoid multiple calls. */
+  if (vm->os_physmem_alloc_aligned)
+    return 0;
+
+  vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
+  vm->os_physmem_free = unix_physmem_free;
+  pm->mem = MAP_FAILED;
+
+  /* Default size when none was configured. */
+  if (! pm->mem_size)
+    pm->mem_size = 16 << 20;
+
+  /* OK, Mr. App, you tell us */
+  if (vlib_app_physmem_init (vm, pm, physical_memory_required))
+    return 0;
+
+  if (! physical_memory_required)
+    pm->uio_dma_fd = -1;
+  else
+    {
+      if (!pm->no_hugepages && htlb_init(vm))
+        {
+          fformat(stderr, "%s: use huge pages\n", __FUNCTION__);
+          return 0;
+        }
+      pm->uio_dma_fd = open (uio_dma_path, O_RDWR);
+    }
+
+  if (pm->uio_dma_fd >= 0)
+    {
+      /* The device opened, but this path is no longer supported. */
+      err = clib_error_return (0, "uio_dma deprecated");
+      fformat(stderr, "%s: use uio dma pages\n", __FUNCTION__);
+      goto done;
+    }
+
+  if (physical_memory_required)
+    {
+      err = clib_error_return_unix (0, "open `%s'", uio_dma_path);
+      goto done;
+    }
+
+  /* Fake memory: plain anonymous mapping. */
+  pm->mem = mmap (0, pm->mem_size, PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (pm->mem == MAP_FAILED)
+    {
+      err = clib_error_return_unix (0, "mmap");
+      goto done;
+    }
+
+  pm->heap = mheap_alloc (pm->mem, pm->mem_size);
+
+  /* Identity map with a single page. */
+  vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
+  vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
+
+  fformat(stderr, "%s: use fake dma pages\n", __FUNCTION__);
+
+ done:
+  if (err)
+    {
+      if (pm->mem != MAP_FAILED)
+        munmap (pm->mem, pm->mem_size);
+      if (pm->uio_dma_fd >= 0)
+        {
+          close (pm->uio_dma_fd);
+          pm->uio_dma_fd = -1;
+        }
+    }
+  return err;
+}
+
+/* CLI handler for "show physmem": prints the physmem heap via
+   format_mheap (non-verbose), or a fixed message when no heap exists or
+   when built with DPDK > 0.  Always returns 0. */
+static clib_error_t *
+show_physmem (vlib_main_t * vm,
+              unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+#if DPDK > 0
+  vlib_cli_output (vm, "Not supported with DPDK drivers.");
+#else
+  void * heap = physmem_main.heap;
+
+  if (! heap)
+    vlib_cli_output (vm, "No physmem allocated.");
+  else
+    vlib_cli_output (vm, "%U", format_mheap, heap, /* verbose */ 0);
+#endif
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_physmem_command, static) = {
+  .path = "show physmem",
+  .short_help = "Show physical memory allocation",
+  .function = show_physmem,
+};
+
+/* Append one run of consecutive CPU numbers to S: "N" for a single CPU,
+   "N-M" for a range, preceded by "," when S already holds a run.
+   Returns the (possibly reallocated) vector. */
+static u8 *
+show_affinity_add_run (u8 * s, int first, int last)
+{
+  if (vec_len (s) > 0)
+    s = format (s, ",");
+
+  if (first == last)
+    return format (s, "%d", first);
+
+  return format (s, "%d-%d", first, last);
+}
+
+/* CLI handler for "show affinity": prints the CPUs in this process's
+   affinity mask as a comma separated list of numbers and ranges, e.g.
+   "0-3,5,8-9".  Every CPU a cpu_set_t can describe is examined.
+   Always returns 0; a sched_getaffinity failure is reported on the CLI. */
+static clib_error_t *
+show_affinity (vlib_main_t * vm,
+               unformat_input_t * input,
+               vlib_cli_command_t * cmd)
+{
+  cpu_set_t set;
+  u8 * s = 0;
+  int i;
+  /* Bounds of the run of set bits currently being scanned; -1 = none. */
+  int first_in_run = -1;
+  int last_in_run = -1;
+
+  if (sched_getaffinity (0 /* pid, 0 = this proc */,
+                         sizeof (set), &set) < 0)
+    {
+      vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
+                       strerror(errno));
+      return 0;
+    }
+
+  for (i = 0; i < CPU_SETSIZE; i++)
+    {
+      if (CPU_ISSET(i, &set))
+        {
+          if (first_in_run < 0)
+            first_in_run = i;
+          last_in_run = i;
+        }
+      else if (first_in_run >= 0)
+        {
+          s = show_affinity_add_run (s, first_in_run, last_in_run);
+          first_in_run = -1;
+        }
+    }
+
+  /* A run that reaches the last CPU is still open here; emit it with
+     its real end rather than repeating its start. */
+  if (first_in_run >= 0)
+    s = show_affinity_add_run (s, first_in_run, last_in_run);
+
+  vlib_cli_output (vm, "Process runs on: %v", s);
+  vec_free (s);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_affinity_command, static) = {
+  .path = "show affinity",
+  .short_help = "Show process cpu affinity",
+  .function = show_affinity,
+};
+
+/* CLI handler for "set affinity": parses a comma separated list of CPU
+   numbers and ranges ("0-3,5,8-9"), applies it with sched_setaffinity
+   and then prints the resulting mask through show_affinity.
+
+   A CPU number that does not fit in a cpu_set_t, or a range whose start
+   is greater than its end, is reported on the CLI and nothing is
+   changed.  Always returns 0 on those paths and on a sched_setaffinity
+   failure. */
+static clib_error_t *
+set_affinity (vlib_main_t * vm,
+              unformat_input_t * input,
+              vlib_cli_command_t * cmd)
+{
+  cpu_set_t set;
+  u32 first, last, i;
+  int is_range;
+  int another_round;
+
+  memset (&set, 0, sizeof (set));
+
+  do
+    {
+      another_round = 0;
+      is_range = 1;
+
+      /* The forms with a trailing comma announce that more follows. */
+      if (unformat (input, "%d-%d,", &first, &last))
+        another_round = 1;
+      else if (unformat (input, "%d-%d", &first, &last))
+        ;
+      else if (unformat (input, "%d,", &first))
+        {
+          is_range = 0;
+          another_round = 1;
+        }
+      else if (unformat (input, "%d", &first))
+        is_range = 0;
+      else
+        break;
+
+      if (! is_range)
+        last = first;
+
+      if (first >= CPU_SETSIZE || last >= CPU_SETSIZE || first > last)
+        {
+          if (is_range)
+            vlib_cli_output (vm, "range %d-%d invalid", first, last);
+          else
+            vlib_cli_output (vm, "cpu %d invalid", first);
+          return 0;
+        }
+
+      for (i = first; i <= last; i++)
+        CPU_SET(i, &set);
+    }
+  while (another_round);
+
+  if (sched_setaffinity (0 /* pid, 0 = this proc */,
+                         sizeof (set), &set) < 0)
+    {
+      vlib_cli_output (vm, "Couldn't set affinity mask: %s\n",
+                       strerror(errno));
+      return 0;
+    }
+  return show_affinity (vm, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_affinity_command, static) = {
+  .path = "set affinity",
+  .short_help = "Set process cpu affinity",
+  .function = set_affinity,
+};
+
+/* Early config handler for the "physmem" section.
+   Accepted items:
+     no-huge-pages | no-huge      sets pm->no_hugepages
+     size-in-mb N  | size N       sets pm->mem_size to N megabytes
+   Anything else yields a parse error.  Returns 0 on success. */
+static clib_error_t *
+vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+  physmem_main_t * pm = &physmem_main;
+  u32 size_in_mb;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* Longer spelling first, so that the shorter keyword cannot match
+         just the front of "no-huge-pages" and leave "-pages" behind. */
+      if (unformat (input, "no-huge-pages") || unformat (input, "no-huge"))
+        pm->no_hugepages = 1;
+
+      else if (unformat(input, "size-in-mb %d", &size_in_mb) ||
+               unformat(input, "size %d", &size_in_mb))
+        /* Widen before shifting: a u32 shift wraps from 4096 MB up. */
+        pm->mem_size = ((uword) size_in_mb) << 20;
+      else
+        return unformat_parse_error (input);
+    }
+
+  unformat_free (input);
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
diff --git a/vlib/vlib/unix/physmem.h b/vlib/vlib/unix/physmem.h
new file mode 100644
index 00000000000..a963be746d8
--- /dev/null
+++ b/vlib/vlib/unix/physmem.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_physmem_h__
+#define __included_physmem_h__
+
+/* Manage I/O physical memory. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/fcntl.h> /* for open */
+#include <sys/file.h> /* for flock */
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+/* State of the unix physical memory allocator (one static instance in
+   physmem.c). */
+typedef struct {
+ /* File descriptor for /dev/uio-dma. */
+ int uio_dma_fd;
+
+ /* Virtual memory via mmaped. */
+ void * mem;
+
+ /* Size in bytes.  unix_physmem_init substitutes 16 << 20 when this is 0;
+ the "physmem" config section can set it in megabytes. */
+ uword mem_size;
+
+ /* Heap allocated out of virtual memory. */
+ void * heap;
+
+ /* huge TLB segment id; htlb_shutdown treats 0 as "no segment". */
+ int shmid;
+
+ /* should we try to use htlb ?  Non-zero means do not try. */
+ int no_hugepages;
+
+} physmem_main_t;
+
+#endif /* __included_physmem_h__ */
diff --git a/vlib/vlib/unix/plugin.c b/vlib/vlib/unix/plugin.c
new file mode 100644
index 00000000000..3411ef340af
--- /dev/null
+++ b/vlib/vlib/unix/plugin.c
@@ -0,0 +1,210 @@
+/*
+ * plugin.c: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/unix/plugin.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vlib_plugin_main;
+
+/* Record CB as the callback used to obtain the handoff structure that is
+   passed to each plugin's vlib_plugin_register. */
+void vlib_set_get_handoff_structure_cb (void *cb)
+{
+  vlib_plugin_main.handoff_structure_get_cb = cb;
+}
+
+/* Call the registered handoff-structure callback and return its result;
+   returns 0 when no callback has been registered. */
+static void * vnet_get_handoff_structure (void)
+{
+  void * (*get_cb)(void);
+
+  get_cb = vlib_plugin_main.handoff_structure_get_cb;
+
+  return get_cb ? (*get_cb)() : 0;
+}
+
+/* dlopen the plugin named by pi->name and, if it exports
+   vlib_plugin_register, call it with the vlib main pointer, the handoff
+   structure and from_early_init.
+
+   Returns 0 on success (also when the plugin has no register function),
+   -1 when dlopen fails, 1 when registration fails or no handoff
+   structure is available.  The handle is stored in pi->handle. */
+static int
+load_one_plugin (plugin_main_t *pm, plugin_info_t *pi, int from_early_init)
+{
+  clib_error_t * (*register_fn)(vlib_main_t *, void *, int);
+  clib_error_t * err;
+  void *dl;
+  void *handoff;
+
+  dl = dlopen ((char *)pi->name, RTLD_LAZY);
+
+  /*
+   * A plugin with an undefined symbol reference ends up here; print
+   * the loader's message so the failure is not silent.
+   */
+  if (dl == 0)
+    {
+      clib_warning ("%s", dlerror());
+      return -1;
+    }
+
+  pi->handle = dl;
+
+  register_fn = dlsym (dl, "vlib_plugin_register");
+  if (register_fn == 0)
+    {
+      /* NOTE(review): the plugin is closed here yet success is returned,
+         so the caller records it with a handle that is no longer open --
+         confirm this is intended. */
+      dlclose (dl);
+      return 0;
+    }
+
+  handoff = vnet_get_handoff_structure();
+
+  if (handoff)
+    err = (*register_fn)(pm->vlib_main, handoff, from_early_init);
+  else
+    err = clib_error_return (0, "handoff structure callback returned 0");
+
+  if (err)
+    {
+      clib_error_report (err);
+      dlclose (dl);
+      return 1;
+    }
+
+  clib_warning ("Loaded plugin: %s", pi->name);
+
+  return 0;
+}
+
+/* Split pm->plugin_path at ':' into a vector of NUL-terminated
+   directory-name vectors.  Every byte of the path vector that is not a
+   ':' is copied, and a 0 is appended to each component.  The caller
+   frees each component and the outer vector. */
+static u8 **split_plugin_path (plugin_main_t *pm)
+{
+  u8 **dirs = 0;
+  u8 *cur = 0;
+  u8 *path = pm->plugin_path;
+  int i;
+
+  for (i = 0; i < vec_len (pm->plugin_path); i++)
+    {
+      if (path[i] == ':')
+        {
+          /* Separator: close the current component and start afresh. */
+          vec_add1 (cur, 0);
+          vec_add1 (dirs, cur);
+          cur = 0;
+        }
+      else
+        vec_add1 (cur, path[i]);
+    }
+
+  /* Whatever follows the last separator. */
+  if (cur)
+    {
+      vec_add1 (cur, 0);
+      vec_add1 (dirs, cur);
+    }
+
+  return dirs;
+}
+
+/* Scan every directory in pm->plugin_path and load each regular file
+   that passes the name filter and is not yet in
+   pm->plugin_by_name_hash.  Successfully loaded plugins get an entry in
+   pm->plugin_info (name, stat data, dl handle) and in the hash, keyed by
+   full path name.  Directories that cannot be opened and files that
+   cannot be stat'ed are skipped.  Always returns 0. */
+int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init)
+{
+  DIR *dp;
+  struct dirent *entry;
+  struct stat statb;
+  plugin_info_t *pi;
+  u8 **plugin_path;
+  int i;
+
+  plugin_path = split_plugin_path (pm);
+
+  for (i = 0; i < vec_len (plugin_path); i++)
+    {
+      dp = opendir ((char *)plugin_path[i]);
+
+      if (dp == 0)
+        {
+          /* Still release this path component. */
+          vec_free (plugin_path[i]);
+          continue;
+        }
+
+      while ((entry = readdir (dp)))
+        {
+          u8 *plugin_name;
+
+          if (pm->plugin_name_filter)
+            {
+              int j;
+              int filtered_out = 0;
+
+              /* Compare over the whole filter vector. */
+              for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+                if (entry->d_name[j] != pm->plugin_name_filter[j])
+                  {
+                    filtered_out = 1;
+                    break;
+                  }
+              if (filtered_out)
+                continue;
+            }
+
+          plugin_name = format (0, "%s/%s%c", plugin_path[i],
+                                entry->d_name, 0);
+
+          /* Skip: unreadable, a dir or other things which aren't
+             plugins, or a plugin we already know about.  The name is
+             not kept in any of these cases, so free it. */
+          if (stat ((char *)plugin_name, &statb) < 0
+              || !S_ISREG(statb.st_mode)
+              || hash_get_mem (pm->plugin_by_name_hash, plugin_name) != 0)
+            {
+              vec_free (plugin_name);
+              continue;
+            }
+
+          vec_add2 (pm->plugin_info, pi, 1);
+          /* Clear the slot before filling it in; it may be a slot given
+             back by an earlier failed load. */
+          memset (pi, 0, sizeof (*pi));
+          pi->name = plugin_name;
+          pi->file_info = statb;
+
+          if (load_one_plugin (pm, pi, from_early_init))
+            {
+              /* Load failed: drop the entry just added. */
+              vec_free (plugin_name);
+              _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+              continue;
+            }
+
+          hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+                        pi - pm->plugin_info);
+        }
+      closedir (dp);
+      vec_free (plugin_path[i]);
+    }
+  vec_free (plugin_path);
+  return 0;
+}
+/* Weak defaults for the plugin search path (empty) and name filter
+   (none).  vlib_plugin_early_init copies them into vlib_plugin_main. */
+char *vlib_plugin_path __attribute__((weak));
+char *vlib_plugin_path = "";
+char *vlib_plugin_name_filter __attribute__((weak));
+char *vlib_plugin_name_filter = 0;
+
+/* Initialise vlib_plugin_main from vlib_plugin_path and
+   vlib_plugin_name_filter, create the by-name hash, and load whatever
+   plugins are found, flagged as coming from early init.
+   Returns the result of vlib_load_new_plugins. */
+int vlib_plugin_early_init (vlib_main_t *vm)
+{
+  plugin_main_t *pm = &vlib_plugin_main;
+
+  pm->vlib_main = vm;
+  pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+
+  /* Both strings are stored as vectors with the trailing NUL included. */
+  pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
+  clib_warning ("plugin path %s", pm->plugin_path);
+
+  if (vlib_plugin_name_filter)
+    pm->plugin_name_filter = format (0, "%s%c", vlib_plugin_name_filter, 0);
+
+  return vlib_load_new_plugins (pm, 1 /* from_early_init */);
+}
diff --git a/vlib/vlib/unix/plugin.h b/vlib/vlib/unix/plugin.h
new file mode 100644
index 00000000000..e7d75099ed9
--- /dev/null
+++ b/vlib/vlib/unix/plugin.h
@@ -0,0 +1,88 @@
+/*
+ * plugin.h: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * vlib plugin scheme
+ *
+ * Almost anything which can be made to work in a vlib unix
+ * application will also work in a vlib plugin.
+ *
+ * The elf-section magic which registers static objects
+ * works so long as plugins are present when the vlib unix process
+ * starts. But wait: there's more...
+ *
+ * If an application calls vlib_load_new_plugins() -- possibly after
+ * changing vlib_plugin_main.plugin_path / vlib_plugin_main.plugin_name_filter,
+ * -- new plugins will be loaded. That, in turn, allows considerable
+ * flexibility in terms of adding feature code or fixing bugs without
+ * requiring the data-plane process to restart.
+ *
+ * When the plugin mechanism loads a plugin, it uses dlsym to locate
+ * and call the plugin's function vlib_plugin_register() if it exists.
+ * A plugin which expects to be loaded after the vlib application
+ * starts uses this callback to modify the application. If vlib_plugin_register
+ * returns non-zero, the plugin mechanism dlclose()'s the plugin.
+ *
+ * Applications control the plugin search path and name filter by
+ * declaring the variables vlib_plugin_path and vlib_plugin_name_filter.
+ * libvlib_unix.la supplies weak references for these symbols which
+ * effectively disable the scheme. In order for the elf-section magic to
+ * work, static plugins must be loaded at the earliest possible moment.
+ *
+ * An application can change these parameters at any time and call
+ * vlib_load_new_plugins().
+ */
+
+
+
+/* One entry per plugin file handled by vlib_load_new_plugins. */
+typedef struct {
+ /* Full path name of the plugin file (NUL-terminated vector). */
+ u8 *name;
+ /* stat() result taken when the file was found. */
+ struct stat file_info;
+ /* Handle returned by dlopen. */
+ void *handle;
+} plugin_info_t;
+
+/* Global plugin loader state. */
+typedef struct {
+ /* loaded plugin info */
+ plugin_info_t *plugin_info;
+ /* Maps plugin file name to its index in plugin_info. */
+ uword *plugin_by_name_hash;
+
+ /* path and name filter */
+ u8 *plugin_path;
+ u8 *plugin_name_filter;
+
+ /* handoff structure get callback; set through
+ vlib_set_get_handoff_structure_cb and called as void * (*)(void). */
+ void *handoff_structure_get_cb;
+
+ /* usual */
+ vlib_main_t *vlib_main;
+} plugin_main_t;
+
+/* Defined once in plugin.c.  Declared extern here so that files
+   including this header do not each emit their own definition. */
+extern plugin_main_t vlib_plugin_main;
+
+int vlib_plugin_early_init (vlib_main_t *vm);
+int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init);
+
+#endif /* __included_plugin_h__ */
diff --git a/vlib/vlib/unix/unix.h b/vlib/vlib/unix/unix.h
new file mode 100644
index 00000000000..0802a93baa3
--- /dev/null
+++ b/vlib/vlib/unix/unix.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix.h: Unix specific main state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_unix_h
+#define included_unix_unix_h
+
+#include <vppinfra/socket.h>
+
+struct unix_file;
+/* Callback type for the read/write/error handlers of a unix_file_t. */
+typedef clib_error_t * (unix_file_function_t) (struct unix_file * f);
+
+/* One polled file descriptor together with its handlers. */
+typedef struct unix_file {
+ /* Unix file descriptor from open/socket. */
+ u32 file_descriptor;
+
+ /* Bit flags; see UNIX_FILE_DATA_AVAILABLE_TO_WRITE. */
+ u32 flags;
+#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0)
+
+ /* Data available for function's use. */
+ uword private_data;
+
+ /* Functions to be called when read/write data becomes ready. */
+ unix_file_function_t * read_function, * write_function, * error_function;
+} unix_file_t;
+
+/* One slot of the error history kept in unix_main_t; filled in by
+   unix_save_error. */
+typedef struct {
+ /* Value of vlib_time_now when the error was saved. */
+ f64 time;
+ /* The saved error. */
+ clib_error_t * error;
+} unix_error_history_t;
+
+/* Kind of change passed to unix_main_t.file_update: sent by
+   unix_file_add, unix_file_set_data_available_to_write and
+   unix_file_del respectively. */
+typedef enum {
+ UNIX_FILE_UPDATE_ADD,
+ UNIX_FILE_UPDATE_MODIFY,
+ UNIX_FILE_UPDATE_DELETE,
+} unix_file_update_type_t;
+
+/* Unix-specific main state; the single instance is unix_main. */
+typedef struct {
+ /* Back pointer to main structure. */
+ vlib_main_t * vlib_main;
+
+ u32 flags;
+ /* Run interactively or as daemon (background process). */
+#define UNIX_FLAG_INTERACTIVE (1 << 0)
+#define UNIX_FLAG_NODAEMON (1 << 1)
+
+ /* Pool of files to poll for input/output. */
+ unix_file_t * file_pool;
+
+ /* CLI listen socket. */
+ clib_socket_t cli_listen_socket;
+
+ /* Called whenever a file is added to, modified in or deleted from
+ file_pool. */
+ void (* file_update) (unix_file_t * file, unix_file_update_type_t update_type);
+
+ /* Circular buffer of last unix errors. */
+ unix_error_history_t error_history[128];
+ /* Next slot of error_history to be written. */
+ u32 error_history_index;
+ /* Count of errors saved through unix_save_error. */
+ u64 n_total_errors;
+
+ /* startup-config filename */
+ u8 *startup_config_filename;
+
+ /* unix config complete */
+ volatile int unix_config_complete;
+
+ /* CLI log file. GIGO. */
+ u8 *log_filename;
+ int log_fd;
+ /* Don't put telnet connections into character mode */
+ int cli_line_mode;
+ u32 cli_history_limit;
+
+} unix_main_t;
+
+/* Global main structure. */
+extern unix_main_t unix_main;
+
+/* Copy *template into a fresh element of um->file_pool, announce it
+   through um->file_update (UNIX_FILE_UPDATE_ADD) and return the pool
+   index of the new element. */
+always_inline uword
+unix_file_add (unix_main_t * um, unix_file_t * template)
+{
+  unix_file_t * slot;
+
+  pool_get (um->file_pool, slot);
+  *slot = *template;
+  (*um->file_update) (slot, UNIX_FILE_UPDATE_ADD);
+
+  return slot - um->file_pool;
+}
+
+/* Remove F from polling: announce the deletion through um->file_update,
+   close the descriptor, mark it invalid (~0) and give the element back
+   to um->file_pool. */
+always_inline void
+unix_file_del (unix_main_t * um, unix_file_t * f)
+{
+  (*um->file_update) (f, UNIX_FILE_UPDATE_DELETE);
+
+  close (f->file_descriptor);
+  f->file_descriptor = ~0;
+
+  pool_put (um->file_pool, f);
+}
+
+/* Set or clear UNIX_FILE_DATA_AVAILABLE_TO_WRITE on the file at
+   unix_file_index in unix_main.file_pool.  unix_main.file_update
+   (UNIX_FILE_UPDATE_MODIFY) is called only when the flag actually
+   changes.  Returns 1 if the flag was set before the call, else 0. */
+always_inline uword
+unix_file_set_data_available_to_write (u32 unix_file_index, uword is_available)
+{
+  unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, unix_file_index);
+  uword had_flag = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) != 0;
+  uword want_flag = is_available != 0;
+
+  if (had_flag != want_flag)
+    {
+      uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+    }
+
+  return had_flag;
+}
+
+always_inline void
+unix_save_error (unix_main_t * um, clib_error_t * error)
+{
+ unix_error_history_t * eh = um->error_history + um->error_history_index;
+ clib_error_free_vector (eh->error);
+ eh->error = error;
+ eh->time = vlib_time_now (um->vlib_main);
+ um->n_total_errors += 1;
+ if (++um->error_history_index >= ARRAY_LEN (um->error_history))
+ um->error_history_index = 0;
+}
+
+/* Main function for Unix VLIB. */
+int vlib_unix_main (int argc, char * argv[]);
+
+/* Call to allocate/initialize physical DMA memory subsystem.
+ This is not an init function so that users can explicitly enable/disable
+ physmem when it's not needed. */
+clib_error_t * unix_physmem_init (vlib_main_t * vm,
+ int fail_if_physical_memory_not_present);
+
+/* Set prompt for CLI. */
+void vlib_unix_cli_set_prompt (char * prompt);
+
+/* Accessor for the global unix_main structure. */
+static inline unix_main_t *
+vlib_unix_get_main (void)
+{
+  unix_main_t * um = &unix_main;
+
+  return um;
+}
+
+/* thread stack array; vec_len = max number of threads */
+/* NOTE(review): this is a definition (no extern) placed in a header, so
+   each file including it emits its own tentative definition -- confirm
+   whether one definition in a .c file plus an extern here is wanted. */
+u8 **vlib_thread_stacks;
+
+#endif /* included_unix_unix_h */