From eaabe073515e7722ed546b36f99efc6feea305a1 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 22 Mar 2017 10:18:13 +0100 Subject: Add memif - packet memory interface for intra-host communication Change-Id: I94c06b07a39f07ceba87bf3e7fcfc70e43231e8a Signed-off-by: Damjan Marion Co-Authored-By: Milan Lenco --- src/configure.ac | 1 + src/plugins/Makefile.am | 4 + src/plugins/memif.am | 32 ++ src/plugins/memif/cli.c | 202 +++++++ src/plugins/memif/device.c | 330 +++++++++++ src/plugins/memif/memif.api | 127 +++++ src/plugins/memif/memif.c | 1045 +++++++++++++++++++++++++++++++++++ src/plugins/memif/memif.h | 263 +++++++++ src/plugins/memif/memif_all_api_h.h | 18 + src/plugins/memif/memif_api.c | 332 +++++++++++ src/plugins/memif/memif_msg_enum.h | 31 ++ src/plugins/memif/node.c | 383 +++++++++++++ src/vppinfra/clib.h | 6 + 13 files changed, 2774 insertions(+) create mode 100644 src/plugins/memif.am create mode 100644 src/plugins/memif/cli.c create mode 100644 src/plugins/memif/device.c create mode 100644 src/plugins/memif/memif.api create mode 100644 src/plugins/memif/memif.c create mode 100644 src/plugins/memif/memif.h create mode 100644 src/plugins/memif/memif_all_api_h.h create mode 100644 src/plugins/memif/memif_api.c create mode 100644 src/plugins/memif/memif_msg_enum.h create mode 100644 src/plugins/memif/node.c diff --git a/src/configure.ac b/src/configure.ac index 5e02adc1160..222dbe6efef 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -151,6 +151,7 @@ PLUGIN_ENABLED(ila) PLUGIN_ENABLED(ioam) PLUGIN_ENABLED(ixge) PLUGIN_ENABLED(lb) +PLUGIN_ENABLED(memif) PLUGIN_ENABLED(sixrd) PLUGIN_ENABLED(snat) PLUGIN_DISABLED(srv6sample) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 255e644f089..6713a9e25a3 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -57,6 +57,10 @@ if ENABLE_LB_PLUGIN include lb.am endif +if ENABLE_MEMIF_PLUGIN +include memif.am +endif + if ENABLE_SIXRD_PLUGIN include sixrd.am endif diff --git a/src/plugins/memif.am b/src/plugins/memif.am new file mode 100644 index 00000000000..d3a3171d039 --- /dev/null +++ b/src/plugins/memif.am @@ -0,0 +1,32 @@ +# Copyright (c) 2017 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppplugins_LTLIBRARIES += memif_plugin.la + +memif_plugin_la_SOURCES = memif/memif.c \ + memif/memif_api.c \ + memif/cli.c \ + memif/node.c \ + memif/device.c \ + memif/memif_plugin.api.h + +noinst_HEADERS += memif/memif.h + +nobase_apiinclude_HEADERS += \ + memif/memif_all_api_h.h \ + memif/memif_msg_enum.h \ + memif/memif.api.h + +API_FILES += memif/memif.api + +# vi:syntax=automake diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c new file mode 100644 index 00000000000..ef73693b35d --- /dev/null +++ b/src/plugins/memif/cli.c @@ -0,0 +1,202 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include +#include +#include +#include + +#include +#include +#include + +#include + +static clib_error_t * +memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + int r; + u32 ring_size = MEMIF_DEFAULT_RING_SIZE; + memif_create_if_args_t args = { 0 }; + args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "key 0x%" PRIx64, &args.key)) + ; + else if (unformat (line_input, "socket %s", &args.socket_filename)) + ; + else if (unformat (line_input, "ring-size %u", &ring_size)) + ; + else if (unformat (line_input, "buffer-size %u", &args.buffer_size)) + ; + else if (unformat (line_input, "master")) + args.is_master = 1; + else if (unformat (line_input, "slave")) + args.is_master = 0; + else if (unformat (line_input, "hw-addr %U", + unformat_ethernet_address, args.hw_addr)) + args.hw_addr_set = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (!is_pow2 (ring_size)) + return clib_error_return (0, "ring size must be power of 2"); + + args.log2_ring_size = min_log2 (ring_size); + + r = memif_create_if (vm, &args); + + if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 + && r >= VNET_API_ERROR_SYSCALL_ERROR_10) + return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + return clib_error_return (0, "Invalid interface name"); + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + return clib_error_return (0, "Interface already exists"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_create_command, static) = { + .path = "create memif", + .short_help = "create memif [key ] [socket ] " + "[ring-size ] [buffer-size ] [hw-addr ] " + "", + .function = memif_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u64 key = 0; + u8 key_defined = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "key 0x%" PRIx64, &key)) + key_defined = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (!key_defined) + return clib_error_return (0, "missing key"); + + memif_delete_if (vm, key); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_delete_command, static) = { + .path = "delete memif", + .short_help = "delete memif key ", + .function = memif_delete_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vnet_main_t *vnm = vnet_get_main (); + int i; + + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, + vnm, mif->sw_if_index); + vlib_cli_output (vm, " key 0x%" PRIx64 " file %s", mif->key, + mif->socket_filename); + vlib_cli_output (vm, " listener %d conn-fd %d int-fd %d", mif->listener_index, + mif->connection.fd, mif->interrupt_line.fd); + vlib_cli_output (vm, " ring-size %u num-c2s-rings %u num-s2c-rings %u buffer_size %u", + (1 << mif->log2_ring_size), + mif->num_s2m_rings, + mif->num_m2s_rings, + mif->buffer_size); + for (i=0; i < mif->num_s2m_rings; i++) + { + memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + if (ring) + { + vlib_cli_output (vm, " slave-to-master ring %u:", i); + vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); + } + } + for (i=0; i < mif->num_m2s_rings; i++) + { + memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + if (ring) + { + vlib_cli_output (vm, " master-to-slave ring %u:", i); + vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); + } + } + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_show_command, static) = { + .path = "show memif", + .short_help = "show memif", + .function = memif_show_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +memif_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (memif_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c new file mode 100644 index 00000000000..446537a3792 --- /dev/null +++ b/src/plugins/memif/device.c @@ -0,0 +1,330 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define foreach_memif_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) MEMIF_TX_ERROR_##f, + foreach_memif_tx_func_error +#undef _ + MEMIF_TX_N_ERROR, +} memif_tx_func_error_t; + +static char *memif_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_memif_tx_func_error +#undef _ +}; + + +static u8 * +format_memif_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + + s = format (s, "memif%u", i); + return s; +} + +static u8 * +format_memif_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + uword indent = format_get_indent (s); + + s = format (s, "MEMIF interface"); + if (verbose) + { + s = format (s, "\n%U instance %u", format_white_space, indent + 2, + dev_instance); + } + return s; +} + +static u8 * +format_memif_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +static_always_inline void +memif_interface_lock (memif_if_t * mif) +{ + if (PREDICT_FALSE (mif->lockp != 0)) + { + while (__sync_lock_test_and_set (mif->lockp, 1)) + ; + } +} + +static_always_inline void +memif_interface_unlock (memif_if_t * mif) +{ + if (PREDICT_FALSE (mif->lockp != 0)) + *mif->lockp = 0; +} + +static_always_inline void +memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_prefetch_buffer_header (b, LOAD); + CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD); +} + +static_always_inline uword +memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, memif_if_t * mif, + memif_ring_type_t type) +{ + u8 rid = 0; + memif_ring_t *ring = memif_get_ring (mif, type, rid); + u32 *buffers = vlib_frame_args (frame); + u32 n_left = frame->n_vectors; + u16 ring_size = 1 << mif->log2_ring_size; + u16 mask = ring_size - 1; + u16 head, tail; + u16 free_slots; + + memif_interface_lock (mif); + + /* free consumed buffers */ + + head = ring->head; + tail = ring->tail; + + if (tail > head) + free_slots = tail - head; + else + free_slots = ring_size - head + tail; + + while (n_left > 5 && free_slots > 1) + { + if (PREDICT_TRUE (head + 5 < ring_size)) + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 2), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 3), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[head + 4], CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[head + 5], CLIB_CACHE_LINE_BYTES, STORE); + } + else + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 2) % mask), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 3) % mask), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[(head + 4) % mask], + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[(head + 5) % mask], + CLIB_CACHE_LINE_BYTES, STORE); + } + + memif_prefetch_buffer_and_data (vm, buffers[2]); + memif_prefetch_buffer_and_data (vm, buffers[3]); + + vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]); + vlib_buffer_t *b1 = vlib_get_buffer (vm, buffers[1]); + + void *mb0 = memif_get_buffer (mif, ring, head); + clib_memcpy (mb0, vlib_buffer_get_current (b0), CLIB_CACHE_LINE_BYTES); + ring->desc[head].length = b0->current_length; + head = (head + 1) & mask; + + void *mb1 = memif_get_buffer (mif, ring, head); + clib_memcpy (mb1, vlib_buffer_get_current (b1), CLIB_CACHE_LINE_BYTES); + ring->desc[head].length = b1->current_length; + head = (head + 1) & mask; + + if (b0->current_length > CLIB_CACHE_LINE_BYTES) + { + clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES, + vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, + b0->current_length - CLIB_CACHE_LINE_BYTES); + } + if (b1->current_length > CLIB_CACHE_LINE_BYTES) + { + clib_memcpy (mb1 + CLIB_CACHE_LINE_BYTES, + vlib_buffer_get_current (b1) + CLIB_CACHE_LINE_BYTES, + b1->current_length - CLIB_CACHE_LINE_BYTES); + } + + + buffers += 2; + n_left -= 2; + free_slots -= 2; + } + + while (n_left && free_slots) + { + vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]); + void *mb0 = memif_get_buffer (mif, ring, head); + clib_memcpy (mb0, vlib_buffer_get_current (b0), CLIB_CACHE_LINE_BYTES); + + if (b0->current_length > CLIB_CACHE_LINE_BYTES) + { + clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES, + vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, + b0->current_length - CLIB_CACHE_LINE_BYTES); + } + ring->desc[head].length = b0->current_length; + head = (head + 1) & mask; + + buffers++; + n_left--; + free_slots--; + } + + CLIB_MEMORY_STORE_BARRIER (); + ring->head = head; + + memif_interface_unlock (mif); + + if (n_left) + { + vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS, + n_left); + vlib_buffer_free (vm, buffers, n_left); + } + + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + if (mif->interrupt_line.fd > 0) + { + u8 b = rid; + CLIB_UNUSED (int r) = write (mif->interrupt_line.fd, &b, sizeof (b)); + } + + return frame->n_vectors; +} + +static uword +memif_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + memif_main_t *nm = &memif_main; + vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; + memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance); + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M); + else + return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S); +} + +static void +memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + memif_main_t *apm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + mif->per_interface_next_index = node_index; + return; + } + + mif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index); +} + +static void +memif_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + memif_main_t *apm = &memif_main; + memif_msg_t msg; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; + else + { + mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; + if (!(mif->flags & MEMIF_IF_FLAG_DELETING) + && mif->connection.index != ~0) + { + msg.version = MEMIF_VERSION; + msg.type = MEMIF_MSG_TYPE_DISCONNECT; + send (mif->connection.fd, &msg, sizeof (msg), 0); + } + } + + return 0; +} + +static clib_error_t * +memif_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (memif_device_class) = { + .name = "memif", + .tx_function = memif_interface_tx, + .format_device_name = format_memif_device_name, + .format_device = format_memif_device, + .format_tx_trace = format_memif_tx_trace, + .tx_function_n_errors = MEMIF_TX_N_ERROR, + .tx_function_error_strings = memif_tx_func_error_strings, + .rx_redirect_to_node = memif_set_interface_next_node, + .clear_counters = memif_clear_hw_interface_counters, + .admin_up_down_function = memif_interface_admin_up_down, + .subif_add_del_function = memif_subif_add_del_function, +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class, + memif_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api new file mode 100644 index 00000000000..6f946421411 --- /dev/null +++ b/src/plugins/memif/memif.api @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \brief Create memory interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param role - role of the interface in the connection (master/slave) + @param key - 64bit integer used to authenticate and match opposite sides + of the connection + @param socket_filename - filename of the socket to be used for connection + establishment + @param ring_size - the number of entries of RX/TX rings + @param buffer_size - size of the buffer allocated for each ring entry + @param hw_addr - interface MAC address +*/ +define memif_create +{ + u32 client_index; + u32 context; + + u8 role; /* 0 = master, 1 = slave */ + u64 key; /* optional, default is 0 */ + u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" + and can be changed in VPP startup config */ + + u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */ + u16 buffer_size; /* optional, default is 2048 bytes */ + u8 hw_addr[6]; /* optional, randomly generated if not defined */ +}; + +/** \brief Create memory interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param sw_if_index - software index of the newly created interface +*/ +define memif_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete memory interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the interface to delete +*/ +define memif_delete +{ + u32 client_index; + u32 context; + + u32 sw_if_index; +}; + +/** \brief Delete host-interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define memif_delete_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Memory interface details structure + @param context - sender context, to match reply w/ request (memif_dump) + @param sw_if_index - index of the interface + @param if_name - name of the interface + @param hw_addr - interface MAC address + @param key - key associated with the interface + @param role - role of the interface in the connection (master/slave) + @param socket_filename - name of the socket used by this interface + to establish new connections + @param ring_size - the number of entries of RX/TX rings + @param buffer_size - size of the buffer allocated for each ring entry + @param admin_up_down - interface administrative status + @param link_up_down - interface link status + +*/ +define memif_details +{ + u32 context; + + u32 sw_if_index; + u8 if_name[64]; + u8 hw_addr[6]; + + /* memif specific parameters */ + u64 key; + u8 role; /* 0 = master, 1 = slave */ + u8 socket_filename[128]; + u32 ring_size; + u16 buffer_size; /* optional, default is 2048 bytes */ + + /* 1 = up, 0 = down */ + u8 admin_up_down; + u8 link_up_down; +}; + +/** \brief Dump all memory interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define memif_dump +{ + u32 client_index; + u32 context; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c new file mode 100644 index 00000000000..7ba67c5b898 --- /dev/null +++ b/src/plugins/memif/memif.c @@ -0,0 +1,1045 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define MEMIF_DEBUG 1 + +#if MEMIF_DEBUG == 1 +#define DEBUG_LOG(...) clib_warning(__VA_ARGS__) +#define DEBUG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) +#else +#define DEBUG_LOG(...) +#endif + +memif_main_t memif_main; + +static clib_error_t *memif_conn_fd_read_ready (unix_file_t * uf); +static clib_error_t *memif_int_fd_read_ready (unix_file_t * uf); + +static u32 +memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + /* nothing for now */ + return 0; +} + +static void +memif_remove_pending_conn (memif_pending_conn_t * pending_conn) +{ + memif_main_t *mm = &memif_main; + + unix_file_del (&unix_main, + unix_main.file_pool + pending_conn->connection.index); + pool_put (mm->pending_conns, pending_conn); +} + +static void +memif_connect (vlib_main_t * vm, memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + int num_rings = mif->num_s2m_rings + mif->num_m2s_rings; + memif_ring_data_t *rd = NULL; + + vec_validate_aligned (mif->ring_data, num_rings - 1, CLIB_CACHE_LINE_BYTES); + vec_foreach (rd, mif->ring_data) + { + rd->last_head = 0; + } + + mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; + mif->flags |= MEMIF_IF_FLAG_CONNECTED; + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); +} + +static void +memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + + mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); + if (mif->hw_if_index != ~0) + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + + if (mif->interrupt_line.index != ~0) + { + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; /* closed in unix_file_del */ + } + if (mif->connection.index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); + mif->connection.index = ~0; + mif->connection.fd = -1; /* closed in unix_file_del */ + } + + // TODO: properly munmap + close memif-owned shared memory segments + vec_free (mif->regions); +} + +static clib_error_t * +memif_process_connect_req (memif_pending_conn_t * pending_conn, + memif_msg_t * req, struct ucred *slave_cr, + int shm_fd, int int_fd) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + int fd = pending_conn->connection.fd; + unix_file_t *uf = 0; + memif_if_t *mif = 0; + memif_msg_t resp = { 0 }; + unix_file_t template = { 0 }; + void *shm; + uword *p; + u8 retval = 0; + static clib_error_t *error = 0; + + if (shm_fd == -1) + { + DEBUG_LOG + ("Connection request is missing shared memory file descriptor"); + retval = 1; + goto response; + } + + if (int_fd == -1) + { + DEBUG_LOG + ("Connection request is missing interrupt line file descriptor"); + retval = 2; + goto response; + } + + if (slave_cr == NULL) + { + DEBUG_LOG ("Connection request is missing slave credentials"); + retval = 3; + goto response; + } + + p = mhash_get (&mm->if_index_by_key, &req->key); + if (!p) + { + DEBUG_LOG + ("Connection request with unmatched key (0x%" PRIx64 ")", req->key); + retval = 4; + goto response; + } + + mif = vec_elt_at_index (mm->interfaces, *p); + if (mif->listener_index != pending_conn->listener_index) + { + DEBUG_LOG + ("Connection request with non-matching listener (%d vs. %d)", + pending_conn->listener_index, mif->listener_index); + retval = 5; + goto response; + } + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + DEBUG_LOG ("Memif slave does not accept connection requests"); + retval = 6; + goto response; + } + + if (mif->connection.fd != -1) + { + DEBUG_LOG + ("Memif with key 0x%" PRIx64 " is already connected", mif->key); + retval = 7; + goto response; + } + + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) + { + /* just silently decline the request */ + retval = 8; + goto response; + } + + if (req->shared_mem_size < sizeof (memif_shm_t)) + { + DEBUG_LOG + ("Unexpectedly small shared memory segment received from slave."); + retval = 9; + goto response; + } + + if ((shm = + mmap (NULL, req->shared_mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, 0)) == MAP_FAILED) + { + DEBUG_UNIX_LOG + ("Failed to map shared memory segment received from slave memif"); + error = clib_error_return_unix (0, "mmap fd %d", shm_fd); + retval = 10; + goto response; + } + + if (((memif_shm_t *) shm)->cookie != 0xdeadbeef) + { + DEBUG_LOG + ("Possibly corrupted shared memory segment received from slave memif"); + munmap (shm, req->shared_mem_size); + retval = 11; + goto response; + } + + mif->log2_ring_size = req->log2_ring_size; + mif->num_s2m_rings = req->num_s2m_rings; + mif->num_m2s_rings = req->num_m2s_rings; + mif->buffer_size = req->buffer_size; + mif->remote_pid = slave_cr->pid; + mif->remote_uid = slave_cr->uid; + vec_add1 (mif->regions, shm); + + /* register interrupt line */ + mif->interrupt_line.fd = int_fd; + template.read_function = memif_int_fd_read_ready; + template.file_descriptor = int_fd; + template.private_data = mif->if_index; + mif->interrupt_line.index = unix_file_add (&unix_main, &template); + + /* change context for future messages */ + uf = vec_elt_at_index (unix_main.file_pool, pending_conn->connection.index); + uf->private_data = mif->if_index << 1; + mif->connection = pending_conn->connection; + pool_put (mm->pending_conns, pending_conn); + + memif_connect (vm, mif); + +response: + resp.version = MEMIF_VERSION; + resp.type = MEMIF_MSG_TYPE_CONNECT_RESP; + resp.retval = retval; + send (fd, &resp, sizeof (resp), 0); + return error; +} + +static clib_error_t * +memif_process_connect_resp (memif_if_t * mif, memif_msg_t * resp) +{ + vlib_main_t *vm = vlib_get_main (); + + if ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) == 0) + { + DEBUG_LOG ("Memif master does not accept connection responses"); + return 0; + } + + if ((mif->flags & MEMIF_IF_FLAG_CONNECTING) == 0) + { + DEBUG_LOG ("Unexpected connection response"); + return 0; + } + + if (resp->retval == 0) + memif_connect (vm, mif); + else + memif_disconnect (vm, mif); + + return 0; +} + +static clib_error_t * +memif_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + memif_if_t *mif = 0; + memif_pending_conn_t *pending_conn = 0; + int fd_array[2] = { -1, -1 }; + char ctl[CMSG_SPACE (sizeof (fd_array)) + + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + struct ucred *cr = 0; + memif_msg_t msg = { 0 }; + struct cmsghdr *cmsg; + ssize_t size; + static clib_error_t *error = 0; + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + /* grab the appropriate context */ + if (uf->private_data & 1) + pending_conn = vec_elt_at_index (mm->pending_conns, + uf->private_data >> 1); + else + mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 1); + + /* receive the incoming message */ + size = recvmsg (uf->file_descriptor, &mh, 0); + if (size != sizeof (memif_msg_t)) + { + if (size != 0) + { + DEBUG_UNIX_LOG ("Malformed message received on fd %d", + uf->file_descriptor); + error = clib_error_return_unix (0, "recvmsg fd %d", + uf->file_descriptor); + } + goto disconnect; + } + + /* check version of the sender's memif plugin */ + if (msg.version != MEMIF_VERSION) + { + DEBUG_LOG ("Memif version mismatch"); + goto disconnect; + } + + /* process the message based on its type */ + switch (msg.type) + { + case MEMIF_MSG_TYPE_CONNECT_REQ: + if (pending_conn == 0) + { + DEBUG_LOG ("Received unexpected connection request"); + return 0; + } + + /* Read anciliary data */ + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_RIGHTS) + { + clib_memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + + return memif_process_connect_req (pending_conn, &msg, cr, + fd_array[0], fd_array[1]); + + case MEMIF_MSG_TYPE_CONNECT_RESP: + if (mif == 0) + { + DEBUG_LOG ("Received unexpected connection response"); + return 0; + } + return memif_process_connect_resp (mif, &msg); + + case MEMIF_MSG_TYPE_DISCONNECT: + goto disconnect; + + default: + DEBUG_LOG ("Received unknown message type"); + goto disconnect; + } + + return 0; + +disconnect: + if (pending_conn) + memif_remove_pending_conn (pending_conn); + else + memif_disconnect (vm, mif); + return error; +} + +static clib_error_t * +memif_int_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + u8 b; + ssize_t size; + + size = read (uf->file_descriptor, &b, sizeof (b)); + if (0 == size) + { + /* interrupt line was disconnected */ + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; + } + vlib_node_set_interrupt_pending (vm, memif_input_node.index); + return 0; +} + +static clib_error_t * +memif_conn_fd_accept_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_listener_t *listener = 0; + memif_pending_conn_t *pending_conn = 0; + int addr_len; + struct sockaddr_un client; + int conn_fd; + unix_file_t template = { 0 }; + + listener = pool_elt_at_index (mm->listeners, uf->private_data); + + addr_len = sizeof (client); + conn_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, (socklen_t *) & addr_len); + + if (conn_fd < 0) + return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); + + pool_get (mm->pending_conns, pending_conn); + pending_conn->index = pending_conn - mm->pending_conns; + pending_conn->listener_index = listener->index; + pending_conn->connection.fd = conn_fd; + + template.read_function = memif_conn_fd_read_ready; + template.file_descriptor = conn_fd; + template.private_data = (pending_conn->index << 1) | 1; + pending_conn->connection.index = unix_file_add (&unix_main, &template); + + return 0; +} + +static void +memif_connect_master (vlib_main_t * vm, memif_if_t * mif) +{ + memif_msg_t msg; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + struct cmsghdr *cmsg; + int mfd = -1; + int rv; + int fd_array[2] = { -1, -1 }; + char ctl[CMSG_SPACE (sizeof (fd_array))]; + memif_ring_t *ring = NULL; + int i, j; + void *shm = 0; + u64 buffer_offset; + unix_file_t template = { 0 }; + + msg.version = MEMIF_VERSION; + msg.type = MEMIF_MSG_TYPE_CONNECT_REQ; + msg.key = mif->key; + msg.log2_ring_size = mif->log2_ring_size; + msg.num_s2m_rings = mif->num_s2m_rings; + msg.num_m2s_rings = mif->num_m2s_rings; + msg.buffer_size = mif->buffer_size; + + buffer_offset = sizeof (memif_shm_t) + + (mif->num_s2m_rings + mif->num_m2s_rings) * + (sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->log2_ring_size)); + + msg.shared_mem_size = buffer_offset + + mif->buffer_size * (1 << mif->log2_ring_size) * (mif->num_s2m_rings + + mif->num_m2s_rings); + + if ((mfd = memfd_create ("shared mem", MFD_ALLOW_SEALING)) == -1) + { + DEBUG_LOG ("Failed to create anonymous file"); + goto error; + } + + if ((fcntl (mfd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + DEBUG_UNIX_LOG ("Failed to seal an anonymous file off from truncating"); + goto error; + } + + if ((ftruncate (mfd, msg.shared_mem_size)) == -1) + { + DEBUG_UNIX_LOG ("Failed to extend the size of an anonymous file"); + goto error; + } + + if ((shm = mmap (NULL, msg.shared_mem_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mfd, 0)) == MAP_FAILED) + { + DEBUG_UNIX_LOG ("Failed to map anonymous file into memory"); + goto error; + } + + vec_add1 (mif->regions, shm); + ((memif_shm_t *) mif->regions[0])->cookie = 0xdeadbeef; + + for (i = 0; i < mif->num_s2m_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + ring->head = ring->tail = 0; + for (j = 0; j < (1 << mif->log2_ring_size); j++) + { + u16 slot = i * (1 << mif->log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].buffer_length = mif->buffer_size; + } + } + for (i = 0; i < mif->num_m2s_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + ring->head = ring->tail = 0; + for (j = 0; j < (1 << mif->log2_ring_size); j++) + { + u16 slot = + (i + mif->num_s2m_rings) * (1 << mif->log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].buffer_length = mif->buffer_size; + } + } + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + /* create interrupt socket */ + if (socketpair (AF_UNIX, SOCK_STREAM, 0, fd_array) < 0) + { + DEBUG_UNIX_LOG ("Failed to create a pair of connected sockets"); + goto error; + } + + mif->interrupt_line.fd = fd_array[0]; + template.read_function = memif_int_fd_read_ready; + template.file_descriptor = mif->interrupt_line.fd; + template.private_data = mif->if_index; + mif->interrupt_line.index = unix_file_add (&unix_main, &template); + + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (fd_array)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + fd_array[0] = mfd; + clib_memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); + + mif->flags |= MEMIF_IF_FLAG_CONNECTING; + rv = sendmsg (mif->connection.fd, &mh, 0); + if (rv < 0) + { + DEBUG_UNIX_LOG ("Failed to send memif connection request"); + goto error; + } + + /* No need to keep the descriptor open, + * mmap creates an extra reference to the underlying file */ + close (mfd); + mfd = -1; + /* This FD is given to peer, so we can close it */ + close (fd_array[1]); + fd_array[1] = -1; + return; + +error: + if (mfd > -1) + close (mfd); + if (fd_array[1] > -1) + close (fd_array[1]); + memif_disconnect (vm, mif); +} + +static uword +memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + struct sockaddr_un sun; + int sockfd; + uword *event_data = 0, event_type; + unix_file_t template = { 0 }; + u8 enabled = 0; + f64 start_time, last_run_duration = 0, now; + + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sun.sun_family = AF_UNIX; + template.read_function = memif_conn_fd_read_ready; + + while (1) + { + if (enabled) + vlib_process_wait_for_event_or_clock (vm, + (f64) 3 - last_run_duration); + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + switch (event_type) + { + case ~0: + break; + case MEMIF_PROCESS_EVENT_START: + enabled = 1; + break; + case MEMIF_PROCESS_EVENT_STOP: + enabled = 0; + continue; + default: + ASSERT (0); + } + + last_run_duration = start_time = vlib_time_now (vm); + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + /* Allow no more than 10us without a pause */ + now = vlib_time_now (vm); + if (now > start_time + 10e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + start_time = vlib_time_now (vm); + } + + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTING) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTED) + continue; + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + strncpy (sun.sun_path, (char *) mif->socket_filename, + sizeof (sun.sun_path) - 1); + + if (connect + (sockfd, (struct sockaddr *) &sun, + sizeof (struct sockaddr_un)) == 0) + { + mif->connection.fd = sockfd; + template.file_descriptor = sockfd; + template.private_data = mif->if_index << 1; + mif->connection.index = unix_file_add (&unix_main, &template); + memif_connect_master (vm, mif); + + /* grab another fd */ + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + { + DEBUG_UNIX_LOG ("socket AF_UNIX"); + return 0; + } + } + } + })); + /* *INDENT-ON* */ + last_run_duration = vlib_time_now (vm) - last_run_duration; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memif_process_node,static) = { + .function = memif_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "memif-process", +}; +/* *INDENT-ON* */ + +static void +memif_close_if (memif_main_t * mm, memif_if_t * mif) +{ + vlib_main_t *vm = vlib_get_main (); + memif_listener_t *listener = 0; + memif_pending_conn_t *pending_conn = 0; + + memif_disconnect (vm, mif); + + if (mif->listener_index != (uword) ~ 0) + { + listener = pool_elt_at_index (mm->listeners, mif->listener_index); + if (--listener->usage_counter == 0) + { + /* not used anymore -> remove the socket and pending connections */ + + /* *INDENT-OFF* */ + pool_foreach (pending_conn, mm->pending_conns, + ({ + if (pending_conn->listener_index == mif->listener_index) + { + memif_remove_pending_conn (pending_conn); + } + })); + /* *INDENT-ON* */ + + unix_file_del (&unix_main, + unix_main.file_pool + listener->socket.index); + pool_put (mm->listeners, listener); + unlink ((char *) mif->socket_filename); + } + } + + if (mif->lockp != 0) + { + clib_mem_free ((void *) mif->lockp); + mif->lockp = 0; + } + + mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); + vec_free (mif->socket_filename); + vec_free (mif->ring_data); + + memset (mif, 0, sizeof (*mif)); + pool_put (mm->interfaces, mif); +} + +int +memif_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + memif_input_node.index, + VLIB_NODE_STATE_POLLING); + })); + + return 0; +} + +int +memif_worker_thread_disable () +{ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + memif_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + })); + + return 0; +} + +int +memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_main_t *vnm = vnet_get_main (); + memif_if_t *mif = 0; + vnet_sw_interface_t *sw; + clib_error_t *error = 0; + int ret = 0; + uword *p; + + p = mhash_get (&mm->if_index_by_key, &args->key); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (mm->interfaces, mif); + memset (mif, 0, sizeof (*mif)); + mif->key = args->key; + mif->if_index = mif - mm->interfaces; + mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; + mif->listener_index = ~0; + mif->connection.index = mif->interrupt_line.index = ~0; + mif->connection.fd = mif->interrupt_line.fd = -1; + + if (tm->n_vlib_mains > 1) + { + mif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) mif->lockp, 0, CLIB_CACHE_LINE_BYTES); + } + + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 0xfe; + } + + error = ethernet_register_interface (vnm, memif_device_class.index, + mif->if_index, args->hw_addr, + &mif->hw_if_index, + memif_eth_flag_change); + + if (error) + { + clib_error_report (error); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index); + mif->sw_if_index = sw->sw_if_index; + + mif->log2_ring_size = args->log2_ring_size; + mif->buffer_size = args->buffer_size; + + /* TODO: make configurable */ + mif->num_s2m_rings = 1; + mif->num_m2s_rings = 1; + + mhash_set_mem (&mm->if_index_by_key, &args->key, &mif->if_index, 0); + + if (args->socket_filename != 0) + mif->socket_filename = args->socket_filename; + else + mif->socket_filename = vec_dup (mm->default_socket_filename); + + args->sw_if_index = mif->sw_if_index; + + if (args->is_master) + { + struct sockaddr_un un = { 0 }; + struct stat file_stat; + int on = 1; + memif_listener_t *listener = 0; + + if (stat ((char *) mif->socket_filename, &file_stat) == 0) + { + if (!S_ISSOCK (file_stat.st_mode)) + { + errno = ENOTSOCK; + ret = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + /* *INDENT-OFF* */ + pool_foreach (listener, mm->listeners, + ({ + if (listener->sock_dev == file_stat.st_dev && + listener->sock_ino == file_stat.st_ino) + { + /* attach memif to the existing listener */ + mif->listener_index = listener->index; + ++listener->usage_counter; + goto signal; + } + })); + /* *INDENT-ON* */ + unlink ((char *) mif->socket_filename); + } + + pool_get (mm->listeners, listener); + memset (listener, 0, sizeof (*listener)); + listener->socket.fd = -1; + listener->socket.index = ~0; + listener->index = listener - mm->listeners; + listener->usage_counter = 1; + + if ((listener->socket.fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + un.sun_family = AF_UNIX; + strncpy ((char *) un.sun_path, (char *) mif->socket_filename, + sizeof (un.sun_path) - 1); + + if (setsockopt (listener->socket.fd, SOL_SOCKET, SO_PASSCRED, + &on, sizeof (on)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + if (bind (listener->socket.fd, (struct sockaddr *) &un, + sizeof (un)) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_5; + goto error; + } + if (listen (listener->socket.fd, 1) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_6; + goto error; + } + + if (stat ((char *) mif->socket_filename, &file_stat) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_7; + goto error; + } + + listener->sock_dev = file_stat.st_dev; + listener->sock_ino = file_stat.st_ino; + + unix_file_t template = { 0 }; + template.read_function = memif_conn_fd_accept_ready; + template.file_descriptor = listener->socket.fd; + template.private_data = listener->index; + listener->socket.index = unix_file_add (&unix_main, &template); + + mif->listener_index = listener->index; + } + else + { + mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + } + +#if 0 + /* use configured or generate random MAC address */ + if (!args->hw_addr_set && + tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 1) + memif_worker_thread_enable (); +#endif + +signal: + if (pool_elts (mm->interfaces) == 1) + { + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_START, 0); + } + return 0; + +error: + if (mif->hw_if_index != ~0) + { + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + } + memif_close_if (mm, mif); + return ret; +} + +int +memif_delete_if (vlib_main_t * vm, u64 key) +{ + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_if_t *mif; + uword *p; + + p = mhash_get (&mm->if_index_by_key, &key); + if (p == NULL) + { + clib_warning ("Memory interface with key 0x%" PRIx64 " does not exist", + key); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + mif = pool_elt_at_index (mm->interfaces, p[0]); + mif->flags |= MEMIF_IF_FLAG_DELETING; + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); + + /* remove the interface */ + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + memif_close_if (mm, mif); + + if (pool_elts (mm->interfaces) == 0) + { + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_STOP, 0); + } + +#if 0 + if (tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 0) + memif_worker_thread_disable (); +#endif + + return 0; +} + +static clib_error_t * +memif_init (vlib_main_t * vm) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + memset (mm, 0, sizeof (memif_main_t)); + + mm->input_cpu_first_index = 0; + mm->input_cpu_count = 1; + + /* initialize binary API */ + memif_plugin_api_hookup (vm); + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + mm->input_cpu_first_index = tr->first_index; + mm->input_cpu_count = tr->count; + } + + mhash_init (&mm->if_index_by_key, sizeof (uword), sizeof (u64)); + + vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + /* set default socket filename */ + vec_validate (mm->default_socket_filename, + strlen (MEMIF_DEFAULT_SOCKET_FILENAME)); + strncpy ((char *) mm->default_socket_filename, + MEMIF_DEFAULT_SOCKET_FILENAME, + vec_len (mm->default_socket_filename) - 1); + + return 0; +} + +VLIB_INIT_FUNCTION (memif_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Packet Memory Interface (experimetal)", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h new file mode 100644 index 00000000000..a7a88e07b0e --- /dev/null +++ b/src/plugins/memif/memif.h @@ -0,0 +1,263 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +typedef struct +{ + u16 version; +#define MEMIF_VERSION_MAJOR 0 +#define MEMIF_VERSION_MINOR 1 +#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) + u8 type; +#define MEMIF_MSG_TYPE_CONNECT_REQ 0 +#define MEMIF_MSG_TYPE_CONNECT_RESP 1 +#define MEMIF_MSG_TYPE_DISCONNECT 2 + + /* Connection-request parameters: */ + u64 key; + u8 log2_ring_size; +#define MEMIF_DEFAULT_RING_SIZE 1024 + u16 num_s2m_rings; + u16 num_m2s_rings; + u16 buffer_size; +#define MEMIF_DEFAULT_BUFFER_SIZE 2048 + u32 shared_mem_size; + + /* Connection-response parameters: */ + u8 retval; +} memif_msg_t; + +typedef struct __attribute__ ((packed)) +{ + u16 flags; +#define MEMIF_DESC_FLAG_NEXT (1 << 0) + u16 region; + u32 buffer_length; + u32 length;; + u8 reserved[4]; + u64 offset; + u64 metadata; +} memif_desc_t; + +STATIC_ASSERT_SIZEOF (memif_desc_t, 32); + +typedef struct +{ + u16 head __attribute__ ((aligned (128))); + u16 tail __attribute__ ((aligned (128))); + memif_desc_t desc[0] __attribute__ ((aligned (128))); +} memif_ring_t; + +typedef struct +{ + u32 cookie __attribute__ ((aligned (128))); +} memif_shm_t; + + +typedef struct +{ + u16 last_head; + u16 last_tail; +} memif_ring_data_t; + +typedef struct +{ + int fd; + u32 index; +} memif_file_t; + +typedef struct +{ + uword index; + dev_t sock_dev; + ino_t sock_ino; + memif_file_t socket; + u16 usage_counter; +} memif_listener_t; + +typedef struct +{ + uword index; + memif_file_t connection; + uword listener_index; +} memif_pending_conn_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + volatile u32 *lockp; + u32 flags; +#define MEMIF_IF_FLAG_ADMIN_UP (1 << 0) +#define MEMIF_IF_FLAG_IS_SLAVE (1 << 1) +#define MEMIF_IF_FLAG_CONNECTING (1 << 2) +#define MEMIF_IF_FLAG_CONNECTED (1 << 3) +#define MEMIF_IF_FLAG_DELETING (1 << 4) + + u64 key; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + + u32 per_interface_next_index; + + uword listener_index; + memif_file_t connection; + memif_file_t interrupt_line; + u8 *socket_filename; + + void **regions; + + u8 log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + + memif_ring_data_t *ring_data; + + /* remote info */ + pid_t remote_pid; + uid_t remote_uid; +} memif_if_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** API message ID base */ + u16 msg_id_base; + + /* pool of all memory interfaces */ + memif_if_t *interfaces; + + /* pool of all listeners */ + memif_listener_t *listeners; + + /* pool of pending connections */ + memif_pending_conn_t *pending_conns; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of all registered keys */ + mhash_t if_index_by_key; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 input_cpu_count; + + /* configuration */ + u8 *default_socket_filename; +#define MEMIF_DEFAULT_SOCKET_FILENAME "/var/vpp/memif.sock" +} memif_main_t; + +extern memif_main_t memif_main; +extern vnet_device_class_t memif_device_class; +extern vlib_node_registration_t memif_input_node; + +enum +{ + MEMIF_PROCESS_EVENT_START = 1, + MEMIF_PROCESS_EVENT_STOP = 2, +} memif_process_event_t; + +typedef struct +{ + u64 key; + u8 *socket_filename; + u8 is_master; + u8 log2_ring_size; + u16 buffer_size; + u8 hw_addr_set; + u8 hw_addr[6]; + + /* return */ + u32 sw_if_index; +} memif_create_if_args_t; + +int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); +int memif_delete_if (vlib_main_t * vm, u64 key); +clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); + +#ifndef __NR_memfd_create +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined __aarch64__ +#define __NR_memfd_create 279 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif +#endif + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +typedef enum +{ + MEMIF_RING_S2M = 0, + MEMIF_RING_M2S = 1 +} memif_ring_type_t; + +static_always_inline memif_ring_t * +memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) +{ + if (vec_len (mif->regions) == 0) + return NULL; + void *p = mif->regions[0]; + int ring_size = + sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->log2_ring_size); + p += sizeof (memif_shm_t); + p += (ring_num + type * mif->num_s2m_rings) * ring_size; + + return (memif_ring_t *) p; +} + +static_always_inline void * +memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +{ + u16 region = ring->desc[slot].region; + return mif->regions[region] + ring->desc[slot].offset; +} + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif_all_api_h.h b/src/plugins/memif/memif_all_api_h.h new file mode 100644 index 00000000000..9729ec16239 --- /dev/null +++ b/src/plugins/memif/memif_all_api_h.h @@ -0,0 +1,18 @@ +/* + * memif_all_api_h.h - plug-in api #include file + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c new file mode 100644 index 00000000000..1ade317e26f --- /dev/null +++ b/src/plugins/memif/memif_api.c @@ -0,0 +1,332 @@ +/* + *------------------------------------------------------------------ + * memif_api.c - memif api + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include +#include +#include + +#include +#include +#include + +/* define message IDs */ +#include + +/* define message structures */ +#define vl_typedefs +#include +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons ((t)+mm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = htonl (rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = htons ((t)+mm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = htonl (rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define foreach_memif_plugin_api_msg \ +_(MEMIF_CREATE, memif_create) \ +_(MEMIF_DELETE, memif_delete) \ +_(MEMIF_DETAILS, memif_details) \ +_(MEMIF_DUMP, memif_dump) \ + +/** + * @brief Message handler for memif_create API. + * @param mp vl_api_memif_create_t * mp the api message + */ +void +vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_memif_create_reply_t *rmp; + memif_create_if_args_t args = { 0 }; + u32 ring_size = MEMIF_DEFAULT_RING_SIZE; + static const u8 empty_hw_addr[6]; + int rv = 0; + + /* key */ + args.key = clib_net_to_host_u64 (mp->key); + + /* socket filename */ + mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0; + if (strlen ((char *) mp->socket_filename) > 0) + { + vec_validate (args.socket_filename, + strlen ((char *) mp->socket_filename)); + strncpy ((char *) args.socket_filename, (char *) mp->socket_filename, + vec_len (args.socket_filename)); + } + + /* role */ + args.is_master = (mp->role == 0); + + /* ring size */ + if (mp->ring_size) + { + ring_size = ntohl (mp->ring_size); + } + if (!is_pow2 (ring_size)) + { + rv = VNET_API_ERROR_INVALID_ARGUMENT; + goto reply; + } + args.log2_ring_size = min_log2 (ring_size); + + /* buffer size */ + args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + if (mp->buffer_size) + { + args.buffer_size = ntohs (mp->buffer_size); + } + + /* MAC address */ + if (memcmp (mp->hw_addr, empty_hw_addr, 6) != 0) + { + memcpy (args.hw_addr, mp->hw_addr, 6); + args.hw_addr_set = 1; + } + + rv = memif_create_if (vm, &args); + +reply: + /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY, + ({ + rmp->sw_if_index = htonl (args.sw_if_index); + })); + /* *INDENT-ON* */ +} + +/** + * @brief Message handler for memif_delete API. + * @param mp vl_api_memif_delete_t * mp the api message + */ +void +vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vlib_main_t *vm = vlib_get_main (); + vl_api_memif_delete_reply_t *rmp; + u32 sw_if_index = ntohl (mp->sw_if_index); + int rv = 0; + + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + if (sw_if_index == mif->sw_if_index) + { + rv = memif_delete_if (vm, mif->key); + goto reply; + } + })); + /* *INDENT-ON* */ + + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + +reply: + REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); +} + +static void +send_memif_details (unix_shared_memory_queue_t * q, + memif_if_t * mif, + vnet_sw_interface_t * swif, + u8 * interface_name, u32 context) +{ + vl_api_memif_details_t *mp; + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + vnet_hw_interface_t *hwif; + + hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = htons (VL_API_MEMIF_DETAILS + mm->msg_id_base); + mp->context = context; + + mp->sw_if_index = htonl (swif->sw_if_index); + strncpy ((char *) mp->if_name, + (char *) interface_name, ARRAY_LEN (mp->if_name) - 1); + memcpy (mp->hw_addr, hwif->hw_address, ARRAY_LEN (mp->hw_addr)); + + mp->key = clib_host_to_net_u64 (mif->key); + mp->role = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? 1 : 0; + strncpy ((char *) mp->socket_filename, + (char *) mif->socket_filename, + ARRAY_LEN (mp->socket_filename) - 1); + + mp->ring_size = htonl (1 << mif->log2_ring_size); + mp->buffer_size = htons (mif->buffer_size); + + mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0; + mp->link_up_down = (hwif->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +/** + * @brief Message handler for memif_dump API. + * @param mp vl_api_memif_dump_t * mp the api message + */ +void +vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp) +{ + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *swif; + memif_if_t *mif; + u8 *if_name = 0; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + swif = vnet_get_sw_interface (vnm, mif->sw_if_index); + + if_name = format (if_name, "%U%c", + format_vnet_sw_interface_name, + vnm, swif, 0); + + send_memif_details (q, mif, swif, if_name, mp->context); + _vec_len (if_name) = 0; + })); + /* *INDENT-ON* */ + + vec_free (if_name); +} + +/** + * @brief Message handler for memif_details API. + * @param mp vl_api_memif_details_t * mp the api message + */ +void +vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) +{ + clib_warning ("BUG"); +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (memif_main_t * mm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + mm->msg_id_base); + foreach_vl_msg_name_crc_memif; +#undef _ +} + +/* Set up the API message handling tables */ +clib_error_t * +memif_plugin_api_hookup (vlib_main_t * vm) +{ + memif_main_t *mm = &memif_main; + api_main_t *am = &api_main; + u8 *name; + + /* Construct the API name */ + name = format (0, "memif_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + mm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_memif_plugin_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (mm, am); + + vec_free (name); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif_msg_enum.h b/src/plugins/memif/memif_msg_enum.h new file mode 100644 index 00000000000..74efee00604 --- /dev/null +++ b/src/plugins/memif/memif_msg_enum.h @@ -0,0 +1,31 @@ +/* + * memif_msg_enum.h - vpp engine plug-in message enumeration + * + * Copyright (c) + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_memif_msg_enum_h +#define included_memif_msg_enum_h + +#include + +#define vl_msg_id(n,h) n, +typedef enum +{ +#include + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_memif_msg_enum_h */ diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c new file mode 100644 index 00000000000..298472b04b7 --- /dev/null +++ b/src/plugins/memif/node.c @@ -0,0 +1,383 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define foreach_memif_input_error + +typedef enum +{ +#define _(f,s) MEMIF_INPUT_ERROR_##f, + foreach_memif_input_error +#undef _ + MEMIF_INPUT_N_ERROR, +} memif_input_error_t; + +static char *memif_input_error_strings[] = { +#define _(n,s) s, + foreach_memif_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + u16 ring; +} memif_input_trace_t; + +static u8 * +format_memif_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + memif_input_trace_t *t = va_arg (*args, memif_input_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "memif: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: ring %u", format_white_space, indent + 2, + t->ring); + return s; +} + +static_always_inline void +memif_prefetch (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_prefetch_buffer_header (b, STORE); + CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE); +} + +static_always_inline uword +memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, memif_if_t * mif, + memif_ring_type_t type) +{ + vnet_main_t *vnm = vnet_get_main (); + u8 rid = 0; /* Ring id */ + memif_ring_t *ring = memif_get_ring (mif, type, rid); + memif_ring_data_t *rd = + vec_elt_at_index (mif->ring_data, rid + type * mif->num_s2m_rings); + u16 head; + + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + memif_main_t *nm = &memif_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + u32 cpu_index = os_get_cpu_number (); + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u16 ring_size = 1 << mif->log2_ring_size; + u16 mask = ring_size - 1; + u16 num_slots; + void *mb0, *mb1; + + if (mif->per_interface_next_index != ~0) + next_index = mif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[cpu_index]); + if (PREDICT_FALSE (n_free_bufs < ring_size)) + { + vec_validate (nm->rx_buffers[cpu_index], ring_size + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs], + ring_size); + _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs; + } + + head = ring->head; + if (head == rd->last_head) + return 0; + + if (head > rd->last_head) + num_slots = head - rd->last_head; + else + num_slots = ring_size - rd->last_head + head; + + while (num_slots) + { + u32 n_left_to_next; + u32 next0 = next_index; + u32 next1 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (num_slots > 5 && n_left_to_next > 2) + { + if (PREDICT_TRUE (rd->last_head + 5 < ring_size)) + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 2), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 3), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[rd->last_head + 4], + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[rd->last_head + 5], + CLIB_CACHE_LINE_BYTES, LOAD); + } + else + { + CLIB_PREFETCH (memif_get_buffer + (mif, ring, (rd->last_head + 2) % mask), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (memif_get_buffer + (mif, ring, (rd->last_head + 3) % mask), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[(rd->last_head + 4) % mask], + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[(rd->last_head + 5) % mask], + CLIB_CACHE_LINE_BYTES, LOAD); + } + /* get empty buffer */ + u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; + bi0 = nm->rx_buffers[cpu_index][last_buf]; + bi1 = nm->rx_buffers[cpu_index][last_buf - 1]; + _vec_len (nm->rx_buffers[cpu_index]) -= 2; + + if (last_buf > 4) + { + memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 2]); + memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 3]); + } + + /* enqueue buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + to_next += 2; + n_left_to_next -= 2; + + /* fill buffer metadata */ + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = mif->sw_if_index; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = mif->sw_if_index; + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* copy buffer */ + mb0 = memif_get_buffer (mif, ring, rd->last_head); + clib_memcpy (vlib_buffer_get_current (b0), mb0, + CLIB_CACHE_LINE_BYTES); + b0->current_length = ring->desc[rd->last_head].length; + rd->last_head = (rd->last_head + 1) & mask; + + mb1 = memif_get_buffer (mif, ring, rd->last_head); + clib_memcpy (vlib_buffer_get_current (b1), mb1, + CLIB_CACHE_LINE_BYTES); + b1->current_length = ring->desc[rd->last_head].length; + rd->last_head = (rd->last_head + 1) & mask; + + if (b0->current_length > CLIB_CACHE_LINE_BYTES) + clib_memcpy (vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, + mb0 + CLIB_CACHE_LINE_BYTES, + b0->current_length - CLIB_CACHE_LINE_BYTES); + + if (b1->current_length > CLIB_CACHE_LINE_BYTES) + clib_memcpy (vlib_buffer_get_current (b1) + CLIB_CACHE_LINE_BYTES, + mb1 + CLIB_CACHE_LINE_BYTES, + b1->current_length - CLIB_CACHE_LINE_BYTES); + + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1); + + if (PREDICT_FALSE (n_trace > 0)) + { + if (b0) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = rid; + } + + if (n_trace && b1) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next1, b1, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); + tr->next_index = next1; + tr->hw_if_index = mif->hw_if_index; + tr->ring = rid; + } + } + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x2 (mif->sw_if_index, + &next0, &next1, b0, b1); + + /* enqueue */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, + bi0, bi1, next0, next1); + + /* next packet */ + num_slots -= 2; + n_rx_packets += 2; + n_rx_bytes += b0->current_length; + n_rx_bytes += b1->current_length; + } + while (num_slots && n_left_to_next) + { + /* get empty buffer */ + u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1; + bi0 = nm->rx_buffers[cpu_index][last_buf]; + _vec_len (nm->rx_buffers[cpu_index]) = last_buf; + + /* enqueue buffer */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next--; + + /* fill buffer metadata */ + b0 = vlib_get_buffer (vm, bi0); + b0->current_length = ring->desc[rd->last_head].length; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = mif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + /* copy buffer */ + mb0 = memif_get_buffer (mif, ring, rd->last_head); + clib_memcpy (vlib_buffer_get_current (b0), mb0, + CLIB_CACHE_LINE_BYTES); + if (b0->current_length > CLIB_CACHE_LINE_BYTES) + clib_memcpy (vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, + mb0 + CLIB_CACHE_LINE_BYTES, + b0->current_length - CLIB_CACHE_LINE_BYTES); + + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + + if (PREDICT_FALSE (n_trace > 0)) + { + if (b0) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = rid; + } + } + + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (mif->sw_if_index, &next0, b0); + + /* enqueue */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + + /* next packet */ + rd->last_head = (rd->last_head + 1) & mask; + num_slots--; + n_rx_packets++; + n_rx_bytes += b0->current_length; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + CLIB_MEMORY_STORE_BARRIER (); + ring->tail = head; + + vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, cpu_index, + mif->hw_if_index, n_rx_packets, + n_rx_bytes); + + return n_rx_packets; +} + +static uword +memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_rx_packets = 0; + u32 cpu_index = os_get_cpu_number (); + memif_main_t *nm = &memif_main; + memif_if_t *mif; + + /* *INDENT-OFF* */ + pool_foreach (mif, nm->interfaces, + ({ + if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && + mif->flags & MEMIF_IF_FLAG_CONNECTED && + (mif->if_index % nm->input_cpu_count) == + (cpu_index - nm->input_cpu_first_index)) + { + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + n_rx_packets += + memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S); + else + n_rx_packets += + memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M); + } + })); + /* *INDENT-ON* */ + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memif_input_node) = { + .function = memif_input_fn, + .name = "memif-input", + .sibling_of = "device-input", + .format_trace = format_memif_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = MEMIF_INPUT_N_ERROR, + .error_strings = memif_input_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (memif_input_node, memif_input_fn) +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h index 0386c756833..fbb2a21c6b9 100644 --- a/src/vppinfra/clib.h +++ b/src/vppinfra/clib.h @@ -100,6 +100,12 @@ /* Full memory barrier (read and write). */ #define CLIB_MEMORY_BARRIER() __sync_synchronize () +#if __x86_64__ +#define CLIB_MEMORY_STORE_BARRIER() __builtin_ia32_sfence () +#else +#define CLIB_MEMORY_STORE_BARRIER() __sync_synchronize () +#endif + /* Arranges for function to be called before main. */ #define INIT_FUNCTION(decl) \ decl __attribute ((constructor)); \ -- cgit 1.2.3-korg