diff options
Diffstat (limited to 'src/plugins/memif')
-rw-r--r-- | src/plugins/memif/cli.c | 365 | ||||
-rw-r--r-- | src/plugins/memif/device.c | 380 | ||||
-rw-r--r-- | src/plugins/memif/memif.api | 124 | ||||
-rw-r--r-- | src/plugins/memif/memif.c | 819 | ||||
-rw-r--r-- | src/plugins/memif/memif.h | 185 | ||||
-rw-r--r-- | src/plugins/memif/memif_all_api_h.h | 18 | ||||
-rw-r--r-- | src/plugins/memif/memif_api.c | 350 | ||||
-rw-r--r-- | src/plugins/memif/memif_msg_enum.h | 31 | ||||
-rw-r--r-- | src/plugins/memif/memif_test.c | 372 | ||||
-rw-r--r-- | src/plugins/memif/node.c | 533 | ||||
-rw-r--r-- | src/plugins/memif/private.h | 261 | ||||
-rw-r--r-- | src/plugins/memif/socket.c | 740 |
12 files changed, 4178 insertions, 0 deletions
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c new file mode 100644 index 00000000..e1bd0444 --- /dev/null +++ b/src/plugins/memif/cli.c @@ -0,0 +1,365 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <inttypes.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <memif/memif.h> +#include <memif/private.h> + +static clib_error_t * +memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + int r; + u32 ring_size = MEMIF_DEFAULT_RING_SIZE; + memif_create_if_args_t args = { 0 }; + args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES; + u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "id %u", &args.id)) + ; + else if (unformat (line_input, "socket %s", &args.socket_filename)) + ; + else if (unformat (line_input, "secret %s", &args.secret)) + ; + else if (unformat (line_input, "ring-size %u", &ring_size)) + ; + else if (unformat (line_input, "rx-queues %u", &rx_queues)) + ; + else if (unformat (line_input, "tx-queues %u", &tx_queues)) + ; + else if (unformat (line_input, "buffer-size %u", &args.buffer_size)) + ; + else if (unformat (line_input, "master")) + args.is_master = 1; + else if (unformat (line_input, "slave")) + args.is_master = 0; + else if (unformat (line_input, "mode ip")) + args.mode = MEMIF_INTERFACE_MODE_IP; + else if (unformat (line_input, "hw-addr %U", + unformat_ethernet_address, args.hw_addr)) + args.hw_addr_set = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (!is_pow2 (ring_size)) + return clib_error_return (0, "ring size must be power of 2"); + + args.log2_ring_size = min_log2 (ring_size); + + if (rx_queues > 255 || rx_queues < 1) + return clib_error_return (0, "rx queue must be between 1 - 255"); + if (tx_queues > 255 || tx_queues < 1) + return clib_error_return (0, "tx queue must be between 1 - 255"); + + args.rx_queues = rx_queues; + args.tx_queues = tx_queues; + + r = memif_create_if (vm, &args); + + vec_free (args.socket_filename); + vec_free (args.secret); + + if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 + && r >= VNET_API_ERROR_SYSCALL_ERROR_10) + return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + return clib_error_return (0, "Invalid interface name"); + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + return clib_error_return (0, "Interface with same id already exists"); + + return 0; 
+} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_create_command, static) = { + .path = "create memif", + .short_help = "create memif [id <id>] [socket <path>] " + "[ring-size <size>] [buffer-size <size>] [hw-addr <mac-address>] " + "<master|slave> [rx-queues <number>] [tx-queues <number>] " + "[mode ip] [secret <string>]", + .function = memif_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index = ~0; + vnet_hw_interface_t *hw; + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vnet_main_t *vnm = vnet_get_main (); + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || memif_device_class.index != hw->dev_class_index) + return clib_error_return (0, "not a memif interface"); + + mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + memif_delete_if (vm, mif); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_delete_command, static) = { + .path = "delete memif", + .short_help = "delete memif {<interface> | sw_if_index <sw_idx>}", + .function = memif_delete_command_fn, +}; +/* *INDENT-ON* */ + +static u8 * +format_memif_if_flags (u8 * s, va_list * args) +{ + u32 flags = va_arg (*args, u32); +#define _(a,b,c) if ( flags & (1 << a)) s = format (s, " %s", c); + 
foreach_memif_if_flag +#undef _ + return s; +} + +static u8 * +format_memif_if_mode (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + return format (s, "ethernet"); + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + return format (s, "ip"); + if (mif->mode == MEMIF_INTERFACE_MODE_PUNT_INJECT) + return format (s, "punt-inject"); + return format (s, "unknown mode (%u)", mif->mode);; +} + +static u8 * +format_memif_queue (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + memif_queue_t *mq = va_arg (*args, memif_queue_t *); + uword i = va_arg (*args, uword); + uword indent = format_get_indent (s); + + s = format (s, "%U%s ring %u:\n", + format_white_space, indent, + (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? + "slave-to-master" : "master-to-slave", i); + s = format (s, "%Uregion %u offset %u ring-size %u int-fd %d\n", + format_white_space, indent + 4, + mq->region, mq->offset, (1 << mq->log2_ring_size), mq->int_fd); + + if (mq->ring) + s = format (s, "%Uhead %u tail %u flags 0x%04x interrupts %u\n", + format_white_space, indent + 4, + mq->ring->head, mq->ring->tail, mq->ring->flags, + mq->int_count); + + return s; +} + +static u8 * +format_memif_descriptor (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + memif_queue_t *mq = va_arg (*args, memif_queue_t *); + uword indent = format_get_indent (s); + memif_ring_t *ring; + u16 ring_size; + u16 slot; + + ring_size = 1 << mq->log2_ring_size; + ring = mq->ring; + if (ring) + { + s = format (s, "%Udescriptor table:\n", format_white_space, indent); + s = + format (s, + "%Uid flags buf len desc len address offset user address\n", + format_white_space, indent); + s = + format (s, + "%U===== ===== ======= ======== ================== ====== ==================\n", + format_white_space, indent); + for (slot = 0; slot < ring_size; slot++) + { + s = format (s, "%U%-5d %-5d %-7d %-7d 0x%016lx %-6d 
0x%016lx\n", + format_white_space, indent, slot, + ring->desc[slot].flags, ring->desc[slot].buffer_length, + ring->desc[slot].length, + mif->regions[ring->desc[slot].region].shm, + ring->desc[slot].offset, memif_get_buffer (mif, ring, + slot)); + } + s = format (s, "\n"); + } + + return s; +} + +static clib_error_t * +memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vnet_main_t *vnm = vnet_get_main (); + memif_queue_t *mq; + uword i; + int show_descr = 0; + clib_error_t *error = 0; + u32 hw_if_index, *hw_if_indices = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + vec_add1 (hw_if_indices, hw_if_index); + else if (unformat (input, "descriptors")) + show_descr = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (vec_len (hw_if_indices) == 0) + { + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + vec_add1 (hw_if_indices, mif->hw_if_index); + ); + /* *INDENT-ON* */ + } + + for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) + { + vnet_hw_interface_t *hi = + vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); + mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); + vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, + vnm, mif->sw_if_index); + if (mif->remote_name) + vlib_cli_output (vm, " remote-name \"%s\"", mif->remote_name); + if (mif->remote_if_name) + vlib_cli_output (vm, " remote-interface \"%s\"", + mif->remote_if_name); + vlib_cli_output (vm, " id %d mode %U file %s", mif->id, + format_memif_if_mode, mif, msf->filename); + vlib_cli_output (vm, " flags%U", format_memif_if_flags, mif->flags); + vlib_cli_output (vm, " listener-fd 
%d conn-fd %d", msf->fd, + mif->conn_fd); + vlib_cli_output (vm, + " num-s2m-rings %u num-m2s-rings %u buffer-size %u", + mif->run.num_s2m_rings, mif->run.num_m2s_rings, + mif->run.buffer_size); + + if (mif->local_disc_string) + vlib_cli_output (vm, " local-disc-reason \"%s\"", + mif->local_disc_string); + if (mif->remote_disc_string) + vlib_cli_output (vm, " remote-disc-reason \"%s\"", + mif->remote_disc_string); + + vec_foreach_index (i, mif->tx_queues) + { + mq = vec_elt_at_index (mif->tx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); + if (show_descr) + vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq); + } + vec_foreach_index (i, mif->rx_queues) + { + mq = vec_elt_at_index (mif->rx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); + if (show_descr) + vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq); + } + } +done: + vec_free (hw_if_indices); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (memif_show_command, static) = { + .path = "show memif", + .short_help = "show memif {<interface>] [descriptors]", + .function = memif_show_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +memif_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (memif_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c new file mode 100644 index 00000000..aff18f2d --- /dev/null +++ b/src/plugins/memif/device.c @@ -0,0 +1,380 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/uio.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <memif/memif.h> +#include <memif/private.h> + +#define foreach_memif_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \ +_(PENDING_MSGS, "pending msgs in tx ring") \ +_(NO_TX_QUEUES, "no tx queues") + +typedef enum +{ +#define _(f,s) MEMIF_TX_ERROR_##f, + foreach_memif_tx_func_error +#undef _ + MEMIF_TX_N_ERROR, +} memif_tx_func_error_t; + +static char *memif_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_memif_tx_func_error +#undef _ +}; + +u8 * +format_memif_device_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + memif_main_t *mm = &memif_main; + memif_if_t *mif = pool_elt_at_index (mm->interfaces, dev_instance); + + s = format (s, "memif%lu/%lu", mif->socket_file_index, mif->id); + return s; +} + +static u8 * +format_memif_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + uword indent = format_get_indent (s); + + s = format (s, "MEMIF interface"); + if (verbose) + { + s = format (s, "\n%U instance %u", format_white_space, indent + 2, + dev_instance); + } + return s; +} + +static u8 * +format_memif_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + 
+static_always_inline void +memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_prefetch_buffer_header (b, LOAD); + CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, LOAD); +} + +/** + * @brief Copy buffer to tx ring + * + * @param * vm (in) + * @param * node (in) + * @param * mif (in) pointer to memif interface + * @param bi (in) vlib buffer index + * @param * ring (in) pointer to memif ring + * @param * head (in/out) ring head + * @param mask (in) ring size - 1 + */ +static_always_inline void +memif_copy_buffer_to_tx_ring (vlib_main_t * vm, vlib_node_runtime_t * node, + memif_if_t * mif, u32 bi, memif_ring_t * ring, + u16 * head, u16 mask) +{ + vlib_buffer_t *b0; + void *mb0; + u32 total = 0, len; + + mb0 = memif_get_buffer (mif, ring, *head); + ring->desc[*head].flags = 0; + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + if (PREDICT_FALSE (ring->desc[*head].buffer_length < (total + len))) + { + if (PREDICT_TRUE (total)) + { + ring->desc[*head].length = total; + total = 0; + ring->desc[*head].flags |= MEMIF_DESC_FLAG_NEXT; + *head = (*head + 1) & mask; + mb0 = memif_get_buffer (mif, ring, *head); + ring->desc[*head].flags = 0; + } + } + if (PREDICT_TRUE (ring->desc[*head].buffer_length >= (total + len))) + { + clib_memcpy (mb0 + total, vlib_buffer_get_current (b0), + CLIB_CACHE_LINE_BYTES); + if (len > CLIB_CACHE_LINE_BYTES) + clib_memcpy (mb0 + CLIB_CACHE_LINE_BYTES + total, + vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, + len - CLIB_CACHE_LINE_BYTES); + total += len; + } + else + { + vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_TRUNC_PACKET, + 1); + break; + } + } + while ((bi = (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? 
b0->next_buffer : 0)); + + if (PREDICT_TRUE (total)) + { + ring->desc[*head].length = total; + *head = (*head + 1) & mask; + } +} + +static_always_inline uword +memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, memif_if_t * mif, + memif_ring_type_t type) +{ + u8 qid; + memif_ring_t *ring; + u32 *buffers = vlib_frame_args (frame); + u32 n_left = frame->n_vectors; + u16 ring_size, mask; + u16 head, tail; + u16 free_slots; + u32 thread_index = vlib_get_thread_index (); + u8 tx_queues = vec_len (mif->tx_queues); + memif_queue_t *mq; + + if (PREDICT_FALSE (tx_queues == 0)) + { + vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_TX_QUEUES, + n_left); + goto error; + } + + if (tx_queues < vec_len (vlib_mains)) + { + qid = thread_index % tx_queues; + clib_spinlock_lock_if_init (&mif->lockp); + } + else + { + qid = thread_index; + } + mq = vec_elt_at_index (mif->tx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + /* free consumed buffers */ + + head = ring->head; + tail = ring->tail; + + if (tail > head) + free_slots = tail - head; + else + free_slots = ring_size - head + tail; + + while (n_left > 5 && free_slots > 1) + { + if (PREDICT_TRUE (head + 5 < ring_size)) + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 2), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (memif_get_buffer (mif, ring, head + 3), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[head + 4], CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[head + 5], CLIB_CACHE_LINE_BYTES, STORE); + } + else + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 2) % mask), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (memif_get_buffer (mif, ring, (head + 3) % mask), + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[(head + 4) % mask], + CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (&ring->desc[(head + 5) % mask], + CLIB_CACHE_LINE_BYTES, STORE); + } + + 
memif_prefetch_buffer_and_data (vm, buffers[2]); + memif_prefetch_buffer_and_data (vm, buffers[3]); + + memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[0], ring, &head, + mask); + memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[1], ring, &head, + mask); + + buffers += 2; + n_left -= 2; + free_slots -= 2; + } + + while (n_left && free_slots) + { + memif_copy_buffer_to_tx_ring (vm, node, mif, buffers[0], ring, &head, + mask); + buffers++; + n_left--; + free_slots--; + } + + CLIB_MEMORY_STORE_BARRIER (); + ring->head = head; + + clib_spinlock_unlock_if_init (&mif->lockp); + + if (n_left) + { + vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS, + n_left); + } + + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1) + { + u64 b = 1; + CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b)); + mq->int_count++; + } + +error: + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + + return frame->n_vectors; +} + +static uword +memif_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + memif_main_t *nm = &memif_main; + vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; + memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance); + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M); + else + return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S); +} + +static void +memif_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + memif_main_t *apm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + mif->per_interface_next_index = node_index; + return; + } + + mif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), memif_input_node.index, node_index); +} + 
+static void +memif_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, + vnet_hw_interface_rx_mode mode) +{ + memif_main_t *mm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT; + else + mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT; + + return 0; +} + +static clib_error_t * +memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + memif_main_t *mm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + static clib_error_t *error = 0; + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; + else + mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; + + return error; +} + +static clib_error_t * +memif_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (memif_device_class) = { + .name = "memif", + .tx_function = memif_interface_tx, + .format_device_name = format_memif_device_name, + .format_device = format_memif_device, + .format_tx_trace = format_memif_tx_trace, + .tx_function_n_errors = MEMIF_TX_N_ERROR, + .tx_function_error_strings = memif_tx_func_error_strings, + .rx_redirect_to_node = memif_set_interface_next_node, + .clear_counters = memif_clear_hw_interface_counters, + .admin_up_down_function = memif_interface_admin_up_down, + .subif_add_del_function = memif_subif_add_del_function, + .rx_mode_change_function = memif_interface_rx_mode_change, +}; + 
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class, + memif_interface_tx) +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api new file mode 100644 index 00000000..c9632d10 --- /dev/null +++ b/src/plugins/memif/memif.api @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** \brief Create memory interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param role - role of the interface in the connection (master/slave) + @param mode - interface mode + @param rx_queues - number of rx queues (only valid for slave) + @param tx_queues - number of tx queues (only valid for slave) + @param id - 32bit integer used to authenticate and match opposite sides + of the connection + @param socket_filename - filename of the socket to be used for connection + establishment + @param secret - optional secret string, at most 24 bytes, empty by default + @param ring_size - the number of entries of RX/TX rings + @param buffer_size - size of the buffer allocated for each ring entry + @param hw_addr - interface MAC address +*/ +define memif_create +{ + u32 client_index; + u32 context; + + u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ + u8 rx_queues; /* optional, default is 1 */ + u8 tx_queues; /* optional, default is 1 */ + u32 id; /* optional, default is 0 */ + u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" */ + u8 secret[24]; /* optional, default is "" */ + u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */ + u16 buffer_size; /* optional, default is 2048 bytes */ + u8 hw_addr[6]; /* optional, randomly generated if not defined */ +}; + +/** \brief Create memory interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param sw_if_index - software index of the newly created interface +*/ +define memif_create_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +/** \brief Delete memory interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the interface to delete +*/ +autoreply define memif_delete +{ + u32 client_index; + u32 context; + + u32 sw_if_index; +}; + 
+/** \brief Memory interface details structure + @param context - sender context, to match reply w/ request (memif_dump) + @param sw_if_index - index of the interface + @param if_name - name of the interface + @param hw_addr - interface MAC address + @param id - id associated with the interface + @param role - role of the interface in the connection (master/slave) + @param mode - interface mode + @param socket_filename - name of the socket used by this interface + to establish new connections + @param ring_size - the number of entries of RX/TX rings + @param buffer_size - size of the buffer allocated for each ring entry + @param admin_up_down - interface administrative status + @param link_up_down - interface link status + +*/ +define memif_details +{ + u32 context; + + u32 sw_if_index; + u8 if_name[64]; + u8 hw_addr[6]; + + /* memif specific parameters */ + u32 id; + u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ + u8 socket_filename[128]; + u32 ring_size; + u16 buffer_size; /* size of the buffer allocated for each ring entry */ + + /* 1 = up, 0 = down */ + u8 admin_up_down; + u8 link_up_down; +}; + +/** \brief Dump all memory interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define memif_dump +{ + u32 client_index; + u32 context; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c new file mode 100644 index 00000000..a3be49fa --- /dev/null +++ b/src/plugins/memif/memif.c @@ -0,0 +1,819 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + + +#define _GNU_SOURCE +#include <stdint.h> +#include <net/if.h> +#include <sys/types.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/uio.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/eventfd.h> +#include <inttypes.h> +#include <limits.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/linux/syscall.h> +#include <vnet/plugin/plugin.h> +#include <vnet/ethernet/ethernet.h> +#include <vpp/app/version.h> +#include <memif/memif.h> +#include <memif/private.h> + +memif_main_t memif_main; + +static u32 +memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + /* nothing for now */ + return 0; +} + +static void +memif_queue_intfd_close (memif_queue_t * mq) +{ + if (mq->int_clib_file_index != ~0) + { + memif_file_del_by_index (mq->int_clib_file_index); + mq->int_clib_file_index = ~0; + mq->int_fd = -1; + } + else if (mq->int_fd > -1) + { + close (mq->int_fd); + mq->int_fd = -1; + } +} + +void +memif_disconnect (memif_if_t * mif, clib_error_t * err) +{ + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + memif_region_t *mr; + memif_queue_t *mq; + int i; + + if (mif == 0) + return; + + DBG ("disconnect %u (%v)", mif->dev_instance, err ? 
err->what : 0); + + if (err) + { + clib_error_t *e = 0; + mif->local_disc_string = vec_dup (err->what); + if (mif->conn_fd > -1) + e = memif_msg_send_disconnect (mif, err); + clib_error_free (e); + } + + /* set interface down */ + mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); + if (mif->hw_if_index != ~0) + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + + /* close connection socket */ + if (mif->conn_clib_file_index != ~0) + { + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); + hash_unset (msf->dev_instance_by_fd, mif->conn_fd); + memif_file_del_by_index (mif->conn_clib_file_index); + mif->conn_clib_file_index = ~0; + } + else if (mif->conn_fd > -1) + close (mif->conn_fd); + mif->conn_fd = -1; + + vec_foreach_index (i, mif->rx_queues) + { + mq = vec_elt_at_index (mif->rx_queues, i); + if (mq->ring) + { + int rv; + rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, i); + if (rv) + DBG ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", mif->hw_if_index, i, rv); + mq->ring = 0; + } + } + + /* free tx and rx queues */ + vec_foreach (mq, mif->rx_queues) memif_queue_intfd_close (mq); + vec_free (mif->rx_queues); + + vec_foreach (mq, mif->tx_queues) memif_queue_intfd_close (mq); + vec_free (mif->tx_queues); + + /* free memory regions */ + vec_foreach (mr, mif->regions) + { + int rv; + if ((rv = munmap (mr->shm, mr->region_size))) + clib_warning ("munmap failed, rv = %d", rv); + if (mr->fd > -1) + close (mr->fd); + } + vec_free (mif->regions); + + mif->remote_pid = 0; + vec_free (mif->remote_name); + vec_free (mif->remote_if_name); + clib_fifo_free (mif->msg_queue); +} + +static clib_error_t * +memif_int_fd_read_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + u16 qid = uf->private_data & 0xFFFF; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 16); + memif_queue_t *mq = vec_elt_at_index 
(mif->rx_queues, qid); + u64 b; + ssize_t size; + + size = read (uf->file_descriptor, &b, sizeof (b)); + if (size < 0) + { + DBG_UNIX_LOG ("Failed to read from socket"); + return 0; + } + + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, qid); + mq->int_count++; + + return 0; +} + + +clib_error_t * +memif_connect (memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_file_t template = { 0 }; + memif_region_t *mr; + int i; + + DBG ("connect %u", mif->dev_instance); + + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); + + vec_foreach (mr, mif->regions) + { + if (mr->shm) + continue; + + if (mr->fd < 0) + clib_error_return (0, "no memory region fd"); + + if ((mr->shm = mmap (NULL, mr->region_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mr->fd, 0)) == MAP_FAILED) + return clib_error_return_unix (0, "mmap"); + } + + template.read_function = memif_int_fd_read_ready; + + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); + + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + } + + vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + int rv; + + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + + if (mq->int_fd > -1) + { + template.file_descriptor = mq->int_fd; + template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF); + memif_file_add (&mq->int_clib_file_index, &template); + } + vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); + rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, + VNET_HW_INTERFACE_RX_MODE_DEFAULT); + if (rv) + clib_warning + ("Warning: unable to set rx mode for interface %d queue %d: " + "rc=%d", mif->hw_if_index, i, rv); + else + { + 
vnet_hw_interface_rx_mode rxmode; + vnet_hw_interface_get_rx_mode (vnm, mif->hw_if_index, i, &rxmode); + + if (rxmode == VNET_HW_INTERFACE_RX_MODE_POLLING) + mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT; + } + } + + mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; + mif->flags |= MEMIF_IF_FLAG_CONNECTED; + + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + return 0; +} + +static_always_inline memif_ring_t * +memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) +{ + if (vec_len (mif->regions) == 0) + return NULL; + void *p = mif->regions[0].shm; + int ring_size = + sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size); + p += (ring_num + type * mif->run.num_s2m_rings) * ring_size; + + return (memif_ring_t *) p; +} + +clib_error_t * +memif_init_regions_and_queues (memif_if_t * mif) +{ + memif_ring_t *ring = NULL; + int i, j; + u64 buffer_offset; + memif_region_t *r; + clib_mem_vm_alloc_t alloc = { 0 }; + clib_error_t *err; + + vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); + r = vec_elt_at_index (mif->regions, 0); + + buffer_offset = (mif->run.num_s2m_rings + mif->run.num_m2s_rings) * + (sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size)); + + r->region_size = buffer_offset + + mif->run.buffer_size * (1 << mif->run.log2_ring_size) * + (mif->run.num_s2m_rings + mif->run.num_m2s_rings); + + alloc.name = "memif region"; + alloc.size = r->region_size; + alloc.flags = CLIB_MEM_VM_F_SHARED; + + err = clib_mem_vm_ext_alloc (&alloc); + if (err) + return err; + + r->fd = alloc.fd; + r->shm = alloc.addr; + + for (i = 0; i < mif->run.num_s2m_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + ring->head = ring->tail = 0; + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) + { + u16 slot = i * (1 << mif->run.log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = + buffer_offset + (u32) 
(slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; + } + } + for (i = 0; i < mif->run.num_m2s_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + ring->head = ring->tail = 0; + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) + { + u16 slot = + (i + mif->run.num_s2m_rings) * (1 << mif->run.log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = + buffer_offset + (u32) (slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; + } + } + + ASSERT (mif->tx_queues == 0); + vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[tx queue %u]", i); + mq->int_clib_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } + + ASSERT (mif->rx_queues == 0); + vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[rx queue %u]", i); + mq->int_clib_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } + + return 0; +} + +static uword +memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + struct sockaddr_un sun; + int sockfd; + uword *event_data = 0, 
event_type; + u8 enabled = 0; + f64 start_time, last_run_duration = 0, now; + + sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0); + if (sockfd < 0) + { + DBG_UNIX_LOG ("socket AF_UNIX"); + return 0; + } + sun.sun_family = AF_UNIX; + + while (1) + { + if (enabled) + vlib_process_wait_for_event_or_clock (vm, (f64) 3 - + last_run_duration); + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + switch (event_type) + { + case ~0: + break; + case MEMIF_PROCESS_EVENT_START: + enabled = 1; + break; + case MEMIF_PROCESS_EVENT_STOP: + enabled = 0; + continue; + default: + ASSERT (0); + } + + last_run_duration = start_time = vlib_time_now (vm); + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); + /* Allow no more than 10us without a pause */ + now = vlib_time_now (vm); + if (now > start_time + 10e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + start_time = vlib_time_now (vm); + } + + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTING) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTED) + continue; + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + strncpy (sun.sun_path, (char *) msf->filename, + sizeof (sun.sun_path) - 1); + + if (connect + (sockfd, (struct sockaddr *) &sun, + sizeof (struct sockaddr_un)) == 0) + { + clib_file_t t = { 0 }; + + mif->conn_fd = sockfd; + t.read_function = memif_slave_conn_fd_read_ready; + t.write_function = memif_slave_conn_fd_write_ready; + t.error_function = memif_slave_conn_fd_error; + t.file_descriptor = mif->conn_fd; + t.private_data = mif->dev_instance; + memif_file_add (&mif->conn_clib_file_index, &t); + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + + mif->flags |= MEMIF_IF_FLAG_CONNECTING; + + /* grab another fd */ + sockfd = socket (AF_UNIX, 
SOCK_SEQPACKET, 0); + if (sockfd < 0) + { + DBG_UNIX_LOG ("socket AF_UNIX"); + return 0; + } + } + } + })); + /* *INDENT-ON* */ + last_run_duration = vlib_time_now (vm) - last_run_duration; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memif_process_node,static) = { + .function = memif_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "memif-process", +}; +/* *INDENT-ON* */ + +int +memif_delete_if (vlib_main_t * vm, memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, mif->socket_file_index); + clib_error_t *err; + + mif->flags |= MEMIF_IF_FLAG_DELETING; + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); + + err = clib_error_return (0, "interface deleted"); + memif_disconnect (mif, err); + clib_error_free (err); + + /* remove the interface */ + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + vnet_delete_hw_interface (vnm, mif->hw_if_index); + else + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + + /* free interface data structures */ + clib_spinlock_free (&mif->lockp); + mhash_unset (&msf->dev_instance_by_id, &mif->id, 0); + + /* remove socket file */ + if (--(msf->ref_cnt) == 0) + { + if (msf->is_listener) + { + uword *x; + memif_file_del_by_index (msf->clib_file_index); + vec_foreach (x, msf->pending_file_indices) + { + memif_file_del_by_index (*x); + } + vec_free (msf->pending_file_indices); + } + mhash_free (&msf->dev_instance_by_id); + hash_free (msf->dev_instance_by_fd); + mhash_unset (&mm->socket_file_index_by_filename, msf->filename, 0); + vec_free (msf->filename); + pool_put (mm->socket_files, msf); + } + + memset (mif, 0, sizeof (*mif)); + pool_put (mm->interfaces, mif); + + if (pool_elts (mm->interfaces) == 0) + 
vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_STOP, 0); + + return 0; +} + +/* *INDENT-OFF* */ +VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = +{ + .name = "memif-ip", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; +/* *INDENT-ON* */ + +int +memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_main_t *vnm = vnet_get_main (); + memif_if_t *mif = 0; + vnet_sw_interface_t *sw; + clib_error_t *error = 0; + int ret = 0; + uword *p; + vnet_hw_interface_t *hw; + memif_socket_file_t *msf = 0; + u8 *socket_filename; + int rv = 0; + + if (args->socket_filename == 0 || args->socket_filename[0] != '/') + { + clib_error_t *error; + error = vlib_unix_recursive_mkdir (vlib_unix_get_runtime_dir ()); + if (error) + { + clib_error_free (error); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + + if (args->socket_filename == 0) + socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), + MEMIF_DEFAULT_SOCKET_FILENAME, 0); + else + socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), + args->socket_filename, 0); + + } + else + socket_filename = vec_dup (args->socket_filename); + + p = mhash_get (&mm->socket_file_index_by_filename, socket_filename); + + if (p) + { + msf = vec_elt_at_index (mm->socket_files, p[0]); + + /* existing socket file can be either master or slave but cannot be both */ + if (!msf->is_listener != !args->is_master) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + + p = mhash_get (&msf->dev_instance_by_id, &args->id); + if (p) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + } + + /* Create new socket file */ + if (msf == 0) + { + struct stat file_stat; + /* If we are creating listener make sure file doesn't exist or if it + * exists thn delete it if it is old socket file */ + if (args->is_master && + (stat ((char *) socket_filename, &file_stat) == 0)) 
+ { + if (S_ISSOCK (file_stat.st_mode)) + { + unlink ((char *) socket_filename); + } + else + { + error = clib_error_return (0, "File exists for %s", + socket_filename); + clib_error_report (error); + rv = VNET_API_ERROR_VALUE_EXIST; + goto done; + } + } + pool_get (mm->socket_files, msf); + memset (msf, 0, sizeof (memif_socket_file_t)); + mhash_init (&msf->dev_instance_by_id, sizeof (uword), + sizeof (memif_interface_id_t)); + msf->dev_instance_by_fd = hash_create (0, sizeof (uword)); + msf->filename = socket_filename; + msf->fd = -1; + msf->is_listener = (args->is_master != 0); + socket_filename = 0; + mhash_set (&mm->socket_file_index_by_filename, msf->filename, + msf - mm->socket_files, 0); + DBG ("creating socket file %s", msf->filename); + } + + pool_get (mm->interfaces, mif); + memset (mif, 0, sizeof (*mif)); + mif->dev_instance = mif - mm->interfaces; + mif->socket_file_index = msf - mm->socket_files; + mif->id = args->id; + mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; + mif->conn_clib_file_index = ~0; + mif->conn_fd = -1; + mif->mode = args->mode; + if (args->secret) + mif->secret = vec_dup (args->secret); + + if (tm->n_vlib_mains > 1) + clib_spinlock_init (&mif->lockp); + + + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 0xfe; + } + error = ethernet_register_interface (vnm, memif_device_class.index, + mif->dev_instance, args->hw_addr, + &mif->hw_if_index, + memif_eth_flag_change); + } + else if (mif->mode == MEMIF_INTERFACE_MODE_IP) + { + mif->hw_if_index = + vnet_register_interface (vnm, memif_device_class.index, + mif->dev_instance, + memif_ip_hw_if_class.index, + mif->dev_instance); + } + else + error = clib_error_return (0, "unsupported interface mode"); + + if (error) + { + clib_error_report 
(error); + ret = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + + sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index); + mif->sw_if_index = sw->sw_if_index; + + mif->cfg.log2_ring_size = args->log2_ring_size; + mif->cfg.buffer_size = args->buffer_size; + mif->cfg.num_s2m_rings = + args->is_master ? args->rx_queues : args->tx_queues; + mif->cfg.num_m2s_rings = + args->is_master ? args->tx_queues : args->rx_queues; + + args->sw_if_index = mif->sw_if_index; + + /* If this is new one, start listening */ + if (msf->is_listener && msf->ref_cnt == 0) + { + struct sockaddr_un un = { 0 }; + struct stat file_stat; + int on = 1; + + if ((msf->fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + + un.sun_family = AF_UNIX; + strncpy ((char *) un.sun_path, (char *) msf->filename, + sizeof (un.sun_path) - 1); + + if (setsockopt (msf->fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof (on)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_5; + goto error; + } + if (bind (msf->fd, (struct sockaddr *) &un, sizeof (un)) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_6; + goto error; + } + if (listen (msf->fd, 1) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_7; + goto error; + } + + if (stat ((char *) msf->filename, &file_stat) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_8; + goto error; + } + + msf->clib_file_index = ~0; + clib_file_t template = { 0 }; + template.read_function = memif_conn_fd_accept_ready; + template.file_descriptor = msf->fd; + template.private_data = mif->socket_file_index; + memif_file_add (&msf->clib_file_index, &template); + } + + msf->ref_cnt++; + + if (args->is_master == 0) + mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + + hw = vnet_get_hw_interface (vnm, mif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, + memif_input_node.index); + + mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0); + + if (pool_elts 
(mm->interfaces) == 1) + { + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_START, 0); + } + goto done; + +error: + if (mif->hw_if_index != ~0) + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + vnet_delete_hw_interface (vnm, mif->hw_if_index); + else + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + } + memif_delete_if (vm, mif); + return ret; + +done: + vec_free (socket_filename); + return rv; +} + + +static clib_error_t * +memif_init (vlib_main_t * vm) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + memset (mm, 0, sizeof (memif_main_t)); + + /* initialize binary API */ + memif_plugin_api_hookup (vm); + + mhash_init_c_string (&mm->socket_file_index_by_filename, sizeof (uword)); + + vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (memif_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Packet Memory Interface (experimetal)", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h new file mode 100644 index 00000000..11918eab --- /dev/null +++ b/src/plugins/memif/memif.h @@ -0,0 +1,185 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef _MEMIF_H_ +#define _MEMIF_H_ + +#ifndef MEMIF_CACHELINE_SIZE +#define MEMIF_CACHELINE_SIZE 64 +#endif + +#define MEMIF_COOKIE 0x3E31F10 +#define MEMIF_VERSION_MAJOR 1 +#define MEMIF_VERSION_MINOR 0 +#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) + +/* + * Type definitions + */ + +typedef enum memif_msg_type +{ + MEMIF_MSG_TYPE_NONE = 0, + MEMIF_MSG_TYPE_ACK = 1, + MEMIF_MSG_TYPE_HELLO = 2, + MEMIF_MSG_TYPE_INIT = 3, + MEMIF_MSG_TYPE_ADD_REGION = 4, + MEMIF_MSG_TYPE_ADD_RING = 5, + MEMIF_MSG_TYPE_CONNECT = 6, + MEMIF_MSG_TYPE_CONNECTED = 7, + MEMIF_MSG_TYPE_DISCONNECT = 8, +} memif_msg_type_t; + +typedef enum +{ + MEMIF_RING_S2M = 0, + MEMIF_RING_M2S = 1 +} memif_ring_type_t; + +typedef enum +{ + MEMIF_INTERFACE_MODE_ETHERNET = 0, + MEMIF_INTERFACE_MODE_IP = 1, + MEMIF_INTERFACE_MODE_PUNT_INJECT = 2, +} memif_interface_mode_t; + +typedef uint16_t memif_region_index_t; +typedef uint64_t memif_region_offset_t; +typedef uint64_t memif_region_size_t; +typedef uint16_t memif_ring_index_t; +typedef uint32_t memif_interface_id_t; +typedef uint16_t memif_version_t; +typedef uint8_t memif_log2_ring_size_t; + +/* + * Socket messages + */ + +typedef struct __attribute__ ((packed)) +{ + uint8_t name[32]; + memif_version_t min_version; + memif_version_t max_version; + memif_region_index_t max_region; + memif_ring_index_t max_m2s_ring; + memif_ring_index_t max_s2m_ring; + memif_log2_ring_size_t max_log2_ring_size; +} memif_msg_hello_t; + +typedef struct __attribute__ ((packed)) +{ + memif_version_t version; + memif_interface_id_t id; + memif_interface_mode_t mode:8; + uint8_t secret[24]; + uint8_t name[32]; +} memif_msg_init_t; + +typedef struct __attribute__ ((packed)) +{ + memif_region_index_t index; + memif_region_size_t size; +} memif_msg_add_region_t; + 
+typedef struct __attribute__ ((packed)) +{ + uint16_t flags; +#define MEMIF_MSG_ADD_RING_FLAG_S2M (1 << 0) + memif_ring_index_t index; + memif_region_index_t region; + memif_region_offset_t offset; + memif_log2_ring_size_t log2_ring_size; +} memif_msg_add_ring_t; + +typedef struct __attribute__ ((packed)) +{ + uint8_t if_name[32]; +} memif_msg_connect_t; + +typedef struct __attribute__ ((packed)) +{ + uint8_t if_name[32]; +} memif_msg_connected_t; + +typedef struct __attribute__ ((packed)) +{ + uint32_t code; + uint8_t string[96]; +} memif_msg_disconnect_t; + +typedef struct __attribute__ ((packed, aligned (128))) +{ + memif_msg_type_t type:16; + union + { + memif_msg_hello_t hello; + memif_msg_init_t init; + memif_msg_add_region_t add_region; + memif_msg_add_ring_t add_ring; + memif_msg_connect_t connect; + memif_msg_connected_t connected; + memif_msg_disconnect_t disconnect; + }; +} memif_msg_t; + +_Static_assert (sizeof (memif_msg_t) == 128, + "Size of memif_msg_t must be 128"); + +/* + * Ring and Descriptor Layout + */ + +typedef struct __attribute__ ((packed)) +{ + uint16_t flags; +#define MEMIF_DESC_FLAG_NEXT (1 << 0) + memif_region_index_t region; + uint32_t buffer_length; + uint32_t length; + uint8_t reserved[4]; + memif_region_offset_t offset; + uint64_t metadata; +} memif_desc_t; + +_Static_assert (sizeof (memif_desc_t) == 32, + "Size of memif_dsct_t must be 32"); + +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \ + uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE))) + +typedef struct +{ + MEMIF_CACHELINE_ALIGN_MARK (cacheline0); + uint32_t cookie; + uint16_t flags; +#define MEMIF_RING_FLAG_MASK_INT 1 + volatile uint16_t head; + MEMIF_CACHELINE_ALIGN_MARK (cacheline1); + volatile uint16_t tail; + MEMIF_CACHELINE_ALIGN_MARK (cacheline2); + memif_desc_t desc[0]; +} memif_ring_t; + +#endif /* _MEMIF_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git 
a/src/plugins/memif/memif_all_api_h.h b/src/plugins/memif/memif_all_api_h.h new file mode 100644 index 00000000..9729ec16 --- /dev/null +++ b/src/plugins/memif/memif_all_api_h.h @@ -0,0 +1,18 @@ +/* + * memif_all_api_h.h - plug-in api #include file + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <memif/memif.api.h> diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c new file mode 100644 index 00000000..07347bc0 --- /dev/null +++ b/src/plugins/memif/memif_api.c @@ -0,0 +1,350 @@ +/* + *------------------------------------------------------------------ + * memif_api.c - memif api + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vlib/unix/unix.h> +#include <memif/memif.h> +#include <memif/private.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +/* define message IDs */ +#include <memif/memif_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <memif/memif_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <memif/memif_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <memif/memif_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <memif/memif_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. 
/*
 * Handy macros to set up and send a message reply.
 * They assume that the following variables are available in the caller:
 *   mp  - pointer to request message
 *   rmp - pointer to reply message type
 *   rv  - return value
 *   mm  - pointer to memif_main_t (supplies msg_id_base)
 * Both macros return from the calling function when the client has no
 * input queue.  They expand to a single statement and must be followed by
 * a semicolon at the call site.
 */
#define REPLY_MACRO(t)                                          \
do {                                                            \
    unix_shared_memory_queue_t * q =                            \
    vl_api_client_index_to_input_queue (mp->client_index);      \
    if (!q)                                                     \
        return;                                                 \
                                                                \
    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
    rmp->_vl_msg_id = htons ((t)+mm->msg_id_base);              \
    rmp->context = mp->context;                                 \
    rmp->retval = htonl (rv);                                   \
                                                                \
    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
} while(0)

/* Same as REPLY_MACRO, but runs `body` to fill extra reply fields before
 * the reply is sent. */
#define REPLY_MACRO2(t, body)                                   \
do {                                                            \
    unix_shared_memory_queue_t * q =                            \
    vl_api_client_index_to_input_queue (mp->client_index);      \
    if (!q)                                                     \
        return;                                                 \
                                                                \
    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
    rmp->_vl_msg_id = htons ((t)+mm->msg_id_base);              \
    rmp->context = mp->context;                                 \
    rmp->retval = htonl (rv);                                   \
    do {body;} while (0);                                       \
    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
} while(0)
+ * @param mp vl_api_memif_create_t * mp the api message + */ +void +vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_memif_create_reply_t *rmp; + memif_create_if_args_t args = { 0 }; + u32 ring_size = MEMIF_DEFAULT_RING_SIZE; + static const u8 empty_hw_addr[6]; + int rv = 0; + + /* id */ + args.id = clib_net_to_host_u32 (mp->id); + + /* socket filename */ + mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0; + if (strlen ((char *) mp->socket_filename) > 0) + { + vec_validate (args.socket_filename, + strlen ((char *) mp->socket_filename)); + strncpy ((char *) args.socket_filename, (char *) mp->socket_filename, + vec_len (args.socket_filename)); + } + + /* secret */ + mp->secret[ARRAY_LEN (mp->secret) - 1] = 0; + if (strlen ((char *) mp->secret) > 0) + { + vec_validate (args.secret, strlen ((char *) mp->secret)); + strncpy ((char *) args.secret, (char *) mp->secret, + vec_len (args.secret)); + } + + /* role */ + args.is_master = (mp->role == 0); + + /* mode */ + args.mode = mp->mode; + + /* rx/tx queues */ + if (args.is_master == 0) + { + args.rx_queues = MEMIF_DEFAULT_RX_QUEUES; + args.tx_queues = MEMIF_DEFAULT_TX_QUEUES; + if (mp->rx_queues) + { + args.rx_queues = mp->rx_queues; + } + if (mp->tx_queues) + { + args.tx_queues = mp->tx_queues; + } + } + + /* ring size */ + if (mp->ring_size) + { + ring_size = ntohl (mp->ring_size); + } + if (!is_pow2 (ring_size)) + { + rv = VNET_API_ERROR_INVALID_ARGUMENT; + goto reply; + } + args.log2_ring_size = min_log2 (ring_size); + + /* buffer size */ + args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + if (mp->buffer_size) + { + args.buffer_size = ntohs (mp->buffer_size); + } + + /* MAC address */ + if (memcmp (mp->hw_addr, empty_hw_addr, 6) != 0) + { + memcpy (args.hw_addr, mp->hw_addr, 6); + args.hw_addr_set = 1; + } + + rv = memif_create_if (vm, &args); + + vec_free (args.socket_filename); + vec_free (args.secret); + +reply: 
+ /* *INDENT-OFF* */ + REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY, + ({ + rmp->sw_if_index = htonl (args.sw_if_index); + })); + /* *INDENT-ON* */ +} + +/** + * @brief Message handler for memif_delete API. + * @param mp vl_api_memif_delete_t * mp the api message + */ +void +vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); + vl_api_memif_delete_reply_t *rmp; + vnet_hw_interface_t *hi = + vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); + memif_if_t *mif; + int rv = 0; + + if (hi == NULL || memif_device_class.index != hi->dev_class_index) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + { + mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + rv = memif_delete_if (vm, mif); + } + + REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); +} + +static void +send_memif_details (unix_shared_memory_queue_t * q, + memif_if_t * mif, + vnet_sw_interface_t * swif, + u8 * interface_name, u32 context) +{ + vl_api_memif_details_t *mp; + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); + vnet_hw_interface_t *hwif; + + hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = htons (VL_API_MEMIF_DETAILS + mm->msg_id_base); + mp->context = context; + + mp->sw_if_index = htonl (swif->sw_if_index); + strncpy ((char *) mp->if_name, + (char *) interface_name, ARRAY_LEN (mp->if_name) - 1); + memcpy (mp->hw_addr, hwif->hw_address, ARRAY_LEN (mp->hw_addr)); + + mp->id = clib_host_to_net_u32 (mif->id); + mp->role = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? 
1 : 0; + strncpy ((char *) mp->socket_filename, + (char *) msf->filename, ARRAY_LEN (mp->socket_filename) - 1); + + mp->ring_size = htonl (1 << mif->run.log2_ring_size); + mp->buffer_size = htons (mif->run.buffer_size); + + mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0; + mp->link_up_down = (hwif->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +/** + * @brief Message handler for memif_dump API. + * @param mp vl_api_memif_dump_t * mp the api message + */ +void +vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp) +{ + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *swif; + memif_if_t *mif; + u8 *if_name = 0; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + swif = vnet_get_sw_interface (vnm, mif->sw_if_index); + + if_name = format (if_name, "%U%c", + format_vnet_sw_interface_name, + vnm, swif, 0); + + send_memif_details (q, mif, swif, if_name, mp->context); + _vec_len (if_name) = 0; + })); + /* *INDENT-ON* */ + + vec_free (if_name); +} + +#define vl_msg_name_crc_list +#include <memif/memif_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (memif_main_t * mm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + mm->msg_id_base); + foreach_vl_msg_name_crc_memif; +#undef _ +} + +/* Set up the API message handling tables */ +clib_error_t * +memif_plugin_api_hookup (vlib_main_t * vm) +{ + memif_main_t *mm = &memif_main; + api_main_t *am = &api_main; + u8 *name; + + /* Construct the API name */ + name = format (0, "memif_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + mm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + +#define _(N,n) \ + 
vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_memif_plugin_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (mm, am); + + vec_free (name); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/memif_msg_enum.h b/src/plugins/memif/memif_msg_enum.h new file mode 100644 index 00000000..74efee00 --- /dev/null +++ b/src/plugins/memif/memif_msg_enum.h @@ -0,0 +1,31 @@ +/* + * memif_msg_enum.h - vpp engine plug-in message enumeration + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_memif_msg_enum_h +#define included_memif_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum +{ +#include <memif/memif_all_api_h.h> + /* We'll want to know how many messages IDs we need... 
*/ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_memif_msg_enum_h */ diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c new file mode 100644 index 00000000..4ca7526d --- /dev/null +++ b/src/plugins/memif/memif_test.c @@ -0,0 +1,372 @@ +/* + * memif VAT support + * + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <inttypes.h> + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <memif/memif.h> +#include <memif/private.h> + +#define __plugin_msg_base memif_test_main.msg_id_base +#include <vlibapi/vat_helper_macros.h> + +/* declare message IDs */ +#include <memif/memif_msg_enum.h> + +/* Get CRC codes of the messages defined outside of this plugin */ +#define vl_msg_name_crc_list +#include <vpp/api/vpe_all_api_h.h> +#undef vl_msg_name_crc_list + +/* define message structures */ +#define vl_typedefs +#include <vpp/api/vpe_all_api_h.h> +#include <memif/memif_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <memif/memif_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
+#define vl_printfun +#include <memif/memif_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <memif/memif_all_api_h.h> +#undef vl_api_version + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} memif_test_main_t; + +memif_test_main_t memif_test_main; + +/* standard reply handlers */ +#define foreach_standard_reply_retval_handler \ +_(memif_delete_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = memif_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(MEMIF_CREATE_REPLY, memif_create_reply) \ +_(MEMIF_DELETE_REPLY, memif_delete_reply) \ +_(MEMIF_DETAILS, memif_details) + +static uword +unformat_memif_queues (unformat_input_t * input, va_list * args) +{ + u32 *rx_queues = va_arg (*args, u32 *); + u32 *tx_queues = va_arg (*args, u32 *); + + if (unformat (input, "rx-queues %u", rx_queues)) + ; + if (unformat (input, "tx-queues %u", tx_queues)) + ; + + return 1; +} + +/* memif-create API */ +static int +api_memif_create (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_memif_create_t *mp; + u32 id = 0; + u8 *socket_filename = 0; + u8 *secret = 0; + u8 role = 1; + u32 ring_size = 0; + u32 buffer_size = 0; + u8 hw_addr[6] = { 0 }; + u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES; + u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES; + int ret; + u8 mode = MEMIF_INTERFACE_MODE_ETHERNET; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "id %u", &id)) + ; + else if (unformat (i, "socket %s", 
&socket_filename)) + ; + else if (unformat (i, "secret %s", &secret)) + ; + else if (unformat (i, "ring_size %u", &ring_size)) + ; + else if (unformat (i, "buffer_size %u", &buffer_size)) + ; + else if (unformat (i, "master")) + role = 0; + else if (unformat (i, "slave %U", + unformat_memif_queues, &rx_queues, &tx_queues)) + role = 1; + else if (unformat (i, "mode ip")) + mode = MEMIF_INTERFACE_MODE_IP; + else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr)) + ; + else + { + clib_warning ("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (!is_pow2 (ring_size)) + { + errmsg ("ring size must be power of 2\n"); + return -99; + } + + if (rx_queues > 255 || rx_queues < 1) + { + errmsg ("rx queue must be between 1 - 255\n"); + return -99; + } + + if (tx_queues > 255 || tx_queues < 1) + { + errmsg ("tx queue must be between 1 - 255\n"); + return -99; + } + + M (MEMIF_CREATE, mp); + + mp->mode = mode; + mp->id = clib_host_to_net_u32 (id); + mp->role = role; + mp->ring_size = clib_host_to_net_u32 (ring_size); + mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff); + if (socket_filename != 0) + { + strncpy ((char *) mp->socket_filename, (char *) socket_filename, 127); + vec_free (socket_filename); + } + if (secret != 0) + { + strncpy ((char *) mp->secret, (char *) secret, 16); + vec_free (secret); + } + memcpy (mp->hw_addr, hw_addr, 6); + mp->rx_queues = rx_queues; + mp->tx_queues = tx_queues; + + S (mp); + W (ret); + return ret; +} + +/* memif-create reply handler */ +static void vl_api_memif_create_reply_t_handler + (vl_api_memif_create_reply_t * mp) +{ + vat_main_t *vam = memif_test_main.vat_main; + i32 retval = ntohl (mp->retval); + + if (retval == 0) + { + fformat (vam->ofp, "created memif with sw_if_index %d\n", + ntohl (mp->sw_if_index)); + } + + vam->retval = retval; + vam->result_ready = 1; +} + +/* memif-delete API */ +static int +api_memif_delete (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + 
vl_api_memif_delete_t *mp; + u32 sw_if_index = 0; + u8 index_defined = 0; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sw_if_index %u", &sw_if_index)) + index_defined = 1; + else + { + clib_warning ("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (!index_defined) + { + errmsg ("missing sw_if_index\n"); + return -99; + } + + M (MEMIF_DELETE, mp); + + mp->sw_if_index = clib_host_to_net_u32 (sw_if_index); + + S (mp); + W (ret); + return ret; +} + +/* memif-dump API */ +static int +api_memif_dump (vat_main_t * vam) +{ + memif_test_main_t *mm = &memif_test_main; + vl_api_memif_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for memif_dump"); + return -99; + } + + M (MEMIF_DUMP, mp); + S (mp); + + /* Use a control ping for synchronization */ + mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping)); + mp_ping->_vl_msg_id = htons (mm->ping_id); + mp_ping->client_index = vam->my_client_index; + + fformat (vam->ofp, "Sending ping id=%d\n", mm->ping_id); + + vam->result_ready = 0; + S (mp_ping); + + W (ret); + return ret; +} + +/* memif-details message handler */ +static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) +{ + vat_main_t *vam = memif_test_main.vat_main; + + fformat (vam->ofp, "%s: sw_if_index %u mac %U\n" + " id %u socket %s role %s\n" + " ring_size %u buffer_size %u\n" + " state %s link %s\n", + mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address, + mp->hw_addr, clib_net_to_host_u32 (mp->id), mp->socket_filename, + mp->role ? "slave" : "master", + ntohl (mp->ring_size), ntohs (mp->buffer_size), + mp->admin_up_down ? "up" : "down", + mp->link_up_down ? 
"up" : "down"); +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(memif_create, "[id <id>] [socket <path>] [ring_size <size>] " \ + "[buffer_size <size>] [hw_addr <mac_address>] " \ + "[secret <string>] [mode ip] <master|slave>") \ +_(memif_delete, "<sw_if_index>") \ +_(memif_dump, "") + +static void +memif_vat_api_hookup (vat_main_t * vam) +{ + memif_test_main_t *mm __attribute__ ((unused)) = &memif_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) \ + hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + memif_test_main_t *mm = &memif_test_main; + u8 *name; + + mm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "memif_%08x%c", api_version, 0); + mm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + /* Get the control ping ID */ +#define _(id,n,crc) \ + const char *id ## _CRC __attribute__ ((unused)) = #n "_" #crc; + foreach_vl_msg_name_crc_vpe; +#undef _ + mm->ping_id = vl_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC)); + + if (mm->msg_id_base != (u16) ~0) + memif_vat_api_hookup (vam); + + vec_free (name); + + return 0; +} diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c new file mode 100644 index 00000000..4acc7149 --- /dev/null +++ b/src/plugins/memif/node.c @@ -0,0 +1,533 @@ +/* + 
*------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/uio.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +#include <memif/memif.h> +#include <memif/private.h> + +#define foreach_memif_input_error \ + _(NOT_IP, "not ip packet") + +typedef enum +{ +#define _(f,s) MEMIF_INPUT_ERROR_##f, + foreach_memif_input_error +#undef _ + MEMIF_INPUT_N_ERROR, +} memif_input_error_t; + +static char *memif_input_error_strings[] = { +#define _(n,s) s, + foreach_memif_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + u16 ring; +} memif_input_trace_t; + +static u8 * +format_memif_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + memif_input_trace_t *t = va_arg (*args, memif_input_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "memif: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: ring %u", format_white_space, indent + 2, + t->ring); + return s; +} 
+ +static_always_inline void +memif_prefetch (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_prefetch_buffer_header (b, STORE); + CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE); +} + +static_always_inline void +memif_buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, + u32 prev_bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +/** + * @brief Copy buffer from rx ring + * + * @param * vm (in) + * @param * mif (in) pointer to memif interface + * @param * ring (in) pointer to memif ring + * @param * rd (in) pointer to ring data + * @param ring_size (in) ring size + * @param * n_free_bufs (in/out) the number of free vlib buffers available + * @param ** first_b (out) the first vlib buffer pointer + * @param * first_bi (out) the first vlib buffer index + * @param * bi (in/out) the current buffer index + * #param * num_slots (in/out) the number of descriptors available to read + * + * @return total bytes read from rx ring also written to vlib buffers + */ +static_always_inline uword +memif_copy_buffer_from_rx_ring (vlib_main_t * vm, memif_if_t * mif, + memif_ring_t * ring, memif_queue_t * mq, + u16 ring_size, u32 n_buffer_bytes, + u32 * n_free_bufs, vlib_buffer_t ** first_b, + u32 * first_bi, u32 * bi, u16 * num_slots) +{ + memif_main_t *nm = &memif_main; + u32 thread_index = vlib_get_thread_index (); + u32 total_bytes = 0, offset = 0; + u32 data_len; + u32 bytes_to_copy; + void *mb; + vlib_buffer_t *b; + u16 mask = ring_size - 1; + u32 prev_bi; + u16 last_head; + + while (*num_slots) + { + data_len = ring->desc[mq->last_head].length; + 
while (data_len && (*n_free_bufs)) + { + /* get empty buffer */ + u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1; + prev_bi = *bi; + *bi = nm->rx_buffers[thread_index][last_buf]; + b = vlib_get_buffer (vm, *bi); + _vec_len (nm->rx_buffers[thread_index]) = last_buf; + (*n_free_bufs)--; + if (PREDICT_FALSE (*n_free_bufs == 0)) + { + *n_free_bufs += + vlib_buffer_alloc (vm, + &nm->rx_buffers[thread_index] + [*n_free_bufs], ring_size); + _vec_len (nm->rx_buffers[thread_index]) = *n_free_bufs; + } + + if (last_buf > 4) + { + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]); + memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]); + } + + /* copy buffer */ + bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b->current_data = 0; + mb = memif_get_buffer (mif, ring, mq->last_head); + clib_memcpy (vlib_buffer_get_current (b), mb + offset, + CLIB_CACHE_LINE_BYTES); + if (bytes_to_copy > CLIB_CACHE_LINE_BYTES) + clib_memcpy (vlib_buffer_get_current (b) + CLIB_CACHE_LINE_BYTES, + mb + CLIB_CACHE_LINE_BYTES + offset, + bytes_to_copy - CLIB_CACHE_LINE_BYTES); + + /* fill buffer header */ + b->current_length = bytes_to_copy; + + if (total_bytes == 0) + { + /* fill buffer metadata */ + b->total_length_not_including_first_buffer = 0; + b->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b)->sw_if_index[VLIB_RX] = mif->sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0; + *first_bi = *bi; + *first_b = vlib_get_buffer (vm, *first_bi); + } + else + memif_buffer_add_to_chain (vm, *bi, *first_bi, prev_bi); + + offset += bytes_to_copy; + total_bytes += bytes_to_copy; + data_len -= bytes_to_copy; + } + last_head = mq->last_head; + /* Advance to next descriptor */ + mq->last_head = (mq->last_head + 1) & mask; + offset = 0; + (*num_slots)--; + if ((ring->desc[last_head].flags & MEMIF_DESC_FLAG_NEXT) == 0) + break; + } + + return (total_bytes); +} + + +static_always_inline u32 +memif_next_from_ip_hdr 
(vlib_node_runtime_t * node, vlib_buffer_t * b) +{ + u8 *ptr = vlib_buffer_get_current (b); + u8 v = *ptr & 0xf0; + + if (PREDICT_TRUE (v == 0x40)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (v == 0x60)) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + + b->error = node->errors[MEMIF_INPUT_ERROR_NOT_IP]; + return VNET_DEVICE_INPUT_NEXT_DROP; +} + +static_always_inline uword +memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, memif_if_t * mif, + memif_ring_type_t type, u16 qid, + memif_interface_mode_t mode) +{ + vnet_main_t *vnm = vnet_get_main (); + memif_ring_t *ring; + memif_queue_t *mq; + u16 head; + u32 next_index; + uword n_trace = vlib_get_trace_count (vm, node); + memif_main_t *nm = &memif_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + u32 b0_total, b1_total; + u32 thread_index = vlib_get_thread_index (); + u16 ring_size, mask, num_slots; + u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + mq = vec_elt_at_index (mif->rx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + } + else + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + } + + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < ring_size)) + { + vec_validate (nm->rx_buffers[thread_index], + ring_size + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], + ring_size); + _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs; + } + + head = ring->head; + if (head == mq->last_head) + return 0; + + if (head > mq->last_head) + num_slots = head - mq->last_head; + else + num_slots = ring_size - mq->last_head + head; + + while (num_slots) + { + u32 n_left_to_next; + u32 next0 = next_index; + u32 next1 = 
next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (num_slots > 11 && n_left_to_next > 2) + { + if (PREDICT_TRUE (mq->last_head + 5 < ring_size)) + { + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 2), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 3), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[mq->last_head + 4], + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[mq->last_head + 5], + CLIB_CACHE_LINE_BYTES, LOAD); + } + else + { + CLIB_PREFETCH (memif_get_buffer + (mif, ring, (mq->last_head + 2) % mask), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (memif_get_buffer + (mif, ring, (mq->last_head + 3) % mask), + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[(mq->last_head + 4) % mask], + CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (&ring->desc[(mq->last_head + 5) % mask], + CLIB_CACHE_LINE_BYTES, LOAD); + } + + vlib_buffer_t *first_b0 = 0; + u32 bi0 = 0, first_bi0 = 0; + b0_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq, + ring_size, + n_buffer_bytes, + &n_free_bufs, &first_b0, + &first_bi0, &bi0, + &num_slots); + + vlib_buffer_t *first_b1 = 0; + u32 bi1 = 0, first_bi1 = 0; + b1_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq, + ring_size, + n_buffer_bytes, + &n_free_bufs, &first_b1, + &first_bi1, &bi1, + &num_slots); + + /* enqueue buffer */ + to_next[0] = first_bi0; + to_next[1] = first_bi1; + to_next += 2; + n_left_to_next -= 2; + + + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next0 = memif_next_from_ip_hdr (node, first_b0); + next1 = memif_next_from_ip_hdr (node, first_b1); + } + else if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + if (PREDICT_FALSE (mif->per_interface_next_index != ~0)) + next0 = next1 = mif->per_interface_next_index; + else + /* redirect if feature path + * enabled */ + vnet_feature_start_device_input_x2 (mif->sw_if_index, + &next0, &next1, + first_b0, first_b1); + } + + /* 
trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b1); + + if (PREDICT_FALSE (n_trace > 0)) + { + /* b0 */ + if (PREDICT_TRUE (first_b0 != 0)) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = qid; + } + if (n_trace) + { + /* b1 */ + if (PREDICT_TRUE (first_b1 != 0)) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next1, first_b1, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b1, sizeof (*tr)); + tr->next_index = next1; + tr->hw_if_index = mif->hw_if_index; + tr->ring = qid; + } + } + } + + /* enqueue */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + first_bi1, next0, next1); + + /* next packet */ + n_rx_packets += 2; + n_rx_bytes += b0_total + b1_total; + } + while (num_slots && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 bi0 = 0, first_bi0 = 0; + b0_total = memif_copy_buffer_from_rx_ring (vm, mif, ring, mq, + ring_size, + n_buffer_bytes, + &n_free_bufs, &first_b0, + &first_bi0, &bi0, + &num_slots); + + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next0 = memif_next_from_ip_hdr (node, first_b0); + } + else if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + if (PREDICT_FALSE (mif->per_interface_next_index != ~0)) + next0 = mif->per_interface_next_index; + else + /* redirect if feature path + * enabled */ + vnet_feature_start_device_input_x1 (mif->sw_if_index, + &next0, first_b0); + } + + /* trace */ + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); + + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0)) + { + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, 
node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = qid; + } + } + + /* enqueue buffer */ + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + + /* enqueue */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += b0_total; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + CLIB_MEMORY_STORE_BARRIER (); + ring->tail = head; + + vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, thread_index, + mif->hw_if_index, n_rx_packets, + n_rx_bytes); + + return n_rx_packets; +} + +static uword +memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_rx = 0; + memif_main_t *nm = &memif_main; + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; + + foreach_device_and_queue (dq, rt->devices_and_queues) + { + memif_if_t *mif; + mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) && + (mif->flags & MEMIF_IF_FLAG_CONNECTED)) + { + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id, + MEMIF_INTERFACE_MODE_IP); + else + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id, + MEMIF_INTERFACE_MODE_ETHERNET); + } + else + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id, + MEMIF_INTERFACE_MODE_IP); + else + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id, + MEMIF_INTERFACE_MODE_ETHERNET); + } + } + } + + return n_rx; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memif_input_node) = 
{ + .function = memif_input_fn, + .name = "memif-input", + .sibling_of = "device-input", + .format_trace = format_memif_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = MEMIF_INPUT_N_ERROR, + .error_strings = memif_input_error_strings, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (memif_input_node, memif_input_fn) +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h new file mode 100644 index 00000000..912ec59a --- /dev/null +++ b/src/plugins/memif/private.h @@ -0,0 +1,261 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vppinfra/lock.h> + +#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock" +#define MEMIF_DEFAULT_RING_SIZE 1024 +#define MEMIF_DEFAULT_RX_QUEUES 1 +#define MEMIF_DEFAULT_TX_QUEUES 1 +#define MEMIF_DEFAULT_BUFFER_SIZE 2048 + +#define MEMIF_MAX_M2S_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_S2M_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_REGION 255 +#define MEMIF_MAX_LOG2_RING_SIZE 14 + +#define MEMIF_DEBUG 0 + +#if MEMIF_DEBUG == 1 +#define DBG(...) clib_warning(__VA_ARGS__) +#define DBG_UNIX_LOG(...) 
clib_unix_warning(__VA_ARGS__) +#else +#define DBG(...) +#define DBG_UNIX_LOG(...) +#endif + +#if MEMIF_DEBUG == 1 +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = clib_file_add (&file_main, b); \ + clib_warning ("clib_file_add fd %d private_data %u idx %u", \ + (b)->file_descriptor, (b)->private_data, *a); \ +} while (0) + +#define memif_file_del(a) do { \ + clib_warning ("clib_file_del idx %u",a - file_main.file_pool); \ + clib_file_del (&file_main, a); \ +} while (0) + +#define memif_file_del_by_index(a) do { \ + clib_warning ("clib_file_del idx %u", a); \ + clib_file_del_by_index (&file_main, a); \ +} while (0) +#else +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = clib_file_add (&file_main, b); \ +} while (0) +#define memif_file_del(a) clib_file_del(&file_main, a) +#define memif_file_del_by_index(a) clib_file_del_by_index(&file_main, a) +#endif + +typedef struct +{ + u8 *filename; + int fd; + uword clib_file_index; + uword *pending_file_indices; + int ref_cnt; + int is_listener; + + /* hash of all registered id */ + mhash_t dev_instance_by_id; + + /* hash of all registered fds */ + uword *dev_instance_by_fd; +} memif_socket_file_t; + +typedef struct +{ + void *shm; + memif_region_size_t region_size; + int fd; +} memif_region_t; + +typedef struct +{ + memif_msg_t msg; + int fd; +} memif_msg_fifo_elt_t; + +typedef struct +{ + /* ring data */ + memif_ring_t *ring; + memif_log2_ring_size_t log2_ring_size; + memif_region_index_t region; + memif_region_offset_t offset; + + u16 last_head; + u16 last_tail; + + /* interrupts */ + int int_fd; + uword int_clib_file_index; + u64 int_count; +} memif_queue_t; + +#define foreach_memif_if_flag \ + _(0, ADMIN_UP, "admin-up") \ + _(1, IS_SLAVE, "slave") \ + _(2, CONNECTING, "connecting") \ + _(3, CONNECTED, "connected") \ + _(4, DELETING, "deleting") + +typedef enum +{ +#define _(a, b, c) MEMIF_IF_FLAG_##b = (1 << a), + foreach_memif_if_flag +#undef _ +} memif_if_flag_t; + +typedef struct +{ 
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u32 flags; + memif_interface_id_t id; + u32 hw_if_index; + u32 sw_if_index; + uword dev_instance; + memif_interface_mode_t mode:8; + + u32 per_interface_next_index; + + /* socket connection */ + uword socket_file_index; + int conn_fd; + uword conn_clib_file_index; + memif_msg_fifo_elt_t *msg_queue; + u8 *secret; + + memif_region_t *regions; + + memif_queue_t *rx_queues; + memif_queue_t *tx_queues; + + /* remote info */ + pid_t remote_pid; + uid_t remote_uid; + gid_t remote_gid; + u8 *remote_name; + u8 *remote_if_name; + + struct + { + memif_log2_ring_size_t log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } cfg; + + struct + { + memif_log2_ring_size_t log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } run; + + /* disconnect strings */ + u8 *local_disc_string; + u8 *remote_disc_string; +} memif_if_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** API message ID base */ + u16 msg_id_base; + + /* pool of all memory interfaces */ + memif_if_t *interfaces; + + /* pool of all unix socket files */ + memif_socket_file_t *socket_files; + mhash_t socket_file_index_by_filename; + + /* rx buffer cache */ + u32 **rx_buffers; + +} memif_main_t; + +extern memif_main_t memif_main; +extern vnet_device_class_t memif_device_class; +extern vlib_node_registration_t memif_input_node; + +enum +{ + MEMIF_PROCESS_EVENT_START = 1, + MEMIF_PROCESS_EVENT_STOP = 2, +} memif_process_event_t; + +typedef struct +{ + memif_interface_id_t id; + u8 *socket_filename; + u8 *secret; + u8 is_master; + memif_interface_mode_t mode:8; + memif_log2_ring_size_t log2_ring_size; + u16 buffer_size; + u8 hw_addr_set; + u8 hw_addr[6]; + u8 rx_queues; + u8 tx_queues; + + /* return */ + u32 sw_if_index; +} memif_create_if_args_t; + +int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); +int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); 
+clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); + +static_always_inline void * +memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +{ + u16 region = ring->desc[slot].region; + return mif->regions[region].shm + ring->desc[slot].offset; +} + +/* memif.c */ +clib_error_t *memif_init_regions_and_queues (memif_if_t * mif); +clib_error_t *memif_connect (memif_if_t * mif); +void memif_disconnect (memif_if_t * mif, clib_error_t * err); + +/* socket.c */ +clib_error_t *memif_conn_fd_accept_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_error (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf); +clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, + clib_error_t * err); +u8 *format_memif_device_name (u8 * s, va_list * args); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c new file mode 100644 index 00000000..1abc0f11 --- /dev/null +++ b/src/plugins/memif/socket.c @@ -0,0 +1,740 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include <stdint.h> +#include <net/if.h> +#include <sys/types.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/uio.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/eventfd.h> +#include <inttypes.h> +#include <limits.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/plugin/plugin.h> +#include <vnet/ethernet/ethernet.h> +#include <vpp/app/version.h> + +#include <memif/memif.h> +#include <memif/private.h> + +static u8 * +memif_str2vec (uint8_t * str, int len) +{ + u8 *s = 0; + int i; + + if (str[0] == 0) + return s; + + for (i = 0; i < len; i++) + { + vec_add1 (s, str[i]); + if (str[i] == 0) + return s; + } + vec_add1 (s, 0); + + return s; +} + +static clib_error_t * +memif_msg_send (int fd, memif_msg_t * msg, int afd) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + int rv; + + iov[0].iov_base = (void *) msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + if (afd > 0) + { + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &afd, sizeof (int)); + } + rv = sendmsg (fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + DBG ("Message type %u sent (fd %d)", msg->type, afd); + return 0; +} + +static void +memif_msg_enq_ack (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + + e->msg.type = MEMIF_MSG_TYPE_ACK; + e->fd = -1; +} + +static clib_error_t * +memif_msg_enq_hello (int fd) +{ + u8 *s; + 
memif_msg_t msg = { 0 }; + memif_msg_hello_t *h = &msg.hello; + msg.type = MEMIF_MSG_TYPE_HELLO; + h->min_version = MEMIF_VERSION; + h->max_version = MEMIF_VERSION; + h->max_m2s_ring = MEMIF_MAX_M2S_RING; + h->max_s2m_ring = MEMIF_MAX_M2S_RING; + h->max_region = MEMIF_MAX_REGION; + h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) h->name, (char *) s, sizeof (h->name) - 1); + vec_free (s); + return memif_msg_send (fd, &msg, -1); +} + +static void +memif_msg_enq_init (memif_if_t * mif) +{ + u8 *s; + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_init_t *i = &e->msg.init; + + e->msg.type = MEMIF_MSG_TYPE_INIT; + e->fd = -1; + i->version = MEMIF_VERSION; + i->id = mif->id; + i->mode = mif->mode; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) i->name, (char *) s, sizeof (i->name) - 1); + if (mif->secret) + strncpy ((char *) i->secret, (char *) mif->secret, + sizeof (i->secret) - 1); + vec_free (s); +} + +static void +memif_msg_enq_add_region (memif_if_t * mif, u8 region) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_region_t *ar = &e->msg.add_region; + + e->msg.type = MEMIF_MSG_TYPE_ADD_REGION; + e->fd = mif->regions[region].fd; + ar->index = region; + ar->size = mif->regions[region].region_size; +} + +static void +memif_msg_enq_add_ring (memif_if_t * mif, u8 index, u8 direction) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_ring_t *ar = &e->msg.add_ring; + memif_queue_t *mq; + + ASSERT ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0); + + e->msg.type = MEMIF_MSG_TYPE_ADD_RING; + + if (direction == MEMIF_RING_M2S) + mq = vec_elt_at_index (mif->rx_queues, index); + else + mq = vec_elt_at_index (mif->tx_queues, index); + + e->fd = mq->int_fd; + ar->index = index; + ar->region = mq->region; + ar->offset = mq->offset; + ar->log2_ring_size = mq->log2_ring_size; + ar->flags = (direction == 
MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0; +} + +static void +memif_msg_enq_connect (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connect_t *c = &e->msg.connect; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECT; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1); + vec_free (s); +} + +static void +memif_msg_enq_connected (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connected_t *c = &e->msg.connected; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECTED; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1); + vec_free (s); +} + +clib_error_t * +memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err) +{ + memif_msg_t msg = { 0 }; + msg.type = MEMIF_MSG_TYPE_DISCONNECT; + memif_msg_disconnect_t *d = &msg.disconnect; + + d->code = err->code; + strncpy ((char *) d->string, (char *) err->what, sizeof (d->string) - 1); + + return memif_msg_send (mif->conn_fd, &msg, -1); +} + +static clib_error_t * +memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_hello_t *h = &msg->hello; + + if (msg->hello.min_version > MEMIF_VERSION || + msg->hello.max_version < MEMIF_VERSION) + return clib_error_return (0, "incompatible protocol version"); + + mif->run.num_s2m_rings = clib_min (h->max_s2m_ring + 1, + mif->cfg.num_s2m_rings); + mif->run.num_m2s_rings = clib_min (h->max_m2s_ring + 1, + mif->cfg.num_m2s_rings); + mif->run.log2_ring_size = clib_min (h->max_log2_ring_size, + mif->cfg.log2_ring_size); + mif->run.buffer_size = mif->cfg.buffer_size; + + mif->remote_name = memif_str2vec (h->name, sizeof (h->name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, + clib_file_t * uf) 
+{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, uf->private_data); + memif_msg_init_t *i = &msg->init; + memif_if_t *mif, tmp; + clib_error_t *err; + uword *p; + + if (i->version != MEMIF_VERSION) + { + memif_file_del_by_index (uf - file_main.file_pool); + return clib_error_return (0, "unsupported version"); + } + + p = mhash_get (&msf->dev_instance_by_id, &i->id); + + if (!p) + { + err = clib_error_return (0, "unmatched interface id"); + goto error; + } + + mif = vec_elt_at_index (mm->interfaces, p[0]); + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + err = clib_error_return (0, "cannot connect to slave"); + goto error; + } + + if (mif->conn_fd != -1) + { + err = clib_error_return (0, "already connected"); + goto error; + } + + if (i->mode != mif->mode) + { + err = clib_error_return (0, "mode mismatch"); + goto error; + } + + mif->conn_fd = uf->file_descriptor; + mif->conn_clib_file_index = uf - file_main.file_pool; + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + mif->remote_name = memif_str2vec (i->name, sizeof (i->name)); + *mifp = mif; + + if (mif->secret) + { + u8 *s; + int r; + s = memif_str2vec (i->secret, sizeof (i->secret)); + if (s == 0) + return clib_error_return (0, "secret required"); + + r = vec_cmp (s, mif->secret); + vec_free (s); + + if (r) + return clib_error_return (0, "incorrect secret"); + } + + return 0; + +error: + tmp.conn_fd = uf->file_descriptor; + memif_msg_send_disconnect (&tmp, err); + memif_file_del_by_index (uf - file_main.file_pool); + return err; +} + +static clib_error_t * +memif_msg_receive_add_region (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + memif_msg_add_region_t *ar = &msg->add_region; + memif_region_t *mr; + if (fd < 0) + return clib_error_return (0, "missing memory region fd"); + + if (ar->index != vec_len (mif->regions)) + return clib_error_return (0, "unexpected region index"); + + if (ar->index > MEMIF_MAX_REGION) + return 
clib_error_return (0, "too many regions"); + + vec_validate_aligned (mif->regions, ar->index, CLIB_CACHE_LINE_BYTES); + mr = vec_elt_at_index (mif->regions, ar->index); + mr->fd = fd; + mr->region_size = ar->size; + + return 0; +} + +static clib_error_t * +memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + memif_msg_add_ring_t *ar = &msg->add_ring; + memif_queue_t *mq; + + if (fd < 0) + return clib_error_return (0, "missing ring interrupt fd"); + + if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) + { + if (ar->index != vec_len (mif->rx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_S2M_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->rx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->rx_queues, ar->index); + mif->run.num_s2m_rings = vec_len (mif->rx_queues); + } + else + { + if (ar->index != vec_len (mif->tx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_M2S_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->tx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->tx_queues, ar->index); + mif->run.num_m2s_rings = vec_len (mif->tx_queues); + } + + mq->int_fd = fd; + mq->int_clib_file_index = ~0; + mq->log2_ring_size = ar->log2_ring_size; + mq->region = ar->region; + mq->offset = ar->offset; + + return 0; +} + +static clib_error_t * +memif_msg_receive_connect (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connect_t *c = &msg->connect; + + if ((err = memif_connect (mif))) + return err; + + mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_connected (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connected_t *c = &msg->connected; + + if ((err = memif_connect (mif))) + return err; + + 
mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name)); + return 0; +} + +static clib_error_t * +memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_disconnect_t *d = &msg->disconnect; + + mif->remote_disc_string = memif_str2vec (d->string, sizeof (d->string)); + return clib_error_return (0, "disconnect received"); +} + +static clib_error_t * +memif_msg_receive (memif_if_t ** mifp, clib_file_t * uf) +{ + char ctl[CMSG_SPACE (sizeof (int)) + + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + memif_msg_t msg = { 0 }; + ssize_t size; + clib_error_t *err = 0; + int fd = -1; + int i; + memif_if_t *mif = *mifp; + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + /* receive the incoming message */ + size = recvmsg (uf->file_descriptor, &mh, 0); + if (size != sizeof (memif_msg_t)) + { + return (size == 0) ? 
clib_error_return (0, "disconnected") : + clib_error_return_unix (0, + "recvmsg: malformed message received on fd %d", + uf->file_descriptor); + } + + if (mif == 0 && msg.type != MEMIF_MSG_TYPE_INIT) + { + memif_file_del (uf); + return clib_error_return (0, "unexpected message received"); + } + + /* process anciliary data */ + struct ucred *cr = 0; + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET) + { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_type == SCM_RIGHTS) + { + int *fdp = (int *) CMSG_DATA (cmsg); + fd = *fdp; + } + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + + DBG ("Message type %u received", msg.type); + /* process the message based on its type */ + switch (msg.type) + { + case MEMIF_MSG_TYPE_ACK: + break; + + case MEMIF_MSG_TYPE_HELLO: + if ((err = memif_msg_receive_hello (mif, &msg))) + return err; + if ((err = memif_init_regions_and_queues (mif))) + return err; + memif_msg_enq_init (mif); + memif_msg_enq_add_region (mif, 0); + vec_foreach_index (i, mif->tx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M); + vec_foreach_index (i, mif->rx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S); + memif_msg_enq_connect (mif); + break; + + case MEMIF_MSG_TYPE_INIT: + if ((err = memif_msg_receive_init (mifp, &msg, uf))) + return err; + mif = *mifp; + mif->remote_pid = cr->pid; + mif->remote_uid = cr->uid; + mif->remote_gid = cr->gid; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_REGION: + if ((err = memif_msg_receive_add_region (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_RING: + if ((err = memif_msg_receive_add_ring (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_CONNECT: + if ((err = memif_msg_receive_connect (mif, &msg))) + return err; + memif_msg_enq_connected (mif); + break; + + case 
MEMIF_MSG_TYPE_CONNECTED: + if ((err = memif_msg_receive_connected (mif, &msg))) + return err; + break; + + case MEMIF_MSG_TYPE_DISCONNECT: + if ((err = memif_msg_receive_disconnect (mif, &msg))) + return err; + break; + + default: + err = clib_error_return (0, "unknown message type (0x%x)", msg.type); + return err; + } + + if (clib_fifo_elts (mif->msg_queue) && mif->conn_clib_file_index != ~0) + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 1); + return 0; +} + +clib_error_t * +memif_master_conn_fd_read_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif = 0; + uword conn_clib_file_index = ~0; + clib_error_t *err = 0; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + mif = vec_elt_at_index (mm->interfaces, p[0]); + } + else + { + /* This is new connection, remove index from pending vector */ + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) + { + conn_clib_file_index = msf->pending_file_indices[i]; + vec_del1 (msf->pending_file_indices, i); + break; + } + ASSERT (conn_clib_file_index != ~0); + } + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +clib_error_t * +memif_slave_conn_fd_read_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + clib_error_t *err; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +static clib_error_t * +memif_conn_fd_write_ready (clib_file_t * uf, memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_sub2 (mif->msg_queue, e); + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 0); + memif_msg_send 
(mif->conn_fd, &e->msg, e->fd); + return 0; +} + +clib_error_t * +memif_master_conn_fd_write_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (!p) + return 0; + + mif = vec_elt_at_index (mm->interfaces, p[0]); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_write_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_error (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + clib_error_t *err; + + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + + return 0; +} + +clib_error_t * +memif_master_conn_fd_error (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + memif_if_t *mif; + clib_error_t *err; + mif = vec_elt_at_index (mm->interfaces, p[0]); + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + } + else + { + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) + { + vec_del1 (msf->pending_file_indices, i); + memif_file_del (uf); + return 0; + } + } + + clib_warning ("Error on unknown file descriptor %d", uf->file_descriptor); + memif_file_del (uf); + return 0; +} + + +clib_error_t * +memif_conn_fd_accept_ready (clib_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index 
(mm->socket_files, uf->private_data); + int addr_len; + struct sockaddr_un client; + int conn_fd; + clib_file_t template = { 0 }; + uword clib_file_index = ~0; + clib_error_t *err; + + + addr_len = sizeof (client); + conn_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, (socklen_t *) & addr_len); + + if (conn_fd < 0) + return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); + + template.read_function = memif_master_conn_fd_read_ready; + template.write_function = memif_master_conn_fd_write_ready; + template.error_function = memif_master_conn_fd_error; + template.file_descriptor = conn_fd; + template.private_data = uf->private_data; + + memif_file_add (&clib_file_index, &template); + + err = memif_msg_enq_hello (conn_fd); + if (err) + { + clib_error_report (err); + memif_file_del_by_index (clib_file_index); + } + else + vec_add1 (msf->pending_file_indices, clib_file_index); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |