From d51a1f6ffe80bcd8f44e72fa4a98ac70225ba519 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 5 Jun 2017 15:37:58 +0200 Subject: memif: complete refactor of socket handling code Change-Id: I4d41def83a23f13701f1ddcea722d481e4c85cbc Signed-off-by: Damjan Marion --- src/plugins/memif.am | 1 + src/plugins/memif/cli.c | 163 ++++-- src/plugins/memif/device.c | 69 +-- src/plugins/memif/memif.api | 17 +- src/plugins/memif/memif.c | 1144 ++++++++++++++-------------------------- src/plugins/memif/memif.h | 360 +++++-------- src/plugins/memif/memif_api.c | 52 +- src/plugins/memif/memif_test.c | 21 +- src/plugins/memif/node.c | 83 +-- src/plugins/memif/private.h | 296 +++++++++++ src/plugins/memif/socket.c | 736 ++++++++++++++++++++++++++ 11 files changed, 1796 insertions(+), 1146 deletions(-) create mode 100644 src/plugins/memif/private.h create mode 100644 src/plugins/memif/socket.c diff --git a/src/plugins/memif.am b/src/plugins/memif.am index bd01b2f8..15147e77 100644 --- a/src/plugins/memif.am +++ b/src/plugins/memif.am @@ -19,6 +19,7 @@ memif_plugin_la_SOURCES = memif/memif.c \ memif/cli.c \ memif/node.c \ memif/device.c \ + memif/socket.c \ memif/memif_plugin.api.h memif_test_plugin_la_SOURCES = \ diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c index 88c09e98..2d126aac 100644 --- a/src/plugins/memif/cli.c +++ b/src/plugins/memif/cli.c @@ -24,20 +24,7 @@ #include #include - -static uword -unformat_memif_queues (unformat_input_t * input, va_list * args) -{ - u32 *rx_queues = va_arg (*args, u32 *); - u32 *tx_queues = va_arg (*args, u32 *); - - if (unformat (input, "rx-queues %u", rx_queues)) - ; - if (unformat (input, "tx-queues %u", tx_queues)) - ; - - return 1; -} +#include static clib_error_t * memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -57,18 +44,23 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, 
"key 0x%" PRIx64, &args.key)) + if (unformat (line_input, "id %u", &args.id)) ; else if (unformat (line_input, "socket %s", &args.socket_filename)) ; + else if (unformat (line_input, "secret %s", &args.secret)) + ; else if (unformat (line_input, "ring-size %u", &ring_size)) ; + else if (unformat (line_input, "rx-queues %u", &rx_queues)) + ; + else if (unformat (line_input, "tx-queues %u", &tx_queues)) + ; else if (unformat (line_input, "buffer-size %u", &args.buffer_size)) ; else if (unformat (line_input, "master")) args.is_master = 1; - else if (unformat (line_input, "slave %U", - unformat_memif_queues, &rx_queues, &tx_queues)) + else if (unformat (line_input, "slave")) args.is_master = 0; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.hw_addr)) @@ -94,6 +86,9 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, r = memif_create_if (vm, &args); + vec_free (args.socket_filename); + vec_free (args.secret); + if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 && r >= VNET_API_ERROR_SYSCALL_ERROR_10) return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); @@ -102,7 +97,7 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "Invalid interface name"); if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface already exists"); + return clib_error_return (0, "Interface with same id already exists"); return 0; } @@ -110,9 +105,9 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_create_command, static) = { .path = "create memif", - .short_help = "create memif [key ] [socket ] " + .short_help = "create memif [id ] [socket ] " "[ring-size ] [buffer-size ] [hw-addr ] " - "] [tx-queues ]>", + " [rx-queues ] [tx-queues ]", .function = memif_create_command_fn, }; /* *INDENT-ON* */ @@ -122,8 +117,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, 
vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - u64 key = 0; - u8 key_defined = 0; + u32 sw_if_index = ~0; + vnet_hw_interface_t *hw; + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vnet_main_t *vnm = vnet_get_main (); /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -131,18 +129,27 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "key 0x%" PRIx64, &key)) - key_defined = 1; + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } unformat_free (line_input); - if (!key_defined) - return clib_error_return (0, "missing key"); + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || memif_device_class.index != hw->dev_class_index) + return clib_error_return (0, "not a memif interface"); - memif_delete_if (vm, key); + mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + memif_delete_if (vm, mif); return 0; } @@ -150,11 +157,59 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_delete_command, static) = { .path = "delete memif", - .short_help = "delete memif key ", + .short_help = "delete memif { | sw_if_index }", .function = memif_delete_command_fn, }; /* *INDENT-ON* */ +static u8 * +format_memif_if_flags (u8 * s, va_list * args) +{ + u32 flags = va_arg (*args, u32); +#define _(a,b,c) if ( flags & (1 << a)) s = format (s, " %s", c); + foreach_memif_if_flag +#undef _ + return s; +} + +static u8 * +format_memif_if_mode (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, 
memif_if_t *); + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + return format (s, "ethernet"); + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + return format (s, "ip"); + if (mif->mode == MEMIF_INTERFACE_MODE_PUNT_INJECT) + return format (s, "punt-inject"); + return format (s, "unknown mode (%u)", mif->mode);; +} + +static u8 * +format_memif_queue (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + memif_queue_t *mq = va_arg (*args, memif_queue_t *); + uword i = va_arg (*args, uword); + uword indent = format_get_indent (s); + + s = format (s, "%U%s ring %u:\n", + format_white_space, indent, + (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? + "slave-to-master" : "master-to-slave", i); + s = format (s, "%Uregion %u offset %u ring-size %u int-fd %d\n", + format_white_space, indent + 4, + mq->region, mq->offset, (1 << mq->log2_ring_size), mq->int_fd); + + if (mq->ring) + s = format (s, "%Uhead %u tail %u flags 0x%04x interrupts %u\n", + format_white_space, indent + 4, + mq->ring->head, mq->ring->tail, mq->ring->flags, + mq->int_count); + + return s; +} + static clib_error_t * memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -162,39 +217,43 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, memif_main_t *mm = &memif_main; memif_if_t *mif; vnet_main_t *vnm = vnet_get_main (); - int i; + memif_queue_t *mq; + uword i; /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces, ({ + memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, vnm, mif->sw_if_index); - vlib_cli_output (vm, " key 0x%" PRIx64 " file %s", mif->key, - mif->socket_filename); - vlib_cli_output (vm, " listener %d conn-fd %d int-fd %d", mif->listener_index, - mif->connection.fd, mif->interrupt_line.fd); - vlib_cli_output (vm, " ring-size %u num-s2m-rings %u num-m2s-rings %u buffer_size %u", - (1 << mif->log2_ring_size), - 
mif->num_s2m_rings, - mif->num_m2s_rings, - mif->buffer_size); - for (i=0; i < mif->num_s2m_rings; i++) + if (mif->remote_name) + vlib_cli_output (vm, " remote-name \"%s\"", mif->remote_name); + if (mif->remote_if_name) + vlib_cli_output (vm, " remote-interface \"%s\"", mif->remote_if_name); + vlib_cli_output (vm, " id %d mode %U file %s", mif->id, + format_memif_if_mode, mif, msf->filename); + vlib_cli_output (vm, " flags%U", format_memif_if_flags, mif->flags); + vlib_cli_output (vm, " listener-fd %d conn-fd %d", msf->fd, mif->conn_fd); + vlib_cli_output (vm, " num-s2m-rings %u num-m2s-rings %u buffer-size %u", + mif->run.num_s2m_rings, + mif->run.num_m2s_rings, + mif->run.buffer_size); + + if (mif->local_disc_string) + vlib_cli_output (vm, " local-disc-reason \"%s\"", mif->local_disc_string); + if (mif->remote_disc_string) + vlib_cli_output (vm, " remote-disc-reason \"%s\"", mif->remote_disc_string); + + vec_foreach_index (i, mif->tx_queues) { - memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_S2M, i); - if (ring) - { - vlib_cli_output (vm, " slave-to-master ring %u:", i); - vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); - } + mq = vec_elt_at_index (mif->tx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); } - for (i=0; i < mif->num_m2s_rings; i++) + vec_foreach_index (i, mif->rx_queues) { - memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_M2S, i); - if (ring) - { - vlib_cli_output (vm, " master-to-slave ring %u:", i); - vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); - } + mq = vec_elt_at_index (mif->rx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); } })); /* *INDENT-ON* */ diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index f496b17d..870dd354 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -26,6 +26,7 @@ #include #include +#include #define foreach_memif_tx_func_error \ _(NO_FREE_SLOTS, "no free tx slots") \ @@ 
-45,8 +46,7 @@ static char *memif_tx_func_error_strings[] = { #undef _ }; - -static u8 * +u8 * format_memif_device_name (u8 * s, va_list * args) { u32 i = va_arg (*args, u32); @@ -91,27 +91,30 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, memif_ring_type_t type) { - u8 rid; + u8 qid; memif_ring_t *ring; u32 *buffers = vlib_frame_args (frame); u32 n_left = frame->n_vectors; - u16 ring_size = 1 << mif->log2_ring_size; - u16 mask = ring_size - 1; + u16 ring_size, mask; u16 head, tail; u16 free_slots; u32 thread_index = vlib_get_thread_index (); - u8 tx_queues = memif_get_tx_queues (mif); + u8 tx_queues = vec_len (mif->tx_queues); + memif_queue_t *mq; if (tx_queues < vec_len (vlib_mains)) { - rid = thread_index % tx_queues; + qid = thread_index % tx_queues; clib_spinlock_lock_if_init (&mif->lockp); } else { - rid = thread_index; + qid = thread_index; } - ring = memif_get_ring (mif, type, rid); + mq = vec_elt_at_index (mif->tx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; /* free consumed buffers */ @@ -214,10 +217,11 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); - if (mif->interrupt_line.fd > 0) + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1) { - u8 b = rid; - CLIB_UNUSED (int r) = write (mif->interrupt_line.fd, &b, sizeof (b)); + u64 b = 1; + CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b)); + mq->int_count++; } return frame->n_vectors; @@ -262,35 +266,35 @@ memif_clear_hw_interface_counters (u32 instance) /* Nothing for now */ } +static clib_error_t * +memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, + vnet_hw_interface_rx_mode mode) +{ + memif_main_t *mm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, 
hw->dev_instance); + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT; + else + mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT; + + return 0; +} + static clib_error_t * memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { - memif_main_t *apm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - memif_msg_t msg = { 0 }; + memif_main_t *mm = &memif_main; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); static clib_error_t *error = 0; if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; else - { - mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; - if (!(mif->flags & MEMIF_IF_FLAG_DELETING) - && mif->connection.index != ~0) - { - msg.version = MEMIF_VERSION; - msg.type = MEMIF_MSG_TYPE_DISCONNECT; - if (send (mif->connection.fd, &msg, sizeof (msg), 0) < 0) - { - clib_unix_warning ("Failed to send disconnect request"); - error = clib_error_return_unix (0, "send fd %d", - mif->connection.fd); - memif_disconnect (vm, mif); - } - } - } + mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; return error; } @@ -317,6 +321,7 @@ VNET_DEVICE_CLASS (memif_device_class) = { .clear_counters = memif_clear_hw_interface_counters, .admin_up_down_function = memif_interface_admin_up_down, .subif_add_del_function = memif_subif_add_del_function, + .rx_mode_change_function = memif_interface_rx_mode_change, }; VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class, diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index b0a351aa..c9632d10 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -17,9 +17,10 @@ @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param role - role of 
the interface in the connection (master/slave) + @param mode - interface mode @param rx_queues - number of rx queues (only valid for slave) #param tx_queues - number of tx queues (only valid for slave) - @param key - 64bit integer used to authenticate and match opposite sides + @param id - 32bit integer used to authenticate and match opposite sides of the connection @param socket_filename - filename of the socket to be used for connection establishment @@ -33,12 +34,12 @@ define memif_create u32 context; u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ u8 rx_queues; /* optional, default is 1 */ u8 tx_queues; /* optional, default is 1 */ - u64 key; /* optional, default is 0 */ - u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" - and can be changed in VPP startup config */ - + u32 id; /* optional, default is 0 */ + u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" */ + u8 secret[24]; /* optional, default is "" */ u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */ u16 buffer_size; /* optional, default is 2048 bytes */ u8 hw_addr[6]; /* optional, randomly generated if not defined */ @@ -74,8 +75,9 @@ autoreply define memif_delete @param sw_if_index - index of the interface @param if_name - name of the interface @param hw_addr - interface MAC address - @param key - key associated with the interface + @param id - id associated with the interface @param role - role of the interface in the connection (master/slave) + @param mode - interface mode @param socket_filename - name of the socket used by this interface to establish new connections @param ring_size - the number of entries of RX/TX rings @@ -93,8 +95,9 @@ define memif_details u8 hw_addr[6]; /* memif specific parameters */ - u64 key; + u32 id; u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ u8 socket_filename[128]; u32 ring_size; u16 buffer_size; /* optional, 
default is 2048 bytes */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7d2a09a2..f082b58c 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -1,6 +1,6 @@ /* *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -15,6 +15,7 @@ *------------------------------------------------------------------ */ + #define _GNU_SOURCE #include #include @@ -26,7 +27,9 @@ #include #include #include +#include #include +#include #include #include @@ -34,21 +37,10 @@ #include #include #include - -#define MEMIF_DEBUG 1 - -#if MEMIF_DEBUG == 1 -#define DEBUG_LOG(...) clib_warning(__VA_ARGS__) -#define DEBUG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) -#else -#define DEBUG_LOG(...) 
-#endif +#include memif_main_t memif_main; -static clib_error_t *memif_conn_fd_read_ready (unix_file_t * uf); -static clib_error_t *memif_int_fd_read_ready (unix_file_t * uf); - static u32 memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) { @@ -57,604 +49,299 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) } static void -memif_remove_pending_conn (memif_pending_conn_t * pending_conn) -{ - memif_main_t *mm = &memif_main; - - unix_file_del (&unix_main, - unix_main.file_pool + pending_conn->connection.index); - pool_put (mm->pending_conns, pending_conn); -} - -static void -memif_connect (vlib_main_t * vm, memif_if_t * mif) +memif_queue_intfd_close (memif_queue_t * mq) { - vnet_main_t *vnm = vnet_get_main (); - int num_rings = mif->num_s2m_rings + mif->num_m2s_rings; - memif_ring_data_t *rd = NULL; - vnet_hw_interface_t *hw; - u8 rid, rx_queues; - int ret; - - vec_validate_aligned (mif->ring_data, num_rings - 1, CLIB_CACHE_LINE_BYTES); - vec_foreach (rd, mif->ring_data) - { - rd->last_head = 0; - } - - mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; - mif->flags |= MEMIF_IF_FLAG_CONNECTED; - vnet_hw_interface_set_flags (vnm, mif->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - - hw = vnet_get_hw_interface (vnm, mif->hw_if_index); - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; - vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, - memif_input_node.index); - rx_queues = memif_get_rx_queues (mif); - for (rid = 0; rid < rx_queues; rid++) + if (mq->int_unix_file_index != ~0) { - vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, rid, ~0); - ret = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, rid, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT); - if (ret) - DEBUG_LOG ("Warning: unable to set rx mode for interface %d " - "queue %d: rc=%d", mif->hw_if_index, rid, ret); + memif_file_del_by_index (mq->int_unix_file_index); + mq->int_unix_file_index = ~0; + mq->int_fd = -1; } -} - -static void 
-memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) -{ - vnet_main_t *vnm = vnet_get_main (); - u8 rid, rx_queues; - int rv; - memif_shm_t **shm; - - mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); - if (mif->hw_if_index != ~0) - vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - - if (mif->connection.index != ~0) + else if (mq->int_fd > -1) { - unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); - mif->connection.index = ~0; - mif->connection.fd = -1; /* closed in unix_file_del */ + close (mq->int_fd); + mq->int_fd = -1; } - - rx_queues = memif_get_rx_queues (mif); - for (rid = 0; rid < rx_queues; rid++) - { - rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, rid); - if (rv) - DEBUG_LOG ("Warning: unable to unassign interface %d, " - "queue %d: rc=%d", mif->hw_if_index, rid, rv); - } - - shm = (memif_shm_t **) mif->regions; - rv = munmap ((void *) *shm, mif->shared_mem_size); - if (rv) - DEBUG_UNIX_LOG ("Error: failed munmap call"); - - vec_free (mif->regions); } void -memif_disconnect (vlib_main_t * vm, memif_if_t * mif) -{ - if (mif->interrupt_line.index != ~0) - { - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; /* closed in unix_file_del */ - } - - memif_disconnect_do (vm, mif); -} - -static clib_error_t * -memif_process_connect_req (memif_pending_conn_t * pending_conn, - memif_msg_t * req, struct ucred *slave_cr, - int shm_fd, int int_fd) +memif_disconnect (memif_if_t * mif, clib_error_t * err) { - memif_main_t *mm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - int fd = pending_conn->connection.fd; - unix_file_t *uf = 0; - memif_if_t *mif = 0; - memif_msg_t resp = { 0 }; - unix_file_t template = { 0 }; - void *shm; - uword *p; - u8 retval = 0; - static clib_error_t *error = 0; - - if (shm_fd == -1) - { - DEBUG_LOG - ("Connection request is missing shared memory file descriptor"); - 
retval = 1; - goto response; - } - - if (int_fd == -1) - { - DEBUG_LOG - ("Connection request is missing interrupt line file descriptor"); - retval = 2; - goto response; - } + vnet_main_t *vnm = vnet_get_main (); + memif_region_t *mr; + memif_queue_t *mq; + int i; - if (slave_cr == NULL) - { - DEBUG_LOG ("Connection request is missing slave credentials"); - retval = 3; - goto response; - } + if (mif == 0) + return; - p = mhash_get (&mm->if_index_by_key, &req->key); - if (!p) - { - DEBUG_LOG - ("Connection request with unmatched key (0x%" PRIx64 ")", req->key); - retval = 4; - goto response; - } + DBG ("disconnect %u (%v)", mif->dev_instance, err ? err->what : 0); - mif = vec_elt_at_index (mm->interfaces, *p); - if (mif->listener_index != pending_conn->listener_index) + if (err) { - DEBUG_LOG - ("Connection request with non-matching listener (%d vs. %d)", - pending_conn->listener_index, mif->listener_index); - retval = 5; - goto response; + clib_error_t *e = 0; + mif->local_disc_string = vec_dup (err->what); + if (mif->conn_fd > -1) + e = memif_msg_send_disconnect (mif, err); + clib_error_free (e); } - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - { - DEBUG_LOG ("Memif slave does not accept connection requests"); - retval = 6; - goto response; - } + /* set interface down */ + mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); + if (mif->hw_if_index != ~0) + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - if (mif->connection.fd != -1) + /* close connection socket */ + if (mif->conn_unix_file_index != ~0) { - DEBUG_LOG - ("Memif with key 0x%" PRIx64 " is already connected", mif->key); - retval = 7; - goto response; + memif_file_del_by_index (mif->conn_unix_file_index); + mif->conn_unix_file_index = ~0; } + else if (mif->conn_fd > -1) + close (mif->conn_fd); + mif->conn_fd = -1; - if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) - { - /* just silently decline the request */ - retval = 8; - goto response; - } + vec_foreach_index (i, 
mif->rx_queues) + { + mq = vec_elt_at_index (mif->rx_queues, i); + if (mq->ring) + { + int rv; + rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, i); + if (rv) + DBG ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", mif->hw_if_index, i, rv); + mq->ring = 0; + } + } - if (req->shared_mem_size < sizeof (memif_shm_t)) - { - DEBUG_LOG - ("Unexpectedly small shared memory segment received from slave."); - retval = 9; - goto response; - } + /* free tx and rx queues */ + vec_foreach (mq, mif->rx_queues) memif_queue_intfd_close (mq); + vec_free (mif->rx_queues); - if ((shm = - mmap (NULL, req->shared_mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, 0)) == MAP_FAILED) - { - DEBUG_UNIX_LOG - ("Failed to map shared memory segment received from slave memif"); - error = clib_error_return_unix (0, "mmap fd %d", shm_fd); - retval = 10; - goto response; - } + vec_foreach (mq, mif->tx_queues) memif_queue_intfd_close (mq); + vec_free (mif->tx_queues); - if (((memif_shm_t *) shm)->cookie != 0xdeadbeef) - { - DEBUG_LOG - ("Possibly corrupted shared memory segment received from slave memif"); - munmap (shm, req->shared_mem_size); - retval = 11; - goto response; - } + /* free memory regions */ + vec_foreach (mr, mif->regions) + { + int rv; + if ((rv = munmap (mr->shm, mr->region_size))) + clib_warning ("munmap failed, rv = %d", rv); + if (mr->fd > -1) + close (mr->fd); + } + vec_free (mif->regions); - mif->shared_mem_size = req->shared_mem_size; - mif->log2_ring_size = req->log2_ring_size; - mif->num_s2m_rings = req->num_s2m_rings; - mif->num_m2s_rings = req->num_m2s_rings; - mif->buffer_size = req->buffer_size; - mif->remote_pid = slave_cr->pid; - mif->remote_uid = slave_cr->uid; - vec_add1 (mif->regions, shm); - - /* register interrupt line */ - mif->interrupt_line.fd = int_fd; - template.read_function = memif_int_fd_read_ready; - template.file_descriptor = int_fd; - template.private_data = mif->if_index; - mif->interrupt_line.index = 
unix_file_add (&unix_main, &template); - - /* change context for future messages */ - uf = vec_elt_at_index (unix_main.file_pool, pending_conn->connection.index); - uf->private_data = mif->if_index << 1; - mif->connection = pending_conn->connection; - pool_put (mm->pending_conns, pending_conn); - pending_conn = 0; - - memif_connect (vm, mif); - -response: - resp.version = MEMIF_VERSION; - resp.type = MEMIF_MSG_TYPE_CONNECT_RESP; - resp.retval = retval; - if (send (fd, &resp, sizeof (resp), 0) < 0) - { - DEBUG_UNIX_LOG ("Failed to send connection response"); - error = clib_error_return_unix (0, "send fd %d", fd); - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect (vm, mif); - } - if (retval > 0) - { - if (shm_fd >= 0) - close (shm_fd); - if (int_fd >= 0) - close (int_fd); - } - return error; + mif->remote_pid = 0; + vec_free (mif->remote_name); + vec_free (mif->remote_if_name); + clib_fifo_free (mif->msg_queue); } static clib_error_t * -memif_process_connect_resp (memif_if_t * mif, memif_msg_t * resp) +memif_int_fd_read_ready (unix_file_t * uf) { - vlib_main_t *vm = vlib_get_main (); - - if ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) == 0) - { - DEBUG_LOG ("Memif master does not accept connection responses"); - return 0; - } + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + u16 qid = uf->private_data & 0xFFFF; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 16); + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + u64 b; + ssize_t size; - if ((mif->flags & MEMIF_IF_FLAG_CONNECTING) == 0) + size = read (uf->file_descriptor, &b, sizeof (b)); + if (size < 0) { - DEBUG_LOG ("Unexpected connection response"); + DBG_UNIX_LOG ("Failed to read from socket"); return 0; } - if (resp->retval == 0) - memif_connect (vm, mif); - else - memif_disconnect (vm, mif); + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, qid); + mq->int_count++; return 0; } -static clib_error_t 
* -memif_conn_fd_read_ready (unix_file_t * uf) -{ - memif_main_t *mm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - memif_if_t *mif = 0; - memif_pending_conn_t *pending_conn = 0; - int fd_array[2] = { -1, -1 }; - char ctl[CMSG_SPACE (sizeof (fd_array)) + - CMSG_SPACE (sizeof (struct ucred))] = { 0 }; - struct msghdr mh = { 0 }; - struct iovec iov[1]; - struct ucred *cr = 0; - memif_msg_t msg = { 0 }; - struct cmsghdr *cmsg; - ssize_t size; - static clib_error_t *error = 0; - - iov[0].iov_base = (void *) &msg; - iov[0].iov_len = sizeof (memif_msg_t); - mh.msg_iov = iov; - mh.msg_iovlen = 1; - mh.msg_control = ctl; - mh.msg_controllen = sizeof (ctl); - - /* grab the appropriate context */ - if (uf->private_data & 1) - pending_conn = vec_elt_at_index (mm->pending_conns, - uf->private_data >> 1); - else - mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 1); - - /* Stop workers to avoid end of the world */ - vlib_worker_thread_barrier_sync (vlib_get_main ()); - /* receive the incoming message */ - size = recvmsg (uf->file_descriptor, &mh, 0); - if (size != sizeof (memif_msg_t)) - { - if (size == 0) - { - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect_do (vm, mif); - goto return_ok; - } +clib_error_t * +memif_connect (memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + unix_file_t template = { 0 }; + memif_region_t *mr; + int i; - DEBUG_UNIX_LOG ("Malformed message received on fd %d", - uf->file_descriptor); - error = clib_error_return_unix (0, "recvmsg fd %d", - uf->file_descriptor); - goto disconnect; - } + DBG ("connect %u", mif->dev_instance); - /* check version of the sender's memif plugin */ - if (msg.version != MEMIF_VERSION) - { - DEBUG_LOG ("Memif version mismatch"); - goto disconnect; - } + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); - /* process the message based on its type */ - switch (msg.type) - { - case MEMIF_MSG_TYPE_CONNECT_REQ: - if (pending_conn == 0) - 
DEBUG_LOG ("Received unexpected connection request"); - else - { - /* Read anciliary data */ - cmsg = CMSG_FIRSTHDR (&mh); - while (cmsg) - { - if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_CREDENTIALS) - { - cr = (struct ucred *) CMSG_DATA (cmsg); - } - else if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); - } - cmsg = CMSG_NXTHDR (&mh, cmsg); - } - error = memif_process_connect_req (pending_conn, &msg, cr, - fd_array[0], fd_array[1]); - } - break; + vec_foreach (mr, mif->regions) + { + if (mr->shm) + continue; - case MEMIF_MSG_TYPE_CONNECT_RESP: - if (mif == 0) - DEBUG_LOG ("Received unexpected connection response"); - else - error = memif_process_connect_resp (mif, &msg); - break; + if (mr->fd < 0) + clib_error_return (0, "no memory region fd"); - case MEMIF_MSG_TYPE_DISCONNECT: - goto disconnect; + if ((mr->shm = mmap (NULL, mr->region_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mr->fd, 0)) == MAP_FAILED) + return clib_error_return_unix (0, "mmap"); + } - default: - DEBUG_LOG ("Received unknown message type"); - goto disconnect; - } + template.read_function = memif_int_fd_read_ready; -return_ok: - vlib_worker_thread_barrier_release (vlib_get_main ()); - return error; + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); -disconnect: - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect (vm, mif); - vlib_worker_thread_barrier_release (vlib_get_main ()); - return error; -} + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + } -static clib_error_t * -memif_int_fd_read_ready (unix_file_t * uf) -{ - memif_main_t *mm = &memif_main; - vnet_main_t *vnm = vnet_get_main (); - memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); - u8 b; - ssize_t size; + 
vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + int rv; + + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + + if (mq->int_fd > -1) + { + template.file_descriptor = mq->int_fd; + template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF); + memif_file_add (&mq->int_unix_file_index, &template); + } + vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); + rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + if (rv) + clib_warning + ("Warning: unable to set rx mode for interface %d queue %d: " + "rc=%d", mif->hw_if_index, i, rv); + } - size = read (uf->file_descriptor, &b, sizeof (b)); - if (0 == size) - { - /* interrupt line was disconnected */ - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; - } - else if (size < 0) - DEBUG_UNIX_LOG ("Failed to read from socket"); - else - vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, b); + mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; + mif->flags |= MEMIF_IF_FLAG_CONNECTED; + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); return 0; } -static clib_error_t * -memif_conn_fd_accept_ready (unix_file_t * uf) +static_always_inline memif_ring_t * +memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) { - memif_main_t *mm = &memif_main; - memif_listener_t *listener = 0; - memif_pending_conn_t *pending_conn = 0; - int addr_len; - struct sockaddr_un client; - int conn_fd; - unix_file_t template = { 0 }; - - listener = pool_elt_at_index (mm->listeners, uf->private_data); - - addr_len = sizeof (client); - conn_fd = accept (uf->file_descriptor, - (struct sockaddr *) &client, (socklen_t *) & addr_len); - - if (conn_fd < 0) - return 
clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); - - pool_get (mm->pending_conns, pending_conn); - pending_conn->index = pending_conn - mm->pending_conns; - pending_conn->listener_index = listener->index; - pending_conn->connection.fd = conn_fd; - - template.read_function = memif_conn_fd_read_ready; - template.file_descriptor = conn_fd; - template.private_data = (pending_conn->index << 1) | 1; - pending_conn->connection.index = unix_file_add (&unix_main, &template); - - return 0; + if (vec_len (mif->regions) == 0) + return NULL; + void *p = mif->regions[0].shm; + int ring_size = + sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size); + p += (ring_num + type * mif->run.num_s2m_rings) * ring_size; + + return (memif_ring_t *) p; } -static void -memif_connect_master (vlib_main_t * vm, memif_if_t * mif) +clib_error_t * +memif_init_regions_and_queues (memif_if_t * mif) { - memif_msg_t msg; - struct msghdr mh = { 0 }; - struct iovec iov[1]; - struct cmsghdr *cmsg; - int mfd = -1; - int rv; - int fd_array[2] = { -1, -1 }; - char ctl[CMSG_SPACE (sizeof (fd_array))]; memif_ring_t *ring = NULL; int i, j; - void *shm = 0; u64 buffer_offset; - unix_file_t template = { 0 }; + memif_region_t *r; - msg.version = MEMIF_VERSION; - msg.type = MEMIF_MSG_TYPE_CONNECT_REQ; - msg.key = mif->key; - msg.log2_ring_size = mif->log2_ring_size; - msg.num_s2m_rings = mif->num_s2m_rings; - msg.num_m2s_rings = mif->num_m2s_rings; - msg.buffer_size = mif->buffer_size; + vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); + r = vec_elt_at_index (mif->regions, 0); - buffer_offset = sizeof (memif_shm_t) + - (mif->num_s2m_rings + mif->num_m2s_rings) * + buffer_offset = (mif->run.num_s2m_rings + mif->run.num_m2s_rings) * (sizeof (memif_ring_t) + - sizeof (memif_desc_t) * (1 << mif->log2_ring_size)); + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size)); - msg.shared_mem_size = buffer_offset + - mif->buffer_size * (1 << mif->log2_ring_size) * 
(mif->num_s2m_rings + - mif->num_m2s_rings); - - if ((mfd = memfd_create ("shared mem", MFD_ALLOW_SEALING)) == -1) - { - DEBUG_LOG ("Failed to create anonymous file"); - goto error; - } + r->region_size = buffer_offset + + mif->run.buffer_size * (1 << mif->run.log2_ring_size) * + (mif->run.num_s2m_rings + mif->run.num_m2s_rings); - if ((fcntl (mfd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - DEBUG_UNIX_LOG ("Failed to seal an anonymous file off from truncating"); - goto error; - } + if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1) + return clib_error_return_unix (0, "memfd_create"); - if ((ftruncate (mfd, msg.shared_mem_size)) == -1) - { - DEBUG_UNIX_LOG ("Failed to extend the size of an anonymous file"); - goto error; - } + if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - if ((shm = mmap (NULL, msg.shared_mem_size, PROT_READ | PROT_WRITE, - MAP_SHARED, mfd, 0)) == MAP_FAILED) - { - DEBUG_UNIX_LOG ("Failed to map anonymous file into memory"); - goto error; - } + if ((ftruncate (r->fd, r->region_size)) == -1) + return clib_error_return_unix (0, "ftruncate"); - mif->shared_mem_size = msg.shared_mem_size; - vec_add1 (mif->regions, shm); - ((memif_shm_t *) mif->regions[0])->cookie = 0xdeadbeef; + if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE, + MAP_SHARED, r->fd, 0)) == MAP_FAILED) + return clib_error_return_unix (0, "mmap"); - for (i = 0; i < mif->num_s2m_rings; i++) + for (i = 0; i < mif->run.num_s2m_rings; i++) { ring = memif_get_ring (mif, MEMIF_RING_S2M, i); ring->head = ring->tail = 0; - for (j = 0; j < (1 << mif->log2_ring_size); j++) + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) { - u16 slot = i * (1 << mif->log2_ring_size) + j; + u16 slot = i * (1 << mif->run.log2_ring_size) + j; ring->desc[j].region = 0; ring->desc[j].offset = - buffer_offset + (u32) (slot * mif->buffer_size); - 
ring->desc[j].buffer_length = mif->buffer_size; + buffer_offset + (u32) (slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; } } - for (i = 0; i < mif->num_m2s_rings; i++) + for (i = 0; i < mif->run.num_m2s_rings; i++) { ring = memif_get_ring (mif, MEMIF_RING_M2S, i); ring->head = ring->tail = 0; - for (j = 0; j < (1 << mif->log2_ring_size); j++) + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) { u16 slot = - (i + mif->num_s2m_rings) * (1 << mif->log2_ring_size) + j; + (i + mif->run.num_s2m_rings) * (1 << mif->run.log2_ring_size) + j; ring->desc[j].region = 0; ring->desc[j].offset = - buffer_offset + (u32) (slot * mif->buffer_size); - ring->desc[j].buffer_length = mif->buffer_size; + buffer_offset + (u32) (slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; } } - iov[0].iov_base = (void *) &msg; - iov[0].iov_len = sizeof (memif_msg_t); - mh.msg_iov = iov; - mh.msg_iovlen = 1; - - /* create interrupt socket */ - if (socketpair (AF_UNIX, SOCK_STREAM, 0, fd_array) < 0) - { - DEBUG_UNIX_LOG ("Failed to create a pair of connected sockets"); - goto error; - } - - mif->interrupt_line.fd = fd_array[0]; - template.read_function = memif_int_fd_read_ready; - template.file_descriptor = mif->interrupt_line.fd; - template.private_data = mif->if_index; - mif->interrupt_line.index = unix_file_add (&unix_main, &template); - - memset (&ctl, 0, sizeof (ctl)); - mh.msg_control = ctl; - mh.msg_controllen = sizeof (ctl); - cmsg = CMSG_FIRSTHDR (&mh); - cmsg->cmsg_len = CMSG_LEN (sizeof (fd_array)); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - fd_array[0] = mfd; - memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); - - mif->flags |= MEMIF_IF_FLAG_CONNECTING; - rv = sendmsg (mif->connection.fd, &mh, 0); - if (rv < 0) - { - DEBUG_UNIX_LOG ("Failed to send memif connection request"); - goto error; - } + ASSERT (mif->tx_queues == 0); + vec_validate_aligned 
(mif->tx_queues, mif->run.num_s2m_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[tx queue %u]", i); + mq->int_unix_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } - /* No need to keep the descriptor open, - * mmap creates an extra reference to the underlying file */ - close (mfd); - mfd = -1; - /* This FD is given to peer, so we can close it */ - close (fd_array[1]); - fd_array[1] = -1; - return; + ASSERT (mif->rx_queues == 0); + vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[rx queue %u]", i); + mq->int_unix_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } -error: - if (mfd > -1) - close (mfd); - if (fd_array[1] > -1) - close (fd_array[1]); - memif_disconnect (vm, mif); + return 0; } static uword @@ -665,24 +352,22 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) struct sockaddr_un sun; int sockfd; uword *event_data = 0, event_type; - unix_file_t template = { 0 }; u8 enabled = 0; f64 start_time, last_run_duration = 0, now; - sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0); if (sockfd < 0) { - DEBUG_UNIX_LOG ("socket AF_UNIX"); + DBG_UNIX_LOG ("socket AF_UNIX"); return 0; } sun.sun_family = 
AF_UNIX; - template.read_function = memif_conn_fd_read_ready; while (1) { if (enabled) - vlib_process_wait_for_event_or_clock (vm, - (f64) 3 - last_run_duration); + vlib_process_wait_for_event_or_clock (vm, (f64) 3 - + last_run_duration); else vlib_process_wait_for_event (vm); @@ -707,6 +392,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces, ({ + memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); /* Allow no more than 10us without a pause */ now = vlib_time_now (vm); if (now > start_time + 10e-6) @@ -726,24 +412,31 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) { - strncpy (sun.sun_path, (char *) mif->socket_filename, + strncpy (sun.sun_path, (char *) msf->filename, sizeof (sun.sun_path) - 1); if (connect (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { - mif->connection.fd = sockfd; - template.file_descriptor = sockfd; - template.private_data = mif->if_index << 1; - mif->connection.index = unix_file_add (&unix_main, &template); - memif_connect_master (vm, mif); + unix_file_t t = { 0 }; + + mif->conn_fd = sockfd; + t.read_function = memif_slave_conn_fd_read_ready; + t.write_function = memif_slave_conn_fd_write_ready; + t.error_function = memif_slave_conn_fd_error; + t.file_descriptor = mif->conn_fd; + t.private_data = mif->dev_instance; + memif_file_add (&mif->conn_unix_file_index, &t); + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + + mif->flags |= MEMIF_IF_FLAG_CONNECTING; /* grab another fd */ - sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0); if (sockfd < 0) { - DEBUG_UNIX_LOG ("socket AF_UNIX"); + DBG_UNIX_LOG ("socket AF_UNIX"); return 0; } } @@ -763,75 +456,62 @@ VLIB_REGISTER_NODE (memif_process_node,static) = { }; /* *INDENT-ON* */ -static void -memif_close_if (memif_main_t 
* mm, memif_if_t * mif) +int +memif_delete_if (vlib_main_t * vm, memif_if_t * mif) { - vlib_main_t *vm = vlib_get_main (); - memif_listener_t *listener = 0; - memif_pending_conn_t *pending_conn = 0; + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, mif->socket_file_index); + clib_error_t *err; + + mif->flags |= MEMIF_IF_FLAG_DELETING; + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); + + err = clib_error_return (0, "interface deleted"); + memif_disconnect (mif, err); + clib_error_free (err); - memif_disconnect (vm, mif); + /* remove the interface */ + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + + /* free interface data structures */ + clib_spinlock_free (&mif->lockp); + mhash_unset (&msf->dev_instance_by_id, &mif->id, 0); - if (mif->listener_index != (uword) ~ 0) + /* remove socket file */ + if (--(msf->ref_cnt) == 0) { - listener = pool_elt_at_index (mm->listeners, mif->listener_index); - if (--listener->usage_counter == 0) + if (msf->is_listener) { - /* not used anymore -> remove the socket and pending connections */ - - /* *INDENT-OFF* */ - pool_foreach (pending_conn, mm->pending_conns, - ({ - if (pending_conn->listener_index == mif->listener_index) - { - memif_remove_pending_conn (pending_conn); - } - })); - /* *INDENT-ON* */ - - unix_file_del (&unix_main, - unix_main.file_pool + listener->socket.index); - pool_put (mm->listeners, listener); - unlink ((char *) mif->socket_filename); + uword *x; + memif_file_del_by_index (msf->unix_file_index); + vec_foreach (x, msf->pending_file_indices) + { + memif_file_del_by_index (*x); + } + vec_free (msf->pending_file_indices); } + mhash_free (&msf->dev_instance_by_id); + hash_free (msf->dev_instance_by_fd); + mhash_unset 
(&mm->socket_file_index_by_filename, msf->filename, 0); + vec_free (msf->filename); + pool_put (mm->socket_files, msf); } - clib_spinlock_free (&mif->lockp); - - mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); - vec_free (mif->socket_filename); - vec_free (mif->ring_data); - memset (mif, 0, sizeof (*mif)); pool_put (mm->interfaces, mif); -} -int -memif_worker_thread_enable () -{ - /* if worker threads are enabled, switch to polling mode */ - /* *INDENT-OFF* */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - memif_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - /* *INDENT-ON* */ - return 0; -} + if (pool_elts (mm->interfaces) == 0) + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_STOP, 0); -int -memif_worker_thread_disable () -{ - /* *INDENT-OFF* */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - memif_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - /* *INDENT-ON* */ return 0; } @@ -846,19 +526,92 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) clib_error_t *error = 0; int ret = 0; uword *p; + vnet_hw_interface_t *hw; + memif_socket_file_t *msf = 0; + u8 *socket_filename; + int rv = 0; + + if (args->socket_filename == 0 || args->socket_filename[0] != '/') + { + rv = mkdir (MEMIF_DEFAULT_SOCKET_DIR, 0755); + if (rv && errno != EEXIST) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + if (args->socket_filename == 0) + socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + MEMIF_DEFAULT_SOCKET_FILENAME, 0); + else + socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + args->socket_filename, 0); + + } + else + socket_filename = vec_dup (args->socket_filename); + + p = mhash_get (&mm->socket_file_index_by_filename, socket_filename); - p = mhash_get (&mm->if_index_by_key, &args->key); if (p) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + { + msf = vec_elt_at_index (mm->socket_files, p[0]); + + /* existing socket file can be 
either master or slave but cannot be both */ + if (!msf->is_listener != !args->is_master) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + + p = mhash_get (&msf->dev_instance_by_id, &args->id); + if (p) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + } + + /* Create new socket file */ + if (msf == 0) + { + struct stat file_stat; + /* If we are creating listener make sure file doesn't exist or if it + * exists thn delete it if it is old socket file */ + if (args->is_master && + (stat ((char *) socket_filename, &file_stat) == 0)) + { + if (S_ISSOCK (file_stat.st_mode)) + { + unlink ((char *) socket_filename); + } + else + { + ret = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + } + pool_get (mm->socket_files, msf); + memset (msf, 0, sizeof (memif_socket_file_t)); + mhash_init (&msf->dev_instance_by_id, sizeof (uword), + sizeof (memif_interface_id_t)); + msf->dev_instance_by_fd = hash_create (0, sizeof (uword)); + msf->filename = socket_filename; + msf->fd = -1; + msf->is_listener = (args->is_master != 0); + socket_filename = 0; + mhash_set (&mm->socket_file_index_by_filename, msf->filename, + msf - mm->socket_files, 0); + DBG ("creating socket file %s", msf->filename); + } pool_get (mm->interfaces, mif); memset (mif, 0, sizeof (*mif)); - mif->key = args->key; - mif->if_index = mif - mm->interfaces; + mif->dev_instance = mif - mm->interfaces; + mif->socket_file_index = msf - mm->socket_files; + mif->id = args->id; mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; - mif->listener_index = ~0; - mif->connection.index = mif->interrupt_line.index = ~0; - mif->connection.fd = mif->interrupt_line.fd = -1; + mif->conn_unix_file_index = ~0; + mif->conn_fd = -1; + if (args->secret) + mif->secret = vec_dup (args->secret); if (tm->n_vlib_mains > 1) clib_spinlock_init (&mif->lockp); @@ -876,137 +629,94 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) } error = ethernet_register_interface (vnm, 
memif_device_class.index, - mif->if_index, args->hw_addr, + mif->dev_instance, args->hw_addr, &mif->hw_if_index, memif_eth_flag_change); if (error) { clib_error_report (error); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; + ret = VNET_API_ERROR_SYSCALL_ERROR_2; goto error; } sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index); mif->sw_if_index = sw->sw_if_index; - mif->log2_ring_size = args->log2_ring_size; - mif->buffer_size = args->buffer_size; - - mif->num_s2m_rings = args->rx_queues; - mif->num_m2s_rings = args->tx_queues; - - mhash_set_mem (&mm->if_index_by_key, &args->key, &mif->if_index, 0); - - if (args->socket_filename != 0) - mif->socket_filename = args->socket_filename; - else - mif->socket_filename = vec_dup (mm->default_socket_filename); + mif->cfg.log2_ring_size = args->log2_ring_size; + mif->cfg.buffer_size = args->buffer_size; + mif->cfg.num_s2m_rings = + args->is_master ? args->rx_queues : args->tx_queues; + mif->cfg.num_m2s_rings = + args->is_master ? args->tx_queues : args->rx_queues; args->sw_if_index = mif->sw_if_index; - if (args->is_master) + /* If this is new one, start listening */ + if (msf->is_listener && msf->ref_cnt == 0) { struct sockaddr_un un = { 0 }; struct stat file_stat; int on = 1; - memif_listener_t *listener = 0; - - if (stat ((char *) mif->socket_filename, &file_stat) == 0) - { - if (!S_ISSOCK (file_stat.st_mode)) - { - errno = ENOTSOCK; - ret = VNET_API_ERROR_SYSCALL_ERROR_2; - goto error; - } - /* *INDENT-OFF* */ - pool_foreach (listener, mm->listeners, - ({ - if (listener->sock_dev == file_stat.st_dev && - listener->sock_ino == file_stat.st_ino) - { - /* attach memif to the existing listener */ - mif->listener_index = listener->index; - ++listener->usage_counter; - goto signal; - } - })); - /* *INDENT-ON* */ - unlink ((char *) mif->socket_filename); - } - pool_get (mm->listeners, listener); - memset (listener, 0, sizeof (*listener)); - listener->socket.fd = -1; - listener->socket.index = ~0; - listener->index = listener - 
mm->listeners; - listener->usage_counter = 1; - - if ((listener->socket.fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) + if ((msf->fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0) { - ret = VNET_API_ERROR_SYSCALL_ERROR_3; + ret = VNET_API_ERROR_SYSCALL_ERROR_4; goto error; } un.sun_family = AF_UNIX; - strncpy ((char *) un.sun_path, (char *) mif->socket_filename, + strncpy ((char *) un.sun_path, (char *) msf->filename, sizeof (un.sun_path) - 1); - if (setsockopt (listener->socket.fd, SOL_SOCKET, SO_PASSCRED, - &on, sizeof (on)) < 0) - { - ret = VNET_API_ERROR_SYSCALL_ERROR_4; - goto error; - } - if (bind (listener->socket.fd, (struct sockaddr *) &un, - sizeof (un)) == -1) + if (setsockopt (msf->fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof (on)) < 0) { ret = VNET_API_ERROR_SYSCALL_ERROR_5; goto error; } - if (listen (listener->socket.fd, 1) == -1) + if (bind (msf->fd, (struct sockaddr *) &un, sizeof (un)) == -1) { ret = VNET_API_ERROR_SYSCALL_ERROR_6; goto error; } - - if (stat ((char *) mif->socket_filename, &file_stat) == -1) + if (listen (msf->fd, 1) == -1) { ret = VNET_API_ERROR_SYSCALL_ERROR_7; goto error; } - listener->sock_dev = file_stat.st_dev; - listener->sock_ino = file_stat.st_ino; + if (stat ((char *) msf->filename, &file_stat) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_8; + goto error; + } + msf->unix_file_index = ~0; unix_file_t template = { 0 }; template.read_function = memif_conn_fd_accept_ready; - template.file_descriptor = listener->socket.fd; - template.private_data = listener->index; - listener->socket.index = unix_file_add (&unix_main, &template); - - mif->listener_index = listener->index; - } - else - { - mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + template.file_descriptor = msf->fd; + template.private_data = mif->socket_file_index; + memif_file_add (&msf->unix_file_index, &template); } -#if 0 - /* use configured or generate random MAC address */ - if (!args->hw_addr_set && - tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 1) - 
memif_worker_thread_enable (); -#endif + msf->ref_cnt++; + + if (args->is_master == 0) + mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + + hw = vnet_get_hw_interface (vnm, mif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, + memif_input_node.index); + + mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0); -signal: if (pool_elts (mm->interfaces) == 1) { vlib_process_signal_event (vm, memif_process_node.index, MEMIF_PROCESS_EVENT_START, 0); } - return 0; + goto done; error: if (mif->hw_if_index != ~0) @@ -1014,89 +724,31 @@ error: ethernet_delete_interface (vnm, mif->hw_if_index); mif->hw_if_index = ~0; } - memif_close_if (mm, mif); + memif_delete_if (vm, mif); return ret; -} - -int -memif_delete_if (vlib_main_t * vm, u64 key) -{ - vnet_main_t *vnm = vnet_get_main (); - memif_main_t *mm = &memif_main; - memif_if_t *mif; - uword *p; - u32 hw_if_index; - - p = mhash_get (&mm->if_index_by_key, &key); - if (p == NULL) - { - DEBUG_LOG ("Memory interface with key 0x%" PRIx64 " does not exist", - key); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - mif = pool_elt_at_index (mm->interfaces, p[0]); - mif->flags |= MEMIF_IF_FLAG_DELETING; - - /* bring down the interface */ - vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); - - hw_if_index = mif->hw_if_index; - memif_close_if (mm, mif); - - /* remove the interface */ - ethernet_delete_interface (vnm, hw_if_index); - if (pool_elts (mm->interfaces) == 0) - { - vlib_process_signal_event (vm, memif_process_node.index, - MEMIF_PROCESS_EVENT_STOP, 0); - } - -#if 0 - if (tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 0) - memif_worker_thread_disable (); -#endif - return 0; +done: + vec_free (socket_filename); + return rv; } + static clib_error_t * memif_init (vlib_main_t * vm) { memif_main_t *mm = &memif_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; memset (mm, 0, sizeof 
(memif_main_t)); - mm->input_cpu_first_index = 0; - mm->input_cpu_count = 1; - /* initialize binary API */ memif_plugin_api_hookup (vm); - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - mm->input_cpu_first_index = tr->first_index; - mm->input_cpu_count = tr->count; - } - - mhash_init (&mm->if_index_by_key, sizeof (uword), sizeof (u64)); + mhash_init_c_string (&mm->socket_file_index_by_filename, sizeof (uword)); vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - /* set default socket filename */ - vec_validate (mm->default_socket_filename, - strlen (MEMIF_DEFAULT_SOCKET_FILENAME)); - strncpy ((char *) mm->default_socket_filename, - MEMIF_DEFAULT_SOCKET_FILENAME, - vec_len (mm->default_socket_filename) - 1); - return 0; } diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index 56028a25..30a27dc2 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -1,6 +1,6 @@ /* *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: @@ -15,278 +15,158 @@ *------------------------------------------------------------------ */ -#include +#ifndef _MEMIF_H_ +#define _MEMIF_H_ -typedef struct -{ - u16 version; -#define MEMIF_VERSION_MAJOR 0 -#define MEMIF_VERSION_MINOR 1 -#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) - u8 type; -#define MEMIF_MSG_TYPE_CONNECT_REQ 0 -#define MEMIF_MSG_TYPE_CONNECT_RESP 1 -#define MEMIF_MSG_TYPE_DISCONNECT 2 - - /* Connection-request parameters: */ - u64 key; - u8 log2_ring_size; -#define MEMIF_DEFAULT_RING_SIZE 1024 - u16 num_s2m_rings; -#define MEMIF_DEFAULT_RX_QUEUES 1 - u16 num_m2s_rings; -#define MEMIF_DEFAULT_TX_QUEUES 1 - u16 buffer_size; -#define MEMIF_DEFAULT_BUFFER_SIZE 2048 - u32 shared_mem_size; - - /* Connection-response parameters: */ - u8 retval; -} memif_msg_t; - -typedef struct __attribute__ ((packed)) -{ - u16 flags; -#define MEMIF_DESC_FLAG_NEXT (1 << 0) - u16 region; - u32 buffer_length; - u32 length;; - u8 reserved[4]; - u64 offset; - u64 metadata; -} memif_desc_t; +#ifndef MEMIF_CACHELINE_SIZE +#define MEMIF_CACHELINE_SIZE 64 +#endif -STATIC_ASSERT_SIZEOF (memif_desc_t, 32); +/* + * Type definitions + */ -typedef struct +typedef enum memif_msg_type { - u16 head __attribute__ ((aligned (128))); - u16 tail __attribute__ ((aligned (128))); - memif_desc_t desc[0] __attribute__ ((aligned (128))); -} memif_ring_t; + MEMIF_MSG_TYPE_NONE = 0, + MEMIF_MSG_TYPE_ACK = 1, + MEMIF_MSG_TYPE_HELLO = 2, + MEMIF_MSG_TYPE_INIT = 3, + MEMIF_MSG_TYPE_ADD_REGION = 4, + MEMIF_MSG_TYPE_ADD_RING = 5, + MEMIF_MSG_TYPE_CONNECT = 6, + MEMIF_MSG_TYPE_CONNECTED = 7, + MEMIF_MSG_TYPE_DISCONNECT = 8, +} memif_msg_type_t; -typedef struct +typedef enum { - u32 cookie __attribute__ ((aligned (128))); -} memif_shm_t; - + MEMIF_RING_S2M = 0, + MEMIF_RING_M2S = 1 +} memif_ring_type_t; -typedef struct +typedef enum { - u16 last_head; - u16 last_tail; -} memif_ring_data_t; + MEMIF_INTERFACE_MODE_ETHERNET = 0, + 
MEMIF_INTERFACE_MODE_IP = 1, + MEMIF_INTERFACE_MODE_PUNT_INJECT = 2, +} memif_interface_mode_t; -typedef struct -{ - int fd; - u32 index; -} memif_file_t; +typedef uint16_t memif_region_index_t; +typedef uint16_t memif_ring_index_t; +typedef uint32_t memif_interface_id_t; +typedef uint16_t memif_version_t; -typedef struct -{ - uword index; - dev_t sock_dev; - ino_t sock_ino; - memif_file_t socket; - u16 usage_counter; -} memif_listener_t; +/* + * Socket messages + */ -typedef struct +typedef struct __attribute__ ((packed)) { - uword index; - memif_file_t connection; - uword listener_index; -} memif_pending_conn_t; + uint8_t name[32]; + memif_version_t min_version; + memif_version_t max_version; + memif_region_index_t max_region; + memif_ring_index_t max_m2s_ring; + memif_ring_index_t max_s2m_ring; + uint8_t max_log2_ring_size; +} memif_msg_hello_t; -typedef struct +typedef struct __attribute__ ((packed)) { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; - u32 flags; -#define MEMIF_IF_FLAG_ADMIN_UP (1 << 0) -#define MEMIF_IF_FLAG_IS_SLAVE (1 << 1) -#define MEMIF_IF_FLAG_CONNECTING (1 << 2) -#define MEMIF_IF_FLAG_CONNECTED (1 << 3) -#define MEMIF_IF_FLAG_DELETING (1 << 4) - - u64 key; - uword if_index; - u32 hw_if_index; - u32 sw_if_index; - - u32 per_interface_next_index; - - uword listener_index; - memif_file_t connection; - memif_file_t interrupt_line; - u8 *socket_filename; + memif_version_t version; + memif_interface_id_t id; + memif_interface_mode_t mode:8; + uint8_t secret[24]; + uint8_t name[32]; +} memif_msg_init_t; - void **regions; - - u8 log2_ring_size; - u8 num_s2m_rings; - u8 num_m2s_rings; - u16 buffer_size; - u32 shared_mem_size; - - memif_ring_data_t *ring_data; - - /* remote info */ - pid_t remote_pid; - uid_t remote_uid; -} memif_if_t; - -typedef struct +typedef struct __attribute__ ((packed)) { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /** API message ID base */ - u16 msg_id_base; - - /* pool of all memory interfaces */ - 
memif_if_t *interfaces; - - /* pool of all listeners */ - memif_listener_t *listeners; - - /* pool of pending connections */ - memif_pending_conn_t *pending_conns; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* rx buffer cache */ - u32 **rx_buffers; - - /* hash of all registered keys */ - mhash_t if_index_by_key; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; - - /* configuration */ - u8 *default_socket_filename; -#define MEMIF_DEFAULT_SOCKET_FILENAME "/var/vpp/memif.sock" -} memif_main_t; + memif_region_index_t index; + uint32_t size; +} memif_msg_add_region_t; -extern memif_main_t memif_main; -extern vnet_device_class_t memif_device_class; -extern vlib_node_registration_t memif_input_node; - -enum +typedef struct __attribute__ ((packed)) { - MEMIF_PROCESS_EVENT_START = 1, - MEMIF_PROCESS_EVENT_STOP = 2, -} memif_process_event_t; + uint16_t flags; +#define MEMIF_MSG_ADD_RING_FLAG_S2M (1 << 0) + memif_ring_index_t index; + memif_region_index_t region; + uint32_t offset; + uint8_t log2_ring_size; +} memif_msg_add_ring_t; -typedef struct +typedef struct __attribute__ ((packed)) { - u64 key; - u8 *socket_filename; - u8 is_master; - u8 log2_ring_size; - u16 buffer_size; - u8 hw_addr_set; - u8 hw_addr[6]; - u8 rx_queues; - u8 tx_queues; - - /* return */ - u32 sw_if_index; -} memif_create_if_args_t; + uint8_t if_name[32]; +} memif_msg_connect_t; -int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); -int memif_delete_if (vlib_main_t * vm, u64 key); -void memif_disconnect (vlib_main_t * vm, memif_if_t * mif); -clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static_always_inline u8 
-memif_get_rx_queues (memif_if_t * mif) +typedef struct __attribute__ ((packed)) { - u8 rx_queues; + uint8_t if_name[32]; +} memif_msg_connected_t; - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - rx_queues = mif->num_m2s_rings; - else - rx_queues = mif->num_s2m_rings; - - return (rx_queues); -} - -static_always_inline u8 -memif_get_tx_queues (memif_if_t * mif) +typedef struct __attribute__ ((packed)) { - u8 tx_queues; - - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - tx_queues = mif->num_s2m_rings; - else - tx_queues = mif->num_m2s_rings; + uint32_t code; + uint8_t string[96]; +} memif_msg_disconnect_t; + +typedef struct __attribute__ ((packed, aligned (128))) +{ + memif_msg_type_t type:16; + union + { + memif_msg_hello_t hello; + memif_msg_init_t init; + memif_msg_add_region_t add_region; + memif_msg_add_ring_t add_ring; + memif_msg_connect_t connect; + memif_msg_connected_t connected; + memif_msg_disconnect_t disconnect; + }; +} memif_msg_t; - return (tx_queues); -} +_Static_assert (sizeof (memif_msg_t) == 128, + "Size of memif_msg_t must be 128"); -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} +/* + * Ring and Descriptor Layout + */ -typedef enum +typedef struct __attribute__ ((packed)) { - MEMIF_RING_S2M = 0, - MEMIF_RING_M2S = 1 -} memif_ring_type_t; + uint16_t flags; +#define MEMIF_DESC_FLAG_NEXT (1 << 0) + memif_region_index_t region; + uint32_t buffer_length; + uint32_t length; + uint8_t reserved[4]; + uint64_t offset; + uint64_t metadata; +} memif_desc_t; -static_always_inline memif_ring_t * -memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) -{ - if (vec_len (mif->regions) == 0) - return NULL; - void *p = mif->regions[0]; - int ring_size = - sizeof (memif_ring_t) + - sizeof (memif_desc_t) * (1 << mif->log2_ring_size); - p += sizeof (memif_shm_t); - p += (ring_num + type * mif->num_s2m_rings) * ring_size; +_Static_assert (sizeof (memif_desc_t) == 32, + "Size of 
memif_dsct_t must be 32"); - return (memif_ring_t *) p; -} +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \ + uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE))) -static_always_inline void * -memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +typedef struct { - u16 region = ring->desc[slot].region; - return mif->regions[region] + ring->desc[slot].offset; -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + MEMIF_CACHELINE_ALIGN_MARK (cacheline0); + uint32_t cookie; + uint16_t flags; +#define MEMIF_RING_FLAG_MASK_INT 1 + volatile uint16_t head; + MEMIF_CACHELINE_ALIGN_MARK (cacheline1); + volatile uint16_t tail; + MEMIF_CACHELINE_ALIGN_MARK (cacheline2); + memif_desc_t desc[0]; +} memif_ring_t; -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif /* _MEMIF_H_ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index 1470f944..533e8482 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -19,7 +19,9 @@ #include #include +#include #include +#include #include #include @@ -107,8 +109,8 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) static const u8 empty_hw_addr[6]; int rv = 0; - /* key */ - args.key = clib_net_to_host_u64 (mp->key); + /* id */ + args.id = clib_net_to_host_u32 (mp->id); /* socket filename */ mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0; @@ -120,6 +122,15 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) vec_len (args.socket_filename)); } + /* secret */ + mp->secret[ARRAY_LEN (mp->secret) - 1] = 0; + if (strlen ((char *) mp->secret) > 
0) + { + vec_validate (args.secret, strlen ((char *) mp->secret)); + strncpy ((char *) args.secret, (char *) mp->secret, + vec_len (args.secret)); + } + /* role */ args.is_master = (mp->role == 0); if (args.is_master == 0) @@ -156,6 +167,9 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) rv = memif_create_if (vm, &args); + vec_free (args.socket_filename); + vec_free (args.secret); + reply: /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY, @@ -173,26 +187,19 @@ void vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) { memif_main_t *mm = &memif_main; - memif_if_t *mif; vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); vl_api_memif_delete_reply_t *rmp; - u32 sw_if_index = ntohl (mp->sw_if_index); + vnet_hw_interface_t *hi = + vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); + memif_if_t *mif = 0; /* looked up only after hi is validated */ int rv = 0; - /* *INDENT-OFF* */ - pool_foreach (mif, mm->interfaces, - ({ - if (sw_if_index == mif->sw_if_index) - { - rv = memif_delete_if (vm, mif->key); - goto reply; - } - })); - /* *INDENT-ON* */ - - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + if (hi != NULL && memif_device_class.index == hi->dev_class_index) + mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + rv = mif ? memif_delete_if (vm, mif) : + VNET_API_ERROR_INVALID_SW_IF_INDEX; -reply: REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); } @@ -205,6 +212,8 @@ send_memif_details (unix_shared_memory_queue_t * q, vl_api_memif_details_t *mp; vnet_main_t *vnm = vnet_get_main (); memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); vnet_hw_interface_t *hwif; hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); @@ -220,14 +229,13 @@ send_memif_details (unix_shared_memory_queue_t * q, (char *) interface_name, ARRAY_LEN (mp->if_name) - 1); memcpy (mp->hw_addr, hwif->hw_address, ARRAY_LEN (mp->hw_addr)); - mp->key = clib_host_to_net_u64 (mif->key); + mp->id = clib_host_to_net_u32 
(mif->id); mp->role = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? 1 : 0; strncpy ((char *) mp->socket_filename, - (char *) mif->socket_filename, - ARRAY_LEN (mp->socket_filename) - 1); + (char *) msf->filename, ARRAY_LEN (mp->socket_filename) - 1); - mp->ring_size = htonl (1 << mif->log2_ring_size); - mp->buffer_size = htons (mif->buffer_size); + mp->ring_size = htonl (1 << mif->run.log2_ring_size); + mp->buffer_size = htons (mif->run.buffer_size); mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0; mp->link_up_down = (hwif->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0; diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c index c1ba9273..cbef4dfa 100644 --- a/src/plugins/memif/memif_test.c +++ b/src/plugins/memif/memif_test.c @@ -24,6 +24,7 @@ #include #include #include +#include #define __plugin_msg_base memif_test_main.msg_id_base #include @@ -118,8 +119,9 @@ api_memif_create (vat_main_t * vam) { unformat_input_t *i = vam->input; vl_api_memif_create_t *mp; - u64 key = 0; + u32 id = 0; u8 *socket_filename = 0; + u8 *secret = 0; u8 role = 1; u32 ring_size = 0; u32 buffer_size = 0; @@ -131,10 +133,12 @@ api_memif_create (vat_main_t * vam) while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { - if (unformat (i, "key 0x%" PRIx64, &key)) + if (unformat (i, "id %u", &id)) ; else if (unformat (i, "socket %s", &socket_filename)) ; + else if (unformat (i, "secret %s", &secret)) + ; else if (unformat (i, "ring_size %u", &ring_size)) ; else if (unformat (i, "buffer_size %u", &buffer_size)) @@ -173,7 +177,7 @@ api_memif_create (vat_main_t * vam) M (MEMIF_CREATE, mp); - mp->key = clib_host_to_net_u64 (key); + mp->id = clib_host_to_net_u32 (id); mp->role = role; mp->ring_size = clib_host_to_net_u32 (ring_size); mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff); @@ -182,6 +186,11 @@ api_memif_create (vat_main_t * vam) strncpy ((char *) mp->socket_filename, (char *) socket_filename, 127); vec_free 
(socket_filename); } + if (secret != 0) /* secret is optional */ + { + strncpy ((char *) mp->secret, (char *) secret, 16); + vec_free (secret); + } memcpy (mp->hw_addr, hw_addr, 6); mp->rx_queues = rx_queues; mp->tx_queues = tx_queues; @@ -282,11 +291,11 @@ static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) vat_main_t *vam = memif_test_main.vat_main; fformat (vam->ofp, "%s: sw_if_index %u mac %U\n" - " key 0x%" PRIx64 " socket %s role %s\n" + " id %u socket %s role %s\n" " ring_size %u buffer_size %u\n" " state %s link %s\n", mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address, - mp->hw_addr, clib_net_to_host_u64 (mp->key), mp->socket_filename, + mp->hw_addr, clib_net_to_host_u32 (mp->id), mp->socket_filename, mp->role ? "slave" : "master", ntohl (mp->ring_size), ntohs (mp->buffer_size), mp->admin_up_down ? "up" : "down", @@ -298,7 +307,7 @@ static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) * and that the data plane plugin processes */ #define foreach_vpe_api_msg \ -_(memif_create, "[key ] [socket ] [ring_size ] " \ +_(memif_create, "[id ] [socket ] [ring_size ] " \ "[buffer_size ] [hw_addr ] " \ "") \ _(memif_delete, "") \ diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index fd7baa30..e2c7631c 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -28,6 +28,7 @@ #include #include +#include #define foreach_memif_input_error @@ -78,11 +79,11 @@ memif_prefetch (vlib_main_t * vm, u32 bi) static_always_inline uword memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, - memif_ring_type_t type, u16 rid) + memif_ring_type_t type, u16 qid) { vnet_main_t *vnm = vnet_get_main (); - memif_ring_t *ring = memif_get_ring (mif, type, rid); - memif_ring_data_t *rd; + memif_ring_t *ring; + memif_queue_t *mq; u16 head; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; uword n_trace = vlib_get_trace_count (vm, node); @@ -94,12 +95,14 @@ 
memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 thread_index = vlib_get_thread_index (); u32 bi0, bi1; vlib_buffer_t *b0, *b1; - u16 ring_size = 1 << mif->log2_ring_size; - u16 mask = ring_size - 1; - u16 num_slots; + u16 ring_size, mask, num_slots; void *mb0, *mb1; - rd = vec_elt_at_index (mif->ring_data, rid + type * mif->num_s2m_rings); + mq = vec_elt_at_index (mif->rx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + if (mif->per_interface_next_index != ~0) next_index = mif->per_interface_next_index; @@ -115,13 +118,13 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } head = ring->head; - if (head == rd->last_head) + if (head == mq->last_head) return 0; - if (head > rd->last_head) - num_slots = head - rd->last_head; + if (head > mq->last_head) + num_slots = head - mq->last_head; else - num_slots = ring_size - rd->last_head + head; + num_slots = ring_size - mq->last_head + head; while (num_slots) { @@ -132,28 +135,28 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (num_slots > 5 && n_left_to_next > 2) { - if (PREDICT_TRUE (rd->last_head + 5 < ring_size)) + if (PREDICT_TRUE (mq->last_head + 5 < ring_size)) { - CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 2), + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 2), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 3), + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 3), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[rd->last_head + 4], + CLIB_PREFETCH (&ring->desc[mq->last_head + 4], CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[rd->last_head + 5], + CLIB_PREFETCH (&ring->desc[mq->last_head + 5], CLIB_CACHE_LINE_BYTES, LOAD); } else { CLIB_PREFETCH (memif_get_buffer - (mif, ring, (rd->last_head + 2) % mask), + (mif, ring, (mq->last_head + 2) % mask), CLIB_CACHE_LINE_BYTES, 
LOAD); CLIB_PREFETCH (memif_get_buffer - (mif, ring, (rd->last_head + 3) % mask), + (mif, ring, (mq->last_head + 3) % mask), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[(rd->last_head + 4) % mask], + CLIB_PREFETCH (&ring->desc[(mq->last_head + 4) % mask], CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[(rd->last_head + 5) % mask], + CLIB_PREFETCH (&ring->desc[(mq->last_head + 5) % mask], CLIB_CACHE_LINE_BYTES, LOAD); } /* get empty buffer */ @@ -185,17 +188,17 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; /* copy buffer */ - mb0 = memif_get_buffer (mif, ring, rd->last_head); + mb0 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b0), mb0, CLIB_CACHE_LINE_BYTES); - b0->current_length = ring->desc[rd->last_head].length; - rd->last_head = (rd->last_head + 1) & mask; + b0->current_length = ring->desc[mq->last_head].length; + mq->last_head = (mq->last_head + 1) & mask; - mb1 = memif_get_buffer (mif, ring, rd->last_head); + mb1 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b1), mb1, CLIB_CACHE_LINE_BYTES); - b1->current_length = ring->desc[rd->last_head].length; - rd->last_head = (rd->last_head + 1) & mask; + b1->current_length = ring->desc[mq->last_head].length; + mq->last_head = (mq->last_head + 1) & mask; if (b0->current_length > CLIB_CACHE_LINE_BYTES) clib_memcpy (vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, @@ -221,7 +224,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + tr->ring = qid; if (n_trace) { @@ -233,7 +236,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->next_index = next1; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + 
tr->ring = qid; } } @@ -266,12 +269,12 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* fill buffer metadata */ b0 = vlib_get_buffer (vm, bi0); - b0->current_length = ring->desc[rd->last_head].length; + b0->current_length = ring->desc[mq->last_head].length; vnet_buffer (b0)->sw_if_index[VLIB_RX] = mif->sw_if_index; vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; /* copy buffer */ - mb0 = memif_get_buffer (mif, ring, rd->last_head); + mb0 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b0), mb0, CLIB_CACHE_LINE_BYTES); if (b0->current_length > CLIB_CACHE_LINE_BYTES) @@ -291,7 +294,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + tr->ring = qid; } @@ -303,7 +306,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_to_next, bi0, next0); /* next packet */ - rd->last_head = (rd->last_head + 1) & mask; + mq->last_head = (mq->last_head + 1) & mask; num_slots--; n_rx_packets++; n_rx_bytes += b0->current_length; @@ -325,30 +328,28 @@ static uword memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 n_rx_packets = 0; + u32 n_rx = 0; memif_main_t *nm = &memif_main; - memif_if_t *mif; vnet_device_input_runtime_t *rt = (void *) node->runtime_data; vnet_device_and_queue_t *dq; - memif_ring_type_t type; foreach_device_and_queue (dq, rt->devices_and_queues) { + memif_if_t *mif; mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) && (mif->flags & MEMIF_IF_FLAG_CONNECTED)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - type = MEMIF_RING_M2S; + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id); else - type = MEMIF_RING_S2M; - n_rx_packets += - memif_device_input_inline (vm, node, frame, mif, type, - 
dq->queue_id); + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id); } } - return n_rx_packets; + return n_rx; } /* *INDENT-OFF* */ diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h new file mode 100644 index 00000000..104706fa --- /dev/null +++ b/src/plugins/memif/private.h @@ -0,0 +1,296 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include + +#define MEMIF_DEFAULT_SOCKET_DIR "/run/vpp" +#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock" +#define MEMIF_DEFAULT_RING_SIZE 1024 +#define MEMIF_DEFAULT_RX_QUEUES 1 +#define MEMIF_DEFAULT_TX_QUEUES 1 +#define MEMIF_DEFAULT_BUFFER_SIZE 2048 + +#define MEMIF_VERSION_MAJOR 0 +#define MEMIF_VERSION_MINOR 1 +#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) +#define MEMIF_COOKIE 0xdeadbeef +#define MEMIF_MAX_M2S_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_S2M_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_REGION 255 +#define MEMIF_MAX_LOG2_RING_SIZE 14 + +#define MEMIF_DEBUG 0 + +#if MEMIF_DEBUG == 1 +#define DBG(...) clib_warning(__VA_ARGS__) +#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) +#else +#define DBG(...) +#define DBG_UNIX_LOG(...) 
+#endif + +#if MEMIF_DEBUG == 1 +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = unix_file_add (&unix_main, b); \ + clib_warning ("unix_file_add fd %d private_data %u idx %u", \ + (b)->file_descriptor, (b)->private_data, *a); \ +} while (0) + +#define memif_file_del(a) do { \ + clib_warning ("unix_file_del idx %u",a - unix_main.file_pool); \ + unix_file_del (&unix_main, a); \ +} while (0) + +#define memif_file_del_by_index(a) do { \ + clib_warning ("unix_file_del idx %u", a); \ + unix_file_del_by_index (&unix_main, a); \ +} while (0) +#else +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = unix_file_add (&unix_main, b); \ +} while (0) +#define memif_file_del(a) unix_file_del(&unix_main, a) +#define memif_file_del_by_index(a) unix_file_del_by_index(&unix_main, a) +#endif + +typedef struct +{ + u8 *filename; + int fd; + uword unix_file_index; + uword *pending_file_indices; + int ref_cnt; + int is_listener; + + /* hash of all registered id */ + mhash_t dev_instance_by_id; + + /* hash of all registered fds */ + uword *dev_instance_by_fd; +} memif_socket_file_t; + +typedef struct +{ + void *shm; + u32 region_size; + int fd; +} memif_region_t; + +typedef struct +{ + memif_msg_t msg; + int fd; +} memif_msg_fifo_elt_t; + +typedef struct +{ + /* ring data */ + memif_ring_t *ring; + u8 log2_ring_size; + u8 region; + u32 offset; + + u16 last_head; + u16 last_tail; + + /* interrupts */ + int int_fd; + uword int_unix_file_index; + u64 int_count; +} memif_queue_t; + +#define foreach_memif_if_flag \ + _(0, ADMIN_UP, "admin-up") \ + _(1, IS_SLAVE, "slave") \ + _(2, CONNECTING, "connecting") \ + _(3, CONNECTED, "connected") \ + _(4, DELETING, "deleting") + +typedef enum +{ +#define _(a, b, c) MEMIF_IF_FLAG_##b = (1 << a), + foreach_memif_if_flag +#undef _ +} memif_if_flag_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u32 flags; + memif_interface_id_t id; + u32 hw_if_index; + u32 sw_if_index; + uword 
dev_instance; + memif_interface_mode_t mode:8; + + u32 per_interface_next_index; + + /* socket connection */ + uword socket_file_index; + int conn_fd; + uword conn_unix_file_index; + memif_msg_fifo_elt_t *msg_queue; + u8 *secret; + + memif_region_t *regions; + + memif_queue_t *rx_queues; + memif_queue_t *tx_queues; + + /* remote info */ + pid_t remote_pid; + uid_t remote_uid; + gid_t remote_gid; + u8 *remote_name; + u8 *remote_if_name; + + struct + { + u8 log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } cfg; + + struct + { + u8 log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } run; + + /* disconnect strings */ + u8 *local_disc_string; + u8 *remote_disc_string; +} memif_if_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** API message ID base */ + u16 msg_id_base; + + /* pool of all memory interfaces */ + memif_if_t *interfaces; + + /* pool of all unix socket files */ + memif_socket_file_t *socket_files; + mhash_t socket_file_index_by_filename; + + /* rx buffer cache */ + u32 **rx_buffers; + +} memif_main_t; + +extern memif_main_t memif_main; +extern vnet_device_class_t memif_device_class; +extern vlib_node_registration_t memif_input_node; + +typedef enum +{ + MEMIF_PROCESS_EVENT_START = 1, + MEMIF_PROCESS_EVENT_STOP = 2, +} memif_process_event_t; + +typedef struct +{ + memif_interface_id_t id; + u8 *socket_filename; + u8 *secret; + u8 is_master; + memif_interface_mode_t mode:8; + u8 log2_ring_size; + u16 buffer_size; + u8 hw_addr_set; + u8 hw_addr[6]; + u8 rx_queues; + u8 tx_queues; + + /* return */ + u32 sw_if_index; +} memif_create_if_args_t; + +int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); +int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); +clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); + +#ifndef __NR_memfd_create +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined 
__aarch64__ +#define __NR_memfd_create 279 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif +#endif + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +static_always_inline void * +memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +{ + u16 region = ring->desc[slot].region; + return mif->regions[region].shm + ring->desc[slot].offset; +} + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + +/* memif.c */ +clib_error_t *memif_init_regions_and_queues (memif_if_t * mif); +clib_error_t *memif_connect (memif_if_t * mif); +void memif_disconnect (memif_if_t * mif, clib_error_t * err); + +/* socket.c */ +clib_error_t *memif_conn_fd_accept_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_read_ready (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_read_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_write_ready (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_write_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, + clib_error_t * err); +u8 *format_memif_device_name (u8 * s, va_list * args); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c new file mode 100644 index 00000000..d1a96db3 --- /dev/null +++ 
b/src/plugins/memif/socket.c @@ -0,0 +1,736 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static u8 * +memif_str2vec (uint8_t * str, int len) +{ + u8 *s = 0; + int i; + + if (str[0] == 0) + return s; + + for (i = 0; i < len; i++) + { + vec_add1 (s, str[i]); + if (str[i] == 0) + return s; + } + vec_add1 (s, 0); + + return s; +} + +static clib_error_t * +memif_msg_send (int fd, memif_msg_t * msg, int afd) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + int rv; + + iov[0].iov_base = (void *) msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + if (afd > 0) + { + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &afd, sizeof (int)); + } + rv = sendmsg (fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, 
"sendmsg"); + DBG ("Message type %u sent (fd %d)", msg->type, afd); + return 0; +} + +static void +memif_msg_enq_ack (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + + e->msg.type = MEMIF_MSG_TYPE_ACK; + e->fd = -1; +} + +static clib_error_t * +memif_msg_enq_hello (int fd) +{ + u8 *s; + memif_msg_t msg = { 0 }; + memif_msg_hello_t *h = &msg.hello; + msg.type = MEMIF_MSG_TYPE_HELLO; + h->min_version = MEMIF_VERSION; + h->max_version = MEMIF_VERSION; + h->max_m2s_ring = MEMIF_MAX_M2S_RING; + h->max_s2m_ring = MEMIF_MAX_S2M_RING; + h->max_region = MEMIF_MAX_REGION; + h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) h->name, (char *) s, sizeof (h->name)); + vec_free (s); + return memif_msg_send (fd, &msg, -1); +} + +static void +memif_msg_enq_init (memif_if_t * mif) +{ + u8 *s; + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_init_t *i = &e->msg.init; + + e->msg.type = MEMIF_MSG_TYPE_INIT; + e->fd = -1; + i->version = MEMIF_VERSION; + i->id = mif->id; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) i->name, (char *) s, sizeof (i->name)); + if (mif->secret) + strncpy ((char *) i->secret, (char *) mif->secret, sizeof (i->secret)); + vec_free (s); +} + +static void +memif_msg_enq_add_region (memif_if_t * mif, u8 region) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_region_t *ar = &e->msg.add_region; + + e->msg.type = MEMIF_MSG_TYPE_ADD_REGION; + e->fd = mif->regions[region].fd; + ar->index = region; + ar->size = mif->regions[region].region_size; +} + +static void +memif_msg_enq_add_ring (memif_if_t * mif, u8 index, u8 direction) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_ring_t *ar = &e->msg.add_ring; + memif_queue_t *mq; + + ASSERT ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0); + + e->msg.type = MEMIF_MSG_TYPE_ADD_RING; + + if (direction == 
MEMIF_RING_M2S) + mq = vec_elt_at_index (mif->rx_queues, index); + else + mq = vec_elt_at_index (mif->tx_queues, index); + + e->fd = mq->int_fd; + ar->index = index; + ar->region = mq->region; + ar->offset = mq->offset; + ar->log2_ring_size = mq->log2_ring_size; + ar->flags = (direction == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0; +} + +static void +memif_msg_enq_connect (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connect_t *c = &e->msg.connect; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECT; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + vec_free (s); +} + +static void +memif_msg_enq_connected (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connected_t *c = &e->msg.connected; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECTED; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + vec_free (s); +} + +clib_error_t * +memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err) +{ + memif_msg_t msg = { 0 }; + msg.type = MEMIF_MSG_TYPE_DISCONNECT; + memif_msg_disconnect_t *d = &msg.disconnect; + + d->code = err->code; + strncpy ((char *) d->string, (char *) err->what, sizeof (d->string)); + + return memif_msg_send (mif->conn_fd, &msg, -1); +} + +static clib_error_t * +memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_hello_t *h = &msg->hello; + + if (msg->hello.min_version > MEMIF_VERSION || + msg->hello.max_version < MEMIF_VERSION) + return clib_error_return (0, "incompatible protocol version"); + + mif->run.num_s2m_rings = clib_min (h->max_s2m_ring + 1, + mif->cfg.num_s2m_rings); + mif->run.num_m2s_rings = clib_min (h->max_m2s_ring + 1, + mif->cfg.num_m2s_rings); + mif->run.log2_ring_size = clib_min 
(h->max_log2_ring_size, + mif->cfg.log2_ring_size); + mif->run.buffer_size = mif->cfg.buffer_size; + + mif->remote_name = memif_str2vec (h->name, sizeof (h->name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, + unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, uf->private_data); + memif_msg_init_t *i = &msg->init; + memif_if_t *mif, tmp; + clib_error_t *err; + uword *p; + + if (i->version != MEMIF_VERSION) + { + memif_file_del_by_index (uf - unix_main.file_pool); + return clib_error_return (0, "unsupported version"); + } + + p = mhash_get (&msf->dev_instance_by_id, &i->id); + + if (!p) + { + err = clib_error_return (0, "unmatched interface id"); + goto error; + } + + mif = vec_elt_at_index (mm->interfaces, p[0]); + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + err = clib_error_return (0, "cannot connect to slave"); + goto error; + } + + if (mif->conn_fd != -1) + { + err = clib_error_return (0, "already connected"); + goto error; + } + + if (i->mode != mif->mode) + { + err = clib_error_return (0, "mode mismatch"); + goto error; + } + + mif->conn_fd = uf->file_descriptor; + mif->conn_unix_file_index = uf - unix_main.file_pool; + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + mif->remote_name = memif_str2vec (i->name, sizeof (i->name)); + *mifp = mif; + + if (mif->secret) + { + u8 *s; + int r; + s = memif_str2vec (i->secret, sizeof (i->secret)); + if (s == 0) + return clib_error_return (0, "secret required"); + + r = vec_cmp (s, mif->secret); + vec_free (s); + + if (r) + return clib_error_return (0, "incorrect secret"); + } + + return 0; + +error: + tmp.conn_fd = uf->file_descriptor; + memif_msg_send_disconnect (&tmp, err); + memif_file_del_by_index (uf - unix_main.file_pool); + return err; +} + +static clib_error_t * +memif_msg_receive_add_region (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + 
memif_msg_add_region_t *ar = &msg->add_region; + memif_region_t *mr; + if (fd < 0) + return clib_error_return (0, "missing memory region fd"); + + if (ar->index != vec_len (mif->regions)) + return clib_error_return (0, "unexpected region index"); + + if (ar->index > MEMIF_MAX_REGION) + return clib_error_return (0, "too many regions"); + + vec_validate_aligned (mif->regions, ar->index, CLIB_CACHE_LINE_BYTES); + mr = vec_elt_at_index (mif->regions, ar->index); + mr->fd = fd; + mr->region_size = ar->size; + + return 0; +} + +static clib_error_t * +memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + memif_msg_add_ring_t *ar = &msg->add_ring; + memif_queue_t *mq; + + if (fd < 0) + return clib_error_return (0, "missing ring interrupt fd"); + + if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) + { + if (ar->index != vec_len (mif->rx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_S2M_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->rx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->rx_queues, ar->index); + mif->run.num_s2m_rings = vec_len (mif->rx_queues); + } + else + { + if (ar->index != vec_len (mif->tx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_M2S_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->tx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->tx_queues, ar->index); + mif->run.num_m2s_rings = vec_len (mif->tx_queues); + } + + mq->int_fd = fd; + mq->int_unix_file_index = ~0; + mq->log2_ring_size = ar->log2_ring_size; + mq->region = ar->region; + mq->offset = ar->offset; + + return 0; +} + +static clib_error_t * +memif_msg_receive_connect (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connect_t *c = &msg->connect; + + if ((err = memif_connect (mif))) + return err; + + mif->remote_if_name = 
memif_str2vec (c->if_name, sizeof (c->if_name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_connected (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connected_t *c = &msg->connected; + + if ((err = memif_connect (mif))) + return err; + + mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name)); + return 0; +} + +static clib_error_t * +memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_disconnect_t *d = &msg->disconnect; + + mif->remote_disc_string = memif_str2vec (d->string, sizeof (d->string)); + return clib_error_return (0, "disconnect received"); +} + +static clib_error_t * +memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) +{ + char ctl[CMSG_SPACE (sizeof (int)) + + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + memif_msg_t msg = { 0 }; + ssize_t size; + clib_error_t *err = 0; + int fd = -1; + int i; + memif_if_t *mif = *mifp; + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + /* receive the incoming message */ + size = recvmsg (uf->file_descriptor, &mh, 0); + if (size != sizeof (memif_msg_t)) + { + return (size == 0) ? 
clib_error_return (0, "disconnected") : + clib_error_return_unix (0, + "recvmsg: malformed message received on fd %d", + uf->file_descriptor); + } + + if (mif == 0 && msg.type != MEMIF_MSG_TYPE_INIT) + { + memif_file_del (uf); + return clib_error_return (0, "unexpected message received"); + } + + /* process anciliary data */ + struct ucred *cr = 0; + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET) + { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_type == SCM_RIGHTS) + { + int *fdp = (int *) CMSG_DATA (cmsg); + fd = *fdp; + } + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + + DBG ("Message type %u received", msg.type); + /* process the message based on its type */ + switch (msg.type) + { + case MEMIF_MSG_TYPE_ACK: + break; + + case MEMIF_MSG_TYPE_HELLO: + if ((err = memif_msg_receive_hello (mif, &msg))) + return err; + if ((err = memif_init_regions_and_queues (mif))) + return err; + memif_msg_enq_init (mif); + memif_msg_enq_add_region (mif, 0); + vec_foreach_index (i, mif->tx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M); + vec_foreach_index (i, mif->rx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S); + memif_msg_enq_connect (mif); + break; + + case MEMIF_MSG_TYPE_INIT: + if ((err = memif_msg_receive_init (mifp, &msg, uf))) + return err; + mif = *mifp; + mif->remote_pid = cr->pid; + mif->remote_uid = cr->uid; + mif->remote_gid = cr->gid; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_REGION: + if ((err = memif_msg_receive_add_region (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_RING: + if ((err = memif_msg_receive_add_ring (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_CONNECT: + if ((err = memif_msg_receive_connect (mif, &msg))) + return err; + memif_msg_enq_connected (mif); + break; + + case 
MEMIF_MSG_TYPE_CONNECTED: + if ((err = memif_msg_receive_connected (mif, &msg))) + return err; + break; + + case MEMIF_MSG_TYPE_DISCONNECT: + if ((err = memif_msg_receive_disconnect (mif, &msg))) + return err; + break; + + default: + err = clib_error_return (0, "unknown message type (0x%x)", msg.type); + return err; + } + + if (clib_fifo_elts (mif->msg_queue) && mif->conn_unix_file_index != ~0) + unix_file_set_data_available_to_write (mif->conn_unix_file_index, 1); + return 0; +} + +clib_error_t * +memif_master_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif = 0; + uword conn_unix_file_index = ~0; + clib_error_t *err = 0; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + mif = vec_elt_at_index (mm->interfaces, p[0]); + } + else + { + /* This is new connection, remove index from pending vector */ + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + { + conn_unix_file_index = msf->pending_file_indices[i]; + vec_del1 (msf->pending_file_indices, i); + break; + } + ASSERT (conn_unix_file_index != ~0); + } + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +clib_error_t * +memif_slave_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + clib_error_t *err; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +static clib_error_t * +memif_conn_fd_write_ready (unix_file_t * uf, memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_sub2 (mif->msg_queue, e); + unix_file_set_data_available_to_write (mif->conn_unix_file_index, 0); + memif_msg_send (mif->conn_fd, &e->msg, e->fd); + 
return 0; +} + +clib_error_t * +memif_master_conn_fd_write_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (!p) + return 0; + + mif = vec_elt_at_index (mm->interfaces, p[0]); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_write_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_error (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + clib_error_t *err; + + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + + return 0; +} + +clib_error_t * +memif_master_conn_fd_error (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + memif_if_t *mif; + clib_error_t *err; + mif = vec_elt_at_index (mm->interfaces, p[0]); + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + } + else + { + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + { + vec_del1 (msf->pending_file_indices, i); + memif_file_del (uf); + return 0; + } + } + + clib_warning ("Error on unknown file descriptor %d", uf->file_descriptor); + memif_file_del (uf); + return 0; +} + + +clib_error_t * +memif_conn_fd_accept_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + 
int addr_len; + struct sockaddr_un client; + int conn_fd; + unix_file_t template = { 0 }; + uword unix_file_index = ~0; + clib_error_t *err; + + + addr_len = sizeof (client); + conn_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, (socklen_t *) & addr_len); + + if (conn_fd < 0) + return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); + + template.read_function = memif_master_conn_fd_read_ready; + template.write_function = memif_master_conn_fd_write_ready; + template.error_function = memif_master_conn_fd_error; + template.file_descriptor = conn_fd; + template.private_data = uf->private_data; + + memif_file_add (&unix_file_index, &template); + + err = memif_msg_enq_hello (conn_fd); + if (err) + { + clib_error_report (err); + memif_file_del_by_index (unix_file_index); + } + else + vec_add1 (msf->pending_file_indices, unix_file_index); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg