From eaabe073515e7722ed546b36f99efc6feea305a1 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 22 Mar 2017 10:18:13 +0100 Subject: Add memif - packet memory interface for intra-host communication Change-Id: I94c06b07a39f07ceba87bf3e7fcfc70e43231e8a Signed-off-by: Damjan Marion Co-Authored-By: Milan Lenco --- src/plugins/memif/memif.c | 1045 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1045 insertions(+) create mode 100644 src/plugins/memif/memif.c (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c new file mode 100644 index 00000000..7ba67c5b --- /dev/null +++ b/src/plugins/memif/memif.c @@ -0,0 +1,1045 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define MEMIF_DEBUG 1 + +#if MEMIF_DEBUG == 1 +#define DEBUG_LOG(...) clib_warning(__VA_ARGS__) +#define DEBUG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) +#else +#define DEBUG_LOG(...) +#endif + +memif_main_t memif_main; + +static clib_error_t *memif_conn_fd_read_ready (unix_file_t * uf); +static clib_error_t *memif_int_fd_read_ready (unix_file_t * uf); + +static u32 +memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + /* nothing for now */ + return 0; +} + +static void +memif_remove_pending_conn (memif_pending_conn_t * pending_conn) +{ + memif_main_t *mm = &memif_main; + + unix_file_del (&unix_main, + unix_main.file_pool + pending_conn->connection.index); + pool_put (mm->pending_conns, pending_conn); +} + +static void +memif_connect (vlib_main_t * vm, memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + int num_rings = mif->num_s2m_rings + mif->num_m2s_rings; + memif_ring_data_t *rd = NULL; + + vec_validate_aligned (mif->ring_data, num_rings - 1, CLIB_CACHE_LINE_BYTES); + vec_foreach (rd, mif->ring_data) + { + rd->last_head = 0; + } + + mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; + mif->flags |= MEMIF_IF_FLAG_CONNECTED; + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); +} + +static void +memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + + mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); + if (mif->hw_if_index != ~0) + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + + if (mif->interrupt_line.index != ~0) + { + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; /* closed in unix_file_del */ + } + if (mif->connection.index != ~0) + { + unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); + mif->connection.index = ~0; + mif->connection.fd = -1; /* closed in unix_file_del */ + } + + // TODO: properly munmap + close memif-owned shared memory segments + vec_free (mif->regions); +} + +static clib_error_t * +memif_process_connect_req (memif_pending_conn_t * pending_conn, + memif_msg_t * req, struct ucred *slave_cr, + int shm_fd, int int_fd) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + int fd = pending_conn->connection.fd; + unix_file_t *uf = 0; + memif_if_t *mif = 0; + memif_msg_t resp = { 0 }; + unix_file_t template = { 0 }; + void *shm; + uword *p; + u8 retval = 0; + static clib_error_t *error = 0; + + if (shm_fd == -1) + { + DEBUG_LOG + ("Connection request is missing shared memory file descriptor"); + retval = 1; + goto response; + } + + if (int_fd == -1) + { + DEBUG_LOG + ("Connection request is missing interrupt line file descriptor"); + retval = 2; + goto response; + } + + if (slave_cr == NULL) + { + DEBUG_LOG ("Connection request is missing slave credentials"); + retval = 3; + goto response; + } + + p = mhash_get (&mm->if_index_by_key, &req->key); + if (!p) + { + DEBUG_LOG + ("Connection request with unmatched key (0x%" PRIx64 ")", req->key); + retval = 4; + goto response; + } + + mif = vec_elt_at_index (mm->interfaces, *p); + if (mif->listener_index != pending_conn->listener_index) + { + DEBUG_LOG + ("Connection request with non-matching listener (%d vs. %d)", + pending_conn->listener_index, mif->listener_index); + retval = 5; + goto response; + } + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + DEBUG_LOG ("Memif slave does not accept connection requests"); + retval = 6; + goto response; + } + + if (mif->connection.fd != -1) + { + DEBUG_LOG + ("Memif with key 0x%" PRIx64 " is already connected", mif->key); + retval = 7; + goto response; + } + + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) + { + /* just silently decline the request */ + retval = 8; + goto response; + } + + if (req->shared_mem_size < sizeof (memif_shm_t)) + { + DEBUG_LOG + ("Unexpectedly small shared memory segment received from slave."); + retval = 9; + goto response; + } + + if ((shm = + mmap (NULL, req->shared_mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, 0)) == MAP_FAILED) + { + DEBUG_UNIX_LOG + ("Failed to map shared memory segment received from slave memif"); + error = clib_error_return_unix (0, "mmap fd %d", shm_fd); + retval = 10; + goto response; + } + + if (((memif_shm_t *) shm)->cookie != 0xdeadbeef) + { + DEBUG_LOG + ("Possibly corrupted shared memory segment received from slave memif"); + munmap (shm, req->shared_mem_size); + retval = 11; + goto response; + } + + mif->log2_ring_size = req->log2_ring_size; + mif->num_s2m_rings = req->num_s2m_rings; + mif->num_m2s_rings = req->num_m2s_rings; + mif->buffer_size = req->buffer_size; + mif->remote_pid = slave_cr->pid; + mif->remote_uid = slave_cr->uid; + vec_add1 (mif->regions, shm); + + /* register interrupt line */ + mif->interrupt_line.fd = int_fd; + template.read_function = memif_int_fd_read_ready; + template.file_descriptor = int_fd; + template.private_data = mif->if_index; + mif->interrupt_line.index = unix_file_add (&unix_main, &template); + + /* change context for future messages */ + uf = vec_elt_at_index (unix_main.file_pool, pending_conn->connection.index); + uf->private_data = mif->if_index << 1; + mif->connection = pending_conn->connection; + pool_put (mm->pending_conns, pending_conn); + + memif_connect (vm, mif); + +response: + resp.version = MEMIF_VERSION; + resp.type = MEMIF_MSG_TYPE_CONNECT_RESP; + resp.retval = retval; + send (fd, &resp, sizeof (resp), 0); + return error; +} + +static clib_error_t * +memif_process_connect_resp (memif_if_t * mif, memif_msg_t * resp) +{ + vlib_main_t *vm = vlib_get_main (); + + if ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) == 0) + { + DEBUG_LOG ("Memif master does not accept connection responses"); + return 0; + } + + if ((mif->flags & MEMIF_IF_FLAG_CONNECTING) == 0) + { + DEBUG_LOG ("Unexpected connection response"); + return 0; + } + + if (resp->retval == 0) + memif_connect (vm, mif); + else + memif_disconnect (vm, mif); + + return 0; +} + +static clib_error_t * +memif_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + memif_if_t *mif = 0; + memif_pending_conn_t *pending_conn = 0; + int fd_array[2] = { -1, -1 }; + char ctl[CMSG_SPACE (sizeof (fd_array)) + + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + struct ucred *cr = 0; + memif_msg_t msg = { 0 }; + struct cmsghdr *cmsg; + ssize_t size; + static clib_error_t *error = 0; + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + /* grab the appropriate context */ + if (uf->private_data & 1) + pending_conn = vec_elt_at_index (mm->pending_conns, + uf->private_data >> 1); + else + mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 1); + + /* receive the incoming message */ + size = recvmsg (uf->file_descriptor, &mh, 0); + if (size != sizeof (memif_msg_t)) + { + if (size != 0) + { + DEBUG_UNIX_LOG ("Malformed message received on fd %d", + uf->file_descriptor); + error = clib_error_return_unix (0, "recvmsg fd %d", + uf->file_descriptor); + } + goto disconnect; + } + + /* check version of the sender's memif plugin */ + if (msg.version != MEMIF_VERSION) + { + DEBUG_LOG ("Memif version mismatch"); + goto disconnect; + } + + /* process the message based on its type */ + switch (msg.type) + { + case MEMIF_MSG_TYPE_CONNECT_REQ: + if (pending_conn == 0) + { + DEBUG_LOG ("Received unexpected connection request"); + return 0; + } + + /* Read anciliary data */ + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_RIGHTS) + { + clib_memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + + return memif_process_connect_req (pending_conn, &msg, cr, + fd_array[0], fd_array[1]); + + case MEMIF_MSG_TYPE_CONNECT_RESP: + if (mif == 0) + { + DEBUG_LOG ("Received unexpected connection response"); + return 0; + } + return memif_process_connect_resp (mif, &msg); + + case MEMIF_MSG_TYPE_DISCONNECT: + goto disconnect; + + default: + DEBUG_LOG ("Received unknown message type"); + goto disconnect; + } + + return 0; + +disconnect: + if (pending_conn) + memif_remove_pending_conn (pending_conn); + else + memif_disconnect (vm, mif); + return error; +} + +static clib_error_t * +memif_int_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + u8 b; + ssize_t size; + + size = read (uf->file_descriptor, &b, sizeof (b)); + if (0 == size) + { + /* interrupt line was disconnected */ + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; + } + vlib_node_set_interrupt_pending (vm, memif_input_node.index); + return 0; +} + +static clib_error_t * +memif_conn_fd_accept_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_listener_t *listener = 0; + memif_pending_conn_t *pending_conn = 0; + int addr_len; + struct sockaddr_un client; + int conn_fd; + unix_file_t template = { 0 }; + + listener = pool_elt_at_index (mm->listeners, uf->private_data); + + addr_len = sizeof (client); + conn_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, (socklen_t *) & addr_len); + + if (conn_fd < 0) + return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); + + pool_get (mm->pending_conns, pending_conn); + pending_conn->index = pending_conn - mm->pending_conns; + pending_conn->listener_index = listener->index; + pending_conn->connection.fd = conn_fd; + + template.read_function = memif_conn_fd_read_ready; + template.file_descriptor = conn_fd; + template.private_data = (pending_conn->index << 1) | 1; + pending_conn->connection.index = unix_file_add (&unix_main, &template); + + return 0; +} + +static void +memif_connect_master (vlib_main_t * vm, memif_if_t * mif) +{ + memif_msg_t msg; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + struct cmsghdr *cmsg; + int mfd = -1; + int rv; + int fd_array[2] = { -1, -1 }; + char ctl[CMSG_SPACE (sizeof (fd_array))]; + memif_ring_t *ring = NULL; + int i, j; + void *shm = 0; + u64 buffer_offset; + unix_file_t template = { 0 }; + + msg.version = MEMIF_VERSION; + msg.type = MEMIF_MSG_TYPE_CONNECT_REQ; + msg.key = mif->key; + msg.log2_ring_size = mif->log2_ring_size; + msg.num_s2m_rings = mif->num_s2m_rings; + msg.num_m2s_rings = mif->num_m2s_rings; + msg.buffer_size = mif->buffer_size; + + buffer_offset = sizeof (memif_shm_t) + + (mif->num_s2m_rings + mif->num_m2s_rings) * + (sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->log2_ring_size)); + + msg.shared_mem_size = buffer_offset + + mif->buffer_size * (1 << mif->log2_ring_size) * (mif->num_s2m_rings + + mif->num_m2s_rings); + + if ((mfd = memfd_create ("shared mem", MFD_ALLOW_SEALING)) == -1) + { + DEBUG_LOG ("Failed to create anonymous file"); + goto error; + } + + if ((fcntl (mfd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + DEBUG_UNIX_LOG ("Failed to seal an anonymous file off from truncating"); + goto error; + } + + if ((ftruncate (mfd, msg.shared_mem_size)) == -1) + { + DEBUG_UNIX_LOG ("Failed to extend the size of an anonymous file"); + goto error; + } + + if ((shm = mmap (NULL, msg.shared_mem_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mfd, 0)) == MAP_FAILED) + { + DEBUG_UNIX_LOG ("Failed to map anonymous file into memory"); + goto error; + } + + vec_add1 (mif->regions, shm); + ((memif_shm_t *) mif->regions[0])->cookie = 0xdeadbeef; + + for (i = 0; i < mif->num_s2m_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + ring->head = ring->tail = 0; + for (j = 0; j < (1 << mif->log2_ring_size); j++) + { + u16 slot = i * (1 << mif->log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].buffer_length = mif->buffer_size; + } + } + for (i = 0; i < mif->num_m2s_rings; i++) + { + ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + ring->head = ring->tail = 0; + for (j = 0; j < (1 << mif->log2_ring_size); j++) + { + u16 slot = + (i + mif->num_s2m_rings) * (1 << mif->log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].buffer_length = mif->buffer_size; + } + } + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + /* create interrupt socket */ + if (socketpair (AF_UNIX, SOCK_STREAM, 0, fd_array) < 0) + { + DEBUG_UNIX_LOG ("Failed to create a pair of connected sockets"); + goto error; + } + + mif->interrupt_line.fd = fd_array[0]; + template.read_function = memif_int_fd_read_ready; + template.file_descriptor = mif->interrupt_line.fd; + template.private_data = mif->if_index; + mif->interrupt_line.index = unix_file_add (&unix_main, &template); + + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (fd_array)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + fd_array[0] = mfd; + clib_memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); + + mif->flags |= MEMIF_IF_FLAG_CONNECTING; + rv = sendmsg (mif->connection.fd, &mh, 0); + if (rv < 0) + { + DEBUG_UNIX_LOG ("Failed to send memif connection request"); + goto error; + } + + /* No need to keep the descriptor open, + * mmap creates an extra reference to the underlying file */ + close (mfd); + mfd = -1; + /* This FD is given to peer, so we can close it */ + close (fd_array[1]); + fd_array[1] = -1; + return; + +error: + if (mfd > -1) + close (mfd); + if (fd_array[1] > -1) + close (fd_array[1]); + memif_disconnect (vm, mif); +} + +static uword +memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif; + struct sockaddr_un sun; + int sockfd; + uword *event_data = 0, event_type; + unix_file_t template = { 0 }; + u8 enabled = 0; + f64 start_time, last_run_duration = 0, now; + + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sun.sun_family = AF_UNIX; + template.read_function = memif_conn_fd_read_ready; + + while (1) + { + if (enabled) + vlib_process_wait_for_event_or_clock (vm, + (f64) 3 - last_run_duration); + else + vlib_process_wait_for_event (vm); + + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + switch (event_type) + { + case ~0: + break; + case MEMIF_PROCESS_EVENT_START: + enabled = 1; + break; + case MEMIF_PROCESS_EVENT_STOP: + enabled = 0; + continue; + default: + ASSERT (0); + } + + last_run_duration = start_time = vlib_time_now (vm); + /* *INDENT-OFF* */ + pool_foreach (mif, mm->interfaces, + ({ + /* Allow no more than 10us without a pause */ + now = vlib_time_now (vm); + if (now > start_time + 10e-6) + { + vlib_process_suspend (vm, 100e-6); /* suspend for 100 us */ + start_time = vlib_time_now (vm); + } + + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTING) + continue; + + if (mif->flags & MEMIF_IF_FLAG_CONNECTED) + continue; + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + strncpy (sun.sun_path, (char *) mif->socket_filename, + sizeof (sun.sun_path) - 1); + + if (connect + (sockfd, (struct sockaddr *) &sun, + sizeof (struct sockaddr_un)) == 0) + { + mif->connection.fd = sockfd; + template.file_descriptor = sockfd; + template.private_data = mif->if_index << 1; + mif->connection.index = unix_file_add (&unix_main, &template); + memif_connect_master (vm, mif); + + /* grab another fd */ + sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + { + DEBUG_UNIX_LOG ("socket AF_UNIX"); + return 0; + } + } + } + })); + /* *INDENT-ON* */ + last_run_duration = vlib_time_now (vm) - last_run_duration; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (memif_process_node,static) = { + .function = memif_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "memif-process", +}; +/* *INDENT-ON* */ + +static void +memif_close_if (memif_main_t * mm, memif_if_t * mif) +{ + vlib_main_t *vm = vlib_get_main (); + memif_listener_t *listener = 0; + memif_pending_conn_t *pending_conn = 0; + + memif_disconnect (vm, mif); + + if (mif->listener_index != (uword) ~ 0) + { + listener = pool_elt_at_index (mm->listeners, mif->listener_index); + if (--listener->usage_counter == 0) + { + /* not used anymore -> remove the socket and pending connections */ + + /* *INDENT-OFF* */ + pool_foreach (pending_conn, mm->pending_conns, + ({ + if (pending_conn->listener_index == mif->listener_index) + { + memif_remove_pending_conn (pending_conn); + } + })); + /* *INDENT-ON* */ + + unix_file_del (&unix_main, + unix_main.file_pool + listener->socket.index); + pool_put (mm->listeners, listener); + unlink ((char *) mif->socket_filename); + } + } + + if (mif->lockp != 0) + { + clib_mem_free ((void *) mif->lockp); + mif->lockp = 0; + } + + mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); + vec_free (mif->socket_filename); + vec_free (mif->ring_data); + + memset (mif, 0, sizeof (*mif)); + pool_put (mm->interfaces, mif); +} + +int +memif_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + memif_input_node.index, + VLIB_NODE_STATE_POLLING); + })); + + return 0; +} + +int +memif_worker_thread_disable () +{ + foreach_vlib_main (( + { + vlib_node_set_state (this_vlib_main, + memif_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + })); + + return 0; +} + +int +memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_main_t *vnm = vnet_get_main (); + memif_if_t *mif = 0; + vnet_sw_interface_t *sw; + clib_error_t *error = 0; + int ret = 0; + uword *p; + + p = mhash_get (&mm->if_index_by_key, &args->key); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (mm->interfaces, mif); + memset (mif, 0, sizeof (*mif)); + mif->key = args->key; + mif->if_index = mif - mm->interfaces; + mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; + mif->listener_index = ~0; + mif->connection.index = mif->interrupt_line.index = ~0; + mif->connection.fd = mif->interrupt_line.fd = -1; + + if (tm->n_vlib_mains > 1) + { + mif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) mif->lockp, 0, CLIB_CACHE_LINE_BYTES); + } + + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 0xfe; + } + + error = ethernet_register_interface (vnm, memif_device_class.index, + mif->if_index, args->hw_addr, + &mif->hw_if_index, + memif_eth_flag_change); + + if (error) + { + clib_error_report (error); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index); + mif->sw_if_index = sw->sw_if_index; + + mif->log2_ring_size = args->log2_ring_size; + mif->buffer_size = args->buffer_size; + + /* TODO: make configurable */ + mif->num_s2m_rings = 1; + mif->num_m2s_rings = 1; + + mhash_set_mem (&mm->if_index_by_key, &args->key, &mif->if_index, 0); + + if (args->socket_filename != 0) + mif->socket_filename = args->socket_filename; + else + mif->socket_filename = vec_dup (mm->default_socket_filename); + + args->sw_if_index = mif->sw_if_index; + + if (args->is_master) + { + struct sockaddr_un un = { 0 }; + struct stat file_stat; + int on = 1; + memif_listener_t *listener = 0; + + if (stat ((char *) mif->socket_filename, &file_stat) == 0) + { + if (!S_ISSOCK (file_stat.st_mode)) + { + errno = ENOTSOCK; + ret = VNET_API_ERROR_SYSCALL_ERROR_2; + goto error; + } + /* *INDENT-OFF* */ + pool_foreach (listener, mm->listeners, + ({ + if (listener->sock_dev == file_stat.st_dev && + listener->sock_ino == file_stat.st_ino) + { + /* attach memif to the existing listener */ + mif->listener_index = listener->index; + ++listener->usage_counter; + goto signal; + } + })); + /* *INDENT-ON* */ + unlink ((char *) mif->socket_filename); + } + + pool_get (mm->listeners, listener); + memset (listener, 0, sizeof (*listener)); + listener->socket.fd = -1; + listener->socket.index = ~0; + listener->index = listener - mm->listeners; + listener->usage_counter = 1; + + if ((listener->socket.fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + + un.sun_family = AF_UNIX; + strncpy ((char *) un.sun_path, (char *) mif->socket_filename, + sizeof (un.sun_path) - 1); + + if (setsockopt (listener->socket.fd, SOL_SOCKET, SO_PASSCRED, + &on, sizeof (on)) < 0) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_4; + goto error; + } + if (bind (listener->socket.fd, (struct sockaddr *) &un, + sizeof (un)) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_5; + goto error; + } + if (listen (listener->socket.fd, 1) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_6; + goto error; + } + + if (stat ((char *) mif->socket_filename, &file_stat) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_7; + goto error; + } + + listener->sock_dev = file_stat.st_dev; + listener->sock_ino = file_stat.st_ino; + + unix_file_t template = { 0 }; + template.read_function = memif_conn_fd_accept_ready; + template.file_descriptor = listener->socket.fd; + template.private_data = listener->index; + listener->socket.index = unix_file_add (&unix_main, &template); + + mif->listener_index = listener->index; + } + else + { + mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + } + +#if 0 + /* use configured or generate random MAC address */ + if (!args->hw_addr_set && + tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 1) + memif_worker_thread_enable (); +#endif + +signal: + if (pool_elts (mm->interfaces) == 1) + { + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_START, 0); + } + return 0; + +error: + if (mif->hw_if_index != ~0) + { + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + } + memif_close_if (mm, mif); + return ret; +} + +int +memif_delete_if (vlib_main_t * vm, u64 key) +{ + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_if_t *mif; + uword *p; + + p = mhash_get (&mm->if_index_by_key, &key); + if (p == NULL) + { + clib_warning ("Memory interface with key 0x%" PRIx64 " does not exist", + key); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + mif = pool_elt_at_index (mm->interfaces, p[0]); + mif->flags |= MEMIF_IF_FLAG_DELETING; + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); + + /* remove the interface */ + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + memif_close_if (mm, mif); + + if (pool_elts (mm->interfaces) == 0) + { + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_STOP, 0); + } + +#if 0 + if (tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 0) + memif_worker_thread_disable (); +#endif + + return 0; +} + +static clib_error_t * +memif_init (vlib_main_t * vm) +{ + memif_main_t *mm = &memif_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + memset (mm, 0, sizeof (memif_main_t)); + + mm->input_cpu_first_index = 0; + mm->input_cpu_count = 1; + + /* initialize binary API */ + memif_plugin_api_hookup (vm); + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + mm->input_cpu_first_index = tr->first_index; + mm->input_cpu_count = tr->count; + } + + mhash_init (&mm->if_index_by_key, sizeof (uword), sizeof (u64)); + + vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + /* set default socket filename */ + vec_validate (mm->default_socket_filename, + strlen (MEMIF_DEFAULT_SOCKET_FILENAME)); + strncpy ((char *) mm->default_socket_filename, + MEMIF_DEFAULT_SOCKET_FILENAME, + vec_len (mm->default_socket_filename) - 1); + + return 0; +} + +VLIB_INIT_FUNCTION (memif_init); + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Packet Memory Interface (experimetal)", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 1927da29ccbe1d4cc8e59ccfa197eb41c257814f Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 27 Mar 2017 17:08:20 +0200 Subject: vppinfra: add spinlock inline functions Change-Id: I86089e9bb604adfc260a111685001be1c897ce53 Signed-off-by: Damjan Marion --- src/plugins/memif/device.c | 21 +------- src/plugins/memif/memif.c | 12 +---- src/plugins/memif/memif.h | 4 +- src/vnet/devices/af_packet/af_packet.c | 6 +-- src/vnet/devices/af_packet/af_packet.h | 4 +- src/vnet/devices/af_packet/device.c | 9 +--- src/vnet/devices/netmap/device.c | 9 +--- src/vnet/devices/netmap/netmap.c | 6 +-- src/vnet/devices/netmap/netmap.h | 4 +- src/vppinfra.am | 1 + src/vppinfra/lock.h | 97 ++++++++++++++++++++++++++++++++++ 11 files changed, 117 insertions(+), 56 deletions(-) create mode 100644 src/vppinfra/lock.h (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index 446537a3..4faeb055 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -78,23 +78,6 @@ format_memif_tx_trace (u8 * s, va_list * args) return s; } -static_always_inline void -memif_interface_lock (memif_if_t * mif) -{ - if (PREDICT_FALSE (mif->lockp != 0)) - { - while (__sync_lock_test_and_set (mif->lockp, 1)) - ; - } -} - -static_always_inline void -memif_interface_unlock (memif_if_t * mif) -{ - if (PREDICT_FALSE (mif->lockp != 0)) - *mif->lockp = 0; -} - static_always_inline void memif_prefetch_buffer_and_data (vlib_main_t * vm, u32 bi) { @@ -117,7 +100,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u16 head, tail; u16 free_slots; - memif_interface_lock (mif); + clib_spinlock_lock_if_init (&mif->lockp); /* free consumed buffers */ @@ -210,7 +193,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_MEMORY_STORE_BARRIER (); ring->head = head; - memif_interface_unlock (mif); + clib_spinlock_unlock (&mif->lockp); if (n_left) { diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7ba67c5b..cf8ca577 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -716,11 +716,7 @@ memif_close_if (memif_main_t * mm, memif_if_t * mif) } } - if (mif->lockp != 0) - { - clib_mem_free ((void *) mif->lockp); - mif->lockp = 0; - } + clib_spinlock_free (&mif->lockp); mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); vec_free (mif->socket_filename); @@ -783,11 +779,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->connection.fd = mif->interrupt_line.fd = -1; if (tm->n_vlib_mains > 1) - { - mif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) mif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&mif->lockp); if (!args->hw_addr_set) { diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index a7a88e07..f57170f8 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -15,6 +15,8 @@ *------------------------------------------------------------------ */ +#include + typedef struct { u16 version; @@ -98,7 +100,7 @@ typedef struct typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u32 flags; #define MEMIF_IF_FLAG_ADMIN_UP (1 << 0) #define MEMIF_IF_FLAG_IS_SLAVE (1 << 1) diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 5fdc59f2..20285107 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -229,11 +229,7 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, apif->next_rx_frame = 0; if (tm->n_vlib_mains > 1) - { - apif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) apif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&apif->lockp); { unix_file_t template = { 0 }; diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 50ec2378..77a2c7a3 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -17,10 +17,12 @@ *------------------------------------------------------------------ */ +#include + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u8 *host_if_name; int fd; struct tpacket_req *rx_req; diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 9a94fc5e..2ba3f579 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -92,11 +92,7 @@ af_packet_interface_tx (vlib_main_t * vm, struct tpacket2_hdr *tph; u32 frame_not_ready = 0; - if (PREDICT_FALSE (apif->lockp != 0)) - { - while (__sync_lock_test_and_set (apif->lockp, 1)) - ; - } + clib_spinlock_lock_if_init (&apif->lockp); while (n_left > 0) { @@ -159,8 +155,7 @@ af_packet_interface_tx (vlib_main_t * vm, } } - if (PREDICT_FALSE (apif->lockp != 0)) - *apif->lockp = 0; + clib_spinlock_unlock_if_init (&apif->lockp); if (PREDICT_FALSE (frame_not_ready)) vlib_error_count (vm, node->node_index, diff --git a/src/vnet/devices/netmap/device.c b/src/vnet/devices/netmap/device.c index 2152824f..aea9ddf4 100644 --- a/src/vnet/devices/netmap/device.c +++ b/src/vnet/devices/netmap/device.c @@ -105,11 +105,7 @@ netmap_interface_tx (vlib_main_t * vm, netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); int cur_ring; - if (PREDICT_FALSE (nif->lockp != 0)) - { - while (__sync_lock_test_and_set (nif->lockp, 1)) - ; - } + clib_spinlock_lock_if_init (&nif->lockp); cur_ring = nif->first_tx_ring; @@ -165,8 +161,7 @@ netmap_interface_tx (vlib_main_t * vm, if (n_left < frame->n_vectors) ioctl (nif->fd, NIOCTXSYNC, NULL); - if (PREDICT_FALSE (nif->lockp != 0)) - *nif->lockp = 0; + clib_spinlock_unlock_if_init (&nif->lockp); if (n_left) vlib_error_count (vm, node->node_index, diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c index 3bdb442d..09afc764 100644 --- a/src/vnet/devices/netmap/netmap.c +++ b/src/vnet/devices/netmap/netmap.c @@ -185,11 +185,7 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, nif->per_interface_next_index = ~0; if (tm->n_vlib_mains > 1) - { - nif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) nif->lockp, 0, CLIB_CACHE_LINE_BYTES); - } + clib_spinlock_init (&nif->lockp); { unix_file_t template = { 0 }; diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h index 39a94043..e04f045d 100644 --- a/src/vnet/devices/netmap/netmap.h +++ b/src/vnet/devices/netmap/netmap.h @@ -40,10 +40,12 @@ * SUCH DAMAGE. */ +#include + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - volatile u32 *lockp; + clib_spinlock_t lockp; u8 *host_if_name; uword if_index; u32 hw_if_index; diff --git a/src/vppinfra.am b/src/vppinfra.am index 4b9f0c29..fed1981e 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -180,6 +180,7 @@ nobase_include_HEADERS = \ vppinfra/graph.h \ vppinfra/hash.h \ vppinfra/heap.h \ + vppinfra/lock.h \ vppinfra/longjmp.h \ vppinfra/macros.h \ vppinfra/math.h \ diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h new file mode 100644 index 00000000..c60ff414 --- /dev/null +++ b/src/vppinfra/lock.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_clib_lock_h +#define included_clib_lock_h + +#include + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 lock; +#if CLIB_DEBUG > 0 + pid_t pid; + uword cpu_index; + void *frame_address; +#endif +} *clib_spinlock_t; + +static inline void +clib_spinlock_init (clib_spinlock_t * p) +{ + *p = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + memset ((void *) *p, 0, CLIB_CACHE_LINE_BYTES); +} + +static inline void +clib_spinlock_free (clib_spinlock_t * p) +{ + if (*p) + { + clib_mem_free ((void *) *p); + *p = 0; + } +} + +static_always_inline void +clib_spinlock_lock (clib_spinlock_t * p) +{ + while (__sync_lock_test_and_set (&(*p)->lock, 1)) +#if __x86_64__ + __builtin_ia32_pause () +#endif + ; +#if CLIB_DEBUG > 0 + (*p)->frame_address = __builtin_frame_address (0); + (*p)->pid = getpid (); + (*p)->cpu_index = os_get_cpu_number (); +#endif +} + +static_always_inline void +clib_spinlock_lock_if_init (clib_spinlock_t * p) +{ + if (PREDICT_FALSE (*p != 0)) + clib_spinlock_lock (p); +} + +static_always_inline void +clib_spinlock_unlock (clib_spinlock_t * p) +{ + (*p)->lock = 0; +#if CLIB_DEBUG > 0 + (*p)->frame_address = 0; + (*p)->pid = 0; + (*p)->cpu_index = 0; +#endif +} + +static_always_inline void +clib_spinlock_unlock_if_init (clib_spinlock_t * p) +{ + if (PREDICT_FALSE (*p != 0)) + clib_spinlock_unlock (p); +} + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From c966ecd11ccb4e2ad8e92385a4935c8fd4586a2d Mon Sep 17 00:00:00 2001 From: Milan Lenco Date: Fri, 31 Mar 2017 14:51:18 +0200 Subject: Fix memif coverity issues Change-Id: I844ec53b55ceaa1e00996f5cf8a018537ea8b481 Signed-off-by: Milan Lenco --- src/plugins/memif/device.c | 16 ++++++++++++---- src/plugins/memif/memif.c | 26 ++++++++++++++++++++------ src/plugins/memif/memif.h | 1 + 3 files changed, 33 insertions(+), 10 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index 4faeb055..70bdb48b 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -193,7 +193,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_MEMORY_STORE_BARRIER (); ring->head = head; - clib_spinlock_unlock (&mif->lockp); + clib_spinlock_unlock_if_init (&mif->lockp); if (n_left) { @@ -255,9 +255,11 @@ static clib_error_t * memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { memif_main_t *apm = &memif_main; - memif_msg_t msg; + vlib_main_t *vm = vlib_get_main (); + memif_msg_t msg = { 0 }; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + static clib_error_t *error = 0; if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; @@ -269,11 +271,17 @@ memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { msg.version = MEMIF_VERSION; msg.type = MEMIF_MSG_TYPE_DISCONNECT; - send (mif->connection.fd, &msg, sizeof (msg), 0); + if (send (mif->connection.fd, &msg, sizeof (msg), 0) < 0) + { + clib_unix_warning ("Failed to send disconnect request"); + error = clib_error_return_unix (0, "send fd %d", + mif->connection.fd); + memif_disconnect (vm, mif); + } } } - return 0; + return error; } static clib_error_t * diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index cf8ca577..f0d4aacd 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -85,7 +85,7 @@ memif_connect (vlib_main_t * vm, memif_if_t * mif) VNET_HW_INTERFACE_FLAG_LINK_UP); } -static void +void memif_disconnect (vlib_main_t * vm, memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); @@ -248,7 +248,12 @@ response: resp.version = MEMIF_VERSION; resp.type = MEMIF_MSG_TYPE_CONNECT_RESP; resp.retval = retval; - send (fd, &resp, sizeof (resp), 0); + if (send (fd, &resp, sizeof (resp), 0) < 0) + { + DEBUG_UNIX_LOG ("Failed to send connection response"); + error = clib_error_return_unix (0, "send fd %d", fd); + memif_disconnect (vm, mif); + } return error; } @@ -511,7 +516,8 @@ memif_connect_master (vlib_main_t * vm, memif_if_t * mif) { u16 slot = i * (1 << mif->log2_ring_size) + j; ring->desc[j].region = 0; - ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].offset = + buffer_offset + (u32) (slot * mif->buffer_size); ring->desc[j].buffer_length = mif->buffer_size; } } @@ -524,7 +530,8 @@ memif_connect_master (vlib_main_t * vm, memif_if_t * mif) u16 slot = (i + mif->num_s2m_rings) * (1 << mif->log2_ring_size) + j; ring->desc[j].region = 0; - ring->desc[j].offset = buffer_offset + (slot * mif->buffer_size); + ring->desc[j].offset = + buffer_offset + (u32) (slot * mif->buffer_size); ring->desc[j].buffer_length = mif->buffer_size; } } @@ -595,6 +602,11 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) f64 start_time, last_run_duration = 0, now; sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + { + DEBUG_UNIX_LOG ("socket AF_UNIX"); + return 0; + } sun.sun_family = AF_UNIX; template.read_function = memif_conn_fd_read_ready; @@ -730,26 +742,28 @@ int memif_worker_thread_enable () { /* if worker threads are enabled, switch to polling mode */ + /* *INDENT-OFF* */ foreach_vlib_main (( { vlib_node_set_state (this_vlib_main, memif_input_node.index, VLIB_NODE_STATE_POLLING); })); - + /* *INDENT-ON* */ return 0; } int memif_worker_thread_disable () { + /* *INDENT-OFF* */ foreach_vlib_main (( { vlib_node_set_state (this_vlib_main, memif_input_node.index, VLIB_NODE_STATE_INTERRUPT); })); - + /* *INDENT-ON* */ return 0; } diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index f57170f8..ea5b3501 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -196,6 +196,7 @@ typedef struct int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); int memif_delete_if (vlib_main_t * vm, u64 key); +void memif_disconnect (vlib_main_t * vm, memif_if_t * mif); clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); #ifndef __NR_memfd_create -- cgit 1.2.3-korg From bf123dbedafd8f6d30b6e671d391ce7452ce45d8 Mon Sep 17 00:00:00 2001 From: Milan Lenco Date: Wed, 5 Apr 2017 14:42:42 +0200 Subject: Fix two more memif coverity issues Change-Id: I935620798d6fe82b99b6bd564749e20a189b4ae3 Signed-off-by: Milan Lenco --- src/plugins/memif/memif.c | 13 ++++++++++++- src/plugins/memif/node.c | 44 ++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 25 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index f0d4aacd..dc7ddd8a 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -241,6 +241,7 @@ memif_process_connect_req (memif_pending_conn_t * pending_conn, uf->private_data = mif->if_index << 1; mif->connection = pending_conn->connection; pool_put (mm->pending_conns, pending_conn); + pending_conn = 0; memif_connect (vm, mif); @@ -252,7 +253,17 @@ response: { DEBUG_UNIX_LOG ("Failed to send connection response"); error = clib_error_return_unix (0, "send fd %d", fd); - memif_disconnect (vm, mif); + if (pending_conn) + memif_remove_pending_conn (pending_conn); + else + memif_disconnect (vm, mif); + } + if (retval > 0) + { + if (shm_fd >= 0) + close (shm_fd); + if (int_fd >= 0) + close (int_fd); } return error; } diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 298472b0..659d5dfb 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -214,20 +214,19 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (n_trace > 0)) { - if (b0) - { - memif_input_trace_t *tr; - vlib_trace_buffer (vm, node, next0, b0, - /* follow_chain */ 0); - vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; - } - - if (n_trace && b1) + /* b0 */ + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = rid; + + if (n_trace) { + /* b1 */ memif_input_trace_t *tr; vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0); @@ -286,17 +285,14 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (n_trace > 0)) { - if (b0) - { - memif_input_trace_t *tr; - vlib_trace_buffer (vm, node, next0, b0, - /* follow_chain */ 0); - vlib_set_trace_count (vm, node, --n_trace); - tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->next_index = next0; - tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; - } + memif_input_trace_t *tr; + vlib_trace_buffer (vm, node, next0, b0, + /* follow_chain */ 0); + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = mif->hw_if_index; + tr->ring = rid; } -- cgit 1.2.3-korg From f1ff5ff1049fd2750823273a3a92779e5fd6e1a5 Mon Sep 17 00:00:00 2001 From: Milan Lenco Date: Wed, 19 Apr 2017 10:34:53 +0200 Subject: Temporary workaround for the bug VPP-698. Change-Id: I220b0b95449f24cc547206e38ab8e10019115ec0 Signed-off-by: Milan Lenco --- src/plugins/memif/memif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index dc7ddd8a..98d31eb0 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -368,7 +368,7 @@ memif_conn_fd_read_ready (unix_file_t * uf) else if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { - clib_memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); + memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); } cmsg = CMSG_NXTHDR (&mh, cmsg); } @@ -573,7 +573,7 @@ memif_connect_master (vlib_main_t * vm, memif_if_t * mif) cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; fd_array[0] = mfd; - clib_memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); + memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); mif->flags |= MEMIF_IF_FLAG_CONNECTING; rv = sendmsg (mif->connection.fd, &mh, 0); -- cgit 1.2.3-korg From e50ed1de1e22dfa04de26fa2a471a703c1b6ed8f Mon Sep 17 00:00:00 2001 From: Steven Date: Mon, 15 May 2017 09:33:11 -0700 Subject: memif: migrate memif to use vnet device infra APIs Migrate memif to use vnet device infra APIs. No new function is added. Change-Id: I70e440d2ae1e673876365041f31fe78997aceecf Signed-off-by: Steven --- src/plugins/memif/memif.c | 22 ++++++++++++++++++++-- src/plugins/memif/node.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 98d31eb0..41c882fd 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -406,7 +406,7 @@ static clib_error_t * memif_int_fd_read_ready (unix_file_t * uf) { memif_main_t *mm = &memif_main; - vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); u8 b; ssize_t size; @@ -420,7 +420,7 @@ memif_int_fd_read_ready (unix_file_t * uf) mif->interrupt_line.index = ~0; mif->interrupt_line.fd = -1; } - vlib_node_set_interrupt_pending (vm, memif_input_node.index); + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); return 0; } @@ -789,6 +789,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) clib_error_t *error = 0; int ret = 0; uword *p; + vnet_hw_interface_t *hw; p = mhash_get (&mm->if_index_by_key, &args->key); if (p) @@ -937,6 +938,17 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; } + hw = vnet_get_hw_interface (vnm, mif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, + memif_input_node.index); + vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, 0, ~0); + ret = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, 0, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + if (ret) + clib_warning ("Warning: unable to set rx mode for interface %d: " + "rc=%d", mif->hw_if_index, ret); + #if 0 /* use configured or generate random MAC address */ if (!args->hw_addr_set && @@ -969,6 +981,7 @@ memif_delete_if (vlib_main_t * vm, u64 key) memif_main_t *mm = &memif_main; memif_if_t *mif; uword *p; + int ret; p = mhash_get (&mm->if_index_by_key, &key); if (p == NULL) @@ -980,6 +993,11 @@ memif_delete_if (vlib_main_t * vm, u64 key) mif = pool_elt_at_index (mm->interfaces, p[0]); mif->flags |= MEMIF_IF_FLAG_DELETING; + ret = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, 0); + if (ret) + clib_warning ("Warning: unable to unassign interface %d: rc=%d", + mif->hw_if_index, ret); + /* bring down the interface */ vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index cee1f3d1..2690dc4e 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -331,26 +331,26 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, u32 thread_index = vlib_get_thread_index (); memif_main_t *nm = &memif_main; memif_if_t *mif; - - /* *INDENT-OFF* */ - pool_foreach (mif, nm->interfaces, - ({ - if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && - mif->flags & MEMIF_IF_FLAG_CONNECTED && - (mif->if_index % nm->input_cpu_count) == - (thread_index - nm->input_cpu_first_index)) - { - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - n_rx_packets += - memif_device_input_inline (vm, node, frame, mif, - MEMIF_RING_M2S); - else - n_rx_packets += - memif_device_input_inline (vm, node, frame, mif, - MEMIF_RING_S2M); - } - })); - /* *INDENT-ON* */ + vnet_device_input_runtime_t *rt = (void *) node->runtime_data; + vnet_device_and_queue_t *dq; + memif_ring_type_t type; + + foreach_device_and_queue (dq, rt->devices_and_queues) + { + mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); + if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && + mif->flags & MEMIF_IF_FLAG_CONNECTED && + (mif->if_index % nm->input_cpu_count) == + (thread_index - nm->input_cpu_first_index)) + { + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + type = MEMIF_RING_M2S; + else + type = MEMIF_RING_S2M; + n_rx_packets += + memif_device_input_inline (vm, node, frame, mif, type); + } + } return n_rx_packets; } -- cgit 1.2.3-korg From 142d151f075a73706f914451bceef18f4f07c28f Mon Sep 17 00:00:00 2001 From: Steven Date: Fri, 26 May 2017 14:18:58 -0700 Subject: memif: master instance crashes when typing quit on slave When I type in 'quit' on the slave instance, the master instance crashes on this line. 0: /home/sluong/vpp-master/vpp/build-data/../src/vlib/unix/input.c:200 (linux_epoll_input) assertion `! pool_is_free (um->file_pool, _e)' fails Aborted (core dumped) Below is the decode from gdb line_number=0, fmt=0x7f57af6cc9a0 "%s:%d (%s) assertion `%s' fails") at /home/sluong/vpp-master/vpp/build-data/../src/vppinfra/error.c:143 vm=0x7f57af8e2400 , node=0x7f576d40ad80, frame=0x0) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/unix/input.c:200 vm=0x7f57af8e2400 , node=0x7f576d40ad80, type=VLIB_NODE_TYPE_PRE_INPUT, dispatch_state=VLIB_NODE_STATE_POLLING, frame=0x0, last_time_stamp=1525665215050617) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/main.c:1016 vm=0x7f57af8e2400 , is_main=1) at /home/sluong/vpp-master/vpp/build-data/../src/vlib/main.c:1500 I am able to reproduce the problem consistently with the below procedure. 1. Create 3 memif interfaces between slave and master instances. 2. Type 'quit' on the slave. Neither crashes the first time. 3. Bring back the slave. Type 'quit' on the master. Neither crashes. 4. Bring back the master. Type 'quit' on the slave. The master crashes. There are two places the interrupt line is disconnected and the unix file is removed via the call unix_file_del () 1. memif_int_fd_read_ready () 2. memif_disconnect () which is called via multiple places in memif. When the crash happens, the unix file was removed from memif_disconnect () via memif_conn_fd_read_ready () with size of the message == 0 in recvmsg (). It is noted when the unix file was removed from memif_int_fd_read_ready (), it never crashes. It is a race condition. However, if I follow the aformentioned procedure, the crash always happens. The reason the crash happens when memif_disconnect () removes the unix file is because there may still be pending input in linux_epoll_input (). When linux_epoll_input () tries to access the unix file via the line 200 unix_file_t *f = pool_elt_at_index (um->file_pool, i); it crashes. We could add code in linux_epoll_input () to avoid the crash if the index for the particular file_pool is already free. Or we could fix memif to not remove the unix file in memif_conn_fd_read_ready () when recvmsg () got 0 byte and just postpone the unix file deletion in memif_int_fd_read_ready () later after linux_epoll_input () got a chance to run to empty the input stream. I choose to fix the problem in the latter approach. I split the function memif_disconnect () into two parts. For the code path which memif_conn_fd_read_ready () calls memif_disconnect (), it does not remove the unix file. All other calls to memif_disconnect () will continue to do what it uses to do to avoid regression. Please let me know if I should fix the problem other way. Change-Id: I8efe2a3d24c6581609bc7b6fe82c2b59c22d8e4b Signed-off-by: Steven --- src/plugins/memif/memif.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 41c882fd..44c5012e 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -85,8 +85,8 @@ memif_connect (vlib_main_t * vm, memif_if_t * mif) VNET_HW_INTERFACE_FLAG_LINK_UP); } -void -memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +static void +memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); @@ -94,13 +94,6 @@ memif_disconnect (vlib_main_t * vm, memif_if_t * mif) if (mif->hw_if_index != ~0) vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - if (mif->interrupt_line.index != ~0) - { - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; /* closed in unix_file_del */ - } if (mif->connection.index != ~0) { unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); @@ -112,6 +105,20 @@ memif_disconnect (vlib_main_t * vm, memif_if_t * mif) vec_free (mif->regions); } +void +memif_disconnect (vlib_main_t * vm, memif_if_t * mif) +{ + if (mif->interrupt_line.index != ~0) + { + unix_file_del (&unix_main, + unix_main.file_pool + mif->interrupt_line.index); + mif->interrupt_line.index = ~0; + mif->interrupt_line.fd = -1; /* closed in unix_file_del */ + } + + memif_disconnect_do (vm, mif); +} + static clib_error_t * memif_process_connect_req (memif_pending_conn_t * pending_conn, memif_msg_t * req, struct ucred *slave_cr, @@ -329,13 +336,19 @@ memif_conn_fd_read_ready (unix_file_t * uf) size = recvmsg (uf->file_descriptor, &mh, 0); if (size != sizeof (memif_msg_t)) { - if (size != 0) + if (size == 0) { - DEBUG_UNIX_LOG ("Malformed message received on fd %d", - uf->file_descriptor); - error = clib_error_return_unix (0, "recvmsg fd %d", - uf->file_descriptor); + if (pending_conn) + memif_remove_pending_conn (pending_conn); + else + memif_disconnect_do (vm, mif); + return error; } + + DEBUG_UNIX_LOG ("Malformed message received on fd %d", + uf->file_descriptor); + error = clib_error_return_unix (0, "recvmsg fd %d", + uf->file_descriptor); goto disconnect; } @@ -420,7 +433,8 @@ memif_int_fd_read_ready (unix_file_t * uf) mif->interrupt_line.index = ~0; mif->interrupt_line.fd = -1; } - vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); + else + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); return 0; } -- cgit 1.2.3-korg From c2ebcbb89bea719418fc3f896ae3de4302acaf6d Mon Sep 17 00:00:00 2001 From: Steven Date: Tue, 30 May 2017 10:35:51 -0700 Subject: memif: multi-queues support - Add rx-queues and tx-queues option to the create memif CLI - Add vlib_worker_thread_barrier_sync () to memif_conn_fd_read_ready () as the latter function may disconnect the ring and clean up the shared memory. - On transmit, write the rid (queue number) to the socket. - On receive, read the rid and trigger the interrupt for the corresponding thread. Change-Id: If1c7e26c7124174678f047909cbc33e931eaac8c Signed-off-by: Steven --- src/plugins/memif/cli.c | 31 +++++++++- src/plugins/memif/device.c | 17 +++++- src/plugins/memif/memif.api | 4 ++ src/plugins/memif/memif.c | 134 ++++++++++++++++++++++++------------------ src/plugins/memif/memif.h | 31 ++++++++++ src/plugins/memif/memif_api.c | 5 ++ src/plugins/memif/node.c | 18 +++--- 7 files changed, 167 insertions(+), 73 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c index ef73693b..88c09e98 100644 --- a/src/plugins/memif/cli.c +++ b/src/plugins/memif/cli.c @@ -25,6 +25,20 @@ #include +static uword +unformat_memif_queues (unformat_input_t * input, va_list * args) +{ + u32 *rx_queues = va_arg (*args, u32 *); + u32 *tx_queues = va_arg (*args, u32 *); + + if (unformat (input, "rx-queues %u", rx_queues)) + ; + if (unformat (input, "tx-queues %u", tx_queues)) + ; + + return 1; +} + static clib_error_t * memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -34,6 +48,8 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, u32 ring_size = MEMIF_DEFAULT_RING_SIZE; memif_create_if_args_t args = { 0 }; args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES; + u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -51,7 +67,8 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, ; else if (unformat (line_input, "master")) args.is_master = 1; - else if (unformat (line_input, "slave")) + else if (unformat (line_input, "slave %U", + unformat_memif_queues, &rx_queues, &tx_queues)) args.is_master = 0; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.hw_addr)) @@ -67,6 +84,14 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.log2_ring_size = min_log2 (ring_size); + if (rx_queues > 255 || rx_queues < 1) + return clib_error_return (0, "rx queue must be between 1 - 255"); + if (tx_queues > 255 || tx_queues < 1) + return clib_error_return (0, "tx queue must be between 1 - 255"); + + args.rx_queues = rx_queues; + args.tx_queues = tx_queues; + r = memif_create_if (vm, &args); if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 @@ -87,7 +112,7 @@ VLIB_CLI_COMMAND (memif_create_command, static) = { .path = "create memif", .short_help = "create memif [key ] [socket ] " "[ring-size ] [buffer-size ] [hw-addr ] " - "", + "] [tx-queues ]>", .function = memif_create_command_fn, }; /* *INDENT-ON* */ @@ -148,7 +173,7 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, mif->socket_filename); vlib_cli_output (vm, " listener %d conn-fd %d int-fd %d", mif->listener_index, mif->connection.fd, mif->interrupt_line.fd); - vlib_cli_output (vm, " ring-size %u num-c2s-rings %u num-s2c-rings %u buffer_size %u", + vlib_cli_output (vm, " ring-size %u num-s2m-rings %u num-m2s-rings %u buffer_size %u", (1 << mif->log2_ring_size), mif->num_s2m_rings, mif->num_m2s_rings, diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index 70bdb48b..f496b17d 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -91,16 +91,27 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, memif_ring_type_t type) { - u8 rid = 0; - memif_ring_t *ring = memif_get_ring (mif, type, rid); + u8 rid; + memif_ring_t *ring; u32 *buffers = vlib_frame_args (frame); u32 n_left = frame->n_vectors; u16 ring_size = 1 << mif->log2_ring_size; u16 mask = ring_size - 1; u16 head, tail; u16 free_slots; + u32 thread_index = vlib_get_thread_index (); + u8 tx_queues = memif_get_tx_queues (mif); - clib_spinlock_lock_if_init (&mif->lockp); + if (tx_queues < vec_len (vlib_mains)) + { + rid = thread_index % tx_queues; + clib_spinlock_lock_if_init (&mif->lockp); + } + else + { + rid = thread_index; + } + ring = memif_get_ring (mif, type, rid); /* free consumed buffers */ diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 95e016c3..b0a351aa 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -17,6 +17,8 @@ @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param role - role of the interface in the connection (master/slave) + @param rx_queues - number of rx queues (only valid for slave) + #param tx_queues - number of tx queues (only valid for slave) @param key - 64bit integer used to authenticate and match opposite sides of the connection @param socket_filename - filename of the socket to be used for connection @@ -31,6 +33,8 @@ define memif_create u32 context; u8 role; /* 0 = master, 1 = slave */ + u8 rx_queues; /* optional, default is 1 */ + u8 tx_queues; /* optional, default is 1 */ u64 key; /* optional, default is 0 */ u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" and can be changed in VPP startup config */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 44c5012e..a671bda5 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -72,6 +72,9 @@ memif_connect (vlib_main_t * vm, memif_if_t * mif) vnet_main_t *vnm = vnet_get_main (); int num_rings = mif->num_s2m_rings + mif->num_m2s_rings; memif_ring_data_t *rd = NULL; + vnet_hw_interface_t *hw; + u8 rid, rx_queues; + int ret; vec_validate_aligned (mif->ring_data, num_rings - 1, CLIB_CACHE_LINE_BYTES); vec_foreach (rd, mif->ring_data) @@ -83,12 +86,30 @@ memif_connect (vlib_main_t * vm, memif_if_t * mif) mif->flags |= MEMIF_IF_FLAG_CONNECTED; vnet_hw_interface_set_flags (vnm, mif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); + + hw = vnet_get_hw_interface (vnm, mif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, + memif_input_node.index); + rx_queues = memif_get_rx_queues (mif); + for (rid = 0; rid < rx_queues; rid++) + { + vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, rid, ~0); + ret = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, rid, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + if (ret) + DEBUG_LOG ("Warning: unable to set rx mode for interface %d " + "queue %d: rc=%d", mif->hw_if_index, rid, ret); + } } static void memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); + u8 rid, rx_queues; + int rv; + memif_shm_t **shm; mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); if (mif->hw_if_index != ~0) @@ -101,7 +122,20 @@ memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) mif->connection.fd = -1; /* closed in unix_file_del */ } - // TODO: properly munmap + close memif-owned shared memory segments + rx_queues = memif_get_rx_queues (mif); + for (rid = 0; rid < rx_queues; rid++) + { + rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, rid); + if (rv) + DEBUG_LOG ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", mif->hw_if_index, rid, rv); + } + + shm = (memif_shm_t **) mif->regions; + rv = munmap ((void *) *shm, mif->shared_mem_size); + if (rv) + DEBUG_UNIX_LOG ("Error: failed munmap call"); + vec_free (mif->regions); } @@ -228,6 +262,7 @@ memif_process_connect_req (memif_pending_conn_t * pending_conn, goto response; } + mif->shared_mem_size = req->shared_mem_size; mif->log2_ring_size = req->log2_ring_size; mif->num_s2m_rings = req->num_s2m_rings; mif->num_m2s_rings = req->num_m2s_rings; @@ -332,6 +367,9 @@ memif_conn_fd_read_ready (unix_file_t * uf) else mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 1); + /* Stop workers to avoid end of the world */ + vlib_worker_thread_barrier_sync (vlib_get_main ()); + /* receive the incoming message */ size = recvmsg (uf->file_descriptor, &mh, 0); if (size != sizeof (memif_msg_t)) @@ -342,7 +380,7 @@ memif_conn_fd_read_ready (unix_file_t * uf) memif_remove_pending_conn (pending_conn); else memif_disconnect_do (vm, mif); - return error; + goto return_ok; } DEBUG_UNIX_LOG ("Malformed message received on fd %d", @@ -364,38 +402,36 @@ memif_conn_fd_read_ready (unix_file_t * uf) { case MEMIF_MSG_TYPE_CONNECT_REQ: if (pending_conn == 0) + DEBUG_LOG ("Received unexpected connection request"); + else { - DEBUG_LOG ("Received unexpected connection request"); - return 0; - } - - /* Read anciliary data */ - cmsg = CMSG_FIRSTHDR (&mh); - while (cmsg) - { - if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_CREDENTIALS) + /* Read anciliary data */ + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) { - cr = (struct ucred *) CMSG_DATA (cmsg); + if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_RIGHTS) + { + memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); + } + cmsg = CMSG_NXTHDR (&mh, cmsg); } - else if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); - } - cmsg = CMSG_NXTHDR (&mh, cmsg); + error = memif_process_connect_req (pending_conn, &msg, cr, + fd_array[0], fd_array[1]); } - - return memif_process_connect_req (pending_conn, &msg, cr, - fd_array[0], fd_array[1]); + break; case MEMIF_MSG_TYPE_CONNECT_RESP: if (mif == 0) - { - DEBUG_LOG ("Received unexpected connection response"); - return 0; - } - return memif_process_connect_resp (mif, &msg); + DEBUG_LOG ("Received unexpected connection response"); + else + error = memif_process_connect_resp (mif, &msg); + break; case MEMIF_MSG_TYPE_DISCONNECT: goto disconnect; @@ -405,13 +441,16 @@ memif_conn_fd_read_ready (unix_file_t * uf) goto disconnect; } - return 0; +return_ok: + vlib_worker_thread_barrier_release (vlib_get_main ()); + return error; disconnect: if (pending_conn) memif_remove_pending_conn (pending_conn); else memif_disconnect (vm, mif); + vlib_worker_thread_barrier_release (vlib_get_main ()); return error; } @@ -434,7 +473,8 @@ memif_int_fd_read_ready (unix_file_t * uf) mif->interrupt_line.fd = -1; } else - vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, 0); + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, b); + return 0; } @@ -530,6 +570,7 @@ memif_connect_master (vlib_main_t * vm, memif_if_t * mif) goto error; } + mif->shared_mem_size = msg.shared_mem_size; vec_add1 (mif->regions, shm); ((memif_shm_t *) mif->regions[0])->cookie = 0xdeadbeef; @@ -803,7 +844,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) clib_error_t *error = 0; int ret = 0; uword *p; - vnet_hw_interface_t *hw; p = mhash_get (&mm->if_index_by_key, &args->key); if (p) @@ -851,9 +891,8 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->log2_ring_size = args->log2_ring_size; mif->buffer_size = args->buffer_size; - /* TODO: make configurable */ - mif->num_s2m_rings = 1; - mif->num_m2s_rings = 1; + mif->num_s2m_rings = args->rx_queues; + mif->num_m2s_rings = args->tx_queues; mhash_set_mem (&mm->if_index_by_key, &args->key, &mif->if_index, 0); @@ -952,17 +991,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; } - hw = vnet_get_hw_interface (vnm, mif->hw_if_index); - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; - vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, - memif_input_node.index); - vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, 0, ~0); - ret = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, 0, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT); - if (ret) - clib_warning ("Warning: unable to set rx mode for interface %d: " - "rc=%d", mif->hw_if_index, ret); - #if 0 /* use configured or generate random MAC address */ if (!args->hw_addr_set && @@ -995,32 +1023,26 @@ memif_delete_if (vlib_main_t * vm, u64 key) memif_main_t *mm = &memif_main; memif_if_t *mif; uword *p; - int ret; + u32 hw_if_index; p = mhash_get (&mm->if_index_by_key, &key); if (p == NULL) { - clib_warning ("Memory interface with key 0x%" PRIx64 " does not exist", - key); + DEBUG_LOG ("Memory interface with key 0x%" PRIx64 " does not exist", + key); return VNET_API_ERROR_SYSCALL_ERROR_1; } mif = pool_elt_at_index (mm->interfaces, p[0]); mif->flags |= MEMIF_IF_FLAG_DELETING; - ret = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, 0); - if (ret) - clib_warning ("Warning: unable to unassign interface %d: rc=%d", - mif->hw_if_index, ret); - /* bring down the interface */ - vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); - /* remove the interface */ - ethernet_delete_interface (vnm, mif->hw_if_index); - mif->hw_if_index = ~0; + hw_if_index = mif->hw_if_index; memif_close_if (mm, mif); + /* remove the interface */ + ethernet_delete_interface (vnm, hw_if_index); if (pool_elts (mm->interfaces) == 0) { vlib_process_signal_event (vm, memif_process_node.index, diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index ea5b3501..56028a25 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -33,7 +33,9 @@ typedef struct u8 log2_ring_size; #define MEMIF_DEFAULT_RING_SIZE 1024 u16 num_s2m_rings; +#define MEMIF_DEFAULT_RX_QUEUES 1 u16 num_m2s_rings; +#define MEMIF_DEFAULT_TX_QUEUES 1 u16 buffer_size; #define MEMIF_DEFAULT_BUFFER_SIZE 2048 u32 shared_mem_size; @@ -126,6 +128,7 @@ typedef struct u8 num_s2m_rings; u8 num_m2s_rings; u16 buffer_size; + u32 shared_mem_size; memif_ring_data_t *ring_data; @@ -189,6 +192,8 @@ typedef struct u16 buffer_size; u8 hw_addr_set; u8 hw_addr[6]; + u8 rx_queues; + u8 tx_queues; /* return */ u32 sw_if_index; @@ -211,6 +216,32 @@ clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); #endif #endif +static_always_inline u8 +memif_get_rx_queues (memif_if_t * mif) +{ + u8 rx_queues; + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + rx_queues = mif->num_m2s_rings; + else + rx_queues = mif->num_s2m_rings; + + return (rx_queues); +} + +static_always_inline u8 +memif_get_tx_queues (memif_if_t * mif) +{ + u8 tx_queues; + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + tx_queues = mif->num_s2m_rings; + else + tx_queues = mif->num_m2s_rings; + + return (tx_queues); +} + static inline int memfd_create (const char *name, unsigned int flags) { diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index 1ade317e..1470f944 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -122,6 +122,11 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) /* role */ args.is_master = (mp->role == 0); + if (args.is_master == 0) + { + args.rx_queues = mp->rx_queues; + args.tx_queues = mp->tx_queues; + } /* ring size */ if (mp->ring_size) diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 2690dc4e..fd7baa30 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -78,15 +78,12 @@ memif_prefetch (vlib_main_t * vm, u32 bi) static_always_inline uword memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, - memif_ring_type_t type) + memif_ring_type_t type, u16 rid) { vnet_main_t *vnm = vnet_get_main (); - u8 rid = 0; /* Ring id */ memif_ring_t *ring = memif_get_ring (mif, type, rid); - memif_ring_data_t *rd = - vec_elt_at_index (mif->ring_data, rid + type * mif->num_s2m_rings); + memif_ring_data_t *rd; u16 head; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; uword n_trace = vlib_get_trace_count (vm, node); memif_main_t *nm = &memif_main; @@ -102,6 +99,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u16 num_slots; void *mb0, *mb1; + rd = vec_elt_at_index (mif->ring_data, rid + type * mif->num_s2m_rings); if (mif->per_interface_next_index != ~0) next_index = mif->per_interface_next_index; @@ -328,7 +326,6 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_rx_packets = 0; - u32 thread_index = vlib_get_thread_index (); memif_main_t *nm = &memif_main; memif_if_t *mif; vnet_device_input_runtime_t *rt = (void *) node->runtime_data; @@ -338,17 +335,16 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, foreach_device_and_queue (dq, rt->devices_and_queues) { mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); - if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP && - mif->flags & MEMIF_IF_FLAG_CONNECTED && - (mif->if_index % nm->input_cpu_count) == - (thread_index - nm->input_cpu_first_index)) + if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) && + (mif->flags & MEMIF_IF_FLAG_CONNECTED)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) type = MEMIF_RING_M2S; else type = MEMIF_RING_S2M; n_rx_packets += - memif_device_input_inline (vm, node, frame, mif, type); + memif_device_input_inline (vm, node, frame, mif, type, + dq->queue_id); } } -- cgit 1.2.3-korg From f98bb74372410ed207891b744285cdb0618b560b Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 1 Jun 2017 14:25:15 -0700 Subject: memif: fix coverity warnings Check -1 for return from read prior to using the data Change-Id: Ibab7309244de488737ea7938b334fab495bf855d Signed-off-by: Steven --- src/plugins/memif/memif.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index a671bda5..7d2a09a2 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -472,6 +472,8 @@ memif_int_fd_read_ready (unix_file_t * uf) mif->interrupt_line.index = ~0; mif->interrupt_line.fd = -1; } + else if (size < 0) + DEBUG_UNIX_LOG ("Failed to read from socket"); else vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, b); -- cgit 1.2.3-korg From d51a1f6ffe80bcd8f44e72fa4a98ac70225ba519 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 5 Jun 2017 15:37:58 +0200 Subject: memif: complete refactor of socket handling code Change-Id: I4d41def83a23f13701f1ddcea722d481e4c85cbc Signed-off-by: Damjan Marion --- src/plugins/memif.am | 1 + src/plugins/memif/cli.c | 163 ++++-- src/plugins/memif/device.c | 69 +-- src/plugins/memif/memif.api | 17 +- src/plugins/memif/memif.c | 1144 ++++++++++++++-------------------------- src/plugins/memif/memif.h | 360 +++++-------- src/plugins/memif/memif_api.c | 52 +- src/plugins/memif/memif_test.c | 21 +- src/plugins/memif/node.c | 83 +-- src/plugins/memif/private.h | 296 +++++++++++ src/plugins/memif/socket.c | 736 ++++++++++++++++++++++++++ 11 files changed, 1796 insertions(+), 1146 deletions(-) create mode 100644 src/plugins/memif/private.h create mode 100644 src/plugins/memif/socket.c (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif.am b/src/plugins/memif.am index bd01b2f8..15147e77 100644 --- a/src/plugins/memif.am +++ b/src/plugins/memif.am @@ -19,6 +19,7 @@ memif_plugin_la_SOURCES = memif/memif.c \ memif/cli.c \ memif/node.c \ memif/device.c \ + memif/socket.c \ memif/memif_plugin.api.h memif_test_plugin_la_SOURCES = \ diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c index 88c09e98..2d126aac 100644 --- a/src/plugins/memif/cli.c +++ b/src/plugins/memif/cli.c @@ -24,20 +24,7 @@ #include #include - -static uword -unformat_memif_queues (unformat_input_t * input, va_list * args) -{ - u32 *rx_queues = va_arg (*args, u32 *); - u32 *tx_queues = va_arg (*args, u32 *); - - if (unformat (input, "rx-queues %u", rx_queues)) - ; - if (unformat (input, "tx-queues %u", tx_queues)) - ; - - return 1; -} +#include static clib_error_t * memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -57,18 +44,23 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "key 0x%" PRIx64, &args.key)) + if (unformat (line_input, "id %u", &args.id)) ; else if (unformat (line_input, "socket %s", &args.socket_filename)) ; + else if (unformat (line_input, "secret %s", &args.secret)) + ; else if (unformat (line_input, "ring-size %u", &ring_size)) ; + else if (unformat (line_input, "rx-queues %u", &rx_queues)) + ; + else if (unformat (line_input, "tx-queues %u", &tx_queues)) + ; else if (unformat (line_input, "buffer-size %u", &args.buffer_size)) ; else if (unformat (line_input, "master")) args.is_master = 1; - else if (unformat (line_input, "slave %U", - unformat_memif_queues, &rx_queues, &tx_queues)) + else if (unformat (line_input, "slave")) args.is_master = 0; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.hw_addr)) @@ -94,6 +86,9 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, r = memif_create_if (vm, &args); + vec_free (args.socket_filename); + vec_free (args.secret); + if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 && r >= VNET_API_ERROR_SYSCALL_ERROR_10) return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); @@ -102,7 +97,7 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "Invalid interface name"); if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface already exists"); + return clib_error_return (0, "Interface with same id already exists"); return 0; } @@ -110,9 +105,9 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_create_command, static) = { .path = "create memif", - .short_help = "create memif [key ] [socket ] " + .short_help = "create memif [id ] [socket ] " "[ring-size ] [buffer-size ] [hw-addr ] " - "] [tx-queues ]>", + " [rx-queues ] [tx-queues ]", .function = memif_create_command_fn, }; /* *INDENT-ON* */ @@ -122,8 +117,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - u64 key = 0; - u8 key_defined = 0; + u32 sw_if_index = ~0; + vnet_hw_interface_t *hw; + memif_main_t *mm = &memif_main; + memif_if_t *mif; + vnet_main_t *vnm = vnet_get_main (); /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -131,18 +129,27 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "key 0x%" PRIx64, &key)) - key_defined = 1; + if (unformat (line_input, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (line_input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + ; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } unformat_free (line_input); - if (!key_defined) - return clib_error_return (0, "missing key"); + if (sw_if_index == ~0) + return clib_error_return (0, + "please specify interface name or sw_if_index"); + + hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (hw == NULL || memif_device_class.index != hw->dev_class_index) + return clib_error_return (0, "not a memif interface"); - memif_delete_if (vm, key); + mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + memif_delete_if (vm, mif); return 0; } @@ -150,11 +157,59 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_delete_command, static) = { .path = "delete memif", - .short_help = "delete memif key ", + .short_help = "delete memif { | sw_if_index }", .function = memif_delete_command_fn, }; /* *INDENT-ON* */ +static u8 * +format_memif_if_flags (u8 * s, va_list * args) +{ + u32 flags = va_arg (*args, u32); +#define _(a,b,c) if ( flags & (1 << a)) s = format (s, " %s", c); + foreach_memif_if_flag +#undef _ + return s; +} + +static u8 * +format_memif_if_mode (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + return format (s, "ethernet"); + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + return format (s, "ip"); + if (mif->mode == MEMIF_INTERFACE_MODE_PUNT_INJECT) + return format (s, "punt-inject"); + return format (s, "unknown mode (%u)", mif->mode);; +} + +static u8 * +format_memif_queue (u8 * s, va_list * args) +{ + memif_if_t *mif = va_arg (*args, memif_if_t *); + memif_queue_t *mq = va_arg (*args, memif_queue_t *); + uword i = va_arg (*args, uword); + uword indent = format_get_indent (s); + + s = format (s, "%U%s ring %u:\n", + format_white_space, indent, + (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? + "slave-to-master" : "master-to-slave", i); + s = format (s, "%Uregion %u offset %u ring-size %u int-fd %d\n", + format_white_space, indent + 4, + mq->region, mq->offset, (1 << mq->log2_ring_size), mq->int_fd); + + if (mq->ring) + s = format (s, "%Uhead %u tail %u flags 0x%04x interrupts %u\n", + format_white_space, indent + 4, + mq->ring->head, mq->ring->tail, mq->ring->flags, + mq->int_count); + + return s; +} + static clib_error_t * memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -162,39 +217,43 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, memif_main_t *mm = &memif_main; memif_if_t *mif; vnet_main_t *vnm = vnet_get_main (); - int i; + memif_queue_t *mq; + uword i; /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces, ({ + memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); vlib_cli_output (vm, "interface %U", format_vnet_sw_if_index_name, vnm, mif->sw_if_index); - vlib_cli_output (vm, " key 0x%" PRIx64 " file %s", mif->key, - mif->socket_filename); - vlib_cli_output (vm, " listener %d conn-fd %d int-fd %d", mif->listener_index, - mif->connection.fd, mif->interrupt_line.fd); - vlib_cli_output (vm, " ring-size %u num-s2m-rings %u num-m2s-rings %u buffer_size %u", - (1 << mif->log2_ring_size), - mif->num_s2m_rings, - mif->num_m2s_rings, - mif->buffer_size); - for (i=0; i < mif->num_s2m_rings; i++) + if (mif->remote_name) + vlib_cli_output (vm, " remote-name \"%s\"", mif->remote_name); + if (mif->remote_if_name) + vlib_cli_output (vm, " remote-interface \"%s\"", mif->remote_if_name); + vlib_cli_output (vm, " id %d mode %U file %s", mif->id, + format_memif_if_mode, mif, msf->filename); + vlib_cli_output (vm, " flags%U", format_memif_if_flags, mif->flags); + vlib_cli_output (vm, " listener-fd %d conn-fd %d", msf->fd, mif->conn_fd); + vlib_cli_output (vm, " num-s2m-rings %u num-m2s-rings %u buffer-size %u", + mif->run.num_s2m_rings, + mif->run.num_m2s_rings, + mif->run.buffer_size); + + if (mif->local_disc_string) + vlib_cli_output (vm, " local-disc-reason \"%s\"", mif->local_disc_string); + if (mif->remote_disc_string) + vlib_cli_output (vm, " remote-disc-reason \"%s\"", mif->remote_disc_string); + + vec_foreach_index (i, mif->tx_queues) { - memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_S2M, i); - if (ring) - { - vlib_cli_output (vm, " slave-to-master ring %u:", i); - vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); - } + mq = vec_elt_at_index (mif->tx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); } - for (i=0; i < mif->num_m2s_rings; i++) + vec_foreach_index (i, mif->rx_queues) { - memif_ring_t * ring = memif_get_ring (mif, MEMIF_RING_M2S, i); - if (ring) - { - vlib_cli_output (vm, " master-to-slave ring %u:", i); - vlib_cli_output (vm, " head %u tail %u", ring->head, ring->tail); - } + mq = vec_elt_at_index (mif->rx_queues, i); + vlib_cli_output (vm, " %U", format_memif_queue, mif, mq, i); } })); /* *INDENT-ON* */ diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index f496b17d..870dd354 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -26,6 +26,7 @@ #include #include +#include #define foreach_memif_tx_func_error \ _(NO_FREE_SLOTS, "no free tx slots") \ @@ -45,8 +46,7 @@ static char *memif_tx_func_error_strings[] = { #undef _ }; - -static u8 * +u8 * format_memif_device_name (u8 * s, va_list * args) { u32 i = va_arg (*args, u32); @@ -91,27 +91,30 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, memif_ring_type_t type) { - u8 rid; + u8 qid; memif_ring_t *ring; u32 *buffers = vlib_frame_args (frame); u32 n_left = frame->n_vectors; - u16 ring_size = 1 << mif->log2_ring_size; - u16 mask = ring_size - 1; + u16 ring_size, mask; u16 head, tail; u16 free_slots; u32 thread_index = vlib_get_thread_index (); - u8 tx_queues = memif_get_tx_queues (mif); + u8 tx_queues = vec_len (mif->tx_queues); + memif_queue_t *mq; if (tx_queues < vec_len (vlib_mains)) { - rid = thread_index % tx_queues; + qid = thread_index % tx_queues; clib_spinlock_lock_if_init (&mif->lockp); } else { - rid = thread_index; + qid = thread_index; } - ring = memif_get_ring (mif, type, rid); + mq = vec_elt_at_index (mif->tx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; /* free consumed buffers */ @@ -214,10 +217,11 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); - if (mif->interrupt_line.fd > 0) + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1) { - u8 b = rid; - CLIB_UNUSED (int r) = write (mif->interrupt_line.fd, &b, sizeof (b)); + u64 b = 1; + CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b)); + mq->int_count++; } return frame->n_vectors; @@ -262,35 +266,35 @@ memif_clear_hw_interface_counters (u32 instance) /* Nothing for now */ } +static clib_error_t * +memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, + vnet_hw_interface_rx_mode mode) +{ + memif_main_t *mm = &memif_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT; + else + mq->ring->flags &= ~MEMIF_RING_FLAG_MASK_INT; + + return 0; +} + static clib_error_t * memif_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) { - memif_main_t *apm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - memif_msg_t msg = { 0 }; + memif_main_t *mm = &memif_main; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - memif_if_t *mif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hw->dev_instance); static clib_error_t *error = 0; if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; else - { - mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; - if (!(mif->flags & MEMIF_IF_FLAG_DELETING) - && mif->connection.index != ~0) - { - msg.version = MEMIF_VERSION; - msg.type = MEMIF_MSG_TYPE_DISCONNECT; - if (send (mif->connection.fd, &msg, sizeof (msg), 0) < 0) - { - clib_unix_warning ("Failed to send disconnect request"); - error = clib_error_return_unix (0, "send fd %d", - mif->connection.fd); - memif_disconnect (vm, mif); - } - } - } + mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; return error; } @@ -317,6 +321,7 @@ VNET_DEVICE_CLASS (memif_device_class) = { .clear_counters = memif_clear_hw_interface_counters, .admin_up_down_function = memif_interface_admin_up_down, .subif_add_del_function = memif_subif_add_del_function, + .rx_mode_change_function = memif_interface_rx_mode_change, }; VLIB_DEVICE_TX_FUNCTION_MULTIARCH(memif_device_class, diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index b0a351aa..c9632d10 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -17,9 +17,10 @@ @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @param role - role of the interface in the connection (master/slave) + @param mode - interface mode @param rx_queues - number of rx queues (only valid for slave) #param tx_queues - number of tx queues (only valid for slave) - @param key - 64bit integer used to authenticate and match opposite sides + @param id - 32bit integer used to authenticate and match opposite sides of the connection @param socket_filename - filename of the socket to be used for connection establishment @@ -33,12 +34,12 @@ define memif_create u32 context; u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ u8 rx_queues; /* optional, default is 1 */ u8 tx_queues; /* optional, default is 1 */ - u64 key; /* optional, default is 0 */ - u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" - and can be changed in VPP startup config */ - + u32 id; /* optional, default is 0 */ + u8 socket_filename[128]; /* optional, default is "/var/vpp/memif.sock" */ + u8 secret[24]; /* optional, default is "" */ u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */ u16 buffer_size; /* optional, default is 2048 bytes */ u8 hw_addr[6]; /* optional, randomly generated if not defined */ @@ -74,8 +75,9 @@ autoreply define memif_delete @param sw_if_index - index of the interface @param if_name - name of the interface @param hw_addr - interface MAC address - @param key - key associated with the interface + @param id - id associated with the interface @param role - role of the interface in the connection (master/slave) + @param mode - interface mode @param socket_filename - name of the socket used by this interface to establish new connections @param ring_size - the number of entries of RX/TX rings @@ -93,8 +95,9 @@ define memif_details u8 hw_addr[6]; /* memif specific parameters */ - u64 key; + u32 id; u8 role; /* 0 = master, 1 = slave */ + u8 mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ u8 socket_filename[128]; u32 ring_size; u16 buffer_size; /* optional, default is 2048 bytes */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7d2a09a2..f082b58c 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -1,6 +1,6 @@ /* *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -15,6 +15,7 @@ *------------------------------------------------------------------ */ + #define _GNU_SOURCE #include #include @@ -26,7 +27,9 @@ #include #include #include +#include #include +#include #include #include @@ -34,21 +37,10 @@ #include #include #include - -#define MEMIF_DEBUG 1 - -#if MEMIF_DEBUG == 1 -#define DEBUG_LOG(...) clib_warning(__VA_ARGS__) -#define DEBUG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) -#else -#define DEBUG_LOG(...) -#endif +#include memif_main_t memif_main; -static clib_error_t *memif_conn_fd_read_ready (unix_file_t * uf); -static clib_error_t *memif_int_fd_read_ready (unix_file_t * uf); - static u32 memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) { @@ -57,604 +49,299 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) } static void -memif_remove_pending_conn (memif_pending_conn_t * pending_conn) -{ - memif_main_t *mm = &memif_main; - - unix_file_del (&unix_main, - unix_main.file_pool + pending_conn->connection.index); - pool_put (mm->pending_conns, pending_conn); -} - -static void -memif_connect (vlib_main_t * vm, memif_if_t * mif) +memif_queue_intfd_close (memif_queue_t * mq) { - vnet_main_t *vnm = vnet_get_main (); - int num_rings = mif->num_s2m_rings + mif->num_m2s_rings; - memif_ring_data_t *rd = NULL; - vnet_hw_interface_t *hw; - u8 rid, rx_queues; - int ret; - - vec_validate_aligned (mif->ring_data, num_rings - 1, CLIB_CACHE_LINE_BYTES); - vec_foreach (rd, mif->ring_data) - { - rd->last_head = 0; - } - - mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; - mif->flags |= MEMIF_IF_FLAG_CONNECTED; - vnet_hw_interface_set_flags (vnm, mif->hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); - - hw = vnet_get_hw_interface (vnm, mif->hw_if_index); - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; - vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, - memif_input_node.index); - rx_queues = memif_get_rx_queues (mif); - for (rid = 0; rid < rx_queues; rid++) + if (mq->int_unix_file_index != ~0) { - vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, rid, ~0); - ret = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, rid, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT); - if (ret) - DEBUG_LOG ("Warning: unable to set rx mode for interface %d " - "queue %d: rc=%d", mif->hw_if_index, rid, ret); + memif_file_del_by_index (mq->int_unix_file_index); + mq->int_unix_file_index = ~0; + mq->int_fd = -1; } -} - -static void -memif_disconnect_do (vlib_main_t * vm, memif_if_t * mif) -{ - vnet_main_t *vnm = vnet_get_main (); - u8 rid, rx_queues; - int rv; - memif_shm_t **shm; - - mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); - if (mif->hw_if_index != ~0) - vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - - if (mif->connection.index != ~0) + else if (mq->int_fd > -1) { - unix_file_del (&unix_main, unix_main.file_pool + mif->connection.index); - mif->connection.index = ~0; - mif->connection.fd = -1; /* closed in unix_file_del */ + close (mq->int_fd); + mq->int_fd = -1; } - - rx_queues = memif_get_rx_queues (mif); - for (rid = 0; rid < rx_queues; rid++) - { - rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, rid); - if (rv) - DEBUG_LOG ("Warning: unable to unassign interface %d, " - "queue %d: rc=%d", mif->hw_if_index, rid, rv); - } - - shm = (memif_shm_t **) mif->regions; - rv = munmap ((void *) *shm, mif->shared_mem_size); - if (rv) - DEBUG_UNIX_LOG ("Error: failed munmap call"); - - vec_free (mif->regions); } void -memif_disconnect (vlib_main_t * vm, memif_if_t * mif) -{ - if (mif->interrupt_line.index != ~0) - { - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; /* closed in unix_file_del */ - } - - memif_disconnect_do (vm, mif); -} - -static clib_error_t * -memif_process_connect_req (memif_pending_conn_t * pending_conn, - memif_msg_t * req, struct ucred *slave_cr, - int shm_fd, int int_fd) +memif_disconnect (memif_if_t * mif, clib_error_t * err) { - memif_main_t *mm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - int fd = pending_conn->connection.fd; - unix_file_t *uf = 0; - memif_if_t *mif = 0; - memif_msg_t resp = { 0 }; - unix_file_t template = { 0 }; - void *shm; - uword *p; - u8 retval = 0; - static clib_error_t *error = 0; - - if (shm_fd == -1) - { - DEBUG_LOG - ("Connection request is missing shared memory file descriptor"); - retval = 1; - goto response; - } - - if (int_fd == -1) - { - DEBUG_LOG - ("Connection request is missing interrupt line file descriptor"); - retval = 2; - goto response; - } + vnet_main_t *vnm = vnet_get_main (); + memif_region_t *mr; + memif_queue_t *mq; + int i; - if (slave_cr == NULL) - { - DEBUG_LOG ("Connection request is missing slave credentials"); - retval = 3; - goto response; - } + if (mif == 0) + return; - p = mhash_get (&mm->if_index_by_key, &req->key); - if (!p) - { - DEBUG_LOG - ("Connection request with unmatched key (0x%" PRIx64 ")", req->key); - retval = 4; - goto response; - } + DBG ("disconnect %u (%v)", mif->dev_instance, err ? err->what : 0); - mif = vec_elt_at_index (mm->interfaces, *p); - if (mif->listener_index != pending_conn->listener_index) + if (err) { - DEBUG_LOG - ("Connection request with non-matching listener (%d vs. %d)", - pending_conn->listener_index, mif->listener_index); - retval = 5; - goto response; + clib_error_t *e = 0; + mif->local_disc_string = vec_dup (err->what); + if (mif->conn_fd > -1) + e = memif_msg_send_disconnect (mif, err); + clib_error_free (e); } - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - { - DEBUG_LOG ("Memif slave does not accept connection requests"); - retval = 6; - goto response; - } + /* set interface down */ + mif->flags &= ~(MEMIF_IF_FLAG_CONNECTED | MEMIF_IF_FLAG_CONNECTING); + if (mif->hw_if_index != ~0) + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); - if (mif->connection.fd != -1) + /* close connection socket */ + if (mif->conn_unix_file_index != ~0) { - DEBUG_LOG - ("Memif with key 0x%" PRIx64 " is already connected", mif->key); - retval = 7; - goto response; + memif_file_del_by_index (mif->conn_unix_file_index); + mif->conn_unix_file_index = ~0; } + else if (mif->conn_fd > -1) + close (mif->conn_fd); + mif->conn_fd = -1; - if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) == 0) - { - /* just silently decline the request */ - retval = 8; - goto response; - } + vec_foreach_index (i, mif->rx_queues) + { + mq = vec_elt_at_index (mif->rx_queues, i); + if (mq->ring) + { + int rv; + rv = vnet_hw_interface_unassign_rx_thread (vnm, mif->hw_if_index, i); + if (rv) + DBG ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", mif->hw_if_index, i, rv); + mq->ring = 0; + } + } - if (req->shared_mem_size < sizeof (memif_shm_t)) - { - DEBUG_LOG - ("Unexpectedly small shared memory segment received from slave."); - retval = 9; - goto response; - } + /* free tx and rx queues */ + vec_foreach (mq, mif->rx_queues) memif_queue_intfd_close (mq); + vec_free (mif->rx_queues); - if ((shm = - mmap (NULL, req->shared_mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, - shm_fd, 0)) == MAP_FAILED) - { - DEBUG_UNIX_LOG - ("Failed to map shared memory segment received from slave memif"); - error = clib_error_return_unix (0, "mmap fd %d", shm_fd); - retval = 10; - goto response; - } + vec_foreach (mq, mif->tx_queues) memif_queue_intfd_close (mq); + vec_free (mif->tx_queues); - if (((memif_shm_t *) shm)->cookie != 0xdeadbeef) - { - DEBUG_LOG - ("Possibly corrupted shared memory segment received from slave memif"); - munmap (shm, req->shared_mem_size); - retval = 11; - goto response; - } + /* free memory regions */ + vec_foreach (mr, mif->regions) + { + int rv; + if ((rv = munmap (mr->shm, mr->region_size))) + clib_warning ("munmap failed, rv = %d", rv); + if (mr->fd > -1) + close (mr->fd); + } + vec_free (mif->regions); - mif->shared_mem_size = req->shared_mem_size; - mif->log2_ring_size = req->log2_ring_size; - mif->num_s2m_rings = req->num_s2m_rings; - mif->num_m2s_rings = req->num_m2s_rings; - mif->buffer_size = req->buffer_size; - mif->remote_pid = slave_cr->pid; - mif->remote_uid = slave_cr->uid; - vec_add1 (mif->regions, shm); - - /* register interrupt line */ - mif->interrupt_line.fd = int_fd; - template.read_function = memif_int_fd_read_ready; - template.file_descriptor = int_fd; - template.private_data = mif->if_index; - mif->interrupt_line.index = unix_file_add (&unix_main, &template); - - /* change context for future messages */ - uf = vec_elt_at_index (unix_main.file_pool, pending_conn->connection.index); - uf->private_data = mif->if_index << 1; - mif->connection = pending_conn->connection; - pool_put (mm->pending_conns, pending_conn); - pending_conn = 0; - - memif_connect (vm, mif); - -response: - resp.version = MEMIF_VERSION; - resp.type = MEMIF_MSG_TYPE_CONNECT_RESP; - resp.retval = retval; - if (send (fd, &resp, sizeof (resp), 0) < 0) - { - DEBUG_UNIX_LOG ("Failed to send connection response"); - error = clib_error_return_unix (0, "send fd %d", fd); - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect (vm, mif); - } - if (retval > 0) - { - if (shm_fd >= 0) - close (shm_fd); - if (int_fd >= 0) - close (int_fd); - } - return error; + mif->remote_pid = 0; + vec_free (mif->remote_name); + vec_free (mif->remote_if_name); + clib_fifo_free (mif->msg_queue); } static clib_error_t * -memif_process_connect_resp (memif_if_t * mif, memif_msg_t * resp) +memif_int_fd_read_ready (unix_file_t * uf) { - vlib_main_t *vm = vlib_get_main (); - - if ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) == 0) - { - DEBUG_LOG ("Memif master does not accept connection responses"); - return 0; - } + memif_main_t *mm = &memif_main; + vnet_main_t *vnm = vnet_get_main (); + u16 qid = uf->private_data & 0xFFFF; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 16); + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + u64 b; + ssize_t size; - if ((mif->flags & MEMIF_IF_FLAG_CONNECTING) == 0) + size = read (uf->file_descriptor, &b, sizeof (b)); + if (size < 0) { - DEBUG_LOG ("Unexpected connection response"); + DBG_UNIX_LOG ("Failed to read from socket"); return 0; } - if (resp->retval == 0) - memif_connect (vm, mif); - else - memif_disconnect (vm, mif); + vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, qid); + mq->int_count++; return 0; } -static clib_error_t * -memif_conn_fd_read_ready (unix_file_t * uf) -{ - memif_main_t *mm = &memif_main; - vlib_main_t *vm = vlib_get_main (); - memif_if_t *mif = 0; - memif_pending_conn_t *pending_conn = 0; - int fd_array[2] = { -1, -1 }; - char ctl[CMSG_SPACE (sizeof (fd_array)) + - CMSG_SPACE (sizeof (struct ucred))] = { 0 }; - struct msghdr mh = { 0 }; - struct iovec iov[1]; - struct ucred *cr = 0; - memif_msg_t msg = { 0 }; - struct cmsghdr *cmsg; - ssize_t size; - static clib_error_t *error = 0; - - iov[0].iov_base = (void *) &msg; - iov[0].iov_len = sizeof (memif_msg_t); - mh.msg_iov = iov; - mh.msg_iovlen = 1; - mh.msg_control = ctl; - mh.msg_controllen = sizeof (ctl); - - /* grab the appropriate context */ - if (uf->private_data & 1) - pending_conn = vec_elt_at_index (mm->pending_conns, - uf->private_data >> 1); - else - mif = vec_elt_at_index (mm->interfaces, uf->private_data >> 1); - - /* Stop workers to avoid end of the world */ - vlib_worker_thread_barrier_sync (vlib_get_main ()); - /* receive the incoming message */ - size = recvmsg (uf->file_descriptor, &mh, 0); - if (size != sizeof (memif_msg_t)) - { - if (size == 0) - { - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect_do (vm, mif); - goto return_ok; - } +clib_error_t * +memif_connect (memif_if_t * mif) +{ + vnet_main_t *vnm = vnet_get_main (); + unix_file_t template = { 0 }; + memif_region_t *mr; + int i; - DEBUG_UNIX_LOG ("Malformed message received on fd %d", - uf->file_descriptor); - error = clib_error_return_unix (0, "recvmsg fd %d", - uf->file_descriptor); - goto disconnect; - } + DBG ("connect %u", mif->dev_instance); - /* check version of the sender's memif plugin */ - if (msg.version != MEMIF_VERSION) - { - DEBUG_LOG ("Memif version mismatch"); - goto disconnect; - } + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); - /* process the message based on its type */ - switch (msg.type) - { - case MEMIF_MSG_TYPE_CONNECT_REQ: - if (pending_conn == 0) - DEBUG_LOG ("Received unexpected connection request"); - else - { - /* Read anciliary data */ - cmsg = CMSG_FIRSTHDR (&mh); - while (cmsg) - { - if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_CREDENTIALS) - { - cr = (struct ucred *) CMSG_DATA (cmsg); - } - else if (cmsg->cmsg_level == SOL_SOCKET - && cmsg->cmsg_type == SCM_RIGHTS) - { - memcpy (fd_array, CMSG_DATA (cmsg), sizeof (fd_array)); - } - cmsg = CMSG_NXTHDR (&mh, cmsg); - } - error = memif_process_connect_req (pending_conn, &msg, cr, - fd_array[0], fd_array[1]); - } - break; + vec_foreach (mr, mif->regions) + { + if (mr->shm) + continue; - case MEMIF_MSG_TYPE_CONNECT_RESP: - if (mif == 0) - DEBUG_LOG ("Received unexpected connection response"); - else - error = memif_process_connect_resp (mif, &msg); - break; + if (mr->fd < 0) + clib_error_return (0, "no memory region fd"); - case MEMIF_MSG_TYPE_DISCONNECT: - goto disconnect; + if ((mr->shm = mmap (NULL, mr->region_size, PROT_READ | PROT_WRITE, + MAP_SHARED, mr->fd, 0)) == MAP_FAILED) + return clib_error_return_unix (0, "mmap"); + } - default: - DEBUG_LOG ("Received unknown message type"); - goto disconnect; - } + template.read_function = memif_int_fd_read_ready; -return_ok: - vlib_worker_thread_barrier_release (vlib_get_main ()); - return error; + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); -disconnect: - if (pending_conn) - memif_remove_pending_conn (pending_conn); - else - memif_disconnect (vm, mif); - vlib_worker_thread_barrier_release (vlib_get_main ()); - return error; -} + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + } -static clib_error_t * -memif_int_fd_read_ready (unix_file_t * uf) -{ - memif_main_t *mm = &memif_main; - vnet_main_t *vnm = vnet_get_main (); - memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); - u8 b; - ssize_t size; + vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + int rv; + + mq->ring = mif->regions[mq->region].shm + mq->offset; + if (mq->ring->cookie != MEMIF_COOKIE) + return clib_error_return (0, "wrong cookie on tx ring %u", i); + + if (mq->int_fd > -1) + { + template.file_descriptor = mq->int_fd; + template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF); + memif_file_add (&mq->int_unix_file_index, &template); + } + vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); + rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, + VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + if (rv) + clib_warning + ("Warning: unable to set rx mode for interface %d queue %d: " + "rc=%d", mif->hw_if_index, i, rv); + } - size = read (uf->file_descriptor, &b, sizeof (b)); - if (0 == size) - { - /* interrupt line was disconnected */ - unix_file_del (&unix_main, - unix_main.file_pool + mif->interrupt_line.index); - mif->interrupt_line.index = ~0; - mif->interrupt_line.fd = -1; - } - else if (size < 0) - DEBUG_UNIX_LOG ("Failed to read from socket"); - else - vnet_device_input_set_interrupt_pending (vnm, mif->hw_if_index, b); + mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; + mif->flags |= MEMIF_IF_FLAG_CONNECTED; + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); return 0; } -static clib_error_t * -memif_conn_fd_accept_ready (unix_file_t * uf) +static_always_inline memif_ring_t * +memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) { - memif_main_t *mm = &memif_main; - memif_listener_t *listener = 0; - memif_pending_conn_t *pending_conn = 0; - int addr_len; - struct sockaddr_un client; - int conn_fd; - unix_file_t template = { 0 }; - - listener = pool_elt_at_index (mm->listeners, uf->private_data); - - addr_len = sizeof (client); - conn_fd = accept (uf->file_descriptor, - (struct sockaddr *) &client, (socklen_t *) & addr_len); - - if (conn_fd < 0) - return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); - - pool_get (mm->pending_conns, pending_conn); - pending_conn->index = pending_conn - mm->pending_conns; - pending_conn->listener_index = listener->index; - pending_conn->connection.fd = conn_fd; - - template.read_function = memif_conn_fd_read_ready; - template.file_descriptor = conn_fd; - template.private_data = (pending_conn->index << 1) | 1; - pending_conn->connection.index = unix_file_add (&unix_main, &template); - - return 0; + if (vec_len (mif->regions) == 0) + return NULL; + void *p = mif->regions[0].shm; + int ring_size = + sizeof (memif_ring_t) + + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size); + p += (ring_num + type * mif->run.num_s2m_rings) * ring_size; + + return (memif_ring_t *) p; } -static void -memif_connect_master (vlib_main_t * vm, memif_if_t * mif) +clib_error_t * +memif_init_regions_and_queues (memif_if_t * mif) { - memif_msg_t msg; - struct msghdr mh = { 0 }; - struct iovec iov[1]; - struct cmsghdr *cmsg; - int mfd = -1; - int rv; - int fd_array[2] = { -1, -1 }; - char ctl[CMSG_SPACE (sizeof (fd_array))]; memif_ring_t *ring = NULL; int i, j; - void *shm = 0; u64 buffer_offset; - unix_file_t template = { 0 }; + memif_region_t *r; - msg.version = MEMIF_VERSION; - msg.type = MEMIF_MSG_TYPE_CONNECT_REQ; - msg.key = mif->key; - msg.log2_ring_size = mif->log2_ring_size; - msg.num_s2m_rings = mif->num_s2m_rings; - msg.num_m2s_rings = mif->num_m2s_rings; - msg.buffer_size = mif->buffer_size; + vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); + r = vec_elt_at_index (mif->regions, 0); - buffer_offset = sizeof (memif_shm_t) + - (mif->num_s2m_rings + mif->num_m2s_rings) * + buffer_offset = (mif->run.num_s2m_rings + mif->run.num_m2s_rings) * (sizeof (memif_ring_t) + - sizeof (memif_desc_t) * (1 << mif->log2_ring_size)); + sizeof (memif_desc_t) * (1 << mif->run.log2_ring_size)); - msg.shared_mem_size = buffer_offset + - mif->buffer_size * (1 << mif->log2_ring_size) * (mif->num_s2m_rings + - mif->num_m2s_rings); - - if ((mfd = memfd_create ("shared mem", MFD_ALLOW_SEALING)) == -1) - { - DEBUG_LOG ("Failed to create anonymous file"); - goto error; - } + r->region_size = buffer_offset + + mif->run.buffer_size * (1 << mif->run.log2_ring_size) * + (mif->run.num_s2m_rings + mif->run.num_m2s_rings); - if ((fcntl (mfd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - DEBUG_UNIX_LOG ("Failed to seal an anonymous file off from truncating"); - goto error; - } + if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1) + return clib_error_return_unix (0, "memfd_create"); - if ((ftruncate (mfd, msg.shared_mem_size)) == -1) - { - DEBUG_UNIX_LOG ("Failed to extend the size of an anonymous file"); - goto error; - } + if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - if ((shm = mmap (NULL, msg.shared_mem_size, PROT_READ | PROT_WRITE, - MAP_SHARED, mfd, 0)) == MAP_FAILED) - { - DEBUG_UNIX_LOG ("Failed to map anonymous file into memory"); - goto error; - } + if ((ftruncate (r->fd, r->region_size)) == -1) + return clib_error_return_unix (0, "ftruncate"); - mif->shared_mem_size = msg.shared_mem_size; - vec_add1 (mif->regions, shm); - ((memif_shm_t *) mif->regions[0])->cookie = 0xdeadbeef; + if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE, + MAP_SHARED, r->fd, 0)) == MAP_FAILED) + return clib_error_return_unix (0, "mmap"); - for (i = 0; i < mif->num_s2m_rings; i++) + for (i = 0; i < mif->run.num_s2m_rings; i++) { ring = memif_get_ring (mif, MEMIF_RING_S2M, i); ring->head = ring->tail = 0; - for (j = 0; j < (1 << mif->log2_ring_size); j++) + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) { - u16 slot = i * (1 << mif->log2_ring_size) + j; + u16 slot = i * (1 << mif->run.log2_ring_size) + j; ring->desc[j].region = 0; ring->desc[j].offset = - buffer_offset + (u32) (slot * mif->buffer_size); - ring->desc[j].buffer_length = mif->buffer_size; + buffer_offset + (u32) (slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; } } - for (i = 0; i < mif->num_m2s_rings; i++) + for (i = 0; i < mif->run.num_m2s_rings; i++) { ring = memif_get_ring (mif, MEMIF_RING_M2S, i); ring->head = ring->tail = 0; - for (j = 0; j < (1 << mif->log2_ring_size); j++) + ring->cookie = MEMIF_COOKIE; + for (j = 0; j < (1 << mif->run.log2_ring_size); j++) { u16 slot = - (i + mif->num_s2m_rings) * (1 << mif->log2_ring_size) + j; + (i + mif->run.num_s2m_rings) * (1 << mif->run.log2_ring_size) + j; ring->desc[j].region = 0; ring->desc[j].offset = - buffer_offset + (u32) (slot * mif->buffer_size); - ring->desc[j].buffer_length = mif->buffer_size; + buffer_offset + (u32) (slot * mif->run.buffer_size); + ring->desc[j].buffer_length = mif->run.buffer_size; } } - iov[0].iov_base = (void *) &msg; - iov[0].iov_len = sizeof (memif_msg_t); - mh.msg_iov = iov; - mh.msg_iovlen = 1; - - /* create interrupt socket */ - if (socketpair (AF_UNIX, SOCK_STREAM, 0, fd_array) < 0) - { - DEBUG_UNIX_LOG ("Failed to create a pair of connected sockets"); - goto error; - } - - mif->interrupt_line.fd = fd_array[0]; - template.read_function = memif_int_fd_read_ready; - template.file_descriptor = mif->interrupt_line.fd; - template.private_data = mif->if_index; - mif->interrupt_line.index = unix_file_add (&unix_main, &template); - - memset (&ctl, 0, sizeof (ctl)); - mh.msg_control = ctl; - mh.msg_controllen = sizeof (ctl); - cmsg = CMSG_FIRSTHDR (&mh); - cmsg->cmsg_len = CMSG_LEN (sizeof (fd_array)); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - fd_array[0] = mfd; - memcpy (CMSG_DATA (cmsg), fd_array, sizeof (fd_array)); - - mif->flags |= MEMIF_IF_FLAG_CONNECTING; - rv = sendmsg (mif->connection.fd, &mh, 0); - if (rv < 0) - { - DEBUG_UNIX_LOG ("Failed to send memif connection request"); - goto error; - } + ASSERT (mif->tx_queues == 0); + vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->tx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[tx queue %u]", i); + mq->int_unix_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } - /* No need to keep the descriptor open, - * mmap creates an extra reference to the underlying file */ - close (mfd); - mfd = -1; - /* This FD is given to peer, so we can close it */ - close (fd_array[1]); - fd_array[1] = -1; - return; + ASSERT (mif->rx_queues == 0); + vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach_index (i, mif->rx_queues) + { + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); + if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) + return clib_error_return_unix (0, "eventfd[rx queue %u]", i); + mq->int_unix_file_index = ~0; + mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i); + mq->log2_ring_size = mif->cfg.log2_ring_size; + mq->region = 0; + mq->offset = (void *) mq->ring - (void *) mif->regions[mq->region].shm; + mq->last_head = 0; + } -error: - if (mfd > -1) - close (mfd); - if (fd_array[1] > -1) - close (fd_array[1]); - memif_disconnect (vm, mif); + return 0; } static uword @@ -665,24 +352,22 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) struct sockaddr_un sun; int sockfd; uword *event_data = 0, event_type; - unix_file_t template = { 0 }; u8 enabled = 0; f64 start_time, last_run_duration = 0, now; - sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0); if (sockfd < 0) { - DEBUG_UNIX_LOG ("socket AF_UNIX"); + DBG_UNIX_LOG ("socket AF_UNIX"); return 0; } sun.sun_family = AF_UNIX; - template.read_function = memif_conn_fd_read_ready; while (1) { if (enabled) - vlib_process_wait_for_event_or_clock (vm, - (f64) 3 - last_run_duration); + vlib_process_wait_for_event_or_clock (vm, (f64) 3 - + last_run_duration); else vlib_process_wait_for_event (vm); @@ -707,6 +392,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces, ({ + memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); /* Allow no more than 10us without a pause */ now = vlib_time_now (vm); if (now > start_time + 10e-6) @@ -726,24 +412,31 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) { - strncpy (sun.sun_path, (char *) mif->socket_filename, + strncpy (sun.sun_path, (char *) msf->filename, sizeof (sun.sun_path) - 1); if (connect (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { - mif->connection.fd = sockfd; - template.file_descriptor = sockfd; - template.private_data = mif->if_index << 1; - mif->connection.index = unix_file_add (&unix_main, &template); - memif_connect_master (vm, mif); + unix_file_t t = { 0 }; + + mif->conn_fd = sockfd; + t.read_function = memif_slave_conn_fd_read_ready; + t.write_function = memif_slave_conn_fd_write_ready; + t.error_function = memif_slave_conn_fd_error; + t.file_descriptor = mif->conn_fd; + t.private_data = mif->dev_instance; + memif_file_add (&mif->conn_unix_file_index, &t); + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + + mif->flags |= MEMIF_IF_FLAG_CONNECTING; /* grab another fd */ - sockfd = socket (AF_UNIX, SOCK_STREAM, 0); + sockfd = socket (AF_UNIX, SOCK_SEQPACKET, 0); if (sockfd < 0) { - DEBUG_UNIX_LOG ("socket AF_UNIX"); + DBG_UNIX_LOG ("socket AF_UNIX"); return 0; } } @@ -763,75 +456,62 @@ VLIB_REGISTER_NODE (memif_process_node,static) = { }; /* *INDENT-ON* */ -static void -memif_close_if (memif_main_t * mm, memif_if_t * mif) +int +memif_delete_if (vlib_main_t * vm, memif_if_t * mif) { - vlib_main_t *vm = vlib_get_main (); - memif_listener_t *listener = 0; - memif_pending_conn_t *pending_conn = 0; + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, mif->socket_file_index); + clib_error_t *err; + + mif->flags |= MEMIF_IF_FLAG_DELETING; + vec_free (mif->local_disc_string); + vec_free (mif->remote_disc_string); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); + vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); + + err = clib_error_return (0, "interface deleted"); + memif_disconnect (mif, err); + clib_error_free (err); - memif_disconnect (vm, mif); + /* remove the interface */ + ethernet_delete_interface (vnm, mif->hw_if_index); + mif->hw_if_index = ~0; + + /* free interface data structures */ + clib_spinlock_free (&mif->lockp); + mhash_unset (&msf->dev_instance_by_id, &mif->id, 0); - if (mif->listener_index != (uword) ~ 0) + /* remove socket file */ + if (--(msf->ref_cnt) == 0) { - listener = pool_elt_at_index (mm->listeners, mif->listener_index); - if (--listener->usage_counter == 0) + if (msf->is_listener) { - /* not used anymore -> remove the socket and pending connections */ - - /* *INDENT-OFF* */ - pool_foreach (pending_conn, mm->pending_conns, - ({ - if (pending_conn->listener_index == mif->listener_index) - { - memif_remove_pending_conn (pending_conn); - } - })); - /* *INDENT-ON* */ - - unix_file_del (&unix_main, - unix_main.file_pool + listener->socket.index); - pool_put (mm->listeners, listener); - unlink ((char *) mif->socket_filename); + uword *x; + memif_file_del_by_index (msf->unix_file_index); + vec_foreach (x, msf->pending_file_indices) + { + memif_file_del_by_index (*x); + } + vec_free (msf->pending_file_indices); } + mhash_free (&msf->dev_instance_by_id); + hash_free (msf->dev_instance_by_fd); + mhash_unset (&mm->socket_file_index_by_filename, msf->filename, 0); + vec_free (msf->filename); + pool_put (mm->socket_files, msf); } - clib_spinlock_free (&mif->lockp); - - mhash_unset (&mm->if_index_by_key, &mif->key, &mif->if_index); - vec_free (mif->socket_filename); - vec_free (mif->ring_data); - memset (mif, 0, sizeof (*mif)); pool_put (mm->interfaces, mif); -} -int -memif_worker_thread_enable () -{ - /* if worker threads are enabled, switch to polling mode */ - /* *INDENT-OFF* */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - memif_input_node.index, - VLIB_NODE_STATE_POLLING); - })); - /* *INDENT-ON* */ - return 0; -} + if (pool_elts (mm->interfaces) == 0) + vlib_process_signal_event (vm, memif_process_node.index, + MEMIF_PROCESS_EVENT_STOP, 0); -int -memif_worker_thread_disable () -{ - /* *INDENT-OFF* */ - foreach_vlib_main (( - { - vlib_node_set_state (this_vlib_main, - memif_input_node.index, - VLIB_NODE_STATE_INTERRUPT); - })); - /* *INDENT-ON* */ return 0; } @@ -846,19 +526,92 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) clib_error_t *error = 0; int ret = 0; uword *p; + vnet_hw_interface_t *hw; + memif_socket_file_t *msf = 0; + u8 *socket_filename; + int rv = 0; + + if (args->socket_filename == 0 || args->socket_filename[0] != '/') + { + rv = mkdir (MEMIF_DEFAULT_SOCKET_DIR, 0755); + if (rv && errno != EEXIST) + return VNET_API_ERROR_SYSCALL_ERROR_1; + + if (args->socket_filename == 0) + socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + MEMIF_DEFAULT_SOCKET_FILENAME, 0); + else + socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + args->socket_filename, 0); + + } + else + socket_filename = vec_dup (args->socket_filename); + + p = mhash_get (&mm->socket_file_index_by_filename, socket_filename); - p = mhash_get (&mm->if_index_by_key, &args->key); if (p) - return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + { + msf = vec_elt_at_index (mm->socket_files, p[0]); + + /* existing socket file can be either master or slave but cannot be both */ + if (!msf->is_listener != !args->is_master) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + + p = mhash_get (&msf->dev_instance_by_id, &args->id); + if (p) + { + rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + goto done; + } + } + + /* Create new socket file */ + if (msf == 0) + { + struct stat file_stat; + /* If we are creating listener make sure file doesn't exist or if it + * exists thn delete it if it is old socket file */ + if (args->is_master && + (stat ((char *) socket_filename, &file_stat) == 0)) + { + if (S_ISSOCK (file_stat.st_mode)) + { + unlink ((char *) socket_filename); + } + else + { + ret = VNET_API_ERROR_SYSCALL_ERROR_3; + goto error; + } + } + pool_get (mm->socket_files, msf); + memset (msf, 0, sizeof (memif_socket_file_t)); + mhash_init (&msf->dev_instance_by_id, sizeof (uword), + sizeof (memif_interface_id_t)); + msf->dev_instance_by_fd = hash_create (0, sizeof (uword)); + msf->filename = socket_filename; + msf->fd = -1; + msf->is_listener = (args->is_master != 0); + socket_filename = 0; + mhash_set (&mm->socket_file_index_by_filename, msf->filename, + msf - mm->socket_files, 0); + DBG ("creating socket file %s", msf->filename); + } pool_get (mm->interfaces, mif); memset (mif, 0, sizeof (*mif)); - mif->key = args->key; - mif->if_index = mif - mm->interfaces; + mif->dev_instance = mif - mm->interfaces; + mif->socket_file_index = msf - mm->socket_files; + mif->id = args->id; mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; - mif->listener_index = ~0; - mif->connection.index = mif->interrupt_line.index = ~0; - mif->connection.fd = mif->interrupt_line.fd = -1; + mif->conn_unix_file_index = ~0; + mif->conn_fd = -1; + if (args->secret) + mif->secret = vec_dup (args->secret); if (tm->n_vlib_mains > 1) clib_spinlock_init (&mif->lockp); @@ -876,137 +629,94 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) } error = ethernet_register_interface (vnm, memif_device_class.index, - mif->if_index, args->hw_addr, + mif->dev_instance, args->hw_addr, &mif->hw_if_index, memif_eth_flag_change); if (error) { clib_error_report (error); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; + ret = VNET_API_ERROR_SYSCALL_ERROR_2; goto error; } sw = vnet_get_hw_sw_interface (vnm, mif->hw_if_index); mif->sw_if_index = sw->sw_if_index; - mif->log2_ring_size = args->log2_ring_size; - mif->buffer_size = args->buffer_size; - - mif->num_s2m_rings = args->rx_queues; - mif->num_m2s_rings = args->tx_queues; - - mhash_set_mem (&mm->if_index_by_key, &args->key, &mif->if_index, 0); - - if (args->socket_filename != 0) - mif->socket_filename = args->socket_filename; - else - mif->socket_filename = vec_dup (mm->default_socket_filename); + mif->cfg.log2_ring_size = args->log2_ring_size; + mif->cfg.buffer_size = args->buffer_size; + mif->cfg.num_s2m_rings = + args->is_master ? args->rx_queues : args->tx_queues; + mif->cfg.num_m2s_rings = + args->is_master ? args->tx_queues : args->rx_queues; args->sw_if_index = mif->sw_if_index; - if (args->is_master) + /* If this is new one, start listening */ + if (msf->is_listener && msf->ref_cnt == 0) { struct sockaddr_un un = { 0 }; struct stat file_stat; int on = 1; - memif_listener_t *listener = 0; - - if (stat ((char *) mif->socket_filename, &file_stat) == 0) - { - if (!S_ISSOCK (file_stat.st_mode)) - { - errno = ENOTSOCK; - ret = VNET_API_ERROR_SYSCALL_ERROR_2; - goto error; - } - /* *INDENT-OFF* */ - pool_foreach (listener, mm->listeners, - ({ - if (listener->sock_dev == file_stat.st_dev && - listener->sock_ino == file_stat.st_ino) - { - /* attach memif to the existing listener */ - mif->listener_index = listener->index; - ++listener->usage_counter; - goto signal; - } - })); - /* *INDENT-ON* */ - unlink ((char *) mif->socket_filename); - } - pool_get (mm->listeners, listener); - memset (listener, 0, sizeof (*listener)); - listener->socket.fd = -1; - listener->socket.index = ~0; - listener->index = listener - mm->listeners; - listener->usage_counter = 1; - - if ((listener->socket.fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) + if ((msf->fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0) { - ret = VNET_API_ERROR_SYSCALL_ERROR_3; + ret = VNET_API_ERROR_SYSCALL_ERROR_4; goto error; } un.sun_family = AF_UNIX; - strncpy ((char *) un.sun_path, (char *) mif->socket_filename, + strncpy ((char *) un.sun_path, (char *) msf->filename, sizeof (un.sun_path) - 1); - if (setsockopt (listener->socket.fd, SOL_SOCKET, SO_PASSCRED, - &on, sizeof (on)) < 0) - { - ret = VNET_API_ERROR_SYSCALL_ERROR_4; - goto error; - } - if (bind (listener->socket.fd, (struct sockaddr *) &un, - sizeof (un)) == -1) + if (setsockopt (msf->fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof (on)) < 0) { ret = VNET_API_ERROR_SYSCALL_ERROR_5; goto error; } - if (listen (listener->socket.fd, 1) == -1) + if (bind (msf->fd, (struct sockaddr *) &un, sizeof (un)) == -1) { ret = VNET_API_ERROR_SYSCALL_ERROR_6; goto error; } - - if (stat ((char *) mif->socket_filename, &file_stat) == -1) + if (listen (msf->fd, 1) == -1) { ret = VNET_API_ERROR_SYSCALL_ERROR_7; goto error; } - listener->sock_dev = file_stat.st_dev; - listener->sock_ino = file_stat.st_ino; + if (stat ((char *) msf->filename, &file_stat) == -1) + { + ret = VNET_API_ERROR_SYSCALL_ERROR_8; + goto error; + } + msf->unix_file_index = ~0; unix_file_t template = { 0 }; template.read_function = memif_conn_fd_accept_ready; - template.file_descriptor = listener->socket.fd; - template.private_data = listener->index; - listener->socket.index = unix_file_add (&unix_main, &template); - - mif->listener_index = listener->index; - } - else - { - mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + template.file_descriptor = msf->fd; + template.private_data = mif->socket_file_index; + memif_file_add (&msf->unix_file_index, &template); } -#if 0 - /* use configured or generate random MAC address */ - if (!args->hw_addr_set && - tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 1) - memif_worker_thread_enable (); -#endif + msf->ref_cnt++; + + if (args->is_master == 0) + mif->flags |= MEMIF_IF_FLAG_IS_SLAVE; + + hw = vnet_get_hw_interface (vnm, mif->hw_if_index); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; + vnet_hw_interface_set_input_node (vnm, mif->hw_if_index, + memif_input_node.index); + + mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0); -signal: if (pool_elts (mm->interfaces) == 1) { vlib_process_signal_event (vm, memif_process_node.index, MEMIF_PROCESS_EVENT_START, 0); } - return 0; + goto done; error: if (mif->hw_if_index != ~0) @@ -1014,89 +724,31 @@ error: ethernet_delete_interface (vnm, mif->hw_if_index); mif->hw_if_index = ~0; } - memif_close_if (mm, mif); + memif_delete_if (vm, mif); return ret; -} - -int -memif_delete_if (vlib_main_t * vm, u64 key) -{ - vnet_main_t *vnm = vnet_get_main (); - memif_main_t *mm = &memif_main; - memif_if_t *mif; - uword *p; - u32 hw_if_index; - - p = mhash_get (&mm->if_index_by_key, &key); - if (p == NULL) - { - DEBUG_LOG ("Memory interface with key 0x%" PRIx64 " does not exist", - key); - return VNET_API_ERROR_SYSCALL_ERROR_1; - } - mif = pool_elt_at_index (mm->interfaces, p[0]); - mif->flags |= MEMIF_IF_FLAG_DELETING; - - /* bring down the interface */ - vnet_sw_interface_set_flags (vnm, mif->sw_if_index, 0); - - hw_if_index = mif->hw_if_index; - memif_close_if (mm, mif); - - /* remove the interface */ - ethernet_delete_interface (vnm, hw_if_index); - if (pool_elts (mm->interfaces) == 0) - { - vlib_process_signal_event (vm, memif_process_node.index, - MEMIF_PROCESS_EVENT_STOP, 0); - } - -#if 0 - if (tm->n_vlib_mains > 1 && pool_elts (mm->interfaces) == 0) - memif_worker_thread_disable (); -#endif - return 0; +done: + vec_free (socket_filename); + return rv; } + static clib_error_t * memif_init (vlib_main_t * vm) { memif_main_t *mm = &memif_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; memset (mm, 0, sizeof (memif_main_t)); - mm->input_cpu_first_index = 0; - mm->input_cpu_count = 1; - /* initialize binary API */ memif_plugin_api_hookup (vm); - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - mm->input_cpu_first_index = tr->first_index; - mm->input_cpu_count = tr->count; - } - - mhash_init (&mm->if_index_by_key, sizeof (uword), sizeof (u64)); + mhash_init_c_string (&mm->socket_file_index_by_filename, sizeof (uword)); vec_validate_aligned (mm->rx_buffers, tm->n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - /* set default socket filename */ - vec_validate (mm->default_socket_filename, - strlen (MEMIF_DEFAULT_SOCKET_FILENAME)); - strncpy ((char *) mm->default_socket_filename, - MEMIF_DEFAULT_SOCKET_FILENAME, - vec_len (mm->default_socket_filename) - 1); - return 0; } diff --git a/src/plugins/memif/memif.h b/src/plugins/memif/memif.h index 56028a25..30a27dc2 100644 --- a/src/plugins/memif/memif.h +++ b/src/plugins/memif/memif.h @@ -1,6 +1,6 @@ /* *------------------------------------------------------------------ - * Copyright (c) 2016 Cisco and/or its affiliates. + * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: @@ -15,278 +15,158 @@ *------------------------------------------------------------------ */ -#include +#ifndef _MEMIF_H_ +#define _MEMIF_H_ -typedef struct -{ - u16 version; -#define MEMIF_VERSION_MAJOR 0 -#define MEMIF_VERSION_MINOR 1 -#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) - u8 type; -#define MEMIF_MSG_TYPE_CONNECT_REQ 0 -#define MEMIF_MSG_TYPE_CONNECT_RESP 1 -#define MEMIF_MSG_TYPE_DISCONNECT 2 - - /* Connection-request parameters: */ - u64 key; - u8 log2_ring_size; -#define MEMIF_DEFAULT_RING_SIZE 1024 - u16 num_s2m_rings; -#define MEMIF_DEFAULT_RX_QUEUES 1 - u16 num_m2s_rings; -#define MEMIF_DEFAULT_TX_QUEUES 1 - u16 buffer_size; -#define MEMIF_DEFAULT_BUFFER_SIZE 2048 - u32 shared_mem_size; - - /* Connection-response parameters: */ - u8 retval; -} memif_msg_t; - -typedef struct __attribute__ ((packed)) -{ - u16 flags; -#define MEMIF_DESC_FLAG_NEXT (1 << 0) - u16 region; - u32 buffer_length; - u32 length;; - u8 reserved[4]; - u64 offset; - u64 metadata; -} memif_desc_t; +#ifndef MEMIF_CACHELINE_SIZE +#define MEMIF_CACHELINE_SIZE 64 +#endif -STATIC_ASSERT_SIZEOF (memif_desc_t, 32); +/* + * Type definitions + */ -typedef struct +typedef enum memif_msg_type { - u16 head __attribute__ ((aligned (128))); - u16 tail __attribute__ ((aligned (128))); - memif_desc_t desc[0] __attribute__ ((aligned (128))); -} memif_ring_t; + MEMIF_MSG_TYPE_NONE = 0, + MEMIF_MSG_TYPE_ACK = 1, + MEMIF_MSG_TYPE_HELLO = 2, + MEMIF_MSG_TYPE_INIT = 3, + MEMIF_MSG_TYPE_ADD_REGION = 4, + MEMIF_MSG_TYPE_ADD_RING = 5, + MEMIF_MSG_TYPE_CONNECT = 6, + MEMIF_MSG_TYPE_CONNECTED = 7, + MEMIF_MSG_TYPE_DISCONNECT = 8, +} memif_msg_type_t; -typedef struct +typedef enum { - u32 cookie __attribute__ ((aligned (128))); -} memif_shm_t; - + MEMIF_RING_S2M = 0, + MEMIF_RING_M2S = 1 +} memif_ring_type_t; -typedef struct +typedef enum { - u16 last_head; - u16 last_tail; -} memif_ring_data_t; + MEMIF_INTERFACE_MODE_ETHERNET = 0, + MEMIF_INTERFACE_MODE_IP = 1, + MEMIF_INTERFACE_MODE_PUNT_INJECT = 2, +} memif_interface_mode_t; -typedef struct -{ - int fd; - u32 index; -} memif_file_t; +typedef uint16_t memif_region_index_t; +typedef uint16_t memif_ring_index_t; +typedef uint32_t memif_interface_id_t; +typedef uint16_t memif_version_t; -typedef struct -{ - uword index; - dev_t sock_dev; - ino_t sock_ino; - memif_file_t socket; - u16 usage_counter; -} memif_listener_t; +/* + * Socket messages + */ -typedef struct +typedef struct __attribute__ ((packed)) { - uword index; - memif_file_t connection; - uword listener_index; -} memif_pending_conn_t; + uint8_t name[32]; + memif_version_t min_version; + memif_version_t max_version; + memif_region_index_t max_region; + memif_ring_index_t max_m2s_ring; + memif_ring_index_t max_s2m_ring; + uint8_t max_log2_ring_size; +} memif_msg_hello_t; -typedef struct +typedef struct __attribute__ ((packed)) { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; - u32 flags; -#define MEMIF_IF_FLAG_ADMIN_UP (1 << 0) -#define MEMIF_IF_FLAG_IS_SLAVE (1 << 1) -#define MEMIF_IF_FLAG_CONNECTING (1 << 2) -#define MEMIF_IF_FLAG_CONNECTED (1 << 3) -#define MEMIF_IF_FLAG_DELETING (1 << 4) - - u64 key; - uword if_index; - u32 hw_if_index; - u32 sw_if_index; - - u32 per_interface_next_index; - - uword listener_index; - memif_file_t connection; - memif_file_t interrupt_line; - u8 *socket_filename; + memif_version_t version; + memif_interface_id_t id; + memif_interface_mode_t mode:8; + uint8_t secret[24]; + uint8_t name[32]; +} memif_msg_init_t; - void **regions; - - u8 log2_ring_size; - u8 num_s2m_rings; - u8 num_m2s_rings; - u16 buffer_size; - u32 shared_mem_size; - - memif_ring_data_t *ring_data; - - /* remote info */ - pid_t remote_pid; - uid_t remote_uid; -} memif_if_t; - -typedef struct +typedef struct __attribute__ ((packed)) { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /** API message ID base */ - u16 msg_id_base; - - /* pool of all memory interfaces */ - memif_if_t *interfaces; - - /* pool of all listeners */ - memif_listener_t *listeners; - - /* pool of pending connections */ - memif_pending_conn_t *pending_conns; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* rx buffer cache */ - u32 **rx_buffers; - - /* hash of all registered keys */ - mhash_t if_index_by_key; - - /* first cpu index */ - u32 input_cpu_first_index; - - /* total cpu count */ - u32 input_cpu_count; - - /* configuration */ - u8 *default_socket_filename; -#define MEMIF_DEFAULT_SOCKET_FILENAME "/var/vpp/memif.sock" -} memif_main_t; + memif_region_index_t index; + uint32_t size; +} memif_msg_add_region_t; -extern memif_main_t memif_main; -extern vnet_device_class_t memif_device_class; -extern vlib_node_registration_t memif_input_node; - -enum +typedef struct __attribute__ ((packed)) { - MEMIF_PROCESS_EVENT_START = 1, - MEMIF_PROCESS_EVENT_STOP = 2, -} memif_process_event_t; + uint16_t flags; +#define MEMIF_MSG_ADD_RING_FLAG_S2M (1 << 0) + memif_ring_index_t index; + memif_region_index_t region; + uint32_t offset; + uint8_t log2_ring_size; +} memif_msg_add_ring_t; -typedef struct +typedef struct __attribute__ ((packed)) { - u64 key; - u8 *socket_filename; - u8 is_master; - u8 log2_ring_size; - u16 buffer_size; - u8 hw_addr_set; - u8 hw_addr[6]; - u8 rx_queues; - u8 tx_queues; - - /* return */ - u32 sw_if_index; -} memif_create_if_args_t; + uint8_t if_name[32]; +} memif_msg_connect_t; -int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); -int memif_delete_if (vlib_main_t * vm, u64 key); -void memif_disconnect (vlib_main_t * vm, memif_if_t * mif); -clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static_always_inline u8 -memif_get_rx_queues (memif_if_t * mif) +typedef struct __attribute__ ((packed)) { - u8 rx_queues; + uint8_t if_name[32]; +} memif_msg_connected_t; - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - rx_queues = mif->num_m2s_rings; - else - rx_queues = mif->num_s2m_rings; - - return (rx_queues); -} - -static_always_inline u8 -memif_get_tx_queues (memif_if_t * mif) +typedef struct __attribute__ ((packed)) { - u8 tx_queues; - - if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - tx_queues = mif->num_s2m_rings; - else - tx_queues = mif->num_m2s_rings; + uint32_t code; + uint8_t string[96]; +} memif_msg_disconnect_t; + +typedef struct __attribute__ ((packed, aligned (128))) +{ + memif_msg_type_t type:16; + union + { + memif_msg_hello_t hello; + memif_msg_init_t init; + memif_msg_add_region_t add_region; + memif_msg_add_ring_t add_ring; + memif_msg_connect_t connect; + memif_msg_connected_t connected; + memif_msg_disconnect_t disconnect; + }; +} memif_msg_t; - return (tx_queues); -} +_Static_assert (sizeof (memif_msg_t) == 128, + "Size of memif_msg_t must be 128"); -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} +/* + * Ring and Descriptor Layout + */ -typedef enum +typedef struct __attribute__ ((packed)) { - MEMIF_RING_S2M = 0, - MEMIF_RING_M2S = 1 -} memif_ring_type_t; + uint16_t flags; +#define MEMIF_DESC_FLAG_NEXT (1 << 0) + memif_region_index_t region; + uint32_t buffer_length; + uint32_t length; + uint8_t reserved[4]; + uint64_t offset; + uint64_t metadata; +} memif_desc_t; -static_always_inline memif_ring_t * -memif_get_ring (memif_if_t * mif, memif_ring_type_t type, u16 ring_num) -{ - if (vec_len (mif->regions) == 0) - return NULL; - void *p = mif->regions[0]; - int ring_size = - sizeof (memif_ring_t) + - sizeof (memif_desc_t) * (1 << mif->log2_ring_size); - p += sizeof (memif_shm_t); - p += (ring_num + type * mif->num_s2m_rings) * ring_size; +_Static_assert (sizeof (memif_desc_t) == 32, + "Size of memif_dsct_t must be 32"); - return (memif_ring_t *) p; -} +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \ + uint8_t mark[0] __attribute__((aligned(MEMIF_CACHELINE_SIZE))) -static_always_inline void * -memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +typedef struct { - u16 region = ring->desc[slot].region; - return mif->regions[region] + ring->desc[slot].offset; -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + MEMIF_CACHELINE_ALIGN_MARK (cacheline0); + uint32_t cookie; + uint16_t flags; +#define MEMIF_RING_FLAG_MASK_INT 1 + volatile uint16_t head; + MEMIF_CACHELINE_ALIGN_MARK (cacheline1); + volatile uint16_t tail; + MEMIF_CACHELINE_ALIGN_MARK (cacheline2); + memif_desc_t desc[0]; +} memif_ring_t; -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif /* _MEMIF_H_ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index 1470f944..533e8482 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -19,7 +19,9 @@ #include #include +#include #include +#include #include #include @@ -107,8 +109,8 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) static const u8 empty_hw_addr[6]; int rv = 0; - /* key */ - args.key = clib_net_to_host_u64 (mp->key); + /* id */ + args.id = clib_net_to_host_u32 (mp->id); /* socket filename */ mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0; @@ -120,6 +122,15 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) vec_len (args.socket_filename)); } + /* secret */ + mp->secret[ARRAY_LEN (mp->secret) - 1] = 0; + if (strlen ((char *) mp->secret) > 0) + { + vec_validate (args.secret, strlen ((char *) mp->secret)); + strncpy ((char *) args.secret, (char *) mp->secret, + vec_len (args.secret)); + } + /* role */ args.is_master = (mp->role == 0); if (args.is_master == 0) @@ -156,6 +167,9 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) rv = memif_create_if (vm, &args); + vec_free (args.socket_filename); + vec_free (args.secret); + reply: /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY, @@ -173,26 +187,19 @@ void vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) { memif_main_t *mm = &memif_main; - memif_if_t *mif; vlib_main_t *vm = vlib_get_main (); + vnet_main_t *vnm = vnet_get_main (); vl_api_memif_delete_reply_t *rmp; - u32 sw_if_index = ntohl (mp->sw_if_index); + vnet_hw_interface_t *hi = + vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); + memif_if_t *mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); int rv = 0; - /* *INDENT-OFF* */ - pool_foreach (mif, mm->interfaces, - ({ - if (sw_if_index == mif->sw_if_index) - { - rv = memif_delete_if (vm, mif->key); - goto reply; - } - })); - /* *INDENT-ON* */ - - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + if (hi == NULL || memif_device_class.index != hi->dev_class_index) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + rv = memif_delete_if (vm, mif); -reply: REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); } @@ -205,6 +212,8 @@ send_memif_details (unix_shared_memory_queue_t * q, vl_api_memif_details_t *mp; vnet_main_t *vnm = vnet_get_main (); memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); vnet_hw_interface_t *hwif; hwif = vnet_get_sup_hw_interface (vnm, swif->sw_if_index); @@ -220,14 +229,13 @@ send_memif_details (unix_shared_memory_queue_t * q, (char *) interface_name, ARRAY_LEN (mp->if_name) - 1); memcpy (mp->hw_addr, hwif->hw_address, ARRAY_LEN (mp->hw_addr)); - mp->key = clib_host_to_net_u64 (mif->key); + mp->id = clib_host_to_net_u32 (mif->id); mp->role = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) ? 1 : 0; strncpy ((char *) mp->socket_filename, - (char *) mif->socket_filename, - ARRAY_LEN (mp->socket_filename) - 1); + (char *) msf->filename, ARRAY_LEN (mp->socket_filename) - 1); - mp->ring_size = htonl (1 << mif->log2_ring_size); - mp->buffer_size = htons (mif->buffer_size); + mp->ring_size = htonl (1 << mif->run.log2_ring_size); + mp->buffer_size = htons (mif->run.buffer_size); mp->admin_up_down = (swif->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 1 : 0; mp->link_up_down = (hwif->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0; diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c index c1ba9273..cbef4dfa 100644 --- a/src/plugins/memif/memif_test.c +++ b/src/plugins/memif/memif_test.c @@ -24,6 +24,7 @@ #include #include #include +#include #define __plugin_msg_base memif_test_main.msg_id_base #include @@ -118,8 +119,9 @@ api_memif_create (vat_main_t * vam) { unformat_input_t *i = vam->input; vl_api_memif_create_t *mp; - u64 key = 0; + u32 id = 0; u8 *socket_filename = 0; + u8 *secret = 0; u8 role = 1; u32 ring_size = 0; u32 buffer_size = 0; @@ -131,10 +133,12 @@ api_memif_create (vat_main_t * vam) while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { - if (unformat (i, "key 0x%" PRIx64, &key)) + if (unformat (i, "id %u", &id)) ; else if (unformat (i, "socket %s", &socket_filename)) ; + else if (unformat (i, "secret %s", &secret)) + ; else if (unformat (i, "ring_size %u", &ring_size)) ; else if (unformat (i, "buffer_size %u", &buffer_size)) @@ -173,7 +177,7 @@ api_memif_create (vat_main_t * vam) M (MEMIF_CREATE, mp); - mp->key = clib_host_to_net_u64 (key); + mp->id = clib_host_to_net_u32 (id); mp->role = role; mp->ring_size = clib_host_to_net_u32 (ring_size); mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff); @@ -182,6 +186,11 @@ api_memif_create (vat_main_t * vam) strncpy ((char *) mp->socket_filename, (char *) socket_filename, 127); vec_free (socket_filename); } + if (socket_filename != 0) + { + strncpy ((char *) mp->secret, (char *) secret, 16); + vec_free (socket_filename); + } memcpy (mp->hw_addr, hw_addr, 6); mp->rx_queues = rx_queues; mp->tx_queues = tx_queues; @@ -282,11 +291,11 @@ static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) vat_main_t *vam = memif_test_main.vat_main; fformat (vam->ofp, "%s: sw_if_index %u mac %U\n" - " key 0x%" PRIx64 " socket %s role %s\n" + " id %u socket %s role %s\n" " ring_size %u buffer_size %u\n" " state %s link %s\n", mp->if_name, ntohl (mp->sw_if_index), format_ethernet_address, - mp->hw_addr, clib_net_to_host_u64 (mp->key), mp->socket_filename, + mp->hw_addr, clib_net_to_host_u32 (mp->id), mp->socket_filename, mp->role ? "slave" : "master", ntohl (mp->ring_size), ntohs (mp->buffer_size), mp->admin_up_down ? "up" : "down", @@ -298,7 +307,7 @@ static void vl_api_memif_details_t_handler (vl_api_memif_details_t * mp) * and that the data plane plugin processes */ #define foreach_vpe_api_msg \ -_(memif_create, "[key ] [socket ] [ring_size ] " \ +_(memif_create, "[id ] [socket ] [ring_size ] " \ "[buffer_size ] [hw_addr ] " \ "") \ _(memif_delete, "") \ diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index fd7baa30..e2c7631c 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -28,6 +28,7 @@ #include #include +#include #define foreach_memif_input_error @@ -78,11 +79,11 @@ memif_prefetch (vlib_main_t * vm, u32 bi) static_always_inline uword memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, - memif_ring_type_t type, u16 rid) + memif_ring_type_t type, u16 qid) { vnet_main_t *vnm = vnet_get_main (); - memif_ring_t *ring = memif_get_ring (mif, type, rid); - memif_ring_data_t *rd; + memif_ring_t *ring; + memif_queue_t *mq; u16 head; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; uword n_trace = vlib_get_trace_count (vm, node); @@ -94,12 +95,14 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 thread_index = vlib_get_thread_index (); u32 bi0, bi1; vlib_buffer_t *b0, *b1; - u16 ring_size = 1 << mif->log2_ring_size; - u16 mask = ring_size - 1; - u16 num_slots; + u16 ring_size, mask, num_slots; void *mb0, *mb1; - rd = vec_elt_at_index (mif->ring_data, rid + type * mif->num_s2m_rings); + mq = vec_elt_at_index (mif->rx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + if (mif->per_interface_next_index != ~0) next_index = mif->per_interface_next_index; @@ -115,13 +118,13 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } head = ring->head; - if (head == rd->last_head) + if (head == mq->last_head) return 0; - if (head > rd->last_head) - num_slots = head - rd->last_head; + if (head > mq->last_head) + num_slots = head - mq->last_head; else - num_slots = ring_size - rd->last_head + head; + num_slots = ring_size - mq->last_head + head; while (num_slots) { @@ -132,28 +135,28 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (num_slots > 5 && n_left_to_next > 2) { - if (PREDICT_TRUE (rd->last_head + 5 < ring_size)) + if (PREDICT_TRUE (mq->last_head + 5 < ring_size)) { - CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 2), + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 2), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (memif_get_buffer (mif, ring, rd->last_head + 3), + CLIB_PREFETCH (memif_get_buffer (mif, ring, mq->last_head + 3), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[rd->last_head + 4], + CLIB_PREFETCH (&ring->desc[mq->last_head + 4], CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[rd->last_head + 5], + CLIB_PREFETCH (&ring->desc[mq->last_head + 5], CLIB_CACHE_LINE_BYTES, LOAD); } else { CLIB_PREFETCH (memif_get_buffer - (mif, ring, (rd->last_head + 2) % mask), + (mif, ring, (mq->last_head + 2) % mask), CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (memif_get_buffer - (mif, ring, (rd->last_head + 3) % mask), + (mif, ring, (mq->last_head + 3) % mask), CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[(rd->last_head + 4) % mask], + CLIB_PREFETCH (&ring->desc[(mq->last_head + 4) % mask], CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&ring->desc[(rd->last_head + 5) % mask], + CLIB_PREFETCH (&ring->desc[(mq->last_head + 5) % mask], CLIB_CACHE_LINE_BYTES, LOAD); } /* get empty buffer */ @@ -185,17 +188,17 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; /* copy buffer */ - mb0 = memif_get_buffer (mif, ring, rd->last_head); + mb0 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b0), mb0, CLIB_CACHE_LINE_BYTES); - b0->current_length = ring->desc[rd->last_head].length; - rd->last_head = (rd->last_head + 1) & mask; + b0->current_length = ring->desc[mq->last_head].length; + mq->last_head = (mq->last_head + 1) & mask; - mb1 = memif_get_buffer (mif, ring, rd->last_head); + mb1 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b1), mb1, CLIB_CACHE_LINE_BYTES); - b1->current_length = ring->desc[rd->last_head].length; - rd->last_head = (rd->last_head + 1) & mask; + b1->current_length = ring->desc[mq->last_head].length; + mq->last_head = (mq->last_head + 1) & mask; if (b0->current_length > CLIB_CACHE_LINE_BYTES) clib_memcpy (vlib_buffer_get_current (b0) + CLIB_CACHE_LINE_BYTES, @@ -221,7 +224,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + tr->ring = qid; if (n_trace) { @@ -233,7 +236,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->next_index = next1; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + tr->ring = qid; } } @@ -266,12 +269,12 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* fill buffer metadata */ b0 = vlib_get_buffer (vm, bi0); - b0->current_length = ring->desc[rd->last_head].length; + b0->current_length = ring->desc[mq->last_head].length; vnet_buffer (b0)->sw_if_index[VLIB_RX] = mif->sw_if_index; vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; /* copy buffer */ - mb0 = memif_get_buffer (mif, ring, rd->last_head); + mb0 = memif_get_buffer (mif, ring, mq->last_head); clib_memcpy (vlib_buffer_get_current (b0), mb0, CLIB_CACHE_LINE_BYTES); if (b0->current_length > CLIB_CACHE_LINE_BYTES) @@ -291,7 +294,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = mif->hw_if_index; - tr->ring = rid; + tr->ring = qid; } @@ -303,7 +306,7 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_to_next, bi0, next0); /* next packet */ - rd->last_head = (rd->last_head + 1) & mask; + mq->last_head = (mq->last_head + 1) & mask; num_slots--; n_rx_packets++; n_rx_bytes += b0->current_length; @@ -325,30 +328,28 @@ static uword memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - u32 n_rx_packets = 0; + u32 n_rx = 0; memif_main_t *nm = &memif_main; - memif_if_t *mif; vnet_device_input_runtime_t *rt = (void *) node->runtime_data; vnet_device_and_queue_t *dq; - memif_ring_type_t type; foreach_device_and_queue (dq, rt->devices_and_queues) { + memif_if_t *mif; mif = vec_elt_at_index (nm->interfaces, dq->dev_instance); if ((mif->flags & MEMIF_IF_FLAG_ADMIN_UP) && (mif->flags & MEMIF_IF_FLAG_CONNECTED)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - type = MEMIF_RING_M2S; + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id); else - type = MEMIF_RING_S2M; - n_rx_packets += - memif_device_input_inline (vm, node, frame, mif, type, - dq->queue_id); + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id); } } - return n_rx_packets; + return n_rx; } /* *INDENT-OFF* */ diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h new file mode 100644 index 00000000..104706fa --- /dev/null +++ b/src/plugins/memif/private.h @@ -0,0 +1,296 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include + +#define MEMIF_DEFAULT_SOCKET_DIR "/run/vpp" +#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock" +#define MEMIF_DEFAULT_RING_SIZE 1024 +#define MEMIF_DEFAULT_RX_QUEUES 1 +#define MEMIF_DEFAULT_TX_QUEUES 1 +#define MEMIF_DEFAULT_BUFFER_SIZE 2048 + +#define MEMIF_VERSION_MAJOR 0 +#define MEMIF_VERSION_MINOR 1 +#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) +#define MEMIF_COOKIE 0xdeadbeef +#define MEMIF_MAX_M2S_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_S2M_RING (vec_len (vlib_mains) - 1) +#define MEMIF_MAX_REGION 255 +#define MEMIF_MAX_LOG2_RING_SIZE 14 + +#define MEMIF_DEBUG 0 + +#if MEMIF_DEBUG == 1 +#define DBG(...) clib_warning(__VA_ARGS__) +#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__) +#else +#define DBG(...) +#define DBG_UNIX_LOG(...) +#endif + +#if MEMIF_DEBUG == 1 +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = unix_file_add (&unix_main, b); \ + clib_warning ("unix_file_add fd %d private_data %u idx %u", \ + (b)->file_descriptor, (b)->private_data, *a); \ +} while (0) + +#define memif_file_del(a) do { \ + clib_warning ("unix_file_del idx %u",a - unix_main.file_pool); \ + unix_file_del (&unix_main, a); \ +} while (0) + +#define memif_file_del_by_index(a) do { \ + clib_warning ("unix_file_del idx %u", a); \ + unix_file_del_by_index (&unix_main, a); \ +} while (0) +#else +#define memif_file_add(a, b) do { \ + ASSERT (*a == ~0); \ + *a = unix_file_add (&unix_main, b); \ +} while (0) +#define memif_file_del(a) unix_file_del(&unix_main, a) +#define memif_file_del_by_index(a) unix_file_del_by_index(&unix_main, a) +#endif + +typedef struct +{ + u8 *filename; + int fd; + uword unix_file_index; + uword *pending_file_indices; + int ref_cnt; + int is_listener; + + /* hash of all registered id */ + mhash_t dev_instance_by_id; + + /* hash of all registered fds */ + uword *dev_instance_by_fd; +} memif_socket_file_t; + +typedef struct +{ + void *shm; + u32 region_size; + int fd; +} memif_region_t; + +typedef struct +{ + memif_msg_t msg; + int fd; +} memif_msg_fifo_elt_t; + +typedef struct +{ + /* ring data */ + memif_ring_t *ring; + u8 log2_ring_size; + u8 region; + u32 offset; + + u16 last_head; + u16 last_tail; + + /* interrupts */ + int int_fd; + uword int_unix_file_index; + u64 int_count; +} memif_queue_t; + +#define foreach_memif_if_flag \ + _(0, ADMIN_UP, "admin-up") \ + _(1, IS_SLAVE, "slave") \ + _(2, CONNECTING, "connecting") \ + _(3, CONNECTED, "connected") \ + _(4, DELETING, "deleting") + +typedef enum +{ +#define _(a, b, c) MEMIF_IF_FLAG_##b = (1 << a), + foreach_memif_if_flag +#undef _ +} memif_if_flag_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u32 flags; + memif_interface_id_t id; + u32 hw_if_index; + u32 sw_if_index; + uword dev_instance; + memif_interface_mode_t mode:8; + + u32 per_interface_next_index; + + /* socket connection */ + uword socket_file_index; + int conn_fd; + uword conn_unix_file_index; + memif_msg_fifo_elt_t *msg_queue; + u8 *secret; + + memif_region_t *regions; + + memif_queue_t *rx_queues; + memif_queue_t *tx_queues; + + /* remote info */ + pid_t remote_pid; + uid_t remote_uid; + gid_t remote_gid; + u8 *remote_name; + u8 *remote_if_name; + + struct + { + u8 log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } cfg; + + struct + { + u8 log2_ring_size; + u8 num_s2m_rings; + u8 num_m2s_rings; + u16 buffer_size; + } run; + + /* disconnect strings */ + u8 *local_disc_string; + u8 *remote_disc_string; +} memif_if_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** API message ID base */ + u16 msg_id_base; + + /* pool of all memory interfaces */ + memif_if_t *interfaces; + + /* pool of all unix socket files */ + memif_socket_file_t *socket_files; + mhash_t socket_file_index_by_filename; + + /* rx buffer cache */ + u32 **rx_buffers; + +} memif_main_t; + +extern memif_main_t memif_main; +extern vnet_device_class_t memif_device_class; +extern vlib_node_registration_t memif_input_node; + +enum +{ + MEMIF_PROCESS_EVENT_START = 1, + MEMIF_PROCESS_EVENT_STOP = 2, +} memif_process_event_t; + +typedef struct +{ + memif_interface_id_t id; + u8 *socket_filename; + u8 *secret; + u8 is_master; + memif_interface_mode_t mode:8; + u8 log2_ring_size; + u16 buffer_size; + u8 hw_addr_set; + u8 hw_addr[6]; + u8 rx_queues; + u8 tx_queues; + + /* return */ + u32 sw_if_index; +} memif_create_if_args_t; + +int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); +int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); +clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); + +#ifndef __NR_memfd_create +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined __aarch64__ +#define __NR_memfd_create 279 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif +#endif + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +static_always_inline void * +memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) +{ + u16 region = ring->desc[slot].region; + return mif->regions[region].shm + ring->desc[slot].offset; +} + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + +/* memif.c */ +clib_error_t *memif_init_regions_and_queues (memif_if_t * mif); +clib_error_t *memif_connect (memif_if_t * mif); +void memif_disconnect (memif_if_t * mif, clib_error_t * err); + +/* socket.c */ +clib_error_t *memif_conn_fd_accept_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_read_ready (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_read_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_write_ready (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_write_ready (unix_file_t * uf); +clib_error_t *memif_master_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_slave_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, + clib_error_t * err); +u8 *format_memif_device_name (u8 * s, va_list * args); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c new file mode 100644 index 00000000..d1a96db3 --- /dev/null +++ b/src/plugins/memif/socket.c @@ -0,0 +1,736 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static u8 * +memif_str2vec (uint8_t * str, int len) +{ + u8 *s = 0; + int i; + + if (str[0] == 0) + return s; + + for (i = 0; i < len; i++) + { + vec_add1 (s, str[i]); + if (str[i] == 0) + return s; + } + vec_add1 (s, 0); + + return s; +} + +static clib_error_t * +memif_msg_send (int fd, memif_msg_t * msg, int afd) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + int rv; + + iov[0].iov_base = (void *) msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + if (afd > 0) + { + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &afd, sizeof (int)); + } + rv = sendmsg (fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + DBG ("Message type %u sent (fd %d)", msg->type, afd); + return 0; +} + +static void +memif_msg_enq_ack (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + + e->msg.type = MEMIF_MSG_TYPE_ACK; + e->fd = -1; +} + +static clib_error_t * +memif_msg_enq_hello (int fd) +{ + u8 *s; + memif_msg_t msg = { 0 }; + memif_msg_hello_t *h = &msg.hello; + msg.type = MEMIF_MSG_TYPE_HELLO; + h->min_version = MEMIF_VERSION; + h->max_version = MEMIF_VERSION; + h->max_m2s_ring = MEMIF_MAX_M2S_RING; + h->max_s2m_ring = MEMIF_MAX_M2S_RING; + h->max_region = MEMIF_MAX_REGION; + h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) h->name, (char *) s, sizeof (h->name)); + vec_free (s); + return memif_msg_send (fd, &msg, -1); +} + +static void +memif_msg_enq_init (memif_if_t * mif) +{ + u8 *s; + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_init_t *i = &e->msg.init; + + e->msg.type = MEMIF_MSG_TYPE_INIT; + e->fd = -1; + i->version = MEMIF_VERSION; + i->id = mif->id; + s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); + strncpy ((char *) i->name, (char *) s, sizeof (i->name)); + if (mif->secret) + strncpy ((char *) i->secret, (char *) mif->secret, sizeof (i->secret)); + vec_free (s); +} + +static void +memif_msg_enq_add_region (memif_if_t * mif, u8 region) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_region_t *ar = &e->msg.add_region; + + e->msg.type = MEMIF_MSG_TYPE_ADD_REGION; + e->fd = mif->regions[region].fd; + ar->index = region; + ar->size = mif->regions[region].region_size; +} + +static void +memif_msg_enq_add_ring (memif_if_t * mif, u8 index, u8 direction) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_add_ring_t *ar = &e->msg.add_ring; + memif_queue_t *mq; + + ASSERT ((mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0); + + e->msg.type = MEMIF_MSG_TYPE_ADD_RING; + + if (direction == MEMIF_RING_M2S) + mq = vec_elt_at_index (mif->rx_queues, index); + else + mq = vec_elt_at_index (mif->tx_queues, index); + + e->fd = mq->int_fd; + ar->index = index; + ar->region = mq->region; + ar->offset = mq->offset; + ar->log2_ring_size = mq->log2_ring_size; + ar->flags = (direction == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0; +} + +static void +memif_msg_enq_connect (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connect_t *c = &e->msg.connect; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECT; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + vec_free (s); +} + +static void +memif_msg_enq_connected (memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_add2 (mif->msg_queue, e); + memif_msg_connected_t *c = &e->msg.connected; + u8 *s; + + e->msg.type = MEMIF_MSG_TYPE_CONNECTED; + e->fd = -1; + s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + vec_free (s); +} + +clib_error_t * +memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err) +{ + memif_msg_t msg = { 0 }; + msg.type = MEMIF_MSG_TYPE_DISCONNECT; + memif_msg_disconnect_t *d = &msg.disconnect; + + d->code = err->code; + strncpy ((char *) d->string, (char *) err->what, sizeof (d->string)); + + return memif_msg_send (mif->conn_fd, &msg, -1); +} + +static clib_error_t * +memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_hello_t *h = &msg->hello; + + if (msg->hello.min_version > MEMIF_VERSION || + msg->hello.max_version < MEMIF_VERSION) + return clib_error_return (0, "incompatible protocol version"); + + mif->run.num_s2m_rings = clib_min (h->max_s2m_ring + 1, + mif->cfg.num_s2m_rings); + mif->run.num_m2s_rings = clib_min (h->max_m2s_ring + 1, + mif->cfg.num_m2s_rings); + mif->run.log2_ring_size = clib_min (h->max_log2_ring_size, + mif->cfg.log2_ring_size); + mif->run.buffer_size = mif->cfg.buffer_size; + + mif->remote_name = memif_str2vec (h->name, sizeof (h->name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, + unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + vec_elt_at_index (mm->socket_files, uf->private_data); + memif_msg_init_t *i = &msg->init; + memif_if_t *mif, tmp; + clib_error_t *err; + uword *p; + + if (i->version != MEMIF_VERSION) + { + memif_file_del_by_index (uf - unix_main.file_pool); + return clib_error_return (0, "unsupported version"); + } + + p = mhash_get (&msf->dev_instance_by_id, &i->id); + + if (!p) + { + err = clib_error_return (0, "unmatched interface id"); + goto error; + } + + mif = vec_elt_at_index (mm->interfaces, p[0]); + + if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) + { + err = clib_error_return (0, "cannot connect to slave"); + goto error; + } + + if (mif->conn_fd != -1) + { + err = clib_error_return (0, "already connected"); + goto error; + } + + if (i->mode != mif->mode) + { + err = clib_error_return (0, "mode mismatch"); + goto error; + } + + mif->conn_fd = uf->file_descriptor; + mif->conn_unix_file_index = uf - unix_main.file_pool; + hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); + mif->remote_name = memif_str2vec (i->name, sizeof (i->name)); + *mifp = mif; + + if (mif->secret) + { + u8 *s; + int r; + s = memif_str2vec (i->secret, sizeof (i->secret)); + if (s == 0) + return clib_error_return (0, "secret required"); + + r = vec_cmp (s, mif->secret); + vec_free (s); + + if (r) + return clib_error_return (0, "incorrect secret"); + } + + return 0; + +error: + tmp.conn_fd = uf->file_descriptor; + memif_msg_send_disconnect (&tmp, err); + memif_file_del_by_index (uf - unix_main.file_pool); + return err; +} + +static clib_error_t * +memif_msg_receive_add_region (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + memif_msg_add_region_t *ar = &msg->add_region; + memif_region_t *mr; + if (fd < 0) + return clib_error_return (0, "missing memory region fd"); + + if (ar->index != vec_len (mif->regions)) + return clib_error_return (0, "unexpected region index"); + + if (ar->index > MEMIF_MAX_REGION) + return clib_error_return (0, "too many regions"); + + vec_validate_aligned (mif->regions, ar->index, CLIB_CACHE_LINE_BYTES); + mr = vec_elt_at_index (mif->regions, ar->index); + mr->fd = fd; + mr->region_size = ar->size; + + return 0; +} + +static clib_error_t * +memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd) +{ + memif_msg_add_ring_t *ar = &msg->add_ring; + memif_queue_t *mq; + + if (fd < 0) + return clib_error_return (0, "missing ring interrupt fd"); + + if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) + { + if (ar->index != vec_len (mif->rx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_S2M_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->rx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->rx_queues, ar->index); + mif->run.num_s2m_rings = vec_len (mif->rx_queues); + } + else + { + if (ar->index != vec_len (mif->tx_queues)) + return clib_error_return (0, "unexpected ring index"); + + if (ar->index > MEMIF_MAX_M2S_RING) + return clib_error_return (0, "too many rings"); + + vec_validate_aligned (mif->tx_queues, ar->index, CLIB_CACHE_LINE_BYTES); + mq = vec_elt_at_index (mif->tx_queues, ar->index); + mif->run.num_m2s_rings = vec_len (mif->tx_queues); + } + + mq->int_fd = fd; + mq->int_unix_file_index = ~0; + mq->log2_ring_size = ar->log2_ring_size; + mq->region = ar->region; + mq->offset = ar->offset; + + return 0; +} + +static clib_error_t * +memif_msg_receive_connect (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connect_t *c = &msg->connect; + + if ((err = memif_connect (mif))) + return err; + + mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name)); + + return 0; +} + +static clib_error_t * +memif_msg_receive_connected (memif_if_t * mif, memif_msg_t * msg) +{ + clib_error_t *err; + memif_msg_connected_t *c = &msg->connected; + + if ((err = memif_connect (mif))) + return err; + + mif->remote_if_name = memif_str2vec (c->if_name, sizeof (c->if_name)); + return 0; +} + +static clib_error_t * +memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg) +{ + memif_msg_disconnect_t *d = &msg->disconnect; + + mif->remote_disc_string = memif_str2vec (d->string, sizeof (d->string)); + return clib_error_return (0, "disconnect received"); +} + +static clib_error_t * +memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) +{ + char ctl[CMSG_SPACE (sizeof (int)) + + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + memif_msg_t msg = { 0 }; + ssize_t size; + clib_error_t *err = 0; + int fd = -1; + int i; + memif_if_t *mif = *mifp; + + iov[0].iov_base = (void *) &msg; + iov[0].iov_len = sizeof (memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + + /* receive the incoming message */ + size = recvmsg (uf->file_descriptor, &mh, 0); + if (size != sizeof (memif_msg_t)) + { + return (size == 0) ? clib_error_return (0, "disconnected") : + clib_error_return_unix (0, + "recvmsg: malformed message received on fd %d", + uf->file_descriptor); + } + + if (mif == 0 && msg.type != MEMIF_MSG_TYPE_INIT) + { + memif_file_del (uf); + return clib_error_return (0, "unexpected message received"); + } + + /* process anciliary data */ + struct ucred *cr = 0; + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR (&mh); + while (cmsg) + { + if (cmsg->cmsg_level == SOL_SOCKET) + { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + { + cr = (struct ucred *) CMSG_DATA (cmsg); + } + else if (cmsg->cmsg_type == SCM_RIGHTS) + { + int *fdp = (int *) CMSG_DATA (cmsg); + fd = *fdp; + } + } + cmsg = CMSG_NXTHDR (&mh, cmsg); + } + + DBG ("Message type %u received", msg.type); + /* process the message based on its type */ + switch (msg.type) + { + case MEMIF_MSG_TYPE_ACK: + break; + + case MEMIF_MSG_TYPE_HELLO: + if ((err = memif_msg_receive_hello (mif, &msg))) + return err; + if ((err = memif_init_regions_and_queues (mif))) + return err; + memif_msg_enq_init (mif); + memif_msg_enq_add_region (mif, 0); + vec_foreach_index (i, mif->tx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M); + vec_foreach_index (i, mif->rx_queues) + memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S); + memif_msg_enq_connect (mif); + break; + + case MEMIF_MSG_TYPE_INIT: + if ((err = memif_msg_receive_init (mifp, &msg, uf))) + return err; + mif = *mifp; + mif->remote_pid = cr->pid; + mif->remote_uid = cr->uid; + mif->remote_gid = cr->gid; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_REGION: + if ((err = memif_msg_receive_add_region (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_ADD_RING: + if ((err = memif_msg_receive_add_ring (mif, &msg, fd))) + return err; + memif_msg_enq_ack (mif); + break; + + case MEMIF_MSG_TYPE_CONNECT: + if ((err = memif_msg_receive_connect (mif, &msg))) + return err; + memif_msg_enq_connected (mif); + break; + + case MEMIF_MSG_TYPE_CONNECTED: + if ((err = memif_msg_receive_connected (mif, &msg))) + return err; + break; + + case MEMIF_MSG_TYPE_DISCONNECT: + if ((err = memif_msg_receive_disconnect (mif, &msg))) + return err; + break; + + default: + err = clib_error_return (0, "unknown message type (0x%x)", msg.type); + return err; + } + + if (clib_fifo_elts (mif->msg_queue) && mif->conn_unix_file_index != ~0) + unix_file_set_data_available_to_write (mif->conn_unix_file_index, 1); + return 0; +} + +clib_error_t * +memif_master_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif = 0; + uword conn_unix_file_index = ~0; + clib_error_t *err = 0; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + mif = vec_elt_at_index (mm->interfaces, p[0]); + } + else + { + /* This is new connection, remove index from pending vector */ + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + { + conn_unix_file_index = msf->pending_file_indices[i]; + vec_del1 (msf->pending_file_indices, i); + break; + } + ASSERT (conn_unix_file_index != ~0); + } + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +clib_error_t * +memif_slave_conn_fd_read_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + clib_error_t *err; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + err = memif_msg_receive (&mif, uf); + if (err) + { + memif_disconnect (mif, err); + clib_error_free (err); + } + return 0; +} + +static clib_error_t * +memif_conn_fd_write_ready (unix_file_t * uf, memif_if_t * mif) +{ + memif_msg_fifo_elt_t *e; + clib_fifo_sub2 (mif->msg_queue, e); + unix_file_set_data_available_to_write (mif->conn_unix_file_index, 0); + memif_msg_send (mif->conn_fd, &e->msg, e->fd); + return 0; +} + +clib_error_t * +memif_master_conn_fd_write_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + memif_if_t *mif; + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (!p) + return 0; + + mif = vec_elt_at_index (mm->interfaces, p[0]); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_write_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + return memif_conn_fd_write_ready (uf, mif); +} + +clib_error_t * +memif_slave_conn_fd_error (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); + clib_error_t *err; + + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + + return 0; +} + +clib_error_t * +memif_master_conn_fd_error (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + uword *p; + + + p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); + if (p) + { + memif_if_t *mif; + clib_error_t *err; + mif = vec_elt_at_index (mm->interfaces, p[0]); + err = clib_error_return (0, "connection fd error"); + memif_disconnect (mif, err); + clib_error_free (err); + } + else + { + int i; + vec_foreach_index (i, msf->pending_file_indices) + if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + { + vec_del1 (msf->pending_file_indices, i); + memif_file_del (uf); + return 0; + } + } + + clib_warning ("Error on unknown file descriptor %d", uf->file_descriptor); + memif_file_del (uf); + return 0; +} + + +clib_error_t * +memif_conn_fd_accept_ready (unix_file_t * uf) +{ + memif_main_t *mm = &memif_main; + memif_socket_file_t *msf = + pool_elt_at_index (mm->socket_files, uf->private_data); + int addr_len; + struct sockaddr_un client; + int conn_fd; + unix_file_t template = { 0 }; + uword unix_file_index = ~0; + clib_error_t *err; + + + addr_len = sizeof (client); + conn_fd = accept (uf->file_descriptor, + (struct sockaddr *) &client, (socklen_t *) & addr_len); + + if (conn_fd < 0) + return clib_error_return_unix (0, "accept fd %d", uf->file_descriptor); + + template.read_function = memif_master_conn_fd_read_ready; + template.write_function = memif_master_conn_fd_write_ready; + template.error_function = memif_master_conn_fd_error; + template.file_descriptor = conn_fd; + template.private_data = uf->private_data; + + memif_file_add (&unix_file_index, &template); + + err = memif_msg_enq_hello (conn_fd); + if (err) + { + clib_error_report (err); + memif_file_del_by_index (unix_file_index); + } + else + vec_add1 (msf->pending_file_indices, unix_file_index); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 3b20ee4e2b66c287d7a67aff1a304b45be55a4a8 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 12 Jun 2017 21:38:35 +0200 Subject: memif: fix crash during interface delete Change-Id: Ide6d26d6fcc81be6f26ac0abe2cd0d6a0838cfe6 Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index f082b58c..d21a30eb 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -67,6 +67,7 @@ memif_queue_intfd_close (memif_queue_t * mq) void memif_disconnect (memif_if_t * mif, clib_error_t * err) { + memif_main_t *mm = &memif_main; vnet_main_t *vnm = vnet_get_main (); memif_region_t *mr; memif_queue_t *mq; @@ -94,6 +95,9 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) /* close connection socket */ if (mif->conn_unix_file_index != ~0) { + memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, + mif->socket_file_index); + hash_unset (msf->dev_instance_by_fd, mif->conn_fd); memif_file_del_by_index (mif->conn_unix_file_index); mif->conn_unix_file_index = ~0; } -- cgit 1.2.3-korg From 45ca2c7a9c0835e1b5d5641ca230f720b49e8cf4 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 16 Jun 2017 22:06:00 +0200 Subject: memif: add ip mode In IP mode memif interface is L3 point-to-point interfaces and we don't pass l2 header. There is no l2 header rewrite operation and received packets are sent straight to ip4-input / ip6-input nodes. Change-Id: I4177f3fce3004da7ecf14d235006ae053fcf3f09 Signed-off-by: Damjan Marion --- src/plugins/memif/cli.c | 5 ++- src/plugins/memif/memif.c | 61 +++++++++++++++++++-------- src/plugins/memif/node.c | 100 +++++++++++++++++++++++++++++++++++++-------- src/plugins/memif/socket.c | 1 + 4 files changed, 132 insertions(+), 35 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c index 34c77def..8af76164 100644 --- a/src/plugins/memif/cli.c +++ b/src/plugins/memif/cli.c @@ -62,6 +62,8 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.is_master = 1; else if (unformat (line_input, "slave")) args.is_master = 0; + else if (unformat (line_input, "mode ip")) + args.mode = MEMIF_INTERFACE_MODE_IP; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.hw_addr)) args.hw_addr_set = 1; @@ -107,7 +109,8 @@ VLIB_CLI_COMMAND (memif_create_command, static) = { .path = "create memif", .short_help = "create memif [id ] [socket ] " "[ring-size ] [buffer-size ] [hw-addr ] " - " [rx-queues ] [tx-queues ]", + " [rx-queues ] [tx-queues ]" + "[mode ip]", .function = memif_create_command_fn, }; /* *INDENT-ON* */ diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index d21a30eb..ec67023b 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -482,7 +482,10 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) clib_error_free (err); /* remove the interface */ - ethernet_delete_interface (vnm, mif->hw_if_index); + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + vnet_delete_hw_interface (vnm, mif->hw_if_index); + else + ethernet_delete_interface (vnm, mif->hw_if_index); mif->hw_if_index = ~0; /* free interface data structures */ @@ -519,6 +522,14 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) return 0; } +/* *INDENT-OFF* */ +VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = +{ + .name = "memif-ip", + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; +/* *INDENT-ON* */ + int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) { @@ -614,28 +625,43 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; mif->conn_unix_file_index = ~0; mif->conn_fd = -1; + mif->mode = args->mode; if (args->secret) mif->secret = vec_dup (args->secret); if (tm->n_vlib_mains > 1) clib_spinlock_init (&mif->lockp); - if (!args->hw_addr_set) + + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - - memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); - args->hw_addr[0] = 2; - args->hw_addr[1] = 0xfe; - } - error = ethernet_register_interface (vnm, memif_device_class.index, - mif->dev_instance, args->hw_addr, - &mif->hw_if_index, - memif_eth_flag_change); + if (!args->hw_addr_set) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (args->hw_addr + 2, &rnd, sizeof (rnd)); + args->hw_addr[0] = 2; + args->hw_addr[1] = 0xfe; + } + error = ethernet_register_interface (vnm, memif_device_class.index, + mif->dev_instance, args->hw_addr, + &mif->hw_if_index, + memif_eth_flag_change); + } + else if (mif->mode == MEMIF_INTERFACE_MODE_IP) + { + mif->hw_if_index = + vnet_register_interface (vnm, memif_device_class.index, + mif->dev_instance, + memif_ip_hw_if_class.index, + mif->dev_instance); + } + else + error = clib_error_return (0, "unsupported interface mode"); if (error) { @@ -725,7 +751,10 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) error: if (mif->hw_if_index != ~0) { - ethernet_delete_interface (vnm, mif->hw_if_index); + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + vnet_delete_hw_interface (vnm, mif->hw_if_index); + else + ethernet_delete_interface (vnm, mif->hw_if_index); mif->hw_if_index = ~0; } memif_delete_if (vm, mif); diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index c6403fef..4acc7149 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -30,7 +30,8 @@ #include #include -#define foreach_memif_input_error +#define foreach_memif_input_error \ + _(NOT_IP, "not ip packet") typedef enum { @@ -200,16 +201,33 @@ memif_copy_buffer_from_rx_ring (vlib_main_t * vm, memif_if_t * mif, return (total_bytes); } + +static_always_inline u32 +memif_next_from_ip_hdr (vlib_node_runtime_t * node, vlib_buffer_t * b) +{ + u8 *ptr = vlib_buffer_get_current (b); + u8 v = *ptr & 0xf0; + + if (PREDICT_TRUE (v == 0x40)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else if (PREDICT_TRUE (v == 0x60)) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + + b->error = node->errors[MEMIF_INPUT_ERROR_NOT_IP]; + return VNET_DEVICE_INPUT_NEXT_DROP; +} + static_always_inline uword memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, memif_if_t * mif, - memif_ring_type_t type, u16 qid) + memif_ring_type_t type, u16 qid, + memif_interface_mode_t mode) { vnet_main_t *vnm = vnet_get_main (); memif_ring_t *ring; memif_queue_t *mq; u16 head; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + u32 next_index; uword n_trace = vlib_get_trace_count (vm, node); memif_main_t *nm = &memif_main; u32 n_rx_packets = 0; @@ -227,8 +245,14 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ring_size = 1 << mq->log2_ring_size; mask = ring_size - 1; - if (mif->per_interface_next_index != ~0) - next_index = mif->per_interface_next_index; + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + } + else + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + } n_free_bufs = vec_len (nm->rx_buffers[thread_index]); if (PREDICT_FALSE (n_free_bufs < ring_size)) @@ -308,6 +332,24 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next += 2; n_left_to_next -= 2; + + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next0 = memif_next_from_ip_hdr (node, first_b0); + next1 = memif_next_from_ip_hdr (node, first_b1); + } + else if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + if (PREDICT_FALSE (mif->per_interface_next_index != ~0)) + next0 = next1 = mif->per_interface_next_index; + else + /* redirect if feature path + * enabled */ + vnet_feature_start_device_input_x2 (mif->sw_if_index, + &next0, &next1, + first_b0, first_b1); + } + /* trace */ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b1); @@ -343,11 +385,6 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } } - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x2 (mif->sw_if_index, - &next0, &next1, first_b0, - first_b1); - /* enqueue */ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, first_bi0, @@ -368,6 +405,21 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, &first_bi0, &bi0, &num_slots); + if (mode == MEMIF_INTERFACE_MODE_IP) + { + next0 = memif_next_from_ip_hdr (node, first_b0); + } + else if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + if (PREDICT_FALSE (mif->per_interface_next_index != ~0)) + next0 = mif->per_interface_next_index; + else + /* redirect if feature path + * enabled */ + vnet_feature_start_device_input_x1 (mif->sw_if_index, + &next0, first_b0); + } + /* trace */ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0); @@ -391,10 +443,6 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, to_next += 1; n_left_to_next--; - /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (mif->sw_if_index, &next0, - first_b0); - /* enqueue */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, first_bi0, next0); @@ -433,11 +481,27 @@ memif_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, (mif->flags & MEMIF_IF_FLAG_CONNECTED)) { if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) - n_rx += memif_device_input_inline (vm, node, frame, mif, - MEMIF_RING_M2S, dq->queue_id); + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id, + MEMIF_INTERFACE_MODE_IP); + else + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_M2S, dq->queue_id, + MEMIF_INTERFACE_MODE_ETHERNET); + } else - n_rx += memif_device_input_inline (vm, node, frame, mif, - MEMIF_RING_S2M, dq->queue_id); + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id, + MEMIF_INTERFACE_MODE_IP); + else + n_rx += memif_device_input_inline (vm, node, frame, mif, + MEMIF_RING_S2M, dq->queue_id, + MEMIF_INTERFACE_MODE_ETHERNET); + } } } diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index d1a96db3..a3c8d544 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -132,6 +132,7 @@ memif_msg_enq_init (memif_if_t * mif) e->fd = -1; i->version = MEMIF_VERSION; i->id = mif->id; + i->mode = mif->mode; s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); strncpy ((char *) i->name, (char *) s, sizeof (i->name)); if (mif->secret) -- cgit 1.2.3-korg From 4e53a0d0f01c8c81842d1f50fb5cf0d26e0c1713 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 21 Jun 2017 14:29:44 +0200 Subject: Introduce default rx mode for device drivers If interface is down and queues are not configured then we are not able to change rx-mode. This change introducess default mode which is stored per interface and applied if driver wants. Change-Id: I70149c21c1530eafc148d5e4aa03fbee53dec62f Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 2 +- src/vnet/devices/devices.c | 3 +++ src/vnet/interface.c | 1 + src/vnet/interface.h | 2 ++ src/vnet/interface_cli.c | 18 ++++++++++++------ 5 files changed, 19 insertions(+), 7 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index ec67023b..fffb94c9 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -222,7 +222,7 @@ memif_connect (memif_if_t * mif) } vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, - VNET_HW_INTERFACE_RX_MODE_INTERRUPT); + VNET_HW_INTERFACE_RX_MODE_DEFAULT); if (rv) clib_warning ("Warning: unable to set rx mode for interface %d queue %d: " diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index e3311c43..f64c6e0d 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -245,6 +245,9 @@ vnet_hw_interface_set_rx_mode (vnet_main_t * vnm, u32 hw_if_index, vnet_device_input_runtime_t *rt; int is_polling = 0; + if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT) + mode = hw->default_rx_mode; + if (hw->input_node_thread_index_by_queue == 0 || hw->rx_mode_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; diff --git a/src/vnet/interface.c b/src/vnet/interface.c index e9042ae8..1370d048 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -706,6 +706,7 @@ vnet_register_interface (vnet_main_t * vnm, hw_index = hw - im->hw_interfaces; hw->hw_if_index = hw_index; + hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING; if (dev_class->format_device_name) hw->name = format (0, "%U", dev_class->format_device_name, dev_instance); diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 095fe961..d684e356 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -54,6 +54,7 @@ typedef enum VNET_HW_INTERFACE_RX_MODE_POLLING, VNET_HW_INTERFACE_RX_MODE_INTERRUPT, VNET_HW_INTERFACE_RX_MODE_ADAPTIVE, + VNET_HW_INTERFACE_RX_MODE_DEFAULT, VNET_HW_INTERFACE_NUM_RX_MODES, } vnet_hw_interface_rx_mode; @@ -492,6 +493,7 @@ typedef struct vnet_hw_interface_t /* vnet_hw_interface_rx_mode by queue */ u8 *rx_mode_by_queue; + vnet_hw_interface_rx_mode default_rx_mode; /* device input device_and_queue runtime index */ uword *dq_runtime_index_by_queue; diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index d37c7894..bf2873ac 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1186,6 +1186,9 @@ set_hw_interface_rx_mode (vnet_main_t * vnm, u32 hw_if_index, vnet_hw_interface_rx_mode old_mode; int rv; + if (mode == VNET_HW_INTERFACE_RX_MODE_DEFAULT) + mode = hw->default_rx_mode; + rv = vnet_hw_interface_get_rx_mode (vnm, hw_if_index, queue_id, &old_mode); switch (rv) { @@ -1272,12 +1275,15 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, hw = vnet_get_hw_interface (vnm, hw_if_index); if (queue_id == ~0) - for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) - { - error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); - if (error) - break; - } + { + for (i = 0; i < vec_len (hw->dq_runtime_index_by_queue); i++) + { + error = set_hw_interface_rx_mode (vnm, hw_if_index, i, mode); + if (error) + break; + } + hw->default_rx_mode = mode; + } else error = set_hw_interface_rx_mode (vnm, hw_if_index, queue_id, mode); -- cgit 1.2.3-korg From e2b3493029659bb0fd7eb9c571a757a060726774 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 5 Jul 2017 18:13:10 +0200 Subject: memif: mask interrupts on startup if we are in the polling mode Change-Id: Ief02eb1109a1bc463665d9747e9fa4e0c0e3d7e0 Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index fffb94c9..ba123149 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -227,6 +227,14 @@ memif_connect (memif_if_t * mif) clib_warning ("Warning: unable to set rx mode for interface %d queue %d: " "rc=%d", mif->hw_if_index, i, rv); + else + { + vnet_hw_interface_rx_mode rxmode; + vnet_hw_interface_get_rx_mode (vnm, mif->hw_if_index, i, &rxmode); + + if (rxmode == VNET_HW_INTERFACE_RX_MODE_POLLING) + mq->ring->flags |= MEMIF_RING_FLAG_MASK_INT; + } } mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; -- cgit 1.2.3-korg From 57d963f88b2c99e698e2b29f72e190f47f41b1ad Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 20 Jul 2017 19:17:06 +0200 Subject: Make VPP runtime directory configurable New startup config command: unix { runtime-dir /run/vpp } Also, adds recursive mkdir funtion for use in deifferent places like cli-config socket path and dpdk hugepage directory path. Change-Id: I1446ceab9c220c25804e73a743a3ebb383450124 Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 29 +++++++++++++---------------- src/plugins/memif/memif.c | 14 +++++++++----- src/plugins/memif/private.h | 1 - src/vlib/unix/cli.c | 20 ++++++++++++-------- src/vlib/unix/main.c | 13 ++++++++++++- src/vlib/unix/unix.h | 14 ++++++++++---- src/vlib/unix/util.c | 36 +++++++++++++++++++++++++++++------- src/vpp/vnet/main.c | 5 +++++ 8 files changed, 90 insertions(+), 42 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index c6c9ee34..6f7e168b 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -37,8 +37,6 @@ dpdk_main_t dpdk_main; #define LINK_STATE_ELOGS 0 -#define DEFAULT_HUGE_DIR (VPP_RUN_DIR "/hugepages") - /* Port configuration, mildly modified Intel app values */ static struct rte_eth_conf port_conf_template = { @@ -835,6 +833,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u8 huge_dir = 0; u8 file_prefix = 0; u8 *socket_mem = 0; + u8 *huge_dir_path = 0; + + huge_dir_path = + format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0); conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); log_level = RTE_LOG_NOTICE; @@ -980,7 +982,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u8 less_than_1g = 1; int rv; - umount (DEFAULT_HUGE_DIR); + umount ((char *) huge_dir_path); /* Process "socket-mem" parameter value */ if (vec_len (socket_mem)) @@ -1057,27 +1059,20 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_free (mem_by_socket); - /* Make sure VPP_RUN_DIR exists */ - error = unix_make_vpp_run_dir (); + error = vlib_unix_recursive_mkdir ((char *) huge_dir_path); if (error) - goto done; - - rv = mkdir (DEFAULT_HUGE_DIR, 0755); - if (rv && errno != EEXIST) { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); goto done; } if (use_1g && !(less_than_1g && use_2m)) { - rv = - mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, + "pagesize=1G"); } else if (use_2m) { - rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + rv = mount ("none", (char *) huge_dir_path, "hugetlbfs", 0, NULL); } else { @@ -1092,7 +1087,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) tmp = format (0, "--huge-dir%c", 0); vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + tmp = format (0, "%s%c", huge_dir_path, 0); vec_add1 (conf->eal_init_args, tmp); if (!file_prefix) { @@ -1209,7 +1204,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) (char **) conf->eal_init_args); /* lazy umount hugepages */ - umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + umount2 ((char *) huge_dir_path, MNT_DETACH); + rmdir ((char *) huge_dir_path); + vec_free (huge_dir_path); if (ret < 0) return clib_error_return (0, "rte_eal_init returned %d", ret); diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index ba123149..af81faf2 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -556,15 +556,19 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) if (args->socket_filename == 0 || args->socket_filename[0] != '/') { - rv = mkdir (MEMIF_DEFAULT_SOCKET_DIR, 0755); - if (rv && errno != EEXIST) - return VNET_API_ERROR_SYSCALL_ERROR_1; + clib_error_t *error; + error = vlib_unix_recursive_mkdir (vlib_unix_get_runtime_dir ()); + if (error) + { + clib_error_free (error); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } if (args->socket_filename == 0) - socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), MEMIF_DEFAULT_SOCKET_FILENAME, 0); else - socket_filename = format (0, "%s/%s%c", MEMIF_DEFAULT_SOCKET_DIR, + socket_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), args->socket_filename, 0); } diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index 0f82f1e9..985ac5ec 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -17,7 +17,6 @@ #include -#define MEMIF_DEFAULT_SOCKET_DIR "/run/vpp" #define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock" #define MEMIF_DEFAULT_RING_SIZE 1024 #define MEMIF_DEFAULT_RX_QUEUES 1 diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 1befa25d..068a4e16 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -2642,15 +2642,19 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) /* CLI listen. */ unix_file_t template = { 0 }; - /* If our listen address looks like a path and it starts with - * VPP_RUN_DIR, go make sure VPP_RUN_DIR exists before trying to open - * a socket in it. - */ - if (strncmp (s->config, VPP_RUN_DIR "/", strlen (VPP_RUN_DIR) + 1) == 0) + /* mkdir of file socketu, only under /run */ + if (strncmp (s->config, "/run", 4) == 0) { - error = unix_make_vpp_run_dir (); - if (error) - return error; + u8 *tmp = format (0, "%s", s->config); + int i = vec_len (tmp); + while (i && tmp[--i] != '/') + ; + + tmp[i] = 0; + + if (i) + vlib_unix_recursive_mkdir ((char *) tmp); + vec_free (tmp); } s->flags = SOCKET_IS_SERVER | /* listen, don't connect */ diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index ad1a7c3c..cb34a89f 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -56,6 +56,8 @@ /** Default CLI history depth if not configured in startup.conf */ #define UNIX_CLI_DEFAULT_HISTORY 50 +char *vlib_default_runtime_dir __attribute__ ((weak)); +char *vlib_default_runtime_dir = "/run/vlib"; unix_main_t unix_main; @@ -332,6 +334,8 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config)) ; + else if (unformat (input, "runtime-dir %s", &um->runtime_dir)) + ; else if (unformat (input, "cli-line-mode")) um->cli_line_mode = 1; else if (unformat (input, "cli-no-banner")) @@ -432,6 +436,9 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) } um->unix_config_complete = 1; + if (um->runtime_dir == 0) + um->runtime_dir = format (0, "%s%c", vlib_default_runtime_dir, 0); + return 0; } @@ -463,6 +470,10 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) * Ask the Linux kernel to dump all memory-mapped address regions, instead * of just text+data+bss. * + * @cfgcmd{runtime-dir} + * Define directory where VPP is going to store all runtime files. + * Default is /run/vpp. + * * @cfgcmd{cli-listen, <address:port>} * Bind the CLI to listen at the address and port given. @clocalhost * on TCP port @c 5002, given as cli-listen localhost:5002, @@ -489,7 +500,7 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) * Limit pager buffer to @c nn lines of output. * A value of @c 0 disables the pager. Default value: @c 100000 ?*/ -VLIB_CONFIG_FUNCTION (unix_config, "unix"); +VLIB_EARLY_CONFIG_FUNCTION (unix_config, "unix"); static clib_error_t * unix_exit (vlib_main_t * vm) diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index ffa92bba..ee1312e3 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -44,9 +44,6 @@ #include -/** VPP runtime ephemeral directory. Typically stored in a tmpfs. */ -#define VPP_RUN_DIR "/run/vpp" - struct unix_file; typedef clib_error_t *(unix_file_function_t) (struct unix_file * f); @@ -106,6 +103,9 @@ typedef struct /* startup-config filename */ u8 *startup_config_filename; + /* runtime directory path */ + u8 *runtime_dir; + /* unix config complete */ volatile int unix_config_complete; @@ -214,6 +214,12 @@ vlib_unix_get_main (void) return &unix_main; } +static inline char * +vlib_unix_get_runtime_dir (void) +{ + return (char *) unix_main.runtime_dir; +} + /* thread stack array; vec_len = max number of threads */ extern u8 **vlib_thread_stacks; @@ -233,7 +239,7 @@ clib_error_t *foreach_directory_file (char *dir_name, u8 * file_name), void *arg, int scan_dirs); -clib_error_t *unix_make_vpp_run_dir (void); +clib_error_t *vlib_unix_recursive_mkdir (char *path); #endif /* included_unix_unix_h */ diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c index 51b4a4ed..93aeb99c 100644 --- a/src/vlib/unix/util.c +++ b/src/vlib/unix/util.c @@ -223,16 +223,38 @@ done: } clib_error_t * -unix_make_vpp_run_dir (void) +vlib_unix_recursive_mkdir (char *path) { - int rv; + clib_error_t *error = 0; + char *c = 0; + int i = 0; - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - return clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); + while (path[i] != 0) + { + if (c && path[i] == '/') + { + vec_add1 (c, 0); + if ((mkdir (c, 0755)) && (errno != EEXIST)) + { + error = clib_error_return_unix (0, "mkdir '%s'", c); + goto done; + } + _vec_len (c)--; + } + vec_add1 (c, path[i]); + i++; + } - return 0; + if ((mkdir (path, 0755)) && (errno != EEXIST)) + { + error = clib_error_return_unix (0, "mkdir '%s'", path); + goto done; + } + +done: + vec_free (c); + + return error; } /* diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index ade32aa1..9fe65fe2 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -41,6 +41,11 @@ vpe_main_init (vlib_main_t * vm) vat_plugin_hash_create (); } +/* + * Default path for runtime data + */ +char *vlib_default_runtime_dir = "/run/vpp"; + /* * Load plugins from /usr/lib/vpp_plugins by default */ -- cgit 1.2.3-korg From d6042d4f1ea0baf02bc87c72960a331a9e08dfab Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 7 Sep 2017 09:20:56 -0700 Subject: memif: fix coverity warnings as of 9/7 1. coverity complains about "buffer not null terminated" for strncpy because we pass the size of the destination to the call which is equal to the true size of the destination. We subtract 1 for the size to accommodate the null like all other places are already doing it. 2. Add a check to tx_queues in memif_interface_tx_inline to avoid "divide by zero". 3. To avoid null pointer dereference in memif_create_if, change the goto done rather than goto error and spit a more meaningful error rather than silent about it. 4. Shuffle a line to avoid "check after use" in vl_api_memif_delete_t_handler. Change-Id: Icba7ecd5362c012a48ac35795d31aab356617420 Signed-off-by: Steven --- src/plugins/memif/device.c | 14 ++++++++++++-- src/plugins/memif/memif.c | 7 +++++-- src/plugins/memif/memif_api.c | 7 +++++-- src/plugins/memif/socket.c | 13 +++++++------ 4 files changed, 29 insertions(+), 12 deletions(-) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index c7682534..aff18f2d 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -31,7 +31,8 @@ #define foreach_memif_tx_func_error \ _(NO_FREE_SLOTS, "no free tx slots") \ _(TRUNC_PACKET, "packet > buffer size -- truncated in tx ring") \ -_(PENDING_MSGS, "pending msgs in tx ring") +_(PENDING_MSGS, "pending msgs in tx ring") \ +_(NO_TX_QUEUES, "no tx queues") typedef enum { @@ -169,6 +170,13 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u8 tx_queues = vec_len (mif->tx_queues); memif_queue_t *mq; + if (PREDICT_FALSE (tx_queues == 0)) + { + vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_TX_QUEUES, + n_left); + goto error; + } + if (tx_queues < vec_len (vlib_mains)) { qid = thread_index % tx_queues; @@ -249,7 +257,6 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left); } - vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1) { u64 b = 1; @@ -257,6 +264,9 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node, mq->int_count++; } +error: + vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors); + return frame->n_vectors; } diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index af81faf2..7e2d947f 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -611,8 +611,11 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) } else { - ret = VNET_API_ERROR_SYSCALL_ERROR_3; - goto error; + error = clib_error_return (0, "File exists for %s", + socket_filename); + clib_error_report (error); + rv = VNET_API_ERROR_VALUE_EXIST; + goto done; } } pool_get (mm->socket_files, msf); diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index 2b7b792a..89afdb8d 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -201,13 +201,16 @@ vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) vl_api_memif_delete_reply_t *rmp; vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, ntohl (mp->sw_if_index)); - memif_if_t *mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + memif_if_t *mif; int rv = 0; if (hi == NULL || memif_device_class.index != hi->dev_class_index) rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; else - rv = memif_delete_if (vm, mif); + { + mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); + rv = memif_delete_if (vm, mif); + } REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); } diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index a3c8d544..79ae07be 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -115,7 +115,7 @@ memif_msg_enq_hello (int fd) h->max_region = MEMIF_MAX_REGION; h->max_log2_ring_size = MEMIF_MAX_LOG2_RING_SIZE; s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); - strncpy ((char *) h->name, (char *) s, sizeof (h->name)); + strncpy ((char *) h->name, (char *) s, sizeof (h->name) - 1); vec_free (s); return memif_msg_send (fd, &msg, -1); } @@ -134,9 +134,10 @@ memif_msg_enq_init (memif_if_t * mif) i->id = mif->id; i->mode = mif->mode; s = format (0, "VPP %s%c", VPP_BUILD_VER, 0); - strncpy ((char *) i->name, (char *) s, sizeof (i->name)); + strncpy ((char *) i->name, (char *) s, sizeof (i->name) - 1); if (mif->secret) - strncpy ((char *) i->secret, (char *) mif->secret, sizeof (i->secret)); + strncpy ((char *) i->secret, (char *) mif->secret, + sizeof (i->secret) - 1); vec_free (s); } @@ -189,7 +190,7 @@ memif_msg_enq_connect (memif_if_t * mif) e->msg.type = MEMIF_MSG_TYPE_CONNECT; e->fd = -1; s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); - strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1); vec_free (s); } @@ -204,7 +205,7 @@ memif_msg_enq_connected (memif_if_t * mif) e->msg.type = MEMIF_MSG_TYPE_CONNECTED; e->fd = -1; s = format (0, "%U%c", format_memif_device_name, mif->dev_instance, 0); - strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name)); + strncpy ((char *) c->if_name, (char *) s, sizeof (c->if_name) - 1); vec_free (s); } @@ -216,7 +217,7 @@ memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err) memif_msg_disconnect_t *d = &msg.disconnect; d->code = err->code; - strncpy ((char *) d->string, (char *) err->what, sizeof (d->string)); + strncpy ((char *) d->string, (char *) err->what, sizeof (d->string) - 1); return memif_msg_send (mif->conn_fd, &msg, -1); } -- cgit 1.2.3-korg From 3b64d6334b4e8d0759cff043a55042f88d1ccb0e Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 12:26:12 +0200 Subject: vlib: move linux-specific code to vlib/linux Change-Id: Id79d2c2be7a98e15416a537c890a8f2dd6d4464d Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 1 + src/plugins/memif/memif.c | 1 + src/plugins/memif/private.h | 30 -- src/vlib.am | 7 +- src/vlib/linux/pci.c | 666 +++++++++++++++++++++++++++++++++ src/vlib/linux/physmem.c | 411 ++++++++++++++++++++ src/vlib/linux/syscall.h | 58 +++ src/vlib/linux/sysfs.c | 250 +++++++++++++ src/vlib/linux/sysfs.h | 44 +++ src/vlib/pci/linux_pci.c | 665 -------------------------------- src/vlib/threads_cli.c | 1 + src/vlib/unix/physmem.c | 439 ---------------------- src/vlib/unix/unix.h | 17 - src/vlib/unix/util.c | 219 ----------- src/vnet/devices/af_packet/af_packet.c | 1 + 15 files changed, 1438 insertions(+), 1372 deletions(-) create mode 100644 src/vlib/linux/pci.c create mode 100644 src/vlib/linux/physmem.c create mode 100644 src/vlib/linux/syscall.h create mode 100644 src/vlib/linux/sysfs.c create mode 100644 src/vlib/linux/sysfs.h delete mode 100644 src/vlib/pci/linux_pci.c delete mode 100644 src/vlib/unix/physmem.c (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index e23542f7..4ef3b676 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 7e2d947f..4c387b92 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index 985ac5ec..b5f2f8ff 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -228,24 +228,6 @@ int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - static_always_inline void * memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) { @@ -253,18 +235,6 @@ memif_get_buffer (memif_if_t * mif, memif_ring_t * ring, u16 slot) return mif->regions[region].shm + ring->desc[slot].offset; } -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - /* memif.c */ clib_error_t *memif_init_regions_and_queues (memif_if_t * mif); clib_error_t *memif_connect (memif_if_t * mif); diff --git a/src/vlib.am b/src/vlib.am index cab90e2d..41d68690 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -32,13 +32,15 @@ libvlib_la_SOURCES = \ vlib/format.c \ vlib/i2c.c \ vlib/init.c \ + vlib/linux/pci.c \ + vlib/linux/physmem.c \ + vlib/linux/sysfs.c \ vlib/main.c \ vlib/mc.c \ vlib/node.c \ vlib/node_cli.c \ vlib/node_format.c \ vlib/pci/pci.c \ - vlib/pci/linux_pci.c \ vlib/threads.c \ vlib/threads_cli.c \ vlib/trace.c @@ -58,6 +60,8 @@ nobase_include_HEADERS += \ vlib/global_funcs.h \ vlib/i2c.h \ vlib/init.h \ + vlib/linux/sysfs.h \ + vlib/linux/syscall.h \ vlib/main.h \ vlib/mc.h \ vlib/node_funcs.h \ @@ -79,7 +83,6 @@ libvlib_la_SOURCES += \ vlib/unix/mc_socket.c \ vlib/unix/plugin.c \ vlib/unix/plugin.h \ - vlib/unix/physmem.c \ vlib/unix/util.c nobase_include_HEADERS += \ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c new file mode 100644 index 00000000..cd2affdc --- /dev/null +++ b/src/vlib/linux/pci.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pci.c: Linux user space PCI bus management. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct +{ + /* /sys/bus/pci/devices/... directory name for this device. */ + u8 *dev_dir_name; + + /* Resource file descriptors. */ + int *resource_fds; + + /* File descriptor for config space read/write. */ + int config_fd; + + /* File descriptor for /dev/uio%d */ + int uio_fd; + + /* Minor device for uio device. */ + u32 uio_minor; + + /* Index given by unix_file_add. */ + u32 unix_file_index; + +} linux_pci_device_t; + +/* Pool of PCI devices. */ +typedef struct +{ + vlib_main_t *vlib_main; + linux_pci_device_t *linux_pci_devices; +} linux_pci_main_t; + +extern linux_pci_main_t linux_pci_main; + +/* Call to allocate/initialize the pci subsystem. + This is not an init function so that users can explicitly enable + pci only when it's needed. */ +clib_error_t *pci_bus_init (vlib_main_t * vm); + +clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d, + char *uio_driver_name); + +linux_pci_main_t linux_pci_main; + +clib_error_t * +vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) +{ + clib_error_t *error = 0; + u8 *s = 0, *driver_name = 0; + DIR *dir = 0; + struct dirent *e; + int fd, clear_driver_override = 0; + u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U", + format_vlib_pci_addr, &d->bus_address); + + s = format (s, "%v/driver%c", dev_dir_name, 0); + driver_name = vlib_sysfs_link_to_name ((char *) s); + vec_reset_length (s); + + if (driver_name && + ((strcmp ("vfio-pci", (char *) driver_name) == 0) || + (strcmp ("uio_pci_generic", (char *) driver_name) == 0) || + (strcmp ("igb_uio", (char *) driver_name) == 0))) + goto done; + + /* walk trough all linux interfaces and if interface belonging to + this device is founf check if interface is admin up */ + dir = opendir ("/sys/class/net"); + s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (!dir) + { + error = clib_error_return (0, "Skipping PCI device %U: failed to " + "read /sys/class/net", + format_vlib_pci_addr, &d->bus_address); + goto done; + } + + fd = socket (PF_INET, SOCK_DGRAM, 0); + if (fd < 0) + { + error = clib_error_return_unix (0, "socket"); + goto done; + } + + while ((e = readdir (dir))) + { + struct ifreq ifr; + struct ethtool_drvinfo drvinfo; + + if (e->d_name[0] == '.') /* skip . and .. */ + continue; + + memset (&ifr, 0, sizeof ifr); + memset (&drvinfo, 0, sizeof drvinfo); + ifr.ifr_data = (char *) &drvinfo; + strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); + drvinfo.cmd = ETHTOOL_GDRVINFO; + if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) + { + /* Some interfaces (eg "lo") don't support this ioctl */ + if ((errno != ENOTSUP) && (errno != ENODEV)) + clib_unix_warning ("ioctl fetch intf %s bus info error", + e->d_name); + continue; + } + + if (strcmp ((char *) s, drvinfo.bus_info)) + continue; + + memset (&ifr, 0, sizeof (ifr)); + strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); + if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl fetch intf %s flags", + e->d_name); + close (fd); + goto done; + } + + if (ifr.ifr_flags & IFF_UP) + { + error = clib_error_return (0, "Skipping PCI device %U as host " + "interface %s is up", + format_vlib_pci_addr, &d->bus_address, + e->d_name); + close (fd); + goto done; + } + } + + close (fd); + vec_reset_length (s); + + s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); + vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + vec_reset_length (s); + + s = format (s, "%v/driver_override%c", dev_dir_name, 0); + if (access ((char *) s, F_OK) == 0) + { + vlib_sysfs_write ((char *) s, "%s", uio_driver_name); + clear_driver_override = 1; + } + else + { + vec_reset_length (s); + s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); + vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, + d->device_id); + } + vec_reset_length (s); + + s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); + vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + vec_reset_length (s); + + if (clear_driver_override) + { + s = format (s, "%v/driver_override%c", dev_dir_name, 0); + vlib_sysfs_write ((char *) s, "%c", 0); + vec_reset_length (s); + } + +done: + closedir (dir); + vec_free (s); + vec_free (dev_dir_name); + vec_free (driver_name); + return error; +} + + +static clib_error_t * +scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) +{ + linux_pci_device_t *l = arg; + unformat_input_t input; + + unformat_init_string (&input, (char *) file_name, vec_len (file_name)); + + if (!unformat (&input, "uio%d", &l->uio_minor)) + abort (); + + unformat_free (&input); + return 0; +} + +static clib_error_t * +linux_pci_uio_read_ready (unix_file_t * uf) +{ + vlib_pci_main_t *pm = &pci_main; + vlib_pci_device_t *d; + int __attribute__ ((unused)) rv; + + u32 icount; + rv = read (uf->file_descriptor, &icount, 4); + + d = pool_elt_at_index (pm->pci_devs, uf->private_data); + + if (d->interrupt_handler) + d->interrupt_handler (d); + + vlib_pci_intr_enable (d); + + return /* no error */ 0; +} + +static clib_error_t * +linux_pci_uio_error_ready (unix_file_t * uf) +{ + u32 error_index = (u32) uf->private_data; + + return clib_error_return (0, "pci device %d: error", error_index); +} + +static void +add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) +{ + vlib_pci_main_t *pm = &pci_main; + linux_pci_main_t *lpm = &linux_pci_main; + linux_pci_device_t *l; + + pool_get (lpm->linux_pci_devices, l); + l[0] = pdev[0]; + + l->dev_dir_name = vec_dup (l->dev_dir_name); + + dev->os_handle = l - lpm->linux_pci_devices; + + { + u8 *uio_dir = format (0, "%s/uio", l->dev_dir_name); + foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ + 1); + vec_free (uio_dir); + } + + { + char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0); + l->uio_fd = open (uio_name, O_RDWR); + if (l->uio_fd < 0) + clib_unix_error ("open `%s'", uio_name); + vec_free (uio_name); + } + + { + unix_file_t template = { 0 }; + unix_main_t *um = &unix_main; + + template.read_function = linux_pci_uio_read_ready; + template.file_descriptor = l->uio_fd; + template.error_function = linux_pci_uio_error_ready; + template.private_data = dev - pm->pci_devs; + + l->unix_file_index = unix_file_add (um, &template); + } +} + +static void +linux_pci_device_free (linux_pci_device_t * l) +{ + int i; + for (i = 0; i < vec_len (l->resource_fds); i++) + if (l->resource_fds[i] > 0) + close (l->resource_fds[i]); + if (l->config_fd > 0) + close (l->config_fd); + if (l->uio_fd > 0) + close (l->uio_fd); + vec_free (l->resource_fds); + vec_free (l->dev_dir_name); +} + +/* Configuration space read/write. */ +clib_error_t * +vlib_pci_read_write_config (vlib_pci_device_t * dev, + vlib_read_or_write_t read_or_write, + uword address, void *data, u32 n_bytes) +{ + linux_pci_main_t *lpm = &linux_pci_main; + linux_pci_device_t *p; + int n; + + p = pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle); + + if (read_or_write == VLIB_READ) + n = pread (p->config_fd, data, n_bytes, address); + else + n = pwrite (p->config_fd, data, n_bytes, address); + + if (n != n_bytes) + return clib_error_return_unix (0, "%s", + read_or_write == VLIB_READ + ? "read" : "write"); + + return 0; +} + +static clib_error_t * +os_map_pci_resource_internal (uword os_handle, + u32 resource, u8 * addr, void **result) +{ + linux_pci_main_t *pm = &linux_pci_main; + linux_pci_device_t *p; + struct stat stat_buf; + u8 *file_name; + int fd; + clib_error_t *error; + int flags = MAP_SHARED; + + error = 0; + p = pool_elt_at_index (pm->linux_pci_devices, os_handle); + + file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0); + fd = open ((char *) file_name, O_RDWR); + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", file_name); + goto done; + } + + if (fstat (fd, &stat_buf) < 0) + { + error = clib_error_return_unix (0, "fstat `%s'", file_name); + goto done; + } + + vec_validate (p->resource_fds, resource); + p->resource_fds[resource] = fd; + if (addr != 0) + flags |= MAP_FIXED; + + *result = mmap (addr, + /* size */ stat_buf.st_size, + PROT_READ | PROT_WRITE, flags, + /* file */ fd, + /* offset */ 0); + if (*result == (void *) -1) + { + error = clib_error_return_unix (0, "mmap `%s'", file_name); + goto done; + } + +done: + if (error) + { + if (fd >= 0) + close (fd); + } + vec_free (file_name); + return error; +} + +clib_error_t * +vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result) +{ + return (os_map_pci_resource_internal + (dev->os_handle, resource, 0 /* addr */ , + result)); +} + +clib_error_t * +vlib_pci_map_resource_fixed (vlib_pci_device_t * dev, + u32 resource, u8 * addr, void **result) +{ + return (os_map_pci_resource_internal + (dev->os_handle, resource, addr, result)); +} + +void +vlib_pci_free_device (vlib_pci_device_t * dev) +{ + linux_pci_main_t *pm = &linux_pci_main; + linux_pci_device_t *l; + + l = pool_elt_at_index (pm->linux_pci_devices, dev->os_handle); + linux_pci_device_free (l); + pool_put (pm->linux_pci_devices, l); +} + +pci_device_registration_t * __attribute__ ((unused)) +pci_device_next_registered (pci_device_registration_t * r) +{ + uword i; + + /* Null vendor id marks end of initialized list. */ + for (i = 0; r->supported_devices[i].vendor_id != 0; i++) + ; + + return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0])); +} + +static clib_error_t * +init_device_from_registered (vlib_main_t * vm, + vlib_pci_device_t * dev, + linux_pci_device_t * pdev) +{ + vlib_pci_main_t *pm = &pci_main; + pci_device_registration_t *r; + pci_device_id_t *i; + clib_error_t *error; + + r = pm->pci_device_registrations; + + while (r) + { + for (i = r->supported_devices; i->vendor_id != 0; i++) + if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id) + { + error = vlib_pci_bind_to_uio (dev, "uio_pci_generic"); + if (error) + { + clib_error_report (error); + continue; + } + + add_device (dev, pdev); + dev->interrupt_handler = r->interrupt_handler; + return r->init_function (vm, dev); + } + r = r->next_registration; + } + /* No driver, close the PCI config-space FD */ + close (pdev->config_fd); + return 0; +} + +static clib_error_t * +init_device (vlib_main_t * vm, + vlib_pci_device_t * dev, linux_pci_device_t * pdev) +{ + return init_device_from_registered (vm, dev, pdev); +} + +static clib_error_t * +scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) +{ + vlib_main_t *vm = arg; + vlib_pci_main_t *pm = &pci_main; + int fd; + u8 *f; + clib_error_t *error = 0; + vlib_pci_device_t *dev; + linux_pci_device_t pdev = { 0 }; + u32 tmp; + + f = format (0, "%v/config%c", dev_dir_name, 0); + fd = open ((char *) f, O_RDWR); + + /* Try read-only access if write fails. */ + if (fd < 0) + fd = open ((char *) f, O_RDONLY); + + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", f); + goto done; + } + + pool_get (pm->pci_devs, dev); + + /* You can only read more that 64 bytes of config space as root; so we try to + read the full space but fall back to just the first 64 bytes. */ + if (read (fd, &dev->config_data, sizeof (dev->config_data)) != + sizeof (dev->config_data) + && read (fd, &dev->config0, + sizeof (dev->config0)) != sizeof (dev->config0)) + { + pool_put (pm->pci_devs, dev); + error = clib_error_return_unix (0, "read `%s'", f); + close (fd); + goto done; + } + + { + static pci_config_header_t all_ones; + if (all_ones.vendor_id == 0) + memset (&all_ones, ~0, sizeof (all_ones)); + + if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones))) + { + pool_put (pm->pci_devs, dev); + error = clib_error_return (0, "invalid PCI config for `%s'", f); + close (fd); + goto done; + } + } + + if (dev->config0.header.header_type == 0) + pci_config_type0_little_to_host (&dev->config0); + else + pci_config_type1_little_to_host (&dev->config1); + + /* Parse bus, dev, function from directory name. */ + { + unformat_input_t input; + + unformat_init_string (&input, (char *) dev_dir_name, + vec_len (dev_dir_name)); + + if (!unformat (&input, "/sys/bus/pci/devices/%U", + unformat_vlib_pci_addr, &dev->bus_address)) + abort (); + + unformat_free (&input); + + } + + + pdev.config_fd = fd; + pdev.dev_dir_name = dev_dir_name; + + hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32, + dev - pm->pci_devs); + + vec_reset_length (f); + f = format (f, "%v/vpd%c", dev_dir_name, 0); + fd = open ((char *) f, O_RDONLY); + if (fd >= 0) + { + while (1) + { + u8 tag[3]; + u8 *data = 0; + int len; + + if (read (fd, &tag, 3) != 3) + break; + + if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91) + break; + + len = (tag[2] << 8) | tag[1]; + vec_validate (data, len); + + if (read (fd, data, len) != len) + { + vec_free (data); + break; + } + if (tag[0] == 0x82) + dev->product_name = data; + else if (tag[0] == 0x90) + dev->vpd_r = data; + else if (tag[0] == 0x91) + dev->vpd_w = data; + + data = 0; + } + close (fd); + } + + dev->numa_node = -1; + vec_reset_length (f); + f = format (f, "%v/numa_node%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); + + vec_reset_length (f); + f = format (f, "%v/class%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->device_class = tmp >> 8; + + vec_reset_length (f); + f = format (f, "%v/vendor%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->vendor_id = tmp; + + vec_reset_length (f); + f = format (f, "%v/device%c", dev_dir_name, 0); + vlib_sysfs_read ((char *) f, "0x%x", &tmp); + dev->device_id = tmp; + + error = init_device (vm, dev, &pdev); + + vec_reset_length (f); + f = format (f, "%v/driver%c", dev_dir_name, 0); + dev->driver_name = vlib_sysfs_link_to_name ((char *) f); + +done: + vec_free (f); + return error; +} + +clib_error_t * +linux_pci_init (vlib_main_t * vm) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + + pm->vlib_main = vm; + + if ((error = vlib_call_init_function (vm, unix_input_init))) + return error; + + ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); + pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword)); + + error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, + /* scan_dirs */ 0); + + /* Complain and continue. might not be root, etc. */ + if (error) + clib_error_report (error); + + return error; +} + +VLIB_INIT_FUNCTION (linux_pci_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c new file mode 100644 index 00000000..6731295c --- /dev/null +++ b/src/vlib/linux/physmem.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * physmem.c: Unix physical memory + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static void * +unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, + uword n_bytes, uword alignment) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + uword lo_offset, hi_offset; + uword *to_free = 0; + + if (pr->heap == 0) + return 0; + + /* IO memory is always at least cache aligned. */ + alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); + + while (1) + { + mheap_get_aligned (pr->heap, n_bytes, + /* align */ alignment, + /* align offset */ 0, + &lo_offset); + + /* Allocation failed? */ + if (lo_offset == ~0) + break; + + if (pr->flags & VLIB_PHYSMEM_F_FAKE) + break; + + /* Make sure allocation does not span DMA physical chunk boundary. */ + hi_offset = lo_offset + n_bytes - 1; + + if ((lo_offset >> pr->log2_page_size) == + (hi_offset >> pr->log2_page_size)) + break; + + /* Allocation would span chunk boundary, queue it to be freed as soon as + we find suitable chunk. */ + vec_add1 (to_free, lo_offset); + } + + if (to_free != 0) + { + uword i; + for (i = 0; i < vec_len (to_free); i++) + mheap_put (pr->heap, to_free[i]); + vec_free (to_free); + } + + return lo_offset != ~0 ? pr->heap + lo_offset : 0; +} + +static void +unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) +{ + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + /* Return object to region's heap. */ + mheap_put (pr->heap, x - pr->heap); +} + +static u64 +get_page_paddr (int fd, uword addr) +{ + int pagesize = sysconf (_SC_PAGESIZE); + u64 seek, pagemap = 0; + + seek = ((u64) addr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + { + clib_unix_warning ("lseek to 0x%llx", seek); + return 0; + } + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + { + clib_unix_warning ("read ptbits"); + return 0; + } + if ((pagemap & (1ULL << 63)) == 0) + return 0; + + pagemap &= pow2_mask (55); + + return pagemap * pagesize; +} + +static clib_error_t * +unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, + u8 numa_node, u32 flags, + vlib_physmem_region_index_t * idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + clib_error_t *error = 0; + int pagemap_fd = -1; + u8 *mount_dir = 0; + u8 *filename = 0; + struct stat st; + int old_mpol; + int mmap_flags; + struct bitmask *old_mask = numa_allocate_nodemask (); + + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) + return clib_error_return (0, "not allowed"); + + pool_get (vpm->regions, pr); + + if ((pr - vpm->regions) >= 256) + { + error = clib_error_return (0, "maximum number of regions reached"); + goto error; + } + + pr->index = pr - vpm->regions; + pr->fd = -1; + pr->flags = flags; + + if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) + == -1) + { + error = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + { + error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); + goto error; + } + + mount_dir = format (0, "%s/physmem_region%d%c", + vlib_unix_get_runtime_dir (), pr->index, 0); + filename = format (0, "%s/mem%c", mount_dir, 0); + + unlink ((char *) mount_dir); + + error = vlib_unix_recursive_mkdir ((char *) mount_dir); + if (error) + goto error; + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + error = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + error = clib_error_return_unix (0, "open"); + goto error; + } + + mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + } + else + { + if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) + return clib_error_return_unix (0, "memfd_create"); + + if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + error = + clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + goto error; + } + mmap_flags = MAP_SHARED; + } + + if (fstat (pr->fd, &st)) + { + error = clib_error_return_unix (0, "fstat"); + goto error; + } + + pr->log2_page_size = min_log2 (st.st_blksize); + pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; + size = pr->n_pages * (1 << pr->log2_page_size); + + if ((ftruncate (pr->fd, size)) == -1) + { + error = clib_error_return_unix (0, "ftruncate length: %d", size); + goto error; + } + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + error = vlib_sysfs_prealloc_hugepages (numa_node, + 1 << (pr->log2_page_size - 10), + pr->n_pages); + if (error) + goto error; + } + + numa_set_preferred (numa_node); + + pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); + + if (pr->mem == MAP_FAILED) + { + pr->mem = 0; + error = clib_error_return_unix (0, "mmap"); + goto error; + } + + if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) + { + error = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + pr->size = pr->n_pages << pr->log2_page_size; + pr->page_mask = (1 << pr->log2_page_size) - 1; + pr->numa_node = numa_node; + pr->name = format (0, "%s", name); + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + void *ptr = pr->mem + (i << pr->log2_page_size); + int node; + move_pages (0, 1, &ptr, 0, &node, 0); + if (numa_node != node) + { + clib_warning + ("physmem page for region \'%s\' allocated on the wrong" + " numa node (requested %u actual %u)", pr->name, + pr->numa_node, node, i); + break; + } + } + } + + if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) + { + pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, + /* Don't want mheap mmap/munmap with IO memory. */ + MHEAP_FLAG_DISABLE_VM | + MHEAP_FLAG_THREAD_SAFE); + fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); + } + + if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) + { + vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); + } + + *idx = pr->index; + + if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) + { + int i; + for (i = 0; i < pr->n_pages; i++) + { + uword vaddr = + pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); + u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); + vec_add1 (pr->page_table, page_paddr); + } + } + + goto done; + +error: + if (pr->fd > -1) + close (pr->fd); + + if (pr->mem) + munmap (pr->mem, size); + + memset (pr, 0, sizeof (*pr)); + pool_put (vpm->regions, pr); + +done: + if (mount_dir) + { + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + vec_free (mount_dir); + } + numa_free_cpumask (old_mask); + vec_free (filename); + if (pagemap_fd > -1) + close (pagemap_fd); + return error; +} + +static void +unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); + + if (pr->fd > 0) + close (pr->fd); + munmap (pr->mem, pr->size); + vec_free (pr->name); + pool_put (vpm->regions, pr); +} + +clib_error_t * +unix_physmem_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + + /* Avoid multiple calls. */ + if (vm->os_physmem_alloc_aligned) + return error; + + vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; + vm->os_physmem_free = unix_physmem_free; + vm->os_physmem_region_alloc = unix_physmem_region_alloc; + vm->os_physmem_region_free = unix_physmem_region_free; + + return error; +} + +static clib_error_t * +show_physmem (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vlib_physmem_main_t *vpm = &vm->physmem_main; + vlib_physmem_region_t *pr; + + /* *INDENT-OFF* */ + pool_foreach (pr, vpm->regions, ( + { + vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d " + "numa-node %u fd %d\n", + pr->index, pr->name, (1 << (pr->log2_page_size -10)), + pr->n_pages, pr->numa_node, pr->fd); + if (pr->heap) + vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1); + else + vlib_cli_output (vm, " no heap\n"); + })); + /* *INDENT-ON* */ + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_physmem_command, static) = { + .path = "show physmem", + .short_help = "Show physical memory allocation", + .function = show_physmem, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h new file mode 100644 index 00000000..9e37997e --- /dev/null +++ b/src/vlib/linux/syscall.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_syscall_h +#define included_linux_syscall_h + +#ifndef __NR_memfd_create +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined __aarch64__ +#define __NR_memfd_create 279 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif +#endif + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif +#define MFD_ALLOW_SEALING 0x0002U +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ + + +#endif /* included_linux_syscall_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c new file mode 100644 index 00000000..f92f9ef5 --- /dev/null +++ b/src/vlib/linux/sysfs.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +clib_error_t * +vlib_sysfs_write (char *file_name, char *fmt, ...) +{ + u8 *s; + int fd; + clib_error_t *error = 0; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + error = clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return error; +} + +clib_error_t * +vlib_sysfs_read (char *file_name, char *fmt, ...) +{ + unformat_input_t input; + u8 *s = 0; + int fd; + ssize_t sz; + uword result; + + fd = open (file_name, O_RDONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + vec_validate (s, 4095); + + sz = read (fd, s, vec_len (s)); + if (sz < 0) + { + close (fd); + vec_free (s); + return clib_error_return_unix (0, "read `%s'", file_name); + } + + _vec_len (s) = sz; + unformat_init_vector (&input, s); + + va_list va; + va_start (va, fmt); + result = va_unformat (&input, fmt, &va); + va_end (va); + + vec_free (s); + close (fd); + + if (result == 0) + return clib_error_return (0, "unformat error"); + + return 0; +} + +u8 * +vlib_sysfs_link_to_name (char *link) +{ + char *p, buffer[64]; + unformat_input_t in; + u8 *s = 0; + int r; + + r = readlink (link, buffer, sizeof (buffer) - 1); + + if (r < 0) + return 0; + + buffer[r] = 0; + p = strrchr (buffer, '/'); + + if (!p) + return 0; + + unformat_init_string (&in, p + 1, strlen (p + 1)); + if (unformat (&in, "%s", &s) != 1) + clib_unix_warning ("no string?"); + unformat_free (&in); + + return s; +} + +clib_error_t * +vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); + vlib_sysfs_write ((char *) p, "%d", nr); + +done: + vec_free (p); + return error; +} + + +static clib_error_t * +vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, + int page_size, int *val) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, + type, 0); + error = vlib_sysfs_read ((char *) p, "%d", val); + +done: + vec_free (p); + return error; +} + +clib_error_t * +vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, + int *v) +{ + return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); +} + +clib_error_t * +vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + int n, needed; + error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); + if (error) + return error; + needed = nr - n; + if (needed <= 0) + return 0; + + error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); + if (error) + return error; + clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", + needed, page_size, numa_node); + return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h new file mode 100644 index 00000000..14b71317 --- /dev/null +++ b/src/vlib/linux/sysfs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_sysfs_h +#define included_linux_sysfs_h + +clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); + +clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); + +u8 *vlib_sysfs_link_to_name (char *link); + +clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, + int page_size, int nr); +clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, + int page_size, int *v); +clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, + int page_size, int nr); + +#endif /* included_linux_sysfs_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vlib/pci/linux_pci.c b/src/vlib/pci/linux_pci.c deleted file mode 100644 index 2d3c0a88..00000000 --- a/src/vlib/pci/linux_pci.c +++ /dev/null @@ -1,665 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * pci.c: Linux user space PCI bus management. - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -typedef struct -{ - /* /sys/bus/pci/devices/... directory name for this device. */ - u8 *dev_dir_name; - - /* Resource file descriptors. */ - int *resource_fds; - - /* File descriptor for config space read/write. */ - int config_fd; - - /* File descriptor for /dev/uio%d */ - int uio_fd; - - /* Minor device for uio device. */ - u32 uio_minor; - - /* Index given by unix_file_add. */ - u32 unix_file_index; - -} linux_pci_device_t; - -/* Pool of PCI devices. */ -typedef struct -{ - vlib_main_t *vlib_main; - linux_pci_device_t *linux_pci_devices; -} linux_pci_main_t; - -extern linux_pci_main_t linux_pci_main; - -/* Call to allocate/initialize the pci subsystem. - This is not an init function so that users can explicitly enable - pci only when it's needed. */ -clib_error_t *pci_bus_init (vlib_main_t * vm); - -clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d, - char *uio_driver_name); - -linux_pci_main_t linux_pci_main; - -clib_error_t * -vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) -{ - clib_error_t *error = 0; - u8 *s = 0, *driver_name = 0; - DIR *dir = 0; - struct dirent *e; - int fd, clear_driver_override = 0; - u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U", - format_vlib_pci_addr, &d->bus_address); - - s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = vlib_sysfs_link_to_name ((char *) s); - vec_reset_length (s); - - if (driver_name && - ((strcmp ("vfio-pci", (char *) driver_name) == 0) || - (strcmp ("uio_pci_generic", (char *) driver_name) == 0) || - (strcmp ("igb_uio", (char *) driver_name) == 0))) - goto done; - - /* walk trough all linux interfaces and if interface belonging to - this device is founf check if interface is admin up */ - dir = opendir ("/sys/class/net"); - s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (!dir) - { - error = clib_error_return (0, "Skipping PCI device %U: failed to " - "read /sys/class/net", - format_vlib_pci_addr, &d->bus_address); - goto done; - } - - fd = socket (PF_INET, SOCK_DGRAM, 0); - if (fd < 0) - { - error = clib_error_return_unix (0, "socket"); - goto done; - } - - while ((e = readdir (dir))) - { - struct ifreq ifr; - struct ethtool_drvinfo drvinfo; - - if (e->d_name[0] == '.') /* skip . and .. */ - continue; - - memset (&ifr, 0, sizeof ifr); - memset (&drvinfo, 0, sizeof drvinfo); - ifr.ifr_data = (char *) &drvinfo; - strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); - drvinfo.cmd = ETHTOOL_GDRVINFO; - if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) - { - /* Some interfaces (eg "lo") don't support this ioctl */ - if ((errno != ENOTSUP) && (errno != ENODEV)) - clib_unix_warning ("ioctl fetch intf %s bus info error", - e->d_name); - continue; - } - - if (strcmp ((char *) s, drvinfo.bus_info)) - continue; - - memset (&ifr, 0, sizeof (ifr)); - strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1); - if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) - { - error = clib_error_return_unix (0, "ioctl fetch intf %s flags", - e->d_name); - close (fd); - goto done; - } - - if (ifr.ifr_flags & IFF_UP) - { - error = clib_error_return (0, "Skipping PCI device %U as host " - "interface %s is up", - format_vlib_pci_addr, &d->bus_address, - e->d_name); - close (fd); - goto done; - } - } - - close (fd); - vec_reset_length (s); - - s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); - vec_reset_length (s); - - s = format (s, "%v/driver_override%c", dev_dir_name, 0); - if (access ((char *) s, F_OK) == 0) - { - vlib_sysfs_write ((char *) s, "%s", uio_driver_name); - clear_driver_override = 1; - } - else - { - vec_reset_length (s); - s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, - d->device_id); - } - vec_reset_length (s); - - s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); - vec_reset_length (s); - - if (clear_driver_override) - { - s = format (s, "%v/driver_override%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%c", 0); - vec_reset_length (s); - } - -done: - closedir (dir); - vec_free (s); - vec_free (dev_dir_name); - vec_free (driver_name); - return error; -} - - -static clib_error_t * -scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) -{ - linux_pci_device_t *l = arg; - unformat_input_t input; - - unformat_init_string (&input, (char *) file_name, vec_len (file_name)); - - if (!unformat (&input, "uio%d", &l->uio_minor)) - abort (); - - unformat_free (&input); - return 0; -} - -static clib_error_t * -linux_pci_uio_read_ready (unix_file_t * uf) -{ - vlib_pci_main_t *pm = &pci_main; - vlib_pci_device_t *d; - int __attribute__ ((unused)) rv; - - u32 icount; - rv = read (uf->file_descriptor, &icount, 4); - - d = pool_elt_at_index (pm->pci_devs, uf->private_data); - - if (d->interrupt_handler) - d->interrupt_handler (d); - - vlib_pci_intr_enable (d); - - return /* no error */ 0; -} - -static clib_error_t * -linux_pci_uio_error_ready (unix_file_t * uf) -{ - u32 error_index = (u32) uf->private_data; - - return clib_error_return (0, "pci device %d: error", error_index); -} - -static void -add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) -{ - vlib_pci_main_t *pm = &pci_main; - linux_pci_main_t *lpm = &linux_pci_main; - linux_pci_device_t *l; - - pool_get (lpm->linux_pci_devices, l); - l[0] = pdev[0]; - - l->dev_dir_name = vec_dup (l->dev_dir_name); - - dev->os_handle = l - lpm->linux_pci_devices; - - { - u8 *uio_dir = format (0, "%s/uio", l->dev_dir_name); - foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ - 1); - vec_free (uio_dir); - } - - { - char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0); - l->uio_fd = open (uio_name, O_RDWR); - if (l->uio_fd < 0) - clib_unix_error ("open `%s'", uio_name); - vec_free (uio_name); - } - - { - unix_file_t template = { 0 }; - unix_main_t *um = &unix_main; - - template.read_function = linux_pci_uio_read_ready; - template.file_descriptor = l->uio_fd; - template.error_function = linux_pci_uio_error_ready; - template.private_data = dev - pm->pci_devs; - - l->unix_file_index = unix_file_add (um, &template); - } -} - -static void -linux_pci_device_free (linux_pci_device_t * l) -{ - int i; - for (i = 0; i < vec_len (l->resource_fds); i++) - if (l->resource_fds[i] > 0) - close (l->resource_fds[i]); - if (l->config_fd > 0) - close (l->config_fd); - if (l->uio_fd > 0) - close (l->uio_fd); - vec_free (l->resource_fds); - vec_free (l->dev_dir_name); -} - -/* Configuration space read/write. */ -clib_error_t * -vlib_pci_read_write_config (vlib_pci_device_t * dev, - vlib_read_or_write_t read_or_write, - uword address, void *data, u32 n_bytes) -{ - linux_pci_main_t *lpm = &linux_pci_main; - linux_pci_device_t *p; - int n; - - p = pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle); - - if (read_or_write == VLIB_READ) - n = pread (p->config_fd, data, n_bytes, address); - else - n = pwrite (p->config_fd, data, n_bytes, address); - - if (n != n_bytes) - return clib_error_return_unix (0, "%s", - read_or_write == VLIB_READ - ? "read" : "write"); - - return 0; -} - -static clib_error_t * -os_map_pci_resource_internal (uword os_handle, - u32 resource, u8 * addr, void **result) -{ - linux_pci_main_t *pm = &linux_pci_main; - linux_pci_device_t *p; - struct stat stat_buf; - u8 *file_name; - int fd; - clib_error_t *error; - int flags = MAP_SHARED; - - error = 0; - p = pool_elt_at_index (pm->linux_pci_devices, os_handle); - - file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0); - fd = open ((char *) file_name, O_RDWR); - if (fd < 0) - { - error = clib_error_return_unix (0, "open `%s'", file_name); - goto done; - } - - if (fstat (fd, &stat_buf) < 0) - { - error = clib_error_return_unix (0, "fstat `%s'", file_name); - goto done; - } - - vec_validate (p->resource_fds, resource); - p->resource_fds[resource] = fd; - if (addr != 0) - flags |= MAP_FIXED; - - *result = mmap (addr, - /* size */ stat_buf.st_size, - PROT_READ | PROT_WRITE, flags, - /* file */ fd, - /* offset */ 0); - if (*result == (void *) -1) - { - error = clib_error_return_unix (0, "mmap `%s'", file_name); - goto done; - } - -done: - if (error) - { - if (fd >= 0) - close (fd); - } - vec_free (file_name); - return error; -} - -clib_error_t * -vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result) -{ - return (os_map_pci_resource_internal - (dev->os_handle, resource, 0 /* addr */ , - result)); -} - -clib_error_t * -vlib_pci_map_resource_fixed (vlib_pci_device_t * dev, - u32 resource, u8 * addr, void **result) -{ - return (os_map_pci_resource_internal - (dev->os_handle, resource, addr, result)); -} - -void -vlib_pci_free_device (vlib_pci_device_t * dev) -{ - linux_pci_main_t *pm = &linux_pci_main; - linux_pci_device_t *l; - - l = pool_elt_at_index (pm->linux_pci_devices, dev->os_handle); - linux_pci_device_free (l); - pool_put (pm->linux_pci_devices, l); -} - -pci_device_registration_t * __attribute__ ((unused)) -pci_device_next_registered (pci_device_registration_t * r) -{ - uword i; - - /* Null vendor id marks end of initialized list. */ - for (i = 0; r->supported_devices[i].vendor_id != 0; i++) - ; - - return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0])); -} - -static clib_error_t * -init_device_from_registered (vlib_main_t * vm, - vlib_pci_device_t * dev, - linux_pci_device_t * pdev) -{ - vlib_pci_main_t *pm = &pci_main; - pci_device_registration_t *r; - pci_device_id_t *i; - clib_error_t *error; - - r = pm->pci_device_registrations; - - while (r) - { - for (i = r->supported_devices; i->vendor_id != 0; i++) - if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id) - { - error = vlib_pci_bind_to_uio (dev, "uio_pci_generic"); - if (error) - { - clib_error_report (error); - continue; - } - - add_device (dev, pdev); - dev->interrupt_handler = r->interrupt_handler; - return r->init_function (vm, dev); - } - r = r->next_registration; - } - /* No driver, close the PCI config-space FD */ - close (pdev->config_fd); - return 0; -} - -static clib_error_t * -init_device (vlib_main_t * vm, - vlib_pci_device_t * dev, linux_pci_device_t * pdev) -{ - return init_device_from_registered (vm, dev, pdev); -} - -static clib_error_t * -scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) -{ - vlib_main_t *vm = arg; - vlib_pci_main_t *pm = &pci_main; - int fd; - u8 *f; - clib_error_t *error = 0; - vlib_pci_device_t *dev; - linux_pci_device_t pdev = { 0 }; - u32 tmp; - - f = format (0, "%v/config%c", dev_dir_name, 0); - fd = open ((char *) f, O_RDWR); - - /* Try read-only access if write fails. */ - if (fd < 0) - fd = open ((char *) f, O_RDONLY); - - if (fd < 0) - { - error = clib_error_return_unix (0, "open `%s'", f); - goto done; - } - - pool_get (pm->pci_devs, dev); - - /* You can only read more that 64 bytes of config space as root; so we try to - read the full space but fall back to just the first 64 bytes. */ - if (read (fd, &dev->config_data, sizeof (dev->config_data)) != - sizeof (dev->config_data) - && read (fd, &dev->config0, - sizeof (dev->config0)) != sizeof (dev->config0)) - { - pool_put (pm->pci_devs, dev); - error = clib_error_return_unix (0, "read `%s'", f); - close (fd); - goto done; - } - - { - static pci_config_header_t all_ones; - if (all_ones.vendor_id == 0) - memset (&all_ones, ~0, sizeof (all_ones)); - - if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones))) - { - pool_put (pm->pci_devs, dev); - error = clib_error_return (0, "invalid PCI config for `%s'", f); - close (fd); - goto done; - } - } - - if (dev->config0.header.header_type == 0) - pci_config_type0_little_to_host (&dev->config0); - else - pci_config_type1_little_to_host (&dev->config1); - - /* Parse bus, dev, function from directory name. */ - { - unformat_input_t input; - - unformat_init_string (&input, (char *) dev_dir_name, - vec_len (dev_dir_name)); - - if (!unformat (&input, "/sys/bus/pci/devices/%U", - unformat_vlib_pci_addr, &dev->bus_address)) - abort (); - - unformat_free (&input); - - } - - - pdev.config_fd = fd; - pdev.dev_dir_name = dev_dir_name; - - hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32, - dev - pm->pci_devs); - - vec_reset_length (f); - f = format (f, "%v/vpd%c", dev_dir_name, 0); - fd = open ((char *) f, O_RDONLY); - if (fd >= 0) - { - while (1) - { - u8 tag[3]; - u8 *data = 0; - int len; - - if (read (fd, &tag, 3) != 3) - break; - - if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91) - break; - - len = (tag[2] << 8) | tag[1]; - vec_validate (data, len); - - if (read (fd, data, len) != len) - { - vec_free (data); - break; - } - if (tag[0] == 0x82) - dev->product_name = data; - else if (tag[0] == 0x90) - dev->vpd_r = data; - else if (tag[0] == 0x91) - dev->vpd_w = data; - - data = 0; - } - close (fd); - } - - dev->numa_node = -1; - vec_reset_length (f); - f = format (f, "%v/numa_node%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); - - vec_reset_length (f); - f = format (f, "%v/class%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->device_class = tmp >> 8; - - vec_reset_length (f); - f = format (f, "%v/vendor%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->vendor_id = tmp; - - vec_reset_length (f); - f = format (f, "%v/device%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); - dev->device_id = tmp; - - error = init_device (vm, dev, &pdev); - - vec_reset_length (f); - f = format (f, "%v/driver%c", dev_dir_name, 0); - dev->driver_name = vlib_sysfs_link_to_name ((char *) f); - -done: - vec_free (f); - return error; -} - -clib_error_t * -linux_pci_init (vlib_main_t * vm) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - - pm->vlib_main = vm; - - if ((error = vlib_call_init_function (vm, unix_input_init))) - return error; - - ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); - pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword)); - - error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, - /* scan_dirs */ 0); - - /* Complain and continue. might not be root, etc. */ - if (error) - clib_error_report (error); - - return error; -} - -VLIB_INIT_FUNCTION (linux_pci_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 36f8109e..f8d5d8f9 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -18,6 +18,7 @@ #include #include +#include #include static u8 * diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c deleted file mode 100644 index d5d5d6c8..00000000 --- a/src/vlib/unix/physmem.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - * physmem.c: Unix physical memory - * - * Copyright (c) 2008 Eliot Dresselhaus - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - -static void * -unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, - uword n_bytes, uword alignment) -{ - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - uword lo_offset, hi_offset; - uword *to_free = 0; - - if (pr->heap == 0) - return 0; - - /* IO memory is always at least cache aligned. */ - alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); - - while (1) - { - mheap_get_aligned (pr->heap, n_bytes, - /* align */ alignment, - /* align offset */ 0, - &lo_offset); - - /* Allocation failed? */ - if (lo_offset == ~0) - break; - - if (pr->flags & VLIB_PHYSMEM_F_FAKE) - break; - - /* Make sure allocation does not span DMA physical chunk boundary. */ - hi_offset = lo_offset + n_bytes - 1; - - if ((lo_offset >> pr->log2_page_size) == - (hi_offset >> pr->log2_page_size)) - break; - - /* Allocation would span chunk boundary, queue it to be freed as soon as - we find suitable chunk. */ - vec_add1 (to_free, lo_offset); - } - - if (to_free != 0) - { - uword i; - for (i = 0; i < vec_len (to_free); i++) - mheap_put (pr->heap, to_free[i]); - vec_free (to_free); - } - - return lo_offset != ~0 ? pr->heap + lo_offset : 0; -} - -static void -unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) -{ - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - /* Return object to region's heap. */ - mheap_put (pr->heap, x - pr->heap); -} - -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; -} - -static clib_error_t * -unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, - u8 numa_node, u32 flags, - vlib_physmem_region_index_t * idx) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr; - clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); - - if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) - return clib_error_return (0, "not allowed"); - - pool_get (vpm->regions, pr); - - if ((pr - vpm->regions) >= 256) - { - error = clib_error_return (0, "maximum number of regions reached"); - goto error; - } - - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; - - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; - } - else - { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; - } - - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; - } - - if (set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } - - pr->size = pr->n_pages << pr->log2_page_size; - pr->page_mask = (1 << pr->log2_page_size) - 1; - pr->numa_node = numa_node; - pr->name = format (0, "%s", name); - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - void *ptr = pr->mem + (i << pr->log2_page_size); - int node; - move_pages (0, 1, &ptr, 0, &node, 0); - if (numa_node != node) - { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); - break; - } - } - } - - if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) - { - pr->heap = mheap_alloc_with_flags (pr->mem, pr->size, - /* Don't want mheap mmap/munmap with IO memory. */ - MHEAP_FLAG_DISABLE_VM | - MHEAP_FLAG_THREAD_SAFE); - fformat (stdout, "%U", format_mheap, pr->heap, /* verbose */ 1); - } - - if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS) - { - vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size); - } - - *idx = pr->index; - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - - goto done; - -error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - - memset (pr, 0, sizeof (*pr)); - pool_put (vpm->regions, pr); - -done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); - return error; -} - -static void -unix_physmem_region_free (vlib_main_t * vm, vlib_physmem_region_index_t idx) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx); - - if (pr->fd > 0) - close (pr->fd); - munmap (pr->mem, pr->size); - vec_free (pr->name); - pool_put (vpm->regions, pr); -} - -clib_error_t * -unix_physmem_init (vlib_main_t * vm) -{ - clib_error_t *error = 0; - - /* Avoid multiple calls. */ - if (vm->os_physmem_alloc_aligned) - return error; - - vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; - vm->os_physmem_free = unix_physmem_free; - vm->os_physmem_region_alloc = unix_physmem_region_alloc; - vm->os_physmem_region_free = unix_physmem_region_free; - - return error; -} - -static clib_error_t * -show_physmem (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vlib_physmem_main_t *vpm = &vm->physmem_main; - vlib_physmem_region_t *pr; - - /* *INDENT-OFF* */ - pool_foreach (pr, vpm->regions, ( - { - vlib_cli_output (vm, "index %u name '%s' page-size %uKB num-pages %d " - "numa-node %u fd %d\n", - pr->index, pr->name, (1 << (pr->log2_page_size -10)), - pr->n_pages, pr->numa_node, pr->fd); - if (pr->heap) - vlib_cli_output (vm, " %U", format_mheap, pr->heap, /* verbose */ 1); - else - vlib_cli_output (vm, " no heap\n"); - })); - /* *INDENT-ON* */ - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_physmem_command, static) = { - .path = "show physmem", - .short_help = "Show physical memory allocation", - .function = show_physmem, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index b5a33427..1b0d8b9d 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -217,23 +217,6 @@ extern u8 **vlib_thread_stacks; /* utils */ -clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); - -clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); - -u8 *vlib_sysfs_link_to_name (char *link); - -clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, - int page_size, int nr); -clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, - int page_size, int nr); - clib_error_t *foreach_directory_file (char *dir_name, clib_error_t * (*f) (void *arg, u8 * path_name, diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c index 0e252aca..5472751e 100644 --- a/src/vlib/unix/util.c +++ b/src/vlib/unix/util.c @@ -98,225 +98,6 @@ foreach_directory_file (char *dir_name, return error; } -clib_error_t * -vlib_sysfs_write (char *file_name, char *fmt, ...) -{ - u8 *s; - int fd; - clib_error_t *error = 0; - - fd = open (file_name, O_WRONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - va_list va; - va_start (va, fmt); - s = va_format (0, fmt, &va); - va_end (va); - - if (write (fd, s, vec_len (s)) < 0) - error = clib_error_return_unix (0, "write `%s'", file_name); - - vec_free (s); - close (fd); - return error; -} - -clib_error_t * -vlib_sysfs_read (char *file_name, char *fmt, ...) -{ - unformat_input_t input; - u8 *s = 0; - int fd; - ssize_t sz; - uword result; - - fd = open (file_name, O_RDONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - vec_validate (s, 4095); - - sz = read (fd, s, vec_len (s)); - if (sz < 0) - { - close (fd); - vec_free (s); - return clib_error_return_unix (0, "read `%s'", file_name); - } - - _vec_len (s) = sz; - unformat_init_vector (&input, s); - - va_list va; - va_start (va, fmt); - result = va_unformat (&input, fmt, &va); - va_end (va); - - vec_free (s); - close (fd); - - if (result == 0) - return clib_error_return (0, "unformat error"); - - return 0; -} - -u8 * -vlib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} - -clib_error_t * -vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); - vlib_sysfs_write ((char *) p, "%d", nr); - -done: - vec_free (p); - return error; -} - - -static clib_error_t * -vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, - int page_size, int *val) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, - type, 0); - error = vlib_sysfs_read ((char *) p, "%d", val); - -done: - vec_free (p); - return error; -} - -clib_error_t * -vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, - int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - int n, needed; - error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); - if (error) - return error; - needed = nr - n; - if (needed <= 0) - return 0; - - error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); - if (error) - return error; - clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", - needed, page_size, numa_node); - return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); -} - clib_error_t * vlib_unix_recursive_mkdir (char *path) { diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index ea52878d..e7e69214 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -26,6 +26,7 @@ #include #include +#include #include #include -- cgit 1.2.3-korg From 56dd5438b04b869065d8e901c315496bb6777455 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 8 Sep 2017 19:52:02 +0200 Subject: move unix_file_* code to vppinfra This will allow us to use this code in client libraries without vlib. Change-Id: I8557b752496841ba588aa36b6082cbe2cd1867fe Signed-off-by: Damjan Marion --- src/plugins/memif/memif.c | 36 ++++----- src/plugins/memif/private.h | 38 +++++----- src/plugins/memif/socket.c | 56 +++++++------- src/vlib/linux/pci.c | 13 ++-- src/vlib/unix/cli.c | 85 +++++++++++---------- src/vlib/unix/input.c | 13 ++-- src/vlib/unix/main.c | 1 + src/vlib/unix/mc_socket.c | 69 ++++++++--------- src/vlib/unix/mc_socket.h | 2 +- src/vlib/unix/unix.h | 76 +------------------ src/vlibapi/api_common.h | 2 +- src/vlibsocket/api.h | 12 +-- src/vlibsocket/sockclnt_vlib.c | 12 +-- src/vlibsocket/socksvr_vlib.c | 92 +++++++++++----------- src/vnet/devices/af_packet/af_packet.c | 12 +-- src/vnet/devices/af_packet/af_packet.h | 2 +- src/vnet/devices/netmap/netmap.c | 14 ++-- src/vnet/devices/netmap/netmap.h | 2 +- src/vnet/devices/virtio/vhost-user.c | 62 +++++++-------- src/vnet/devices/virtio/vhost-user.h | 2 +- src/vnet/ip/punt.c | 8 +- src/vnet/ip/punt.h | 2 +- src/vnet/unix/tapcli.c | 19 ++--- src/vnet/unix/tuntap.c | 10 +-- src/vppinfra.am | 1 + src/vppinfra/file.h | 134 +++++++++++++++++++++++++++++++++ 26 files changed, 423 insertions(+), 352 deletions(-) create mode 100644 src/vppinfra/file.h (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 4c387b92..8fec409a 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -52,10 +52,10 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) static void memif_queue_intfd_close (memif_queue_t * mq) { - if (mq->int_unix_file_index != ~0) + if (mq->int_clib_file_index != ~0) { - memif_file_del_by_index (mq->int_unix_file_index); - mq->int_unix_file_index = ~0; + memif_file_del_by_index (mq->int_clib_file_index); + mq->int_clib_file_index = ~0; mq->int_fd = -1; } else if (mq->int_fd > -1) @@ -94,13 +94,13 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) vnet_hw_interface_set_flags (vnm, mif->hw_if_index, 0); /* close connection socket */ - if (mif->conn_unix_file_index != ~0) + if (mif->conn_clib_file_index != ~0) { memif_socket_file_t *msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); hash_unset (msf->dev_instance_by_fd, mif->conn_fd); - memif_file_del_by_index (mif->conn_unix_file_index); - mif->conn_unix_file_index = ~0; + memif_file_del_by_index (mif->conn_clib_file_index); + mif->conn_clib_file_index = ~0; } else if (mif->conn_fd > -1) close (mif->conn_fd); @@ -145,7 +145,7 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) } static clib_error_t * -memif_int_fd_read_ready (unix_file_t * uf) +memif_int_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; vnet_main_t *vnm = vnet_get_main (); @@ -173,7 +173,7 @@ clib_error_t * memif_connect (memif_if_t * mif) { vnet_main_t *vnm = vnet_get_main (); - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; memif_region_t *mr; int i; @@ -219,7 +219,7 @@ memif_connect (memif_if_t * mif) { template.file_descriptor = mq->int_fd; template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF); - memif_file_add (&mq->int_unix_file_index, &template); + memif_file_add (&mq->int_clib_file_index, &template); } vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0); rv = vnet_hw_interface_set_rx_mode (vnm, mif->hw_if_index, i, @@ -330,7 +330,7 @@ memif_init_regions_and_queues (memif_if_t * mif) memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) return clib_error_return_unix (0, "eventfd[tx queue %u]", i); - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); mq->log2_ring_size = mif->cfg.log2_ring_size; mq->region = 0; @@ -346,7 +346,7 @@ memif_init_regions_and_queues (memif_if_t * mif) memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); if ((mq->int_fd = eventfd (0, EFD_NONBLOCK)) < 0) return clib_error_return_unix (0, "eventfd[rx queue %u]", i); - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->ring = memif_get_ring (mif, MEMIF_RING_M2S, i); mq->log2_ring_size = mif->cfg.log2_ring_size; mq->region = 0; @@ -432,7 +432,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) (sockfd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) == 0) { - unix_file_t t = { 0 }; + clib_file_t t = { 0 }; mif->conn_fd = sockfd; t.read_function = memif_slave_conn_fd_read_ready; @@ -440,7 +440,7 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) t.error_function = memif_slave_conn_fd_error; t.file_descriptor = mif->conn_fd; t.private_data = mif->dev_instance; - memif_file_add (&mif->conn_unix_file_index, &t); + memif_file_add (&mif->conn_clib_file_index, &t); hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); mif->flags |= MEMIF_IF_FLAG_CONNECTING; @@ -507,7 +507,7 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) if (msf->is_listener) { uword *x; - memif_file_del_by_index (msf->unix_file_index); + memif_file_del_by_index (msf->clib_file_index); vec_foreach (x, msf->pending_file_indices) { memif_file_del_by_index (*x); @@ -639,7 +639,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->socket_file_index = msf - mm->socket_files; mif->id = args->id; mif->sw_if_index = mif->hw_if_index = mif->per_interface_next_index = ~0; - mif->conn_unix_file_index = ~0; + mif->conn_clib_file_index = ~0; mif->conn_fd = -1; mif->mode = args->mode; if (args->secret) @@ -737,12 +737,12 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) goto error; } - msf->unix_file_index = ~0; - unix_file_t template = { 0 }; + msf->clib_file_index = ~0; + clib_file_t template = { 0 }; template.read_function = memif_conn_fd_accept_ready; template.file_descriptor = msf->fd; template.private_data = mif->socket_file_index; - memif_file_add (&msf->unix_file_index, &template); + memif_file_add (&msf->clib_file_index, &template); } msf->ref_cnt++; diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index b5f2f8ff..912ec59a 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -41,34 +41,34 @@ #if MEMIF_DEBUG == 1 #define memif_file_add(a, b) do { \ ASSERT (*a == ~0); \ - *a = unix_file_add (&unix_main, b); \ - clib_warning ("unix_file_add fd %d private_data %u idx %u", \ + *a = clib_file_add (&file_main, b); \ + clib_warning ("clib_file_add fd %d private_data %u idx %u", \ (b)->file_descriptor, (b)->private_data, *a); \ } while (0) #define memif_file_del(a) do { \ - clib_warning ("unix_file_del idx %u",a - unix_main.file_pool); \ - unix_file_del (&unix_main, a); \ + clib_warning ("clib_file_del idx %u",a - file_main.file_pool); \ + clib_file_del (&file_main, a); \ } while (0) #define memif_file_del_by_index(a) do { \ - clib_warning ("unix_file_del idx %u", a); \ - unix_file_del_by_index (&unix_main, a); \ + clib_warning ("clib_file_del idx %u", a); \ + clib_file_del_by_index (&file_main, a); \ } while (0) #else #define memif_file_add(a, b) do { \ ASSERT (*a == ~0); \ - *a = unix_file_add (&unix_main, b); \ + *a = clib_file_add (&file_main, b); \ } while (0) -#define memif_file_del(a) unix_file_del(&unix_main, a) -#define memif_file_del_by_index(a) unix_file_del_by_index(&unix_main, a) +#define memif_file_del(a) clib_file_del(&file_main, a) +#define memif_file_del_by_index(a) clib_file_del_by_index(&file_main, a) #endif typedef struct { u8 *filename; int fd; - uword unix_file_index; + uword clib_file_index; uword *pending_file_indices; int ref_cnt; int is_listener; @@ -106,7 +106,7 @@ typedef struct /* interrupts */ int int_fd; - uword int_unix_file_index; + uword int_clib_file_index; u64 int_count; } memif_queue_t; @@ -140,7 +140,7 @@ typedef struct /* socket connection */ uword socket_file_index; int conn_fd; - uword conn_unix_file_index; + uword conn_clib_file_index; memif_msg_fifo_elt_t *msg_queue; u8 *secret; @@ -241,13 +241,13 @@ clib_error_t *memif_connect (memif_if_t * mif); void memif_disconnect (memif_if_t * mif, clib_error_t * err); /* socket.c */ -clib_error_t *memif_conn_fd_accept_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_read_ready (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_read_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_write_ready (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_write_ready (unix_file_t * uf); -clib_error_t *memif_master_conn_fd_error (unix_file_t * uf); -clib_error_t *memif_slave_conn_fd_error (unix_file_t * uf); +clib_error_t *memif_conn_fd_accept_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_read_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_write_ready (clib_file_t * uf); +clib_error_t *memif_master_conn_fd_error (clib_file_t * uf); +clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf); clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err); u8 *format_memif_device_name (u8 * s, va_list * args); diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index 79ae07be..1abc0f11 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -246,7 +246,7 @@ memif_msg_receive_hello (memif_if_t * mif, memif_msg_t * msg) static clib_error_t * memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, - unix_file_t * uf) + clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -258,7 +258,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, if (i->version != MEMIF_VERSION) { - memif_file_del_by_index (uf - unix_main.file_pool); + memif_file_del_by_index (uf - file_main.file_pool); return clib_error_return (0, "unsupported version"); } @@ -291,7 +291,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, } mif->conn_fd = uf->file_descriptor; - mif->conn_unix_file_index = uf - unix_main.file_pool; + mif->conn_clib_file_index = uf - file_main.file_pool; hash_set (msf->dev_instance_by_fd, mif->conn_fd, mif->dev_instance); mif->remote_name = memif_str2vec (i->name, sizeof (i->name)); *mifp = mif; @@ -316,7 +316,7 @@ memif_msg_receive_init (memif_if_t ** mifp, memif_msg_t * msg, error: tmp.conn_fd = uf->file_descriptor; memif_msg_send_disconnect (&tmp, err); - memif_file_del_by_index (uf - unix_main.file_pool); + memif_file_del_by_index (uf - file_main.file_pool); return err; } @@ -377,7 +377,7 @@ memif_msg_receive_add_ring (memif_if_t * mif, memif_msg_t * msg, int fd) } mq->int_fd = fd; - mq->int_unix_file_index = ~0; + mq->int_clib_file_index = ~0; mq->log2_ring_size = ar->log2_ring_size; mq->region = ar->region; mq->offset = ar->offset; @@ -422,7 +422,7 @@ memif_msg_receive_disconnect (memif_if_t * mif, memif_msg_t * msg) } static clib_error_t * -memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) +memif_msg_receive (memif_if_t ** mifp, clib_file_t * uf) { char ctl[CMSG_SPACE (sizeof (int)) + CMSG_SPACE (sizeof (struct ucred))] = { 0 }; @@ -544,20 +544,21 @@ memif_msg_receive (memif_if_t ** mifp, unix_file_t * uf) return err; } - if (clib_fifo_elts (mif->msg_queue) && mif->conn_unix_file_index != ~0) - unix_file_set_data_available_to_write (mif->conn_unix_file_index, 1); + if (clib_fifo_elts (mif->msg_queue) && mif->conn_clib_file_index != ~0) + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 1); return 0; } clib_error_t * -memif_master_conn_fd_read_ready (unix_file_t * uf) +memif_master_conn_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = pool_elt_at_index (mm->socket_files, uf->private_data); uword *p; memif_if_t *mif = 0; - uword conn_unix_file_index = ~0; + uword conn_clib_file_index = ~0; clib_error_t *err = 0; p = hash_get (msf->dev_instance_by_fd, uf->file_descriptor); @@ -570,13 +571,13 @@ memif_master_conn_fd_read_ready (unix_file_t * uf) /* This is new connection, remove index from pending vector */ int i; vec_foreach_index (i, msf->pending_file_indices) - if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) { - conn_unix_file_index = msf->pending_file_indices[i]; + conn_clib_file_index = msf->pending_file_indices[i]; vec_del1 (msf->pending_file_indices, i); break; } - ASSERT (conn_unix_file_index != ~0); + ASSERT (conn_clib_file_index != ~0); } err = memif_msg_receive (&mif, uf); if (err) @@ -588,7 +589,7 @@ memif_master_conn_fd_read_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_read_ready (unix_file_t * uf) +memif_slave_conn_fd_read_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; clib_error_t *err; @@ -603,17 +604,18 @@ memif_slave_conn_fd_read_ready (unix_file_t * uf) } static clib_error_t * -memif_conn_fd_write_ready (unix_file_t * uf, memif_if_t * mif) +memif_conn_fd_write_ready (clib_file_t * uf, memif_if_t * mif) { memif_msg_fifo_elt_t *e; clib_fifo_sub2 (mif->msg_queue, e); - unix_file_set_data_available_to_write (mif->conn_unix_file_index, 0); + clib_file_set_data_available_to_write (&file_main, + mif->conn_clib_file_index, 0); memif_msg_send (mif->conn_fd, &e->msg, e->fd); return 0; } clib_error_t * -memif_master_conn_fd_write_ready (unix_file_t * uf) +memif_master_conn_fd_write_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -630,7 +632,7 @@ memif_master_conn_fd_write_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_write_ready (unix_file_t * uf) +memif_slave_conn_fd_write_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); @@ -638,7 +640,7 @@ memif_slave_conn_fd_write_ready (unix_file_t * uf) } clib_error_t * -memif_slave_conn_fd_error (unix_file_t * uf) +memif_slave_conn_fd_error (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_if_t *mif = vec_elt_at_index (mm->interfaces, uf->private_data); @@ -652,7 +654,7 @@ memif_slave_conn_fd_error (unix_file_t * uf) } clib_error_t * -memif_master_conn_fd_error (unix_file_t * uf) +memif_master_conn_fd_error (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -674,7 +676,7 @@ memif_master_conn_fd_error (unix_file_t * uf) { int i; vec_foreach_index (i, msf->pending_file_indices) - if (msf->pending_file_indices[i] == uf - unix_main.file_pool) + if (msf->pending_file_indices[i] == uf - file_main.file_pool) { vec_del1 (msf->pending_file_indices, i); memif_file_del (uf); @@ -689,7 +691,7 @@ memif_master_conn_fd_error (unix_file_t * uf) clib_error_t * -memif_conn_fd_accept_ready (unix_file_t * uf) +memif_conn_fd_accept_ready (clib_file_t * uf) { memif_main_t *mm = &memif_main; memif_socket_file_t *msf = @@ -697,8 +699,8 @@ memif_conn_fd_accept_ready (unix_file_t * uf) int addr_len; struct sockaddr_un client; int conn_fd; - unix_file_t template = { 0 }; - uword unix_file_index = ~0; + clib_file_t template = { 0 }; + uword clib_file_index = ~0; clib_error_t *err; @@ -715,16 +717,16 @@ memif_conn_fd_accept_ready (unix_file_t * uf) template.file_descriptor = conn_fd; template.private_data = uf->private_data; - memif_file_add (&unix_file_index, &template); + memif_file_add (&clib_file_index, &template); err = memif_msg_enq_hello (conn_fd); if (err) { clib_error_report (err); - memif_file_del_by_index (unix_file_index); + memif_file_del_by_index (clib_file_index); } else - vec_add1 (msf->pending_file_indices, unix_file_index); + vec_add1 (msf->pending_file_indices, clib_file_index); return 0; } diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index cd2affdc..4ce19190 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -68,8 +68,8 @@ typedef struct /* Minor device for uio device. */ u32 uio_minor; - /* Index given by unix_file_add. */ - u32 unix_file_index; + /* Index given by clib_file_add. */ + u32 clib_file_index; } linux_pci_device_t; @@ -237,7 +237,7 @@ scan_uio_dir (void *arg, u8 * path_name, u8 * file_name) } static clib_error_t * -linux_pci_uio_read_ready (unix_file_t * uf) +linux_pci_uio_read_ready (clib_file_t * uf) { vlib_pci_main_t *pm = &pci_main; vlib_pci_device_t *d; @@ -257,7 +257,7 @@ linux_pci_uio_read_ready (unix_file_t * uf) } static clib_error_t * -linux_pci_uio_error_ready (unix_file_t * uf) +linux_pci_uio_error_ready (clib_file_t * uf) { u32 error_index = (u32) uf->private_data; @@ -294,15 +294,14 @@ add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev) } { - unix_file_t template = { 0 }; - unix_main_t *um = &unix_main; + clib_file_t template = { 0 }; template.read_function = linux_pci_uio_read_ready; template.file_descriptor = l->uio_fd; template.error_function = linux_pci_uio_error_ready; template.private_data = dev - pm->pci_devs; - l->unix_file_index = unix_file_add (um, &template); + l->clib_file_index = clib_file_add (&file_main, &template); } } diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 068a4e16..39368823 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -141,7 +141,7 @@ typedef struct typedef struct { /** The file index held by unix.c */ - u32 unix_file_index; + u32 clib_file_index; /** Vector of output pending write to file descriptor. */ u8 *output_vector; @@ -502,11 +502,11 @@ unix_cli_match_action (unix_cli_parse_actions_t * a, * are available to be sent. */ static void -unix_cli_add_pending_output (unix_file_t * uf, +unix_cli_add_pending_output (clib_file_t * uf, unix_cli_file_t * cf, u8 * buffer, uword buffer_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_add (cf->output_vector, buffer, buffer_bytes); if (vec_len (cf->output_vector) > 0) @@ -514,7 +514,7 @@ unix_cli_add_pending_output (unix_file_t * uf, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } @@ -522,10 +522,10 @@ unix_cli_add_pending_output (unix_file_t * uf, * that no more bytes are available to be sent. */ static void -unix_cli_del_pending_output (unix_file_t * uf, +unix_cli_del_pending_output (clib_file_t * uf, unix_cli_file_t * cf, uword n_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_delete (cf->output_vector, n_bytes, 0); if (vec_len (cf->output_vector) <= 0) @@ -533,7 +533,7 @@ unix_cli_del_pending_output (unix_file_t * uf, int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } @@ -580,7 +580,7 @@ unix_vlib_findchr (u8 chr, u8 * str, word len) */ static void unix_vlib_cli_output_raw (unix_cli_file_t * cf, - unix_file_t * uf, u8 * buffer, uword buffer_bytes) + clib_file_t * uf, u8 * buffer, uword buffer_bytes) { int n = 0; @@ -610,7 +610,7 @@ unix_vlib_cli_output_raw (unix_cli_file_t * cf, */ static void unix_vlib_cli_output_cooked (unix_cli_file_t * cf, - unix_file_t * uf, + clib_file_t * uf, u8 * buffer, uword buffer_bytes) { word end = 0, start = 0; @@ -646,7 +646,7 @@ unix_vlib_cli_output_cooked (unix_cli_file_t * cf, /** @brief Output the CLI prompt */ static void -unix_cli_cli_prompt (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_cli_prompt (unix_cli_file_t * cf, clib_file_t * uf) { unix_cli_main_t *cm = &unix_cli_main; @@ -655,7 +655,7 @@ unix_cli_cli_prompt (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Output a pager prompt and show number of buffered lines */ static void -unix_cli_pager_prompt (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_prompt (unix_cli_file_t * cf, clib_file_t * uf) { u8 *prompt; u32 h; @@ -678,7 +678,7 @@ unix_cli_pager_prompt (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Output a pager "skipping" message */ static void -unix_cli_pager_message (unix_cli_file_t * cf, unix_file_t * uf, +unix_cli_pager_message (unix_cli_file_t * cf, clib_file_t * uf, char *message, char *postfix) { u8 *prompt; @@ -694,7 +694,7 @@ unix_cli_pager_message (unix_cli_file_t * cf, unix_file_t * uf, /** @brief Erase the printed pager prompt */ static void -unix_cli_pager_prompt_erase (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_prompt_erase (unix_cli_file_t * cf, clib_file_t * uf) { if (cf->ansi_capable) { @@ -716,7 +716,7 @@ unix_cli_pager_prompt_erase (unix_cli_file_t * cf, unix_file_t * uf) /** @brief Uses an ANSI escape sequence to move the cursor */ static void -unix_cli_ansi_cursor (unix_cli_file_t * cf, unix_file_t * uf, u16 x, u16 y) +unix_cli_ansi_cursor (unix_cli_file_t * cf, clib_file_t * uf, u16 x, u16 y) { u8 *str; @@ -732,7 +732,7 @@ unix_cli_ansi_cursor (unix_cli_file_t * cf, unix_file_t * uf, u16 x, u16 y) * @param uf Unix file of the CLI session. */ static void -unix_cli_pager_redraw (unix_cli_file_t * cf, unix_file_t * uf) +unix_cli_pager_redraw (unix_cli_file_t * cf, clib_file_t * uf) { unix_cli_pager_index_t *pi = NULL; u8 *line = NULL; @@ -930,12 +930,13 @@ static void unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; - unix_file_t *uf; + clib_file_t *uf; cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0) { @@ -1037,7 +1038,8 @@ static void unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf) { unix_main_t *um = &unix_main; - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_main_t *fm = &file_main; + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); unix_cli_banner_t *banner; int i, len; @@ -1104,7 +1106,7 @@ unix_cli_file_welcome_timer (any arg, f64 delay) static i32 unix_cli_process_telnet (unix_main_t * um, unix_cli_file_t * cf, - unix_file_t * uf, u8 * input_vector, uword len) + clib_file_t * uf, u8 * input_vector, uword len) { /* Input_vector starts at IAC byte. * See if we have a complete message; if not, return -1 so we wait for more. @@ -1229,7 +1231,7 @@ static int unix_cli_line_process_one (unix_cli_main_t * cm, unix_main_t * um, unix_cli_file_t * cf, - unix_file_t * uf, + clib_file_t * uf, u8 input, unix_cli_parse_action_t action) { u8 *prev; @@ -2059,10 +2061,10 @@ unix_cli_line_process_one (unix_cli_main_t * cm, /** @brief Process input bytes on a stream to provide line editing and * command history in the CLI. */ static int -unix_cli_line_edit (unix_cli_main_t * cm, - unix_main_t * um, unix_cli_file_t * cf) +unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um, + clib_file_main_t * fm, unix_cli_file_t * cf) { - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); int i; for (i = 0; i < vec_len (cf->input_vector); i++) @@ -2139,7 +2141,8 @@ static void unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index) { unix_main_t *um = &unix_main; - unix_file_t *uf; + clib_file_main_t *fm = &file_main; + clib_file_t *uf; unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); unformat_input_t input; int vlib_parse_eval (u8 *); @@ -2157,7 +2160,7 @@ more: else { /* Line edit, echo, etc. */ - if (unix_cli_line_edit (cm, um, cf)) + if (unix_cli_line_edit (cm, um, fm, cf)) /* want more input */ return; } @@ -2196,7 +2199,7 @@ more: /* Re-fetch pointer since pool may have moved. */ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); done: /* reset vector; we'll re-use it later */ @@ -2240,12 +2243,13 @@ static void unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_file_t *cf; - unix_file_t *uf; + clib_file_t *uf; int i; cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); /* Quit/EOF on stdin means quit program. */ if (uf->file_descriptor == UNIX_CLI_STDIN_FD) @@ -2259,7 +2263,7 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) vec_free (cf->command_history); - unix_file_del (um, uf); + clib_file_del (fm, uf); unix_cli_file_free (cf); pool_put (cm->cli_file_pool, cf); @@ -2311,7 +2315,7 @@ done: /** Called when a CLI session file descriptor can be written to without * blocking. */ static clib_error_t * -unix_cli_write_ready (unix_file_t * uf) +unix_cli_write_ready (clib_file_t * uf) { unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; @@ -2334,7 +2338,7 @@ unix_cli_write_ready (unix_file_t * uf) /** Called when a CLI session file descriptor has data to be read. */ static clib_error_t * -unix_cli_read_ready (unix_file_t * uf) +unix_cli_read_ready (clib_file_t * uf) { unix_main_t *um = &unix_main; unix_cli_main_t *cm = &unix_cli_main; @@ -2380,8 +2384,9 @@ static u32 unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_file_t *cf; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vlib_main_t *vm = um->vlib_main; vlib_node_t *n; @@ -2424,7 +2429,7 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) template.private_data = cf - cm->cli_file_pool; cf->process_node_index = n->index; - cf->unix_file_index = unix_file_add (um, &template); + cf->clib_file_index = clib_file_add (fm, &template); cf->output_vector = 0; cf->input_vector = 0; @@ -2439,9 +2444,10 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) /** Telnet listening socket has a new connection. */ static clib_error_t * -unix_cli_listen_read_ready (unix_file_t * uf) +unix_cli_listen_read_ready (clib_file_t * uf) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; clib_socket_t *s = &um->cli_listen_socket; clib_socket_t client; @@ -2497,7 +2503,7 @@ unix_cli_listen_read_ready (unix_file_t * uf) /* Setup the pager */ cf->no_pager = um->cli_no_pager; - uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); /* Send the telnet options */ unix_vlib_cli_output_raw (cf, uf, charmode_option, @@ -2517,11 +2523,11 @@ unix_cli_listen_read_ready (unix_file_t * uf) static void unix_cli_resize_interrupt (int signum) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cm->stdin_cli_file_index); - unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); struct winsize ws; (void) signum; @@ -2548,6 +2554,7 @@ static clib_error_t * unix_cli_config (vlib_main_t * vm, unformat_input_t * input) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; unix_cli_main_t *cm = &unix_cli_main; int flags; clib_error_t *error = 0; @@ -2640,7 +2647,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) if (s->config && s->config[0] != 0) { /* CLI listen. */ - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; /* mkdir of file socketu, only under /run */ if (strncmp (s->config, "/run", 4) == 0) @@ -2667,7 +2674,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) template.read_function = unix_cli_listen_read_ready; template.file_descriptor = s->fd; - unix_file_add (um, &template); + clib_file_add (fm, &template); } /* Set CLI prompt. */ diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 515dae94..ecd31791 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -62,9 +62,9 @@ typedef struct static linux_epoll_main_t linux_epoll_main; static void -linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) +linux_epoll_file_update (clib_file_t * f, unix_file_update_type_t update_type) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event e; int op; @@ -76,7 +76,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) e.events |= EPOLLOUT; if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) e.events |= EPOLLET; - e.data.u32 = f - um->file_pool; + e.data.u32 = f - fm->file_pool; op = -1; @@ -108,6 +108,7 @@ linux_epoll_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; linux_epoll_main_t *em = &linux_epoll_main; struct epoll_event *e; int n_fds_ready; @@ -186,7 +187,7 @@ linux_epoll_input (vlib_main_t * vm, for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) { u32 i = e->data.u32; - unix_file_t *f = pool_elt_at_index (um->file_pool, i); + clib_file_t *f = pool_elt_at_index (fm->file_pool, i); clib_error_t *errors[4]; int n_errors = 0; @@ -236,7 +237,7 @@ clib_error_t * linux_epoll_input_init (vlib_main_t * vm) { linux_epoll_main_t *em = &linux_epoll_main; - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; /* Allocate some events. */ vec_resize (em->epoll_events, VLIB_FRAME_SIZE); @@ -245,7 +246,7 @@ linux_epoll_input_init (vlib_main_t * vm) if (em->epoll_fd < 0) return clib_error_return_unix (0, "epoll_create"); - um->file_update = linux_epoll_file_update; + fm->file_update = linux_epoll_file_update; return 0; } diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 3a92b2e3..ed0631ec 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -60,6 +60,7 @@ char *vlib_default_runtime_dir __attribute__ ((weak)); char *vlib_default_runtime_dir = "vlib"; unix_main_t unix_main; +clib_file_main_t file_main; static clib_error_t * unix_main_init (vlib_main_t * vm) diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c index 9c12ad3b..3f1cd99d 100644 --- a/src/vlib/unix/mc_socket.c +++ b/src/vlib/unix/mc_socket.c @@ -243,7 +243,7 @@ recvmsg_helper (mc_socket_main_t * msm, } static clib_error_t * -mastership_socket_read_ready (unix_file_t * uf) +mastership_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -263,7 +263,7 @@ mastership_socket_read_ready (unix_file_t * uf) } static clib_error_t * -to_relay_socket_read_ready (unix_file_t * uf) +to_relay_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -297,7 +297,7 @@ to_relay_socket_read_ready (unix_file_t * uf) } static clib_error_t * -from_relay_socket_read_ready (unix_file_t * uf) +from_relay_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -317,7 +317,7 @@ from_relay_socket_read_ready (unix_file_t * uf) } static clib_error_t * -join_socket_read_ready (unix_file_t * uf) +join_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -354,7 +354,7 @@ join_socket_read_ready (unix_file_t * uf) } static clib_error_t * -ack_socket_read_ready (unix_file_t * uf) +ack_socket_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; @@ -371,10 +371,11 @@ ack_socket_read_ready (unix_file_t * uf) static void catchup_cleanup (mc_socket_main_t * msm, - mc_socket_catchup_t * c, unix_main_t * um, unix_file_t * uf) + mc_socket_catchup_t * c, clib_file_main_t * um, + clib_file_t * uf) { hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor); - unix_file_del (um, uf); + clib_file_del (um, uf); vec_free (c->input_vector); vec_free (c->output_vector); pool_put (msm->catchups, c); @@ -390,9 +391,9 @@ find_catchup_from_file_descriptor (mc_socket_main_t * msm, } static clib_error_t * -catchup_socket_read_ready (unix_file_t * uf, int is_server) +catchup_socket_read_ready (clib_file_t * uf, int is_server) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_main_t *mcm = &msm->mc_main; mc_socket_catchup_t *c = @@ -440,13 +441,13 @@ catchup_socket_read_ready (unix_file_t * uf, int is_server) } static clib_error_t * -catchup_server_read_ready (unix_file_t * uf) +catchup_server_read_ready (clib_file_t * uf) { return catchup_socket_read_ready (uf, /* is_server */ 1); } static clib_error_t * -catchup_client_read_ready (unix_file_t * uf) +catchup_client_read_ready (clib_file_t * uf) { if (MC_EVENT_LOGGING) { @@ -460,9 +461,9 @@ catchup_client_read_ready (unix_file_t * uf) } static clib_error_t * -catchup_socket_write_ready (unix_file_t * uf, int is_server) +catchup_socket_write_ready (clib_file_t * uf, int is_server) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor); @@ -522,7 +523,7 @@ catchup_socket_write_ready (unix_file_t * uf, int is_server) if (!is_server) { uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); + file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); /* Send EOF to other side. */ shutdown (uf->file_descriptor, SHUT_WR); return error; @@ -537,21 +538,21 @@ catchup_socket_write_ready (unix_file_t * uf, int is_server) } static clib_error_t * -catchup_server_write_ready (unix_file_t * uf) +catchup_server_write_ready (clib_file_t * uf) { return catchup_socket_write_ready (uf, /* is_server */ 1); } static clib_error_t * -catchup_client_write_ready (unix_file_t * uf) +catchup_client_write_ready (clib_file_t * uf) { return catchup_socket_write_ready (uf, /* is_server */ 0); } static clib_error_t * -catchup_socket_error_ready (unix_file_t * uf) +catchup_socket_error_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor); @@ -560,13 +561,13 @@ catchup_socket_error_ready (unix_file_t * uf) } static clib_error_t * -catchup_listen_read_ready (unix_file_t * uf) +catchup_listen_read_ready (clib_file_t * uf) { mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data; struct sockaddr_in client_addr; int client_len; mc_socket_catchup_t *c; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; pool_get (msm->catchups, c); memset (c, 0, sizeof (c[0])); @@ -616,7 +617,7 @@ catchup_listen_read_ready (unix_file_t * uf) template.error_function = catchup_socket_error_ready; template.file_descriptor = c->socket; template.private_data = pointer_to_uword (msm); - c->unix_file_index = unix_file_add (&unix_main, &template); + c->clib_file_index = clib_file_add (&file_main, &template); hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups); @@ -772,45 +773,45 @@ socket_setup (mc_socket_main_t * msm) /* epoll setup for multicast mastership socket */ { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = mastership_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for multicast to_relay socket */ template.read_function = to_relay_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for multicast from_relay socket */ template.read_function = from_relay_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); template.read_function = join_socket_read_ready; template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_JOIN].socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for ack rx socket */ template.read_function = ack_socket_read_ready; template.file_descriptor = msm->ack_socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); /* epoll setup for TCP catchup server */ template.read_function = catchup_listen_read_ready; template.file_descriptor = msm->catchup_server_socket; template.private_data = (uword) msm; - unix_file_add (&unix_main, &template); + clib_file_add (&file_main, &template); } return 0; @@ -820,8 +821,8 @@ static void * catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, - c->unix_file_index); + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, + c->clib_file_index); u8 *result = 0; if (set_output_vector) @@ -833,7 +834,7 @@ catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); + file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); } return result; } @@ -847,7 +848,7 @@ catchup_request_fun (void *transport_main, vlib_main_t *vm = mcm->vlib_main; mc_socket_catchup_t *c; struct sockaddr_in addr; - unix_main_t *um = &unix_main; + clib_file_main_t *um = &file_main; int one = 1; pool_get (msm->catchups, c); @@ -895,14 +896,14 @@ catchup_request_fun (void *transport_main, } { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = catchup_client_read_ready; template.write_function = catchup_client_write_ready; template.error_function = catchup_socket_error_ready; template.file_descriptor = c->socket; template.private_data = (uword) msm; - c->unix_file_index = unix_file_add (um, &template); + c->clib_file_index = clib_file_add (um, &template); hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups); diff --git a/src/vlib/unix/mc_socket.h b/src/vlib/unix/mc_socket.h index 273c9ad4..3686c824 100644 --- a/src/vlib/unix/mc_socket.h +++ b/src/vlib/unix/mc_socket.h @@ -31,7 +31,7 @@ typedef struct typedef struct { int socket; - u32 unix_file_index; + u32 clib_file_index; u8 *input_vector; u8 *output_vector; diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h index 1b0d8b9d..4c8566b7 100644 --- a/src/vlib/unix/unix.h +++ b/src/vlib/unix/unix.h @@ -40,42 +40,16 @@ #ifndef included_unix_unix_h #define included_unix_unix_h +#include #include #include - -struct unix_file; -typedef clib_error_t *(unix_file_function_t) (struct unix_file * f); - -typedef struct unix_file -{ - /* Unix file descriptor from open/socket. */ - u32 file_descriptor; - - u32 flags; -#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) -#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) - - /* Data available for function's use. */ - uword private_data; - - /* Functions to be called when read/write data becomes ready. */ - unix_file_function_t *read_function, *write_function, *error_function; -} unix_file_t; - typedef struct { f64 time; clib_error_t *error; } unix_error_history_t; -typedef enum -{ - UNIX_FILE_UPDATE_ADD, - UNIX_FILE_UPDATE_MODIFY, - UNIX_FILE_UPDATE_DELETE, -} unix_file_update_type_t; - typedef struct { /* Back pointer to main structure. */ @@ -86,15 +60,9 @@ typedef struct #define UNIX_FLAG_INTERACTIVE (1 << 0) #define UNIX_FLAG_NODAEMON (1 << 1) - /* Pool of files to poll for input/output. */ - unix_file_t *file_pool; - /* CLI listen socket. */ clib_socket_t cli_listen_socket; - void (*file_update) (unix_file_t * file, - unix_file_update_type_t update_type); - /* Circular buffer of last unix errors. */ unix_error_history_t error_history[128]; u32 error_history_index; @@ -138,47 +106,7 @@ typedef struct /* Global main structure. */ extern unix_main_t unix_main; - -always_inline uword -unix_file_add (unix_main_t * um, unix_file_t * template) -{ - unix_file_t *f; - pool_get (um->file_pool, f); - f[0] = template[0]; - um->file_update (f, UNIX_FILE_UPDATE_ADD); - return f - um->file_pool; -} - -always_inline void -unix_file_del (unix_main_t * um, unix_file_t * f) -{ - um->file_update (f, UNIX_FILE_UPDATE_DELETE); - close (f->file_descriptor); - f->file_descriptor = ~0; - pool_put (um->file_pool, f); -} - -always_inline void -unix_file_del_by_index (unix_main_t * um, uword index) -{ - unix_file_t *uf; - uf = pool_elt_at_index (um->file_pool, index); - unix_file_del (um, uf); -} - -always_inline uword -unix_file_set_data_available_to_write (u32 unix_file_index, - uword is_available) -{ - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, unix_file_index); - uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); - if ((was_available != 0) != (is_available != 0)) - { - uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; - unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); - } - return was_available != 0; -} +extern clib_file_main_t file_main; always_inline void unix_save_error (unix_main_t * um, clib_error_t * error) diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index b84d269e..651566ae 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -51,7 +51,7 @@ typedef struct vl_api_registration_ unix_shared_memory_queue_t *vl_input_queue; /* socket server and client */ - u32 unix_file_index; + u32 clib_file_index; i8 *unprocessed_input; u32 unprocessed_msg_length; u8 *output_vector; diff --git a/src/vlibsocket/api.h b/src/vlibsocket/api.h index 79c0d08a..d7b7055d 100644 --- a/src/vlibsocket/api.h +++ b/src/vlibsocket/api.h @@ -41,27 +41,27 @@ typedef struct * or to a shared-memory connection. */ vl_api_registration_t *current_rp; - unix_file_t *current_uf; + clib_file_t *current_uf; /* One input buffer, shared across all sockets */ i8 *input_buffer; } socket_main_t; extern socket_main_t socket_main; -void socksvr_add_pending_output (struct unix_file *uf, +void socksvr_add_pending_output (clib_file_t * uf, struct vl_api_registration_ *cf, u8 * buffer, uword buffer_bytes); #define SOCKSVR_DEFAULT_PORT 32741 /* whatever */ void vl_free_socket_registration_index (u32 pool_index); -void vl_socket_process_msg (struct unix_file *uf, +void vl_socket_process_msg (clib_file_t * uf, struct vl_api_registration_ *rp, i8 * input_v); -clib_error_t *vl_socket_read_ready (struct unix_file *uf); -void vl_socket_add_pending_output (struct unix_file *uf, +clib_error_t *vl_socket_read_ready (clib_file_t * uf); +void vl_socket_add_pending_output (clib_file_t * uf, struct vl_api_registration_ *rp, u8 * buffer, uword buffer_bytes); -clib_error_t *vl_socket_write_ready (struct unix_file *uf); +clib_error_t *vl_socket_write_ready (clib_file_t * uf); void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem); void vl_socket_api_send_with_data (vl_api_registration_t * rp, u8 * elem, u8 * data_vector); diff --git a/src/vlibsocket/sockclnt_vlib.c b/src/vlibsocket/sockclnt_vlib.c index e16adfeb..760ad944 100644 --- a/src/vlibsocket/sockclnt_vlib.c +++ b/src/vlibsocket/sockclnt_vlib.c @@ -60,11 +60,11 @@ vl_api_sockclnt_create_reply_t_handler (vl_api_sockclnt_create_reply_t * mp) static void vl_api_sockclnt_delete_reply_t_handler (vl_api_sockclnt_delete_reply_t * mp) { - unix_main_t *um = &unix_main; - unix_file_t *uf = socket_main.current_uf; + clib_file_main_t *fm = &file_main; + clib_file_t *uf = socket_main.current_uf; vl_api_registration_t *rp = socket_main.current_rp; - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp->vl_api_registration_pool_index); } @@ -72,8 +72,8 @@ u32 sockclnt_open_index (char *client_name, char *hostname, int port) { vl_api_registration_t *rp; - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; int sockfd; int one = 1; int rv; @@ -129,7 +129,7 @@ sockclnt_open_index (char *client_name, char *hostname, int port) template.file_descriptor = sockfd; template.private_data = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + rp->clib_file_index = clib_file_add (fm, &template); rp->name = format (0, "%s:%d", hostname, port); mp = vl_msg_api_alloc (sizeof (*mp)); diff --git a/src/vlibsocket/socksvr_vlib.c b/src/vlibsocket/socksvr_vlib.c index dc8c63eb..31b33df5 100644 --- a/src/vlibsocket/socksvr_vlib.c +++ b/src/vlibsocket/socksvr_vlib.c @@ -53,8 +53,8 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) { vl_api_registration_t *reg; socket_main_t *sm = &socket_main; - unix_main_t *um = &unix_main; - unix_file_t *f; + clib_file_main_t *fm = &file_main; + clib_file_t *f; /* * Must have at least one active client, not counting the @@ -69,7 +69,7 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) pool_foreach (reg, sm->registration_pool, ({ if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) { - f = pool_elt_at_index (um->file_pool, reg->unix_file_index); + f = pool_elt_at_index (fm->file_pool, reg->clib_file_index); vlib_cli_output (vm, "%16s %8d", reg->name, f->file_descriptor); } @@ -99,13 +99,13 @@ vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) nbytes += msg_length; tmp = clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); @@ -139,18 +139,18 @@ vl_socket_api_send_with_data (vl_api_registration_t * rp, /* Length in network byte order */ tmp = clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, data_vector, vec_len (data_vector)); @@ -181,13 +181,13 @@ vl_socket_api_send_with_length_internal (vl_api_registration_t * rp, /* Length in network byte order */ tmp = clib_host_to_net_u32 (nbytes); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->unix_file_index - + unix_main.file_pool, + vl_socket_add_pending_output (rp->clib_file_index + + file_main.file_pool, rp->vl_api_registration_pool_index + socket_main.registration_pool, elem, msg_length); @@ -231,7 +231,7 @@ vl_free_socket_registration_index (u32 pool_index) } static inline void -socket_process_msg (unix_file_t * uf, vl_api_registration_t * rp, +socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, i8 * input_v) { u8 *the_msg = (u8 *) (input_v + sizeof (u32)); @@ -243,9 +243,9 @@ socket_process_msg (unix_file_t * uf, vl_api_registration_t * rp, } clib_error_t * -vl_socket_read_ready (unix_file_t * uf) +vl_socket_read_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; int n; i8 *msg_buffer = 0; @@ -259,7 +259,7 @@ vl_socket_read_ready (unix_file_t * uf) if (n <= 0 && errno != EAGAIN) { - unix_file_del (um, uf); + clib_file_del (fm, uf); if (!pool_is_free (socket_main.registration_pool, rp)) { @@ -352,11 +352,11 @@ turf_it: } void -vl_socket_add_pending_output (unix_file_t * uf, +vl_socket_add_pending_output (clib_file_t * uf, vl_api_registration_t * rp, u8 * buffer, uword buffer_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_add (rp->output_vector, buffer, buffer_bytes); if (vec_len (rp->output_vector) > 0) @@ -364,15 +364,15 @@ vl_socket_add_pending_output (unix_file_t * uf, int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } static void -socket_del_pending_output (unix_file_t * uf, +socket_del_pending_output (clib_file_t * uf, vl_api_registration_t * rp, uword n_bytes) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vec_delete (rp->output_vector, n_bytes, 0); if (vec_len (rp->output_vector) <= 0) @@ -380,14 +380,14 @@ socket_del_pending_output (unix_file_t * uf, int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; if (!skip_update) - um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY); } } clib_error_t * -vl_socket_write_ready (unix_file_t * uf) +vl_socket_write_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; int n; @@ -402,7 +402,7 @@ vl_socket_write_ready (unix_file_t * uf) #if DEBUG > 2 clib_warning ("write error, close the file...\n"); #endif - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp - socket_main.registration_pool); return 0; @@ -415,23 +415,23 @@ vl_socket_write_ready (unix_file_t * uf) } clib_error_t * -vl_socket_error_ready (unix_file_t * uf) +vl_socket_error_ready (clib_file_t * uf) { vl_api_registration_t *rp; - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); - unix_file_del (um, uf); + clib_file_del (fm, uf); vl_free_socket_registration_index (rp - socket_main.registration_pool); return 0; } void -socksvr_file_add (unix_main_t * um, int fd) +socksvr_file_add (clib_file_main_t * fm, int fd) { vl_api_registration_t *rp; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; pool_get (socket_main.registration_pool, rp); memset (rp, 0, sizeof (*rp)); @@ -444,13 +444,13 @@ socksvr_file_add (unix_main_t * um, int fd) rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER; rp->vl_api_registration_pool_index = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + rp->clib_file_index = clib_file_add (fm, &template); } static clib_error_t * -socksvr_accept_ready (unix_file_t * uf) +socksvr_accept_ready (clib_file_t * uf) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; struct sockaddr_in client_addr; int client_fd; int client_len; @@ -468,12 +468,12 @@ socksvr_accept_ready (unix_file_t * uf) if (client_fd < 0) return clib_error_return_unix (0, "socksvr_accept_ready: accept"); - socksvr_file_add (um, client_fd); + socksvr_file_add (fm, client_fd); return 0; } static clib_error_t * -socksvr_bogus_write (unix_file_t * uf) +socksvr_bogus_write (clib_file_t * uf) { clib_warning ("why am I here?"); return 0; @@ -525,7 +525,7 @@ vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) vl_msg_api_send (regp, (u8 *) rp); - unix_file_del (&unix_main, unix_main.file_pool + regp->unix_file_index); + clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index); vl_free_socket_registration_index (mp->index); } @@ -542,8 +542,8 @@ _(SOCKCLNT_DELETE, sockclnt_delete) static clib_error_t * socksvr_api_init (vlib_main_t * vm) { - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; int sockfd; int one = 1; int rv; @@ -625,14 +625,14 @@ socksvr_api_init (vlib_main_t * vm) template.file_descriptor = sockfd; template.private_data = rp - socket_main.registration_pool; - rp->unix_file_index = unix_file_add (um, &template); + rp->clib_file_index = clib_file_add (fm, &template); return 0; } static clib_error_t * socket_exit (vlib_main_t * vm) { - unix_main_t *um = &unix_main; + clib_file_main_t *fm = &file_main; vl_api_registration_t *rp; /* Defensive driving in case something wipes out early */ @@ -641,7 +641,7 @@ socket_exit (vlib_main_t * vm) u32 index; /* *INDENT-OFF* */ pool_foreach (rp, socket_main.registration_pool, ({ - unix_file_del (um, um->file_pool + rp->unix_file_index); + clib_file_del (fm, fm->file_pool + rp->clib_file_index); index = rp->vl_api_registration_pool_index; vl_free_socket_registration_index (index); })); diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index e7e69214..62bb228f 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -89,7 +89,7 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, } static clib_error_t * -af_packet_fd_read_ready (unix_file_t * uf) +af_packet_fd_read_ready (clib_file_t * uf) { af_packet_main_t *apm = &af_packet_main; vnet_main_t *vnm = vnet_get_main (); @@ -281,12 +281,12 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, clib_spinlock_init (&apif->lockp); { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = af_packet_fd_read_ready; template.file_descriptor = fd; template.private_data = if_index; template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; - apif->unix_file_index = unix_file_add (&unix_main, &template); + apif->clib_file_index = clib_file_add (&file_main, &template); } /*use configured or generate random MAC address */ @@ -371,10 +371,10 @@ af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name) vnet_hw_interface_unassign_rx_thread (vnm, apif->hw_if_index, 0); /* clean up */ - if (apif->unix_file_index != ~0) + if (apif->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + apif->unix_file_index); - apif->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index); + apif->clib_file_index = ~0; } else close (apif->fd); diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 194977f0..95c7e7cf 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -32,7 +32,7 @@ typedef struct u8 *tx_ring; u32 hw_if_index; u32 sw_if_index; - u32 unix_file_index; + u32 clib_file_index; u32 next_rx_frame; u32 next_tx_frame; diff --git a/src/vnet/devices/netmap/netmap.c b/src/vnet/devices/netmap/netmap.c index 09afc764..fc49ed62 100644 --- a/src/vnet/devices/netmap/netmap.c +++ b/src/vnet/devices/netmap/netmap.c @@ -36,7 +36,7 @@ netmap_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, } static clib_error_t * -netmap_fd_read_ready (unix_file_t * uf) +netmap_fd_read_ready (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); netmap_main_t *nm = &netmap_main; @@ -54,10 +54,10 @@ netmap_fd_read_ready (unix_file_t * uf) static void close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) { - if (nif->unix_file_index != ~0) + if (nif->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + nif->unix_file_index); - nif->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + nif->clib_file_index = ~0; } else if (nif->fd > -1) close (nif->fd); @@ -137,7 +137,7 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, pool_get (nm->interfaces, nif); nif->if_index = nif - nm->interfaces; nif->fd = fd; - nif->unix_file_index = ~0; + nif->clib_file_index = ~0; vec_validate (req, 0); nif->req = req; @@ -188,11 +188,11 @@ netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, clib_spinlock_init (&nif->lockp); { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = netmap_fd_read_ready; template.file_descriptor = nif->fd; template.private_data = nif->if_index; - nif->unix_file_index = unix_file_add (&unix_main, &template); + nif->clib_file_index = clib_file_add (&file_main, &template); } /*use configured or generate random MAC address */ diff --git a/src/vnet/devices/netmap/netmap.h b/src/vnet/devices/netmap/netmap.h index e04f045d..04731890 100644 --- a/src/vnet/devices/netmap/netmap.h +++ b/src/vnet/devices/netmap/netmap.h @@ -50,7 +50,7 @@ typedef struct uword if_index; u32 hw_if_index; u32 sw_if_index; - u32 unix_file_index; + u32 clib_file_index; u32 per_interface_next_index; u8 is_admin_up; diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5fe378cb..2af96ee7 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -89,7 +89,7 @@ #define UNIX_GET_FD(unixfd_idx) \ (unixfd_idx != ~0) ? \ - pool_elt_at_index (unix_main.file_pool, \ + pool_elt_at_index (file_main.file_pool, \ unixfd_idx)->file_descriptor : -1; #define foreach_virtio_trace_flags \ @@ -477,7 +477,7 @@ vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) } static clib_error_t * -vhost_user_callfd_read_ready (unix_file_t * uf) +vhost_user_callfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; @@ -488,7 +488,7 @@ vhost_user_callfd_read_ready (unix_file_t * uf) } static clib_error_t * -vhost_user_kickfd_read_ready (unix_file_t * uf) +vhost_user_kickfd_read_ready (clib_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; @@ -569,16 +569,16 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid) vhost_user_vring_t *vring = &vui->vrings[qid]; if (vring->kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->kickfd_idx = ~0; } if (vring->callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vring->callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vring->callfd_idx = ~0; } if (vring->errfd != -1) @@ -597,10 +597,10 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui) vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); - if (vui->unix_file_index != ~0) + if (vui->clib_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + vui->unix_file_index); - vui->unix_file_index = ~0; + clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index); + vui->clib_file_index = ~0; } vui->is_up = 0; @@ -654,7 +654,7 @@ vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len) } static clib_error_t * -vhost_user_socket_read (unix_file_t * uf) +vhost_user_socket_read (clib_file_t * uf) { int n, i; int fd, number_of_fds = 0; @@ -666,7 +666,7 @@ vhost_user_socket_read (unix_file_t * uf) vhost_user_intf_t *vui; struct cmsghdr *cmsg; u8 q; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vnet_main_t *vnm = vnet_get_main (); vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data); @@ -927,9 +927,9 @@ vhost_user_socket_read (unix_file_t * uf) /* if there is old fd, delete and close it */ if (vui->vrings[q].callfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].callfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].callfd_idx = ~0; } @@ -945,7 +945,7 @@ vhost_user_socket_read (unix_file_t * uf) template.file_descriptor = fds[0]; template.private_data = ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q; - vui->vrings[q].callfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template); } else vui->vrings[q].callfd_idx = ~0; @@ -959,9 +959,9 @@ vhost_user_socket_read (unix_file_t * uf) if (vui->vrings[q].kickfd_idx != ~0) { - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->vrings[q].kickfd_idx); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->vrings[q].kickfd_idx = ~0; } @@ -978,7 +978,7 @@ vhost_user_socket_read (unix_file_t * uf) template.private_data = (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) + q; - vui->vrings[q].kickfd_idx = unix_file_add (&unix_main, &template); + vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template); } else { @@ -1168,7 +1168,7 @@ close_socket: } static clib_error_t * -vhost_user_socket_error (unix_file_t * uf) +vhost_user_socket_error (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); vhost_user_main_t *vum = &vhost_user_main; @@ -1184,11 +1184,11 @@ vhost_user_socket_error (unix_file_t * uf) } static clib_error_t * -vhost_user_socksvr_accept_ready (unix_file_t * uf) +vhost_user_socksvr_accept_ready (clib_file_t * uf) { int client_fd, client_len; struct sockaddr_un client; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; @@ -1207,7 +1207,7 @@ vhost_user_socksvr_accept_ready (unix_file_t * uf) template.error_function = vhost_user_socket_error; template.file_descriptor = client_fd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); return 0; } @@ -2475,7 +2475,7 @@ vhost_user_process (vlib_main_t * vm, vhost_user_intf_t *vui; struct sockaddr_un sun; int sockfd; - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; f64 timeout = 3153600000.0 /* 100 years */ ; uword *event_data = 0; @@ -2496,7 +2496,7 @@ vhost_user_process (vlib_main_t * vm, pool_foreach (vui, vum->vhost_user_interfaces, { if (vui->unix_server_index == ~0) { //Nothing to do for server sockets - if (vui->unix_file_index == ~0) + if (vui->clib_file_index == ~0) { if ((sockfd < 0) && ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)) @@ -2534,7 +2534,7 @@ vhost_user_process (vlib_main_t * vm, template.file_descriptor = sockfd; template.private_data = vui - vhost_user_main.vhost_user_interfaces; - vui->unix_file_index = unix_file_add (&unix_main, &template); + vui->clib_file_index = clib_file_add (&file_main, &template); /* This sockfd is considered consumed */ sockfd = -1; @@ -2549,7 +2549,7 @@ vhost_user_process (vlib_main_t * vm, /* check if socket is alive */ int error = 0; socklen_t len = sizeof (error); - int fd = UNIX_GET_FD(vui->unix_file_index); + int fd = UNIX_GET_FD(vui->clib_file_index); int retval = getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len); @@ -2596,9 +2596,9 @@ vhost_user_term_if (vhost_user_intf_t * vui) if (vui->unix_server_index != ~0) { //Close server socket - unix_file_t *uf = pool_elt_at_index (unix_main.file_pool, + clib_file_t *uf = pool_elt_at_index (file_main.file_pool, vui->unix_server_index); - unix_file_del (&unix_main, uf); + clib_file_del (&file_main, uf); vui->unix_server_index = ~0; unlink (vui->sock_filename); } @@ -2780,11 +2780,11 @@ vhost_user_vui_init (vnet_main_t * vnm, sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); if (server_sock_fd != -1) { - unix_file_t template = { 0 }; + clib_file_t template = { 0 }; template.read_function = vhost_user_socksvr_accept_ready; template.file_descriptor = server_sock_fd; template.private_data = vui - vum->vhost_user_interfaces; //hw index - vui->unix_server_index = unix_file_add (&unix_main, &template); + vui->unix_server_index = clib_file_add (&file_main, &template); } else { @@ -2797,7 +2797,7 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->sock_errno = 0; vui->is_up = 0; vui->feature_mask = feature_mask; - vui->unix_file_index = ~0; + vui->clib_file_index = ~0; vui->log_base_addr = 0; vui->if_index = vui - vum->vhost_user_interfaces; mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index ae3b88e8..105b92b7 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -223,7 +223,7 @@ typedef struct u32 is_up; u32 admin_up; u32 unix_server_index; - u32 unix_file_index; + u32 clib_file_index; char sock_filename[256]; int sock_errno; uword if_index; diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index 67c54c3c..1ea32fa0 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -550,7 +550,7 @@ VLIB_REGISTER_NODE (punt_socket_rx_node, static) = format_punt_trace,}; static clib_error_t * -punt_socket_read_ready (unix_file_t * uf) +punt_socket_read_ready (clib_file_t * uf) { vlib_main_t *vm = vlib_get_main (); punt_main_t *pm = &punt_main; @@ -790,11 +790,11 @@ punt_config (vlib_main_t * vm, unformat_input_t * input) } /* Register socket */ - unix_main_t *um = &unix_main; - unix_file_t template = { 0 }; + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; template.read_function = punt_socket_read_ready; template.file_descriptor = pm->socket_fd; - pm->unix_file_index = unix_file_add (um, &template); + pm->clib_file_index = clib_file_add (fm, &template); pm->is_configured = true; diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h index 0103249c..9defa881 100644 --- a/src/vnet/ip/punt.h +++ b/src/vnet/ip/punt.h @@ -72,7 +72,7 @@ typedef struct char sun_path[sizeof (struct sockaddr_un)]; punt_client_t *clients_by_dst_port4; punt_client_t *clients_by_dst_port6; - u32 unix_file_index; + u32 clib_file_index; bool is_configured; vlib_node_t *interface_output_node; u32 *ready_fds; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 0fc62f6c..13154b3b 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -56,7 +56,7 @@ static void tapcli_nopunt_frame (vlib_main_t * vm, */ typedef struct { u32 unix_fd; - u32 unix_file_index; + u32 clib_file_index; u32 provision_fd; /** For counters */ u32 sw_if_index; @@ -137,8 +137,6 @@ typedef struct { vlib_main_t * vlib_main; /** convenience - vnet_main_t */ vnet_main_t * vnet_main; - /** convenience - unix_main_t */ - unix_main_t * unix_main; } tapcli_main_t; static tapcli_main_t tapcli_main; @@ -453,12 +451,12 @@ VLIB_REGISTER_NODE (tapcli_rx_node, static) = { /** * @brief Gets called when file descriptor is ready from epoll. * - * @param *uf - unix_file_t + * @param *uf - clib_file_t * * @return error - clib_error_t * */ -static clib_error_t * tapcli_read_ready (unix_file_t * uf) +static clib_error_t * tapcli_read_ready (clib_file_t * uf) { vlib_main_t * vm = vlib_get_main(); tapcli_main_t * tm = &tapcli_main; @@ -999,10 +997,10 @@ int vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t *ap) } { - unix_file_t template = {0}; + clib_file_t template = {0}; template.read_function = tapcli_read_ready; template.file_descriptor = dev_net_tun_fd; - ti->unix_file_index = unix_file_add (&unix_main, &template); + ti->clib_file_index = clib_file_add (&file_main, &template); ti->unix_fd = dev_net_tun_fd; ti->provision_fd = dev_tap_fd; clib_memcpy (&ti->ifr, &ifr, sizeof (ifr)); @@ -1079,9 +1077,9 @@ static int tapcli_tap_disconnect (tapcli_interface_t *ti) // bring interface down vnet_sw_interface_set_flags (vnm, sw_if_index, 0); - if (ti->unix_file_index != ~0) { - unix_file_del (&unix_main, unix_main.file_pool + ti->unix_file_index); - ti->unix_file_index = ~0; + if (ti->clib_file_index != ~0) { + clib_file_del (&file_main, file_main.file_pool + ti->clib_file_index); + ti->clib_file_index = ~0; } else close(ti->unix_fd); @@ -1455,7 +1453,6 @@ tapcli_init (vlib_main_t * vm) tm->vlib_main = vm; tm->vnet_main = vnet_get_main(); - tm->unix_main = &unix_main; tm->mtu_bytes = TAP_MTU_DEFAULT; tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 2c403679..9616feb2 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -104,7 +104,7 @@ typedef struct { mhash_t subif_mhash; /** Unix file index */ - u32 unix_file_index; + u32 clib_file_index; /** For the "normal" interface, if configured */ u32 hw_if_index, sw_if_index; @@ -388,11 +388,11 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = { /** * @brief Gets called when file descriptor is ready from epoll. * - * @param *uf - unix_file_t + * @param *uf - clib_file_t * * @return error - clib_error_t */ -static clib_error_t * tuntap_read_ready (unix_file_t * uf) +static clib_error_t * tuntap_read_ready (clib_file_t * uf) { vlib_main_t * vm = vlib_get_main(); vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index); @@ -645,10 +645,10 @@ tuntap_config (vlib_main_t * vm, unformat_input_t * input) } { - unix_file_t template = {0}; + clib_file_t template = {0}; template.read_function = tuntap_read_ready; template.file_descriptor = tm->dev_net_tun_fd; - tm->unix_file_index = unix_file_add (&unix_main, &template); + tm->clib_file_index = clib_file_add (&file_main, &template); } done: diff --git a/src/vppinfra.am b/src/vppinfra.am index 8f01114c..a5769a0d 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -183,6 +183,7 @@ nobase_include_HEADERS = \ vppinfra/error.h \ vppinfra/error_bootstrap.h \ vppinfra/fifo.h \ + vppinfra/file.h \ vppinfra/format.h \ vppinfra/graph.h \ vppinfra/hash.h \ diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h new file mode 100644 index 00000000..69facea9 --- /dev/null +++ b/src/vppinfra/file.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * file.h: unix file handling + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_clib_file_h +#define included_clib_file_h + +#include +#include + + +struct clib_file; +typedef clib_error_t *(clib_file_function_t) (struct clib_file * f); + +typedef struct clib_file +{ + /* Unix file descriptor from open/socket. */ + u32 file_descriptor; + + u32 flags; +#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) +#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) + + /* Data available for function's use. */ + uword private_data; + + /* Functions to be called when read/write data becomes ready. */ + clib_file_function_t *read_function, *write_function, *error_function; +} clib_file_t; + +typedef enum +{ + UNIX_FILE_UPDATE_ADD, + UNIX_FILE_UPDATE_MODIFY, + UNIX_FILE_UPDATE_DELETE, +} unix_file_update_type_t; + +typedef struct +{ + /* Pool of files to poll for input/output. */ + clib_file_t *file_pool; + + void (*file_update) (clib_file_t * file, + unix_file_update_type_t update_type); + +} clib_file_main_t; + +always_inline uword +clib_file_add (clib_file_main_t * um, clib_file_t * template) +{ + clib_file_t *f; + pool_get (um->file_pool, f); + f[0] = template[0]; + um->file_update (f, UNIX_FILE_UPDATE_ADD); + return f - um->file_pool; +} + +always_inline void +clib_file_del (clib_file_main_t * um, clib_file_t * f) +{ + um->file_update (f, UNIX_FILE_UPDATE_DELETE); + close (f->file_descriptor); + f->file_descriptor = ~0; + pool_put (um->file_pool, f); +} + +always_inline void +clib_file_del_by_index (clib_file_main_t * um, uword index) +{ + clib_file_t *uf; + uf = pool_elt_at_index (um->file_pool, index); + clib_file_del (um, uf); +} + +always_inline uword +clib_file_set_data_available_to_write (clib_file_main_t * um, + u32 clib_file_index, + uword is_available) +{ + clib_file_t *uf = pool_elt_at_index (um->file_pool, clib_file_index); + uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + if ((was_available != 0) != (is_available != 0)) + { + uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } + return was_available != 0; +} + + +#endif /* included_clib_file_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 01914ce45729833cec88c65689de9a0336cd40cc Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 14 Sep 2017 19:04:50 +0200 Subject: vppinfra: add clib_mem_vm_ext_alloc function Change-Id: Iff33694fc42cc3bcc73cf1372339053a6365039c Signed-off-by: Damjan Marion --- src/plugins/dpdk/device/init.c | 6 +- src/plugins/memif/memif.c | 21 ++- src/vlib.am | 5 +- src/vlib/linux/pci.c | 25 ++-- src/vlib/linux/physmem.c | 192 ++++-------------------- src/vlib/linux/syscall.h | 58 -------- src/vlib/linux/sysfs.c | 250 ------------------------------- src/vlib/linux/sysfs.h | 44 ------ src/vlib/threads.c | 6 +- src/vlib/threads_cli.c | 6 +- src/vnet/devices/af_packet/af_packet.c | 4 +- src/vppinfra.am | 5 +- src/vppinfra/linux/mem.c | 260 +++++++++++++++++++++++++++++++++ src/vppinfra/linux/syscall.h | 56 +++++++ src/vppinfra/linux/sysfs.c | 250 +++++++++++++++++++++++++++++++ src/vppinfra/linux/sysfs.h | 46 ++++++ src/vppinfra/mem.h | 94 ++++++++++-- src/vppinfra/vm_linux_kernel.h | 78 ---------- src/vppinfra/vm_standalone.h | 74 ---------- src/vppinfra/vm_unix.h | 106 -------------- 20 files changed, 761 insertions(+), 825 deletions(-) delete mode 100644 src/vlib/linux/syscall.h delete mode 100644 src/vlib/linux/sysfs.c delete mode 100644 src/vlib/linux/sysfs.h create mode 100644 src/vppinfra/linux/mem.c create mode 100644 src/vppinfra/linux/syscall.h create mode 100644 src/vppinfra/linux/sysfs.c create mode 100644 src/vppinfra/linux/sysfs.h delete mode 100644 src/vppinfra/vm_linux_kernel.h delete mode 100644 src/vppinfra/vm_standalone.h delete mode 100644 src/vppinfra/vm_unix.h (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 95176fb8..ee61f94e 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -1040,7 +1040,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) mem = mem_by_socket[c]; page_size = 1024; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_1g = 0; @@ -1049,7 +1049,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) clib_error_free (e); page_size = 2; - e = vlib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); + e = clib_sysfs_get_free_hugepages(c, page_size * 1024, &pages_avail); if (e != 0 || pages_avail < 0 || page_size * pages_avail < mem) use_2m = 0; diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 8fec409a..6a609a57 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -267,6 +267,8 @@ memif_init_regions_and_queues (memif_if_t * mif) int i, j; u64 buffer_offset; memif_region_t *r; + clib_mem_vm_alloc_t alloc = { 0 }; + clib_error_t *err; vec_validate_aligned (mif->regions, 0, CLIB_CACHE_LINE_BYTES); r = vec_elt_at_index (mif->regions, 0); @@ -279,18 +281,15 @@ memif_init_regions_and_queues (memif_if_t * mif) mif->run.buffer_size * (1 << mif->run.log2_ring_size) * (mif->run.num_s2m_rings + mif->run.num_m2s_rings); - if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - return clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); + alloc.name = "memif region"; + alloc.size = r->region_size; + alloc.flags = CLIB_MEM_VM_F_SHARED; - if ((ftruncate (r->fd, r->region_size)) == -1) - return clib_error_return_unix (0, "ftruncate"); + err = clib_mem_vm_ext_alloc (&alloc); + if (err) + return err; - if ((r->shm = mmap (NULL, r->region_size, PROT_READ | PROT_WRITE, - MAP_SHARED, r->fd, 0)) == MAP_FAILED) - return clib_error_return_unix (0, "mmap"); + r->fd = alloc.fd; for (i = 0; i < mif->run.num_s2m_rings; i++) { diff --git a/src/vlib.am b/src/vlib.am index 41d68690..067e4afc 100644 --- a/src/vlib.am +++ b/src/vlib.am @@ -13,7 +13,7 @@ lib_LTLIBRARIES += libvlib.la -libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread -lnuma +libvlib_la_LIBADD = libvppinfra.la -ldl -lpthread libvlib_la_DEPENDENCIES = libvppinfra.la BUILT_SOURCES += vlib/config.h @@ -34,7 +34,6 @@ libvlib_la_SOURCES = \ vlib/init.c \ vlib/linux/pci.c \ vlib/linux/physmem.c \ - vlib/linux/sysfs.c \ vlib/main.c \ vlib/mc.c \ vlib/node.c \ @@ -60,8 +59,6 @@ nobase_include_HEADERS += \ vlib/global_funcs.h \ vlib/i2c.h \ vlib/init.h \ - vlib/linux/sysfs.h \ - vlib/linux/syscall.h \ vlib/main.h \ vlib/mc.h \ vlib/node_funcs.h \ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 4ce19190..790f168a 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -37,10 +37,11 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include #include #include -#include #include #include @@ -104,7 +105,7 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) format_vlib_pci_addr, &d->bus_address); s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = vlib_sysfs_link_to_name ((char *) s); + driver_name = clib_sysfs_link_to_name ((char *) s); vec_reset_length (s); if (driver_name && @@ -183,32 +184,32 @@ vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name) vec_reset_length (s); s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); s = format (s, "%v/driver_override%c", dev_dir_name, 0); if (access ((char *) s, F_OK) == 0) { - vlib_sysfs_write ((char *) s, "%s", uio_driver_name); + clib_sysfs_write ((char *) s, "%s", uio_driver_name); clear_driver_override = 1; } else { vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, + clib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, d->device_id); } vec_reset_length (s); s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0); - vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); + clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address); vec_reset_length (s); if (clear_driver_override) { s = format (s, "%v/driver_override%c", dev_dir_name, 0); - vlib_sysfs_write ((char *) s, "%c", 0); + clib_sysfs_write ((char *) s, "%c", 0); vec_reset_length (s); } @@ -602,28 +603,28 @@ scan_device (void *arg, u8 * dev_dir_name, u8 * ignored) dev->numa_node = -1; vec_reset_length (f); f = format (f, "%v/numa_node%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "%u", &dev->numa_node); + clib_sysfs_read ((char *) f, "%u", &dev->numa_node); vec_reset_length (f); f = format (f, "%v/class%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_class = tmp >> 8; vec_reset_length (f); f = format (f, "%v/vendor%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->vendor_id = tmp; vec_reset_length (f); f = format (f, "%v/device%c", dev_dir_name, 0); - vlib_sysfs_read ((char *) f, "0x%x", &tmp); + clib_sysfs_read ((char *) f, "0x%x", &tmp); dev->device_id = tmp; error = init_device (vm, dev, &pdev); vec_reset_length (f); f = format (f, "%v/driver%c", dev_dir_name, 0); - dev->driver_name = vlib_sysfs_link_to_name ((char *) f); + dev->driver_name = clib_sysfs_link_to_name ((char *) f); done: vec_free (f); diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c index d8c5dc9b..3cc42a06 100644 --- a/src/vlib/linux/physmem.c +++ b/src/vlib/linux/physmem.c @@ -43,14 +43,12 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include -#include static void * unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx, @@ -111,31 +109,6 @@ unix_physmem_free (vlib_main_t * vm, vlib_physmem_region_index_t idx, void *x) mheap_put (pr->heap, x - pr->heap); } -static u64 -get_page_paddr (int fd, uword addr) -{ - int pagesize = sysconf (_SC_PAGESIZE); - u64 seek, pagemap = 0; - - seek = ((u64) addr / pagesize) * sizeof (u64); - if (lseek (fd, seek, SEEK_SET) != seek) - { - clib_unix_warning ("lseek to 0x%llx", seek); - return 0; - } - if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) - { - clib_unix_warning ("read ptbits"); - return 0; - } - if ((pagemap & (1ULL << 63)) == 0) - return 0; - - pagemap &= pow2_mask (55); - - return pagemap * pagesize; -} - static clib_error_t * unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, u8 numa_node, u32 flags, @@ -144,13 +117,8 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, vlib_physmem_main_t *vpm = &vm->physmem_main; vlib_physmem_region_t *pr; clib_error_t *error = 0; - int pagemap_fd = -1; - u8 *mount_dir = 0; - u8 *filename = 0; - struct stat st; - int old_mpol; - int mmap_flags; - struct bitmask *old_mask = numa_allocate_nodemask (); + clib_mem_vm_alloc_t alloc = { 0 }; + if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0) return clib_error_return (0, "not allowed"); @@ -163,113 +131,32 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, goto error; } - pr->index = pr - vpm->regions; - pr->fd = -1; - pr->flags = flags; - - if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0) - == -1) - { - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = clib_error_return_unix (0, "get_mempolicy"); - goto error; - } - else - old_mpol = -1; - } + alloc.name = name; + alloc.size = size; + alloc.numa_node = numa_node; + alloc.flags = CLIB_MEM_VM_F_SHARED; if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) { - if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) - { - error = clib_error_return_unix (0, "open '/proc/self/pagemap'"); - goto error; - } - - mount_dir = format (0, "%s/physmem_region%d%c", - vlib_unix_get_runtime_dir (), pr->index, 0); - filename = format (0, "%s/mem%c", mount_dir, 0); - - unlink ((char *) mount_dir); - - error = vlib_unix_recursive_mkdir ((char *) mount_dir); - if (error) - goto error; - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - error = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - goto error; - } - - if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) - { - error = clib_error_return_unix (0, "open"); - goto error; - } - - mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB; + alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC; + alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE; } else { - if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - error = - clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)"); - goto error; - } - mmap_flags = MAP_SHARED; - } - - if (fstat (pr->fd, &st)) - { - error = clib_error_return_unix (0, "fstat"); - goto error; - } - - pr->log2_page_size = min_log2 (st.st_blksize); - pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1; - size = pr->n_pages * (1 << pr->log2_page_size); - - if ((ftruncate (pr->fd, size)) == -1) - { - error = clib_error_return_unix (0, "ftruncate length: %d", size); - goto error; - } - - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - error = vlib_sysfs_prealloc_hugepages (numa_node, - 1 << (pr->log2_page_size - 10), - pr->n_pages); - if (error) - goto error; - } - - if (old_mpol != -1) - numa_set_preferred (numa_node); - - pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0); - - if (pr->mem == MAP_FAILED) - { - pr->mem = 0; - error = clib_error_return_unix (0, "mmap"); - goto error; + alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER; } - if (old_mpol != -1 && - set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1) - { - error = clib_error_return_unix (0, "set_mempolicy"); - goto error; - } + error = clib_mem_vm_ext_alloc (&alloc); + if (error) + goto error; + pr->index = pr - vpm->regions; + pr->flags = flags; + pr->fd = alloc.fd; + pr->mem = alloc.addr; + pr->log2_page_size = alloc.log2_page_size; + pr->n_pages = alloc.n_pages; pr->size = pr->n_pages << pr->log2_page_size; pr->page_mask = (1 << pr->log2_page_size) - 1; pr->numa_node = numa_node; @@ -285,13 +172,14 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, move_pages (0, 1, &ptr, 0, &node, 0); if (numa_node != node) { - clib_warning - ("physmem page for region \'%s\' allocated on the wrong" - " numa node (requested %u actual %u)", pr->name, - pr->numa_node, node, i); + clib_warning ("physmem page for region \'%s\' allocated on the" + " wrong numa node (requested %u actual %u)", + pr->name, pr->numa_node, node, i); break; } } + pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size, + pr->n_pages); } if (flags & VLIB_PHYSMEM_F_INIT_MHEAP) @@ -309,41 +197,13 @@ unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size, *idx = pr->index; - if ((flags & VLIB_PHYSMEM_F_FAKE) == 0) - { - int i; - for (i = 0; i < pr->n_pages; i++) - { - uword vaddr = - pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size); - u64 page_paddr = get_page_paddr (pagemap_fd, vaddr); - vec_add1 (pr->page_table, page_paddr); - } - } - goto done; error: - if (pr->fd > -1) - close (pr->fd); - - if (pr->mem) - munmap (pr->mem, size); - memset (pr, 0, sizeof (*pr)); pool_put (vpm->regions, pr); done: - if (mount_dir) - { - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - vec_free (mount_dir); - } - numa_free_cpumask (old_mask); - vec_free (filename); - if (pagemap_fd > -1) - close (pagemap_fd); return error; } diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h deleted file mode 100644 index 9e37997e..00000000 --- a/src/vlib/linux/syscall.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_linux_syscall_h -#define included_linux_syscall_h - -#ifndef __NR_memfd_create -#if defined __x86_64__ -#define __NR_memfd_create 319 -#elif defined __arm__ -#define __NR_memfd_create 385 -#elif defined __aarch64__ -#define __NR_memfd_create 279 -#else -#error "__NR_memfd_create unknown for this architecture" -#endif -#endif - -static inline int -memfd_create (const char *name, unsigned int flags) -{ - return syscall (__NR_memfd_create, name, flags); -} - -#ifndef F_LINUX_SPECIFIC_BASE -#define F_LINUX_SPECIFIC_BASE 1024 -#endif -#define MFD_ALLOW_SEALING 0x0002U -#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) -#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) - -#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ -#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ -#define F_SEAL_GROW 0x0004 /* prevent file from growing */ -#define F_SEAL_WRITE 0x0008 /* prevent writes */ - - -#endif /* included_linux_syscall_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c deleted file mode 100644 index f92f9ef5..00000000 --- a/src/vlib/linux/sysfs.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include -#include -#include - -clib_error_t * -vlib_sysfs_write (char *file_name, char *fmt, ...) -{ - u8 *s; - int fd; - clib_error_t *error = 0; - - fd = open (file_name, O_WRONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - va_list va; - va_start (va, fmt); - s = va_format (0, fmt, &va); - va_end (va); - - if (write (fd, s, vec_len (s)) < 0) - error = clib_error_return_unix (0, "write `%s'", file_name); - - vec_free (s); - close (fd); - return error; -} - -clib_error_t * -vlib_sysfs_read (char *file_name, char *fmt, ...) -{ - unformat_input_t input; - u8 *s = 0; - int fd; - ssize_t sz; - uword result; - - fd = open (file_name, O_RDONLY); - if (fd < 0) - return clib_error_return_unix (0, "open `%s'", file_name); - - vec_validate (s, 4095); - - sz = read (fd, s, vec_len (s)); - if (sz < 0) - { - close (fd); - vec_free (s); - return clib_error_return_unix (0, "read `%s'", file_name); - } - - _vec_len (s) = sz; - unformat_init_vector (&input, s); - - va_list va; - va_start (va, fmt); - result = va_unformat (&input, fmt, &va); - va_end (va); - - vec_free (s); - close (fd); - - if (result == 0) - return clib_error_return (0, "unformat error"); - - return 0; -} - -u8 * -vlib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} - -clib_error_t * -vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); - vlib_sysfs_write ((char *) p, "%d", nr); - -done: - vec_free (p); - return error; -} - - -static clib_error_t * -vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node, - int page_size, int *val) -{ - clib_error_t *error = 0; - struct stat sb; - u8 *p = 0; - - p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); - - if (stat ((char *) p, &sb) == 0) - { - if (S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' is not directory", p); - goto done; - } - } - else if (numa_node == 0) - { - vec_reset_length (p); - p = format (p, "/sys/kernel/mm%c", 0); - if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) - { - error = clib_error_return (0, "'%s' does not exist or it is not " - "directory", p); - goto done; - } - } - else - { - error = clib_error_return (0, "'%s' does not exist", p); - goto done; - } - - _vec_len (p) -= 1; - p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, - type, 0); - error = vlib_sysfs_read ((char *) p, "%d", val); - -done: - vec_free (p); - return error; -} - -clib_error_t * -vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size, - int *v) -{ - return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); -} - -clib_error_t * -vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr) -{ - clib_error_t *error = 0; - int n, needed; - error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n); - if (error) - return error; - needed = nr - n; - if (needed <= 0) - return 0; - - error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n); - if (error) - return error; - clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", - needed, page_size, numa_node); - return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); -} - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h deleted file mode 100644 index 14b71317..00000000 --- a/src/vlib/linux/sysfs.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_linux_sysfs_h -#define included_linux_sysfs_h - -clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); - -clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); - -u8 *vlib_sysfs_link_to_name (char *link); - -clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node, - int page_size, int nr); -clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, - int page_size, int *v); -clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node, - int page_size, int nr); - -#endif /* included_linux_sysfs_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 2d9ce84a..f9c7043c 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -289,7 +289,7 @@ sort_registrations_by_no_clone (void *a0, void *a1) } static uword * -vlib_sysfs_list_to_bitmap (char *filename) +clib_sysfs_list_to_bitmap (char *filename) { FILE *fp; uword *r = 0; @@ -331,9 +331,9 @@ vlib_thread_init (vlib_main_t * vm) /* get bitmaps of active cpu cores and sockets */ tm->cpu_core_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); tm->cpu_socket_bitmap = - vlib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); + clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index f8d5d8f9..02bdea5c 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -15,10 +15,10 @@ #define _GNU_SOURCE #include +#include #include #include -#include #include static u8 * @@ -98,14 +98,14 @@ show_threads_fn (vlib_main_t * vm, u8 *p = 0; p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0); - vlib_sysfs_read ((char *) p, "%d", &core_id); + clib_sysfs_read ((char *) p, "%d", &core_id); vec_reset_length (p); p = format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, lcore, 0); - vlib_sysfs_read ((char *) p, "%d", &socket_id); + clib_sysfs_read ((char *) p, "%d", &socket_id); vec_free (p); line = format (line, "%-7u%-7u%-7u%", lcore, core_id, socket_id); diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 62bb228f..32696014 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -24,9 +24,9 @@ #include #include +#include #include #include -#include #include #include @@ -75,7 +75,7 @@ af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, { s = format (0, "/sys/class/net/%s/mtu%c", apif->host_if_name, 0); - error = vlib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); + error = clib_sysfs_write ((char *) s, "%d", hi->max_packet_bytes); vec_free (s); if (error) diff --git a/src/vppinfra.am b/src/vppinfra.am index a5769a0d..daca9954 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -188,6 +188,8 @@ nobase_include_HEADERS = \ vppinfra/graph.h \ vppinfra/hash.h \ vppinfra/heap.h \ + vppinfra/linux/sysfs.h \ + vppinfra/linux/syscall.h \ vppinfra/lock.h \ vppinfra/longjmp.h \ vppinfra/macros.h \ @@ -233,7 +235,6 @@ nobase_include_HEADERS = \ vppinfra/vector_neon.h \ vppinfra/vector_sse2.h \ vppinfra/valgrind.h \ - vppinfra/vm_unix.h \ vppinfra/xxhash.h \ vppinfra/xy.h \ vppinfra/zvec.h @@ -291,6 +292,8 @@ CLIB_CORE = \ libvppinfra_la_SOURCES = \ $(CLIB_CORE) \ vppinfra/elf_clib.c \ + vppinfra/linux/mem.c \ + vppinfra/linux/sysfs.c \ vppinfra/socket.c \ vppinfra/timer.c \ vppinfra/unix-formats.c \ diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c new file mode 100644 index 00000000..665ddf61 --- /dev/null +++ b/src/vppinfra/linux/mem.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int +clib_mem_vm_get_log2_page_size (int fd) +{ + struct stat st = { 0 }; + if (fstat (fd, &st)) + return 0; + return min_log2 (st.st_blksize); +} + +clib_error_t * +clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a) +{ + int fd = -1; + clib_error_t *err = 0; + void *addr = 0; + u8 *filename = 0; + int mmap_flags = MAP_SHARED; + int log2_page_size; + int n_pages; + int old_mpol = -1; + u64 old_mask[16] = { 0 }; + + /* save old numa mem policy if needed */ + if (a->flags & (CLIB_MEM_VM_F_NUMA_PREFER | CLIB_MEM_VM_F_NUMA_FORCE)) + { + int rv; + rv = + get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0); + + if (rv == -1) + { + if ((a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0) + { + err = clib_error_return_unix (0, "get_mempolicy"); + goto error; + } + else + old_mpol = -1; + } + } + + /* if we are creating shared segment, we need file descriptor */ + if (a->flags & CLIB_MEM_VM_F_SHARED) + { + /* if hugepages are needed we need to create mount point */ + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + char *mount_dir; + char template[] = "/tmp/hugepage_mount.XXXXXX"; + + mount_dir = mkdtemp (template); + if (mount_dir == 0) + return clib_error_return_unix (0, "mkdtemp \'%s\'", template); + + if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) + { + err = clib_error_return_unix (0, "mount hugetlb directory '%s'", + mount_dir); + goto error; + } + + filename = format (0, "%s/%s%c", mount_dir, a->name, 0); + + if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + err = clib_error_return_unix (0, "open"); + goto error; + } + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + mmap_flags |= MAP_LOCKED; + } + else + { + if ((fd = memfd_create (a->name, MFD_ALLOW_SEALING)) == -1) + { + err = clib_error_return_unix (0, "memfd_create"); + goto error; + } + + if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) + { + err = clib_error_return_unix (0, "fcntl (F_ADD_SEALS)"); + goto error; + } + } + log2_page_size = clib_mem_vm_get_log2_page_size (fd); + } + else /* not CLIB_MEM_VM_F_SHARED */ + { + if (a->flags & CLIB_MEM_VM_F_HUGETLB) + { + mmap_flags |= MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = 21; + } + else + { + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + log2_page_size = min_log2 (sysconf (_SC_PAGESIZE)); + } + } + + n_pages = ((a->size - 1) >> log2_page_size) + 1; + + + if (a->flags & CLIB_MEM_VM_F_HUGETLB_PREALLOC) + { + err = clib_sysfs_prealloc_hugepages (a->numa_node, + 1 << (log2_page_size - 10), + n_pages); + if (err) + goto error; + + } + + if (fd != -1) + if ((ftruncate (fd, a->size)) == -1) + { + err = clib_error_return_unix (0, "ftruncate"); + goto error; + } + + if (old_mpol != -1) + { + int rv; + u64 mask[16] = { 0 }; + mask[0] = 1 << a->numa_node; + rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1); + if (rv) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + } + + addr = mmap (0, a->size, (PROT_READ | PROT_WRITE), mmap_flags, fd, 0); + if (addr == MAP_FAILED) + { + err = clib_error_return_unix (0, "mmap"); + goto error; + } + + /* re-apply ole numa memory policy */ + if (old_mpol != -1 && + set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1) + { + err = clib_error_return_unix (0, "set_mempolicy"); + goto error; + } + + a->log2_page_size = log2_page_size; + a->n_pages = n_pages; + a->addr = addr; + a->fd = fd; + goto done; + +error: + if (fd != -1) + close (fd); + +done: + vec_free (filename); + return err; +} + +u64 * +clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages) +{ + int pagesize = sysconf (_SC_PAGESIZE); + int fd; + int i; + u64 *r = 0; + + if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) + return 0; + + for (i = 0; i < n_pages; i++) + { + u64 seek, pagemap = 0; + uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size); + seek = ((u64) vaddr / pagesize) * sizeof (u64); + if (lseek (fd, seek, SEEK_SET) != seek) + goto done; + + if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap))) + goto done; + + if ((pagemap & (1ULL << 63)) == 0) + goto done; + + pagemap &= pow2_mask (55); + vec_add1 (r, pagemap * pagesize); + } + +done: + close (fd); + if (vec_len (r) != n_pages) + { + vec_free (r); + return 0; + } + return r; +} + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h new file mode 100644 index 00000000..f8ec5919 --- /dev/null +++ b/src/vppinfra/linux/syscall.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_syscall_h +#define included_linux_syscall_h + +#include +#include + +static inline long +set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode) +{ + return syscall (__NR_set_mempolicy, mode, nodemask, maxnode); +} + +static inline int +get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode, + void *addr, unsigned long flags) +{ + return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags); +} + +static inline long +move_pages (int pid, unsigned long count, void **pages, const int *nodes, + int *status, int flags) +{ + return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags); +} + +static inline int +memfd_create (const char *name, unsigned int flags) +{ + return syscall (__NR_memfd_create, name, flags); +} + +#endif /* included_linux_syscall_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c new file mode 100644 index 00000000..5f611e6a --- /dev/null +++ b/src/vppinfra/linux/sysfs.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +clib_error_t * +clib_sysfs_write (char *file_name, char *fmt, ...) +{ + u8 *s; + int fd; + clib_error_t *error = 0; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + error = clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return error; +} + +clib_error_t * +clib_sysfs_read (char *file_name, char *fmt, ...) +{ + unformat_input_t input; + u8 *s = 0; + int fd; + ssize_t sz; + uword result; + + fd = open (file_name, O_RDONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + vec_validate (s, 4095); + + sz = read (fd, s, vec_len (s)); + if (sz < 0) + { + close (fd); + vec_free (s); + return clib_error_return_unix (0, "read `%s'", file_name); + } + + _vec_len (s) = sz; + unformat_init_vector (&input, s); + + va_list va; + va_start (va, fmt); + result = va_unformat (&input, fmt, &va); + va_end (va); + + vec_free (s); + close (fd); + + if (result == 0) + return clib_error_return (0, "unformat error"); + + return 0; +} + +u8 * +clib_sysfs_link_to_name (char *link) +{ + char *p, buffer[64]; + unformat_input_t in; + u8 *s = 0; + int r; + + r = readlink (link, buffer, sizeof (buffer) - 1); + + if (r < 0) + return 0; + + buffer[r] = 0; + p = strrchr (buffer, '/'); + + if (!p) + return 0; + + unformat_init_string (&in, p + 1, strlen (p + 1)); + if (unformat (&in, "%s", &s) != 1) + clib_unix_warning ("no string?"); + unformat_free (&in); + + return s; +} + +clib_error_t * +clib_sysfs_set_nr_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); + clib_sysfs_write ((char *) p, "%d", nr); + +done: + vec_free (p); + return error; +} + + +static clib_error_t * +clib_sysfs_get_xxx_hugepages (char *type, int numa_node, + int page_size, int *val) +{ + clib_error_t *error = 0; + struct stat sb; + u8 *p = 0; + + p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0); + + if (stat ((char *) p, &sb) == 0) + { + if (S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' is not directory", p); + goto done; + } + } + else if (numa_node == 0) + { + vec_reset_length (p); + p = format (p, "/sys/kernel/mm%c", 0); + if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0) + { + error = clib_error_return (0, "'%s' does not exist or it is not " + "directory", p); + goto done; + } + } + else + { + error = clib_error_return (0, "'%s' does not exist", p); + goto done; + } + + _vec_len (p) -= 1; + p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, + type, 0); + error = clib_sysfs_read ((char *) p, "%d", val); + +done: + vec_free (p); + return error; +} + +clib_error_t * +clib_sysfs_get_free_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_nr_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_get_surplus_hugepages (int numa_node, int page_size, int *v) +{ + return clib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v); +} + +clib_error_t * +clib_sysfs_prealloc_hugepages (int numa_node, int page_size, int nr) +{ + clib_error_t *error = 0; + int n, needed; + error = clib_sysfs_get_free_hugepages (numa_node, page_size, &n); + if (error) + return error; + needed = nr - n; + if (needed <= 0) + return 0; + + error = clib_sysfs_get_nr_hugepages (numa_node, page_size, &n); + if (error) + return error; + clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u", + needed, page_size, numa_node); + return clib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h new file mode 100644 index 00000000..6c80cf95 --- /dev/null +++ b/src/vppinfra/linux/sysfs.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_linux_sysfs_h +#define included_linux_sysfs_h + +#include + +clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...); + +clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...); + +u8 *clib_sysfs_link_to_name (char *link); + +clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node, + int page_size, int nr); +clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_free_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node, + int page_size, int *v); +clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node, + int page_size, int nr); + +#endif /* included_linux_sysfs_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 63c5ac16..69ab8803 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -39,8 +39,11 @@ #define _included_clib_mem_h #include +#include +#include #include /* uword, etc */ +#include #include #include #include /* memcpy, memset */ @@ -264,19 +267,90 @@ void clib_mem_usage (clib_mem_usage_t * usage); u8 *format_clib_mem_usage (u8 * s, va_list * args); -/* Include appropriate VM functions depending on whether - we are compiling for linux kernel, for Unix or standalone. */ -#ifdef CLIB_LINUX_KERNEL -#include -#endif +/* Allocate virtual address space. */ +always_inline void * +clib_mem_vm_alloc (uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE; -#ifdef CLIB_UNIX -#include +#ifdef MAP_ANONYMOUS + flags |= MAP_ANONYMOUS; #endif -#ifdef CLIB_STANDALONE -#include -#endif + mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void +clib_mem_vm_free (void *addr, uword size) +{ + munmap (addr, size); +} + +always_inline void * +clib_mem_vm_unmap (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + /* To unmap we "map" with no protection. If we actually called + munmap then other callers could steal the address space. By + changing to PROT_NONE the kernel can free up the pages which is + really what we want "unmap" to mean. */ + mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +always_inline void * +clib_mem_vm_map (void *addr, uword size) +{ + void *mmap_addr; + uword flags = MAP_PRIVATE | MAP_FIXED; + + mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); + if (mmap_addr == (void *) -1) + mmap_addr = 0; + + return mmap_addr; +} + +typedef struct +{ +#define CLIB_MEM_VM_F_SHARED (1 << 0) +#define CLIB_MEM_VM_F_HUGETLB (1 << 1) +#define CLIB_MEM_VM_F_NUMA_PREFER (1 << 2) +#define CLIB_MEM_VM_F_NUMA_FORCE (1 << 3) +#define CLIB_MEM_VM_F_HUGETLB_PREALLOC (1 << 4) + u32 flags; /**< vm allocation flags: +
CLIB_MEM_VM_F_SHARED: request shared memory, file + destiptor will be provided on successful allocation. +
CLIB_MEM_VM_F_HUGETLB: request hugepages. +
CLIB_MEM_VM_F_NUMA_PREFER: numa_node field contains valid + numa node preference. +
CLIB_MEM_VM_F_NUMA_FORCE: fail if setting numa policy fails. +
CLIB_MEM_VM_F_HUGETLB_PREALLOC: pre-allocate hugepages if + number of available pages is not sufficient. + */ + char *name; /**< Name for memory allocation, set by caller. */ + uword size; /**< Allocation size, set by caller. */ + int numa_node; /**< numa node preference. Valid if CLIB_MEM_VM_F_NUMA_PREFER set. */ + void *addr; /**< Pointer to allocated memory, set on successful allocation. */ + int fd; /**< File desriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set. */ + int log2_page_size; /* Page size in log2 format, set on successful allocation. */ + int n_pages; /* Number of pages. */ +} clib_mem_vm_alloc_t; + +clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a); +int clib_mem_vm_get_log2_page_size (int fd); +u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages); + #include /* clib_panic */ diff --git a/src/vppinfra/vm_linux_kernel.h b/src/vppinfra/vm_linux_kernel.h deleted file mode 100644 index fd9e6148..00000000 --- a/src/vppinfra/vm_linux_kernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_linux_kernel_h -#define included_vm_linux_kernel_h - -#include -#include /* for GFP_* */ -#include /* for PAGE_KERNEL */ - -/* Allocate virtual address space. */ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return vmalloc (size); -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - vfree (addr); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_linux_kernel_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_standalone.h b/src/vppinfra/vm_standalone.h deleted file mode 100644 index 2cd431bc..00000000 --- a/src/vppinfra/vm_standalone.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_standalone_h -#define included_vm_standalone_h - -/* Stubs for standalone "system" which has no VM support. */ - -always_inline void * -clib_mem_vm_alloc (uword size) -{ - return 0; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - return 0; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - return addr; -} - -#endif /* included_vm_standalone_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/vm_unix.h b/src/vppinfra/vm_unix.h deleted file mode 100644 index 07e86516..00000000 --- a/src/vppinfra/vm_unix.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* - Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef included_vm_unix_h -#define included_vm_unix_h - -#include -#include - -/* Allocate virtual address space. */ -always_inline void * -clib_mem_vm_alloc (uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE; - -#ifdef MAP_ANONYMOUS - flags |= MAP_ANONYMOUS; -#endif - - mmap_addr = mmap (0, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void -clib_mem_vm_free (void *addr, uword size) -{ - munmap (addr, size); -} - -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - /* To unmap we "map" with no protection. If we actually called - munmap then other callers could steal the address space. By - changing to PROT_NONE the kernel can free up the pages which is - really what we want "unmap" to mean. */ - mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - - return mmap_addr; -} - -#endif /* included_vm_unix_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From 79bfbae54c8ecf4c87b42beee26b71e09e3eb494 Mon Sep 17 00:00:00 2001 From: Steven Date: Sat, 30 Sep 2017 10:50:20 -0700 Subject: memif: crash on slave mode (VPP-1006) Crash was seen on recent image with this BT on top of the stack (gdb) bt full (mif=0x7fffb6226568) at /vpp/build-data/../src/plugins/memif/memif.c:297 ring = 0x0 <<<<<<<<<< i = 0 j = 0 buffer_offset = 65792 r = 0x7fffb5e59f80 alloc = {flags = 1, name = 0x7fffb449f965 "memif region", size = 4260096, numa_node = 0, addr = 0x7fff41dac000, fd = 11, log2_page_size = 12, n_pages = 1041} err = 0x0 __FUNCTION__ = "memif_init_regions_and_queues" The crash happened at this line. ring = memif_get_ring (mif, MEMIF_RING_S2M, i); ring=>head = ring->tail = 0; <===== Please note that the crash is caused by dereferencing NULL rinng. Put breakpoint into the function. I notice that mif->regions[0].shm is not initialized. (gdb) p mif->regions[0].shm $8 = (void *) 0x0 It looks like we forgot to set shm after clib_mem_vm_ext_alloc(). Add the missing cide and the crash is fixed. Change-Id: Ib722a6c241c77acfa8e33962106b57faa50e1ea7 Signed-off-by: Steven (cherry picked from commit 9fefa9a697daf0e949ea7a2700ecaf2ba4d1d2cb) --- src/plugins/memif/memif.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/plugins/memif/memif.c') diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 6a609a57..a3be49fa 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -290,6 +290,7 @@ memif_init_regions_and_queues (memif_if_t * mif) return err; r->fd = alloc.fd; + r->shm = alloc.addr; for (i = 0; i < mif->run.num_s2m_rings; i++) { -- cgit 1.2.3-korg