diff options
Diffstat (limited to 'src/plugins/memif')
-rw-r--r-- | src/plugins/memif/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/plugins/memif/cli.c | 125 | ||||
-rw-r--r-- | src/plugins/memif/device.c | 315 | ||||
-rw-r--r-- | src/plugins/memif/memif.api | 92 | ||||
-rw-r--r-- | src/plugins/memif/memif.c | 477 | ||||
-rw-r--r-- | src/plugins/memif/memif_api.c | 150 | ||||
-rw-r--r-- | src/plugins/memif/memif_test.c | 197 | ||||
-rw-r--r-- | src/plugins/memif/node.c | 871 | ||||
-rw-r--r-- | src/plugins/memif/private.h | 101 | ||||
-rw-r--r-- | src/plugins/memif/socket.c | 6 |
10 files changed, 1792 insertions, 544 deletions
diff --git a/src/plugins/memif/CMakeLists.txt b/src/plugins/memif/CMakeLists.txt index b86d30adb97..4bbf6ba39db 100644 --- a/src/plugins/memif/CMakeLists.txt +++ b/src/plugins/memif/CMakeLists.txt @@ -33,3 +33,5 @@ add_vpp_plugin(memif INSTALL_HEADERS memif.h ) + +add_compile_definitions(MEMIF_CACHELINE_SIZE=${VPP_CACHE_LINE_SIZE}) diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c index b313e9737b3..c2ed63747fa 100644 --- a/src/plugins/memif/cli.c +++ b/src/plugins/memif/cli.c @@ -33,7 +33,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - int r; + clib_error_t *err; u32 socket_id; u8 *socket_filename; @@ -53,6 +53,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm, else { vec_free (socket_filename); + unformat_free (line_input); return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } @@ -72,37 +73,18 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm, return clib_error_return (0, "Invalid socket filename"); } - r = memif_socket_filename_add_del (1, socket_id, socket_filename); + err = memif_socket_filename_add_del (1, socket_id, (char *) socket_filename); vec_free (socket_filename); - if (r < 0) - { - switch (r) - { - case VNET_API_ERROR_INVALID_ARGUMENT: - return clib_error_return (0, "Invalid argument"); - case VNET_API_ERROR_SYSCALL_ERROR_1: - return clib_error_return (0, "Syscall error 1"); - case VNET_API_ERROR_ENTRY_ALREADY_EXISTS: - return clib_error_return (0, "Already exists"); - case VNET_API_ERROR_UNEXPECTED_INTF_STATE: - return clib_error_return (0, "Interface still in use"); - default: - return clib_error_return (0, "Unknown error"); - } - } - - return 0; + return err; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_socket_filename_create_command, static) = { .path = "create memif socket", .short_help = "create memif socket [id <id>] [filename <path>]", .function = memif_socket_filename_create_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * memif_socket_filename_delete_command_fn (vlib_main_t * vm, @@ -110,7 +92,6 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - int r; u32 socket_id; /* Get a line of input. */ @@ -125,6 +106,7 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm, ; else { + unformat_free (line_input); return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } @@ -137,42 +119,21 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm, return clib_error_return (0, "Invalid socket id"); } - r = memif_socket_filename_add_del (0, socket_id, 0); - - if (r < 0) - { - switch (r) - { - case VNET_API_ERROR_INVALID_ARGUMENT: - return clib_error_return (0, "Invalid argument"); - case VNET_API_ERROR_SYSCALL_ERROR_1: - return clib_error_return (0, "Syscall error 1"); - case VNET_API_ERROR_ENTRY_ALREADY_EXISTS: - return clib_error_return (0, "Already exists"); - case VNET_API_ERROR_UNEXPECTED_INTF_STATE: - return clib_error_return (0, "Interface still in use"); - default: - return clib_error_return (0, "Unknown error"); - } - } - - return 0; + return memif_socket_filename_add_del (0, socket_id, 0); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_socket_filename_delete_command, static) = { .path = "delete memif socket", .short_help = "delete memif socket [id <id>]", .function = memif_socket_filename_delete_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - int r; + clib_error_t *err; u32 ring_size = MEMIF_DEFAULT_RING_SIZE; memif_create_if_args_t args = { 0 }; args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; @@ -207,14 +168,19 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.is_master = 0; else if (unformat (line_input, "no-zero-copy")) args.is_zero_copy = 0; + else if (unformat (line_input, "use-dma")) + args.use_dma = 1; else if (unformat (line_input, "mode ip")) args.mode = MEMIF_INTERFACE_MODE_IP; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, args.hw_addr)) args.hw_addr_set = 1; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + unformat_free (line_input); + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } } unformat_free (line_input); @@ -234,27 +200,13 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input, args.rx_queues = rx_queues; args.tx_queues = tx_queues; - r = memif_create_if (vm, &args); + err = memif_create_if (vm, &args); vec_free (args.secret); - if (r <= VNET_API_ERROR_SYSCALL_ERROR_1 - && r >= VNET_API_ERROR_SYSCALL_ERROR_10) - return clib_error_return (0, "%s (errno %d)", strerror (errno), errno); - - if (r == VNET_API_ERROR_INVALID_ARGUMENT) - return clib_error_return (0, "Invalid argument"); - - if (r == VNET_API_ERROR_INVALID_INTERFACE) - return clib_error_return (0, "Invalid interface name"); - - if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) - return clib_error_return (0, "Interface with same id already exists"); - - return 0; + return err; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_create_command, static) = { .path = "create interface memif", .short_help = "create interface memif [id <id>] [socket-id <socket-id>] " @@ -264,7 +216,6 @@ VLIB_CLI_COMMAND (memif_create_command, static) = { "[mode ip] [secret <string>]", .function = memif_create_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -289,8 +240,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, vnm, &sw_if_index)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + unformat_free (line_input); + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } } unformat_free (line_input); @@ -308,13 +262,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_delete_command, static) = { .path = "delete interface memif", .short_help = "delete interface memif {<interface> | sw_if_index <sw_idx>}", .function = memif_delete_command_fn, }; -/* *INDENT-ON* */ static u8 * format_memif_if_flags (u8 * s, va_list * args) @@ -378,23 +330,22 @@ format_memif_descriptor (u8 * s, va_list * args) if (ring) { s = format (s, "%Udescriptor table:\n", format_white_space, indent); - s = - format (s, - "%Uid flags len address offset user address\n", - format_white_space, indent); - s = - format (s, - "%U===== ===== ======== ================== ====== ==================\n", - format_white_space, indent); + s = format (s, + "%Uid flags region len address offset " + " user address\n", + format_white_space, indent); + s = format (s, + "%U===== ===== ====== ======== ================== " + "========== ==================\n", + format_white_space, indent); for (slot = 0; slot < ring_size; slot++) { - s = format (s, "%U%-5d %-5d %-7d 0x%016lx %-6d 0x%016lx\n", - format_white_space, indent, slot, - ring->desc[slot].flags, - ring->desc[slot].length, + s = format (s, "%U%-5d %-5d %-6d %-7d 0x%016lx %-10d 0x%016lx\n", + format_white_space, indent, slot, ring->desc[slot].flags, + ring->desc[slot].region, ring->desc[slot].length, mif->regions[ring->desc[slot].region].shm, - ring->desc[slot].offset, memif_get_buffer (mif, ring, - slot)); + ring->desc[slot].offset, + memif_get_buffer (mif, ring, slot)); } s = format (s, "\n"); } @@ -437,7 +388,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "sockets\n"); vlib_cli_output (vm, " %-3s %-11s %s\n", "id", "listener", "filename"); - /* *INDENT-OFF* */ hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id, ({ memif_socket_file_t *msf; @@ -453,17 +403,14 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output(vm, " %-3u %-11v %s\n", sock_id, s, filename); vec_reset_length (s); })); - /* *INDENT-ON* */ vec_free (s); vlib_cli_output (vm, "\n"); if (vec_len (hw_if_indices) == 0) { - /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces) vec_add1 (hw_if_indices, mif->hw_if_index); - /* *INDENT-ON* */ } for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) @@ -498,7 +445,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, " remote-disc-reason \"%s\"", mif->remote_disc_string); - /* *INDENT-OFF* */ vec_foreach_index (i, mif->regions) { mr = vec_elt_at_index (mif->regions, i); @@ -519,20 +465,17 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input, if (show_descr) vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq); } - /* *INDENT-ON* */ } done: vec_free (hw_if_indices); return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (memif_show_command, static) = { .path = "show memif", .short_help = "show memif [<interface>] [descriptors]", .function = memif_show_command_fn, }; -/* *INDENT-ON* */ clib_error_t * memif_cli_init (vlib_main_t * vm) diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c index fc66420a6ad..017a001168b 100644 --- a/src/plugins/memif/device.c +++ b/src/plugins/memif/device.c @@ -194,8 +194,8 @@ retry: else { /* we need to rollback vectors before bailing out */ - _vec_len (ptd->buffers) = saved_ptd_buffers_len; - _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len; + vec_set_len (ptd->buffers, saved_ptd_buffers_len); + vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len); vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_ROLLBACK, 1); slot = saved_slot; @@ -369,6 +369,270 @@ no_free_slots: return n_left; } +CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm, + vlib_dma_batch_t *b) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16); + memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff); + memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head; + memif_per_thread_data_t *ptd = &dma_info->data; + + vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers)); + + dma_info->finished = 1; + vec_reset_length (ptd->buffers); + vec_reset_length (ptd->copy_ops); + + __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE); + + mq->dma_info_head++; + if (mq->dma_info_head == mq->dma_info_size) + mq->dma_info_head = 0; + mq->dma_info_full = 0; +} + +#ifndef CLIB_MARCH_VARIANT +void +memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b) +{ + return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b); +} +#endif + +static_always_inline uword +memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 *buffers, memif_if_t *mif, + memif_ring_type_t type, memif_queue_t *mq, + u32 n_left) +{ + memif_ring_t *ring; + u32 n_copy_op; + u16 ring_size, mask, slot, free_slots; + int n_retries = 5, fallback = 0; + vlib_buffer_t *b0, *b1, *b2, *b3; + memif_copy_op_t *co; + memif_region_index_t last_region = ~0; + void *last_region_shm = 0; + u16 head, tail; + memif_dma_info_t *dma_info; + memif_per_thread_data_t *ptd; + memif_main_t *mm = &memif_main; + u16 mif_id = mif - mm->interfaces; + + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + dma_info = mq->dma_info + mq->dma_info_tail; + ptd = &dma_info->data; + + /* do software fallback if dma info ring is full */ + u16 dma_mask = mq->dma_info_size - 1; + if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) || + ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0))) + { + if (!mq->dma_info_full) + mq->dma_info_full = 1; + else + fallback = 1; + } + + vlib_dma_batch_t *b = NULL; + if (PREDICT_TRUE (!fallback)) + b = vlib_dma_batch_new (vm, mif->dma_tx_config); + if (!b) + return n_left; + +retry: + + slot = tail = mq->dma_tail; + head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE); + mq->last_tail += tail - mq->last_tail; + free_slots = head - mq->dma_tail; + + while (n_left && free_slots) + { + memif_desc_t *d0; + void *mb0; + i32 src_off; + u32 bi0, dst_off, src_left, dst_left, bytes_to_copy; + u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops); + u32 saved_ptd_buffers_len = _vec_len (ptd->buffers); + u16 saved_slot = slot; + + clib_prefetch_load (&ring->desc[(slot + 8) & mask]); + + d0 = &ring->desc[slot & mask]; + if (PREDICT_FALSE (last_region != d0->region)) + { + last_region_shm = mif->regions[d0->region].shm; + last_region = d0->region; + } + mb0 = last_region_shm + d0->offset; + + dst_off = 0; + + /* slave is the producer, so it should be able to reset buffer length */ + dst_left = d0->length; + + if (PREDICT_TRUE (n_left >= 4)) + vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD); + bi0 = buffers[0]; + + next_in_chain: + + b0 = vlib_get_buffer (vm, bi0); + src_off = b0->current_data; + src_left = b0->current_length; + + while (src_left) + { + if (PREDICT_FALSE (dst_left == 0)) + { + if (free_slots) + { + d0->length = dst_off; + d0->flags = MEMIF_DESC_FLAG_NEXT; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size : + d0->length; + + if (PREDICT_FALSE (last_region != d0->region)) + { + last_region_shm = mif->regions[d0->region].shm; + last_region = d0->region; + } + mb0 = last_region_shm + d0->offset; + } + else + { + /* we need to rollback vectors before bailing out */ + vec_set_len (ptd->buffers, saved_ptd_buffers_len); + vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len); + vlib_error_count (vm, node->node_index, + MEMIF_TX_ERROR_ROLLBACK, 1); + slot = saved_slot; + goto no_free_slots; + } + } + bytes_to_copy = clib_min (src_left, dst_left); + memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off, + vec_len (ptd->buffers)); + src_off += bytes_to_copy; + dst_off += bytes_to_copy; + src_left -= bytes_to_copy; + dst_left -= bytes_to_copy; + } + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + slot++; + free_slots--; + bi0 = b0->next_buffer; + goto next_in_chain; + } + + vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES); + d0->length = dst_off; + d0->flags = 0; + + free_slots -= 1; + slot += 1; + + buffers++; + n_left--; + } +no_free_slots: + + /* copy data */ + n_copy_op = vec_len (ptd->copy_ops); + co = ptd->copy_ops; + while (n_copy_op >= 8) + { + clib_prefetch_load (co[4].data); + clib_prefetch_load (co[5].data); + clib_prefetch_load (co[6].data); + clib_prefetch_load (co[7].data); + + b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); + b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]); + b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]); + b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]); + + if (PREDICT_TRUE (!fallback)) + { + vlib_dma_batch_add (vm, b, co[0].data, + b0->data + co[0].buffer_offset, co[0].data_len); + vlib_dma_batch_add (vm, b, co[1].data, + b1->data + co[1].buffer_offset, co[1].data_len); + vlib_dma_batch_add (vm, b, co[2].data, + b2->data + co[2].buffer_offset, co[2].data_len); + vlib_dma_batch_add (vm, b, co[3].data, + b3->data + co[3].buffer_offset, co[3].data_len); + } + else + { + clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset, + co[1].data_len); + clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset, + co[2].data_len); + clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset, + co[3].data_len); + } + + co += 4; + n_copy_op -= 4; + } + while (n_copy_op) + { + b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); + if (PREDICT_TRUE (!fallback)) + vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + else + clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset, + co[0].data_len); + co += 1; + n_copy_op -= 1; + } + + /* save dma info before retry */ + dma_info->dma_tail = slot; + mq->dma_tail = slot; + vec_reset_length (ptd->copy_ops); + + if (n_left && n_retries--) + goto retry; + + if (PREDICT_TRUE (!fallback)) + { + vlib_dma_batch_set_cookie (vm, b, + ((u64) mif_id << 16) | (mq - mif->tx_queues)); + vlib_dma_batch_submit (vm, b); + dma_info->finished = 0; + + if (b->n_enq) + { + mq->dma_info_tail++; + if (mq->dma_info_tail == mq->dma_info_size) + mq->dma_info_tail = 0; + } + } + else if (fallback && dma_info->finished) + { + /* if dma has been completed, update ring immediately */ + vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers)); + vec_reset_length (ptd->buffers); + __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE); + } + + return n_left; +} + VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -376,22 +640,19 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, memif_main_t *nm = &memif_main; vnet_interface_output_runtime_t *rund = (void *) node->runtime_data; memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance); + vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame); memif_queue_t *mq; + u32 qid = tf->queue_id; u32 *from, thread_index = vm->thread_index; memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data, thread_index); - u8 tx_queues = vec_len (mif->tx_queues); uword n_left; - if (tx_queues < vlib_get_n_threads ()) - { - ASSERT (tx_queues > 0); - mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues); - } - else - mq = vec_elt_at_index (mif->tx_queues, thread_index); + ASSERT (vec_len (mif->tx_queues) > qid); + mq = vec_elt_at_index (mif->tx_queues, qid); - clib_spinlock_lock_if_init (&mif->lockp); + if (tf->shared_queue) + clib_spinlock_lock (&mq->lockp); from = vlib_frame_vector_args (frame); n_left = frame->n_vectors; @@ -402,10 +663,17 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M, mq, ptd, n_left); else - n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S, - mq, ptd, n_left); + { + if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0)) + n_left = memif_interface_tx_dma_inline (vm, node, from, mif, + MEMIF_RING_M2S, mq, n_left); + else + n_left = memif_interface_tx_inline (vm, node, from, mif, + MEMIF_RING_M2S, mq, ptd, n_left); + } - clib_spinlock_unlock_if_init (&mif->lockp); + if (tf->shared_queue) + clib_spinlock_unlock (&mq->lockp); if (n_left) vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS, @@ -418,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm, mq->int_count++; } - if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0) + if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0)) + { + if (n_left) + vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left); + } + else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0) vlib_buffer_free (vm, from, frame->n_vectors); else if (n_left) vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left); @@ -468,16 +741,6 @@ memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, return 0; } -static clib_error_t * -memif_subif_add_del_function (vnet_main_t * vnm, - u32 hw_if_index, - struct vnet_sw_interface_t *st, int is_add) -{ - /* Nothing for now */ - return 0; -} - -/* *INDENT-OFF* */ VNET_DEVICE_CLASS (memif_device_class) = { .name = "memif", .format_device_name = format_memif_device_name, @@ -488,11 +751,9 @@ VNET_DEVICE_CLASS (memif_device_class) = { .rx_redirect_to_node = memif_set_interface_next_node, .clear_counters = memif_clear_hw_interface_counters, .admin_up_down_function = memif_interface_admin_up_down, - .subif_add_del_function = memif_subif_add_del_function, .rx_mode_change_function = memif_interface_rx_mode_change, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api index 9e32db5b470..5973ad60054 100644 --- a/src/plugins/memif/memif.api +++ b/src/plugins/memif/memif.api @@ -14,7 +14,7 @@ * limitations under the License. */ -option version = "3.0.0"; +option version = "3.1.0"; import "vnet/interface_types.api"; import "vnet/ethernet/ethernet_types.api"; @@ -43,6 +43,8 @@ enum memif_mode */ autoreply define memif_socket_filename_add_del { + option deprecated; + u32 client_index; u32 context; bool is_add; /* 0 = remove, 1 = add association */ @@ -51,6 +53,40 @@ autoreply define memif_socket_filename_add_del option vat_help = "[add|del] id <id> filename <file>"; }; +/** \brief Create or remove named socket file for memif interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 0 = remove, 1 = add association + @param socket_id - non-0 32-bit integer used to identify a socket file + ~0 means autogenerate + @param socket_filename - filename of the socket to be used for connection + establishment; id 0 always maps to default "/var/vpp/memif.sock"; + no socket filename needed when is_add == 0. + socket_filename starting with '@' will create an abstract socket + in the given namespace +*/ +define memif_socket_filename_add_del_v2 +{ + u32 client_index; + u32 context; + bool is_add; /* 0 = remove, 1 = add association */ + u32 socket_id [default=0xffffffff]; /* unique non-0 id for given socket file name */ + string socket_filename[]; /* NUL terminated filename */ + option vat_help = "[add|del] id <id> filename <file>"; +}; + +/** \brief Create memory interface socket file response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param socket_id - non-0 32-bit integer used to identify a socket file +*/ +define memif_socket_filename_add_del_v2_reply +{ + u32 context; + i32 retval; + u32 socket_id; +}; + /** \brief Create memory interface @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request @@ -70,6 +106,8 @@ autoreply define memif_socket_filename_add_del */ define memif_create { + option deprecated; + u32 client_index; u32 context; @@ -94,6 +132,58 @@ define memif_create */ define memif_create_reply { + option deprecated; + + u32 context; + i32 retval; + vl_api_interface_index_t sw_if_index; +}; + +/** \brief Create memory interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param role - role of the interface in the connection (master/slave) + @param mode - interface mode + @param rx_queues - number of rx queues (only valid for slave) + @param tx_queues - number of tx queues (only valid for slave) + @param id - 32bit integer used to authenticate and match opposite sides + of the connection + @param socket_id - socket filename id to be used for connection + establishment + @param ring_size - the number of entries of RX/TX rings + @param buffer_size - size of the buffer allocated for each ring entry + @param no_zero_copy - if true, disable zero copy + @param use_dma - if true, use dma accelerate memory copy + @param hw_addr - interface MAC address + @param secret - optional, default is "", max length 24 +*/ +define memif_create_v2 +{ + u32 client_index; + u32 context; + + vl_api_memif_role_t role; /* 0 = master, 1 = slave */ + vl_api_memif_mode_t mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */ + u8 rx_queues; /* optional, default is 1 */ + u8 tx_queues; /* optional, default is 1 */ + u32 id; /* optional, default is 0 */ + u32 socket_id; /* optional, default is 0, "/var/vpp/memif.sock" */ + u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */ + u16 buffer_size; /* optional, default is 2048 bytes */ + bool no_zero_copy; /* disable zero copy */ + bool use_dma; /* use dma acceleration */ + vl_api_mac_address_t hw_addr; /* optional, randomly generated if zero */ + string secret[24]; /* optional, default is "", max length 24 */ + option vat_help = "[id <id>] [socket-id <id>] [ring_size <size>] [buffer_size <size>] [hw_addr <mac_address>] [secret <string>] [mode ip] <master|slave>"; +}; + +/** \brief Create memory interface response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param sw_if_index - software index of the newly created interface +*/ +define memif_create_v2_reply +{ u32 context; i32 retval; vl_api_interface_index_t sw_if_index; diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c index 9bbbe7f9d89..7e3dd44db2c 100644 --- a/src/plugins/memif/memif.c +++ b/src/plugins/memif/memif.c @@ -26,7 +26,6 @@ #include <sys/un.h> #include <sys/uio.h> #include <sys/mman.h> -#include <sys/prctl.h> #include <sys/eventfd.h> #include <inttypes.h> #include <limits.h> @@ -36,6 +35,7 @@ #include <vnet/plugin/plugin.h> #include <vnet/ethernet/ethernet.h> #include <vnet/interface/rx_queue_funcs.h> +#include <vnet/interface/tx_queue_funcs.h> #include <vpp/app/version.h> #include <memif/memif.h> #include <memif/private.h> @@ -49,6 +49,14 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) return 0; } +static clib_error_t * +memif_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi, + u32 flags) +{ + /* nothing for now */ + return 0; +} + static void memif_queue_intfd_close (memif_queue_t * mq) { @@ -91,6 +99,8 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) memif_region_t *mr; memif_queue_t *mq; int i; + vlib_main_t *vm = vlib_get_main (); + int with_barrier = 0; if (mif == 0) return; @@ -132,7 +142,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) clib_mem_free (mif->sock); } - /* *INDENT-OFF* */ + if (vlib_worker_thread_barrier_held () == 0) + { + with_barrier = 1; + vlib_worker_thread_barrier_sync (vm); + } + vec_foreach_index (i, mif->rx_queues) { mq = vec_elt_at_index (mif->rx_queues, i); @@ -146,9 +161,7 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) } } vnet_hw_if_unregister_all_rx_queues (vnm, mif->hw_if_index); - vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index); - /* *INDENT-OFF* */ vec_foreach_index (i, mif->tx_queues) { mq = vec_elt_at_index (mif->tx_queues, i); @@ -158,9 +171,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) { memif_disconnect_free_zc_queue_buffer(mq, 0); } + clib_spinlock_free (&mq->lockp); } mq->ring = 0; } + vnet_hw_if_unregister_all_tx_queues (vnm, mif->hw_if_index); + vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index); /* free tx and rx queues */ vec_foreach (mq, mif->rx_queues) @@ -182,11 +198,13 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err) if (mr->fd > -1) close (mr->fd); } - /* *INDENT-ON* */ vec_free (mif->regions); vec_free (mif->remote_name); vec_free (mif->remote_if_name); clib_fifo_free (mif->msg_queue); + + if (with_barrier) + vlib_worker_thread_barrier_release (vm); } static clib_error_t * @@ -228,19 +246,22 @@ memif_int_fd_read_ready (clib_file_t * uf) clib_error_t * memif_connect (memif_if_t * mif) { + memif_main_t *mm = &memif_main; vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); clib_file_t template = { 0 }; memif_region_t *mr; - int i; + int i, j; + u32 n_txqs = 0, n_threads = vlib_get_n_threads (); clib_error_t *err = NULL; + u8 max_log2_ring_sz = 0; + int with_barrier = 0; memif_log_debug (mif, "connect %u", mif->dev_instance); vec_free (mif->local_disc_string); vec_free (mif->remote_disc_string); - /* *INDENT-OFF* */ vec_foreach (mr, mif->regions) { if (mr->shm) @@ -259,15 +280,21 @@ memif_connect (memif_if_t * mif) goto error; } } - /* *INDENT-ON* */ template.read_function = memif_int_fd_read_ready; template.write_function = memif_int_fd_write_ready; - /* *INDENT-OFF* */ + with_barrier = 1; + if (vlib_worker_thread_barrier_held ()) + with_barrier = 0; + + if (with_barrier) + vlib_worker_thread_barrier_sync (vm); + vec_foreach_index (i, mif->tx_queues) { memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); + max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size); mq->ring = mif->regions[mq->region].shm + mq->offset; if (mq->ring->cookie != MEMIF_COOKIE) @@ -275,6 +302,50 @@ memif_connect (memif_if_t * mif) err = clib_error_return (0, "wrong cookie on tx ring %u", i); goto error; } + mq->queue_index = + vnet_hw_if_register_tx_queue (vnm, mif->hw_if_index, i); + clib_spinlock_init (&mq->lockp); + + if (mif->flags & MEMIF_IF_FLAG_USE_DMA) + { + memif_dma_info_t *dma_info; + mq->dma_head = 0; + mq->dma_tail = 0; + mq->dma_info_head = 0; + mq->dma_info_tail = 0; + mq->dma_info_size = MEMIF_DMA_INFO_SIZE; + vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE, + CLIB_CACHE_LINE_BYTES); + + vec_foreach (dma_info, mq->dma_info) + { + vec_validate_aligned (dma_info->data.desc_data, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.desc_len, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.desc_status, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.copy_ops, 0, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (dma_info->data.copy_ops); + vec_validate_aligned (dma_info->data.buffers, 0, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (dma_info->data.buffers); + } + } + } + + if (vec_len (mif->tx_queues) > 0) + { + n_txqs = vec_len (mif->tx_queues); + for (j = 0; j < n_threads; j++) + { + u32 qi = mif->tx_queues[j % n_txqs].queue_index; + vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); + } } vec_foreach_index (i, mif->rx_queues) @@ -284,6 +355,8 @@ memif_connect (memif_if_t * mif) u32 qi; int rv; + max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size); + mq->ring = mif->regions[mq->region].shm + mq->offset; if (mq->ring->cookie != MEMIF_COOKIE) { @@ -293,6 +366,37 @@ memif_connect (memif_if_t * mif) qi = vnet_hw_if_register_rx_queue (vnm, mif->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY); mq->queue_index = qi; + + if (mif->flags & MEMIF_IF_FLAG_USE_DMA) + { + memif_dma_info_t *dma_info; + mq->dma_head = 0; + mq->dma_tail = 0; + mq->dma_info_head = 0; + mq->dma_info_tail = 0; + mq->dma_info_size = MEMIF_DMA_INFO_SIZE; + vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE, + CLIB_CACHE_LINE_BYTES); + vec_foreach (dma_info, mq->dma_info) + { + vec_validate_aligned (dma_info->data.desc_data, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.desc_len, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.desc_status, + pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (dma_info->data.copy_ops, 0, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (dma_info->data.copy_ops); + vec_validate_aligned (dma_info->data.buffers, 0, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (dma_info->data.buffers); + } + } + if (mq->int_fd > -1) { template.file_descriptor = mq->int_fd; @@ -324,7 +428,23 @@ memif_connect (memif_if_t * mif) vnet_hw_if_rx_queue_set_int_pending (vnm, qi); } } - /* *INDENT-ON* */ + + if (1 << max_log2_ring_sz > vec_len (mm->per_thread_data[0].desc_data)) + { + memif_per_thread_data_t *ptd; + + vec_foreach (ptd, mm->per_thread_data) + { + vec_validate_aligned (ptd->desc_data, pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (ptd->desc_len, pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (ptd->desc_status, pow2_mask (max_log2_ring_sz), + CLIB_CACHE_LINE_BYTES); + } + } + if (with_barrier) + vlib_worker_thread_barrier_release (vm); mif->flags &= ~MEMIF_IF_FLAG_CONNECTING; mif->flags |= MEMIF_IF_FLAG_CONNECTED; @@ -334,6 +454,8 @@ memif_connect (memif_if_t * mif) return 0; error: + if (with_barrier) + vlib_worker_thread_barrier_release (vm); memif_log_err (mif, "%U", format_clib_error, err); return err; } @@ -405,7 +527,6 @@ memif_init_regions_and_queues (memif_if_t * mif) if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY) { vlib_buffer_pool_t *bp; - /* *INDENT-OFF* */ vec_foreach (bp, vm->buffer_main->buffer_pools) { vlib_physmem_map_t *pm; @@ -416,7 +537,6 @@ memif_init_regions_and_queues (memif_if_t * mif) r->shm = pm->base; r->is_external = 1; } - /* *INDENT-ON* */ } for (i = 0; i < mif->run.num_s2m_rings; i++) @@ -461,7 +581,6 @@ memif_init_regions_and_queues (memif_if_t * mif) vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1, CLIB_CACHE_LINE_BYTES); - /* *INDENT-OFF* */ vec_foreach_index (i, mif->tx_queues) { memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i); @@ -470,6 +589,7 @@ memif_init_regions_and_queues (memif_if_t * mif) err = clib_error_return_unix (0, "eventfd[tx queue %u]", i); goto error; } + mq->int_clib_file_index = ~0; mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i); mq->log2_ring_size = mif->cfg.log2_ring_size; @@ -481,13 +601,11 @@ memif_init_regions_and_queues (memif_if_t * mif) vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size, CLIB_CACHE_LINE_BYTES); } - /* *INDENT-ON* */ ASSERT (mif->rx_queues == 0); vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1, CLIB_CACHE_LINE_BYTES); - /* *INDENT-OFF* */ vec_foreach_index (i, mif->rx_queues) { memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i); @@ -507,7 +625,6 @@ memif_init_regions_and_queues (memif_if_t * mif) vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size, CLIB_CACHE_LINE_BYTES); } - /* *INDENT-ON* */ return 0; @@ -558,7 +675,6 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) } last_run_duration = start_time = vlib_time_now (vm); - /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces) { memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index); @@ -583,8 +699,8 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { clib_memset (sock, 0, sizeof(clib_socket_t)); sock->config = (char *) msf->filename; - sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET | - CLIB_SOCKET_F_BLOCKING; + sock->is_seqpacket = 1; + sock->is_blocking = 1; if ((err = clib_socket_init (sock))) { @@ -611,162 +727,160 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) } } } - /* *INDENT-ON* */ last_run_duration = vlib_time_now (vm) - last_run_duration; } return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (memif_process_node,static) = { .function = memif_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "memif-process", }; -/* *INDENT-ON* */ -static int -memif_add_socket_file (u32 sock_id, u8 * socket_filename) +/* + * Returns an unused socket id, and ~0 if it can't find one. + */ +u32 +memif_get_unused_socket_id () { memif_main_t *mm = &memif_main; uword *p; - memif_socket_file_t *msf; + int i, j; - p = hash_get (mm->socket_file_index_by_sock_id, sock_id); - if (p) + static u32 seed = 0; + /* limit to 1M tries */ + for (j = 0; j < 1 << 10; j++) { - msf = pool_elt_at_index (mm->socket_files, *p); - if (strcmp ((char *) msf->filename, (char *) socket_filename) == 0) + seed = random_u32 (&seed); + for (i = 0; i < 1 << 10; i++) { - /* Silently accept identical "add". */ - return 0; + /* look around randomly generated id */ + seed += (2 * (i % 2) - 1) * i; + if (seed == (u32) ~0) + continue; + p = hash_get (mm->socket_file_index_by_sock_id, seed); + if (!p) + return seed; } - - /* But don't allow a direct add of a different filename. */ - return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; } - pool_get (mm->socket_files, msf); - clib_memset (msf, 0, sizeof (memif_socket_file_t)); - - msf->filename = socket_filename; - msf->socket_id = sock_id; - - hash_set (mm->socket_file_index_by_sock_id, sock_id, - msf - mm->socket_files); - - return 0; + return ~0; } -static int -memif_delete_socket_file (u32 sock_id) +clib_error_t * +memif_socket_filename_add_del (u8 is_add, u32 sock_id, char *sock_filename) { memif_main_t *mm = &memif_main; uword *p; memif_socket_file_t *msf; + clib_error_t *err = 0; + char *dir = 0, *tmp; + u32 idx = 0; + u8 *name = 0; - p = hash_get (mm->socket_file_index_by_sock_id, sock_id); - if (!p) - { - /* Don't delete non-existent entries. */ - return VNET_API_ERROR_INVALID_ARGUMENT; - } + /* allow adding socket id 0 */ + if (sock_id == 0 && is_add == 0) + return vnet_error (VNET_ERR_INVALID_ARGUMENT, "cannot delete socket id 0"); - msf = pool_elt_at_index (mm->socket_files, *p); - if (msf->ref_cnt > 0) + if (sock_id == ~0) + return vnet_error (VNET_ERR_INVALID_ARGUMENT, + "socked id is not specified"); + + if (is_add == 0) { - return VNET_API_ERROR_UNEXPECTED_INTF_STATE; - } + p = hash_get (mm->socket_file_index_by_sock_id, sock_id); + if (!p) + /* Don't delete non-existent entries. */ + return vnet_error (VNET_ERR_INVALID_ARGUMENT, + "socket file with id %u does not exist", sock_id); - vec_free (msf->filename); - pool_put (mm->socket_files, msf); + msf = pool_elt_at_index (mm->socket_files, *p); + if (msf->ref_cnt > 0) + return vnet_error (VNET_ERR_UNEXPECTED_INTF_STATE, + "socket file '%s' is in use", msf->filename); - hash_unset (mm->socket_file_index_by_sock_id, sock_id); + vec_free (msf->filename); + pool_put (mm->socket_files, msf); - return 0; -} - -int -memif_socket_filename_add_del (u8 is_add, u32 sock_id, u8 * sock_filename) -{ - char *dir = 0, *tmp; - u32 idx = 0; + hash_unset (mm->socket_file_index_by_sock_id, sock_id); - /* allow adding socket id 0 */ - if ((sock_id == 0 && is_add == 0) || sock_id == ~0) - { - return VNET_API_ERROR_INVALID_ARGUMENT; + return 0; } - if (is_add == 0) + if (sock_filename == 0 || sock_filename[0] == 0) + return vnet_error (VNET_ERR_INVALID_ARGUMENT, + "socket filename not specified"); + + if (clib_socket_prefix_is_valid (sock_filename)) { - return memif_delete_socket_file (sock_id); + name = format (0, "%s%c", sock_filename, 0); } - - if (sock_filename == 0 || sock_filename[0] == 0) + else if (sock_filename[0] == '/') { - return VNET_API_ERROR_INVALID_ARGUMENT; + name = format (0, "%s%c", sock_filename, 0); } - - if (sock_filename[0] != '/') + else { - clib_error_t *error; - /* copy runtime dir path */ vec_add (dir, vlib_unix_get_runtime_dir (), strlen (vlib_unix_get_runtime_dir ())); vec_add1 (dir, '/'); /* if sock_filename contains dirs, add them to path */ - tmp = strrchr ((char *) sock_filename, '/'); + tmp = strrchr (sock_filename, '/'); if (tmp) { - idx = tmp - (char *) sock_filename; + idx = tmp - sock_filename; vec_add (dir, sock_filename, idx); } vec_add1 (dir, '\0'); /* create socket dir */ - error = vlib_unix_recursive_mkdir (dir); - if (error) + if ((err = vlib_unix_recursive_mkdir (dir))) { - clib_error_free (error); - return VNET_API_ERROR_SYSCALL_ERROR_1; + clib_error_free (err); + err = vnet_error (VNET_ERR_SYSCALL_ERROR_1, + "unable to create socket dir"); + goto done; } - sock_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), - sock_filename, 0); + name = + format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), sock_filename, 0); } - else - { - sock_filename = vec_dup (sock_filename); - /* check if directory exists */ - tmp = strrchr ((char *) sock_filename, '/'); - if (tmp) + p = hash_get (mm->socket_file_index_by_sock_id, sock_id); + if (p) + { + msf = pool_elt_at_index (mm->socket_files, *p); + if (strcmp ((char *) msf->filename, (char *) name) == 0) { - idx = tmp - (char *) sock_filename; - vec_add (dir, sock_filename, idx); - vec_add1 (dir, '\0'); + /* Silently accept identical "add". */ + goto done; } - /* check dir existance and access rights for effective user/group IDs */ - if ((dir == NULL) - || - (faccessat ( /* ignored */ -1, dir, F_OK | R_OK | W_OK, AT_EACCESS) - < 0)) - { - vec_free (dir); - return VNET_API_ERROR_INVALID_ARGUMENT; - } + /* But don't allow a direct add of a different filename. */ + err = vnet_error (VNET_ERR_ENTRY_ALREADY_EXISTS, "entry already exists"); + goto done; } - vec_free (dir); - return memif_add_socket_file (sock_id, sock_filename); + pool_get (mm->socket_files, msf); + clib_memset (msf, 0, sizeof (memif_socket_file_t)); + + msf->filename = name; + msf->socket_id = sock_id; + name = 0; + + hash_set (mm->socket_file_index_by_sock_id, sock_id, msf - mm->socket_files); + +done: + vec_free (name); + vec_free (dir); + return err; } -int -memif_delete_if (vlib_main_t * vm, memif_if_t * mif) +clib_error_t * +memif_delete_if (vlib_main_t *vm, memif_if_t *mif) { vnet_main_t *vnm = vnet_get_main (); memif_main_t *mm = &memif_main; @@ -797,7 +911,6 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) } /* free interface data structures */ - clib_spinlock_free (&mif->lockp); mhash_unset (&msf->dev_instance_by_id, &mif->id, 0); /* remove socket file */ @@ -806,10 +919,8 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) if (msf->is_listener) { int i; - /* *INDENT-OFF* */ vec_foreach_index (i, msf->pending_clients) memif_socket_close (msf->pending_clients + i); - /* *INDENT-ON* */ memif_socket_close (&msf->sock); vec_free (msf->pending_clients); } @@ -827,6 +938,7 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) } } + vec_free (mif->local_disc_string); clib_memset (mif, 0, sizeof (*mif)); pool_put (mm->interfaces, mif); @@ -837,33 +949,39 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif) return 0; } -/* *INDENT-OFF* */ -VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = -{ +VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = { .name = "memif-ip", .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, + .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP, }; -/* *INDENT-ON* */ -int -memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) +static void +memif_prepare_dma_args (vlib_dma_config_t *args) +{ + args->max_batches = 256; + args->max_transfer_size = VLIB_BUFFER_DEFAULT_DATA_SIZE; + args->barrier_before_last = 1; + args->sw_fallback = 1; + args->callback_fn = NULL; +} + +clib_error_t * +memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args) { memif_main_t *mm = &memif_main; vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_main_t *vnm = vnet_get_main (); + vnet_eth_interface_registration_t eir = {}; memif_if_t *mif = 0; vnet_sw_interface_t *sw; - clib_error_t *error = 0; - int ret = 0; uword *p; - vnet_hw_interface_t *hw; memif_socket_file_t *msf = 0; - int rv = 0; + clib_error_t *err = 0; p = hash_get (mm->socket_file_index_by_sock_id, args->socket_id); if (p == 0) { - rv = VNET_API_ERROR_INVALID_ARGUMENT; + err = vnet_error (VNET_ERR_INVALID_ARGUMENT, "unknown socket id"); goto done; } @@ -874,14 +992,17 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) { if ((!msf->is_listener != !args->is_master)) { - rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + err = + vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS, + "socket file cannot be used by both master and slave"); goto done; } p = mhash_get (&msf->dev_instance_by_id, &args->id); if (p) { - rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + err = vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS, + "interface already exists"); goto done; } } @@ -889,25 +1010,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) /* Create new socket file */ if (msf->ref_cnt == 0) { - struct stat file_stat; - - /* If we are creating listener make sure file doesn't exist or if it - * exists thn delete it if it is old socket file */ - if (args->is_master && (stat ((char *) msf->filename, &file_stat) == 0)) - { - if (S_ISSOCK (file_stat.st_mode)) - { - unlink ((char *) msf->filename); - } - else - { - error = clib_error_return (0, "File exists for %s", - msf->filename); - rv = VNET_API_ERROR_VALUE_EXIST; - goto done; - } - } - mhash_init (&msf->dev_instance_by_id, sizeof (uword), sizeof (memif_interface_id_t)); msf->dev_instance_by_fd = hash_create (0, sizeof (uword)); @@ -933,8 +1035,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) bt->total_length_not_including_first_buffer = 0; vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; - /* initially prealloc copy_ops so we can use - _vec_len instead of vec_elen */ vec_validate_aligned (ptd->copy_ops, 0, CLIB_CACHE_LINE_BYTES); vec_reset_length (ptd->copy_ops); vec_validate_aligned (ptd->buffers, 0, CLIB_CACHE_LINE_BYTES); @@ -952,8 +1052,19 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) if (args->secret) mif->secret = vec_dup (args->secret); - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&mif->lockp); + /* register dma config if enabled */ + if (args->use_dma) + { + vlib_dma_config_t dma_args; + bzero (&dma_args, sizeof (dma_args)); + memif_prepare_dma_args (&dma_args); + + dma_args.max_transfers = 1 << args->log2_ring_size; + dma_args.callback_fn = memif_dma_completion_cb; + mif->dma_input_config = vlib_dma_config_add (vm, &dma_args); + dma_args.callback_fn = memif_tx_dma_completion_cb; + mif->dma_tx_config = vlib_dma_config_add (vm, &dma_args); + } if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) { @@ -969,10 +1080,13 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) args->hw_addr[0] = 2; args->hw_addr[1] = 0xfe; } - error = ethernet_register_interface (vnm, memif_device_class.index, - mif->dev_instance, args->hw_addr, - &mif->hw_if_index, - memif_eth_flag_change); + + eir.dev_class_index = memif_device_class.index; + eir.dev_instance = mif->dev_instance; + eir.address = args->hw_addr; + eir.cb.flag_change = memif_eth_flag_change; + eir.cb.set_max_frame_size = memif_eth_set_max_frame_size; + mif->hw_if_index = vnet_eth_register_interface (vnm, &eir); } else if (mif->mode == MEMIF_INTERFACE_MODE_IP) { @@ -983,11 +1097,9 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->dev_instance); } else - error = clib_error_return (0, "unsupported interface mode"); - - if (error) { - ret = VNET_API_ERROR_SYSCALL_ERROR_2; + err = + vnet_error (VNET_ERR_SYSCALL_ERROR_2, "unsupported interface mode"); goto error; } @@ -1006,7 +1118,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) /* If this is new one, start listening */ if (msf->is_listener && msf->ref_cnt == 0) { - struct stat file_stat; clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t)); ASSERT (msf->sock == 0); @@ -1014,19 +1125,15 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) clib_memset (s, 0, sizeof (clib_socket_t)); s->config = (char *) msf->filename; - s->flags = CLIB_SOCKET_F_IS_SERVER | - CLIB_SOCKET_F_ALLOW_GROUP_WRITE | - CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED; + s->local_only = 1; + s->is_server = 1; + s->allow_group_write = 1; + s->is_seqpacket = 1; + s->passcred = 1; - if ((error = clib_socket_init (s))) + if ((err = clib_socket_init (s))) { - ret = VNET_API_ERROR_SYSCALL_ERROR_4; - goto error; - } - - if (stat ((char *) msf->filename, &file_stat) == -1) - { - ret = VNET_API_ERROR_SYSCALL_ERROR_8; + err->code = VNET_ERR_SYSCALL_ERROR_4; goto error; } @@ -1047,8 +1154,10 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) mif->flags |= MEMIF_IF_FLAG_ZERO_COPY; } - hw = vnet_get_hw_interface (vnm, mif->hw_if_index); - hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE; + if (args->use_dma) + mif->flags |= MEMIF_IF_FLAG_USE_DMA; + + vnet_hw_if_set_caps (vnm, mif->hw_if_index, VNET_HW_IF_CAP_INT_MODE); vnet_hw_if_set_input_node (vnm, mif->hw_if_index, memif_input_node.index); mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0); @@ -1061,15 +1170,12 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args) error: memif_delete_if (vm, mif); - if (error) - { - memif_log_err (mif, "%U", format_clib_error, error); - clib_error_free (error); - } - return ret; + if (err) + memif_log_err (mif, "%U", format_clib_error, err); + return err; done: - return rv; + return err; } clib_error_t * @@ -1081,7 +1187,14 @@ memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) static clib_error_t *error = 0; if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; + { + if (mif->flags & MEMIF_IF_FLAG_CONNECTED) + { + vnet_hw_interface_set_flags (vnm, mif->hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + } + mif->flags |= MEMIF_IF_FLAG_ADMIN_UP; + } else mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP; @@ -1108,19 +1221,15 @@ memif_init (vlib_main_t * vm) * for socket-id 0 to MEMIF_DEFAULT_SOCKET_FILENAME in the * default run-time directory. */ - memif_socket_filename_add_del (1, 0, (u8 *) MEMIF_DEFAULT_SOCKET_FILENAME); - - return 0; + return memif_socket_filename_add_del (1, 0, MEMIF_DEFAULT_SOCKET_FILENAME); } VLIB_INIT_FUNCTION (memif_init); -/* *INDENT-OFF* */ VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, .description = "Packet Memory Interface (memif) -- Experimental", }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c index a50e7ce8882..16a52ffc0dc 100644 --- a/src/plugins/memif/memif_api.c +++ b/src/plugins/memif/memif_api.c @@ -48,8 +48,6 @@ void memif_main_t *mm = &memif_main; u8 is_add; u32 socket_id; - u32 len; - u8 *socket_filename; vl_api_memif_socket_filename_add_del_reply_t *rmp; int rv; @@ -65,24 +63,52 @@ void } /* socket filename */ - socket_filename = 0; mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0; - len = strlen ((char *) mp->socket_filename); - if (mp->is_add) + + rv = vnet_get_api_error_and_free (memif_socket_filename_add_del ( + is_add, socket_id, (char *) mp->socket_filename)); + +reply: + REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY); +} + +/** + * @brief Message handler for memif_socket_filename_add_del API. + * @param mp the vl_api_memif_socket_filename_add_del_t API message + */ +void +vl_api_memif_socket_filename_add_del_v2_t_handler ( + vl_api_memif_socket_filename_add_del_v2_t *mp) +{ + vl_api_memif_socket_filename_add_del_v2_reply_t *rmp; + memif_main_t *mm = &memif_main; + char *socket_filename = 0; + u32 socket_id; + int rv; + + /* socket_id */ + socket_id = clib_net_to_host_u32 (mp->socket_id); + if (socket_id == 0) { - vec_validate (socket_filename, len); - memcpy (socket_filename, mp->socket_filename, len); + rv = VNET_API_ERROR_INVALID_ARGUMENT; + goto reply; } - rv = memif_socket_filename_add_del (is_add, socket_id, socket_filename); + /* socket filename */ + socket_filename = vl_api_from_api_to_new_c_string (&mp->socket_filename); + if (mp->is_add && socket_id == (u32) ~0) + socket_id = memif_get_unused_socket_id (); + + rv = vnet_get_api_error_and_free ( + memif_socket_filename_add_del (mp->is_add, socket_id, socket_filename)); vec_free (socket_filename); reply: - REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY); + REPLY_MACRO2 (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_V2_REPLY, + ({ rmp->socket_id = htonl (socket_id); })); } - /** * @brief Message handler for memif_create API. * @param mp vl_api_memif_create_t * mp the api message @@ -164,17 +190,107 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp) args.hw_addr_set = 1; } - rv = memif_create_if (vm, &args); + rv = vnet_get_api_error_and_free (memif_create_if (vm, &args)); vec_free (args.secret); reply: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY, ({ rmp->sw_if_index = htonl (args.sw_if_index); })); - /* *INDENT-ON* */ +} + +/** + * @brief Message handler for memif_create_v2 API. + * @param mp vl_api_memif_create_v2_t * mp the api message + */ +void +vl_api_memif_create_v2_t_handler (vl_api_memif_create_v2_t *mp) +{ + memif_main_t *mm = &memif_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_memif_create_reply_t *rmp; + memif_create_if_args_t args = { 0 }; + u32 ring_size = MEMIF_DEFAULT_RING_SIZE; + static const u8 empty_hw_addr[6]; + int rv = 0; + mac_address_t mac; + + /* id */ + args.id = clib_net_to_host_u32 (mp->id); + + /* socket-id */ + args.socket_id = clib_net_to_host_u32 (mp->socket_id); + + /* secret */ + mp->secret[ARRAY_LEN (mp->secret) - 1] = 0; + if (strlen ((char *) mp->secret) > 0) + { + vec_validate (args.secret, strlen ((char *) mp->secret)); + strncpy ((char *) args.secret, (char *) mp->secret, + vec_len (args.secret)); + } + + /* role */ + args.is_master = (ntohl (mp->role) == MEMIF_ROLE_API_MASTER); + + /* mode */ + args.mode = ntohl (mp->mode); + + args.is_zero_copy = mp->no_zero_copy ? 0 : 1; + + args.use_dma = mp->use_dma; + + /* rx/tx queues */ + if (args.is_master == 0) + { + args.rx_queues = MEMIF_DEFAULT_RX_QUEUES; + args.tx_queues = MEMIF_DEFAULT_TX_QUEUES; + if (mp->rx_queues) + { + args.rx_queues = mp->rx_queues; + } + if (mp->tx_queues) + { + args.tx_queues = mp->tx_queues; + } + } + + /* ring size */ + if (mp->ring_size) + { + ring_size = ntohl (mp->ring_size); + } + if (!is_pow2 (ring_size)) + { + rv = VNET_API_ERROR_INVALID_ARGUMENT; + goto reply; + } + args.log2_ring_size = min_log2 (ring_size); + + /* buffer size */ + args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE; + if (mp->buffer_size) + { + args.buffer_size = ntohs (mp->buffer_size); + } + + /* MAC address */ + mac_address_decode (mp->hw_addr, &mac); + if (memcmp (&mac, empty_hw_addr, 6) != 0) + { + memcpy (args.hw_addr, &mac, 6); + args.hw_addr_set = 1; + } + + rv = vnet_api_error (memif_create_if (vm, &args)); + + vec_free (args.secret); + +reply: + REPLY_MACRO2 (VL_API_MEMIF_CREATE_V2_REPLY, + ({ rmp->sw_if_index = htonl (args.sw_if_index); })); } /** @@ -201,7 +317,7 @@ vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp) else { mif = pool_elt_at_index (mm->interfaces, hi->dev_instance); - rv = memif_delete_if (vm, mif); + rv = vnet_get_api_error_and_free (memif_delete_if (vm, mif)); } REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY); @@ -279,7 +395,6 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ pool_foreach (mif, mm->interfaces) { swif = vnet_get_sw_interface (vnm, mif->sw_if_index); @@ -289,9 +404,8 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp) vnm, swif, 0); send_memif_details (reg, mif, swif, if_name, mp->context); - _vec_len (if_name) = 0; + vec_set_len (if_name, 0); } - /* *INDENT-ON* */ vec_free (if_name); } @@ -335,7 +449,6 @@ void if (!reg) return; - /* *INDENT-OFF* */ hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id, ({ memif_socket_file_t *msf; @@ -345,7 +458,6 @@ void filename = msf->filename; send_memif_socket_filename_details(reg, sock_id, filename, mp->context); })); - /* *INDENT-ON* */ } /* Set up the API message handling tables */ diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c index 1ec6703d135..d3290ac0340 100644 --- a/src/plugins/memif/memif_test.c +++ b/src/plugins/memif/memif_test.c @@ -33,8 +33,7 @@ #include <vnet/format_fns.h> #include <memif/memif.api_enum.h> #include <memif/memif.api_types.h> -#include <vpp/api/vpe.api_types.h> -//#include <vnet/ethernet/ethernet_types.api_types.h> +#include <vlibmemory/vlib.api_types.h> typedef struct { @@ -122,6 +121,86 @@ api_memif_socket_filename_add_del (vat_main_t * vam) return ret; } +/* memif_socket_filename_add_del API */ +static int +api_memif_socket_filename_add_del_v2 (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_memif_socket_filename_add_del_v2_t *mp; + u8 is_add; + u32 socket_id; + u8 *socket_filename; + int ret; + + is_add = 1; + socket_id = ~0; + socket_filename = 0; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "id %u", &socket_id)) + ; + else if (unformat (i, "filename %s", &socket_filename)) + ; + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "add")) + is_add = 1; + else + { + vec_free (socket_filename); + clib_warning ("unknown input `%U'", format_unformat_error, i); + return -99; + } + } + + if (socket_id == 0 || socket_id == ~0) + { + vec_free (socket_filename); + errmsg ("Invalid socket id"); + return -99; + } + + if (is_add && (!socket_filename || *socket_filename == 0)) + { + vec_free (socket_filename); + errmsg ("Invalid socket filename"); + return -99; + } + + M2 (MEMIF_SOCKET_FILENAME_ADD_DEL_V2, mp, strlen ((char *) socket_filename)); + + mp->is_add = is_add; + mp->socket_id = htonl (socket_id); + char *p = (char *) &mp->socket_filename; + p += vl_api_vec_to_api_string (socket_filename, (vl_api_string_t *) p); + + vec_free (socket_filename); + + S (mp); + W (ret); + + return ret; +} + +/* memif socket-create reply handler */ +static void +vl_api_memif_socket_filename_add_del_v2_reply_t_handler ( + vl_api_memif_socket_filename_add_del_v2_reply_t *mp) +{ + vat_main_t *vam = memif_test_main.vat_main; + i32 retval = ntohl (mp->retval); + + if (retval == 0) + { + fformat (vam->ofp, "created memif socket with socket_id %d\n", + ntohl (mp->socket_id)); + } + + vam->retval = retval; + vam->result_ready = 1; +} + /* memif_socket_filename_add_del reply handler */ #define VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY_T_HANDLER static void vl_api_memif_socket_filename_add_del_reply_t_handler @@ -246,6 +325,120 @@ static void vl_api_memif_create_reply_t_handler vam->regenerate_interface_table = 1; } +/* memif-create_v2 API */ +static int +api_memif_create_v2 (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + vl_api_memif_create_v2_t *mp; + u32 id = 0; + u32 socket_id = 0; + u8 *secret = 0; + u8 role = 1; + u32 ring_size = 0; + u8 use_dma = 0; + u32 buffer_size = 0; + u8 hw_addr[6] = { 0 }; + u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES; + u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES; + int ret; + u8 mode = MEMIF_INTERFACE_MODE_ETHERNET; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "id %u", &id)) + ; + else if (unformat (i, "socket-id %u", &socket_id)) + ; + else if (unformat (i, "secret %s", &secret)) + ; + else if (unformat (i, "ring_size %u", &ring_size)) + ; + else if (unformat (i, "buffer_size %u", &buffer_size)) + ; + else if (unformat (i, "master")) + role = 0; + else if (unformat (i, "use_dma %u", &use_dma)) + ; + else if (unformat (i, "slave %U", unformat_memif_queues, &rx_queues, + &tx_queues)) + role = 1; + else if (unformat (i, "mode ip")) + mode = MEMIF_INTERFACE_MODE_IP; + else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr)) + ; + else + { + clib_warning ("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (socket_id == ~0) + { + errmsg ("invalid socket-id\n"); + return -99; + } + + if (!is_pow2 (ring_size)) + { + errmsg ("ring size must be power of 2\n"); + return -99; + } + + if (rx_queues > 255 || rx_queues < 1) + { + errmsg ("rx queue must be between 1 - 255\n"); + return -99; + } + + if (tx_queues > 255 || tx_queues < 1) + { + errmsg ("tx queue must be between 1 - 255\n"); + return -99; + } + + M2 (MEMIF_CREATE, mp, strlen ((char *) secret)); + + mp->mode = mode; + mp->id = clib_host_to_net_u32 (id); + mp->role = role; + mp->use_dma = use_dma; + mp->ring_size = clib_host_to_net_u32 (ring_size); + mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff); + mp->socket_id = clib_host_to_net_u32 (socket_id); + + char *p = (char *) &mp->secret; + p += vl_api_vec_to_api_string (secret, (vl_api_string_t *) p); + vec_free (secret); + + memcpy (mp->hw_addr, hw_addr, 6); + mp->rx_queues = rx_queues; + mp->tx_queues = tx_queues; + + S (mp); + W (ret); + return ret; +} + +/* memif-create_v2 reply handler */ +static void +vl_api_memif_create_v2_reply_t_handler (vl_api_memif_create_reply_t *mp) +{ + vat_main_t *vam = memif_test_main.vat_main; + i32 retval = ntohl (mp->retval); + + if (retval == 0) + { + fformat (vam->ofp, "created memif with sw_if_index %d\n", + ntohl (mp->sw_if_index)); + } + + vam->retval = retval; + vam->result_ready = 1; + vam->regenerate_interface_table = 1; +} + /* memif-delete API */ static int api_memif_delete (vat_main_t * vam) diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c index 794e6b713bf..70933f4aa9d 100644 --- a/src/plugins/memif/node.c +++ b/src/plugins/memif/node.c @@ -30,6 +30,8 @@ #include <memif/memif.h> #include <memif/private.h> +#define MEMIF_IP_OFFSET 14 + #define foreach_memif_input_error \ _ (BUFFER_ALLOC_FAIL, buffer_alloc, ERROR, "buffer allocation failed") \ _ (BAD_DESC, bad_desc, ERROR, "bad descriptor") \ @@ -140,96 +142,141 @@ memif_add_to_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * buffers, } } -static_always_inline uword -memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, - memif_if_t *mif, memif_ring_type_t type, u16 qid, - memif_interface_mode_t mode) +static_always_inline u16 +memif_parse_desc (memif_per_thread_data_t *ptd, memif_if_t *mif, + memif_queue_t *mq, u16 next, u16 n_avail) { - vnet_main_t *vnm = vnet_get_main (); - memif_main_t *mm = &memif_main; - memif_ring_t *ring; - memif_queue_t *mq; - u16 buffer_size = vlib_buffer_get_default_data_size (vm); - uword n_trace; - u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts; - u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; - u32 n_rx_packets = 0, n_rx_bytes = 0; - u32 n_left, n_left_to_next; - u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - vlib_buffer_t *b0, *b1, *b2, *b3; - u32 thread_index = vm->thread_index; - memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data, - thread_index); - vlib_buffer_t bt; - u16 cur_slot, last_slot, ring_size, n_slots, mask; - i16 start_offset; - u16 n_buffers = 0, n_alloc; - memif_copy_op_t *co; - memif_packet_op_t *po; - memif_region_index_t last_region = ~0; - void *last_region_shm = 0; - void *last_region_max = 0; + memif_ring_t *ring = mq->ring; + memif_desc_t *descs = ring->desc; + void **desc_data = ptd->desc_data; + u16 *desc_len = ptd->desc_len; + memif_desc_status_t *desc_status = ptd->desc_status; + u16 n_desc = 0, n_pkts = 0; + u32 i = 0; + u16 mask = pow2_mask (mq->log2_ring_size); + memif_desc_t *d = 0; + u32 slot = next; + + while (i < n_avail) + { + u8 flags; + d = descs + (slot++ & mask); + desc_data[i] = (void *) ((u64) d->region << 32 | d->offset); + desc_len[i] = d->length; + desc_status[i].as_u8 = flags = d->flags; + i++; + if (PREDICT_FALSE ((flags & MEMIF_DESC_FLAG_NEXT)) == 0) + { + n_desc = i; + if (++n_pkts == MEMIF_RX_VECTOR_SZ) + goto frame_full; + } + } +frame_full: - mq = vec_elt_at_index (mif->rx_queues, qid); - ring = mq->ring; - ring_size = 1 << mq->log2_ring_size; - mask = ring_size - 1; + /* done */ + ptd->n_packets = n_pkts; + return n_desc; +} - /* assume that somebody will want to add ethernet header on the packet - so start with IP header at offset 14 */ - start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0; +static_always_inline void +memif_desc_status_set_err (memif_desc_status_t *p, + memif_desc_status_err_code_t e) +{ + memif_desc_status_t s = { .err = 1, .err_code = e }; + p->as_u8 |= s.as_u8; +} - /* for S2M rings, we are consumers of packet buffers, and for M2S rings we - are producers of empty buffers */ - cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail; +static_always_inline void +memif_validate_desc_data (memif_per_thread_data_t *ptd, memif_if_t *mif, + u16 n_desc, int is_ethernet) +{ + void **desc_data = ptd->desc_data; + u16 *desc_len = ptd->desc_len; + memif_desc_status_t *desc_status = ptd->desc_status; + u16 n_regions = vec_len (mif->regions); + u32 n_rx_bytes = 0; + u16 max_len = 0; + u8 xor_status = 0; + + for (u32 i = 0; i < n_desc; i++) + { + u16 region = ((u64) desc_data[i]) >> 32; + u32 offset = (u64) desc_data[i]; + u16 len = desc_len[i]; + memif_region_t *r = mif->regions + region; + + if (region >= n_regions) + memif_desc_status_set_err (desc_status + i, + MEMIF_DESC_STATUS_ERR_BAD_REGION); + else if (offset + len > r->region_size) + memif_desc_status_set_err (desc_status + i, + MEMIF_DESC_STATUS_ERR_REGION_OVERRUN); + else if (is_ethernet && len > ETHERNET_MAX_PACKET_BYTES) + memif_desc_status_set_err (desc_status + i, + MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG); + else if (len == 0) + memif_desc_status_set_err (desc_status + i, + MEMIF_DESC_STATUS_ERR_ZERO_LENGTH); + else + { + desc_data[i] = r->shm + offset; + if (len > max_len) + max_len = len; + n_rx_bytes += len; + } + xor_status |= desc_status[i].as_u8; + } - if (type == MEMIF_RING_S2M) - last_slot = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE); - else - last_slot = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE); + ptd->max_desc_len = max_len; + ptd->xor_status = xor_status; + ptd->n_rx_bytes = n_rx_bytes; +} - if (cur_slot == last_slot) - goto refill; - n_slots = last_slot - cur_slot; +static_always_inline u32 +memif_process_desc (vlib_main_t *vm, vlib_node_runtime_t *node, + memif_per_thread_data_t *ptd, memif_if_t *mif) +{ + u16 buffer_size = vlib_buffer_get_default_data_size (vm); + int is_ip = mif->mode == MEMIF_INTERFACE_MODE_IP; + i16 start_offset = (is_ip) ? MEMIF_IP_OFFSET : 0; + memif_packet_op_t *po = ptd->packet_ops; + void **desc_data = ptd->desc_data; + u16 *desc_len = ptd->desc_len; + memif_desc_status_t *desc_status = ptd->desc_status; + u32 n_buffers = 0; + u32 n_left = ptd->n_packets; + u32 packet_len; + int i = -1; + int bad_packets = 0; /* construct copy and packet vector out of ring slots */ - while (n_slots && n_rx_packets < MEMIF_RX_VECTOR_SZ) + while (n_left) { u32 dst_off, src_off, n_bytes_left; - u16 s0; - memif_desc_t *d0; void *mb0; - po = ptd->packet_ops + n_rx_packets; - n_rx_packets++; po->first_buffer_vec_index = n_buffers++; - po->packet_len = 0; + + packet_len = 0; src_off = 0; dst_off = start_offset; next_slot: - clib_prefetch_load (&ring->desc[(cur_slot + 8) & mask]); - s0 = cur_slot & mask; - d0 = &ring->desc[s0]; - n_bytes_left = d0->length; + i++; /* next descriptor */ + n_bytes_left = desc_len[i]; - /* slave resets buffer length, - * so it can produce full size buffer for master - */ - if (type == MEMIF_RING_M2S) - d0->length = mif->run.buffer_size; + packet_len += n_bytes_left; + mb0 = desc_data[i]; - po->packet_len += n_bytes_left; - if (PREDICT_FALSE (last_region != d0->region)) + if (PREDICT_FALSE (desc_status[i].err)) { - last_region_shm = mif->regions[d0->region].shm; - last_region = d0->region; - last_region_max = - last_region_shm + mif->regions[last_region].region_size; + vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC, + 1); + bad_packets++; + ASSERT (n_buffers > 0); + n_buffers--; + goto next_packet; } - mb0 = last_region_shm + d0->offset; - - if (PREDICT_FALSE (mb0 + n_bytes_left > last_region_max)) - vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC, 1); else do { @@ -249,115 +296,98 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } while (PREDICT_FALSE (n_bytes_left)); - cur_slot++; - n_slots--; - if ((d0->flags & MEMIF_DESC_FLAG_NEXT) && n_slots) + if (desc_status[i].next) { src_off = 0; goto next_slot; } - } - /* allocate free buffers */ - vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES); - n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers, - mq->buffer_pool_index); - if (PREDICT_FALSE (n_alloc != n_buffers)) - { - if (n_alloc) - vlib_buffer_free (vm, ptd->buffers, n_alloc); - vlib_error_count (vm, node->node_index, - MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1); - goto refill; + /* update packet op */ + po->packet_len = packet_len; + po++; + + next_packet: + /* next packet */ + n_left--; } + ASSERT (ptd->n_packets >= bad_packets); + ptd->n_packets -= bad_packets; + return n_buffers; +} +static_always_inline void +memif_fill_buffer_mdata_simple (vlib_node_runtime_t *node, + memif_per_thread_data_t *ptd, + vlib_buffer_t **b, u16 *next, int is_ip) +{ + vlib_buffer_t bt; + u16 *dl = ptd->desc_len; + /* process buffer metadata */ + + u32 n_left = ptd->n_packets; + + /* copy template into local variable - will save per packet load */ + vlib_buffer_copy_template (&bt, &ptd->buffer_template); - /* copy data */ - n_left = vec_len (ptd->copy_ops); - co = ptd->copy_ops; while (n_left >= 8) { - clib_prefetch_load (co[4].data); - clib_prefetch_load (co[5].data); - clib_prefetch_load (co[6].data); - clib_prefetch_load (co[7].data); - - b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); - b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]); - b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]); - b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]); - - clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data, - co[0].data_len); - clib_memcpy_fast (b1->data + co[1].buffer_offset, co[1].data, - co[1].data_len); - clib_memcpy_fast (b2->data + co[2].buffer_offset, co[2].data, - co[2].data_len); - clib_memcpy_fast (b3->data + co[3].buffer_offset, co[3].data, - co[3].data_len); - - co += 4; - n_left -= 4; - } - while (n_left) - { - b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]); - clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data, - co[0].data_len); - co += 1; - n_left -= 1; - } + vlib_prefetch_buffer_header (b[4], STORE); + vlib_prefetch_buffer_header (b[5], STORE); + vlib_prefetch_buffer_header (b[6], STORE); + vlib_prefetch_buffer_header (b[7], STORE); + + vlib_buffer_copy_template (b[0], &bt); + vlib_buffer_copy_template (b[1], &bt); + vlib_buffer_copy_template (b[2], &bt); + vlib_buffer_copy_template (b[3], &bt); + + b[0]->current_length = dl[0]; + b[1]->current_length = dl[1]; + b[2]->current_length = dl[2]; + b[3]->current_length = dl[3]; + + if (is_ip) + { + next[0] = memif_next_from_ip_hdr (node, b[0]); + next[1] = memif_next_from_ip_hdr (node, b[1]); + next[2] = memif_next_from_ip_hdr (node, b[2]); + next[3] = memif_next_from_ip_hdr (node, b[3]); + } - /* release slots from the ring */ - if (type == MEMIF_RING_S2M) - { - __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE); - mq->last_head = cur_slot; - } - else - { - mq->last_tail = cur_slot; + /* next */ + n_left -= 4; + b += 4; + dl += 4; + next += 4; } - /* prepare buffer template and next indices */ - vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = - mif->sw_if_index; - vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0; - ptd->buffer_template.current_data = start_offset; - ptd->buffer_template.current_config_index = 0; - ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index; - ptd->buffer_template.ref_count = 1; - - if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + while (n_left) { - next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - if (mif->per_interface_next_index != ~0) - next_index = mif->per_interface_next_index; - else - vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index, - &ptd->buffer_template); - - vlib_get_new_next_frame (vm, node, next_index, to_next_bufs, - n_left_to_next); - if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)) - { - vlib_next_frame_t *nf; - vlib_frame_t *f; - ethernet_input_frame_t *ef; - nf = vlib_node_runtime_get_next_frame (vm, node, next_index); - f = vlib_get_frame (vm, nf->frame); - f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + /* enqueue buffer */ + vlib_buffer_copy_template (b[0], &bt); + b[0]->current_length = dl[0]; + if (is_ip) + next[0] = memif_next_from_ip_hdr (node, b[0]); - ef = vlib_frame_scalar_args (f); - ef->sw_if_index = mif->sw_if_index; - ef->hw_if_index = mif->hw_if_index; - vlib_frame_no_append (f); - } + /* next */ + n_left -= 1; + b += 1; + dl += 1; + next += 1; } +} +static_always_inline void +memif_fill_buffer_mdata (vlib_main_t *vm, vlib_node_runtime_t *node, + memif_per_thread_data_t *ptd, memif_if_t *mif, + u32 *bi, u16 *next, int is_ip) +{ + u16 buffer_size = vlib_buffer_get_default_data_size (vm); + vlib_buffer_t *b0, *b1, *b2, *b3, bt; + memif_packet_op_t *po; /* process buffer metadata */ - u32 n_from = n_rx_packets; + + u32 n_from = ptd->n_packets; po = ptd->packet_ops; - bi = to_next_bufs; /* copy template into local variable - will save per packet load */ vlib_buffer_copy_template (&bt, &ptd->buffer_template); @@ -397,20 +427,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_copy_template (b3, &bt); b0->current_length = po[0].packet_len; - n_rx_bytes += b0->current_length; b1->current_length = po[1].packet_len; - n_rx_bytes += b1->current_length; b2->current_length = po[2].packet_len; - n_rx_bytes += b2->current_length; b3->current_length = po[3].packet_len; - n_rx_bytes += b3->current_length; memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size); memif_add_to_chain (vm, b1, ptd->buffers + fbvi[1] + 1, buffer_size); memif_add_to_chain (vm, b2, ptd->buffers + fbvi[2] + 1, buffer_size); memif_add_to_chain (vm, b3, ptd->buffers + fbvi[3] + 1, buffer_size); - if (mode == MEMIF_INTERFACE_MODE_IP) + if (is_ip) { next[0] = memif_next_from_ip_hdr (node, b0); next[1] = memif_next_from_ip_hdr (node, b1); @@ -426,21 +452,18 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, } while (n_from) { - u32 fbvi[4]; + u32 fbvi[1]; /* enqueue buffer */ fbvi[0] = po[0].first_buffer_vec_index; bi[0] = ptd->buffers[fbvi[0]]; b0 = vlib_get_buffer (vm, bi[0]); vlib_buffer_copy_template (b0, &bt); b0->current_length = po->packet_len; - n_rx_bytes += b0->current_length; memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size); - if (mode == MEMIF_INTERFACE_MODE_IP) - { - next[0] = memif_next_from_ip_hdr (node, b0); - } + if (is_ip) + next[0] = memif_next_from_ip_hdr (node, b0); /* next */ n_from -= 1; @@ -448,11 +471,216 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, bi += 1; next += 1; } +} + +static_always_inline void +memif_advance_ring (memif_ring_type_t type, memif_queue_t *mq, + memif_ring_t *ring, u16 cur_slot) +{ + if (type == MEMIF_RING_S2M) + { + __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE); + mq->last_head = cur_slot; + } + else + { + mq->last_tail = cur_slot; + } +} + +static_always_inline uword +memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + memif_if_t *mif, memif_ring_type_t type, u16 qid, + memif_interface_mode_t mode) +{ + vnet_main_t *vnm = vnet_get_main (); + memif_main_t *mm = &memif_main; + memif_ring_t *ring; + memif_queue_t *mq; + u16 buffer_size = vlib_buffer_get_default_data_size (vm); + uword n_trace; + u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts; + u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; + u32 n_left_to_next; + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + vlib_buffer_t *buffer_ptrs[MEMIF_RX_VECTOR_SZ]; + u32 thread_index = vm->thread_index; + memif_per_thread_data_t *ptd = + vec_elt_at_index (mm->per_thread_data, thread_index); + u16 cur_slot, ring_size, n_slots, mask; + u16 n_buffers, n_alloc, n_desc; + i16 start_offset; + memif_copy_op_t *co; + int is_slave = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0; + int is_simple = 1; + int i; + + mq = vec_elt_at_index (mif->rx_queues, qid); + ring = mq->ring; + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0; + + if (is_slave) + { + cur_slot = mq->last_tail; + n_slots = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE) - cur_slot; + } + else + { + cur_slot = mq->last_head; + n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot; + } + + if (n_slots == 0) + { + ptd->n_packets = 0; + goto refill; + } + + n_desc = memif_parse_desc (ptd, mif, mq, cur_slot, n_slots); + + if (n_desc != ptd->n_packets) + is_simple = 0; + + cur_slot += n_desc; + + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 1); + else + memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 0); + + if (ptd->max_desc_len > buffer_size - start_offset) + is_simple = 0; + + if (ptd->xor_status != 0) + is_simple = 0; + + if (is_simple) + n_buffers = ptd->n_packets; + else + n_buffers = memif_process_desc (vm, node, ptd, mif); + + if (PREDICT_FALSE (n_buffers == 0)) + { + /* All descriptors are bad. Release slots in the ring and bail */ + memif_advance_ring (type, mq, ring, cur_slot); + goto refill; + } + + /* allocate free buffers */ + vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES); + n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers, + mq->buffer_pool_index); + if (PREDICT_FALSE (n_alloc != n_buffers)) + { + if (n_alloc) + vlib_buffer_free (vm, ptd->buffers, n_alloc); + vlib_error_count (vm, node->node_index, + MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1); + goto refill; + } + + /* copy data */ + if (is_simple) + { + int n_pkts = ptd->n_packets; + void **desc_data = ptd->desc_data; + u16 *desc_len = ptd->desc_len; + + vlib_get_buffers (vm, ptd->buffers, buffer_ptrs, n_buffers); + + for (i = 0; i + 8 < n_pkts; i++) + { + clib_prefetch_load (desc_data[i + 8]); + clib_prefetch_store (buffer_ptrs[i + 8]->data); + clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i], + desc_len[i]); + } + for (; i < n_pkts; i++) + clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i], + desc_len[i]); + } + else + { + vlib_buffer_t *b; + u32 n_pkts = vec_len (ptd->copy_ops); + co = ptd->copy_ops; + + for (i = 0; i + 8 < n_pkts; i++) + { + clib_prefetch_load (co[i + 8].data); + b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]); + clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data, + co[i].data_len); + } + for (; i < n_pkts; i++) + { + b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]); + clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data, + co[i].data_len); + } + } + + /* release slots from the ring */ + memif_advance_ring (type, mq, ring, cur_slot); + + /* prepare buffer template and next indices */ + vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index; + vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0; + ptd->buffer_template.current_data = start_offset; + ptd->buffer_template.current_config_index = 0; + ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index; + ptd->buffer_template.ref_count = 1; + + if (mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + if (mif->per_interface_next_index != ~0) + next_index = mif->per_interface_next_index; + else + vnet_feature_start_device_input (mif->sw_if_index, &next_index, + &ptd->buffer_template); + + vlib_get_new_next_frame (vm, node, next_index, to_next_bufs, + n_left_to_next); + if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)) + { + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + nf = vlib_node_runtime_get_next_frame (vm, node, next_index); + f = vlib_get_frame (vm, nf->frame); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = mif->sw_if_index; + ef->hw_if_index = mif->hw_if_index; + vlib_frame_no_append (f); + } + } + + if (is_simple) + { + vlib_buffer_copy_indices (to_next_bufs, ptd->buffers, ptd->n_packets); + if (mode == MEMIF_INTERFACE_MODE_IP) + memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 1); + else + memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 0); + } + else + { + if (mode == MEMIF_INTERFACE_MODE_IP) + memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 1); + else + memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 0); + } /* packet trace if enabled */ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)))) { - u32 n_left = n_rx_packets; + u32 n_left = ptd->n_packets; bi = to_next_bufs; next = nexts; u32 ni = next_index; @@ -483,16 +711,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (mode == MEMIF_INTERFACE_MODE_ETHERNET) { - n_left_to_next -= n_rx_packets; + n_left_to_next -= ptd->n_packets; vlib_put_next_frame (vm, node, next_index, n_left_to_next); } else - vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts, n_rx_packets); + vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts, + ptd->n_packets); - vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, thread_index, - mif->sw_if_index, n_rx_packets, - n_rx_bytes); + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes); /* refill ring with empty buffers */ refill: @@ -514,7 +742,7 @@ refill: __atomic_store_n (&ring->head, head, __ATOMIC_RELEASE); } - return n_rx_packets; + return ptd->n_packets; } static_always_inline uword @@ -675,14 +903,14 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { next0 = next1 = next2 = next3 = next_index; /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (mif->sw_if_index, - &next0, b0); - vnet_feature_start_device_input_x1 (mif->sw_if_index, - &next1, b1); - vnet_feature_start_device_input_x1 (mif->sw_if_index, - &next2, b2); - vnet_feature_start_device_input_x1 (mif->sw_if_index, - &next3, b3); + vnet_feature_start_device_input (mif->sw_if_index, &next0, + b0); + vnet_feature_start_device_input (mif->sw_if_index, &next1, + b1); + vnet_feature_start_device_input (mif->sw_if_index, &next2, + b2); + vnet_feature_start_device_input (mif->sw_if_index, &next3, + b3); } } @@ -730,8 +958,8 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, { next0 = next_index; /* redirect if feature path enabled */ - vnet_feature_start_device_input_x1 (mif->sw_if_index, - &next0, b0); + vnet_feature_start_device_input (mif->sw_if_index, &next0, + b0); } } @@ -826,6 +1054,244 @@ done: return n_rx_packets; } +CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm, + vlib_dma_batch_t *b) +{ + memif_main_t *mm = &memif_main; + memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16); + u32 thread_index = vm->thread_index; + u32 n_left_to_next = 0; + u16 nexts[MEMIF_RX_VECTOR_SZ], *next; + u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi; + uword n_trace; + memif_dma_info_t *dma_info; + u16 qid = b->cookie & 0xffff; + memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid); + dma_info = mq->dma_info + mq->dma_info_head; + memif_per_thread_data_t *ptd = &dma_info->data; + vnet_main_t *vnm = vnet_get_main (); + + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + + __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE); + + /* prepare buffer template and next indices */ + i16 start_offset = + (dma_info->mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0; + vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index; + vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0; + ptd->buffer_template.current_data = start_offset; + ptd->buffer_template.current_config_index = 0; + ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index; + ptd->buffer_template.ref_count = 1; + + if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + if (mif->per_interface_next_index != ~0) + next_index = mif->per_interface_next_index; + else + vnet_feature_start_device_input (mif->sw_if_index, &next_index, + &ptd->buffer_template); + + vlib_get_new_next_frame (vm, dma_info->node, next_index, to_next_bufs, + n_left_to_next); + if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)) + { + vlib_next_frame_t *nf; + vlib_frame_t *f; + ethernet_input_frame_t *ef; + nf = + vlib_node_runtime_get_next_frame (vm, dma_info->node, next_index); + f = vlib_get_frame (vm, nf->frame); + f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX; + + ef = vlib_frame_scalar_args (f); + ef->sw_if_index = mif->sw_if_index; + ef->hw_if_index = mif->hw_if_index; + vlib_frame_no_append (f); + } + } + + vec_reset_length (ptd->buffers); + + if (dma_info->mode == MEMIF_INTERFACE_MODE_IP) + memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts, + 1); + else + memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts, + 0); + + /* packet trace if enabled */ + if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, dma_info->node)))) + { + u32 n_left = ptd->n_packets; + bi = to_next_bufs; + next = nexts; + u32 ni = next_index; + while (n_trace && n_left) + { + vlib_buffer_t *b; + memif_input_trace_t *tr; + if (dma_info->mode != MEMIF_INTERFACE_MODE_ETHERNET) + ni = next[0]; + b = vlib_get_buffer (vm, bi[0]); + if (PREDICT_TRUE (vlib_trace_buffer (vm, dma_info->node, ni, b, + /* follow_chain */ 0))) + { + tr = vlib_add_trace (vm, dma_info->node, b, sizeof (*tr)); + tr->next_index = ni; + tr->hw_if_index = mif->hw_if_index; + tr->ring = qid; + n_trace--; + } + + /* next */ + n_left--; + bi++; + next++; + } + vlib_set_trace_count (vm, dma_info->node, n_trace); + } + + if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET) + { + n_left_to_next -= ptd->n_packets; + vlib_put_next_frame (vm, dma_info->node, next_index, n_left_to_next); + } + else + vlib_buffer_enqueue_to_next (vm, dma_info->node, to_next_bufs, nexts, + ptd->n_packets); + + vlib_increment_combined_counter ( + vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, + thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes); + + mq->dma_info_head++; + if (mq->dma_info_head == mq->dma_info_size) + mq->dma_info_head = 0; + + return; +} + +#ifndef CLIB_MARCH_VARIANT +void +memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b) +{ + return CLIB_MARCH_FN_SELECT (memif_dma_completion_cb) (vm, b); +} +#endif + +static_always_inline uword +memif_device_input_inline_dma (vlib_main_t *vm, vlib_node_runtime_t *node, + memif_if_t *mif, memif_ring_type_t type, + u16 qid, memif_interface_mode_t mode) +{ + memif_main_t *mm = &memif_main; + memif_ring_t *ring; + memif_queue_t *mq; + memif_per_thread_data_t *ptd; + u16 cur_slot, n_slots; + u16 n_buffers, n_alloc, n_desc; + memif_copy_op_t *co; + memif_dma_info_t *dma_info; + + u16 mif_id = mif - mm->interfaces; + u32 i; + + mq = vec_elt_at_index (mif->rx_queues, qid); + ring = mq->ring; + + cur_slot = mq->last_head; + n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot; + + if (n_slots == 0) + return 0; + + if ((mq->dma_info_tail + 1 == mq->dma_info_head) || + ((mq->dma_info_head == mq->dma_info_size - 1) && + (mq->dma_info_tail == 0))) + return 0; + + vlib_dma_batch_t *db; + db = vlib_dma_batch_new (vm, mif->dma_input_config); + if (!db) + return 0; + + dma_info = mq->dma_info + mq->dma_info_tail; + dma_info->node = node; + dma_info->mode = mode; + ptd = &dma_info->data; + vec_validate_aligned (dma_info->data.desc_len, + pow2_mask (mq->log2_ring_size), CLIB_CACHE_LINE_BYTES); + + n_desc = memif_parse_desc (&dma_info->data, mif, mq, cur_slot, n_slots); + cur_slot += n_desc; + + if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET) + memif_validate_desc_data (&dma_info->data, mif, n_desc, + /* is_ethernet */ 1); + else + memif_validate_desc_data (&dma_info->data, mif, n_desc, + /* is_ethernet */ 0); + + n_buffers = memif_process_desc (vm, node, ptd, mif); + + if (PREDICT_FALSE (n_buffers == 0)) + { + /* All descriptors are bad. Release slots in the ring and bail */ + memif_advance_ring (type, mq, ring, cur_slot); + goto done; + } + + /* allocate free buffers */ + vec_validate_aligned (dma_info->data.buffers, n_buffers - 1, + CLIB_CACHE_LINE_BYTES); + n_alloc = vlib_buffer_alloc_from_pool (vm, dma_info->data.buffers, n_buffers, + mq->buffer_pool_index); + if (PREDICT_FALSE (n_alloc != n_buffers)) + { + if (n_alloc) + vlib_buffer_free (vm, dma_info->data.buffers, n_alloc); + vlib_error_count (vm, node->node_index, + MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1); + goto done; + } + + dma_info->data.n_rx_bytes = ptd->n_rx_bytes; + dma_info->data.n_packets = ptd->n_packets; + /* copy data */ + vlib_buffer_t *b; + u32 n_pkts = clib_min (MEMIF_RX_VECTOR_SZ, vec_len (ptd->copy_ops)); + co = ptd->copy_ops; + + for (i = 0; i < n_pkts; i++) + { + b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]); + vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data, + co[i].data_len); + } + + for (i = n_pkts; i < vec_len (ptd->copy_ops); i++) + { + b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]); + vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data, + co[i].data_len); + } + + dma_info->dma_tail = cur_slot; + mq->last_head = dma_info->dma_tail; + mq->dma_info_tail++; + if (mq->dma_info_tail == mq->dma_info_size) + mq->dma_info_tail = 0; + +done: + vlib_dma_batch_set_cookie (vm, db, ((u64) mif_id << 16) | qid); + vlib_dma_batch_submit (vm, db); + vec_reset_length (ptd->copy_ops); + + return ptd->n_packets; +} VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -867,12 +1333,25 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm, } else { - if (mif->mode == MEMIF_INTERFACE_MODE_IP) - n_rx += memif_device_input_inline ( - vm, node, mif, MEMIF_RING_S2M, qid, mode_ip); + if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && + (mif->dma_input_config >= 0)) + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline_dma ( + vm, node, mif, MEMIF_RING_S2M, qid, mode_ip); + else + n_rx += memif_device_input_inline_dma ( + vm, node, mif, MEMIF_RING_S2M, qid, mode_eth); + } else - n_rx += memif_device_input_inline ( - vm, node, mif, MEMIF_RING_S2M, qid, mode_eth); + { + if (mif->mode == MEMIF_INTERFACE_MODE_IP) + n_rx += memif_device_input_inline ( + vm, node, mif, MEMIF_RING_S2M, qid, mode_ip); + else + n_rx += memif_device_input_inline ( + vm, node, mif, MEMIF_RING_S2M, qid, mode_eth); + } } } } @@ -880,7 +1359,6 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm, return n_rx; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (memif_input_node) = { .name = "memif-input", .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, @@ -892,7 +1370,6 @@ VLIB_REGISTER_NODE (memif_input_node) = { .error_counters = memif_input_error_counters, }; -/* *INDENT-ON* */ /* diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h index 838651abc27..f6335410ba8 100644 --- a/src/plugins/memif/private.h +++ b/src/plugins/memif/private.h @@ -16,6 +16,7 @@ */ #include <vppinfra/lock.h> +#include <vlib/dma/dma.h> #include <vlib/log.h> #define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock" @@ -24,7 +25,7 @@ #define MEMIF_DEFAULT_TX_QUEUES 1 #define MEMIF_DEFAULT_BUFFER_SIZE 2048 -#define MEMIF_MAX_M2S_RING (vlib_get_n_threads ()) +#define MEMIF_MAX_M2S_RING 256 #define MEMIF_MAX_S2M_RING 256 #define MEMIF_MAX_REGION 256 #define MEMIF_MAX_LOG2_RING_SIZE 14 @@ -120,9 +121,15 @@ typedef struct int fd; } memif_msg_fifo_elt_t; +#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE +#define MEMIF_DMA_INFO_SIZE VLIB_FRAME_SIZE + +struct memif_dma_info; + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; /* ring data */ memif_ring_t *ring; memif_log2_ring_size_t log2_ring_size; @@ -134,6 +141,15 @@ typedef struct u32 *buffers; u8 buffer_pool_index; + /* dma data */ + u16 dma_head; + u16 dma_tail; + struct memif_dma_info *dma_info; + u16 dma_info_head; + u16 dma_info_tail; + u16 dma_info_size; + u8 dma_info_full; + /* interrupts */ int int_fd; uword int_clib_file_index; @@ -144,14 +160,15 @@ typedef struct u32 queue_index; } memif_queue_t; -#define foreach_memif_if_flag \ - _(0, ADMIN_UP, "admin-up") \ - _(1, IS_SLAVE, "slave") \ - _(2, CONNECTING, "connecting") \ - _(3, CONNECTED, "connected") \ - _(4, DELETING, "deleting") \ - _(5, ZERO_COPY, "zero-copy") \ - _(6, ERROR, "error") +#define foreach_memif_if_flag \ + _ (0, ADMIN_UP, "admin-up") \ + _ (1, IS_SLAVE, "slave") \ + _ (2, CONNECTING, "connecting") \ + _ (3, CONNECTED, "connected") \ + _ (4, DELETING, "deleting") \ + _ (5, ZERO_COPY, "zero-copy") \ + _ (6, ERROR, "error") \ + _ (7, USE_DMA, "use_dma") typedef enum { @@ -163,7 +180,6 @@ typedef enum typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - clib_spinlock_t lockp; u32 flags; memif_interface_id_t id; u32 hw_if_index; @@ -207,11 +223,15 @@ typedef struct /* disconnect strings */ u8 *local_disc_string; u8 *remote_disc_string; + + /* dma config index */ + int dma_input_config; + int dma_tx_config; } memif_if_t; typedef struct { - u32 packet_len; + u16 packet_len; u16 first_buffer_vec_index; } memif_packet_op_t; @@ -224,21 +244,61 @@ typedef struct u16 buffer_vec_index; } memif_copy_op_t; -#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE +typedef enum +{ + MEMIF_DESC_STATUS_OK = 0, + MEMIF_DESC_STATUS_ERR_BAD_REGION, + MEMIF_DESC_STATUS_ERR_REGION_OVERRUN, + MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG, + MEMIF_DESC_STATUS_ERR_ZERO_LENGTH +} __clib_packed memif_desc_status_err_code_t; + +typedef union +{ + struct + { + u8 next : 1; + u8 err : 1; + u8 reserved : 2; + memif_desc_status_err_code_t err_code : 4; + }; + u8 as_u8; +} memif_desc_status_t; + +STATIC_ASSERT_SIZEOF (memif_desc_status_t, 1); typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - + u16 n_packets; + u16 max_desc_len; + u32 n_rx_bytes; + u8 xor_status; /* copy vector */ - memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ]; memif_copy_op_t *copy_ops; u32 *buffers; + memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ]; + + /* temp storage for compressed descriptors */ + void **desc_data; + u16 *desc_len; + memif_desc_status_t *desc_status; /* buffer template */ vlib_buffer_t buffer_template; } memif_per_thread_data_t; +typedef struct memif_dma_info +{ + /* per thread data */ + memif_interface_mode_t mode; + vlib_node_runtime_t *node; + u32 dma_head; + u32 dma_tail; + u8 finished; + memif_per_thread_data_t data; +} memif_dma_info_t; + typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -278,6 +338,7 @@ typedef struct u8 *secret; u8 is_master; u8 is_zero_copy; + u8 use_dma; memif_interface_mode_t mode:8; memif_log2_ring_size_t log2_ring_size; u16 buffer_size; @@ -290,10 +351,11 @@ typedef struct u32 sw_if_index; } memif_create_if_args_t; -int memif_socket_filename_add_del (u8 is_add, u32 sock_id, - u8 * sock_filename); -int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args); -int memif_delete_if (vlib_main_t * vm, memif_if_t * mif); +u32 memif_get_unused_socket_id (); +clib_error_t *memif_socket_filename_add_del (u8 is_add, u32 sock_id, + char *sock_filename); +clib_error_t *memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args); +clib_error_t *memif_delete_if (vlib_main_t *vm, memif_if_t *mif); clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm); clib_error_t *memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags); @@ -322,7 +384,8 @@ clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf); clib_error_t *memif_msg_send_disconnect (memif_if_t * mif, clib_error_t * err); u8 *format_memif_device_name (u8 * s, va_list * args); - +void memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b); +void memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b); /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c index 5a381a7c7cd..001f26f13ef 100644 --- a/src/plugins/memif/socket.c +++ b/src/plugins/memif/socket.c @@ -25,7 +25,6 @@ #include <sys/un.h> #include <sys/uio.h> #include <sys/mman.h> -#include <sys/prctl.h> #include <sys/eventfd.h> #include <inttypes.h> #include <limits.h> @@ -446,14 +445,12 @@ memif_msg_receive (memif_if_t ** mifp, clib_socket_t * sock, clib_file_t * uf) if ((err = memif_init_regions_and_queues (mif))) goto error; memif_msg_enq_init (mif); - /* *INDENT-OFF* */ vec_foreach_index (i, mif->regions) memif_msg_enq_add_region (mif, i); vec_foreach_index (i, mif->tx_queues) memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M); vec_foreach_index (i, mif->rx_queues) memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S); - /* *INDENT-ON* */ memif_msg_enq_connect (mif); break; @@ -648,7 +645,8 @@ memif_master_conn_fd_error (clib_file_t * uf) memif_log_warn (0, "Error on unknown file descriptor %d", uf->file_descriptor); - memif_file_del (uf); + if (uf->file_descriptor != ~0) + memif_file_del (uf); return 0; } |