Diffstat (limited to 'src/plugins/memif')
-rw-r--r--   src/plugins/memif/cli.c        |   2
-rw-r--r--   src/plugins/memif/device.c     | 281
-rw-r--r--   src/plugins/memif/memif.api    |  52
-rw-r--r--   src/plugins/memif/memif.c      |  89
-rw-r--r--   src/plugins/memif/memif_api.c  |  92
-rw-r--r--   src/plugins/memif/memif_test.c | 115
-rw-r--r--   src/plugins/memif/node.c       | 261
-rw-r--r--   src/plugins/memif/private.h    |  53
8 files changed, 925 insertions, 20 deletions
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c
index 3d3a681f18d..309cd8e49d7 100644
--- a/src/plugins/memif/cli.c
+++ b/src/plugins/memif/cli.c
@@ -172,6 +172,8 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
 	args.is_master = 0;
       else if (unformat (line_input, "no-zero-copy"))
 	args.is_zero_copy = 0;
+      else if (unformat (line_input, "use-dma"))
+	args.use_dma = 1;
       else if (unformat (line_input, "mode ip"))
 	args.mode = MEMIF_INTERFACE_MODE_IP;
       else if (unformat (line_input, "hw-addr %U",
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index f049a7be38e..ff6068f8243 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -369,6 +369,270 @@ no_free_slots:
   return n_left;
 }
 
+CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
+	       vlib_dma_batch_t *b)
+{
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+  memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
+  memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
+  memif_per_thread_data_t *ptd = &dma_info->data;
+
+  vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+
+  dma_info->finished = 1;
+  vec_reset_length (ptd->buffers);
+  vec_reset_length (ptd->copy_ops);
+
+  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+  mq->dma_info_head++;
+  if (mq->dma_info_head == mq->dma_info_size)
+    mq->dma_info_head = 0;
+  mq->dma_info_full = 0;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+			       u32 *buffers, memif_if_t *mif,
+			       memif_ring_type_t type, memif_queue_t *mq,
+			       u32 n_left)
+{
+  memif_ring_t *ring;
+  u32 n_copy_op;
+  u16 ring_size, mask, slot, free_slots;
+  int n_retries = 5, fallback = 0;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  memif_copy_op_t *co;
+  memif_region_index_t last_region = ~0;
+  void *last_region_shm = 0;
+  u16 head, tail;
+  memif_dma_info_t *dma_info;
+  memif_per_thread_data_t *ptd;
+  memif_main_t *mm = &memif_main;
+  u16 mif_id = mif - mm->interfaces;
+
+  ring = mq->ring;
+  ring_size = 1 << mq->log2_ring_size;
+  mask = ring_size - 1;
+
+  dma_info = mq->dma_info + mq->dma_info_tail;
+  ptd = &dma_info->data;
+
+  /* do software fallback if dma info ring is full */
+  u16 dma_mask = mq->dma_info_size - 1;
+  if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
+      ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
+    {
+      if (!mq->dma_info_full)
+	mq->dma_info_full = 1;
+      else
+	fallback = 1;
+    }
+
+  vlib_dma_batch_t *b = NULL;
+  if (PREDICT_TRUE (!fallback))
+    b = vlib_dma_batch_new (vm, mif->dma_tx_config);
+  if (!b)
+    return n_left;
+
+retry:
+
+  slot = tail = mq->dma_tail;
+  head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+  mq->last_tail += tail - mq->last_tail;
+  free_slots = head - mq->dma_tail;
+
+  while (n_left && free_slots)
+    {
+      memif_desc_t *d0;
+      void *mb0;
+      i32 src_off;
+      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+      u16 saved_slot = slot;
+
+      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
+
+      d0 = &ring->desc[slot & mask];
+      if (PREDICT_FALSE (last_region != d0->region))
+	{
+	  last_region_shm = mif->regions[d0->region].shm;
+	  last_region = d0->region;
+	}
+      mb0 = last_region_shm + d0->offset;
+
+      dst_off = 0;
+
+      /* slave is the producer, so it should be able to reset buffer length */
+      dst_left = d0->length;
+
+      if (PREDICT_TRUE (n_left >= 4))
+	vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+      bi0 = buffers[0];
+
+    next_in_chain:
+
+      b0 = vlib_get_buffer (vm, bi0);
+      src_off = b0->current_data;
+      src_left = b0->current_length;
+
+      while (src_left)
+	{
+	  if (PREDICT_FALSE (dst_left == 0))
+	    {
+	      if (free_slots)
+		{
+		  d0->length = dst_off;
+		  d0->flags = MEMIF_DESC_FLAG_NEXT;
+		  d0 = &ring->desc[slot & mask];
+		  dst_off = 0;
+		  dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
+							d0->length;
+
+		  if (PREDICT_FALSE (last_region != d0->region))
+		    {
+		      last_region_shm = mif->regions[d0->region].shm;
+		      last_region = d0->region;
+		    }
+		  mb0 = last_region_shm + d0->offset;
+		}
+	      else
+		{
+		  /* we need to rollback vectors before bailing out */
+		  vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+		  vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
+		  vlib_error_count (vm, node->node_index,
+				    MEMIF_TX_ERROR_ROLLBACK, 1);
+		  slot = saved_slot;
+		  goto no_free_slots;
+		}
+	    }
+	  bytes_to_copy = clib_min (src_left, dst_left);
+	  memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+			     vec_len (ptd->buffers));
+	  src_off += bytes_to_copy;
+	  dst_off += bytes_to_copy;
+	  src_left -= bytes_to_copy;
+	  dst_left -= bytes_to_copy;
+	}
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+	{
+	  slot++;
+	  free_slots--;
+	  bi0 = b0->next_buffer;
+	  goto next_in_chain;
+	}
+
+      vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
+      d0->length = dst_off;
+      d0->flags = 0;
+
+      free_slots -= 1;
+      slot += 1;
+
+      buffers++;
+      n_left--;
+    }
+no_free_slots:
+
+  /* copy data */
+  n_copy_op = vec_len (ptd->copy_ops);
+  co = ptd->copy_ops;
+  while (n_copy_op >= 8)
+    {
+      clib_prefetch_load (co[4].data);
+      clib_prefetch_load (co[5].data);
+      clib_prefetch_load (co[6].data);
+      clib_prefetch_load (co[7].data);
+
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+      if (PREDICT_TRUE (!fallback))
+	{
+	  vlib_dma_batch_add (vm, b, co[0].data,
+			      b0->data + co[0].buffer_offset, co[0].data_len);
+	  vlib_dma_batch_add (vm, b, co[1].data,
+			      b1->data + co[1].buffer_offset, co[1].data_len);
+	  vlib_dma_batch_add (vm, b, co[2].data,
+			      b2->data + co[2].buffer_offset, co[2].data_len);
+	  vlib_dma_batch_add (vm, b, co[3].data,
+			      b3->data + co[3].buffer_offset, co[3].data_len);
+	}
+      else
+	{
+	  clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			    co[0].data_len);
+	  clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+			    co[1].data_len);
+	  clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+			    co[2].data_len);
+	  clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+			    co[3].data_len);
+	}
+
+      co += 4;
+      n_copy_op -= 4;
+    }
+  while (n_copy_op)
+    {
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      if (PREDICT_TRUE (!fallback))
+	vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset,
+			    co[0].data_len);
+      else
+	clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			  co[0].data_len);
+      co += 1;
+      n_copy_op -= 1;
+    }
+
+  /* save dma info before retry */
+  dma_info->dma_tail = slot;
+  mq->dma_tail = slot;
+  vec_reset_length (ptd->copy_ops);
+
+  if (n_left && n_retries--)
+    goto retry;
+
+  if (PREDICT_TRUE (!fallback))
+    {
+      vlib_dma_batch_set_cookie (vm, b,
+				 (mif_id << 16) | (mq - mif->tx_queues));
+      vlib_dma_batch_submit (vm, b);
+      dma_info->finished = 0;
+
+      if (b->n_enq)
+	{
+	  mq->dma_info_tail++;
+	  if (mq->dma_info_tail == mq->dma_info_size)
+	    mq->dma_info_tail = 0;
+	}
+    }
+  else if (fallback && dma_info->finished)
+    {
+      /* if dma has been completed, update ring immediately */
+      vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+      vec_reset_length (ptd->buffers);
+      __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
+    }
+
+  return n_left;
+}
+
 VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
					       vlib_node_runtime_t * node,
					       vlib_frame_t * frame)
@@ -399,8 +663,14 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
     n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
					 mq, ptd, n_left);
   else
-    n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S,
-					mq, ptd, n_left);
+    {
+      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+	n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
+						MEMIF_RING_M2S, mq, n_left);
+      else
+	n_left = memif_interface_tx_inline (vm, node, from, mif,
+					    MEMIF_RING_M2S, mq, ptd, n_left);
+    }
 
   if (tf->shared_queue)
     clib_spinlock_unlock (&mq->lockp);
@@ -416,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
       mq->int_count++;
     }
 
-  if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+  if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+    {
+      if (n_left)
+	vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+    }
+  else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
     vlib_buffer_free (vm, from, frame->n_vectors);
   else if (n_left)
     vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
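A note on the completion plumbing above: memif_interface_tx_dma_inline tags each submitted batch with vlib_dma_batch_set_cookie, packing the interface index into the upper 16 bits and the queue index into the lower 16, and memif_tx_dma_completion_cb reverses the split to locate its queue. A standalone sketch of that encoding (cookie_pack/cookie_unpack are illustrative names, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Pack an interface index and a queue index into one 32-bit cookie,
 * mirroring the (mif_id << 16) | qid scheme used in the patch. */
static uint32_t
cookie_pack (uint16_t mif_id, uint16_t qid)
{
  return ((uint32_t) mif_id << 16) | qid;
}

static void
cookie_unpack (uint32_t cookie, uint16_t *mif_id, uint16_t *qid)
{
  *mif_id = cookie >> 16; /* upper half: interface index */
  *qid = cookie & 0xffff; /* lower half: queue index */
}

int
main (void)
{
  uint16_t mif_id, qid;
  cookie_unpack (cookie_pack (3, 7), &mif_id, &qid);
  assert (mif_id == 3 && qid == 7);
  printf ("mif %u queue %u\n", mif_id, qid);
  return 0;
}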
diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api
index 91e72f73ab4..b800bfa6517 100644
--- a/src/plugins/memif/memif.api
+++ b/src/plugins/memif/memif.api
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-option version = "3.0.0";
+option version = "3.1.0";
 
 import "vnet/interface_types.api";
 import "vnet/ethernet/ethernet_types.api";
@@ -133,6 +133,56 @@ define memif_create_reply
   vl_api_interface_index_t sw_if_index;
 };
 
+/** \brief Create memory interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param role - role of the interface in the connection (master/slave)
+    @param mode - interface mode
+    @param rx_queues - number of rx queues (only valid for slave)
+    @param tx_queues - number of tx queues (only valid for slave)
+    @param id - 32bit integer used to authenticate and match opposite sides
+		of the connection
+    @param socket_id - socket filename id to be used for connection
+		       establishment
+    @param ring_size - the number of entries of RX/TX rings
+    @param buffer_size - size of the buffer allocated for each ring entry
+    @param no_zero_copy - if true, disable zero copy
+    @param use_dma - if true, use DMA to accelerate memory copy
+    @param hw_addr - interface MAC address
+    @param secret - optional, default is "", max length 24
+*/
+define memif_create_v2
+{
+  u32 client_index;
+  u32 context;
+
+  vl_api_memif_role_t role; /* 0 = master, 1 = slave */
+  vl_api_memif_mode_t mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+
+  u8 rx_queues; /* optional, default is 1 */
+  u8 tx_queues; /* optional, default is 1 */
+  u32 id; /* optional, default is 0 */
+  u32 socket_id; /* optional, default is 0, "/var/vpp/memif.sock" */
+  u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */
+  u16 buffer_size; /* optional, default is 2048 bytes */
+  bool no_zero_copy; /* disable zero copy */
+  bool use_dma; /* use dma acceleration */
+  vl_api_mac_address_t hw_addr; /* optional, randomly generated if zero */
+  string secret[24]; /* optional, default is "", max length 24 */
+  option vat_help = "[id <id>] [socket-id <id>] [ring_size <size>] [buffer_size <size>] [hw_addr <mac_address>] [secret <string>] [mode ip] <master|slave>";
+};
+
+/** \brief Create memory interface response
+    @param context - sender context, to match reply w/ request
+    @param retval - return value for request
+    @param sw_if_index - software index of the newly created interface
+*/
+define memif_create_v2_reply
+{
+  u32 context;
+  i32 retval;
+  vl_api_interface_index_t sw_if_index;
+};
+
 /** \brief Delete memory interface
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
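As the v2 handler's clib_net_to_host_u32/ntohs calls later in this patch imply, the multi-byte fields of memif_create_v2 travel in network byte order. A rough client-side sketch of populating the new knobs, using a simplified stand-in struct rather than the generated vl_api_memif_create_v2_t:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for the wire layout of a few fields; the real
 * message is generated from memif.api and carries more members. */
struct create_v2_wire
{
  uint32_t id;		/* network byte order on the wire */
  uint32_t socket_id;
  uint32_t ring_size;	/* must be a power of 2 */
  uint16_t buffer_size;
  uint8_t no_zero_copy;
  uint8_t use_dma;	/* new in API version 3.1.0 */
};

int
main (void)
{
  struct create_v2_wire mp;
  memset (&mp, 0, sizeof (mp));
  mp.id = htonl (0);
  mp.socket_id = htonl (0);
  mp.ring_size = htonl (1024);	 /* default, power of 2 */
  mp.buffer_size = htons (2048); /* default buffer size */
  mp.use_dma = 1;		 /* request DMA-accelerated copies */
  printf ("ring_size on wire: 0x%08x\n", (unsigned) mp.ring_size);
  return 0;
}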
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index c9d2f008cca..37028d8223e 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -301,6 +301,37 @@ memif_connect (memif_if_t * mif)
       mq->queue_index =
	 vnet_hw_if_register_tx_queue (vnm, mif->hw_if_index, i);
       clib_spinlock_init (&mq->lockp);
+
+      if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+	{
+	  memif_dma_info_t *dma_info;
+	  mq->dma_head = 0;
+	  mq->dma_tail = 0;
+	  mq->dma_info_head = 0;
+	  mq->dma_info_tail = 0;
+	  mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+	  vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+				CLIB_CACHE_LINE_BYTES);
+
+	  vec_foreach (dma_info, mq->dma_info)
+	    {
+	      vec_validate_aligned (dma_info->data.desc_data,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_len,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_status,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.copy_ops, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.copy_ops);
+	      vec_validate_aligned (dma_info->data.buffers, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.buffers);
+	    }
+	}
     }
 
   if (vec_len (mif->tx_queues) > 0)
@@ -331,6 +362,37 @@ memif_connect (memif_if_t * mif)
       qi = vnet_hw_if_register_rx_queue (vnm, mif->hw_if_index, i,
					 VNET_HW_IF_RXQ_THREAD_ANY);
       mq->queue_index = qi;
+
+      if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+	{
+	  memif_dma_info_t *dma_info;
+	  mq->dma_head = 0;
+	  mq->dma_tail = 0;
+	  mq->dma_info_head = 0;
+	  mq->dma_info_tail = 0;
+	  mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+	  vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+				CLIB_CACHE_LINE_BYTES);
+	  vec_foreach (dma_info, mq->dma_info)
+	    {
+	      vec_validate_aligned (dma_info->data.desc_data,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_len,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_status,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.copy_ops, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.copy_ops);
+	      vec_validate_aligned (dma_info->data.buffers, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.buffers);
+	    }
+	}
+
       if (mq->int_fd > -1)
	 {
	   template.file_descriptor = mq->int_fd;
@@ -902,6 +964,16 @@ VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = {
 };
 /* *INDENT-ON* */
 
+static void
+memif_prepare_dma_args (vlib_dma_config_t *args)
+{
+  args->max_batches = 256;
+  args->max_transfer_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
+  args->barrier_before_last = 1;
+  args->sw_fallback = 1;
+  args->callback_fn = NULL;
+}
+
 clib_error_t *
 memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
 {
@@ -989,6 +1061,20 @@ memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
   if (args->secret)
     mif->secret = vec_dup (args->secret);
 
+  /* register dma config if enabled */
+  if (args->use_dma)
+    {
+      vlib_dma_config_t dma_args;
+      bzero (&dma_args, sizeof (dma_args));
+      memif_prepare_dma_args (&dma_args);
+
+      dma_args.max_transfers = 1 << args->log2_ring_size;
+      dma_args.callback_fn = memif_dma_completion_cb;
+      mif->dma_input_config = vlib_dma_config_add (vm, &dma_args);
+      dma_args.callback_fn = memif_tx_dma_completion_cb;
+      mif->dma_tx_config = vlib_dma_config_add (vm, &dma_args);
+    }
+
   if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
     {
@@ -1077,6 +1163,9 @@ memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
       mif->flags |= MEMIF_IF_FLAG_ZERO_COPY;
     }
 
+  if (args->use_dma)
+    mif->flags |= MEMIF_IF_FLAG_USE_DMA;
+
   vnet_hw_if_set_caps (vnm, mif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
   vnet_hw_if_set_input_node (vnm, mif->hw_if_index, memif_input_node.index);
   mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);
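In memif_connect above, each dma_info entry's descriptor vectors are sized with vec_validate_aligned (v, pow2_mask (max_log2_ring_sz), ...), i.e. grown to hold one element per ring slot. A small sketch of that mask arithmetic, with pow2_mask re-implemented here purely for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* pow2_mask (n) == (1 << n) - 1, the largest valid index in a ring of
 * 2^n slots; a stand-in for the vppinfra helper used above. */
static inline uint64_t
pow2_mask (uint32_t n)
{
  return (1ULL << n) - 1;
}

int
main (void)
{
  uint32_t log2_ring_size = 10; /* a 1024-entry ring */
  uint64_t last_index = pow2_mask (log2_ring_size);
  assert (last_index == 1023);
  /* vec_validate_aligned (v, pow2_mask (n), ...) grows v to cover
   * indices 0..1023, i.e. one slot per ring descriptor. */
  printf ("vector sized for indices 0..%llu\n",
	  (unsigned long long) last_index);
  return 0;
}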
diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c
index 1a58e4c068d..16d3686d3d4 100644
--- a/src/plugins/memif/memif_api.c
+++ b/src/plugins/memif/memif_api.c
@@ -204,6 +204,98 @@ reply:
 }
 
 /**
+ * @brief Message handler for memif_create_v2 API.
+ * @param mp vl_api_memif_create_v2_t * mp the api message
+ */
+void
+vl_api_memif_create_v2_t_handler (vl_api_memif_create_v2_t *mp)
+{
+  memif_main_t *mm = &memif_main;
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_memif_create_v2_reply_t *rmp;
+  memif_create_if_args_t args = { 0 };
+  u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+  static const u8 empty_hw_addr[6];
+  int rv = 0;
+  mac_address_t mac;
+
+  /* id */
+  args.id = clib_net_to_host_u32 (mp->id);
+
+  /* socket-id */
+  args.socket_id = clib_net_to_host_u32 (mp->socket_id);
+
+  /* secret */
+  mp->secret[ARRAY_LEN (mp->secret) - 1] = 0;
+  if (strlen ((char *) mp->secret) > 0)
+    {
+      vec_validate (args.secret, strlen ((char *) mp->secret));
+      strncpy ((char *) args.secret, (char *) mp->secret,
+	       vec_len (args.secret));
+    }
+
+  /* role */
+  args.is_master = (ntohl (mp->role) == MEMIF_ROLE_API_MASTER);
+
+  /* mode */
+  args.mode = ntohl (mp->mode);
+
+  args.is_zero_copy = mp->no_zero_copy ? 0 : 1;
+
+  args.use_dma = mp->use_dma;
+
+  /* rx/tx queues */
+  if (args.is_master == 0)
+    {
+      args.rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+      args.tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+      if (mp->rx_queues)
+	{
+	  args.rx_queues = mp->rx_queues;
+	}
+      if (mp->tx_queues)
+	{
+	  args.tx_queues = mp->tx_queues;
+	}
+    }
+
+  /* ring size */
+  if (mp->ring_size)
+    {
+      ring_size = ntohl (mp->ring_size);
+    }
+  if (!is_pow2 (ring_size))
+    {
+      rv = VNET_API_ERROR_INVALID_ARGUMENT;
+      goto reply;
+    }
+  args.log2_ring_size = min_log2 (ring_size);
+
+  /* buffer size */
+  args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+  if (mp->buffer_size)
+    {
+      args.buffer_size = ntohs (mp->buffer_size);
+    }
+
+  /* MAC address */
+  mac_address_decode (mp->hw_addr, &mac);
+  if (memcmp (&mac, empty_hw_addr, 6) != 0)
+    {
+      memcpy (args.hw_addr, &mac, 6);
+      args.hw_addr_set = 1;
+    }
+
+  rv = vnet_api_error (memif_create_if (vm, &args));
+
+  vec_free (args.secret);
+
+reply:
+  REPLY_MACRO2 (VL_API_MEMIF_CREATE_V2_REPLY,
+		({ rmp->sw_if_index = htonl (args.sw_if_index); }));
+}
+
+/**
  * @brief Message handler for memif_delete API.
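The v2 handler accepts a ring size only when it is a power of two, then stores its base-2 logarithm. An illustrative re-implementation of the is_pow2/min_log2 pair it relies on (note that this simplified is_pow2 also rejects 0, which the vppinfra original treats differently):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the vppinfra helpers used by the handler. */
static inline int
is_pow2 (uint32_t x)
{
  return x && !(x & (x - 1));
}

static inline uint32_t
min_log2 (uint32_t x)
{
  uint32_t n = 0;
  while ((1u << n) < x)
    n++;
  return n;
}

int
main (void)
{
  assert (is_pow2 (1024) && min_log2 (1024) == 10);
  assert (!is_pow2 (1000)); /* would yield VNET_API_ERROR_INVALID_ARGUMENT */
  printf ("log2 (1024) = %u\n", min_log2 (1024));
  return 0;
}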
 * @param mp vl_api_memif_delete_t * mp the api message
 */
diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c
index 07d68924b86..e8fb37c949f 100644
--- a/src/plugins/memif/memif_test.c
+++ b/src/plugins/memif/memif_test.c
@@ -325,6 +325,121 @@ static void vl_api_memif_create_reply_t_handler
   vam->regenerate_interface_table = 1;
 }
 
+/* memif-create_v2 API */
+static int
+api_memif_create_v2 (vat_main_t *vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_memif_create_v2_t *mp;
+  u32 id = 0;
+  u32 socket_id = 0;
+  u8 *secret = 0;
+  u8 role = 1;
+  u32 ring_size = 0;
+  u8 use_dma = 0;
+  u32 buffer_size = 0;
+  u8 hw_addr[6] = { 0 };
+  u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+  u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+  int ret;
+  u8 mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "id %u", &id))
+	;
+      else if (unformat (i, "socket-id %u", &socket_id))
+	;
+      else if (unformat (i, "secret %s", &secret))
+	;
+      else if (unformat (i, "ring_size %u", &ring_size))
+	;
+      else if (unformat (i, "buffer_size %u", &buffer_size))
+	;
+      else if (unformat (i, "master"))
+	role = 0;
+      else if (unformat (i, "use_dma %u", &use_dma))
+	;
+      else if (unformat (i, "slave %U", unformat_memif_queues, &rx_queues,
+			 &tx_queues))
+	role = 1;
+      else if (unformat (i, "mode ip"))
+	mode = MEMIF_INTERFACE_MODE_IP;
+      else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+	;
+      else
+	{
+	  clib_warning ("unknown input '%U'", format_unformat_error, i);
+	  return -99;
+	}
+    }
+
+  if (socket_id == ~0)
+    {
+      errmsg ("invalid socket-id\n");
+      return -99;
+    }
+
+  if (!is_pow2 (ring_size))
+    {
+      errmsg ("ring size must be power of 2\n");
+      return -99;
+    }
+
+  if (rx_queues > 255 || rx_queues < 1)
+    {
+      errmsg ("rx queue must be between 1 - 255\n");
+      return -99;
+    }
+
+  if (tx_queues > 255 || tx_queues < 1)
+    {
+      errmsg ("tx queue must be between 1 - 255\n");
+      return -99;
+    }
+
+  M2 (MEMIF_CREATE_V2, mp, strlen ((char *) secret));
+
+  mp->mode = mode;
+  mp->id = clib_host_to_net_u32 (id);
+  mp->role = role;
+  mp->use_dma = use_dma;
+  mp->ring_size = clib_host_to_net_u32 (ring_size);
+  mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff);
+  mp->socket_id = clib_host_to_net_u32 (socket_id);
+  if (secret != 0)
+    {
+      char *p = (char *) &mp->secret;
+      p += vl_api_vec_to_api_string (secret, (vl_api_string_t *) p);
+      vec_free (secret);
+    }
+  memcpy (mp->hw_addr, hw_addr, 6);
+  mp->rx_queues = rx_queues;
+  mp->tx_queues = tx_queues;
+
+  S (mp);
+  W (ret);
+  return ret;
+}
+
+/* memif-create_v2 reply handler */
+static void
+vl_api_memif_create_v2_reply_t_handler (vl_api_memif_create_v2_reply_t *mp)
+{
+  vat_main_t *vam = memif_test_main.vat_main;
+  i32 retval = ntohl (mp->retval);
+
+  if (retval == 0)
+    {
+      fformat (vam->ofp, "created memif with sw_if_index %d\n",
+	       ntohl (mp->sw_if_index));
+    }
+
+  vam->retval = retval;
+  vam->result_ready = 1;
+  vam->regenerate_interface_table = 1;
+}
+
 /* memif-delete API */
 static int
 api_memif_delete (vat_main_t * vam)
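A pattern worth calling out before the node.c changes below: both completion callbacks publish the new ring tail with __ATOMIC_RELEASE only after the DMA engine has finished copying, while the submit paths read the peer-owned head index with __ATOMIC_ACQUIRE. A compact C11 model of that contract (names are illustrative; the slot math mirrors free_slots = head - mq->dma_tail):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal model of the ordering contract: the completion side publishes
 * the new tail (release) only once the copied data is in place, and the
 * submit side reads the peer-owned index with acquire semantics. */
static _Atomic uint16_t ring_tail;

static void
dma_completion (uint16_t new_tail)
{
  /* ...descriptor payloads are fully written at this point... */
  atomic_store_explicit (&ring_tail, new_tail, memory_order_release);
}

static uint16_t
slots_between (uint16_t head)
{
  uint16_t tail = atomic_load_explicit (&ring_tail, memory_order_acquire);
  return (uint16_t) (head - tail); /* ring indices wrap modulo 2^16 */
}

int
main (void)
{
  dma_completion (8);
  printf ("slots between tail and head=16: %u\n", slots_between (16));
  return 0;
}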
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 40dcf682798..1ee94f2e81b 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -1054,6 +1054,244 @@ done:
   return n_rx_packets;
 }
 
+CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm,
+	       vlib_dma_batch_t *b)
+{
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+  u32 thread_index = vm->thread_index;
+  u32 n_left_to_next = 0;
+  u16 nexts[MEMIF_RX_VECTOR_SZ], *next;
+  u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+  uword n_trace;
+  memif_dma_info_t *dma_info;
+  u16 qid = b->cookie & 0xffff;
+  memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+  dma_info = mq->dma_info + mq->dma_info_head;
+  memif_per_thread_data_t *ptd = &dma_info->data;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+  /* prepare buffer template and next indices */
+  i16 start_offset =
+    (dma_info->mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+  vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+  vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+  ptd->buffer_template.current_data = start_offset;
+  ptd->buffer_template.current_config_index = 0;
+  ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+  ptd->buffer_template.ref_count = 1;
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    {
+      next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+      if (mif->per_interface_next_index != ~0)
+	next_index = mif->per_interface_next_index;
+      else
+	vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index,
+					    &ptd->buffer_template);
+
+      vlib_get_new_next_frame (vm, dma_info->node, next_index, to_next_bufs,
+			       n_left_to_next);
+      if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+	{
+	  vlib_next_frame_t *nf;
+	  vlib_frame_t *f;
+	  ethernet_input_frame_t *ef;
+	  nf =
+	    vlib_node_runtime_get_next_frame (vm, dma_info->node, next_index);
+	  f = vlib_get_frame (vm, nf->frame);
+	  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+	  ef = vlib_frame_scalar_args (f);
+	  ef->sw_if_index = mif->sw_if_index;
+	  ef->hw_if_index = mif->hw_if_index;
+	  vlib_frame_no_append (f);
+	}
+    }
+
+  vec_reset_length (ptd->buffers);
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_IP)
+    memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs,
+			     nexts, 1);
+  else
+    memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs,
+			     nexts, 0);
+
+  /* packet trace if enabled */
+  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, dma_info->node))))
+    {
+      u32 n_left = ptd->n_packets;
+      bi = to_next_bufs;
+      next = nexts;
+      u32 ni = next_index;
+      while (n_trace && n_left)
+	{
+	  vlib_buffer_t *b;
+	  memif_input_trace_t *tr;
+	  if (dma_info->mode != MEMIF_INTERFACE_MODE_ETHERNET)
+	    ni = next[0];
+	  b = vlib_get_buffer (vm, bi[0]);
+	  if (PREDICT_TRUE (vlib_trace_buffer (vm, dma_info->node, ni, b,
+					       /* follow_chain */ 0)))
+	    {
+	      tr = vlib_add_trace (vm, dma_info->node, b, sizeof (*tr));
+	      tr->next_index = ni;
+	      tr->hw_if_index = mif->hw_if_index;
+	      tr->ring = qid;
+	      n_trace--;
+	    }
+
+	  /* next */
+	  n_left--;
+	  bi++;
+	  next++;
+	}
+      vlib_set_trace_count (vm, dma_info->node, n_trace);
+    }
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    {
+      n_left_to_next -= ptd->n_packets;
+      vlib_put_next_frame (vm, dma_info->node, next_index, n_left_to_next);
+    }
+  else
+    vlib_buffer_enqueue_to_next (vm, dma_info->node, to_next_bufs, nexts,
+				 ptd->n_packets);
+
+  vlib_increment_combined_counter (
+    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+    thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
+
+  mq->dma_info_head++;
+  if (mq->dma_info_head == mq->dma_info_size)
+    mq->dma_info_head = 0;
+
+  return;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  return CLIB_MARCH_FN_SELECT (memif_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_device_input_inline_dma (vlib_main_t *vm, vlib_node_runtime_t *node,
+			       memif_if_t *mif, memif_ring_type_t type,
+			       u16 qid, memif_interface_mode_t mode)
+{
+  memif_main_t *mm = &memif_main;
+  memif_ring_t *ring;
+  memif_queue_t *mq;
+  memif_per_thread_data_t *ptd;
+  u16 cur_slot, n_slots;
+  u16 n_buffers, n_alloc, n_desc;
+  memif_copy_op_t *co;
+  memif_dma_info_t *dma_info;
+
+  u16 mif_id = mif - mm->interfaces;
+  u32 i;
+
+  mq = vec_elt_at_index (mif->rx_queues, qid);
+  ring = mq->ring;
+
+  cur_slot = mq->last_head;
+  n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+
+  if (n_slots == 0)
+    return 0;
+
+  if ((mq->dma_info_tail + 1 == mq->dma_info_head) ||
+      ((mq->dma_info_head == mq->dma_info_size - 1) &&
+       (mq->dma_info_tail == 0)))
+    return 0;
+
+  vlib_dma_batch_t *db;
+  db = vlib_dma_batch_new (vm, mif->dma_input_config);
+  if (!db)
+    return 0;
+
+  dma_info = mq->dma_info + mq->dma_info_tail;
+  dma_info->node = node;
+  dma_info->mode = mode;
+  ptd = &dma_info->data;
+  vec_validate_aligned (dma_info->data.desc_len,
+			pow2_mask (mq->log2_ring_size),
+			CLIB_CACHE_LINE_BYTES);
+
+  n_desc = memif_parse_desc (&dma_info->data, mif, mq, cur_slot, n_slots);
+  cur_slot += n_desc;
+
+  if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    memif_validate_desc_data (&dma_info->data, mif, n_desc,
+			      /* is_ethernet */ 1);
+  else
+    memif_validate_desc_data (&dma_info->data, mif, n_desc,
+			      /* is_ethernet */ 0);
+
+  n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+  if (PREDICT_FALSE (n_buffers == 0))
+    {
+      /* All descriptors are bad. Release slots in the ring and bail */
+      memif_advance_ring (type, mq, ring, cur_slot);
+      goto done;
+    }
+
+  /* allocate free buffers */
+  vec_validate_aligned (dma_info->data.buffers, n_buffers - 1,
+			CLIB_CACHE_LINE_BYTES);
+  n_alloc = vlib_buffer_alloc_from_pool (vm, dma_info->data.buffers,
+					 n_buffers, mq->buffer_pool_index);
+  if (PREDICT_FALSE (n_alloc != n_buffers))
+    {
+      if (n_alloc)
+	vlib_buffer_free (vm, dma_info->data.buffers, n_alloc);
+      vlib_error_count (vm, node->node_index,
+			MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+      goto done;
+    }
+
+  dma_info->data.n_rx_bytes = ptd->n_rx_bytes;
+  dma_info->data.n_packets = ptd->n_packets;
+  /* copy data */
+  vlib_buffer_t *b;
+  u32 n_pkts = clib_min (MEMIF_RX_VECTOR_SZ, vec_len (ptd->copy_ops));
+  co = ptd->copy_ops;
+
+  for (i = 0; i < n_pkts; i++)
+    {
+      b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+      vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+			  co[i].data_len);
+    }
+
+  for (i = n_pkts; i < vec_len (ptd->copy_ops); i++)
+    {
+      b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+      vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+			  co[i].data_len);
+    }
+
+  dma_info->dma_tail = cur_slot;
+  mq->last_head = dma_info->dma_tail;
+  mq->dma_info_tail++;
+  if (mq->dma_info_tail == mq->dma_info_size)
+    mq->dma_info_tail = 0;
+
+done:
+  vlib_dma_batch_set_cookie (vm, db, (mif_id << 16) | qid);
+  vlib_dma_batch_submit (vm, db);
+  vec_reset_length (ptd->copy_ops);
+
+  return ptd->n_packets;
+}
 
 VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
@@ -1095,12 +1333,25 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
	     }
	   else
	     {
-	      if (mif->mode == MEMIF_INTERFACE_MODE_IP)
-		n_rx += memif_device_input_inline (
-		  vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+	      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) &&
+		  (mif->dma_input_config >= 0))
+		{
+		  if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+		    n_rx += memif_device_input_inline_dma (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+		  else
+		    n_rx += memif_device_input_inline_dma (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		}
	       else
-		n_rx += memif_device_input_inline (
-		  vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		{
+		  if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+		    n_rx += memif_device_input_inline (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+		  else
+		    n_rx += memif_device_input_inline (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		}
	     }
	 }
     }
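The per-queue dma_info array used throughout node.c and device.c behaves as a circular buffer: submit paths advance dma_info_tail, completion callbacks advance dma_info_head, and both wrap at dma_info_size. A self-contained model of that bookkeeping, assuming a power-of-2 size (as MEMIF_DMA_INFO_SIZE is) and simplified to the first clause of the submit-side full check:

#include <stdint.h>
#include <stdio.h>

#define DMA_INFO_SIZE 4 /* power of 2; stands in for MEMIF_DMA_INFO_SIZE */

/* Model of the dma_info ring: submit advances the tail, completion
 * advances the head, both wrap at the ring size as in the patch. */
typedef struct
{
  uint16_t head, tail, size;
} dma_ring_t;

static int
ring_full (dma_ring_t *r)
{
  /* one slot stays free so that head == tail always means empty */
  return (((uint16_t) (r->tail + 1)) & (r->size - 1)) == r->head;
}

static void
submit (dma_ring_t *r)
{
  r->tail = (uint16_t) ((r->tail + 1 == r->size) ? 0 : r->tail + 1);
}

static void
complete (dma_ring_t *r)
{
  r->head = (uint16_t) ((r->head + 1 == r->size) ? 0 : r->head + 1);
}

int
main (void)
{
  dma_ring_t r = { 0, 0, DMA_INFO_SIZE };
  while (!ring_full (&r))
    submit (&r);
  printf ("batches in flight: %u\n", (unsigned) (uint16_t) (r.tail - r.head));
  complete (&r);
  printf ("full after one completion? %s\n", ring_full (&r) ? "yes" : "no");
  return 0;
}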
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
index 5d8718dd669..f6335410ba8 100644
--- a/src/plugins/memif/private.h
+++ b/src/plugins/memif/private.h
@@ -16,6 +16,7 @@
  */
 
 #include <vppinfra/lock.h>
+#include <vlib/dma/dma.h>
 #include <vlib/log.h>
 
 #define MEMIF_DEFAULT_SOCKET_FILENAME  "memif.sock"
@@ -120,6 +121,11 @@ typedef struct
   int fd;
 } memif_msg_fifo_elt_t;
 
+#define MEMIF_RX_VECTOR_SZ  VLIB_FRAME_SIZE
+#define MEMIF_DMA_INFO_SIZE VLIB_FRAME_SIZE
+
+struct memif_dma_info;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -135,6 +141,15 @@ typedef struct
   u32 *buffers;
   u8 buffer_pool_index;
 
+  /* dma data */
+  u16 dma_head;
+  u16 dma_tail;
+  struct memif_dma_info *dma_info;
+  u16 dma_info_head;
+  u16 dma_info_tail;
+  u16 dma_info_size;
+  u8 dma_info_full;
+
   /* interrupts */
   int int_fd;
   uword int_clib_file_index;
@@ -145,14 +160,15 @@ typedef struct
   u32 queue_index;
 } memif_queue_t;
 
-#define foreach_memif_if_flag \
-  _(0, ADMIN_UP, "admin-up") \
-  _(1, IS_SLAVE, "slave") \
-  _(2, CONNECTING, "connecting") \
-  _(3, CONNECTED, "connected") \
-  _(4, DELETING, "deleting") \
-  _(5, ZERO_COPY, "zero-copy") \
-  _(6, ERROR, "error")
+#define foreach_memif_if_flag                                                \
+  _ (0, ADMIN_UP, "admin-up")                                                \
+  _ (1, IS_SLAVE, "slave")                                                   \
+  _ (2, CONNECTING, "connecting")                                            \
+  _ (3, CONNECTED, "connected")                                              \
+  _ (4, DELETING, "deleting")                                                \
+  _ (5, ZERO_COPY, "zero-copy")                                              \
+  _ (6, ERROR, "error")                                                      \
+  _ (7, USE_DMA, "use_dma")
 
 typedef enum
 {
@@ -207,6 +223,10 @@ typedef struct
   /* disconnect strings */
   u8 *local_disc_string;
   u8 *remote_disc_string;
+
+  /* dma config index */
+  int dma_input_config;
+  int dma_tx_config;
 } memif_if_t;
 
 typedef struct
@@ -224,8 +244,6 @@ typedef struct
   u16 buffer_vec_index;
 } memif_copy_op_t;
 
-#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
-
 typedef enum
 {
   MEMIF_DESC_STATUS_OK = 0,
@@ -270,6 +288,17 @@ typedef struct
   vlib_buffer_t buffer_template;
 } memif_per_thread_data_t;
 
+typedef struct memif_dma_info
+{
+  /* per thread data */
+  memif_interface_mode_t mode;
+  vlib_node_runtime_t *node;
+  u32 dma_head;
+  u32 dma_tail;
+  u8 finished;
+  memif_per_thread_data_t data;
+} memif_dma_info_t;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -309,6 +338,7 @@ typedef struct
   u8 *secret;
   u8 is_master;
   u8 is_zero_copy;
+  u8 use_dma;
   memif_interface_mode_t mode:8;
   memif_log2_ring_size_t log2_ring_size;
   u16 buffer_size;
@@ -354,7 +384,8 @@ clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf);
 clib_error_t *memif_msg_send_disconnect (memif_if_t * mif,
					  clib_error_t * err);
 u8 *format_memif_device_name (u8 * s, va_list * args);
-
+void memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+void memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
 /*
  * fd.io coding-style-patch-verification: ON