author     Marvin Liu <yong.liu@intel.com>       2023-03-15 01:01:38 +0800
committer  Damjan Marion <dmarion@0xa5.net>      2023-04-25 15:18:27 +0000
commit     cada0c5075ebf4c59db3192f190b35bf588fac34 (patch)
tree       f5852b11087ab0c2f2aa13a6e36ca96d6b568787
parent     efad24a84d35458e2c672b94027e54923a42fd25 (diff)
memif: support dma option
Introduce an async model into memif by utilizing the new DMA API. The
original process is split into a submission stage and a completion
stage. Since multiple submissions may be in flight simultaneously,
per-thread data is no longer safe; it is moved into a per-DMA data
structure instead. As the slave side already supports zero-copy mode,
the DMA option is only added on the master side.
Type: feature
Signed-off-by: Marvin Liu <yong.liu@intel.com>
Change-Id: I084f253866f5127cdc73b9a08c8ce73b091488f3
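Both completion callbacks added by this patch locate the owning interface and queue through a 32-bit cookie attached to each DMA batch: the interface index in the upper 16 bits, the queue index in the lower 16. A minimal standalone sketch of that encoding (helper names here are illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* pack interface index and queue id into one 32-bit cookie, as the
 * vlib_dma_batch_set_cookie () callers in this patch do */
static inline uint32_t
dma_cookie_encode (uint16_t mif_id, uint16_t qid)
{
  return ((uint32_t) mif_id << 16) | qid;
}

/* completion side: recover both indices from the cookie, mirroring
 * b->cookie >> 16 and b->cookie & 0xffff in the diff below */
static inline void
dma_cookie_decode (uint32_t cookie, uint16_t *mif_id, uint16_t *qid)
{
  *mif_id = cookie >> 16;
  *qid = cookie & 0xffff;
}

int
main (void)
{
  uint16_t mif_id, qid;
  dma_cookie_decode (dma_cookie_encode (3, 7), &mif_id, &qid);
  assert (mif_id == 3 && qid == 7);
  return 0;
}

Carrying the routing state in the batch itself is what lets the completion stage run without the per-thread data the commit message mentions.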
-rw-r--r--  src/plugins/memif/cli.c        |   2
-rw-r--r--  src/plugins/memif/device.c     | 281
-rw-r--r--  src/plugins/memif/memif.api    |  52
-rw-r--r--  src/plugins/memif/memif.c      |  89
-rw-r--r--  src/plugins/memif/memif_api.c  |  92
-rw-r--r--  src/plugins/memif/memif_test.c | 115
-rw-r--r--  src/plugins/memif/node.c       | 261
-rw-r--r--  src/plugins/memif/private.h    |  53
8 files changed, 925 insertions, 20 deletions
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c
index 3d3a681f18d..309cd8e49d7 100644
--- a/src/plugins/memif/cli.c
+++ b/src/plugins/memif/cli.c
@@ -172,6 +172,8 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
 	args.is_master = 0;
       else if (unformat (line_input, "no-zero-copy"))
 	args.is_zero_copy = 0;
+      else if (unformat (line_input, "use-dma"))
+	args.use_dma = 1;
       else if (unformat (line_input, "mode ip"))
 	args.mode = MEMIF_INTERFACE_MODE_IP;
       else if (unformat (line_input, "hw-addr %U",
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index f049a7be38e..ff6068f8243 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -369,6 +369,270 @@ no_free_slots:
   return n_left;
 }
 
+CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
+	       vlib_dma_batch_t *b)
+{
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+  memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
+  memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
+  memif_per_thread_data_t *ptd = &dma_info->data;
+
+  vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+
+  dma_info->finished = 1;
+  vec_reset_length (ptd->buffers);
+  vec_reset_length (ptd->copy_ops);
+
+  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+  mq->dma_info_head++;
+  if (mq->dma_info_head == mq->dma_info_size)
+    mq->dma_info_head = 0;
+  mq->dma_info_full = 0;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+			       u32 *buffers, memif_if_t *mif,
+			       memif_ring_type_t type, memif_queue_t *mq,
+			       u32 n_left)
+{
+  memif_ring_t *ring;
+  u32 n_copy_op;
+  u16 ring_size, mask, slot, free_slots;
+  int n_retries = 5, fallback = 0;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  memif_copy_op_t *co;
+  memif_region_index_t last_region = ~0;
+  void *last_region_shm = 0;
+  u16 head, tail;
+  memif_dma_info_t *dma_info;
+  memif_per_thread_data_t *ptd;
+  memif_main_t *mm = &memif_main;
+  u16 mif_id = mif - mm->interfaces;
+
+  ring = mq->ring;
+  ring_size = 1 << mq->log2_ring_size;
+  mask = ring_size - 1;
+
+  dma_info = mq->dma_info + mq->dma_info_tail;
+  ptd = &dma_info->data;
+
+  /* do software fallback if dma info ring is full */
+  u16 dma_mask = mq->dma_info_size - 1;
+  if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
+      ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
+    {
+      if (!mq->dma_info_full)
+	mq->dma_info_full = 1;
+      else
+	fallback = 1;
+    }
+
+  vlib_dma_batch_t *b = NULL;
+  if (PREDICT_TRUE (!fallback))
+    b = vlib_dma_batch_new (vm, mif->dma_tx_config);
+  if (!b)
+    return n_left;
+
+retry:
+
+  slot = tail = mq->dma_tail;
+  head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+  mq->last_tail += tail - mq->last_tail;
+  free_slots = head - mq->dma_tail;
+
+  while (n_left && free_slots)
+    {
+      memif_desc_t *d0;
+      void *mb0;
+      i32 src_off;
+      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+      u16 saved_slot = slot;
+
+      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
+
+      d0 = &ring->desc[slot & mask];
+      if (PREDICT_FALSE (last_region != d0->region))
+	{
+	  last_region_shm = mif->regions[d0->region].shm;
+	  last_region = d0->region;
+	}
+      mb0 = last_region_shm + d0->offset;
+
+      dst_off = 0;
+
+      /* slave is the producer, so it should be able to reset buffer length */
+      dst_left = d0->length;
+
+      if (PREDICT_TRUE (n_left >= 4))
+	vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+      bi0 = buffers[0];
+
+    next_in_chain:
+
+      b0 = vlib_get_buffer (vm, bi0);
+      src_off = b0->current_data;
+      src_left = b0->current_length;
+
+      while (src_left)
+	{
+	  if (PREDICT_FALSE (dst_left == 0))
+	    {
+	      if (free_slots)
+		{
+		  d0->length = dst_off;
+		  d0->flags = MEMIF_DESC_FLAG_NEXT;
+		  d0 = &ring->desc[slot & mask];
+		  dst_off = 0;
+		  dst_left = (type == MEMIF_RING_S2M) ?
+			       mif->run.buffer_size : d0->length;
+
+		  if (PREDICT_FALSE (last_region != d0->region))
+		    {
+		      last_region_shm = mif->regions[d0->region].shm;
+		      last_region = d0->region;
+		    }
+		  mb0 = last_region_shm + d0->offset;
+		}
+	      else
+		{
+		  /* we need to rollback vectors before bailing out */
+		  vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+		  vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
+		  vlib_error_count (vm, node->node_index,
+				    MEMIF_TX_ERROR_ROLLBACK, 1);
+		  slot = saved_slot;
+		  goto no_free_slots;
+		}
+	    }
+	  bytes_to_copy = clib_min (src_left, dst_left);
+	  memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+			     vec_len (ptd->buffers));
+	  src_off += bytes_to_copy;
+	  dst_off += bytes_to_copy;
+	  src_left -= bytes_to_copy;
+	  dst_left -= bytes_to_copy;
+	}
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+	{
+	  slot++;
+	  free_slots--;
+	  bi0 = b0->next_buffer;
+	  goto next_in_chain;
+	}
+
+      vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
+      d0->length = dst_off;
+      d0->flags = 0;
+
+      free_slots -= 1;
+      slot += 1;
+
+      buffers++;
+      n_left--;
+    }
+no_free_slots:
+
+  /* copy data */
+  n_copy_op = vec_len (ptd->copy_ops);
+  co = ptd->copy_ops;
+  while (n_copy_op >= 8)
+    {
+      clib_prefetch_load (co[4].data);
+      clib_prefetch_load (co[5].data);
+      clib_prefetch_load (co[6].data);
+      clib_prefetch_load (co[7].data);
+
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+      if (PREDICT_TRUE (!fallback))
+	{
+	  vlib_dma_batch_add (vm, b, co[0].data,
+			      b0->data + co[0].buffer_offset, co[0].data_len);
+	  vlib_dma_batch_add (vm, b, co[1].data,
+			      b1->data + co[1].buffer_offset, co[1].data_len);
+	  vlib_dma_batch_add (vm, b, co[2].data,
+			      b2->data + co[2].buffer_offset, co[2].data_len);
+	  vlib_dma_batch_add (vm, b, co[3].data,
+			      b3->data + co[3].buffer_offset, co[3].data_len);
+	}
+      else
+	{
+	  clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			    co[0].data_len);
+	  clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+			    co[1].data_len);
+	  clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+			    co[2].data_len);
+	  clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+			    co[3].data_len);
+	}
+
+      co += 4;
+      n_copy_op -= 4;
+    }
+  while (n_copy_op)
+    {
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      if (PREDICT_TRUE (!fallback))
+	vlib_dma_batch_add (vm, b, co[0].data,
+			    b0->data + co[0].buffer_offset, co[0].data_len);
+      else
+	clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+			  co[0].data_len);
+      co += 1;
+      n_copy_op -= 1;
+    }
+
+  /* save dma info before retry */
+  dma_info->dma_tail = slot;
+  mq->dma_tail = slot;
+  vec_reset_length (ptd->copy_ops);
+
+  if (n_left && n_retries--)
+    goto retry;
+
+  if (PREDICT_TRUE (!fallback))
+    {
+      vlib_dma_batch_set_cookie (vm, b,
+				 (mif_id << 16) | (mq - mif->tx_queues));
+      vlib_dma_batch_submit (vm, b);
+      dma_info->finished = 0;
+
+      if (b->n_enq)
+	{
+	  mq->dma_info_tail++;
+	  if (mq->dma_info_tail == mq->dma_info_size)
+	    mq->dma_info_tail = 0;
+	}
+    }
+  else if (fallback && dma_info->finished)
+    {
+      /* if dma has been completed, update ring immediately */
+      vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+      vec_reset_length (ptd->buffers);
+      __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
+    }
+
+  return n_left;
+}
+
 VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
					       vlib_node_runtime_t * node,
					       vlib_frame_t * frame)
@@ -399,8 +663,14 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
     n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
					 mq, ptd, n_left);
   else
-    n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S,
-					mq, ptd, n_left);
+    {
+      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+	n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
+						MEMIF_RING_M2S, mq, n_left);
+      else
+	n_left = memif_interface_tx_inline (vm, node, from, mif,
+					    MEMIF_RING_M2S, mq, ptd, n_left);
+    }
 
   if (tf->shared_queue)
     clib_spinlock_unlock (&mq->lockp);
@@ -416,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
       mq->int_count++;
     }
 
-  if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+  if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+    {
+      if (n_left)
+	vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+    }
+  else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
     vlib_buffer_free (vm, from, frame->n_vectors);
   else if (n_left)
     vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
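The software-fallback decision in memif_interface_tx_dma_inline above treats the per-queue dma_info array as a power-of-two circular ring: submission advances the tail, the completion callback advances the head, and a would-be collision falls back to clib_memcpy_fast. A simplified standalone model of that occupancy check (the plugin additionally defers fallback by one round via dma_info_full; names below are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8 /* must be a power of two, like MEMIF_DMA_INFO_SIZE */

/* head advances on completion, tail on submission; the ring counts as
 * full when advancing the tail would collide with the head, mirroring
 * the two-clause check in the patch */
static bool
ring_full (uint16_t head, uint16_t tail)
{
  uint16_t mask = RING_SIZE - 1;
  return (((tail + 1) & mask) == head) || ((head == mask) && (tail == 0));
}

int
main (void)
{
  printf ("empty ring full? %d\n", ring_full (0, 0));  /* prints 0 */
  printf ("one slot left full? %d\n", ring_full (2, 1)); /* prints 1 */
  return 0;
}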
diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api
index 91e72f73ab4..b800bfa6517 100644
--- a/src/plugins/memif/memif.api
+++ b/src/plugins/memif/memif.api
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-option version = "3.0.0";
+option version = "3.1.0";
 
 import "vnet/interface_types.api";
 import "vnet/ethernet/ethernet_types.api";
@@ -133,6 +133,56 @@ define memif_create_reply
   vl_api_interface_index_t sw_if_index;
 };
 
+/** \brief Create memory interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param role - role of the interface in the connection (master/slave)
+    @param mode - interface mode
+    @param rx_queues - number of rx queues (only valid for slave)
+    @param tx_queues - number of tx queues (only valid for slave)
+    @param id - 32bit integer used to authenticate and match opposite sides
+		of the connection
+    @param socket_id - socket filename id to be used for connection
+		       establishment
+    @param ring_size - the number of entries of RX/TX rings
+    @param buffer_size - size of the buffer allocated for each ring entry
+    @param no_zero_copy - if true, disable zero copy
+    @param use_dma - if true, use dma accelerate memory copy
+    @param hw_addr - interface MAC address
+    @param secret - optional, default is "", max length 24
+*/
+define memif_create_v2
+{
+  u32 client_index;
+  u32 context;
+
+  vl_api_memif_role_t role; /* 0 = master, 1 = slave */
+  vl_api_memif_mode_t mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+  u8 rx_queues; /* optional, default is 1 */
+  u8 tx_queues; /* optional, default is 1 */
+  u32 id; /* optional, default is 0 */
+  u32 socket_id; /* optional, default is 0, "/var/vpp/memif.sock" */
+  u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */
+  u16 buffer_size; /* optional, default is 2048 bytes */
+  bool no_zero_copy; /* disable zero copy */
+  bool use_dma; /* use dma acceleration */
+  vl_api_mac_address_t hw_addr; /* optional, randomly generated if zero */
+  string secret[24]; /* optional, default is "", max length 24 */
+  option vat_help = "[id <id>] [socket-id <id>] [ring_size <size>] [buffer_size <size>] [hw_addr <mac_address>] [secret <string>] [mode ip] <master|slave>";
+};
+
+/** \brief Create memory interface response
+    @param context - sender context, to match reply w/ request
+    @param retval - return value for request
+    @param sw_if_index - software index of the newly created interface
+*/
+define memif_create_v2_reply
+{
+  u32 context;
+  i32 retval;
+  vl_api_interface_index_t sw_if_index;
+};
+
 /** \brief Delete memory interface
     @param client_index - opaque cookie to identify the sender
     @param context - sender context, to match reply w/ request
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index c9d2f008cca..37028d8223e 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -301,6 +301,37 @@ memif_connect (memif_if_t * mif)
       mq->queue_index = vnet_hw_if_register_tx_queue (vnm, mif->hw_if_index, i);
       clib_spinlock_init (&mq->lockp);
+
+      if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+	{
+	  memif_dma_info_t *dma_info;
+	  mq->dma_head = 0;
+	  mq->dma_tail = 0;
+	  mq->dma_info_head = 0;
+	  mq->dma_info_tail = 0;
+	  mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+	  vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+				CLIB_CACHE_LINE_BYTES);
+
+	  vec_foreach (dma_info, mq->dma_info)
+	    {
+	      vec_validate_aligned (dma_info->data.desc_data,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_len,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_status,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.copy_ops, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.copy_ops);
+	      vec_validate_aligned (dma_info->data.buffers, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.buffers);
+	    }
+	}
     }
 
   if (vec_len (mif->tx_queues) > 0)
@@ -331,6 +362,37 @@ memif_connect (memif_if_t * mif)
       qi = vnet_hw_if_register_rx_queue (vnm, mif->hw_if_index, i,
					 VNET_HW_IF_RXQ_THREAD_ANY);
       mq->queue_index = qi;
+
+      if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+	{
+	  memif_dma_info_t *dma_info;
+	  mq->dma_head = 0;
+	  mq->dma_tail = 0;
+	  mq->dma_info_head = 0;
+	  mq->dma_info_tail = 0;
+	  mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+	  vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+				CLIB_CACHE_LINE_BYTES);
+	  vec_foreach (dma_info, mq->dma_info)
+	    {
+	      vec_validate_aligned (dma_info->data.desc_data,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_len,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.desc_status,
+				    pow2_mask (max_log2_ring_sz),
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_validate_aligned (dma_info->data.copy_ops, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.copy_ops);
+	      vec_validate_aligned (dma_info->data.buffers, 0,
+				    CLIB_CACHE_LINE_BYTES);
+	      vec_reset_length (dma_info->data.buffers);
+	    }
+	}
+
       if (mq->int_fd > -1)
	{
	  template.file_descriptor = mq->int_fd;
@@ -902,6 +964,16 @@ VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = {
 };
 /* *INDENT-ON* */
 
+static void
+memif_prepare_dma_args (vlib_dma_config_t *args)
+{
+  args->max_batches = 256;
+  args->max_transfer_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
+  args->barrier_before_last = 1;
+  args->sw_fallback = 1;
+  args->callback_fn = NULL;
+}
+
 clib_error_t *
 memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
 {
@@ -989,6 +1061,20 @@ memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
   if (args->secret)
     mif->secret = vec_dup (args->secret);
 
+  /* register dma config if enabled */
+  if (args->use_dma)
+    {
+      vlib_dma_config_t dma_args;
+      bzero (&dma_args, sizeof (dma_args));
+      memif_prepare_dma_args (&dma_args);
+
+      dma_args.max_transfers = 1 << args->log2_ring_size;
+      dma_args.callback_fn = memif_dma_completion_cb;
+      mif->dma_input_config = vlib_dma_config_add (vm, &dma_args);
+      dma_args.callback_fn = memif_tx_dma_completion_cb;
+      mif->dma_tx_config = vlib_dma_config_add (vm, &dma_args);
+    }
+
   if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
     {
@@ -1077,6 +1163,9 @@ memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
       mif->flags |= MEMIF_IF_FLAG_ZERO_COPY;
     }
 
+  if (args->use_dma)
+    mif->flags |= MEMIF_IF_FLAG_USE_DMA;
+
   vnet_hw_if_set_caps (vnm, mif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
   vnet_hw_if_set_input_node (vnm, mif->hw_if_index, memif_input_node.index);
   mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);
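The per-thread descriptor vectors validated above are sized with pow2_mask (max_log2_ring_sz), i.e. up to index 2^n - 1, so each holds exactly one entry per ring slot. A quick standalone check of that arithmetic (pow2_mask reimplemented here purely for illustration):

#include <assert.h>
#include <stdint.h>

/* same arithmetic as vppinfra's pow2_mask (): the lowest n bits set */
static inline uint64_t
pow2_mask (uint32_t n)
{
  return ((uint64_t) 1 << n) - 1;
}

int
main (void)
{
  /* a log2 ring size of 10 gives indices 0..1023: 1024 slots */
  assert (pow2_mask (10) == 1023);
  return 0;
}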
diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c
index 1a58e4c068d..16d3686d3d4 100644
--- a/src/plugins/memif/memif_api.c
+++ b/src/plugins/memif/memif_api.c
@@ -204,6 +204,98 @@ reply:
 }
 
 /**
+ * @brief Message handler for memif_create_v2 API.
+ * @param mp vl_api_memif_create_v2_t * mp the api message
+ */
+void
+vl_api_memif_create_v2_t_handler (vl_api_memif_create_v2_t *mp)
+{
+  memif_main_t *mm = &memif_main;
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_memif_create_reply_t *rmp;
+  memif_create_if_args_t args = { 0 };
+  u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+  static const u8 empty_hw_addr[6];
+  int rv = 0;
+  mac_address_t mac;
+
+  /* id */
+  args.id = clib_net_to_host_u32 (mp->id);
+
+  /* socket-id */
+  args.socket_id = clib_net_to_host_u32 (mp->socket_id);
+
+  /* secret */
+  mp->secret[ARRAY_LEN (mp->secret) - 1] = 0;
+  if (strlen ((char *) mp->secret) > 0)
+    {
+      vec_validate (args.secret, strlen ((char *) mp->secret));
+      strncpy ((char *) args.secret, (char *) mp->secret,
+	       vec_len (args.secret));
+    }
+
+  /* role */
+  args.is_master = (ntohl (mp->role) == MEMIF_ROLE_API_MASTER);
+
+  /* mode */
+  args.mode = ntohl (mp->mode);
+
+  args.is_zero_copy = mp->no_zero_copy ? 0 : 1;
+
+  args.use_dma = mp->use_dma;
+
+  /* rx/tx queues */
+  if (args.is_master == 0)
+    {
+      args.rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+      args.tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+      if (mp->rx_queues)
+	{
+	  args.rx_queues = mp->rx_queues;
+	}
+      if (mp->tx_queues)
+	{
+	  args.tx_queues = mp->tx_queues;
+	}
+    }
+
+  /* ring size */
+  if (mp->ring_size)
+    {
+      ring_size = ntohl (mp->ring_size);
+    }
+  if (!is_pow2 (ring_size))
+    {
+      rv = VNET_API_ERROR_INVALID_ARGUMENT;
+      goto reply;
+    }
+  args.log2_ring_size = min_log2 (ring_size);
+
+  /* buffer size */
+  args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+  if (mp->buffer_size)
+    {
+      args.buffer_size = ntohs (mp->buffer_size);
+    }
+
+  /* MAC address */
+  mac_address_decode (mp->hw_addr, &mac);
+  if (memcmp (&mac, empty_hw_addr, 6) != 0)
+    {
+      memcpy (args.hw_addr, &mac, 6);
+      args.hw_addr_set = 1;
+    }
+
+  rv = vnet_api_error (memif_create_if (vm, &args));
+
+  vec_free (args.secret);
+
+reply:
+  REPLY_MACRO2 (VL_API_MEMIF_CREATE_V2_REPLY,
+		({ rmp->sw_if_index = htonl (args.sw_if_index); }));
+}
+
+/**
  * @brief Message handler for memif_delete API.
  * @param mp vl_api_memif_delete_t * mp the api message
  */
diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c
index 07d68924b86..e8fb37c949f 100644
--- a/src/plugins/memif/memif_test.c
+++ b/src/plugins/memif/memif_test.c
@@ -325,6 +325,121 @@ static void vl_api_memif_create_reply_t_handler
   vam->regenerate_interface_table = 1;
 }
 
+/* memif-create_v2 API */
+static int
+api_memif_create_v2 (vat_main_t *vam)
+{
+  unformat_input_t *i = vam->input;
+  vl_api_memif_create_v2_t *mp;
+  u32 id = 0;
+  u32 socket_id = 0;
+  u8 *secret = 0;
+  u8 role = 1;
+  u32 ring_size = 0;
+  u8 use_dma = 0;
+  u32 buffer_size = 0;
+  u8 hw_addr[6] = { 0 };
+  u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+  u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+  int ret;
+  u8 mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "id %u", &id))
+	;
+      else if (unformat (i, "socket-id %u", &socket_id))
+	;
+      else if (unformat (i, "secret %s", &secret))
+	;
+      else if (unformat (i, "ring_size %u", &ring_size))
+	;
+      else if (unformat (i, "buffer_size %u", &buffer_size))
+	;
+      else if (unformat (i, "master"))
+	role = 0;
+      else if (unformat (i, "use_dma %u", &use_dma))
+	;
+      else if (unformat (i, "slave %U", unformat_memif_queues, &rx_queues,
+			 &tx_queues))
+	role = 1;
+      else if (unformat (i, "mode ip"))
+	mode = MEMIF_INTERFACE_MODE_IP;
+      else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+	;
+      else
+	{
+	  clib_warning ("unknown input '%U'", format_unformat_error, i);
+	  return -99;
+	}
+    }
+
+  if (socket_id == ~0)
+    {
+      errmsg ("invalid socket-id\n");
+      return -99;
+    }
+
+  if (!is_pow2 (ring_size))
+    {
+      errmsg ("ring size must be power of 2\n");
+      return -99;
+    }
+
+  if (rx_queues > 255 || rx_queues < 1)
+    {
+      errmsg ("rx queue must be between 1 - 255\n");
+      return -99;
+    }
+
+  if (tx_queues > 255 || tx_queues < 1)
+    {
+      errmsg ("tx queue must be between 1 - 255\n");
+      return -99;
+    }
+
+  M2 (MEMIF_CREATE, mp, strlen ((char *) secret));
+
+  mp->mode = mode;
+  mp->id = clib_host_to_net_u32 (id);
+  mp->role = role;
+  mp->use_dma = use_dma;
+  mp->ring_size = clib_host_to_net_u32 (ring_size);
+  mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff);
+  mp->socket_id = clib_host_to_net_u32 (socket_id);
+  if (secret != 0)
+    {
+      char *p = (char *) &mp->secret;
+      p += vl_api_vec_to_api_string (secret, (vl_api_string_t *) p);
+      vec_free (secret);
+    }
+  memcpy (mp->hw_addr, hw_addr, 6);
+  mp->rx_queues = rx_queues;
+  mp->tx_queues = tx_queues;
+
+  S (mp);
+  W (ret);
+  return ret;
+}
+
+/* memif-create_v2 reply handler */
+static void
+vl_api_memif_create_v2_reply_t_handler (vl_api_memif_create_reply_t *mp)
+{
+  vat_main_t *vam = memif_test_main.vat_main;
+  i32 retval = ntohl (mp->retval);
+
+  if (retval == 0)
+    {
+      fformat (vam->ofp, "created memif with sw_if_index %d\n",
+	       ntohl (mp->sw_if_index));
+    }
+
+  vam->retval = retval;
+  vam->result_ready = 1;
+  vam->regenerate_interface_table = 1;
+}
+
 /* memif-delete API */
 static int
 api_memif_delete (vat_main_t * vam)
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 40dcf682798..1ee94f2e81b 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -1054,6 +1054,244 @@ done:
   return n_rx_packets;
 }
 
+CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm,
+	       vlib_dma_batch_t *b)
+{
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+  u32 thread_index = vm->thread_index;
+  u32 n_left_to_next = 0;
+  u16 nexts[MEMIF_RX_VECTOR_SZ], *next;
+  u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+  uword n_trace;
+  memif_dma_info_t *dma_info;
+  u16 qid = b->cookie & 0xffff;
+  memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+  dma_info = mq->dma_info + mq->dma_info_head;
+  memif_per_thread_data_t *ptd = &dma_info->data;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+  /* prepare buffer template and next indices */
+  i16 start_offset =
+    (dma_info->mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+  vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+  vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+  ptd->buffer_template.current_data = start_offset;
+  ptd->buffer_template.current_config_index = 0;
+  ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+  ptd->buffer_template.ref_count = 1;
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    {
+      next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+      if (mif->per_interface_next_index != ~0)
+	next_index = mif->per_interface_next_index;
+      else
+	vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index,
+					    &ptd->buffer_template);
+
+      vlib_get_new_next_frame (vm, dma_info->node, next_index, to_next_bufs,
+			       n_left_to_next);
+      if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+	{
+	  vlib_next_frame_t *nf;
+	  vlib_frame_t *f;
+	  ethernet_input_frame_t *ef;
+	  nf = vlib_node_runtime_get_next_frame (vm, dma_info->node,
+						 next_index);
+	  f = vlib_get_frame (vm, nf->frame);
+	  f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+	  ef = vlib_frame_scalar_args (f);
+	  ef->sw_if_index = mif->sw_if_index;
+	  ef->hw_if_index = mif->hw_if_index;
+	  vlib_frame_no_append (f);
+	}
+    }
+
+  vec_reset_length (ptd->buffers);
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_IP)
+    memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs,
+			     nexts, 1);
+  else
+    memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs,
+			     nexts, 0);
+
+  /* packet trace if enabled */
+  if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, dma_info->node))))
+    {
+      u32 n_left = ptd->n_packets;
+      bi = to_next_bufs;
+      next = nexts;
+      u32 ni = next_index;
+      while (n_trace && n_left)
+	{
+	  vlib_buffer_t *b;
+	  memif_input_trace_t *tr;
+	  if (dma_info->mode != MEMIF_INTERFACE_MODE_ETHERNET)
+	    ni = next[0];
+	  b = vlib_get_buffer (vm, bi[0]);
+	  if (PREDICT_TRUE (vlib_trace_buffer (vm, dma_info->node, ni, b,
+					       /* follow_chain */ 0)))
+	    {
+	      tr = vlib_add_trace (vm, dma_info->node, b, sizeof (*tr));
+	      tr->next_index = ni;
+	      tr->hw_if_index = mif->hw_if_index;
+	      tr->ring = qid;
+	      n_trace--;
+	    }
+
+	  /* next */
+	  n_left--;
+	  bi++;
+	  next++;
+	}
+      vlib_set_trace_count (vm, dma_info->node, n_trace);
+    }
+
+  if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    {
+      n_left_to_next -= ptd->n_packets;
+      vlib_put_next_frame (vm, dma_info->node, next_index, n_left_to_next);
+    }
+  else
+    vlib_buffer_enqueue_to_next (vm, dma_info->node, to_next_bufs, nexts,
+				 ptd->n_packets);
+
+  vlib_increment_combined_counter (
+    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+    thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
+
+  mq->dma_info_head++;
+  if (mq->dma_info_head == mq->dma_info_size)
+    mq->dma_info_head = 0;
+
+  return;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  return CLIB_MARCH_FN_SELECT (memif_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_device_input_inline_dma (vlib_main_t *vm, vlib_node_runtime_t *node,
+			       memif_if_t *mif, memif_ring_type_t type,
+			       u16 qid, memif_interface_mode_t mode)
+{
+  memif_main_t *mm = &memif_main;
+  memif_ring_t *ring;
+  memif_queue_t *mq;
+  memif_per_thread_data_t *ptd;
+  u16 cur_slot, n_slots;
+  u16 n_buffers, n_alloc, n_desc;
+  memif_copy_op_t *co;
+  memif_dma_info_t *dma_info;
+
+  u16 mif_id = mif - mm->interfaces;
+  u32 i;
+
+  mq = vec_elt_at_index (mif->rx_queues, qid);
+  ring = mq->ring;
+
+  cur_slot = mq->last_head;
+  n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+
+  if (n_slots == 0)
+    return 0;
+
+  if ((mq->dma_info_tail + 1 == mq->dma_info_head) ||
+      ((mq->dma_info_head == mq->dma_info_size - 1) &&
+       (mq->dma_info_tail == 0)))
+    return 0;
+
+  vlib_dma_batch_t *db;
+  db = vlib_dma_batch_new (vm, mif->dma_input_config);
+  if (!db)
+    return 0;
+
+  dma_info = mq->dma_info + mq->dma_info_tail;
+  dma_info->node = node;
+  dma_info->mode = mode;
+  ptd = &dma_info->data;
+  vec_validate_aligned (dma_info->data.desc_len,
+			pow2_mask (mq->log2_ring_size),
+			CLIB_CACHE_LINE_BYTES);
+
+  n_desc = memif_parse_desc (&dma_info->data, mif, mq, cur_slot, n_slots);
+  cur_slot += n_desc;
+
+  if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+    memif_validate_desc_data (&dma_info->data, mif, n_desc,
+			      /* is_ethernet */ 1);
+  else
+    memif_validate_desc_data (&dma_info->data, mif, n_desc,
+			      /* is_ethernet */ 0);
+
+  n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+  if (PREDICT_FALSE (n_buffers == 0))
+    {
+      /* All descriptors are bad. Release slots in the ring and bail */
+      memif_advance_ring (type, mq, ring, cur_slot);
+      goto done;
+    }
+
+  /* allocate free buffers */
+  vec_validate_aligned (dma_info->data.buffers, n_buffers - 1,
+			CLIB_CACHE_LINE_BYTES);
+  n_alloc = vlib_buffer_alloc_from_pool (vm, dma_info->data.buffers,
+					 n_buffers, mq->buffer_pool_index);
+  if (PREDICT_FALSE (n_alloc != n_buffers))
+    {
+      if (n_alloc)
+	vlib_buffer_free (vm, dma_info->data.buffers, n_alloc);
+      vlib_error_count (vm, node->node_index,
+			MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+      goto done;
+    }
+
+  dma_info->data.n_rx_bytes = ptd->n_rx_bytes;
+  dma_info->data.n_packets = ptd->n_packets;
+  /* copy data */
+  vlib_buffer_t *b;
+  u32 n_pkts = clib_min (MEMIF_RX_VECTOR_SZ, vec_len (ptd->copy_ops));
+  co = ptd->copy_ops;
+
+  for (i = 0; i < n_pkts; i++)
+    {
+      b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+      vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+			  co[i].data_len);
+    }
+
+  for (i = n_pkts; i < vec_len (ptd->copy_ops); i++)
+    {
+      b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+      vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+			  co[i].data_len);
+    }
+
+  dma_info->dma_tail = cur_slot;
+  mq->last_head = dma_info->dma_tail;
+  mq->dma_info_tail++;
+  if (mq->dma_info_tail == mq->dma_info_size)
+    mq->dma_info_tail = 0;
+
+done:
+  vlib_dma_batch_set_cookie (vm, db, (mif_id << 16) | qid);
+  vlib_dma_batch_submit (vm, db);
+  vec_reset_length (ptd->copy_ops);
+
+  return ptd->n_packets;
+}
 
 VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
				 vlib_node_runtime_t * node,
@@ -1095,12 +1333,25 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
	    }
	  else
	    {
-	      if (mif->mode == MEMIF_INTERFACE_MODE_IP)
-		n_rx += memif_device_input_inline (
-		  vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+	      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) &&
+		  (mif->dma_input_config >= 0))
+		{
+		  if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+		    n_rx += memif_device_input_inline_dma (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+		  else
+		    n_rx += memif_device_input_inline_dma (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		}
	      else
-		n_rx += memif_device_input_inline (
-		  vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		{
+		  if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+		    n_rx += memif_device_input_inline (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+		  else
+		    n_rx += memif_device_input_inline (
+		      vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+		}
	    }
	}
     }
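Both the RX path above and the TX path in device.c follow the same batch lifecycle: allocate a batch for the direction's DMA config, append one copy per memif_copy_op_t, tag the batch with the routing cookie, and submit; the registered callback later publishes the ring tail. A condensed sketch of that sequence with local stand-ins for the vlib DMA calls (the stubs execute copies synchronously, purely for illustration):

/* stand-ins for vlib_dma_batch_new/add/set_cookie/submit
 * from <vlib/dma/dma.h>; real batches complete asynchronously */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint32_t cookie; int n_enq; } dma_batch_t;

static dma_batch_t *
batch_new (int config)
{
  static dma_batch_t b;
  (void) config;
  b.n_enq = 0;
  return &b;
}

static void
batch_add (dma_batch_t *b, void *dst, void *src, uint32_t len)
{
  memcpy (dst, src, len); /* hardware would do this copy asynchronously */
  b->n_enq++;
}

static void batch_set_cookie (dma_batch_t *b, uint32_t c) { b->cookie = c; }

static void
batch_submit (dma_batch_t *b)
{
  printf ("submit %d copies, cookie 0x%x\n", b->n_enq, (unsigned) b->cookie);
}

int
main (void)
{
  char src[64] = "payload", dst[64];
  dma_batch_t *b = batch_new (/* dma_input_config */ 0);
  batch_add (b, dst, src, sizeof (src)); /* one op per memif_copy_op_t */
  batch_set_cookie (b, (3u << 16) | 1);	 /* interface 3, queue 1 */
  batch_submit (b);			 /* completion cb fires later */
  return 0;
}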
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
index 5d8718dd669..f6335410ba8 100644
--- a/src/plugins/memif/private.h
+++ b/src/plugins/memif/private.h
@@ -16,6 +16,7 @@
  */
 
 #include <vppinfra/lock.h>
+#include <vlib/dma/dma.h>
 #include <vlib/log.h>
 
 #define MEMIF_DEFAULT_SOCKET_FILENAME  "memif.sock"
@@ -120,6 +121,11 @@ typedef struct
   int fd;
 } memif_msg_fifo_elt_t;
 
+#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+#define MEMIF_DMA_INFO_SIZE VLIB_FRAME_SIZE
+
+struct memif_dma_info;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -135,6 +141,15 @@ typedef struct
   u32 *buffers;
   u8 buffer_pool_index;
 
+  /* dma data */
+  u16 dma_head;
+  u16 dma_tail;
+  struct memif_dma_info *dma_info;
+  u16 dma_info_head;
+  u16 dma_info_tail;
+  u16 dma_info_size;
+  u8 dma_info_full;
+
   /* interrupts */
   int int_fd;
   uword int_clib_file_index;
@@ -145,14 +160,15 @@ typedef struct
   u32 queue_index;
 } memif_queue_t;
 
-#define foreach_memif_if_flag \
-  _(0, ADMIN_UP, "admin-up") \
-  _(1, IS_SLAVE, "slave") \
-  _(2, CONNECTING, "connecting") \
-  _(3, CONNECTED, "connected") \
-  _(4, DELETING, "deleting") \
-  _(5, ZERO_COPY, "zero-copy") \
-  _(6, ERROR, "error")
+#define foreach_memif_if_flag                                                 \
+  _ (0, ADMIN_UP, "admin-up")                                                 \
+  _ (1, IS_SLAVE, "slave")                                                    \
+  _ (2, CONNECTING, "connecting")                                             \
+  _ (3, CONNECTED, "connected")                                               \
+  _ (4, DELETING, "deleting")                                                 \
+  _ (5, ZERO_COPY, "zero-copy")                                               \
+  _ (6, ERROR, "error")                                                       \
+  _ (7, USE_DMA, "use_dma")
 
 typedef enum
 {
@@ -207,6 +223,10 @@ typedef struct
   /* disconnect strings */
   u8 *local_disc_string;
   u8 *remote_disc_string;
+
+  /* dma config index */
+  int dma_input_config;
+  int dma_tx_config;
 } memif_if_t;
 
 typedef struct
@@ -224,8 +244,6 @@ typedef struct
   u16 buffer_vec_index;
 } memif_copy_op_t;
 
-#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
-
 typedef enum
 {
   MEMIF_DESC_STATUS_OK = 0,
@@ -270,6 +288,17 @@ typedef struct
   vlib_buffer_t buffer_template;
 } memif_per_thread_data_t;
 
+typedef struct memif_dma_info
+{
+  /* per thread data */
+  memif_interface_mode_t mode;
+  vlib_node_runtime_t *node;
+  u32 dma_head;
+  u32 dma_tail;
+  u8 finished;
+  memif_per_thread_data_t data;
+} memif_dma_info_t;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -309,6 +338,7 @@ typedef struct
   u8 *secret;
   u8 is_master;
   u8 is_zero_copy;
+  u8 use_dma;
   memif_interface_mode_t mode:8;
   memif_log2_ring_size_t log2_ring_size;
   u16 buffer_size;
@@ -354,7 +384,8 @@ clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf);
 clib_error_t *memif_msg_send_disconnect (memif_if_t * mif,
					 clib_error_t * err);
 u8 *format_memif_device_name (u8 * s, va_list * args);
-
+void memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+void memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
 /*
  * fd.io coding-style-patch-verification: ON